% Bibliography records icl:567 through icl:153, one field per line.
% Conference venues are stored in booktitle, page ranges use a double
% hyphen, and month uses the standard three-letter macros.
% Records whose exported month was 00 carry no month field.
% Publication status (to appear, submitted, presented) lives in note.

@article{icl:567,
  author   = {Angskun, Thara and Fagg, Graham and Bosilca, George and Pjesivac-Grbovic, Jelena and Dongarra, Jack},
  title    = {Self-Healing Network for Scalable Fault-Tolerant Runtime Environments},
  journal  = {Future Generation Computer Systems},
  volume   = {26},
  number   = {3},
  pages    = {479--485},
  year     = {2010},
  month    = mar,
}

@inproceedings{icl:355,
  author    = {Angskun, Thara and Bosilca, George and Dongarra, Jack},
  title     = {Binomial Graph: A Scalable and Fault-Tolerant Logical Network Topology},
  booktitle = {Proceedings of The Fifth International Symposium on Parallel and Distributed Processing and Applications (ISPA07)},
  year      = {2007},
  month     = aug,
  publisher = {Springer},
  address   = {Niagara Falls, Canada},
  keywords  = {ftmpi},
}

@inproceedings{icl:357,
  author    = {Pjesivac-Grbovic, Jelena and Bosilca, George and Fagg, Graham and Angskun, Thara and Dongarra, Jack},
  title     = {Decision Trees and {MPI} Collective Algorithm Selection Problem},
  booktitle = {Euro-Par 2007},
  pages     = {105--115},
  year      = {2007},
  month     = aug,
  publisher = {Springer},
  address   = {Rennes, France},
  keywords  = {ftmpi},
}

@article{icl:356,
  author    = {Pjesivac-Grbovic, Jelena and Bosilca, George and Fagg, Graham and Angskun, Thara and Dongarra, Jack},
  title     = {{MPI} Collective Algorithm Selection and Quadtree Encoding},
  journal   = {Parallel Computing},
  year      = {2007},
  publisher = {Elsevier},
  note      = {Special Edition: EuroPVM/MPI 2006},
  keywords  = {ftmpi},
}

@inproceedings{icl:374,
  author    = {Angskun, Thara and Bosilca, George and Vander Zanden, Brad and Dongarra, Jack},
  title     = {Optimal Routing in Binomial Graph Networks},
  booktitle = {The International Conference on Parallel and Distributed Computing, Applications and Technologies (PDCAT)},
  year      = {2007},
  month     = dec,
  publisher = {IEEE Computer Society},
  address   = {Adelaide, Australia},
  keywords  = {ftmpi},
}

@article{icl:358,
  author    = {Pjesivac-Grbovic, Jelena and Angskun, Thara and Bosilca, George and Fagg, Graham and Gabriel, Edgar and Dongarra, Jack},
  title     = {Performance Analysis of {MPI} Collective Operations},
  journal   = {Cluster Computing},
  volume    = {10},
  number    = {2},
  pages     = {127--143},
  year      = {2007},
  month     = jun,
  publisher = {Springer Netherlands},
  keywords  = {ftmpi},
}

@inproceedings{icl:354,
  author    = {Angskun, Thara and Bosilca, George and Fagg, Graham and Pjesivac-Grbovic, Jelena and Dongarra, Jack},
  title     = {Reliability Analysis of Self-Healing Network using Discrete-Event Simulation},
  booktitle = {Proceedings of Seventh IEEE International Symposium on Cluster Computing and the Grid (CCGrid '07)},
  pages     = {437--444},
  year      = {2007},
  month     = may,
  publisher = {IEEE Computer Society},
  keywords  = {ftmpi},
}

@inproceedings{icl:380,
  author    = {Angskun, Thara and Bosilca, George and Dongarra, Jack},
  title     = {Self-Healing in Binomial Graph Networks},
  booktitle = {2nd International Workshop On Reliability in Decentralized Distributed Systems (RDDS 2007)},
  year      = {2007},
  month     = nov,
  address   = {Vilamoura, Algarve, Portugal},
}

@inproceedings{icl:315,
  author    = {Fagg, Graham and Pjesivac-Grbovic, Jelena and Bosilca, George and Angskun, Thara and Dongarra, Jack},
  title     = {Flexible Collective Communication Tuning Architecture Applied to {Open MPI}},
  booktitle = {2006 Euro PVM/MPI},
  year      = {2006},
  month     = jan,
  address   = {Bonn, Germany},
  note      = {Submitted},
  keywords  = {ftmpi},
}

% NOTE(review): the institution below is inferred from the ICL-UT report
% number prefix; confirm the exact institution name.
@techreport{icl:314,
  author      = {Pjesivac-Grbovic, Jelena and Fagg, Graham and Angskun, Thara and Bosilca, George and Dongarra, Jack},
  title       = {{MPI} Collective Algorithm Selection and Quadtree Encoding},
  type        = {ICL Technical Report},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  number      = {ICL-UT-06-11},
  year        = {2006},
  keywords    = {ftmpi},
}

@article{icl:323,
  author    = {Pjesivac-Grbovic, Jelena and Fagg, Graham and Angskun, Thara and Bosilca, George and Dongarra, Jack},
  title     = {{MPI} Collective Algorithm Selection and Quadtree Encoding},
  journal   = {Lecture Notes in Computer Science},
  volume    = {4192},
  pages     = {40--48},
  year      = {2006},
  month     = sep,
  publisher = {Springer Berlin / Heidelberg},
  note      = {ICL Technical Report ICL-UT-06-13},
  keywords  = {ftmpi},
}

@inproceedings{icl:316,
  author    = {Angskun, Thara and Fagg, Graham and Bosilca, George and Pjesivac-Grbovic, Jelena and Dongarra, Jack},
  title     = {Scalable Fault Tolerant Protocol for Parallel Runtime Environments},
  booktitle = {2006 Euro PVM/MPI},
  year      = {2006},
  address   = {Bonn, Germany},
  note      = {ICL Technical Report ICL-UT-06-12},
  keywords  = {ftmpi},
}

@inproceedings{icl:330,
  author    = {Angskun, Thara and Fagg, Graham and Bosilca, George and Pjesivac-Grbovic, Jelena and Dongarra, Jack},
  title     = {Self-Healing Network for Scalable Fault Tolerant Runtime Environments},
  booktitle = {DAPSYS 2006, 6th Austrian-Hungarian Workshop on Distributed and Parallel Systems},
  year      = {2006},
  month     = jan,
  address   = {Innsbruck, Austria},
}

@inproceedings{icl:265,
  author    = {Chen, Zizhong and Fagg, Graham and Gabriel, Edgar and Langou, Julien and Angskun, Thara and Bosilca, George and Dongarra, Jack},
  title     = {Fault Tolerant High Performance Computing by a Coding Approach},
  booktitle = {Proceedings of ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming},
  year      = {2005},
  month     = jan,
  address   = {Chicago, Illinois},
  note      = {To appear},
  keywords  = {ftmpi, grads, lacsi, sans},
}

@inproceedings{icl:249,
  author    = {Pjesivac-Grbovic, Jelena and Angskun, Thara and Bosilca, George and Fagg, Graham and Gabriel, Edgar and Dongarra, Jack},
  title     = {Performance Analysis of {MPI} Collective Operations},
  booktitle = {4th International Workshop on Performance Modeling, Evaluation, and Optimization of Parallel and Distributed Systems (PMEO-PDS '05)},
  year      = {2005},
  month     = apr,
  address   = {Denver, Colorado},
  keywords  = {ftmpi},
}

% NOTE(review): icl:306 has the same title and authors as icl:358 and
% looks like its pre-publication record; confirm before merging the two.
@article{icl:306,
  author   = {Pjesivac-Grbovic, Jelena and Angskun, Thara and Bosilca, George and Fagg, Graham and Gabriel, Edgar and Dongarra, Jack},
  title    = {Performance Analysis of {MPI} Collective Operations},
  journal  = {Cluster Computing},
  year     = {2005},
  month    = jan,
  note     = {To appear},
  keywords = {ftmpi},
}

@inproceedings{icl:279,
  author    = {Fagg, Graham and Angskun, Thara and Bosilca, George and Pjesivac-Grbovic, Jelena and Dongarra, Jack},
  editor    = {Di Martino, Beniamino},
  title     = {Scalable Fault Tolerant {MPI}: Extending the Recovery Algorithm},
  booktitle = {Proceedings of 12th European Parallel Virtual Machine and Message Passing Interface Conference - Euro PVM/MPI},
  volume    = {3666},
  pages     = {67},
  year      = {2005},
  month     = sep,
  publisher = {Springer-Verlag Berlin},
  address   = {Sorrento (Naples), Italy},
  keywords  = {ftmpi},
}

@inproceedings{icl:230,
  author    = {Fagg, Graham and Gabriel, Edgar and Bosilca, George and Angskun, Thara and Chen, Zizhong and Pjesivac-Grbovic, Jelena and London, Kevin and Dongarra, Jack},
  title     = {Extending the {MPI} Specification for Process Fault Tolerance on High Performance Computing Systems},
  booktitle = {Proceedings of ISC2004},
  year      = {2004},
  month     = jun,
  address   = {Heidelberg, Germany},
  note      = {To appear},
  keywords  = {ftmpi, lacsi},
}

@article{icl:240,
  author   = {Fagg, Graham and Gabriel, Edgar and Chen, Zizhong and Angskun, Thara and Bosilca, George and Pjesivac-Grbovic, Jelena and Dongarra, Jack},
  title    = {Process Fault-Tolerance: Semantics, Design and Applications for High Performance Computing},
  journal  = {International Journal for High Performance Applications and Supercomputing},
  year     = {2004},
  month    = apr,
  note     = {To appear},
  keywords = {ftmpi, lacsi},
}

@inproceedings{icl:153,
  author    = {Fagg, Graham and Gabriel, Edgar and Chen, Zizhong and Angskun, Thara and Bosilca, George and Bukovsky, Antonin and Dongarra, Jack},
  title     = {Fault Tolerant Communication Library and Applications for High Performance Computing},
  booktitle = {Los Alamos Computer Science Institute (LACSI) Symposium 2003},
  year      = {2003},
  month     = oct,
  address   = {Santa Fe, NM},
  note      = {Presented},
  keywords  = {ftmpi, lacsi},
}
% Workshop paper record; the venue is stored in booktitle, which is the
% field an inproceedings entry requires, and month uses the standard macro.
@inproceedings{icl:144,
  author    = {Gabriel, Edgar and Fagg, Graham and Bukovsky, Antonin and Angskun, Thara and Dongarra, Jack},
  title     = {A Fault-Tolerant Communication Library for Grid Environments},
  booktitle = {17th Annual ACM International Conference on Supercomputing (ICS'03) International Workshop on Grid Computing and e-Science},
  year      = {2003},
  month     = jun,
  address   = {San Francisco},
  keywords  = {ftmpi, lacsi},
}