@inproceedings{761,
  title        = {Standards for Graph Algorithm Primitives},
  booktitle    = {17th {IEEE} High Performance Extreme Computing Conference ({HPEC} {\textquoteright}13)},
  year         = {2013},
  month        = sep,
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Waltham, MA},
  abstract     = {It is our view that the state of the art in constructing a large collection of graph algorithms in terms of linear algebraic operations is mature enough to support the emergence of a standard set of primitive building blocks. This paper is a position paper defining the problem and announcing our intention to launch an open effort to define this standard.},
  keywords     = {algorithms, graphs, linear algebra, software standards},
  doi          = {10.1109/HPEC.2013.6670338},
  author       = {Mattson, Tim and Bader, David and Berry, Jon and Buluc, Aydin and Dongarra, Jack and Faloutsos, Christos and Feo, John and Gilbert, John and Gonzalez, Joseph and Hendrickson, Bruce and Kepner, Jeremy and Leiserson, Charles and Lumsdaine, Andrew and Padua, David and Poole, Steve W. and Reinhardt, Steve and Stonebraker, Mike and Wallach, Steve and Yoo, Andrew},
}

@incollection{875,
  title     = {A New Approach to {MPI} Collective Communication Implementations},
  booktitle = {Distributed and Parallel Systems},
  year      = {2007},
  pages     = {45--54},
  publisher = {Springer US},
  abstract  = {Recent research into the optimization of collective MPI operations has resulted in a wide variety of algorithms and corresponding implementations, each typically only applicable in a relatively narrow scope: on a specific architecture, on a specific network, with a specific number of processes, with a specific data size and/or data-type {\textendash} or any combination of these (or other) factors. This situation presents an enormous challenge to portable MPI implementations which are expected to provide optimized collective operation performance on all platforms. 
Many portable implementations have attempted to provide a token number of algorithms that are intended to realize good performance on most systems. However, many platform configurations are still left without well-tuned collective operations. This paper presents a proposal for a framework that will allow a wide variety of collective algorithm implementations and a flexible, multi-tiered selection process for choosing which implementation to use when an application invokes an MPI collective function.},
  keywords  = {Automatic Selection, Collective Operation, Framework, Message Passing (MPI), Open MPI},
  isbn      = {978-0-387-69857-1},
  doi       = {10.1007/978-0-387-69858-8_5},
  author    = {Hoefler, Torsten and Squyres, Jeffrey M. and Fagg, Graham and Bosilca, George and Rehm, Wolfgang and Lumsdaine, Andrew},
}

@inproceedings{icl:652,
  title     = {A High-Performance, Heterogeneous {MPI}},
  booktitle = {HeteroPar 2006},
  year      = {2006},
  month     = sep,
  address   = {Barcelona, Spain},
  author    = {Graham, Richard L. and Shipman, Galen M. and Barrett, Brian and Castain, Ralph and Bosilca, George and Lumsdaine, Andrew},
}

@article{icl:125,
  title   = {An Updated Set of Basic Linear Algebra Subprograms ({BLAS})},
  journal = {ACM Transactions on Mathematical Software},
  volume  = {28},
  number  = {2},
  year    = {2002},
  month   = dec,
  pages   = {135--151},
  doi     = {10.1145/567806.567807},
  author  = {Blackford, Susan and Demmel, James and Dongarra, Jack and Duff, Iain and Hammarling, Sven and Henry, Greg and Heroux, Michael and Kaufman, Linda and Lumsdaine, Andrew and Petitet, Antoine and Pozo, Roldan and Remington, Karin and Whaley, Clint},
}

@unpublished{icl:6,
  title         = {Basic Linear Algebra Subprograms ({BLAS})},
  note          = {An update; submitted to ACM Transactions on Mathematical Software},
  year          = {2001},
  month         = feb,
  internal-note = {likely an earlier, submitted version of entry icl:125 {\textendash} verify before citing both},
  author        = {Blackford, Susan and Demmel, James and Dongarra, Jack and Duff, Iain and Hammarling, Sven and Henry, Greg and Heroux, Michael and Kaufman, Linda and Lumsdaine, Andrew and Petitet, Antoine and Pozo, Roldan and Remington, Karin and Whaley, Clint},
}