% Publication list (entries 1478 and icl:*), one field per line.
% Conventions used below:
%   - month uses the standard three-letter macros (unquoted); entries whose
%     exported month was the placeholder "00" carry no month field.
%   - page and issue ranges use a double hyphen.
%   - doi holds the bare identifier without a resolver prefix.
%   - publication status ("To appear", "Submitted") lives in the note field,
%     not in the venue name.
%   - inproceedings venues are stored in booktitle; techreport series are
%     split into institution and type.
%   - acronyms and proper nouns in titles are brace-protected against
%     style-driven lowercasing.
% NOTE(review): icl:10 looks like a pre-publication record of icl:223;
% confirm before merging or removing either one.
% NOTE(review): the volume field of icl:84 is kept verbatim; it is not used
% by standard techreport styles and its meaning should be confirmed.

@inproceedings{1478,
  author       = {Pei, Yu and Cao, Qinglei and Bosilca, George and Luszczek, Piotr and Eijkhout, Victor and Dongarra, Jack},
  title        = {Communication Avoiding {2D} Stencil Implementations over {PaRSEC} Task-Based Runtime},
  booktitle    = {2020 {IEEE} International Parallel and Distributed Processing Symposium Workshops ({IPDPSW})},
  year         = {2020},
  month        = may,
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {New Orleans, LA},
  doi          = {10.1109/IPDPSW50202.2020.00127},
  abstract     = {Stencil computation or general sparse matrix-vector product (SpMV) are key components in many algorithms like geometric multigrid or Krylov solvers. But their low arithmetic intensity means that memory bandwidth and network latency will be the performance limiting factors. The current architectural trend favors computations over bandwidth, worsening the already unfavorable imbalance. Previous work approached stencil kernel optimization either by improving memory bandwidth usage or by providing a Communication Avoiding (CA) scheme to minimize network latency in repeated sparse vector multiplication by replicating remote work in order to delay communications on the critical path. Focusing on minimizing communication bottleneck in distributed stencil computation, in this study we combine a CA scheme with the computation and communication overlapping that is inherent in a dataflow task-based runtime system such as PaRSEC to demonstrate their combined benefits. We implemented the 2D five point stencil (Jacobi iteration) in PETSc, and over PaRSEC in two flavors, full communications (base-PaRSEC) and CA-PaRSEC which operate directly on a 2D compute grid. Our results running on two clusters, NaCL and Stampede2 indicate that we can achieve 2$\times$ speedup over the standard SpMV solution implemented in PETSc, and in certain cases when kernel execution is not dominating the execution time, the CA-PaRSEC version achieved up to 57\% and 33\% speedup over base-PaRSEC implementation on NaCL and Stampede2 respectively.},
}

@techreport{icl:190,
  author      = {Eijkhout, Victor},
  title       = {Numerical Metadata {API} Reference},
  institution = {Innovative Computing Laboratory},
  type        = {Technical Report},
  year        = {2007},
  month       = feb,
  keywords    = {salsa},
}

@article{icl:333,
  author   = {Bhowmick, Sanjukta and Eijkhout, Victor and Freund, Yoav and Fuentes, Erika and Keyes, David},
  title    = {Application of Machine Learning to the Selection of Sparse Linear Solvers},
  journal  = {International Journal of High Performance Computing Applications},
  year     = {2006},
  note     = {Submitted},
  keywords = {salsa, sans},
}

@article{icl:332,
  author   = {Bosilca, George and Chen, Zizhong and Dongarra, Jack and Eijkhout, Victor and Fagg, Graham and Fuentes, Erika and Langou, Julien and Luszczek, Piotr and Pjesivac-Grbovic, Jelena and Seymour, Keith and You, Haihang and Vadhiyar, Sathish},
  title    = {Self Adapting Numerical Software {SANS} Effort},
  journal  = {{IBM} Journal of Research and Development},
  volume   = {50},
  number   = {2/3},
  year     = {2006},
  month    = jan,
  pages    = {223--238},
  keywords = {gco},
}

@article{icl:277,
  author   = {Eijkhout, Victor and Fuentes, Erika and Eidson, Thomas and Dongarra, Jack},
  title    = {The Component Structure of a Self-Adapting Numerical Software System},
  journal  = {International Journal of Parallel Programming},
  volume   = {33},
  number   = {2},
  year     = {2005},
  month    = jun,
  keywords = {salsa, sans},
}

@inproceedings{icl:237,
  author    = {Eidson, Thomas and Eijkhout, Victor and Dongarra, Jack},
  title     = {Improvements in the Efficient Composition of Applications},
  booktitle = {{IPDPS} 2004, {NGS} Workshop},
  year      = {2004},
  address   = {Santa Fe},
  note      = {To appear},
  keywords  = {salsa, sans},
}

@techreport{icl:252,
  author      = {Buttari, Alfredo and Eijkhout, Victor and Langou, Julien and Filippone, Salvatore},
  title       = {Performance Optimization and Modeling of Blocked Sparse Kernels},
  institution = {Innovative Computing Laboratory},
  type        = {Technical Report},
  number      = {ICL-UT-04-05},
  year        = {2004},
  keywords    = {sans},
}

@inproceedings{icl:238,
  author    = {Demmel, James and Dongarra, Jack and Eijkhout, Victor and Fuentes, Erika and Petitet, Antoine and Vuduc, Rich and Whaley, Clint and Yelick, Katherine},
  title     = {Self Adapting Linear Algebra Algorithms and Software},
  booktitle = {{IEEE} Proceedings},
  year      = {2004},
  note      = {To appear},
  keywords  = {salsa, sans},
}

@inproceedings{icl:137,
  author    = {Eidson, Thomas and Dongarra, Jack and Eijkhout, Victor},
  title     = {Applying Aspect-Oriented Programming Concepts to a Component-based Programming Model},
  booktitle = {{IPDPS} 2003, Workshop on {NSF}-Next Generation Software},
  year      = {2003},
  month     = mar,
  address   = {Nice, France},
  keywords  = {salsa, sans},
}

@techreport{icl:208,
  author      = {Dongarra, Jack and Eijkhout, Victor},
  title       = {Finite-choice Algorithm Optimization in Conjugate Gradients ({LAPACK} Working Note 159)},
  institution = {University of Tennessee, Computer Science Department},
  type        = {Technical Report},
  number      = {UT-CS-03-502},
  year        = {2003},
  month       = jan,
}

@techreport{icl:189,
  author      = {Eijkhout, Victor and Fuentes, Erika},
  title       = {A Proposed Standard for Matrix Metadata},
  institution = {Innovative Computing Laboratory},
  type        = {Technical Report},
  number      = {ICL-UT-03-02},
  year        = {2003},
  month       = nov,
  note        = {Submitted to {ACM} {TOMS}},
  keywords    = {salsa, sans},
}

@inproceedings{icl:158,
  author    = {Beck, Micah and Dongarra, Jack and Eijkhout, Victor and Langston, Mike and Moore, Terry and Plank, James},
  title     = {Scalable, Trustworthy Network Computing Using Untrusted Intermediaries: A Position Paper},
  booktitle = {{DOE/NSF} Workshop on New Directions in Cyber-Security in Large-Scale Networks: Development Obstacles},
  year      = {2003},
  month     = mar,
  address   = {National Conference Center, Lansdowne, Virginia},
  keywords  = {netsolve},
}

@article{icl:184,
  author   = {Dongarra, Jack and Eijkhout, Victor},
  title    = {Self Adapting Numerical Algorithm for Next Generation Applications},
  journal  = {International Journal of High Performance Computing Applications},
  volume   = {17},
  number   = {2},
  year     = {2003},
  month    = jan,
  pages    = {125--132},
  keywords = {lacsi, sans},
}

@article{icl:127,
  author    = {Dongarra, Jack and Eijkhout, Victor},
  title     = {Self-Adapting Numerical Software and Automatic Tuning of Heuristics},
  journal   = {Lecture Notes in Computer Science},
  volume    = {2660},
  year      = {2003},
  month     = jun,
  pages     = {759--770},
  publisher = {Springer Verlag},
  address   = {Melbourne, Australia},
  keywords  = {salsa, sans},
}

@article{icl:10,
  author  = {Dongarra, Jack and Eijkhout, Victor and van der Vorst, Henk},
  title   = {An Iterative Solver Benchmark},
  journal = {Scientific Programming},
  year    = {2002},
  note    = {To appear},
}

@techreport{icl:84,
  author      = {Eijkhout, Victor},
  title       = {Polynomial Acceleration of Optimised Multi-grid Smoothers; Basic Theory},
  institution = {Innovative Computing Laboratory},
  type        = {Technical Report},
  volume      = {156},
  number      = {ICL-UT-02-03},
  year        = {2002},
  month       = jan,
}

@techreport{icl:102,
  author      = {Dongarra, Jack and Eijkhout, Victor},
  title       = {Self-adapting Numerical Software for Next Generation Applications ({LAPACK} Working Note 157)},
  institution = {Innovative Computing Laboratory},
  type        = {Technical Report},
  number      = {ICL-UT-02-07},
  year        = {2002},
  keywords    = {salsa, sans},
}

@techreport{icl:13,
  author      = {Eijkhout, Victor},
  title       = {Automatic Determination of Matrix-Blocks},
  institution = {University of Tennessee, Computer Science Department},
  type        = {Technical Report},
  number      = {UT-CS-01-458},
  year        = {2001},
  month       = jan,
  note        = {{LAPACK} Working Note 151},
}

@article{icl:223,
  author  = {Dongarra, Jack and Eijkhout, Victor and van der Vorst, Henk},
  title   = {Iterative Solver Benchmark ({LAPACK} Working Note 152)},
  journal = {Scientific Programming},
  volume  = {9},
  number  = {4},
  year    = {2001},
  pages   = {223--231},
}

@article{icl:81,
  author  = {Dongarra, Jack and Eijkhout, Victor and Luszczek, Piotr},
  title   = {Recursive Approach in Sparse Matrix {LU} Factorization},
  journal = {Scientific Programming},
  volume  = {9},
  number  = {1},
  year    = {2001},
  pages   = {51--60},
}

@inproceedings{icl:38,
  author    = {Dongarra, Jack and Eijkhout, Victor and Luszczek, Piotr},
  title     = {Recursive approach in sparse matrix {LU} factorization},
  booktitle = {Proceedings of 1st {SGI} Users Conference},
  year      = {2000},
  month     = jan,
  pages     = {409--418},
  publisher = {ACC Cyfronet UMM},
  address   = {Cracow, Poland},
}

@inproceedings{icl:25,
  author    = {Arnold, Dorian and Blackford, Susan and Dongarra, Jack and Eijkhout, Victor and Xu, Tinghua},
  title     = {Seamless Access to Adaptive Solver Algorithms},
  booktitle = {Proceedings of 16th {IMACS} World Congress 2000 on Scientific Computing, Applications Mathematics and Simulation},
  year      = {2000},
  month     = aug,
  address   = {Lausanne, Switzerland},
  keywords  = {netsolve},
}

@techreport{icl:70,
  author      = {Eijkhout, Victor},
  title       = {On the Existence Problem of Incomplete Factorisation Methods},
  institution = {University of Tennessee, Computer Science Department},
  type        = {Technical Report},
  number      = {UT-CS-99-435},
  year        = {1999},
  month       = dec,
}

@incollection{icl:66,
  author    = {Dongarra, Jack and Eijkhout, Victor},
  title     = {Numerical Linear Algebra},
  booktitle = {Encyclopedia of Computer Science and Technology},
  editor    = {Kent, A. and Williams, J.},
  publisher = {Marcel Dekker},
  volume    = {41},
  year      = {1999},
  month     = aug,
  pages     = {207--233},
}

@article{icl:65,
  author  = {Dongarra, Jack and Eijkhout, Victor},
  title   = {Numerical Linear Algebra Algorithms and Software},
  journal = {Journal of Computational and Applied Mathematics},
  volume  = {123},
  number  = {1--2},
  year    = {1999},
  month   = oct,
  pages   = {489--514},
}

@techreport{icl:71,
  author      = {Eijkhout, Victor},
  title       = {The `Weighted Modification' Incomplete Factorisation Method},
  institution = {University of Tennessee, Computer Science Department},
  type        = {Technical Report},
  number      = {UT-CS-99-436},
  year        = {1999},
  month       = dec,
}