% Bibliography of Ginkgo / mixed-precision publications (2018--2020).
%
% Conventions used below:
%   * one field per line, author names in "Last, First" form;
%   * DOIs are stored bare (no resolver prefix);
%   * months use the predefined three-letter macros;
%   * words in titles that must keep their capitalisation are brace-protected.
%
% NOTE(review): the volume of entry 1369 (40319) is carried over unchanged from
% the original export and looks implausible for a journal volume -- TODO confirm
% against the publisher record.
% NOTE(review): the same author appears as "Goebel" and as "G{\"o}bel" in
% different entries; both spellings are kept as exported -- TODO confirm which
% one should be canonical.

@article{anzt2020ginkgo,
  author   = {Anzt, Hartwig and Cojean, Terry and Chen, Yen-Chen and
              Goebel, Fritz and Gruetzmacher, Thomas and Nayak, Pratik and
              Ribizel, Tobias and Tsai, Yu-Hsiang},
  title    = {{Ginkgo}: A High Performance Numerical Linear Algebra Library},
  journal  = {Journal of Open Source Software},
  volume   = {5},
  year     = {2020},
  month    = aug,
  doi      = {10.21105/joss.02260},
  abstract = {Ginkgo is a production-ready sparse linear algebra library for
              high performance computing on GPU-centric architectures with a
              high level of performance portability and focuses on software
              sustainability. The library focuses on solving sparse linear
              systems and accommodates a large variety of matrix formats,
              state-of-the-art iterative (Krylov) solvers and preconditioners,
              which make the library suitable for a variety of scientific
              applications. Ginkgo supports many architectures such as
              multi-threaded CPU, NVIDIA GPUs, and AMD GPUs. The heavy use of
              modern C++ features simplifies the addition of new executor
              paradigms and algorithmic functionality without introducing
              significant performance overhead. Solving linear systems is
              usually one of the most computationally and memory intensive
              aspects of any application. Hence there has been a significant
              amount of effort in this direction with software libraries such
              as UMFPACK (Davis, 2004) and CHOLMOD (Chen, Davis, Hager, \&
              Rajamanickam, 2008) for solving linear systems with direct
              methods and PETSc (Balay et al., 2020), Trilinos
              ({\textquotedblleft}The Trilinos Project
              Website,{\textquotedblright} 2020), Eigen (Guennebaud, Jacob, \&
              others, 2010) and many more to solve linear systems with
              iterative methods. With Ginkgo, we aim to ensure high
              performance while not compromising portability. Hence, we
              provide very efficient low level kernels optimized for different
              architectures and separate these kernels from the algorithms
              thereby ensuring extensibility and ease of use. Ginkgo is also a
              part of the xSDK effort (Bartlett et al., 2017) and available as
              a Spack (Gamblin et al., 2015) package. xSDK aims to provide
              infrastructure for and interoperability between a collection of
              related and complementary software elements to foster rapid and
              efficient development of scientific applications using High
              Performance Computing. Within this effort, we provide
              interoperability with application libraries such as deal.ii
              (Arndt et al., 2019) and mfem (Anderson et al., 2020). Ginkgo
              provides wrappers within these two libraries so that they can
              take advantage of the features of Ginkgo.},
}

@misc{anzt2020ginkgoposter,
  author       = {Anzt, Hartwig and Cojean, Terry and Chen, Yen-Chen and
                  Goebel, Fritz and Gruetzmacher, Thomas and Nayak, Pratik and
                  Ribizel, Tobias and Tsai, Yu-Hsiang and Dongarra, Jack},
  title        = {{Ginkgo}: A Node-Level Sparse Linear Algebra Library for {HPC} (Poster)},
  howpublished = {2020 Exascale Computing Project Annual Meeting, Houston, TX},
  year         = {2020},
  month        = feb,
}

@techreport{abdelfattah2020survey,
  author      = {Abdelfattah, Ahmad and Anzt, Hartwig and Boman, Erik and
                 Carson, Erin and Cojean, Terry and Dongarra, Jack and
                 Gates, Mark and Gruetzmacher, Thomas and
                 Higham, Nicholas J. and Li, Sherry and Lindquist, Neil and
                 Liu, Yang and Loe, Jennifer and Luszczek, Piotr and
                 Nayak, Pratik and Pranesh, Sri and Rajamanickam, Siva and
                 Ribizel, Tobias and Smith, Barry and Swirydowicz, Kasia and
                 Thomas, Stephen and Tomov, Stanimire and Tsai, Yaohung and
                 Yamazaki, Ichitaro and Yang, Ulrike Meier},
  title       = {A Survey of Numerical Methods Utilizing Mixed Precision Arithmetic},
  type        = {SLATE Working Notes},
  number      = {15, ICL-UT-20-08},
  institution = {University of Tennessee},
  year        = {2020},
  month       = jul,
}

@article{1369,
  author  = {Gruetzmacher, Thomas and Cojean, Terry and Flegar, Goran and
             G{\"o}bel, Fritz and Anzt, Hartwig},
  title   = {A Customized Precision Format Based on Mantissa Segmentation for
             Accelerating Sparse Linear Algebra},
  journal = {Concurrency and Computation: Practice and Experience},
  volume  = {40319},
  year    = {2019},
  month   = jan,
  issn    = {1532-0626},
  doi     = {10.1002/cpe.5418},
}

@article{1317,
  author   = {Anzt, Hartwig and Flegar, Goran and Gruetzmacher, Thomas and
              Quintana-Orti, Enrique S.},
  title    = {Toward a Modular Precision Ecosystem for High-Performance Computing},
  journal  = {The International Journal of High Performance Computing Applications},
  volume   = {33},
  pages    = {1069--1078},
  year     = {2019},
  month    = nov,
  issn     = {1094-3420},
  doi      = {10.1177/1094342019846547},
  keywords = {conjugate gradient, GPUs, Jacobi method, Modular precision,
              multicore processors, PageRank, parallel numerical linear algebra},
  abstract = {With the memory bandwidth of current computer architectures
              being significantly slower than the (floating point) arithmetic
              performance, many scientific computations only leverage a
              fraction of the computational power in today{\textquoteright}s
              high-performance architectures. At the same time, memory
              operations are the primary energy consumer of modern
              architectures, heavily impacting the resource cost of
              large-scale applications and the battery life of mobile devices.
              This article tackles this mismatch between floating point
              arithmetic throughput and memory bandwidth by advocating a
              disruptive paradigm change with respect to how data are stored
              and processed in scientific applications. Concretely, the goal
              is to radically decouple the data storage format from the
              processing format and, ultimately, design a
              {\textquotedblleft}modular precision
              ecosystem{\textquotedblright} that allows for more flexibility
              in terms of customized data access. For memory-bounded
              scientific applications, dynamically adapting the memory
              precision to the numerical requirements allows for attractive
              resource savings. In this article, we demonstrate the potential
              of employing a modular precision ecosystem for the block-Jacobi
              preconditioner and the PageRank algorithm{\textemdash}two
              applications that are popular in the communities and at the same
              characteristic representatives for the field of numerical linear
              algebra and data analytics, respectively.},
}

@inproceedings{1235,
  author    = {Anzt, Hartwig and Gruetzmacher, Thomas and
               Quintana-Orti, Enrique S. and Scheidegger, Florian},
  title     = {High-Performance {GPU} Implementation of {PageRank} with
               Reduced Precision based on Mantissa Segmentation},
  booktitle = {8th Workshop on Irregular Applications: Architectures and Algorithms},
  year      = {2018},
}

@inproceedings{1234,
  author    = {Anzt, Hartwig and Dongarra, Jack and Flegar, Goran and
               Gruetzmacher, Thomas},
  title     = {Variable-Size Batched Condition Number Calculation on {GPUs}},
  booktitle = {SBAC-PAD},
  address   = {Lyon, France},
  year      = {2018},
  month     = sep,
  url       = {https://ieeexplore.ieee.org/document/8645907},
}