% ScalA '17 workshop paper on batched SpMV kernels for GPUs (CSR, COO and ELL formats).
@inproceedings{anzt2017flexible,
  author       = {Anzt, Hartwig and Collins, Gary and Dongarra, Jack and Flegar, Goran and Quintana-Orti, Enrique S.},
  title        = {Flexible Batched Sparse Matrix-Vector Product on {GPUs}},
  booktitle    = {8th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems (ScalA {\textquoteright}17)},
  year         = {2017},
  month        = nov,
  publisher    = {ACM Press},
  organization = {ACM Press},
  address      = {Denver, CO},
  doi          = {10.1145/3148226.3148230},
  abstract     = {We propose a variety of batched routines for concurrently processing a large collection of small-size, independent sparse matrix-vector products (SpMV) on graphics processing units (GPUs). These batched SpMV kernels are designed to be flexible in order to handle a batch of matrices which differ in size, nonzero count, and nonzero distribution. Furthermore, they support three most commonly used sparse storage formats: CSR, COO and ELL. Our experimental results on a state-of-the-art GPU reveal performance improvements of up to 25X compared to non-batched SpMV routines.},
}