@comment{
  Publications of Alfredo Buttari and co-authors, 2004 to 2010.

  Conventions used in this file:
  - One field per line; names are written in the "Last, First" form.
  - Acronyms and proper nouns in titles are brace-protected so that styles
    applying sentence casing do not lowercase them.
  - Months use the standard three-letter macros. They are transcribed from
    the "YYYY-MM" values of the original export. A month of "00" is not a
    month, so the field is omitted for those entries. NOTE(review): "01"
    may also be an export placeholder rather than a real January; verify
    against the publisher record before relying on it.
  - Publication status such as "To appear" lives in the note field, not in
    the journal or booktitle.
  - Technical reports carry the issuing body in institution, the bare
    report id in number, and any LAPACK Working Note cross-reference in note.
  - Citation keys of the icl:NNN form are unchanged from the original file.
}

@string{utkcs  = {University of Tennessee, Computer Science Department}}
@string{para06 = {PARA 2006, Ume{\aa}, Sweden}}

% NOTE(review): icl:572 looks like a stale "to appear" record of icl:509
% (same title, authors and journal). Kept because the key may be cited.
@article{icl:572,
  author  = {Buttari, Alfredo and Langou, Julien and Kurzak, Jakub and Dongarra, Jack},
  title   = {A Class of Parallel Tiled Linear Algebra Algorithms for Multicore Architectures},
  journal = {Parallel Computing},
  year    = {2010},
  note    = {To appear},
}

% The original entry had an empty citation key and could not be cited;
% the key below is new. The DOI is stored without the resolver prefix.
@article{baboulin2009accelerating,
  author   = {Baboulin, Marc and Buttari, Alfredo and Dongarra, Jack and Kurzak, Jakub and Langou, Julie and Langou, Julien and Luszczek, Piotr and Tomov, Stanimire},
  title    = {Accelerating Scientific Computations with Mixed Precision Algorithms},
  journal  = {Computer Physics Communications},
  volume   = {180},
  pages    = {2526--2533},
  year     = {2009},
  month    = dec,
  doi      = {10.1016/j.cpc.2008.11.005},
  abstract = {On modern architectures, the performance of 32-bit operations is often at least twice as fast as the performance of 64-bit operations. By using a combination of 32-bit and 64-bit floating point arithmetic, the performance of many dense and sparse linear algebra algorithms can be significantly enhanced while maintaining the 64-bit accuracy of the resulting solution. The approach presented here can apply not only to conventional processors but also to other technologies such as Field Programmable Gate Arrays (FPGA), Graphical Processing Units (GPU), and the STI Cell BE processor. Results on modern processor architectures and the STI Cell BE are presented.},
}

@article{icl:509,
  author   = {Buttari, Alfredo and Langou, Julien and Kurzak, Jakub and Dongarra, Jack},
  title    = {A Class of Parallel Tiled Linear Algebra Algorithms for Multicore Architectures},
  journal  = {Parallel Computing},
  volume   = {35},
  pages    = {38--53},
  year     = {2009},
  keywords = {plasma},
}

% Chapter in an edited book: the book title belongs in booktitle so that
% publisher and editor are actually rendered.
@incollection{icl:505,
  author    = {Buttari, Alfredo and Dongarra, Jack and Kurzak, Jakub and Langou, Julien},
  title     = {Parallel Dense Linear Algebra Software in the Multicore Era},
  booktitle = {Cyberinfrastructure Technologies and Applications},
  editor    = {Cao, Junwei},
  publisher = {Nova Science Publishers, Inc.},
  pages     = {9--24},
  year      = {2009},
  keywords  = {plasma},
}

% The original author list named Julien Langou twice; the second name is
% taken from icl:392, the "to appear" record of the same chapter.
@incollection{icl:449,
  author    = {Buttari, Alfredo and Dongarra, Jack and Kurzak, Jakub and Langou, Julien and Langou, Julie and Luszczek, Piotr and Tomov, Stanimire},
  title     = {Exploiting Mixed Precision Floating Point Hardware in Scientific Computations},
  booktitle = {High Performance Computing and Grids in Action},
  editor    = {Grandinetti, Lucio},
  publisher = {IOS Press},
  address   = {Amsterdam},
  year      = {2008},
  month     = jan,
}

@article{icl:446,
  author  = {Buttari, Alfredo and Langou, Julien and Kurzak, Jakub and Dongarra, Jack},
  title   = {Parallel Tiled {QR} Factorization for Multicore Architectures},
  journal = {Concurrency and Computation: Practice and Experience},
  volume  = {20},
  pages   = {1573--1590},
  year    = {2008},
  month   = jan,
}

@article{icl:444,
  author  = {Kurzak, Jakub and Buttari, Alfredo and Luszczek, Piotr and Dongarra, Jack},
  title   = {The {PlayStation} 3 for High Performance Scientific Computing},
  journal = {Computing in Science and Engineering},
  pages   = {80--83},
  year    = {2008},
  month   = jan,
}

@techreport{icl:406,
  author      = {Kurzak, Jakub and Buttari, Alfredo and Luszczek, Piotr and Dongarra, Jack},
  title       = {The {PlayStation} 3 for High Performance Scientific Computing},
  institution = utkcs,
  number      = {UT-CS-08-608},
  year        = {2008},
  month       = jan,
}

@article{icl:445,
  author  = {Kurzak, Jakub and Buttari, Alfredo and Dongarra, Jack},
  title   = {Solving Systems of Linear Equations on the {CELL} Processor Using {Cholesky} Factorization},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume  = {19},
  number  = {9},
  pages   = {1--11},
  year    = {2008},
  month   = jan,
}

@article{icl:424,
  author   = {Buttari, Alfredo and Dongarra, Jack and Kurzak, Jakub and Luszczek, Piotr and Tomov, Stanimire},
  title    = {Using Mixed Precision for Sparse Matrix Computations to Enhance the Performance while Achieving 64-bit Accuracy},
  journal  = {ACM Transactions on Mathematical Software},
  volume   = {34},
  number   = {4},
  pages    = {17--22},
  year     = {2008},
  keywords = {plasma},
}

@techreport{icl:375,
  author      = {Buttari, Alfredo and Langou, Julien and Kurzak, Jakub and Dongarra, Jack},
  title       = {A Class of Parallel Tiled Linear Algebra Algorithms for Multicore Architectures},
  institution = utkcs,
  number      = {UT-CS-07-600},
  year        = {2007},
  month       = jan,
  note        = {Also LAPACK Working Note 191},
  keywords    = {plasma},
}

% NOTE(review): looks like the "to appear" record of icl:449. Kept because
% the key may be cited.
@incollection{icl:392,
  author    = {Buttari, Alfredo and Dongarra, Jack and Kurzak, Jakub and Langou, Julien and Langou, Julie and Luszczek, Piotr and Tomov, Stanimire},
  title     = {Exploiting Mixed Precision Floating Point Hardware in Scientific Computations},
  booktitle = {High Performance Computing and Grids in Action},
  editor    = {Grandinetti, Lucio},
  publisher = {IOS Press},
  address   = {Amsterdam},
  year      = {2007},
  note      = {To appear},
}

@techreport{icl:404,
  author      = {Buttari, Alfredo and Dongarra, Jack and Kurzak, Jakub},
  title       = {Limitations of the {Playstation} 3 for High Performance Cluster Computing},
  institution = utkcs,
  number      = {UT-CS-07-597},
  year        = {2007},
  note        = {Also LAPACK Working Note 185},
}

% NOTE(review): the original journal value read "... High Performance
% Computer Applications"; corrected to "Computing Applications" on the
% assumption that the SAGE journal of that name is meant. Confirm.
@article{icl:395,
  author  = {Buttari, Alfredo and Dongarra, Jack and Langou, Julien and Langou, Julie and Luszczek, Piotr and Kurzak, Jakub},
  title   = {Mixed Precision Iterative Refinement Techniques for the Solution of Dense Linear Systems},
  journal = {International Journal of High Performance Computing Applications},
  year    = {2007},
  month   = aug,
  note    = {To appear},
}

% The original had no booktitle (required for this entry type) and carried
% number = 2007, which repeats the year; volume and number are mutually
% exclusive for this type in the standard styles, so number is dropped.
@inproceedings{icl:388,
  author    = {Buttari, Alfredo and Dongarra, Jack and Husbands, Parry and Kurzak, Jakub and Yelick, Katherine},
  title     = {Multithreading for synchronization tolerance in matrix factorization},
  booktitle = {SciDAC 2007},
  series    = {Journal of Physics: Conference Series},
  volume    = {78},
  year      = {2007},
  month     = jan,
}

@techreport{icl:363,
  author      = {Buttari, Alfredo and Langou, Julien and Kurzak, Jakub and Dongarra, Jack},
  title       = {Parallel Tiled {QR} Factorization for Multicore Architectures},
  institution = utkcs,
  number      = {UT-CS-07-598},
  year        = {2007},
  note        = {Also LAPACK Working Note 190},
  keywords    = {plasma},
}

@techreport{icl:364,
  author      = {Buttari, Alfredo and Luszczek, Piotr and Kurzak, Jakub and Dongarra, Jack and Bosilca, George},
  title       = {{SCOP3}: A Rough Guide to Scientific Computing On the {PlayStation} 3},
  institution = utkcs,
  number      = {UT-CS-07-595},
  year        = {2007},
  keywords    = {multi-core},
}

@techreport{icl:341,
  author      = {Kurzak, Jakub and Buttari, Alfredo and Dongarra, Jack},
  title       = {Solving Systems of Linear Equations on the {CELL} Processor Using {Cholesky} Factorization},
  institution = utkcs,
  number      = {UT-CS-07-596},
  year        = {2007},
  month       = jan,
  note        = {Also LAPACK Working Note 184},
  keywords    = {lapack},
}

% Originally typed as an article although it is a technical report.
% NOTE(review): the original author list named Julien Langou twice; the
% first occurrence is assumed to be Julie Langou. Confirm name and order
% against the report itself.
@techreport{icl:317,
  author      = {Langou, Julie and Langou, Julien and Luszczek, Piotr and Kurzak, Jakub and Buttari, Alfredo and Dongarra, Jack},
  title       = {Exploiting the Performance of 32 bit Floating Point Arithmetic in Obtaining 64 bit Accuracy},
  institution = utkcs,
  number      = {UT-CS-06-574},
  year        = {2006},
  month       = apr,
  note        = {Also LAPACK Working Note 175},
  keywords    = {iter-ref},
}

% NOTE(review): icl:369 and icl:370 named a conference in the journal
% field, so they are treated as conference contributions here. Confirm
% that they appeared in the proceedings rather than only as talks.
@inproceedings{icl:369,
  author    = {Buttari, Alfredo and Dongarra, Jack and Kurzak, Jakub and Langou, Julien and Luszczek, Piotr and Tomov, Stanimire},
  title     = {The Impact of Multicore on Math Software},
  booktitle = para06,
  year      = {2006},
  month     = jun,
  keywords  = {plasma},
}

% NOTE(review): the original author list named Julien Langou twice; the
% second occurrence is assumed to be Julie Langou. Confirm.
@inproceedings{icl:370,
  author    = {Demmel, James and Dongarra, Jack and Parlett, B. and Kahan, William and Gu, Ming and Bindel, David and Hida, Yozo and Li, Xiaoye and Marques, Osni and Riedy, Jason E. and Voemel, Christof and Langou, Julien and Luszczek, Piotr and Kurzak, Jakub and Buttari, Alfredo and Langou, Julie and Tomov, Stanimire},
  title     = {Prospectus for the Next {LAPACK} and {ScaLAPACK} Libraries},
  booktitle = para06,
  year      = {2006},
  month     = jun,
}

% NOTE(review): "ICL" in the original is expanded to Innovative Computing
% Laboratory on the basis of the ICL-UT report number. Confirm.
@techreport{icl:252,
  author      = {Buttari, Alfredo and Eijkhout, Victor and Langou, Julien and Filippone, Salvatore},
  title       = {Performance Optimization and Modeling of Blocked Sparse Kernels},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  number      = {ICL-UT-04-05},
  year        = {2004},
  keywords    = {sans},
}