@conference {icl:692, title = {Virtual Systolic Array for QR Decomposition}, booktitle = {15th Workshop on Advances in Parallel and Distributed Computational Models, IEEE International Parallel \& Distributed Processing Symposium (IPDPS 2013)}, year = {2013}, month = {2013-05}, publisher = {IEEE}, organization = {IEEE}, address = {Boston, MA}, abstract = {Systolic arrays offer a very attractive, data-centric, execution model as an alternative to the von Neumann architecture. Hardware implementations of systolic arrays turned out not to be viable solutions in the past. This article shows how the systolic design principles can be applied to a software solution to deliver an algorithm with unprecedented strong scaling capabilities. Systolic array for the QR decomposition is developed and a virtualization layer is used for mapping of the algorithm to a large distributed memory system. Strong scaling properties are discovered, superior to existing solutions.}, keywords = {dataflow programming, message passing, multi-core, QR decomposition, roofline model, systolic array}, doi = {10.1109/IPDPS.2013.119}, author = {Jakub Kurzak and Piotr Luszczek and Mark Gates and Ichitaro Yamazaki and Jack Dongarra} }