@article {851, title = {Computing Low-rank Approximation of a Dense Matrix on Multicore CPUs with a GPU and its Application to Solving a Hierarchically Semiseparable Linear System of Equations}, journal = {Scientific Programming}, year = {2015}, abstract = {Low-rank matrices arise in many scientific and engineering computation. Both computational and storage costs of manipulating such matrices may be reduced by taking advantages of their low-rank properties. To compute a low-rank approximation of a dense matrix, in this paper, we study the performance of QR factorization with column pivoting or with restricted pivoting on multicore CPUs with a GPU. We first propose several techniques to reduce the postprocessing time, which is required for restricted pivoting, on a modern CPU. We then examine the potential of using a GPU to accelerate the factorization process with both column and restricted pivoting. Our performance results on two eight-core Intel Sandy Bridge CPUs with one NVIDIA Kepler GPU demonstrate that using the GPU, the factorization time can be reduced by a factor of more than two. In addition, to study the performance of our implementations in practice, we integrate them into a recently-developed software StruMF which algebraically exploits such low-rank structures for solving a general sparse linear system of equations. Our performance results for solving Poisson{\textquoteright}s equations demonstrate that the proposed techniques can significantly reduce the preconditioner construction time of StruMF on the CPUs, and the construction time can be further reduced by 10\%-50\% using the GPU.}, author = {Ichitaro Yamazaki and Stanimire Tomov and Jack Dongarra} }