% Conference paper on performance measurement of heterogeneous CPU/GPU systems
% using PAPI, Vampir and TAU (ICPP'11, Taipei, September 2011).
% NOTE(review): the DOI prefix 10.1109 looks like an IEEE DOI while the
%   publisher field says ACM -- confirm the publisher against the proceedings.
% NOTE(review): address appears to hold the conference venue rather than the
%   publisher's city; kept as exported -- confirm which is intended.
% The exported organization field duplicated publisher and was dropped so that
% styles printing both do not emit the same name twice.
@inproceedings{icl:633,
  author    = {Malony, Allen D. and Biersdorff, Scott and Shende, Sameer and Jagode, Heike and Tomov, Stanimire and Juckeland, Guido and Dietrich, Robert and Poole, Duncan and Lamb, Christopher},
  title     = {Parallel Performance Measurement of Heterogeneous Parallel Systems with {GPUs}},
  booktitle = {International Conference on Parallel Processing (ICPP'11)},
  year      = {2011},
  month     = sep,
  publisher = {ACM},
  address   = {Taipei, Taiwan},
  isbn      = {978-0-7695-4510-3},
  doi       = {10.1109/ICPP.2011.71},
  keywords  = {magma, mumi, papi},
  abstract  = {The power of GPUs is giving rise to heterogeneous parallel computing, with new demands on programming environments, runtime systems, and tools to deliver high-performing applications. This paper studies the problems associated with performance measurement of heterogeneous machines with GPUs. A heterogeneous computation model and alternative host-GPU measurement approaches are discussed to set the stage for reporting new capabilities for heterogeneous parallel performance measurement in three leading HPC tools: PAPI, Vampir, and the TAU Performance System. Our work leverages the new CUPTI tool support in NVIDIA's CUDA device library. Heterogeneous benchmarks from the SHOC suite are used to demonstrate the measurement methods and tool support.},
}