% Entry 977: workshop paper (WACCPD, held with SC'16) on performance
% portability of OpenMP 4 / OpenACC directives across accelerator platforms.
% The numeric citation key is kept as-is so existing \cite{977} calls resolve.
% NOTE(review): `address` appears to hold the conference venue rather than the
% publisher's city, and `publisher`/`organization` carry the same value --
% presumably export artefacts; confirm before relying on them in output.
@inproceedings{977,
  author       = {Lopez, M. Graham and Larrea, V and Joubert, W and Hernandez, O and Haidar, Azzam and Tomov, Stanimire and Dongarra, Jack},
  title        = {Towards Achieving Performance Portability Using Directives for Accelerators},
  booktitle    = {The International Conference for High Performance Computing, Networking, Storage and Analysis (SC{\textquoteright}16), Third Workshop on Accelerator Programming Using Directives (WACCPD)},
  year         = {2016},
  month        = nov,
  publisher    = {Innovative Computing Laboratory, University of Tennessee},
  organization = {Innovative Computing Laboratory, University of Tennessee},
  address      = {Salt Lake City, Utah},
  abstract     = {In this paper we explore the performance portability of directives provided by OpenMP 4 and OpenACC to program various types of node architectures with attached accelerators, both self-hosted multicore and offload multicore/GPU. Our goal is to examine how successful OpenACC and the newer offload features of OpenMP 4.5 are for moving codes between architectures, how much tuning might be required and what lessons we can learn from this experience. To do this, we use examples of algorithms with varying computational intensities for our evaluation, as both compute and data access efficiency are important considerations for overall application performance. We implement these kernels using various methods provided by newer OpenACC and OpenMP implementations, and we evaluate their performance on various platforms including both X86\_64 with attached NVIDIA GPUs, self-hosted Intel Xeon Phi KNL, as well as an X86\_64 host system with Intel Xeon Phi coprocessors. In this paper, we explain what factors affected the performance portability such as how to pick the right programming model, its programming style, its availability on different platforms, and how well compilers can optimize and target to multiple platforms.},
}