@conference{1395,
  title = {Give MPI Threading a Fair Chance: A Study of Multithreaded MPI Designs},
  booktitle = {IEEE Cluster},
  year = {2019},
  month = {2019-09},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Albuquerque, NM},
  abstract = {The Message Passing Interface (MPI) has been one of the most prominent programming paradigms in high-performance computing (HPC) for the past decade. Lately, with changes in modern hardware leading to a drastic increase in the number of processor cores, developers of parallel applications are moving toward more integrated parallel programming paradigms, where MPI is used along with other, possibly node-level, programming paradigms, or MPI+X. MPI+threads has emerged as one of the favorite choices, according to a survey of the HPC community. However, threading support in MPI comes with many compromises to the overall delivered performance, which hampers its adoption. This paper studies in depth the multithreaded design of one of the leading MPI implementations, Open MPI, and exposes some of the shortcomings of the current design. We propose, implement, and evaluate a new design for the internal handling of communication progress that allows for a significant boost in multithreading performance, increasing the viability of MPI in the MPI+X programming paradigm.},
  keywords = {communication contention, MPI, thread},
  author = {Thananon Patinyasakdikul and David Eberius and George Bosilca and Nathan Hjelm}
}

@conference{1127,
  title = {Using Software-Based Performance Counters to Expose Low-Level Open MPI Performance Information},
  booktitle = {EuroMPI},
  year = {2017},
  month = {2017-09},
  publisher = {ACM},
  organization = {ACM},
  address = {Chicago, IL},
  abstract = {This paper details the implementation and usage of software-based performance counters to understand the performance of a particular implementation of the MPI standard, Open MPI. Such counters can expose intrinsic features of the software stack that are not otherwise available in a generic and portable way. The PMPI interface is useful for instrumenting MPI applications at the user level; however, it is insufficient for providing meaningful internal MPI performance details. While the Peruse interface provides more detailed information on state changes within Open MPI, it has not seen widespread adoption. We introduce a simple low-level approach that instruments the Open MPI code at key locations to provide fine-grained MPI performance metrics. We evaluate the overhead associated with adding these counters to Open MPI as well as their use in determining bottlenecks and areas for improvement both in user code and in the MPI implementation itself.},
  keywords = {MPI, Performance Counters, Profiling, Tools},
  isbn = {978-1-4503-4849-2/17/09},
  doi = {https://doi.org/10.1145/3127024.3127039},
  url = {https://dl.acm.org/citation.cfm?id=3127024},
  author = {David Eberius and Thananon Patinyasakdikul and George Bosilca}
}