% Bibliography records: journal articles, book chapters and conference papers
% dated 2003-2018.
%
% Conventions used in this block:
%   - one field per line, in a fixed order (author, editor, title, venue,
%     volume/number/pages, year, month, publisher, address, doi, keywords,
%     note, abstract);
%   - month is an unquoted three-letter macro; records whose source month was
%     "YYYY-00" (no month known) carry no month field;
%   - page ranges use "--";
%   - doi holds the bare identifier, without a resolver prefix;
%   - conference venues live in booktitle, not journal;
%   - acronyms and proper names in titles are brace-protected.

@article{1211,
  author   = {Mark Asch and Terry Moore and Rosa M. Badia and Micah Beck
              and Pete Beckman and Thierry Bidot and Fran{\c c}ois Bodin
              and Franck Cappello and Alok Choudhary and Bronis R. de Supinski
              and Ewa Deelman and Jack Dongarra and Anshu Dubey
              and Geoffrey Fox and Haohuan Fu and Sergi Girona
              and Michael Heroux and Yutaka Ishikawa and Kate Keahey
              and David Keyes and William T. Kramer
              and Jean-Fran{\c c}ois Lavignon and Yutong Lu
              and Satoshi Matsuoka and Bernd Mohr and St{\'e}phane Requena
              and Joel Saltz and Thomas Schulthess and Rick Stevens
              and Martin Swany and Alexander Szalay and William Tang
              and Ga{\"e}l Varoquaux and Jean-Pierre Vilotte
              and Robert W. Wisniewski and Zhiwei Xu and Igor Zacharov},
  title    = {Big Data and Extreme-Scale Computing: Pathways to Convergence -- Toward a Shaping Strategy for a Future Software and Data Ecosystem for Scientific Inquiry},
  journal  = {The International Journal of High Performance Computing Applications},
  volume   = {32},
  pages    = {435--479},
  year     = {2018},
  month    = jul,
  doi      = {10.1177/1094342018778123},
  abstract = {Over the past four years, the Big Data and Exascale Computing (BDEC) project organized a series of five international workshops that aimed to explore the ways in which the new forms of data-centric discovery introduced by the ongoing revolution in high-end data analysis (HDA) might be integrated with the established, simulation-centric paradigm of the high-performance computing (HPC) community. Based on those meetings, we argue that the rapid proliferation of digital data generators, the unprecedented growth in the volume and diversity of the data they generate, and the intense evolution of the methods for analyzing and using that data are radically reshaping the landscape of scientific computing. The most critical problems involve the logistics of wide-area, multistage workflows that will move back and forth across the computing continuum, between the multitude of distributed sensors, instruments and other devices at the network's edge, and the centralized resources of commercial clouds and HPC centers. We suggest that the prospects for the future integration of technological infrastructures and research ecosystems need to be considered at three different levels. First, we discuss the convergence of research applications and workflows that establish a research paradigm that combines both HPC and HDA, where ongoing progress is already motivating efforts at the other two levels. Second, we offer an account of some of the problems involved with creating a converged infrastructure for peripheral environments, that is, a shared infrastructure that can be deployed throughout the network in a scalable manner to meet the highly diverse requirements for processing, communication, and buffering/storage of massive data workflows of many different scientific domains. Third, we focus on some opportunities for software ecosystem convergence in big, logically centralized facilities that execute large-scale simulations and models and/or perform large-scale data analytics. We close by offering some conclusions and recommendations for future investment and policy review.},
}

@article{icl:643,
  author   = {Jack Dongarra and Pete Beckman and Terry Moore and Patrick Aerts
              and Giovanni Aloisio and Jean-Claude Andre and David Barkai
              and Jean-Yves Berthou and Taisuke Boku
              and Bertrand Braunschweig and Franck Cappello
              and Barbara Chapman and Xuebin Chi and Alok Choudhary
              and Sudip Dosanjh and Thom Dunning and Sandro Fiore
              and Al Geist and Bill Gropp and Robert Harrison and Mark Hereld
              and Michael Heroux and Adolfy Hoisie and Koh Hotta
              and Zhong Jin and Yutaka Ishikawa and Fred Johnson
              and Sanjay Kale and Richard Kenway and David Keyes
              and Bill Kramer and Jesus Labarta and Alain Lichnewsky
              and Thomas Lippert and Bob Lucas and Barney MacCabe
              and Satoshi Matsuoka and Paul Messina and Peter Michielse
              and Bernd Mohr and Matthias S. Mueller and Wolfgang E. Nagel
              and Hiroshi Nakashima and Michael E. Papka and Dan Reed
              and Mitsuhisa Sato and Ed Seidel and John Shalf
              and David Skinner and Marc Snir and Thomas Sterling
              and Rick Stevens and Fred Streitz and Bob Sugar
              and Shinji Sumimoto and William Tang and John Taylor
              and Rajeev Thakur and Anne Trefethen and Mateo Valero
              and Aad van der Steen and Jeffrey Vetter and Peg Williams
              and Robert Wisniewski and Kathy Yelick},
  title    = {The {International Exascale Software Project} Roadmap},
  journal  = {The International Journal of High Performance Computing Applications},
  volume   = {25},
  number   = {1},
  pages    = {3--60},
  year     = {2011},
  month    = jan,
  doi      = {10.1177/1094342010391989},
  abstract = {Over the last 20 years, the open-source community has provided more and more software on which the world{\textquoteright}s high-performance computing systems depend for performance and productivity. The community has invested millions of dollars and years of effort to build key components. However, although the investments in these separate software elements have been tremendously valuable, a great deal of productivity has also been lost because of the lack of planning, coordination, and key integration of technologies necessary to make them work together smoothly and efficiently, both within individual petascale systems and between different systems. It seems clear that this completely uncoordinated development model will not provide the software needed to support the unprecedented parallelism required for peta/exascale computation on millions of cores, or the flexibility required to exploit new hardware models and features, such as transactional memory, speculative execution, and graphics processing units. This report describes the work of the community to prepare for the challenges of exascale computing, ultimately combining their efforts in a coordinated International Exascale Software Project.},
}

% NOTE(review): icl:417 shares its title and author list with icl:319 (the
% 2006 workshop paper); presumably the published chapter of that paper.
@incollection{icl:417,
  author    = {Oscar Hernandez and Fengguang Song and Barbara Chapman
               and Jack Dongarra and Bernd Mohr and Shirley Moore
               and Felix Wolf},
  title     = {Performance Instrumentation and Compiler Optimizations for {MPI/OpenMP} Applications},
  booktitle = {OpenMP Shared Memory Parallel Programming},
  series    = {Lecture Notes in Computer Science},
  volume    = {4315},
  year      = {2008},
  publisher = {Springer Berlin / Heidelberg},
}

@inproceedings{icl:412,
  author    = {Felix Wolf and Brian Wylie and Erika Abraham
               and Wolfgang Frings and Karl F{\"u}rlinger and Markus Geimer
               and Marc-Andre Hermanns and Bernd Mohr and Shirley Moore
               and Matthias Pfeifer},
  editor    = {Michael Resch and Rainer Keller and Valentin Himmler
               and Bettina Krammer and A Schulz},
  title     = {Usage of the {Scalasca} Toolset for Scalable Performance Analysis of Large-scale Parallel Applications},
  booktitle = {Proceedings of the 2nd International Workshop on Tools for High Performance Computing},
  pages     = {157--167},
  year      = {2008},
  month     = jan,
  publisher = {Springer},
  address   = {Stuttgart, Germany},
  keywords  = {point},
}

@article{icl:400,
  author  = {Felix Wolf and Bernd Mohr and Jack Dongarra and Shirley Moore},
  title   = {Automatic Analysis of Inefficiency Patterns in Parallel Applications},
  journal = {Concurrency and Computation: Practice and Experience},
  volume  = {19},
  number  = {11},
  pages   = {1481--1496},
  year    = {2007},
  month   = aug,
}

@inproceedings{icl:309,
  author    = {Felix Wolf and Felix Freitag and Bernd Mohr and Shirley Moore
               and Brian Wylie},
  title     = {Large Event Traces in Parallel Performance Analysis},
  booktitle = {8th Workshop `Parallel Systems and Algorithms' (PASA), Lecture Notes in Informatics},
  number    = {ICL-UT-06-08},
  year      = {2006},
  month     = mar,
  publisher = {Gesellschaft f{\"u}r Informatik},
  address   = {Frankfurt/Main, Germany},
  keywords  = {kojak},
}

@inproceedings{icl:319,
  author    = {Oscar Hernandez and Fengguang Song and Barbara Chapman
               and Jack Dongarra and Bernd Mohr and Shirley Moore
               and Felix Wolf},
  title     = {Performance Instrumentation and Compiler Optimizations for {MPI/OpenMP} Applications},
  booktitle = {Second International Workshop on OpenMP},
  year      = {2006},
  month     = jan,
  address   = {Reims, France},
  keywords  = {kojak},
}

% NOTE(review): icl:271 is the "submitted" record of the work that icl:400
% lists with volume, number and pages; kept as a separate key so existing
% citations keep resolving.
@article{icl:271,
  author   = {Felix Wolf and Bernd Mohr and Jack Dongarra and Shirley Moore},
  title    = {Automatic analysis of inefficiency patterns in parallel applications},
  journal  = {Concurrency and Computation: Practice and Experience},
  year     = {2005},
  keywords = {kojak},
  note     = {Special issue ``Automatic Performance Analysis'' (submitted)},
}

@inproceedings{icl:288,
  author    = {Nikhil Bhatia and Fengguang Song and Felix Wolf
               and Jack Dongarra and Bernd Mohr and Shirley Moore},
  title     = {Automatic Experimental Analysis of Communication Patterns in Virtual Topologies},
  booktitle = {Proceedings of the International Conference on Parallel Processing},
  year      = {2005},
  month     = jun,
  publisher = {IEEE Computer Society},
  address   = {Oslo, Norway},
  keywords  = {kojak},
}

@inproceedings{icl:272,
  author    = {Marc-Andre Hermanns and Bernd Mohr and Felix Wolf},
  title     = {Event-based Measurement and Analysis of One-sided Communication},
  booktitle = {Proceedings of the European Conference on Parallel Computing (Euro-Par)},
  year      = {2005},
  month     = aug,
  publisher = {Springer},
  address   = {Lisbon, Portugal},
  keywords  = {kojak},
}

@inproceedings{icl:248,
  author    = {Shirley Moore and Felix Wolf and Jack Dongarra and Bernd Mohr},
  title     = {Improving Time to Solution with Automated Performance Analysis},
  booktitle = {Second Workshop on Productivity and Performance in High-End Computing (P-PHEC) at 11th International Symposium on High Performance Computer Architecture (HPCA-2005)},
  year      = {2005},
  month     = feb,
  address   = {San Francisco},
  keywords  = {kojak},
}

@inproceedings{icl:274,
  author    = {Nikhil Bhatia and Shirley Moore and Felix Wolf
               and Jack Dongarra and Bernd Mohr},
  title     = {A Pattern-Based Approach to Automated Application Performance Analysis},
  booktitle = {Workshop on Patterns in High Performance Computing},
  year      = {2005},
  month     = may,
  address   = {University of Illinois at Urbana-Champaign},
  keywords  = {kojak},
}

@inproceedings{icl:295,
  author    = {Bernd Mohr and Andrej K{\"u}hnal and Marc-Andre Hermanns
               and Felix Wolf},
  title     = {Performance Analysis of One-sided Communication Mechanisms},
  booktitle = {Mini-Symposium ``Tools Support for Parallel Programming'', Proceedings of Parallel Computing (ParCo)},
  number    = {ICL-UT-06-07},
  year      = {2005},
  month     = sep,
  address   = {Malaga, Spain},
  keywords  = {kojak},
}

@inproceedings{icl:270,
  author    = {Shirley Moore and Felix Wolf and Jack Dongarra
               and Sameer Shende and Allen D. Malony and Bernd Mohr},
  title     = {A Scalable Approach to {MPI} Application Performance Analysis},
  booktitle = {Proceedings of the 12th European Parallel Virtual Machine and Message Passing Interface Conference},
  series    = {Lecture Notes in Computer Science},
  year      = {2005},
  month     = sep,
  publisher = {Springer},
  keywords  = {kojak},
}

@inproceedings{icl:232,
  author    = {Felix Wolf and Bernd Mohr and Jack Dongarra and Shirley Moore},
  title     = {Efficient Pattern Search in Large Traces through Successive Refinement},
  booktitle = {Proceedings of Euro-Par 2004},
  year      = {2004},
  month     = aug,
  publisher = {Springer-Verlag},
  address   = {Pisa, Italy},
  keywords  = {kojak},
}

@article{icl:195,
  author    = {Felix Wolf and Bernd Mohr},
  editor    = {Andrea Clematis and Daniele D{\textquoteright}Agostino},
  title     = {Automatic performance analysis of hybrid {MPI/OpenMP} applications},
  journal   = {Journal of Systems Architecture},
  volume    = {49},
  number    = {10--11},
  pages     = {421--439},
  year      = {2003},
  month     = nov,
  publisher = {Elsevier},
  keywords  = {kojak},
  note      = {Special Issue `Evolutions in parallel distributed and network-based processing'},
}

% NOTE(review): the source record for icl:192 paired year 2003 with month
% "2004-01". The year is kept and the month omitted until the publication
% date is verified.
@article{icl:192,
  author    = {Felix Wolf and Bernd Mohr},
  title     = {Hardware-Counter Based Automatic Performance Analysis of Parallel Programs},
  journal   = {Advances in Parallel Computing},
  volume    = {13},
  pages     = {753--760},
  year      = {2003},
  publisher = {Elsevier},
  address   = {Dresden, Germany},
  doi       = {10.1016/S0927-5452(04)80092-3},
  keywords  = {kojak, papi},
  abstract  = {The KOJAK performance-analysis environment identifies a large number of performance problems on parallel computers with SMP nodes. The current version concentrates on parallelism-related performance problems that arise from an inefficient usage of the parallel programming interfaces MPI and OpenMP, while ignoring individual CPU performance. This chapter describes an extended design of KOJAK capable of diagnosing low individual-CPU performance based on hardware-counter information and of integrating the results with those of the parallelism-centered analysis. The performance of parallel applications is determined by a variety of different factors. Performance of single components frequently influences the overall behavior in unexpected ways. Application programmers on current parallel machines have to deal with numerous performance-critical aspects: different modes of parallel execution, such as message passing, multi-threading or even a combination of the two, and performance on individual CPU that is determined by the interaction of different functional units. The KOJAK analysis process is composed of two parts: a semi-automatic instrumentation of the user application followed by an automatic analysis of the generated performance data. KOJAK{\textquoteright}s instrumentation software runs on most major UNIX platforms and works on multiple levels, including source-code, compiler, and linker.},
}

@inproceedings{icl:193,
  author    = {Bernd Mohr and Felix Wolf},
  title     = {{KOJAK} -- A Tool Set for Automatic Performance Analysis of Parallel Applications},
  booktitle = {Proceedings of the European Conference on Parallel Computing (EuroPar)},
  volume    = {2790},
  pages     = {1301--1304},
  year      = {2003},
  month     = aug,
  publisher = {Springer-Verlag},
  address   = {Klagenfurt, Austria},
  keywords  = {kojak},
}