% Publication records icl:417 through icl:195, one field per line.
% Conventions used below: conference venues live in booktitle (not journal),
% months use the predefined three-letter macros, page and issue ranges use
% a double hyphen, and acronyms in titles are brace-protected so that
% sentence-casing styles keep their capitalisation.

@article{icl:417,
  author    = {Hernandez, Oscar and Song, Fengguang and Chapman, Barbara and Dongarra, Jack and Mohr, Bernd and Moore, Shirley and Wolf, Felix},
  title     = {Performance Instrumentation and Compiler Optimizations for {MPI}/{OpenMP} Applications},
  journal   = {Lecture Notes in Computer Science, OpenMP Shared Memory Parallel Programming},
  volume    = {4315},
  year      = {2008},
  publisher = {Springer Berlin / Heidelberg},
}

@inproceedings{icl:412,
  author    = {Wolf, Felix and Wylie, Brian and Abraham, Erika and Frings, Wolfgang and F{\"u}rlinger, Karl and Geimer, Markus and Hermanns, Marc-Andre and Mohr, Bernd and Moore, Shirley and Pfeifer, Matthias},
  editor    = {Resch, Michael and Keller, Rainer and Himmler, Valentin and Krammer, Bettina and Schulz, A.},
  title     = {Usage of the {Scalasca} Toolset for Scalable Performance Analysis of Large-scale Parallel Applications},
  booktitle = {Proceedings of the 2nd International Workshop on Tools for High Performance Computing},
  pages     = {157--167},
  publisher = {Springer},
  address   = {Stuttgart, Germany},
  year      = {2008},
  month     = jan,
  keywords  = {point},
}

@article{icl:400,
  author    = {Wolf, Felix and Mohr, Bernd and Dongarra, Jack and Moore, Shirley},
  title     = {Automatic Analysis of Inefficiency Patterns in Parallel Applications},
  journal   = {Concurrency and Computation: Practice and Experience},
  volume    = {19},
  number    = {11},
  pages     = {1481--1496},
  year      = {2007},
  month     = aug,
}

@inproceedings{icl:309,
  author    = {Wolf, Felix and Freitag, Felix and Mohr, Bernd and Moore, Shirley and Wylie, Brian},
  title     = {Large Event Traces in Parallel Performance Analysis},
  booktitle = {8th Workshop `Parallel Systems and Algorithms' (PASA), Lecture Notes in Informatics},
  number    = {ICL-UT-06-08},
  publisher = {Gesellschaft f{\"u}r Informatik},
  address   = {Frankfurt/Main, Germany},
  year      = {2006},
  month     = mar,
  keywords  = {kojak},
}

@inproceedings{icl:319,
  author    = {Hernandez, Oscar and Song, Fengguang and Chapman, Barbara and Dongarra, Jack and Mohr, Bernd and Moore, Shirley and Wolf, Felix},
  title     = {Performance Instrumentation and Compiler Optimizations for {MPI}/{OpenMP} Applications},
  booktitle = {Second International Workshop on OpenMP},
  address   = {Reims, France},
  year      = {2006},
  month     = jan,
  keywords  = {kojak},
}

@inproceedings{icl:307,
  author    = {Aguilera, Gabriela and Teller, Patricia J. and Taufer, Michela and Wolf, Felix},
  title     = {A Systematic Multi-step Methodology for Performance Analysis of Communication Traces of Distributed Applications based on Hierarchical Clustering},
  booktitle = {Proceedings of the 5th International Workshop on Performance Modeling, Evaluation, and Organization of Parallel and Distributed Systems (PMEO-PDS 2006)},
  number    = {ICL-UT-05-06},
  publisher = {IEEE Computer Society},
  address   = {Rhodes Island, Greece},
  year      = {2006},
  month     = apr,
  keywords  = {kojak},
}

@article{icl:271,
  author    = {Wolf, Felix and Mohr, Bernd and Dongarra, Jack and Moore, Shirley},
  title     = {Automatic Analysis of Inefficiency Patterns in Parallel Applications},
  journal   = {Concurrency and Computation: Practice and Experience},
  note      = {Special issue ``Automatic Performance Analysis'' (submitted)},
  year      = {2005},
  keywords  = {kojak},
}

@inproceedings{icl:288,
  author    = {Bhatia, Nikhil and Song, Fengguang and Wolf, Felix and Dongarra, Jack and Mohr, Bernd and Moore, Shirley},
  title     = {Automatic Experimental Analysis of Communication Patterns in Virtual Topologies},
  booktitle = {Proceedings of the International Conference on Parallel Processing},
  publisher = {IEEE Computer Society},
  address   = {Oslo, Norway},
  year      = {2005},
  month     = jun,
  keywords  = {kojak},
}

@inproceedings{icl:272,
  author    = {Hermanns, Marc-Andre and Mohr, Bernd and Wolf, Felix},
  title     = {Event-based Measurement and Analysis of One-sided Communication},
  booktitle = {Proceedings of the European Conference on Parallel Computing (Euro-Par)},
  publisher = {Springer},
  address   = {Lisbon, Portugal},
  year      = {2005},
  month     = aug,
  keywords  = {kojak},
}

@inproceedings{icl:248,
  author    = {Moore, Shirley and Wolf, Felix and Dongarra, Jack and Mohr, Bernd},
  title     = {Improving Time to Solution with Automated Performance Analysis},
  booktitle = {Second Workshop on Productivity and Performance in High-End Computing (P-PHEC) at 11th International Symposium on High Performance Computer Architecture (HPCA-2005)},
  address   = {San Francisco},
  year      = {2005},
  month     = feb,
  keywords  = {kojak},
}

@inproceedings{icl:274,
  author    = {Bhatia, Nikhil and Moore, Shirley and Wolf, Felix and Dongarra, Jack and Mohr, Bernd},
  title     = {A Pattern-Based Approach to Automated Application Performance Analysis},
  booktitle = {Workshop on Patterns in High Performance Computing},
  address   = {University of Illinois at Urbana-Champaign},
  year      = {2005},
  month     = may,
  keywords  = {kojak},
}

@inproceedings{icl:287,
  author    = {Worley, Patrick H. and Candy, Jeff and Carrington, Laura and Huck, Kevin and Kaiser, Timothy and Mahinthakumar, Kumar and Malony, Allen D. and Moore, Shirley and Reed, Dan and Roth, Philip C. and Shan, H. and Shende, Sameer and Snavely, Allan and Sreepathi, S. and Wolf, Felix and Zhang, Y.},
  title     = {Performance Analysis of {GYRO}: A Tool Evaluation},
  booktitle = {Proceedings of the 2005 SciDAC Conference},
  address   = {San Francisco, CA},
  year      = {2005},
  month     = jun,
  keywords  = {kojak},
}

@inproceedings{icl:295,
  author    = {Mohr, Bernd and K{\"u}hnal, Andrej and Hermanns, Marc-Andre and Wolf, Felix},
  title     = {Performance Analysis of One-sided Communication Mechanisms},
  booktitle = {Mini-Symposium ``Tools Support for Parallel Programming'', Proceedings of Parallel Computing (ParCo)},
  number    = {ICL-UT-06-07},
  address   = {Malaga, Spain},
  year      = {2005},
  month     = sep,
  keywords  = {kojak},
}

@inproceedings{icl:298,
  author       = {Moore, Shirley and Cronk, David and Wolf, Felix and Purkayastha, Avi and Teller, Patricia J. and Araiza, Robert and Aguilera, Gabriela and Nava, Jamie},
  title        = {Performance Profiling and Analysis of {DoD} Applications using {PAPI} and {TAU}},
  booktitle    = {Proceedings of DoD HPCMP UGC 2005},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Nashville, TN},
  year         = {2005},
  month        = jun,
  keywords     = {papi},
}

@inproceedings{icl:269,
  author    = {Shende, Sameer and Malony, Allen D. and Morris, Alan and Wolf, Felix},
  title     = {Performance Profiling Overhead Compensation for {MPI} Programs},
  booktitle = {Proceedings of the 12th European Parallel Virtual Machine and Message Passing Interface Conference},
  publisher = {Springer LNCS},
  year      = {2005},
  month     = sep,
  keywords  = {kojak},
}

@inproceedings{icl:270,
  author    = {Moore, Shirley and Wolf, Felix and Dongarra, Jack and Shende, Sameer and Malony, Allen D. and Mohr, Bernd},
  title     = {A Scalable Approach to {MPI} Application Performance Analysis},
  booktitle = {Proceedings of the 12th European Parallel Virtual Machine and Message Passing Interface Conference},
  publisher = {Springer LNCS},
  year      = {2005},
  month     = sep,
  keywords  = {kojak},
}

@inproceedings{icl:268,
  author    = {Wolf, Felix and Malony, Allen D. and Shende, Sameer and Morris, Alan},
  title     = {Trace-Based Parallel Performance Overhead Compensation},
  booktitle = {Proceedings of the International Conference on High Performance Computing and Communications (HPCC)},
  address   = {Sorrento (Naples), Italy},
  year      = {2005},
  month     = sep,
  keywords  = {kojak},
}

@inproceedings{icl:233,
  author    = {Song, Fengguang and Wolf, Felix and Bhatia, Nikhil and Dongarra, Jack and Moore, Shirley},
  title     = {An Algebra for Cross-Experiment Performance Analysis},
  booktitle = {2004 International Conference on Parallel Processing (ICPP-04)},
  address   = {Montreal, Quebec, Canada},
  year      = {2004},
  month     = aug,
  keywords  = {kojak},
}

@inproceedings{icl:239,
  author    = {Mucci, Phil and Dongarra, Jack and Kufrin, Rick and Moore, Shirley and Song, Fengguang and Wolf, Felix},
  title     = {Automating the Large-Scale Collection and Analysis of Performance},
  booktitle = {5th LCI International Conference on Linux Clusters: The HPC Revolution},
  address   = {Austin, Texas},
  year      = {2004},
  month     = may,
  keywords  = {kojak, papi},
}

@techreport{icl:196,
  author      = {Song, Fengguang and Wolf, Felix},
  title       = {{CUBE} User Manual},
  type        = {ICL Technical Report},
  number      = {ICL-UT-04-01},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2004},
  month       = feb,
  keywords    = {kojak},
}

@techreport{icl:243,
  author      = {Wolf, Felix},
  title       = {{EARL} - {API} Documentation},
  type        = {ICL Technical Report},
  number      = {ICL-UT-04-03},
  institution = {Innovative Computing Laboratory, University of Tennessee},
  year        = {2004},
  month       = oct,
  keywords    = {kojak},
}

@inproceedings{icl:232,
  author    = {Wolf, Felix and Mohr, Bernd and Dongarra, Jack and Moore, Shirley},
  title     = {Efficient Pattern Search in Large Traces through Successive Refinement},
  booktitle = {Proceedings of Euro-Par 2004},
  publisher = {Springer-Verlag},
  address   = {Pisa, Italy},
  year      = {2004},
  month     = aug,
  keywords  = {kojak},
}

@article{icl:195,
  author    = {Wolf, Felix and Mohr, Bernd},
  editor    = {Clematis, Andrea and D'Agostino, Daniele},
  title     = {Automatic Performance Analysis of Hybrid {MPI}/{OpenMP} Applications},
  journal   = {Journal of Systems Architecture},
  note      = {Special Issue `Evolutions in parallel distributed and network-based processing'},
  volume    = {49},
  number    = {10--11},
  pages     = {421--439},
  publisher = {Elsevier},
  year      = {2003},
  month     = nov,
  keywords  = {kojak},
}
% Publication records icl:192 and icl:193, one field per line.
% The doi field holds the bare DOI (no resolver prefix), page ranges use a
% double hyphen, and the conference venue of icl:193 is stored in booktitle.

@article{icl:192,
  author        = {Wolf, Felix and Mohr, Bernd},
  title         = {Hardware-Counter Based Automatic Performance Analysis of Parallel Programs},
  journal       = {Advances in Parallel Computing},
  volume        = {13},
  pages         = {753--760},
  publisher     = {Elsevier},
  address       = {Dresden, Germany},
  year          = {2003},
  doi           = {10.1016/S0927-5452(04)80092-3},
  keywords      = {kojak, papi},
  abstract      = {The KOJAK performance-analysis environment identifies a large number of performance problems on parallel computers with SMP nodes. The current version concentrates on parallelism-related performance problems that arise from an inefficient usage of the parallel programming interfaces MPI and OpenMP, while ignoring individual CPU performance. This chapter describes an extended design of KOJAK capable of diagnosing low individual-CPU performance based on hardware-counter information and of integrating the results with those of the parallelism-centered analysis. The performance of parallel applications is determined by a variety of different factors. Performance of single components frequently influences the overall behavior in unexpected ways. Application programmers on current parallel machines have to deal with numerous performance-critical aspects: different modes of parallel execution, such as message passing, multi-threading or even a combination of the two, and performance on individual CPU that is determined by the interaction of different functional units. The KOJAK analysis process is composed of two parts: a semi-automatic instrumentation of the user application followed by an automatic analysis of the generated performance data. KOJAK's instrumentation software runs on most major UNIX platforms and works on multiple levels, including source-code, compiler, and linker.},
  internal-note = {REVIEW: the exported record carried month 2004-01 alongside year 2003; the month is omitted here because the two disagree. The DOI suffix suggests a 2004 publication date -- confirm and adjust year/month accordingly.},
}

@inproceedings{icl:193,
  author    = {Mohr, Bernd and Wolf, Felix},
  title     = {{KOJAK} - A Tool Set for Automatic Performance Analysis of Parallel Applications},
  booktitle = {Proceedings of the European Conference on Parallel Computing (EuroPar)},
  volume    = {2790},
  pages     = {1301--1304},
  publisher = {Springer-Verlag},
  address   = {Klagenfurt, Austria},
  year      = {2003},
  month     = aug,
  keywords  = {kojak},
}