% PerfMiner conference paper (Euro-Par 2005). The doi field stores the bare DOI; month uses the standard BibTeX macro.
@inproceedings{icl:297,
  author    = {Mucci, Phil and Ahlin, Daniel and Danielsson, Johan and Ekman, Per and Malinowski, Lars},
  title     = {{PerfMiner}: Cluster-Wide Collection, Storage and Presentation of Application Level Hardware Performance Data},
  booktitle = {European Conference on Parallel Processing (Euro-Par 2005)},
  publisher = {Springer},
  address   = {Monte de Caparica, Portugal},
  year      = {2005},
  month     = sep,
  doi       = {10.1007/11549468_1},
  keywords  = {papi},
  abstract  = {We present PerfMiner, a system for the transparent collection, storage and presentation of thread-level hardware performance data across an entire cluster. Every sub-process/thread spawned by the user through the batch system is measured with near zero overhead and no dilation of run-time. Performance metrics are collected at the thread level using tool built on top of the Performance Application Programming Interface (PAPI). As the hardware counters are virtualized by the OS, the resulting counts are largely unaffected by other kernel or user processes. PerfMiner correlates this performance data with metadata from the batch system and places it in a database. Through a command line and web interface, the user can make queries to the database to report information on everything from overall workload characterization and system utilization to the performance of a single thread in a specific application. This is in contrast to other monitoring systems that report aggregate system-wide metrics sampled over a period of time. In this paper, we describe our implementation of PerfMiner as well as present some results from the test deployment of PerfMiner across three different clusters at the Center for Parallel Computers at The Royal Institute of Technology in Stockholm, Sweden.},
}