@comment{
  Publications on dataflow/task-based execution (TTG, PaRSEC, NWChem).
  Numeric citation keys are kept as exported so existing citations still
  resolve. The first entry was exported with an empty key and has been given
  the key bosilca2020ttg. Month values use the standard three-letter macros,
  DOIs are stored bare (no resolver prefix), and page ranges use a double
  hyphen.
}

@inproceedings{bosilca2020ttg,
  author    = {Bosilca, George and Harrison, Robert and Herault, Thomas and Javanmard, Mohammad Mahdi and Nookala, Poornima and Valeev, Edward},
  title     = {The {Template Task Graph} ({TTG}) - An Emerging Practical Dataflow Programming Paradigm for Scientific Simulation at Extreme Scale},
  booktitle = {2020 IEEE/ACM 5th International Workshop on Extreme Scale Programming Models and Middleware (ESPM2)},
  year      = {2020},
  month     = nov,
  publisher = {IEEE},
  doi       = {10.1109/ESPM251964.2020.00011},
  keywords  = {dag, dataflow, exascale, graph, High-performance computing, workflow},
  abstract  = {We describe TESSE, an emerging general-purpose, open-source software ecosystem that attacks the twin challenges of programmer productivity and portable performance for advanced scientific applications on modern high-performance computers. TESSE builds upon and extends the ParsecDAG/-dataflow runtime with a new Domain Specific Languages (DSL) and new integration capabilities. Motivating this work is our belief that such a dataflow model, perhaps with applications composed in domain specific languages, can overcome many of the challenges faced by a wide variety of irregular applications that are poorly served by current programming and execution models. Two such applications from many-body physics and applied mathematics are briefly explored. This paper focuses upon the Template Task Graph (TTG), which is TESSE{\textquoteright}s main C++ Api that provides a powerful work/data-flow programming model. Algorithms on spatial trees, block-sparse tensors, and wave fronts are used to illustrate the API and associated concepts, as well as to compare with related approaches.},
}

@comment{
  Entries 1212 and 999 describe the same journal article (identical DOI):
  1212 is the final print record (volume 32, 2018) and 999 is the earlier
  online-first record (2017). Both keys are retained because either may be
  cited; prefer 1212 in new documents.
}

@article{1212,
  author   = {Jagode, Heike and Danalis, Anthony and Dongarra, Jack},
  title    = {Accelerating {NWChem} Coupled Cluster through Dataflow-Based Execution},
  journal  = {The International Journal of High Performance Computing Applications},
  volume   = {32},
  pages    = {540--551},
  year     = {2018},
  month    = jul,
  type     = {Journal Article},
  chapter  = {540},
  doi      = {10.1177/1094342016672543},
  url      = {http://journals.sagepub.com/doi/10.1177/1094342016672543},
  keywords = {CCSD, dag, dataflow, NWChem, parsec, ptg, tasks},
  abstract = {Numerical techniques used for describing many-body systems, such as the Coupled Cluster methods (CC) of the quantum chemistry package NWCHEM, are of extreme interest to the computational chemistry community in fields such as catalytic reactions, solar energy, and bio-mass conversion. In spite of their importance, many of these computationally intensive algorithms have traditionally been thought of in a fairly linear fashion, or are parallelized in coarse chunks. In this paper, we present our effort of converting the NWCHEM{\textquoteright}s CC code into a dataflow-based form that is capable of utilizing the task scheduling system PARSEC (Parallel Runtime Scheduling and Execution Controller): a software package designed to enable high-performance computing at scale. We discuss the modularity of our approach and explain how the PARSEC-enabled dataflow version of the subroutines seamlessly integrate into the NWCHEM codebase. Furthermore, we argue how the CC algorithms can be easily decomposed into finer-grained tasks (compared with the original version of NWCHEM); and how data distribution and load balancing are decoupled and can be tuned independently. We demonstrate performance acceleration by more than a factor of two in the execution of the entire CC component of NWCHEM, concluding that the utilization of dataflow-based execution for CC methods enables more efficient and scalable computation.},
}

@article{999,
  author   = {Jagode, Heike and Danalis, Anthony and Dongarra, Jack},
  title    = {Accelerating {NWChem} Coupled Cluster through Dataflow-Based Execution},
  journal  = {The International Journal of High Performance Computing Applications},
  pages    = {1--13},
  year     = {2017},
  month    = jan,
  doi      = {10.1177/1094342016672543},
  url      = {http://journals.sagepub.com/doi/10.1177/1094342016672543},
  keywords = {CCSD, dag, dataflow, NWChem, parsec, ptg, tasks},
  abstract = {Numerical techniques used for describing many-body systems, such as the Coupled Cluster methods (CC) of the quantum chemistry package NWChem, are of extreme interest to the computational chemistry community in fields such as catalytic reactions, solar energy, and bio-mass conversion. In spite of their importance, many of these computationally intensive algorithms have traditionally been thought of in a fairly linear fashion, or are parallelized in coarse chunks. In this paper, we present our effort of converting the NWChem{\textquoteright}s CC code into a dataflow-based form that is capable of utilizing the task scheduling system PaRSEC (Parallel Runtime Scheduling and Execution Controller): a software package designed to enable high-performance computing at scale. We discuss the modularity of our approach and explain how the PaRSEC-enabled dataflow version of the subroutines seamlessly integrate into the NWChem codebase. Furthermore, we argue how the CC algorithms can be easily decomposed into finer-grained tasks (compared with the original version of NWChem); and how data distribution and load balancing are decoupled and can be tuned independently. We demonstrate performance acceleration by more than a factor of two in the execution of the entire CC component of NWChem, concluding that the utilization of dataflow-based execution for CC methods enables more efficient and scalable computation.},
}

@inproceedings{921,
  author    = {Jagode, Heike and Danalis, Anthony and Bosilca, George and Dongarra, Jack},
  title     = {Accelerating {NWChem} Coupled Cluster through Dataflow-Based Execution},
  booktitle = {11th International Conference on Parallel Processing and Applied Mathematics (PPAM 2015)},
  year      = {2015},
  month     = sep,
  publisher = {Springer International Publishing},
  address   = {Krakow, Poland},
  keywords  = {CCSD, dag, dataflow, NWChem, parsec, ptg, tasks},
  abstract  = {Numerical techniques used for describing many-body systems, such as the Coupled Cluster methods (CC) of the quantum chemistry package NWChem, are of extreme interest to the computational chemistry community in fields such as catalytic reactions, solar energy, and bio-mass conversion. In spite of their importance, many of these computationally intensive algorithms have traditionally been thought of in a fairly linear fashion, or are parallelised in coarse chunks. In this paper, we present our effort of converting the NWChem{\textquoteright}s CC code into a dataflow-based form that is capable of utilizing the task scheduling system PaRSEC (Parallel Runtime Scheduling and Execution Controller) {\textendash} a software package designed to enable high performance computing at scale. We discuss the modularity of our approach and explain how the PaRSEC-enabled dataflow version of the subroutines seamlessly integrate into the NWChem codebase. Furthermore, we argue how the CC algorithms can be easily decomposed into finer grained tasks (compared to the original version of NWChem); and how data distribution and load balancing are decoupled and can be tuned independently. We demonstrate performance acceleration by more than a factor of two in the execution of the entire CC component of NWChem, concluding that the utilization of dataflow-based execution for CC methods enables more efficient and scalable computation.},
}

@inproceedings{915,
  author    = {Danalis, Anthony and Jagode, Heike and Bosilca, George and Dongarra, Jack},
  title     = {{PaRSEC} in Practice: Optimizing a Legacy Chemistry Application through Distributed Task-Based Execution},
  booktitle = {2015 IEEE International Conference on Cluster Computing},
  year      = {2015},
  month     = sep,
  publisher = {IEEE},
  address   = {Chicago, IL},
  keywords  = {dag, parsec, ptg, tasks},
  abstract  = {Task-based execution has been growing in popularity as a means to deliver a good balance between performance and portability in the post-petascale era. The Parallel Runtime Scheduling and Execution Control (PARSEC) framework is a task-based runtime system that we designed to achieve high performance computing at scale. PARSEC offers a programming paradigm that is different than what has been traditionally used to develop large scale parallel scientific applications. In this paper, we discuss the use of PARSEC to convert a part of the Coupled Cluster (CC) component of the Quantum Chemistry package NWCHEM into a task-based form. We explain how we organized the computation of the CC methods in individual tasks with explicitly defined data dependencies between them and re-integrated the modified code into NWCHEM. We present a thorough performance evaluation and demonstrate that the modified code outperforms the original by more than a factor of two. We also compare the performance of different variants of the modified code and explain the different behaviors that lead to the differences in performance.},
}