% Workshop paper introducing the Template Task Graph (TTG) API of TESSE.
% The exported record had an empty citation key; one is assigned here so the
% entry can be cited. The DOI is stored bare (no resolver prefix) and the
% month uses the standard three-letter macro.
@inproceedings{bosilca2020ttg,
  author       = {Bosilca, George and Harrison, Robert and Herault, Thomas and Javanmard, Mohammad Mahdi and Nookala, Poornima and Valeev, Edward},
  title        = {The {Template Task Graph} ({TTG}) - An Emerging Practical Dataflow Programming Paradigm for Scientific Simulation at Extreme Scale},
  booktitle    = {2020 {IEEE/ACM} 5th International Workshop on Extreme Scale Programming Models and Middleware ({ESPM2})},
  year         = {2020},
  month        = nov,
  publisher    = {IEEE},
  organization = {IEEE},
  doi          = {10.1109/ESPM251964.2020.00011},
  keywords     = {dag, dataflow, exascale, graph, High-performance computing, workflow},
  abstract     = {We describe TESSE, an emerging general-purpose, open-source software ecosystem that attacks the twin challenges of programmer productivity and portable performance for advanced scientific applications on modern high-performance computers. TESSE builds upon and extends the PaRSEC DAG/dataflow runtime with a new Domain Specific Languages (DSL) and new integration capabilities. Motivating this work is our belief that such a dataflow model, perhaps with applications composed in domain specific languages, can overcome many of the challenges faced by a wide variety of irregular applications that are poorly served by current programming and execution models. Two such applications from many-body physics and applied mathematics are briefly explored. This paper focuses upon the Template Task Graph (TTG), which is TESSE{\textquoteright}s main C++ API that provides a powerful work/data-flow programming model. Algorithms on spatial trees, block-sparse tensors, and wave fronts are used to illustrate the API and associated concepts, as well as to compare with related approaches.},
}

% Technical report ICL-UT-18-13 on tensor contraction with TESSE/PaRSEC.
% The report-series label is carried in the type field and the issuing body
% in institution (the field this entry type requires); the citation key is
% unchanged from the export.
@techreport{1280,
  author      = {Bosilca, George and Genet, Damien and Harrison, Robert and Herault, Thomas and Javanmard, Mohammad Mahdi and Peng, Chong and Valeev, Edward},
  title       = {Tensor Contraction on Distributed Hybrid Architectures using a Task-Based Runtime System},
  type        = {Innovative Computing Laboratory Technical Report},
  number      = {ICL-UT-18-13},
  institution = {University of Tennessee},
  year        = {2018},
  month       = dec,
  abstract    = {The needs for predictive simulation of electronic structure in chemistry and materials science calls for fast/reduced-scaling formulations of quantum n-body methods that replace the traditional dense tensors with element-, block-, rank-, and block-rank-sparse (data-sparse) tensors. The resulting, highly irregular data structures are a poor match to imperative, bulk-synchronous parallel programming style due to the dynamic nature of the problem and to the lack of clear domain decomposition to guarantee a fair load-balance. TESSE runtime and the associated programming model aim to support performance-portable composition of applications involving irregular and dynamically changing data. In this paper we report an implementation of irregular dense tensor contraction in a paradigmatic electronic structure application based on the TESSE extension of PaRSEC, a distributed hybrid task runtime system, and analyze the resulting performance on a distributed memory cluster of multi-GPU nodes. Unprecedented strong scaling and promising efficiency indicate a viable future for task-based programming of complete production-quality reduced scaling models of electronic structure.},
}