% Mixed-precision geospatial modeling with PaRSEC: three related publications.
% Keys follow the lastnameYYYYkeyword scheme; month uses the standard
% three-letter macros so styles can localize it.

% IEEE CLUSTER 2023 conference paper.
@inproceedings{cao2023reducing,
  author    = {Cao, Qinglei and Abdulah, Sameh and Ltaief, Hatem and Genton, Marc G. and Keyes, David and Bosilca, George},
  title     = {Reducing Data Motion and Energy Consumption of Geospatial Modeling Applications Using Automated Precision Conversion},
  booktitle = {2023 IEEE International Conference on Cluster Computing (CLUSTER)},
  year      = {2023},
  month     = nov,
  publisher = {IEEE},
  address   = {Santa Fe, NM, USA},
  doi       = {10.1109/CLUSTER52292.2023.00035},
  url       = {https://ieeexplore.ieee.org/document/10319946/},
  abstract  = {The burgeoning interest in large-scale geospatial modeling, particularly within the domains of climate and weather prediction, underscores the concomitant critical importance of accuracy, scalability, and computational speed. Harnessing these complex simulations{\textquoteright} potential, however, necessitates innovative computational strategies, especially considering the increasing volume of data involved. Recent advancements in Graphics Processing Units (GPUs) have opened up new avenues for accelerating these modeling processes. In particular, their efficient utilization necessitates new strategies, such as mixed-precision arithmetic, that can balance the trade-off between computational speed and model accuracy. This paper leverages PaRSEC runtime system and delves into the opportunities provided by mixed-precision arithmetic to expedite large-scale geospatial modeling in heterogeneous environments. By using an automated conversion strategy, our mixed-precision approach significantly improves computational performance (up to 3X) on Summit supercomputer and reduces the associated energy consumption on various Nvidia GPU generations. Importantly, this implementation ensures the requisite accuracy in environmental applications, a critical factor in their operational viability. The findings of this study bear significant implications for future research and development in high-performance computing, underscoring the transformative potential of mixed-precision arithmetic on GPUs in addressing the computational demands of large-scale geospatial modeling and making a stride toward a more sustainable, efficient, and accurate future in large-scale environmental applications.},
}

% IEEE TPDS journal article. The abstract ends in "..." because it is
% truncated in the exported record. The url-aam field is non-standard (ignored
% by styles) and keeps the accepted-manuscript PDF link from the export.
@article{abdulah2022accelerating,
  author   = {Abdulah, Sameh and Cao, Qinglei and Pei, Yu and Bosilca, George and Dongarra, Jack and Genton, Marc G. and Keyes, David E. and Ltaief, Hatem and Sun, Ying},
  title    = {Accelerating Geostatistical Modeling and Prediction With Mixed-Precision Computations: A High-Productivity Approach With {PaRSEC}},
  journal  = {IEEE Transactions on Parallel and Distributed Systems},
  volume   = {33},
  pages    = {964--976},
  year     = {2022},
  month    = apr,
  issn     = {1045-9219},
  doi      = {10.1109/TPDS.2021.3084071},
  url      = {https://ieeexplore.ieee.org/document/9442267/},
  url-aam  = {https://ieeexplore.ieee.org/ielam/71/9575177/9442267-aam.pdf},
  keywords = {Computational modeling, Covariance matrices, Data models, Maximum likelihood estimation, Predictive models, runtime, Task analysis},
  abstract = {Geostatistical modeling, one of the prime motivating applications for exascale computing, is a technique for predicting desired quantities from geographically distributed data, based on statistical models and optimization of parameters. Spatial data are assumed to possess properties of stationarity or non-stationarity via a kernel fitted to a covariance matrix. A primary workhorse of stationary spatial statistics is Gaussian maximum log-likelihood estimation (MLE), whose central data structure is a dense, symmetric positive definite covariance matrix of the dimension of the number of correlated observations. Two essential operations in MLE are the application of the inverse and evaluation of the determinant of the covariance matrix. These can be rendered through the Cholesky decomposition and triangular solution. In this contribution, we reduce the precision of weakly correlated locations to single- or half-precision based on distance. We thus exploit mathematical structure to migrate MLE to a three-precision approximation that takes advantage of contemporary architectures offering BLAS3-like operations in a single instruction that are extremely fast for reduced precision. We illustrate application-expected accuracy worthy of double-precision from a majority half-precision computation, in a context where uniform single-precision is by itself insufficient. In tackling the complexity and imbalance caused by the mixing of three precisions, we deploy the PaRSEC runtime system. PaRSEC delivers on-demand casting of precisions while orchestrating tasks and data movement in a multi-GPU distributed-memory environment within a tile-based Cholesky factorization. Application-expected accuracy is maintained while achieving up to 1.59X by mixing FP64/FP32 operations on 1536 nodes of HAWK or 4096 nodes of Shaheen II, and up to 2.64X by mixing FP64/FP32/FP16 operations on 128 nodes of Summit, relative to FP64-only operations. This translates into up to 4.5, 4.7, ...},
}

% SC22 conference paper (ACM Digital Library record; no DOI in the export).
@inproceedings{cao2022reshaping,
  author    = {Cao, Qinglei and Abdulah, Sameh and Alomairy, Rabab and Pei, Yu and Nag, Pratik and Bosilca, George and Dongarra, Jack and Genton, Marc G. and Keyes, David and Ltaief, Hatem and Sun, Ying},
  title     = {Reshaping Geostatistical Modeling and Prediction for Extreme-Scale Environmental Applications},
  booktitle = {2022 International Conference for High Performance Computing, Networking, Storage and Analysis (SC22)},
  year      = {2022},
  month     = nov,
  publisher = {IEEE Press},
  address   = {Dallas, TX},
  isbn      = {9784665454445},
  url       = {https://dl.acm.org/doi/abs/10.5555/3571885.3571888},
  keywords  = {climate/weather prediction, dynamic runtime systems, high performance computing, low-rank matrix approximations, mixed-precision computations, space-time geospatial statistics, Task-based programming models},
  abstract  = {We extend the capability of space-time geostatistical modeling using algebraic approximations, illustrating application-expected accuracy worthy of double precision from majority low-precision computations and low-rank matrix approximations. We exploit the mathematical structure of the dense covariance matrix whose inverse action and determinant are repeatedly required in Gaussian log-likelihood optimization. Geostatistics augments first-principles modeling approaches for the prediction of environmental phenomena given the availability of measurements at a large number of locations; however, traditional Cholesky-based approaches grow cubically in complexity, gating practical extension to continental and global datasets now available. We combine the linear algebraic contributions of mixed-precision and low-rank computations within a tile-based Cholesky solver with on-demand casting of precisions and dynamic runtime support from PaRSEC to orchestrate tasks and data movement. Our adaptive approach scales on various systems and leverages the Fujitsu A64FX nodes of Fugaku to achieve up to 12X performance speedup against the highly optimized dense Cholesky implementation.},
}