From 2a87862358700a50dfaabe7a48922526618a87cb Mon Sep 17 00:00:00 2001 From: Cody Balos Date: Fri, 20 May 2022 12:29:22 -0700 Subject: [PATCH] Updates for xsdk@0.3.0 (#20) --- .cmake-format.py | 20 + .gitignore | 2 + CMakeLists.txt | 167 +++- README.md | 87 +- amrex/CMakeLists.txt | 6 + amrex/sundials/CMakeLists.txt | 17 + amrex/sundials/README.md | 63 ++ .../amrex_sundials_advection_diffusion.cpp | 829 ++++++++++++++++++ .../amrex_sundials_advection_diffusion.h | 143 +++ cmake/CorrectWindowsPaths.cmake | 13 - cmake/FindAMReX.cmake | 9 + cmake/FindBLASPP.cmake | 41 + cmake/FindHYPRE.cmake | 8 +- cmake/FindLAPACKPP.cmake | 42 + cmake/FindMAGMA.cmake | 8 +- cmake/FindMETIS.cmake | 7 +- cmake/FindMFEM.cmake | 13 +- cmake/FindPETSC.cmake | 777 ---------------- cmake/FindPETSc.cmake | 421 +++++++++ cmake/FindPLASMA.cmake | 50 ++ cmake/FindPackageMultipass.cmake | 106 --- cmake/FindSLATE.cmake | 42 + cmake/FindSUNDIALS.cmake | 45 +- cmake/FindSUPERLUDIST.cmake | 4 +- cmake/ResolveCompilerPaths.cmake | 105 --- cmake/XsdkAddTest.cmake | 28 + hypre/CMakeLists.txt | 62 +- mfem/CMakeLists.txt | 38 +- mfem/ginkgo/CMakeLists.txt | 31 +- mfem/ginkgo/README.md | 6 +- mfem/hypre-superlu/CMakeLists.txt | 41 +- mfem/hypre-superlu/README.md | 23 +- mfem/hypre-superlu/convdiff.cpp | 30 +- mfem/hypre-superlu/makefile | 53 -- mfem/hypre/CMakeLists.txt | 59 ++ mfem/hypre/README.md | 29 + mfem/hypre/magnetic-diffusion.cpp | 313 +++++++ mfem/petsc/CMakeLists.txt | 24 +- mfem/petsc/README.md | 6 +- mfem/petsc/makefile | 58 -- mfem/strumpack/CMakeLists.txt | 53 ++ mfem/strumpack/README.md | 29 + mfem/strumpack/diffusion-eigen.cpp | 360 ++++++++ mfem/sundials/CMakeLists.txt | 75 +- mfem/sundials/README.md | 3 +- mfem/sundials/advection.cpp | 30 +- mfem/sundials/makefile | 95 -- mfem/sundials/transient-heat.cpp | 24 +- petsc/CMakeLists.txt | 99 ++- petsc/README.md | 4 +- petsc/ex19.c | 56 +- petsc/makefile | 42 +- petsc/output/ex19_1.testout | 2 + petsc/output/ex19_cuda_1.out | 15 + 
plasma/CMakeLists.txt | 19 + plasma/README.md | 11 + plasma/ex1solve.cpp | 118 +++ strumpack/CMakeLists.txt | 24 + strumpack/README.md | 101 +++ strumpack/sparse.cpp | 158 ++++ sundials/CMakeLists.txt | 83 +- tools/package.py | 63 -- trilinos/CMakeLists.txt | 88 +- 63 files changed, 3775 insertions(+), 1603 deletions(-) create mode 100644 .cmake-format.py create mode 100644 .gitignore create mode 100644 amrex/CMakeLists.txt create mode 100644 amrex/sundials/CMakeLists.txt create mode 100644 amrex/sundials/README.md create mode 100644 amrex/sundials/amrex_sundials_advection_diffusion.cpp create mode 100644 amrex/sundials/amrex_sundials_advection_diffusion.h delete mode 100644 cmake/CorrectWindowsPaths.cmake create mode 100644 cmake/FindAMReX.cmake create mode 100644 cmake/FindBLASPP.cmake create mode 100644 cmake/FindLAPACKPP.cmake delete mode 100644 cmake/FindPETSC.cmake create mode 100644 cmake/FindPETSc.cmake create mode 100644 cmake/FindPLASMA.cmake delete mode 100644 cmake/FindPackageMultipass.cmake create mode 100644 cmake/FindSLATE.cmake delete mode 100644 cmake/ResolveCompilerPaths.cmake create mode 100644 cmake/XsdkAddTest.cmake delete mode 100644 mfem/hypre-superlu/makefile create mode 100644 mfem/hypre/CMakeLists.txt create mode 100644 mfem/hypre/README.md create mode 100644 mfem/hypre/magnetic-diffusion.cpp delete mode 100644 mfem/petsc/makefile create mode 100644 mfem/strumpack/CMakeLists.txt create mode 100644 mfem/strumpack/README.md create mode 100644 mfem/strumpack/diffusion-eigen.cpp delete mode 100644 mfem/sundials/makefile create mode 100644 petsc/output/ex19_1.testout create mode 100644 petsc/output/ex19_cuda_1.out create mode 100644 plasma/CMakeLists.txt create mode 100644 plasma/README.md create mode 100644 plasma/ex1solve.cpp create mode 100644 strumpack/CMakeLists.txt create mode 100644 strumpack/README.md create mode 100644 strumpack/sparse.cpp delete mode 100644 tools/package.py diff --git a/.cmake-format.py b/.cmake-format.py new file 
mode 100644 index 0000000..cb345db --- /dev/null +++ b/.cmake-format.py @@ -0,0 +1,20 @@ +# -*- Python -*- + +with section("format"): + + # How wide to allow formatted cmake files + line_width = 100 + + # How many spaces to tab for indent + tab_size = 4 + + # If true, separate flow control names from their parentheses with a space + separate_ctrl_name_with_space = False + + # If true, separate function names from parentheses with a space + separate_fn_name_with_space = False + + # If a statement is wrapped to more than one line, than dangle the closing + # parenthesis on its own line. + dangle_parens = True + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6e6a94e --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/build +/builddir diff --git a/CMakeLists.txt b/CMakeLists.txt index fecb7f8..5b2a60b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,73 +1,170 @@ -cmake_minimum_required(VERSION 3.12) -project(xsdk-examples - DESCRIPTION "xSDK Examples" - LANGUAGES CXX C) +cmake_minimum_required(VERSION 3.21) +project( + xsdk-examples + DESCRIPTION "xSDK Examples" + LANGUAGES CXX C +) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) +include(CTest) include(FindPackageHandleStandardArgs) +include(XsdkAddTest) # build options option(ENABLE_CUDA "Enable CUDA" OFF) -option(CMAKE_CUDA_ARCHITECTURES "CUDA architecture(s) to target" "70") +option(ENABLE_HIP "Enable HIP" OFF) + +option(ENABLE_AMREX "Enable AMReX" ON) +set(AMREX_DIR + "${AMREX_DIR}" + CACHE PATH "Path to AMReX installation directory" +) + +option(ENABLE_GINKGO "Enable Ginkgo" ON) +set(Ginkgo_DIR + "${Ginkgo_DIR}" + CACHE PATH "Path to Ginkgo installation directory" +) option(ENABLE_HYPRE "Enable hypre" ON) -set(HYPRE_DIR "${HYPRE_DIR}" CACHE PATH "Path to hypre installation directory") +set(HYPRE_DIR + "${HYPRE_DIR}" + CACHE PATH "Path to hypre installation directory" +) option(ENABLE_MFEM "Enable MFEM" ON) -set(MFEM_DIR "${MFEM_DIR}" CACHE PATH "Path to MFEM installation 
directory") +set(MFEM_DIR + "${MFEM_DIR}" + CACHE PATH "Path to MFEM installation directory" +) option(ENABLE_MAGMA "Enable MAGMA" OFF) -set(MAGMA_DIR "${MAGMA_DIR}" CACHE PATH "Path to MAGMA installation directory") +set(MAGMA_DIR + "${MAGMA_DIR}" + CACHE PATH "Path to MAGMA installation directory" +) option(ENABLE_PETSC "Enable PETSc" ON) -set(PETSC_DIR "${PETSC_DIR}" CACHE PATH "Path to PETSc installation directory") +set(PETSc_DIR + "${PETSc_DIR}" + CACHE PATH "Path to PETSc installation directory" +) + +option(ENABLE_PLASMA "Enable PLASMA" ON) +set(PLASMA_DIR + "${PLASMA_DIR}" + CACHE PATH "Path to PLASMA installation directory" +) option(ENABLE_SUNDIALS "Enable SUNDIALS" ON) -set(SUNDIALS_DIR "${SUNDIALS_DIR}" CACHE PATH "Path to SUNDIALS installation directory") +set(SUNDIALS_DIR + "${SUNDIALS_DIR}" + CACHE PATH "Path to SUNDIALS installation directory" +) option(ENABLE_SUPERLU "Enable SuperLU" ON) -set(SUPERLUDIST_DIR "${SUPERLUDIST__DIR}" CACHE PATH "Path to SuperLU_DIST installation directory") +set(SUPERLUDIST_DIR + "${SUPERLUDIST_DIR}" + CACHE PATH "Path to SuperLU_DIST installation directory" +) + +option(ENABLE_STRUMPACK "Enable STRUMPACK" OFF) +set(STRUMPACK_DIR + "${STRUMPACK_DIR}" + CACHE PATH "Path to STRUMPACK installation directory" +) + +option(ENABLE_TRILINOS "Enable TRILINOS" OFF) +set(TRILINOS_DIR + "${Trilinos_DIR}" + CACHE PATH "Path to Trilinos installation directory" +) + +set(METIS_DIR + "${METIS_DIR}" + CACHE PATH "Path to Metis installation directory" +) -option(ENABLE_TRILINOS "Enable TRILINOS" ON) -set(TRILINOS_DIR "${Trilinos_DIR}" CACHE PATH "Path to Trilinos installation directory") +# check for MPI +find_package(MPI REQUIRED) -set(METIS_DIR "${METIS_DIR}" CACHE PATH "Path to Metis installation directory") +# check for OpenMP +find_package(OpenMP) +# compiler options +if(NOT DEFINED CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 14) + set(CMAKE_CXX_STANDARD_REQUIRED TRUE) + set(CMAKE_CXX_EXTENSIONS OFF) +endif() + +# setup CUDA 
if(ENABLE_CUDA) - enable_language(CUDA) - set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER}) + enable_language(CUDA) + set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER}) + find_package(CUDAToolkit REQUIRED) +endif() + +if(ENABLE_HIP) + enable_language(HIP) + find_package(hip REQUIRED) + find_package(hipsparse REQUIRED) + find_package(hiprand REQUIRED) + find_package(rocrand REQUIRED) + find_package(rocprim REQUIRED) + find_package(rocsparse REQUIRED) +endif() + +# check for AMReX +if(ENABLE_AMREX) + find_package(AMReX REQUIRED) endif() # check for hypre if(ENABLE_HYPRE) - find_package(HYPRE REQUIRED) + find_package(HYPRE REQUIRED) endif() # check for MFEM if(ENABLE_MFEM) - find_package(ZLIB REQUIRED) - find_package(MFEM REQUIRED) - find_package(Ginkgo REQUIRED) + find_package(ZLIB REQUIRED) + find_package(MFEM REQUIRED) + if(ENABLE_GINKGO) + find_package(Ginkgo REQUIRED) + endif() endif() # check for MAGMA if(ENABLE_MAGMA) - find_package(MAGMA REQUIRED) + find_package(MAGMA REQUIRED) endif() # check for PETSC if(ENABLE_PETSC) - find_package(PETSC REQUIRED) + find_package(PETSc REQUIRED) +endif() + +# check for PLASMA +if(ENABLE_PLASMA) + find_package(PLASMA REQUIRED) + find_package(BLASPP REQUIRED) + find_package(LAPACKPP REQUIRED) + find_package(SLATE REQUIRED) endif() # check for SUNDIALS if(ENABLE_SUNDIALS) - find_package(SUNDIALS REQUIRED) + find_package(SUNDIALS REQUIRED) endif() # check for SuperLU DIST if(ENABLE_SUPERLU) - find_package(SUPERLUDIST REQUIRED) + find_package(SUPERLUDIST REQUIRED) +endif() + +# check for STRUMPACK +if(ENABLE_STRUMPACK) + find_package(STRUMPACK REQUIRED) endif() # check for math @@ -76,22 +173,28 @@ find_library(MATH_LIBRARY NAMES m) # check for metis find_package(METIS) -# check for MPI -find_package(MPI REQUIRED) - # example subdirectories +if(ENABLE_AMREX) + add_subdirectory(amrex) +endif() if(ENABLE_HYPRE) - add_subdirectory(hypre) + add_subdirectory(hypre) endif() if(ENABLE_MFEM) - add_subdirectory(mfem) + 
add_subdirectory(mfem) endif() if(ENABLE_PETSC) - add_subdirectory(petsc) + add_subdirectory(petsc) +endif() +if(ENABLE_PLASMA) + add_subdirectory(plasma) endif() if(ENABLE_SUNDIALS) - add_subdirectory(sundials) + add_subdirectory(sundials) endif() if(ENABLE_TRILINOS) - add_subdirectory(trilinos) + add_subdirectory(trilinos) +endif() +if(ENABLE_STRUMPACK) + add_subdirectory(strumpack) endif() diff --git a/README.md b/README.md index 61f5945..dba7300 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# xSDK Examples +# xSDK Examples v0.3.0 The example codes provided here demonstrate the use of of various xSDK libraries in tandem to solve problems of interest. Each of the library folders has one or more examples codes that are built of that library @@ -7,34 +7,77 @@ examining the output is a good way to better understand how these libraries can code samples are a good place to start for new projects. More details about the examples can be found in the README.md files in the library subfolders. For more information on the xSDK see . 
-## Code Example Summary - -| Example | Libraries | Description | -|:-------------------------------------------|:----------------------------|:--------------------------------------------------| -| hypre/ij_laplacian.c | HYPRE+SuperLU_Dist | 2D Laplacian problem | -| libensemble/test_persistent_aposmm_tao.py | libEnsemble+PETSc | 2D constrained optimization problem | -| mfem/hypre-superlu/convdiff.cpp | MFEM+HYPRE+SuperLU_Dist | 2D steady state convective diffusion | -| mfem/ginkgo/mfem_ex1_gko.cpp | MFEM+Ginkgo | 2D Poisson problem with Ginko solver | -| mfem/petsc/obstacle.cpp | MFEM+PETSc | Membrane obstacle problem (min energy functional) | -| mfem/sundials/transient-heat.cpp | MFEM+SUNDIALS | 2D Transient nonlinear heat conduction | -| petsc/ex19.c | PETSc+HYPRE+SuperLU_Dist | 2D nonlinear driven cavity problem | -| sundials/ark_brusselator1D_FEM_sludist.cpp| SUNDIALS+SuperLU_Dist | Chemical kinetics brusselator problem | -| sundials/cv_petsc_ex7.c | SUNDIALS+PETSc | 2D nonlinear PDE solution | -| trilinos/SimpleSolve_WithParameters.cpp | Trilinos+SuperLU_Dist | Small linear system direct solution | +## Example Summary + +These examples were tested and verified against xsdk@0.7.0. 
+ +| Example | Libraries | Description | GPUs | +|:------------------------------------------------------|:-------------------------|:--------------------------------------------------|:---------------| +| hypre/ij_laplacian.c | HYPRE+SuperLU_Dist | 2D Laplacian problem | | +| libensemble/test_persistent_aposmm_tao.py | libEnsemble+PETSc | 2D constrained optimization problem | | +| mfem/hypre-superlu/convdiff.cpp | MFEM+HYPRE+SuperLU_Dist | 2D steady state convective diffusion | | +| mfem/ginkgo/mfem_ex1_gko.cpp | MFEM+Ginkgo | 2D Poisson problem with Ginkgo solver | ![cuda] | +| mfem/petsc/obstacle.cpp | MFEM+PETSc | Membrane obstacle problem (min energy functional) | | +| mfem/strumpack/diffusion-eigen.cpp | MFEM+STRUMPACK+HYPRE | Diffusion eigenvalue problem | | +| mfem/sundials/transient-heat.cpp | MFEM+SUNDIALS | 2D Transient nonlinear heat conduction | | +| mfem/hypre/magnetic-diffusion.cpp | MFEM+HYPRE | Steady state magnetic diffusion problem | ![cuda] | +| mfem/sundials/advection.cpp | MFEM+SUNDIALS | 2D Time-dependent advection | ![cuda] | +| petsc/ex19.c | PETSc+HYPRE+SuperLU_Dist | 2D nonlinear driven cavity problem | ![cuda] | +| plasma/ex1solve.cpp | PLASMA+SLATE+BLASPP | Linear system direct solution | ![cuda] | +| sundials/ark_brusselator1D_FEM_sludist.cpp | SUNDIALS+SuperLU_Dist | Chemical kinetics brusselator problem | | +| sundials/cv_petsc_ex7.c | SUNDIALS+PETSc | 2D nonlinear PDE solution | | +| sundials/cvRoberts_blockdiag_magma.cpp | SUNDIALS+MAGMA | Solves a group of chemical kinetics ODEs | ![cuda] ![hip] | +| trilinos/SimpleSolve_WithParameters.cpp | Trilinos+SuperLU_Dist | Small linear system direct solution | | +| strumpack/sparse.cpp | STRUMPACK+ButterflyPACK | 3D Poisson problem with STRUMPACK preconditioner | | These examples are currently in the repo but will not be enabled in the xsdk-examples spack package until we release a new version of the xSDK. They can still be built using CMake directly. 
-| Example | Libraries | Description | -|:-------------------------------------------|:----------------------------|:--------------------------------------------------| -| mfem/sundials/advection.cpp | MFEM+SUNDIALS (CUDA) | 2D Time-dependent advection | -| sundials/cvRoberts_blockdiag_magma.cpp | SUNDIALS+MAGMA (CUDA) | Solves a group of chemical kinetics ODEs | +| Example | Libraries | Description | GPUs | +|:------------------------------------------------------|:-------------------------|:--------------------------------------------------|:---------------| +| amrex/sundials/amrex_sundials_advection_diffusion.cpp | AMReX+SUNDIALS | 2D Advection-diffusion problem | ![cuda] ![hip] | +| mfem/hypre/magnetic-diffusion.cpp | MFEM+HYPRE | Steady state magnetic diffusion problem | ![hip] | + +## Installing the Examples -## Install the code samples +The examples can be installed along with the xSDK utilizing the Spack package. -The examples can be installed along with the xSDK utilizing the spack package. ``` spack install xsdk-examples ``` -Further details on how to run each example code can be found in each example folder's README.md file. +To install with CUDA support, + +``` +spack install xsdk-examples+cuda cuda_arch= +``` + +Since `xsdk-examples` depends on the `xsdk` Spack package, Spack will also install `xsdk`. In some cases, it may be easier to install the `xsdk` package (separately) following https://xsdk.info/download/ prior to the `xsdk-examples` package. + +Alternatively the examples can be built and installed with CMake directly: + +``` +git clone https://github.com/xsdk-project/xsdk-examples +cmake -DCMAKE_PREFIX_PATH=/path/to/libraries -DENABLE_CUDA= -DENABLE_HIP= -S xsdk-examples/ -B xsdk-examples/builddir +cd xsdk-examples/builddir +make +make install +``` + +Note, that to build with HIP support CMake must be used directly. + +## Running and Testing + +xsdk-examples is setup to use `ctest`. 
Each example in the repository is tested with at least a set of default options. If CMake is used to build xsdk-examples, the tests can be run from the build directory (`builddir` above): +``` +ctest . +``` +or +``` +make test +``` +Details on how to run each example code manually (and with different options) can be found in each example folder's README.md file. + + +[cuda]: https://img.shields.io/badge/-cuda-brightgreen?style=flat "CUDA" +[hip]: https://img.shields.io/badge/-hip-red?style=flat "HIP" diff --git a/amrex/CMakeLists.txt b/amrex/CMakeLists.txt new file mode 100644 index 0000000..6ecae5d --- /dev/null +++ b/amrex/CMakeLists.txt @@ -0,0 +1,6 @@ +# AMReX+SUNDIALS requires amrex@22.04: and sundials@6.2.0: +if(ENABLE_SUNDIALS AND ("${AMReX_RELEASE_NUMBER}" VERSION_GREATER_EQUAL "22.04") AND ("${SUNDIALS_PACKAGE_VERSION}" VERSION_GREATER_EQUAL 6.2.0)) + add_subdirectory(sundials) +else() + message(STATUS "SKIPPED AMReX+SUNDIALS example because AMReX version is too old or SUNDIALS version is too old (need 22.04: and 6.2.0:)") +endif() diff --git a/amrex/sundials/CMakeLists.txt b/amrex/sundials/CMakeLists.txt new file mode 100644 index 0000000..75b732b --- /dev/null +++ b/amrex/sundials/CMakeLists.txt @@ -0,0 +1,17 @@ +add_executable(amrex_sundials_advection_diffusion amrex_sundials_advection_diffusion.cpp) + +if(ENABLE_CUDA) + set_source_files_properties(amrex_sundials_advection_diffusion.cpp PROPERTIES LANGUAGE CUDA) + set_target_properties( + amrex_sundials_advection_diffusion PROPERTIES CUDA_SEPARABLE_COMPILATION ON + ) # This adds -dc +endif() + +target_link_libraries(amrex_sundials_advection_diffusion PRIVATE XSDK::AMReX XSDK::SUNDIALS MPI::MPI_CXX) + +xsdk_add_test( + NAME AMREX-amrex_sundials_advection_diffusion COMMAND + $ +) + +install(TARGETS amrex_sundials_advection_diffusion RUNTIME DESTINATION bin) diff --git a/amrex/sundials/README.md b/amrex/sundials/README.md new file mode 100644 index 0000000..a2f0b69 --- /dev/null +++ 
b/amrex/sundials/README.md @@ -0,0 +1,63 @@ +# AMReX + SUNDIALS examples + +Example codes demonstrating the use of [AMReX](https://amrex-codes.github.io/) +and [SUNDIALS](https://computing.llnl.gov/projects/sundials). + +## Advection-Diffusion Example + +This is an example of a scalar-valued advection-diffusion problem for chemical +transport. The governing equation is: + +u_t + a \cdot \nabla u - \nabla \cdot ( D \nabla u ) = 0 + +where u(t,x,y) is the chemical concentration, a is the advection vector, and D +is a diagonal matrix containing anisotropic diffusion coefficients. The problem is +solved on the unit square domain centered at the origin and evolved from time 0 +to 10^3. + +### Problem Options + +The problem inputs are listed below and may be specified on the command line +e.g., `./amrex_sundials_advection_diffusion help=1` or by supplying an input +file of values e.g., `./amrex_sundials_advection_diffusion inputs` where +`inputs` is a text file with `option = value` lines. + +| Option | Type | Description | Default | +|:---------------------|:-------|:---------------------------------------------------|:---------| +| `n_cell` | `int` | number of cells on each side of the square domain | 128 | +| `max_grid_size` | `int` | max size of boxes in box array | 64 | +| `plot_int` | `int` | enable (> 0) or disable (<= 0) plots | -1 | +| `arkode_order` | `int` | ARKStep method order | 4 | +| `nls_max_iter` | `int` | maximum number of nonlinear iterations | 3 | +| `ls_max_iter` | `int` | maximum number of linear iterations | 5 | +| `rhs_adv` | `int` | advection: disable (0), implicit (1), explicit (2) | 2 | +| `rhs_diff` | `int` | diffusion: disable (0), implicit (1), explicit (2) | 1 | +| `rtol` | `Real` | relative tolerance | 1e-4 | +| `atol` | `Real` | absolute tolerance | 1e-9 | +| `fixed_dt` | `Real` | use a fixed time step size (if `fixed_dt` > 0.0) | -1.0 | +| `tfinal` | `Real` | final integration time | 1e3 | +| `dtout` | `Real` | output frequency | `tfinal` | +| 
`max_steps` | `int` | maximum number of steps between outputs | 10000 | +| `advCoeffx` | `Real` | advection speed in the x-direction | 5e-4 | +| `advCoeffy` | `Real` | advection speed in the y-direction | 2.5e-4 | +| `diffCoeffx` | `Real` | diffusion coefficient in the x-direction | 1e-6 | +| `diffCoeffy` | `Real` | diffusion coefficient in the y-direction | 1e-6 | +| `use_preconditioner` | `int` | use preconditioning (1) or not (0) | 0 | + +If preconditioning is enabled, then additional options may be set (see AMReX +documentation of the `MLMG` solver for descriptions): + +| Option | Type | Default | +|:--------------------------|:-------|:--------| +| mlmg.agglomeration | `int` | 1 | +| mlmg.consolidation | `int` | 1 | +| mlmg.max_coarsening_level | `int` | 1000 | +| mlmg.linop_maxorder | `int` | 2 | +| mlmg.max_iter | `int` | 1000 | +| mlmg.max_fmg_iter | `int` | 1000 | +| mlmg.verbose | `int` | 0 | +| mlmg.bottom_verbose | `int` | 0 | +| mlmg.use_hypre | `int` | 1 | +| mlmg.hypre_interface | `int` | 3 | +| mlmg.use_petsc | `int` | 0 | +| mlmg.tol_rel | `Real` | 1.0e-6 | diff --git a/amrex/sundials/amrex_sundials_advection_diffusion.cpp b/amrex/sundials/amrex_sundials_advection_diffusion.cpp new file mode 100644 index 0000000..f29a871 --- /dev/null +++ b/amrex/sundials/amrex_sundials_advection_diffusion.cpp @@ -0,0 +1,829 @@ +/* ----------------------------------------------------------------------------- + * AMReX + SUNDIALS xSDK 2D Advection-Diffusion example code + * + * Based on Hands-on Lessons with SUNDIALS + AMReX from the Argonne Training + * Program in Extreme-Scale Computing (ATPESC) written by (alphabetical): + * David Gardner (gardner48@llnl.gov) + * John Loffeld (loffeld1@llnl.gov) + * Daniel Reynolds (reynolds@smu.edu) + * Donald Willcox (dewillcox@lbl.gov) + * ---------------------------------------------------------------------------*/ + +#include +#include +#include +#include + +#include +#include +#include + +#include 
"amrex_sundials_advection_diffusion.h" + +using namespace amrex; + +void ComputeSolutionARK(N_Vector nv_sol, ProblemOpt* prob_opt, + ProblemData* prob_data) +{ + // Extract problem data and options + Geometry* geom = prob_data->geom; + int plot_int = prob_opt->plot_int; + int arkode_order = prob_opt->arkode_order; + int nls_max_iter = prob_opt->nls_max_iter; + int ls_max_iter = prob_opt->ls_max_iter; + int rhs_adv = prob_opt->rhs_adv; + int rhs_diff = prob_opt->rhs_diff; + Real rtol = prob_opt->rtol; + Real atol = prob_opt->atol; + Real fixed_dt = prob_opt->fixed_dt; + Real tfinal = prob_opt->tfinal; + Real dtout = prob_opt->dtout; + int max_steps = prob_opt->max_steps; + int use_preconditioner = prob_opt->use_preconditioner; + + // initial time, number of outputs, and error flag + Real time = 0.0; + int nout = ceil(tfinal/dtout); + int ier = 0; + + // Write a plotfile of the initial data + if (plot_int > 0) + { + const std::string& pltfile = amrex::Concatenate("plt", 0, 5); + MultiFab* sol = amrex::sundials::N_VGetVectorPointer_MultiFab(nv_sol); + WriteSingleLevelPlotfile(pltfile, *sol, {"u"}, *geom, time, 0); + } + + // Create the ARK stepper + void* arkode_mem = nullptr; + + if (rhs_adv > 0 && rhs_diff > 0) + { + if (rhs_adv > 1 && rhs_diff > 1) + { + // explicit advection and diffusion + arkode_mem = ARKStepCreate(ComputeRhsAdvDiff, nullptr, time, nv_sol, + *amrex::sundials::The_Sundials_Context()); + } + else if (rhs_adv > 1) + { + // explicit advection and implicit diffusion + arkode_mem = ARKStepCreate(ComputeRhsAdv, ComputeRhsDiff, time, nv_sol, + *amrex::sundials::The_Sundials_Context()); + } + else if (rhs_diff > 1) + { + // implicit advection and explicit diffusion + arkode_mem = ARKStepCreate(ComputeRhsDiff, ComputeRhsAdv, time, nv_sol, + *amrex::sundials::The_Sundials_Context()); + } + else + { + // implicit advection and diffusion + arkode_mem = ARKStepCreate(nullptr, ComputeRhsAdvDiff, time, nv_sol, + *amrex::sundials::The_Sundials_Context()); + } + 
} + else if (rhs_adv > 0) + { + if (rhs_adv > 1) + { + // explicit advection + arkode_mem = ARKStepCreate(ComputeRhsAdv, nullptr, time, nv_sol, + *amrex::sundials::The_Sundials_Context()); + } + else + { + // implicit advection + arkode_mem = ARKStepCreate(nullptr, ComputeRhsAdv, time, nv_sol, + *amrex::sundials::The_Sundials_Context()); + } + } + else if (rhs_diff > 0) + { + if (rhs_diff > 1) + { + // explicit diffusion + arkode_mem = ARKStepCreate(ComputeRhsDiff, nullptr, time, nv_sol, + *amrex::sundials::The_Sundials_Context()); + } + else + { + // implicit diffusion + arkode_mem = ARKStepCreate(nullptr, ComputeRhsDiff, time, nv_sol, + *amrex::sundials::The_Sundials_Context()); + } + } + else + { + amrex::Print() << "Invalid RHS options for ARKode" << std::endl; + return; + } + + // Attach the user data structure to ARKStep + ARKStepSetUserData(arkode_mem, prob_data); + + // Set the method order + ARKStepSetOrder(arkode_mem, arkode_order); + + // Set the time step size or integration tolerances + if (fixed_dt > 0.0) + ARKStepSetFixedStep(arkode_mem, fixed_dt); + else + ARKStepSStolerances(arkode_mem, atol, rtol); + + // Set the max number of steps between outputs + ARKStepSetMaxNumSteps(arkode_mem, max_steps); + + // Attach linear solver (if needed) + if (rhs_adv == 1 || rhs_diff == 1) + { + // Create and attach GMRES linear solver for Newton + SUNLinearSolver LS; + if (use_preconditioner) + LS = SUNLinSol_SPGMR(nv_sol, PREC_LEFT, ls_max_iter, + *amrex::sundials::The_Sundials_Context()); + else + LS = SUNLinSol_SPGMR(nv_sol, PREC_NONE, ls_max_iter, + *amrex::sundials::The_Sundials_Context()); + + ier = ARKStepSetLinearSolver(arkode_mem, LS, nullptr); + if (ier != ARKLS_SUCCESS) + { + amrex::Print() << "Creation of linear solver unsuccessful" << std::endl; + return; + } + + if (use_preconditioner) + { + // Attach preconditioner setup/solve functions + ier = ARKStepSetPreconditioner(arkode_mem, precondition_setup, precondition_solve); + if (ier != ARKLS_SUCCESS) + 
{ + amrex::Print() << "Attachment of preconditioner unsuccessful" << std::endl; + return; + } + } + + // Set max number of nonlinear iterations + ier = ARKStepSetMaxNonlinIters(arkode_mem, nls_max_iter); + if (ier != ARK_SUCCESS) + { + amrex::Print() << "Error setting max number of nonlinear iterations" << std::endl; + return; + } + } + + // Advance the solution in time + Real tout = time + dtout; // first output time + Real tret; // return time + for (int iout=0; iout < nout; iout++) + { + ier = ARKStepEvolve(arkode_mem, tout, nv_sol, &tret, ARK_NORMAL); + if (ier < 0) + { + amrex::Print() << "Error in ARKStepEvolve" << std::endl; + return; + } + + // Get integration stats + long nfe_evals, nfi_evals; + ARKStepGetNumRhsEvals(arkode_mem, &nfe_evals, &nfi_evals); + amrex::Print() << "t = " << std::setw(5) << tret + << " explicit evals = " << std::setw(7) << nfe_evals + << " implicit evals = " << std::setw(7) << nfi_evals + << std::endl; + + // Write output + if (plot_int > 0) + { + const std::string& pltfile = amrex::Concatenate("plt", iout+1, 5); + MultiFab* sol = amrex::sundials::N_VGetVectorPointer_MultiFab(nv_sol); + WriteSingleLevelPlotfile(pltfile, *sol, {"u"}, *geom, tret, iout+1); + } + + // Update output time + tout += dtout; + if (tout > tfinal) tout = tfinal; + } + + // Output final solution statistics + amrex::Print() << "\nFinal Solver Statistics:\n" << std::endl; + ARKStepPrintAllStats(arkode_mem, stdout, SUN_OUTPUTFORMAT_TABLE); +} + + +void ParseInputs(ProblemOpt& prob_opt, ProblemData& prob_data) +{ + // ParmParse is way of reading inputs from the inputs file + ParmParse pp; + + // -------------------------------------------------------------------------- + // Problem options + // -------------------------------------------------------------------------- + + // Enable (>0) or disable (<0) writing output files + prob_opt.plot_int = -1; // plots off + pp.query("plot_int", prob_opt.plot_int); + + // Specify the ARKode method order + 
prob_opt.arkode_order = 4; // 4th order + pp.query("arkode_order", prob_opt.arkode_order); + + // Specify the max number of nonlinear iterations + prob_opt.nls_max_iter = 3; + pp.query("nls_max_iter", prob_opt.nls_max_iter); + + // Specify the max number of linear iterations + prob_opt.ls_max_iter = 5; + pp.query("ls_max_iter", prob_opt.ls_max_iter); + + // Specify RHS functions/splitting + prob_opt.rhs_adv = 2; // explicit advection + prob_opt.rhs_diff = 1; // implicit diffusion + pp.query("rhs_adv", prob_opt.rhs_adv); + pp.query("rhs_diff", prob_opt.rhs_diff); + + // Specify relative and absolute tolerances + prob_opt.rtol = 1.0e-4; + prob_opt.atol = 1.0e-9; + pp.query("rtol", prob_opt.rtol); + pp.query("atol", prob_opt.atol); + + // Specify a fixed time step size + prob_opt.fixed_dt = -1.0; // diabled by default (use adaptive steps) + pp.query("fixed_dt", prob_opt.fixed_dt); + + // Specify final time for integration + prob_opt.tfinal = 1.0e3; + pp.query("tfinal", prob_opt.tfinal); + + // Specify output frequency + prob_opt.dtout = prob_opt.tfinal; + pp.query("dtout", prob_opt.dtout); + + // Specify maximum number of steps between outputs + prob_opt.max_steps = 10000; + pp.query("max_steps", prob_opt.max_steps); + + // Decide whether to use a preconditioner or not + prob_opt.use_preconditioner = 0; + pp.query("use_preconditioner", prob_opt.use_preconditioner); + + // -------------------------------------------------------------------------- + // Problem data + // -------------------------------------------------------------------------- + + // The number of cells on each side of a square domain. 
+ prob_data.n_cell = 128; + pp.query("n_cell", prob_data.n_cell); + + // The domain is broken into boxes of size max_grid_size + prob_data.max_grid_size = 64; + pp.query("max_grid_size", prob_data.max_grid_size); + + // Advection coefficients + prob_data.advCoeffx = 5.0e-4; + prob_data.advCoeffy = 2.5e-4; + pp.query("advCoeffx", prob_data.advCoeffx); + pp.query("advCoeffy", prob_data.advCoeffy); + + // Diffusion coefficients + prob_data.diffCoeffx = 1.0e-6; + prob_data.diffCoeffy = 1.0e-6; + pp.query("diffCoeffx", prob_data.diffCoeffx); + pp.query("diffCoeffy", prob_data.diffCoeffy); + + // MLMG options + ParmParse ppmg("mlmg"); + prob_data.mg_agglomeration = 1; + ppmg.query("agglomeration", prob_data.mg_agglomeration); + prob_data.mg_consolidation = 1; + ppmg.query("consolidation", prob_data.mg_consolidation); + prob_data.mg_max_coarsening_level = 1000; + ppmg.query("max_coarsening_level", prob_data.mg_max_coarsening_level); + prob_data.mg_linop_maxorder = 2; + ppmg.query("linop_maxorder", prob_data.mg_linop_maxorder); + prob_data.mg_max_iter = 1000; + ppmg.query("max_iter", prob_data.mg_max_iter); + prob_data.mg_max_fmg_iter = 1000; + ppmg.query("max_fmg_iter", prob_data.mg_max_fmg_iter); + prob_data.mg_verbose = 0; + ppmg.query("verbose", prob_data.mg_verbose); + prob_data.mg_bottom_verbose = 0; + ppmg.query("bottom_verbose", prob_data.mg_bottom_verbose); + prob_data.mg_use_hypre = 1; + ppmg.query("use_hypre", prob_data.mg_use_hypre); + prob_data.mg_hypre_interface = 3; + ppmg.query("hypre_interface", prob_data.mg_hypre_interface); + prob_data.mg_use_petsc = 0; + ppmg.query("use_petsc", prob_data.mg_use_petsc); + prob_data.mg_tol_rel = 1.0e-6; + ppmg.query("tol_rel", prob_data.mg_tol_rel); + + // Ouput problem options and parameters + amrex::Print() + << "n_cell = " << prob_data.n_cell << std::endl + << "max_grid_size = " << prob_data.max_grid_size << std::endl + << "plot_int = " << prob_opt.plot_int << std::endl + << "arkode_order = " << prob_opt.arkode_order 
<< std::endl + << "rhs_adv = " << prob_opt.rhs_adv << std::endl + << "rhs_diff = " << prob_opt.rhs_diff << std::endl; + if (prob_opt.fixed_dt > 0.0) + amrex::Print() + << "fixed_dt = " << prob_opt.fixed_dt << std::endl; + else + amrex::Print() + << "rtol = " << prob_opt.rtol << std::endl + << "atol = " << prob_opt.atol << std::endl; + amrex::Print() + << "tfinal = " << prob_opt.tfinal << std::endl + << "dtout = " << prob_opt.dtout << std::endl; + if (prob_opt.rhs_adv > 0) + amrex::Print() + << "advCoeffx = " << prob_data.advCoeffx << std::endl + << "advCoeffy = " << prob_data.advCoeffy << std::endl; + if (prob_opt.rhs_diff > 0) + amrex::Print() + << "diffCoeffx = " << prob_data.diffCoeffx << std::endl + << "diffCoeffy = " << prob_data.diffCoeffy << std::endl; + if ((prob_opt.rhs_adv > 0) && (prob_opt.rhs_diff > 0) && + (prob_opt.rhs_adv != prob_opt.rhs_diff)) + if (prob_opt.rhs_diff > 1) + amrex::Print() << "ImEx treatment: implicit advection and explicit diffusion" << std::endl; + else + amrex::Print() << "ImEx treatment: implicit diffusion and explicit advection" << std::endl; + if (prob_opt.use_preconditioner) + amrex::Print() + << "preconditioning enabled" << std::endl + << " mlmg.agglomeration = " << prob_data.mg_agglomeration << std::endl + << " mlmg.consolidation = " << prob_data.mg_consolidation << std::endl + << " mlmg.max_coarsening_level = " << prob_data.mg_max_coarsening_level << std::endl + << " mlmg.linop_maxorder = " << prob_data.mg_linop_maxorder << std::endl + << " mlmg.max_iter = " << prob_data.mg_max_iter << std::endl + << " mlmg.max_fmg_iter = " << prob_data.mg_max_fmg_iter << std::endl + << " mlmg.verbose = " << prob_data.mg_verbose << std::endl + << " mlmg.bottom_verbose = " << prob_data.mg_bottom_verbose << std::endl + << " mlmg.use_hypre = " << prob_data.mg_use_hypre << std::endl + << " mlmg.hypre_interface = " << prob_data.mg_hypre_interface << std::endl + << " mlmg.use_petsc = " << prob_data.mg_use_petsc << std::endl + << " mlmg.tol_rel = 
" << prob_data.mg_tol_rel << std::endl; +} + + +int main(int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + + DoProblem(); + + amrex::Finalize(); + return 0; +} + + +void DoProblem() +{ + // What time is it now? We'll use this to compute total run time. + Real strt_time = amrex::second(); + + // Set problem data and options + ProblemData prob_data; + ProblemOpt prob_opt; + ParseInputs(prob_opt, prob_data); + + // Make BoxArray and Geometry + BoxArray ba; + Geometry geom; + SetUpGeometry(ba, geom, prob_data); + + // How Boxes are distrubuted among MPI processes + DistributionMapping dm(ba); + prob_data.dmap = &dm; + + // Allocate the solution MultiFab + int nGhost = 1; // number of ghost cells for each array + int nComp = 1; // number of components for each array + MultiFab sol(ba, dm, nComp, nGhost); + + // Allocate the linear solver coefficient MultiFabs + MultiFab acoef(ba, dm, nComp, nGhost); + MultiFab bcoef(ba, dm, nComp, nGhost); + acoef = 1.0; + bcoef = 1.0; + prob_data.acoef = &acoef; + prob_data.bcoef = &bcoef; + + // Build the flux MultiFabs + Array flux; + for (int dir = 0; dir < AMREX_SPACEDIM; dir++) + { + // flux(dir) has one component, zero ghost cells, and is nodal in + // direction dir + BoxArray edge_ba = ba; + edge_ba.surroundingNodes(dir); + flux[dir].define(edge_ba, dm, 1, 0); + } + prob_data.flux = &flux; + + // Create an N_Vector wrapper for the solution MultiFab + sunindextype length = nComp * prob_data.n_cell * prob_data.n_cell; + N_Vector nv_sol = amrex::sundials::N_VMake_MultiFab(length, &sol); + + // Set the initial condition + FillInitConds2D(sol, geom); + + // Integrate in time + ComputeSolutionARK(nv_sol, &prob_opt, &prob_data); + + // Call the timer again and compute the maximum difference between the start + // time and stop time over all processors + Real stop_time = amrex::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + ParallelDescriptor::ReduceRealMax(stop_time, IOProc); + + // 
Tell the I/O Processor to write out the "run time" + amrex::Print() << "Run time = " << stop_time << std::endl; +} + + +void FillInitConds2D(MultiFab& sol, const Geometry& geom) +{ + const auto dx = geom.CellSize(); + const auto prob_lo = geom.ProbLo(); + const auto prob_hi = geom.ProbHi(); + + Real sigma = 0.1; + Real a = 1.0/(sigma*sqrt(2*M_PI)); + Real b = -0.5/(sigma*sigma); + + for (MFIter mfi(sol,TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + Array4 const& fab = sol[mfi].array(); + + amrex::ParallelFor + (bx, 1, [=] AMREX_GPU_DEVICE(int i, int j, int k, int n) + { + Real y = prob_lo[1] + (((Real) j) + 0.5) * dx[1]; + Real x = prob_lo[0] + (((Real) i) + 0.5) * dx[0]; + Real r = x * x + y * y; + fab(i,j,k,n) = a * exp(b * r); + }); + } +} + +void SetUpGeometry(BoxArray& ba, Geometry& geom, ProblemData& prob_data) +{ + // Extract problem options + int n_cell = prob_data.n_cell; + int max_grid_size = prob_data.max_grid_size; + + IntVect dom_lo(AMREX_D_DECL( 0, 0, 0)); + IntVect dom_hi(AMREX_D_DECL(n_cell-1, n_cell-1, n_cell-1)); + Box domain(dom_lo, dom_hi); // cell-centered + + // Initialize the boxarray "ba" from the single box "domain" + ba.define(domain); + + // Break up boxarray "ba" into chunks no larger than "max_grid_size" along a + // direction + ba.maxSize(max_grid_size); + + // This defines the physical box, [-1,1] in each direction. 
+ RealBox real_box({AMREX_D_DECL(-1.0, -1.0, -1.0)}, + {AMREX_D_DECL(1.0, 1.0, 1.0)}); + + // This defines a Geometry object + Vector is_periodic(AMREX_SPACEDIM, 1); // periodic in all direction + geom.define(domain, &real_box, CoordSys::cartesian, is_periodic.data()); + + prob_data.geom = &geom; + prob_data.grid = &ba; +} + + +/* --------------------------------------------------------------------------- + * SUNDIALS RHS functions + * ---------------------------------------------------------------------------*/ + +int ComputeRhsAdv(Real t, N_Vector nv_sol, N_Vector nv_rhs, void* data) +{ + // extract MultiFabs + MultiFab* sol = amrex::sundials::N_VGetVectorPointer_MultiFab(nv_sol); + MultiFab* rhs = amrex::sundials::N_VGetVectorPointer_MultiFab(nv_rhs); + + // extract problem data + ProblemData* prob_data = (ProblemData*) data; + Geometry* geom = prob_data->geom; + Real advCoeffx = prob_data->advCoeffx; + Real advCoeffy = prob_data->advCoeffy; + + // clear the RHS + *rhs = 0.0; + + // fill ghost cells + sol->FillBoundary(geom->periodicity()); + + // compute advection + ComputeAdvectionUpwind(*sol, *rhs, *geom, advCoeffx, advCoeffy); + + return 0; +} + +int ComputeRhsDiff(Real t, N_Vector nv_sol, N_Vector nv_rhs, void* data) +{ + // extract MultiFabs + MultiFab* sol = amrex::sundials::N_VGetVectorPointer_MultiFab(nv_sol); + MultiFab* rhs = amrex::sundials::N_VGetVectorPointer_MultiFab(nv_rhs); + + // extract problem data + ProblemData *prob_data = (ProblemData*) data; + Geometry* geom = prob_data->geom; + Array& flux = *(prob_data->flux); + Real diffCoeffx = prob_data->diffCoeffx; + Real diffCoeffy = prob_data->diffCoeffy; + + // fill ghost cells + sol->FillBoundary(geom->periodicity()); + + // clear the RHS + *rhs = 0.0; + + // compute diffusion + ComputeDiffusion(*sol, *rhs, flux[0], flux[1], *geom, + diffCoeffx, diffCoeffy); + + return 0; +} + +int ComputeRhsAdvDiff(Real t, N_Vector nv_sol, N_Vector nv_rhs, void* data) +{ + // extract MultiFabs + MultiFab* sol = 
amrex::sundials::N_VGetVectorPointer_MultiFab(nv_sol); + MultiFab* rhs = amrex::sundials::N_VGetVectorPointer_MultiFab(nv_rhs); + + // extract problem data + ProblemData* prob_data = (ProblemData*) data; + Geometry* geom = prob_data->geom; + Array& flux = *(prob_data->flux); + Real advCoeffx = prob_data->advCoeffx; + Real advCoeffy = prob_data->advCoeffy; + Real diffCoeffx = prob_data->diffCoeffx; + Real diffCoeffy = prob_data->diffCoeffy; + + // clear the RHS + *rhs = 0.0; + + // fill ghost cells + sol->FillBoundary(geom->periodicity()); + + // compute advection + ComputeAdvectionUpwind(*sol, *rhs, *geom, advCoeffx, advCoeffy); + + // compute diffusion + ComputeDiffusion(*sol, *rhs, flux[0], flux[1], *geom, + diffCoeffx, diffCoeffy); + + return 0; +} + +/* --------------------------------------------------------------------------- + * Advection RHS functions + * ---------------------------------------------------------------------------*/ + +// Assumes ghost cells already filled +// Adds result to adv_mf MultiFab +void ComputeAdvectionUpwind(MultiFab& sol_mf, MultiFab& adv_mf, Geometry& geom, + Real advCoeffx, Real advCoeffy) +{ + const auto dx = geom.CellSize(); + Real dxInv = 1.0 / dx[0]; // assume same over entire mesh + Real dyInv = 1.0 / dx[1]; // assume same over entire mesh + Real sideCoeffx = advCoeffx * dxInv; + Real sideCoeffy = advCoeffy * dyInv; + + for (MFIter mfi(sol_mf,TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + Array4 const& sol_fab = sol_mf[mfi].array(); + Array4 const& adv_fab = adv_mf[mfi].array(); + + // x-direction + if (advCoeffx > 0) + { + amrex::ParallelFor + (bx, 1, [=] AMREX_GPU_DEVICE(int i, int j, int k, int n) + { + adv_fab(i,j,k,n) -= sideCoeffx * + (sol_fab(i,j,k,n) - sol_fab(i-1,j,k,n)); + }); + } + else + { + amrex::ParallelFor + (bx, 1, [=] AMREX_GPU_DEVICE(int i, int j, int k, int n) + { + adv_fab(i,j,k,n) -= sideCoeffx * + (sol_fab(i+1,j,k,n) - sol_fab(i,j,k,n)); + }); + } + + // y-direction +
if (advCoeffy > 0) + { + amrex::ParallelFor + (bx, 1, [=] AMREX_GPU_DEVICE(int i, int j, int k, int n) + { + adv_fab(i,j,k,n) -= sideCoeffy * + (sol_fab(i,j,k,n) - sol_fab(i,j-1,k,n)); + }); + } + else + { + amrex::ParallelFor + (bx, 1, [=] AMREX_GPU_DEVICE(int i, int j, int k, int n) + { + adv_fab(i,j,k,n) -= sideCoeffy * + (sol_fab(i,j+1,k,n) - sol_fab(i,j,k,n)); + }); + } + } +} + +/* --------------------------------------------------------------------------- + * Diffusion RHS functions + * ---------------------------------------------------------------------------*/ + +// Assumes ghost cells are already filled +// Adds result to diff_mf +void ComputeDiffusion(MultiFab& sol, MultiFab& diff_mf, MultiFab& fx_mf, + MultiFab& fy_mf, Geometry& geom, + Real diffCoeffx, Real diffCoeffy) +{ + ComputeDiffFlux(sol, fx_mf, fy_mf, geom, diffCoeffx, diffCoeffy); + ComputeDivergence(diff_mf, fx_mf, fy_mf, geom); +} + +// Assumes ghost cells already filled +// Overwrites fx_mf and fy_mf MultiFabs +void ComputeDiffFlux(MultiFab& sol_mf, MultiFab& fx_mf, MultiFab& fy_mf, + Geometry& geom, Real diffCoeffx, Real diffCoeffy) +{ + const auto dx = geom.CellSize(); + Real dxInv = 1.0 / dx[0]; // assume same over entire mesh + Real dyInv = 1.0 / dx[1]; // assume same over entire mesh + Real coeffX = diffCoeffx * dxInv; + Real coeffY = diffCoeffy * dyInv; + + for (MFIter mfi(sol_mf,TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + Array4 const& sol = sol_mf[mfi].array(); + Array4 const& fx = fx_mf[mfi].array(); + Array4 const& fy = fy_mf[mfi].array(); + + // x-flux + amrex::ParallelFor + (bx, 1, [=] AMREX_GPU_DEVICE(int i, int j, int k, int n) + { + // always use zero component for flux + fx(i,j,k,0) = coeffX * (sol(i,j,k,n) - sol(i-1,j,k,n)); + }); + + // y-flux + amrex::ParallelFor + (bx, 1, [=] AMREX_GPU_DEVICE(int i, int j, int k, int n) + { + // always use zero component for flux + fy(i,j,k,0) = coeffY * (sol(i,j,k,n) - sol(i,j-1,k,n)); + }); + } +} +
+// Assumes ghost cells already filled +// Adds result to div_mf MultiFab +void ComputeDivergence(MultiFab& div_mf, MultiFab& fx_mf, + MultiFab& fy_mf, Geometry& geom) +{ + const auto dx = geom.CellSize(); + Real dxInv = 1.0 / dx[0]; // assume same over entire mesh + Real dyInv = 1.0 / dx[1]; // assume same over entire mesh + + for (MFIter mfi(div_mf,TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + Array4 const& div = div_mf[mfi].array(); + Array4 const& fx = fx_mf[mfi].array(); + Array4 const& fy = fy_mf[mfi].array(); + + amrex::ParallelFor + (bx, 1, [=] AMREX_GPU_DEVICE(int i, int j, int k, int n) + { + // always use zero component for flux + div(i,j,k,n) += (dxInv * (fx(i+1,j,k,0) - fx(i,j,k,0)) + + dyInv * (fy(i,j+1,k,0) - fy(i,j,k,0))); + }); + } +} + +/* --------------------------------------------------------------------------- + * Preconditioning routines + * ---------------------------------------------------------------------------*/ + +int precondition_setup(realtype tn, N_Vector u, N_Vector fu, + booleantype jok, booleantype *jcurPtr, + realtype gamma, void *user_data) +{ + return 0; +} + +int precondition_solve(realtype tn, N_Vector u, N_Vector fu, + N_Vector r, N_Vector z, + realtype gamma, realtype delta, + int lr, void *user_data) +{ + ProblemData *prob_data = (ProblemData*) user_data; + + auto geom = *(prob_data->geom); + auto grid = *(prob_data->grid); + auto dmap = *(prob_data->dmap); + auto& acoef = *(prob_data->acoef); + auto& bcoef = *(prob_data->bcoef); + + MultiFab* solution = amrex::sundials::N_VGetVectorPointer_MultiFab(z); + MultiFab* rhs = amrex::sundials::N_VGetVectorPointer_MultiFab(r); + + LPInfo info; + info.setAgglomeration(prob_data->mg_agglomeration); + info.setConsolidation(prob_data->mg_consolidation); + info.setMaxCoarseningLevel(prob_data->mg_max_coarsening_level); + + const Real tol_abs = 0.0; + const Real ascalar = 1.0; + const Real bscalar = gamma; + + MLABecLaplacian mlabec({geom}, {grid},
{dmap}, info); + + mlabec.setMaxOrder(prob_data->mg_linop_maxorder); + + // Set periodic BC + mlabec.setDomainBC({AMREX_D_DECL(LinOpBCType::Periodic, + LinOpBCType::Periodic, + LinOpBCType::Periodic)}, + {AMREX_D_DECL(LinOpBCType::Periodic, + LinOpBCType::Periodic, + LinOpBCType::Periodic)}); + + mlabec.setLevelBC(0, nullptr); + + mlabec.setScalars(ascalar, bscalar); + + mlabec.setACoeffs(0, acoef); + + Array face_bcoef; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) + { + const BoxArray& ba = amrex::convert(bcoef.boxArray(), + IntVect::TheDimensionVector(idim)); + face_bcoef[idim].define(ba, bcoef.DistributionMap(), 1, 0); + + switch (idim) + { + case 0: + face_bcoef[idim] = prob_data->diffCoeffx; break; // x faces: do not fall through to the y coefficient + case 1: + face_bcoef[idim] = prob_data->diffCoeffy; break; + } + } + + mlabec.setBCoeffs(0, amrex::GetArrOfConstPtrs(face_bcoef)); + + MLMG mlmg(mlabec); + mlmg.setMaxIter(prob_data->mg_max_iter); + mlmg.setMaxFmgIter(prob_data->mg_max_fmg_iter); + mlmg.setVerbose(prob_data->mg_verbose); + mlmg.setBottomVerbose(prob_data->mg_bottom_verbose); +#ifdef AMREX_USE_HYPRE + if (prob_data->mg_use_hypre) + { + mlmg.setBottomSolver(MLMG::BottomSolver::hypre); + if (prob_data->mg_hypre_interface == 1) + mlmg.setHypreInterface(amrex::Hypre::Interface::structed); + else if (prob_data->mg_hypre_interface == 2) + mlmg.setHypreInterface(amrex::Hypre::Interface::semi_structed); + else + mlmg.setHypreInterface(amrex::Hypre::Interface::ij); + } +#endif +#ifdef AMREX_USE_PETSC + if (prob_data->mg_use_petsc) + { + mlmg.setBottomSolver(MLMG::BottomSolver::petsc); + } +#endif + + mlmg.solve({solution}, {rhs}, prob_data->mg_tol_rel, tol_abs); + + return 0; +} diff --git a/amrex/sundials/amrex_sundials_advection_diffusion.h b/amrex/sundials/amrex_sundials_advection_diffusion.h new file mode 100644 index 0000000..1d3f6c5 --- /dev/null +++ b/amrex/sundials/amrex_sundials_advection_diffusion.h @@ -0,0 +1,143 @@ +/* ----------------------------------------------------------------------------- + *
AMReX + SUNDIALS xSDK 2D Advection-Diffusion example code + * + * Based on Hands-on Lessons with SUNDIALS + AMReX from the Argonne Training + * Program in Extreme-Scale Computing (ATPESC) written by (alphabetical): + * David Gardner (gardner48@llnl.gov) + * John Loffeld (loffeld1@llnl.gov) + * Daniel Reynolds (reynolds@smu.edu) + * Donald Willcox (dewillcox@lbl.gov) + * ---------------------------------------------------------------------------*/ + +#ifndef ADVECTION_DIFFUSION_H +#define ADVECTION_DIFFUSION_H + +#include +#include +#include +#include +#include +#include +#include + +// user-data structure for problem options +struct ProblemOpt +{ + int plot_int; + int arkode_order; + int nls_method; + int nls_max_iter; + int nls_fp_acc; + int ls_max_iter; + int rhs_adv; + int rhs_diff; + amrex::Real rtol; + amrex::Real atol; + amrex::Real fixed_dt; + amrex::Real tfinal; + amrex::Real dtout; + int max_steps; + int write_diag; + int use_preconditioner; +}; + +// user-data structure passed through SUNDIALS to RHS functions +struct ProblemData +{ + // Requested grid options + int n_cell; + int max_grid_size; + + // AMReX grid data structures + amrex::Geometry* geom; + amrex::BoxArray* grid; + amrex::DistributionMapping* dmap; + + // AMReX MLMG data and parameters + amrex::MultiFab* acoef; + amrex::MultiFab* bcoef; + + int mg_agglomeration; + int mg_consolidation; + int mg_max_coarsening_level; + int mg_linop_maxorder; + int mg_max_iter; + int mg_max_fmg_iter; + int mg_verbose; + int mg_bottom_verbose; + int mg_use_hypre; + int mg_hypre_interface; + int mg_use_petsc; + amrex::Real mg_tol_rel; + + // Problem data + amrex::Real advCoeffx; + amrex::Real advCoeffy; + amrex::Real diffCoeffx; + amrex::Real diffCoeffy; + amrex::Array* flux; +}; + +// Run problem +void DoProblem(); + +// Parse the problem input file +void ParseInputs(ProblemOpt& prob_opt, ProblemData& prob_data); + +// Advance the solution in time with ARKode ARKStep +void ComputeSolutionARK(N_Vector nv_sol, 
ProblemOpt* prob_opt, + ProblemData* prob_data); + +// Set the ODE initial condition +void FillInitConds2D(amrex::MultiFab& sol, const amrex::Geometry& geom); + +// Decompose the problem in space +void SetUpGeometry(amrex::BoxArray& ba, amrex::Geometry& geom, + ProblemData& prob_data); + +// SUNDIALS ODE RHS functions +int ComputeRhsAdv(amrex::Real t, N_Vector nv_sol, N_Vector nv_rhs, void* data); +int ComputeRhsDiff(amrex::Real t, N_Vector nv_sol, N_Vector nv_rhs, void* data); +int ComputeRhsAdvDiff(amrex::Real t, N_Vector nv_sol, N_Vector nv_rhs, + void* data); + +// Advective portion of ODE RHS +void ComputeAdvectionUpwind(amrex::MultiFab& sol, + amrex::MultiFab& advection, + amrex::Geometry& geom, + amrex::Real advCoeffx, + amrex::Real advCoeffy); + +// Diffusive portion of ODE RHS +void ComputeDiffusion(amrex::MultiFab& sol, + amrex::MultiFab& diff_mf, + amrex::MultiFab& fx_mf, + amrex::MultiFab& fy_mf, + amrex::Geometry& geom, + amrex::Real diffCoeffx, + amrex::Real diffCoeffy); + +// Utility functions for computing diffusion +void ComputeDiffFlux(amrex::MultiFab& sol, + amrex::MultiFab& fx, + amrex::MultiFab& fy, + amrex::Geometry& geom, + amrex::Real diffCoeffx, + amrex::Real diffCoeffy); + +void ComputeDivergence(amrex::MultiFab& div, + amrex::MultiFab& fx, + amrex::MultiFab& fy, + amrex::Geometry& geom); + +// Preconditioner routines +int precondition_setup(realtype tn, N_Vector u, N_Vector fu, + booleantype jok, booleantype *jcurPtr, + realtype gamma, void *user_data); + +int precondition_solve(realtype tn, N_Vector u, N_Vector fu, + N_Vector r, N_Vector z, + realtype gamma, realtype delta, + int lr, void *user_data); + +#endif diff --git a/cmake/CorrectWindowsPaths.cmake b/cmake/CorrectWindowsPaths.cmake deleted file mode 100644 index 5058282..0000000 --- a/cmake/CorrectWindowsPaths.cmake +++ /dev/null @@ -1,13 +0,0 @@ -# CorrectWindowsPaths - this module defines one macro -# -# CONVERT_CYGWIN_PATH( PATH ) -# This uses the command cygpath (provided by 
cygwin) to convert -# unix-style paths into paths useable by cmake on windows - -macro (CONVERT_CYGWIN_PATH _path) - if (WIN32) - EXECUTE_PROCESS(COMMAND cygpath.exe -m ${${_path}} - OUTPUT_VARIABLE ${_path}) - string (STRIP ${${_path}} ${_path}) - endif (WIN32) -endmacro (CONVERT_CYGWIN_PATH) diff --git a/cmake/FindAMReX.cmake b/cmake/FindAMReX.cmake new file mode 100644 index 0000000..8979a5e --- /dev/null +++ b/cmake/FindAMReX.cmake @@ -0,0 +1,9 @@ +find_package(AMReX REQUIRED COMPONENTS + HINTS ${AMREX_DIR} $ENV{AMREX_DIR} ${CMAKE_PREFIX_PATH} + NO_DEFAULT_PATH) + +if(NOT TARGET XSDK::AMReX) + add_library(XSDK_AMREX INTERFACE) + target_link_libraries(XSDK_AMREX INTERFACE AMReX::amrex) + add_library(XSDK::AMReX ALIAS XSDK_AMREX) +endif() diff --git a/cmake/FindBLASPP.cmake b/cmake/FindBLASPP.cmake new file mode 100644 index 0000000..781ce22 --- /dev/null +++ b/cmake/FindBLASPP.cmake @@ -0,0 +1,41 @@ +# find the BLAS++ include path +find_path(BLASPP_INCLUDE_DIR blas.hh + NAMES blas.hh + HINTS ${BLASPP_DIR} $ENV{BLASPP_DIR} ${CMAKE_PREFIX_PATH} + PATH_SUFFIXES include + NO_DEFAULT_PATH + DOC "Directory with BLASPP header" +) + +# find the main BLAS++ library +find_library(BLASPP_LIBRARIES + NAMES blaspp + HINTS ${BLASPP_DIR} $ENV{BLASPP_DIR} ${CMAKE_PREFIX_PATH} + PATH_SUFFIXES lib lib64 + NO_DEFAULT_PATH + DOC "The BLAS++ library." 
+) + +find_package_handle_standard_args(BLASPP + REQUIRED_VARS + BLASPP_LIBRARIES + BLASPP_INCLUDE_DIR + VERSION_VAR + BLASPP_VERSION +) + +# Create target for BLAS++ +if(BLASPP_FOUND) + + if(NOT TARGET XSDK::BLASPP) + add_library(XSDK::BLASPP INTERFACE IMPORTED) + endif() + + message(STATUS "Created XSDK::BLASPP target") + message(STATUS " INTERFACE_INCLUDE_DIRECTORIES: ${BLASPP_INCLUDE_DIR}") + message(STATUS " INTERFACE_LINK_LIBRARIES: ${BLASPP_LIBRARIES}") + + set_target_properties(XSDK::BLASPP PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${BLASPP_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "${BLASPP_LIBRARIES}") +endif() diff --git a/cmake/FindHYPRE.cmake b/cmake/FindHYPRE.cmake index 60476a5..f0a77b1 100644 --- a/cmake/FindHYPRE.cmake +++ b/cmake/FindHYPRE.cmake @@ -25,7 +25,7 @@ ### find include dir find_path(HYPRE_INCLUDE_DIR NAMES HYPRE.h hypre.h - HINTS ${HYPRE_DIR} $ENV{HYPRE_DIR} + HINTS ${HYPRE_DIR} $ENV{HYPRE_DIR} ${CMAKE_PREFIX_PATH} PATH_SUFFIXES include NO_DEFAULT_PATH DOC "Directory with hypre header.") @@ -33,7 +33,7 @@ find_path(HYPRE_INCLUDE_DIR ### find library find_library(HYPRE_LIBRARY NAMES HYPRE hypre - HINTS ${HYPRE_DIR} $ENV{HYPRE_DIR} + HINTS ${HYPRE_DIR} $ENV{HYPRE_DIR} ${CMAKE_PREFIX_PATH} PATH_SUFFIXES lib lib64 NO_DEFAULT_PATH DOC "The hypre library.") @@ -63,4 +63,8 @@ if(HYPRE_FOUND) INTERFACE_LINK_LIBRARIES "${HYPRE_LIBRARIES}" IMPORTED_LOCATION "${HYPRE_LIBRARY}") + if(ENABLE_HIP) + target_link_libraries(XSDK::HYPRE INTERFACE roc::rocsparse roc::rocrand) + endif() + endif() diff --git a/cmake/FindLAPACKPP.cmake b/cmake/FindLAPACKPP.cmake new file mode 100644 index 0000000..09c0f64 --- /dev/null +++ b/cmake/FindLAPACKPP.cmake @@ -0,0 +1,42 @@ +# find the LAPACK++ include path +find_path(LAPACKPP_INCLUDE_DIR lapack.hh + NAMES lapack.hh + HINTS ${LAPACKPP_DIR} $ENV{LAPACKPP_DIR} ${CMAKE_PREFIX_PATH} + PATH_SUFFIXES include + NO_DEFAULT_PATH + DOC "Directory with LAPACKPP header" +) + +# find the main LAPACK++ library 
+find_library(LAPACKPP_LIBRARIES + NAMES lapackpp + HINTS ${LAPACKPP_DIR} $ENV{LAPACKPP_DIR} ${CMAKE_PREFIX_PATH} + PATH_SUFFIXES lib lib64 + NO_DEFAULT_PATH + DOC "The LAPACK++ library." +) + +find_package_handle_standard_args(LAPACKPP + REQUIRED_VARS + LAPACKPP_LIBRARIES + LAPACKPP_INCLUDE_DIR + VERSION_VAR + LAPACKPP_VERSION +) + +# Create target for LAPACK++ +if(LAPACKPP_FOUND) + + if(NOT TARGET XSDK::LAPACKPP) + add_library(XSDK::LAPACKPP INTERFACE IMPORTED) + endif() + + message(STATUS "Created XSDK::LAPACKPP target") + message(STATUS " INTERFACE_INCLUDE_DIRECTORIES: ${LAPACKPP_INCLUDE_DIR}") + message(STATUS " INTERFACE_LINK_LIBRARIES: ${LAPACKPP_LIBRARIES}") + + set_target_properties(XSDK::LAPACKPP PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${LAPACKPP_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "${LAPACKPP_LIBRARIES}") + +endif() diff --git a/cmake/FindMAGMA.cmake b/cmake/FindMAGMA.cmake index 0cf6512..dd867cc 100644 --- a/cmake/FindMAGMA.cmake +++ b/cmake/FindMAGMA.cmake @@ -14,7 +14,7 @@ # find the MAGMA include path find_path(MAGMA_INCLUDE_DIR magma_v2.h NAMES magma_v2.h - HINTS ${MAGMA_DIR} $ENV{MAGMA_DIR} + HINTS ${MAGMA_DIR} $ENV{MAGMA_DIR} ${CMAKE_PREFIX_PATH} PATH_SUFFIXES include NO_DEFAULT_PATH DOC "Directory with MAGMA header" @@ -23,7 +23,7 @@ find_path(MAGMA_INCLUDE_DIR magma_v2.h # find the main MAGMA library find_library(MAGMA_LIBRARY NAMES magma - HINTS ${MAGMA_DIR} $ENV{MAGMA_DIR} + HINTS ${MAGMA_DIR} $ENV{MAGMA_DIR} ${CMAKE_PREFIX_PATH} PATH_SUFFIXES lib lib64 NO_DEFAULT_PATH DOC "The MAGMA library.") @@ -33,7 +33,7 @@ if("SPARSE" IN_LIST MAGMA_FIND_COMPONENTS) set(_sparse_required MAGMA_SPARSE_LIBRARY) find_library(MAGMA_SPARSE_LIBRARY NAMES magma_sparse - HINTS ${MAGMA_DIR} $ENV{MAGMA_DIR} + HINTS ${MAGMA_DIR} $ENV{MAGMA_DIR} ${CMAKE_PREFIX_PATH} PATH_SUFFIXES lib lib64 NO_DEFAULT_PATH DOC "The MAGMA sparse library.") @@ -63,7 +63,7 @@ if(MAGMA_LIBRARY AND MAGMA_INCLUDE_DIR) set(_interface_libraires ) foreach(lib ${_libraries_list}) if(NOT 
(lib STREQUAL "-lmagma" OR lib STREQUAL "-lmagma_sparse" OR lib STREQUAL "-L\${libdir}" OR lib STREQUAL "") ) - string(REPLACE "-l" "" lib ${lib}) + string(REGEX REPLACE "^-l" "" lib ${lib}) list(APPEND _interface_libraires ${lib}) endif() endforeach() diff --git a/cmake/FindMETIS.cmake b/cmake/FindMETIS.cmake index 614fe87..a85ffe2 100644 --- a/cmake/FindMETIS.cmake +++ b/cmake/FindMETIS.cmake @@ -1,18 +1,18 @@ find_path(METIS_INCLUDE_DIRS metis.h - HINTS ${METIS_DIR} $ENV{METIS_DIR} + HINTS ${METIS_DIR} $ENV{METIS_DIR} ${CMAKE_PREFIX_PATH} PATH_SUFFIXES include NO_DEFAULT_PATH DOC "The directory with the Metis header file.") find_library(METIS_LIBRARY metis - HINTS ${METIS_DIR} $ENV{METIS_DIR} + HINTS ${METIS_DIR} $ENV{METIS_DIR} ${CMAKE_PREFIX_PATH} PATH_SUFFIXES lib NO_DEFAULT_PATH DOC "The Metis library.") find_library(ZLIB_LIBRARY z - HINTS ${ZLIB_LIBRARY_DIR} $ENV{ZLIB_LIBRARY_DIR} + HINTS ${ZLIB_LIBRARY_DIR} $ENV{ZLIB_LIBRARY_DIR} ${CMAKE_PREFIX_PATH} PATH_SUFFIXES lib NO_DEFAULT_PATH DOC "The zlib library.") @@ -21,7 +21,6 @@ find_library(ZLIB_LIBRARY z find_package_handle_standard_args(METIS REQUIRED_VARS METIS_LIBRARY - METIS_INCLUDE_DIRS ZLIB_LIBRARY ) diff --git a/cmake/FindMFEM.cmake b/cmake/FindMFEM.cmake index 41ad19b..33319ec 100644 --- a/cmake/FindMFEM.cmake +++ b/cmake/FindMFEM.cmake @@ -4,20 +4,20 @@ find_package(METIS REQUIRED) # Find MFEM # Find the MFEM header files find_path(MFEM_INCLUDE_DIRS mfem.hpp - HINTS ${MFEM_DIR} $ENV{MFEM_DIR} + HINTS ${MFEM_DIR} $ENV{MFEM_DIR} ${CMAKE_PREFIX_PATH} PATH_SUFFIXES include NO_DEFAULT_PATH DOC "Directory with MFEM header.") # Find the MFEM library find_library(MFEM_LIBRARY mfem - HINTS ${MFEM_DIR} $ENV{MFEM_DIR} + HINTS ${MFEM_DIR} $ENV{MFEM_DIR} ${CMAKE_PREFIX_PATH} PATH_SUFFIXES lib NO_DEFAULT_PATH DOC "The MFEM library.") find_library(ZLIB_LIBRARY z - HINTS ${ZLIB_LIBRARY_DIR} $ENV{ZLIB_LIBRARY_DIR} + HINTS ${ZLIB_LIBRARY_DIR} $ENV{ZLIB_LIBRARY_DIR} ${CMAKE_PREFIX_PATH} PATH_SUFFIXES lib 
NO_DEFAULT_PATH DOC "The zlib library.") @@ -41,4 +41,11 @@ if(MFEM_FOUND) INTERFACE_LINK_LIBRARIES "${METIS_LIBRARY}" IMPORTED_LOCATION "${MFEM_LIBRARY}") + if(ENABLE_CUDA) + target_link_libraries(XSDK::MFEM INTERFACE CUDA::cudart CUDA::cusparse) + endif() + if(ENABLE_HIP) + target_link_libraries(XSDK::MFEM INTERFACE hip::amdhip64 roc::hipsparse) + endif() + endif() diff --git a/cmake/FindPETSC.cmake b/cmake/FindPETSC.cmake deleted file mode 100644 index 99db85e..0000000 --- a/cmake/FindPETSC.cmake +++ /dev/null @@ -1,777 +0,0 @@ -# ------------------------------------------------------------------------------ -# Programmer(s): Cody J. Balos and David J. Gardner @ LLNL -# ------------------------------------------------------------------------------ -# Based on the FindPETSC module by Jed Brown. -# ------------------------------------------------------------------------------ -# SUNDIALS Copyright Start -# Copyright (c) 2002-2020, Lawrence Livermore National Security -# and Southern Methodist University. -# All rights reserved. -# -# See the top-level LICENSE and NOTICE files for details. -# -# SPDX-License-Identifier: BSD-3-Clause -# SUNDIALS Copyright End -# ------------------------------------------------------------------------------ -# Copyright Jed Brown -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR -# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# ------------------------------------------------------------------------------ -# Try to find PETSC. This has three usage modes. -# -# The first usage mode is to find PETSC by introspection. -# This case is triggered when PETSC_DIR is not set by the user. -# Setting the variables below change the behavior of the search in this mode: -# PETSC_DIR - directory in which PETSC resides -# PETSC_ARCH - build architecture -# PETSC_CURRENT - (advanced) redo the find stage and executable tests -# PETSC_WORKS - (advanced) set to ON to ignore the output of the -# executable tests (not recommended) -# -# The second usage mode is to find PETSC based on the user-provided -# PETSC_DIR, and optionally PETSC_ARCH, variables. This case is triggered -# when just PETSC_DIR, and optionally PETSC_ARCH, are set by the user. 
-# Setting the variables below change the behavior of the search in this mode: -# PETSC_DIR - directory in which PETSC resides -# PETSC_ARCH - build architecture -# PETSC_CURRENT - (advanced) redo the find stage and executable tests -# PETSC_WORKS - (advanced) set to ON to ignore the output of the -# executable tests (not recommended) -# -# The third usage mode is to 'find' PETSC based on the user-provided list -# of include directories and libraries. This mode will only use the includes -# and libraries provided in the PETSC_INCLUDES and PETSC_LIBRARIES variable. -# This case is triggered when PETSC_INCLUDES, and PETSC_LIBRARIES are set. -# Setting the variables below change the behavior of the search in this mode: -# PETSC_LIBRARIES - (advanced) link these to use PETSC -# PETSC_INCLUDES - (advanced) the PETSC include directories -# PETSC_CURRENT - (advanced) redo the executable tests -# PETSC_WORKS - (advanced) set to ON to ignore the output of the -# executable tests (not recommended) -# -# Note that setting PETSC_LIBRARIES and PETSC_INCLUDES takes precedence over -# setting PETSC_DIR. 
-# -# Once done this will define the targets: -# -# PETSC::ALL - a CMake target for all of PETSc -# PETSC::SYS - a CMake target for the main PETSc library -# PETSC::VEC - a CMake target for the PETSc vector library -# PETSC::MAT - a CMake target for the PETSc matrix library -# PETSC::DM - a CMake target for the PETSc DM library -# PETSC::KSP - a CMake target for the PETSc KSP library -# PETSC::SNES - a CMake target for the PETSc SNES library -# PETSC::TS - a CMake target for the PETSc TS library -# -# It will also define the following, potentially useful, variables: -# -# PETSC_COMPILER - (advanced) Compiler used by PETSC, helpful to find a compatible MPI -# PETSC_DEFINITIONS - (advanced) Compiler switches for using PETSC -# PETSC_MPIEXEC - (advanced) Executable for running MPI programs -# PETSC_INDEX_SIZE - (internal) the size of indices in PETSC -# PETSC_PRECISION - (internal) the real type precision in PETSC -# PETSC_VERSION - (internal) Version string (MAJOR.MINOR.SUBMINOR) -# -# Usage: -# find_package(PETSC COMPONENTS CXX) - required if build --with-clanguage=C++ --with-c-support=0 -# find_package(PETSC COMPONENTS C) - standard behavior of checking build using a C compiler -# find_package(PETSC) - same as above -# -# Redistribution and use is allowed according to the terms of the BSD license. 
-# ------------------------------------------------------------------------------ - -# ------------------------------------------------------------------------------ -# helper macros and functions -# ------------------------------------------------------------------------------ - -function (PETSC_GET_VERSION) - if (EXISTS "${PETSC_INCLUDE_DIR}/petscversion.h") - file (STRINGS "${PETSC_INCLUDE_DIR}/petscversion.h" vstrings REGEX "#define PETSC_VERSION_(RELEASE|MAJOR|MINOR|SUBMINOR|PATCH) ") - foreach (line ${vstrings}) - string (REGEX REPLACE " +" ";" fields ${line}) # break line into three fields (the first is always "#define") - list (GET fields 1 var) - list (GET fields 2 val) - set (${var} ${val} PARENT_SCOPE) - set (${var} ${val}) # Also in local scope so we have access below - endforeach () - if (PETSC_VERSION_RELEASE) - if ($(PETSC_VERSION_PATCH) GREATER 0) - set (PETSC_VERSION "${PETSC_VERSION_MAJOR}.${PETSC_VERSION_MINOR}.${PETSC_VERSION_SUBMINOR}p${PETSC_VERSION_PATCH}" CACHE INTERNAL "PETSC version" FORCE) - else () - set (PETSC_VERSION "${PETSC_VERSION_MAJOR}.${PETSC_VERSION_MINOR}.${PETSC_VERSION_SUBMINOR}" CACHE INTERNAL "PETSC version" FORCE) - endif () - else () - # make dev version compare higher than any patch level of a released version - set (PETSC_VERSION "${PETSC_VERSION_MAJOR}.${PETSC_VERSION_MINOR}.${PETSC_VERSION_SUBMINOR}.99" CACHE INTERNAL "PETSC version" FORCE) - endif () - else () - message (SEND_ERROR "${PETSC_INCLUDE_DIR}/petscversion.h does not exist") - endif () -endfunction () - -macro (PETSC_GET_VARIABLE name var) - if (NOT DEFINED MAKE_EXECUTABLE) - # need to find the make executable the first time this macro is used - find_program (MAKE_EXECUTABLE NAMES make gmake) - if (MAKE_EXECUTABLE MATCHES "NOTFOUND") - message(SEND_ERROR "MAKE_EXECUTABLE could not be found (looked for `make` and `gmake`)") - endif () - endif () - set (${var} "NOTFOUND" CACHE INTERNAL "Cleared" FORCE) - execute_process (COMMAND ${MAKE_EXECUTABLE} 
--no-print-directory -f ${petsc_config_makefile} show VARIABLE=${name} - OUTPUT_VARIABLE ${var} - RESULT_VARIABLE petsc_return) -endmacro (PETSC_GET_VARIABLE) - -macro (PETSC_TEST_RUNS includes libraries runs) - if (PETSC_VERSION VERSION_GREATER 3.1) - set (_PETSC_TSDestroy "TSDestroy(&ts)") - else () - set (_PETSC_TSDestroy "TSDestroy(ts)") - endif () - - set (_PETSC_TEST_SOURCE " -static const char help[] = \"PETSC test program.\"; -#include -int main(int argc,char *argv[]) { - PetscErrorCode ierr; - TS ts; - - ierr = PetscInitialize(&argc,&argv,0,help);CHKERRQ(ierr); - ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr); - ierr = TSSetFromOptions(ts);CHKERRQ(ierr); - ierr = ${_PETSC_TSDestroy};CHKERRQ(ierr); - ierr = PetscFinalize();CHKERRQ(ierr); - return 0; -} -") - - multipass_source_runs ("${includes}" "${libraries}" "${_PETSC_TEST_SOURCE}" ${runs} "${PETSC_LANGUAGE_BINDINGS}") - - if (${${runs}}) - set (PETSC_EXECUTABLE_RUNS "YES" CACHE INTERNAL - "The system can successfully run a PETSC executable" FORCE) - else() - set (PETSC_EXECUTABLE_RUNS "NO" CACHE INTERNAL - "The system can NOT successfully run a PETSC executable" FORCE) - endif () -endmacro (PETSC_TEST_RUNS) - -macro (PETSC_FIND_LIBRARY suffix name) - # Clear any stale value, if we got here, we need to find it again - set (PETSC_LIBRARY_${suffix} "NOTFOUND" CACHE INTERNAL "Cleared" FORCE) - - if (WIN32) - set (libname lib${name}) # windows expects "libfoo", linux expects "foo" - else (WIN32) - set (libname ${name}) - endif (WIN32) - - find_library (PETSC_LIBRARY_${suffix} NAMES ${libname} HINTS ${petsc_lib_dir} NO_DEFAULT_PATH) - set (PETSC_LIBRARIES_${suffix} "${PETSC_LIBRARY_${suffix}}" CACHE INTERNAL "PETSC ${suffix} libraries" FORCE) - mark_as_advanced(PETSC_LIBRARY_${suffix}) -endmacro (PETSC_FIND_LIBRARY suffix name) - -macro (PETSC_FIND_LIBRARY_IN_LIST suffix names liblist) - # Clear any stale value, if we got here, we need to find it again - set (PETSC_LIBRARY_${suffix} "NOTFOUND" CACHE 
INTERNAL "Cleared" FORCE) - - foreach (name ${names}) - if (WIN32) - set (libname lib${name}) # windows expects "libfoo", linux expects "foo" - else (WIN32) - set (libname ${name}) - endif (WIN32) - foreach (lib ${${liblist}}) - if ("${lib}" MATCHES "${libname}[.].*") - set (PETSC_LIBRARY_${suffix} ${lib} CACHE INTERNAL "" FORCE) - list (REMOVE_ITEM ${liblist} ${lib}) - break () - endif () - endforeach () - endforeach () - set (PETSC_LIBRARIES_${suffix} "${PETSC_LIBRARY_${suffix}}" CACHE INTERNAL "PETSC ${suffix} libraries" FORCE) - mark_as_advanced(PETSC_LIBRARY_${suffix}) - -endmacro (PETSC_FIND_LIBRARY_IN_LIST suffix names liblist) - -macro (PETSC_JOIN libs deps) - list (APPEND PETSC_LIBRARIES_${libs} ${PETSC_LIBRARIES_${deps}}) - # since list APPEND creates a new local variable in the current scope we need - # to set the cache variable value to propagate the changes upwards - set (PETSC_LIBRARIES_${libs} ${PETSC_LIBRARIES_${libs}} CACHE INTERNAL "PETSC ${libs} libraries" FORCE) -endmacro (PETSC_JOIN libs deps) - -# ------------------------------------------------------------------------------ -# FindPETSC -# ------------------------------------------------------------------------------ - -set (PETSC_VALID_COMPONENTS C CXX) - -if (NOT PETSC_FIND_COMPONENTS) - - get_property (_enabled_langs GLOBAL PROPERTY ENABLED_LANGUAGES) - list(FIND _enabled_langs "C" _c_index) - if (${_c_index} GREATER -1) - set (PETSC_LANGUAGE_BINDINGS "C") - else () - set (PETSC_LANGUAGE_BINDINGS "CXX") - endif () - -else() - - # Right now, this is designed for compatability with the --with-clanguage option, so - # only allow one item in the components list. - list(LENGTH ${PETSC_FIND_COMPONENTS} components_length) - if(${components_length} GREATER 1) - message(FATAL_ERROR "Only one component for PETSC is allowed to be specified") - endif() - # This is a stub for allowing multiple components should that time ever come. Perhaps - # to also test Fortran bindings? 
- foreach(component ${PETSC_FIND_COMPONENTS}) - list(FIND PETSC_VALID_COMPONENTS ${component} component_location) - if(${component_location} EQUAL -1) - message(FATAL_ERROR "\"${component}\" is not a valid PETSC component.") - else() - list(APPEND PETSC_LANGUAGE_BINDINGS ${component}) - endif() - endforeach() - -endif() - -# Set which state variables to check to determine if the PETSC configuration is -# current and clear the other state variables -if (PETSC_INCLUDES OR PETSC_LIBRARIES) - - if (PETSC_INCLUDES AND PETSC_LIBRARIES) - - set (PETSC_STATES "LIBRARIES;INCLUDES" CACHE INTERNAL "" FORCE) - set (PETSC_DIR "" CACHE PATH "Path to the root of a PETSc installation" FORCE) - set (PETSC_ARCH "" CACHE STRING "PETSc architecture" FORCE) - - else () - - string (CONCAT msg - "Both PETSC_INCLUDES and PETSC_LIBRARIES must be provided:\n" - " PETSC_INCLUDES=${PETSC_INCLUDES}\n" - " PETSC_LIBRARIES=${PETSC_LIBRARIES}") - message (FATAL_ERROR ${msg}) - - endif () - -else () - - set (PETSC_STATES "DIR;ARCH" CACHE INTERNAL "" FORCE) - set (PETSC_INCLUDES "" CACHE STRING "Semi-colon separated list of PETSc include directories" FORCE) - set (PETSC_LIBRARIES "" CACHE STRING "Semi-colon separated list of PETSc link libraries" FORCE) - -endif () - -# Keep track of FindPETSC state so that we do not do the complete -# set of tests and variable lookups every time cmake is run. -include (FindPackageMultipass) -set (petsc_slaves LIBRARIES_SYS LIBRARIES_VEC LIBRARIES_MAT LIBRARIES_DM LIBRARIES_KSP LIBRARIES_SNES LIBRARIES_TS) -set (petsc_deps LIBRARY_DIR INCLUDE_DIR LIBRARIES_ INCLUDES_ COMPILER MPIEXEC EXECUTABLE_RUNS ${petsc_slaves}) -find_package_multipass (PETSC petsc_config_current STATES ${PETSC_STATES} DEPENDENTS ${petsc_deps}) - -# This runs anytime the current configuration is not current. -# This happens either when a user sets PETSC_CURRENT=FALSE, -# or when one of the dependents given to find_package_multipass changes. 
-if (NOT petsc_config_current) - - if (PETSC_INCLUDES AND PETSC_LIBRARIES) - - message (STATUS "Finding PETSC using PETSC_INCLUDES and PETSC_LIBRARIES") - - # extract path from PETSC_INCLUDES - foreach (_include_dir ${PETSC_INCLUDES}) - if (EXISTS "${_include_dir}/petsc.h") - set (PETSC_INCLUDE_DIR "${_include_dir}" CACHE INTERNAL "Internal PETSc include directory" FORCE) - break () - endif () - endforeach () - - # check if the include directory was found - if (NOT PETSC_INCLUDE_DIR) - string (CONCAT msg - "Could not determine PETSc include directory from PETSC_INCLUDES:\n" - " PETSC_INCLUDES=${PETSC_INCLUDES}\n") - message (FATAL_ERROR ${msg}) - endif() - - # extract path from PETSC_LIBRARIES - foreach (_library_path ${PETSC_LIBRARIES}) - get_filename_component (_library_name "${_library_path}" NAME) - if (_library_name MATCHES "petsc") - get_filename_component (_library_dir "${_library_path}" DIRECTORY) - set (PETSC_LIBRARY_DIR "${_library_dir}" CACHE INTERNAL "Internal PETSc library directory" FORCE) - break () - endif () - endforeach () - - # check if the library directory was found - if (NOT PETSC_LIBRARY_DIR) - string (CONCAT msg - "Could not DETERMINE PETSc library directory from PETSC_LIBRARIES:\n" - " PETSC_LIBRARIES=${PETSC_LIBRARIES}") - message (FATAL_ERROR ${msg}) - endif() - - # set internal PETSC_DIR and PETSC_ARCH variables - set (PETSC_DIR_ "${PETSC_LIBRARY_DIR}/.." 
CACHE INTERNAL "Internal PETSC_DIR" FORCE) - set (PETSC_ARCH_ "" CACHE INTERNAL "Internal PETSC_ARCH" FORCE) - - else() - - message (STATUS "Finding PETSC using PETSC_DIR") - - # find PETSC_DIR - if (NOT PETSC_DIR) - - message (STATUS "Looking for PETSc in common install locations") - - # Debian uses versioned paths e.g /usr/lib/petscdir/3.5/ - file (GLOB DEB_PATHS "/usr/lib/petscdir/*") - - find_path (PETSC_DIR include/petsc.h - HINTS ENV PETSC_DIR - PATHS - /usr/lib/petsc - # Debian paths - ${DEB_PATHS} - # Arch Linux path - /opt/petsc/linux-c-opt - # MacPorts path - /opt/local/lib/petsc - $ENV{HOME}/petsc - DOC "PETSC Directory") - - # check if PETSC_DIR was set/found - if (NOT PETSC_DIR) - - string (CONCAT msg - "Could not locate PETSc install directory please set:\n" - " - PETSC_DIR and (optionally) PETSC_ARCH\n" - "or used the advanced options\n" - " - PETSC_INCLUDES and PETSC_LIBRARIES.") - message (FATAL_ERROR ${msg}) - - endif () - - endif() - - # find PETSC_ARCH - if (NOT PETSC_ARCH) - - set (_petsc_arches - $ENV{PETSC_ARCH} # If set, use environment variable first - linux-gnu-c-debug linux-gnu-c-opt # Debian defaults - x86_64-unknown-linux-gnu i386-unknown-linux-gnu) - set (PETSCCONF "NOTFOUND" CACHE INTERNAL "Cleared" FORCE) - foreach (arch ${_petsc_arches}) - find_path (PETSCCONF petscconf.h - HINTS ${PETSC_DIR} - PATH_SUFFIXES ${arch}/include bmake/${arch} - NO_DEFAULT_PATH) - if (PETSCCONF) - set (PETSC_ARCH "${arch}" CACHE STRING "PETSC build architecture" FORCE) - break () - endif () - endforeach () - set (PETSCCONF "NOTFOUND" CACHE INTERNAL "Scratch variable" FORCE) - - endif () - - if (PETSC_ARCH) - set (PETSC_INCLUDE_DIR "${PETSC_DIR}/${PETSC_ARCH}/include" CACHE INTERNAL "Internal PETSc include directory" FORCE) - set (PETSC_LIBRARY_DIR "${PETSC_DIR}/${PETSC_ARCH}/lib" CACHE INTERNAL "Internal PETSc library directory" FORCE) - else () - set (PETSC_INCLUDE_DIR "${PETSC_DIR}/include" CACHE INTERNAL "Internal PETSc include directory" FORCE) - set 
(PETSC_LIBRARY_DIR "${PETSC_DIR}/lib" CACHE INTERNAL "Internal PETSc library directory" FORCE) - endif () - - # set internal PETSC_DIR and PETSC_ARCH variables - set (PETSC_DIR_ "${PETSC_DIR}" CACHE INTERNAL "Internal PETS_DIR" FORCE) - set (PETSC_ARCH_ "${PETSC_ARCH}" CACHE INTERNAL "Internal PETS_ARCH" FORCE) - - endif () - - # Resolve the conf/rules and conf/variables files. - # The location of these files has changed with different PETSc versions, - # so look in a few different locations for them. - if (EXISTS "${PETSC_LIBRARY_DIR}/petsc/conf/petscvariables") # > 3.5 - set (petsc_conf_rules "${PETSC_LIBRARY_DIR}/petsc/conf/rules") - set (petsc_conf_variables "${PETSC_LIBRARY_DIR}/petsc/conf/variables") - elseif (EXISTS "${PETSC_INCLUDE_DIR}/petscconf.h") # > 2.3.3 - set (petsc_conf_rules "${PETSC_DIR_}/conf/rules") - set (petsc_conf_variables "${PETSC_DIR_}/conf/variables") - elseif (EXISTS "${PETSC_DIR_}/bmake/${PETSC_ARCH_}/petscconf.h") # <= 2.3.3 - set (petsc_conf_rules "${PETSC_DIR_}/bmake/common/rules") - set (petsc_conf_variables "${PETSC_DIR_}/bmake/common/variables") - elseif (PETSC_LIBRARIES AND PETSC_INCLUDES) - message (FATAL_ERROR "PETSC_LIBRARIES=${PETSC_LIBRARIES} and PETSC_INCLUDES=${PETSC_INCLUDES} do not specify a valid PETSC installation") - else () - message (FATAL_ERROR "PETSC_DIR=${PETSC_DIR} and PETSC_ARCH=${PETSC_ARCH} do not specify a valid PETSC installation") - endif () - - # ---------------------------------------------------------------------------- - # Probe the PETSc installation for information about how it was configured. 
- # ---------------------------------------------------------------------------- - - # Get the PETSc version - petsc_get_version() - - # Put variables into environment since they are needed to get - # configuration (petscvariables) in the PETSC makefile - set (ENV{PETSC_DIR} "${PETSC_DIR_}") - set (ENV{PETSC_ARCH} "${PETSC_ARCH_}") - - # A temporary makefile to probe the PETSC configuration - set (petsc_config_makefile "${PROJECT_BINARY_DIR}/Makefile.petsc") - file (WRITE "${petsc_config_makefile}" -"## This file was autogenerated by FindPETSC.cmake -# PETSC_DIR = ${PETSC_DIR_} -# PETSC_ARCH = ${PETSC_ARCH_} -include ${petsc_conf_rules} -include ${petsc_conf_variables} -show : -\t-@echo -n \${\${VARIABLE}} -") - - # Extract information about the PETSC configuration - petsc_get_variable (PETSC_LIB_DIR petsc_lib_dir) - petsc_get_variable (PETSC_EXTERNAL_LIB_BASIC petsc_libs_external) - petsc_get_variable (PETSC_CCPPFLAGS petsc_cpp_line) - petsc_get_variable (PETSC_INCLUDE petsc_include) - petsc_get_variable (PCC petsc_cc) - petsc_get_variable (PCC_FLAGS petsc_cc_flags) - petsc_get_variable (MPIEXEC petsc_mpiexec) - petsc_get_variable (PETSC_INDEX_SIZE petsc_index_size) - petsc_get_variable (PETSC_PRECISION petsc_precision) - - # We are done with the temporary Makefile, calling PETSC_GET_VARIABLE after this point is invalid! - file (REMOVE ${petsc_config_makefile}) - - # ---------------------------------------------------------------------------- - # Determine what libraries and includes are needed. - # ---------------------------------------------------------------------------- - - if (PETSC_INCLUDES AND PETSC_LIBRARIES) - - # If the user manually set PETSC_INCUDES and PETSC_LIBRARIES, we work off of - # what they provided. 
- - # Make a copy of the user-provided library list to modify as libraries are - # found and extracted - set (PETSC_LIBRARIES_REMAINING ${PETSC_LIBRARIES}) - - # Look for petscvec first, if it doesn't exist, we must be using single-library - petsc_find_library_in_list (VEC petscvec PETSC_LIBRARIES_REMAINING) - - if (PETSC_LIBRARY_VEC) - - # libpetscsys is called libpetsc prior to 3.1 (when single-library was introduced) - petsc_find_library_in_list (SYS "petscsys;petsc" PETSC_LIBRARIES_REMAINING) - petsc_find_library_in_list (MAT petscmat PETSC_LIBRARIES_REMAINING) - petsc_find_library_in_list (DM petscdm PETSC_LIBRARIES_REMAINING) - petsc_find_library_in_list (KSP petscksp PETSC_LIBRARIES_REMAINING) - petsc_find_library_in_list (SNES petscsnes PETSC_LIBRARIES_REMAINING) - petsc_find_library_in_list (TS petscts PETSC_LIBRARIES_REMAINING) - petsc_join (SYS REMAINING) - petsc_join (VEC SYS) - petsc_join (MAT VEC) - petsc_join (DM MAT) - petsc_join (KSP DM) - petsc_join (SNES KSP) - petsc_join (TS SNES) - - set (PETSC_LIBRARY_ALL ${PETSC_LIBRARY_TS} CACHE INTERNAL "All PETSC libraries" FORCE) - set (PETSC_LIBRARIES_ALL ${PETSC_LIBRARIES_TS} CACHE INTERNAL "All PETSC libraries" FORCE) - - message (STATUS "Recognized PETSC install with separate libraries for each package") - - else () - - # There is no libpetscvec - set (PETSC_LIBRARY_VEC "NOTFOUND" CACHE INTERNAL "Cleared" FORCE) - - petsc_find_library_in_list (SINGLE petsc PETSC_LIBRARIES_REMAINING) - # Debian 9/Ubuntu 16.04 uses _real and _complex extensions when using libraries in /usr/lib/petsc. 
- if (NOT PETSC_LIBRARY_SINGLE) - petsc_find_library_in_list (SINGLE petsc_real PETSC_LIBRARIES_REMAINING) - endif() - if (NOT PETSC_LIBRARY_SINGLE) - petsc_find_library_in_list (SINGLE petsc_complex PETSC_LIBRARIES_REMAINING) - endif() - - foreach (pkg SYS VEC MAT DM KSP SNES TS ALL) - set (PETSC_LIBRARIES_${pkg} "${PETSC_LIBRARY_SINGLE}" CACHE INTERNAL "PETSC ${pkg} libraries" FORCE) - endforeach () - - message (STATUS "Recognized PETSC install with single library for all packages") - - endif () - - # At this point PETSC_LIBRARIES_REMAINING should only contain external - # libraries needed by PETSc. These may (e.g., static build) or may not - # (e.g., shared build) be needed to compile but are added to the package - # libraries regardless. - foreach (pkg SYS VEC MAT DM KSP SNES TS ALL) - list (APPEND PETSC_LIBRARIES_${pkg} ${PETSC_LIBRARIES_REMAINING}) - # since list APPEND creates a new local variable in the current scope we need - # to set the cache variable value to propagate the changes upwards - set (PETSC_LIBRARIES_${pkg} ${PETSC_LIBRARIES_${pkg}} CACHE INTERNAL "PETSC ${pkg} libraries" FORCE) - endforeach () - - # Try to run a simple executable - petsc_test_runs ("${PETSC_INCLUDES}" "${PETSC_LIBRARIES_TS}" petsc_works_userprovided) - if (petsc_works_userprovided) - message (STATUS "PETSC works with the includes and libraries given.") - else () - message (STATUS "PETSC could not be used, maybe the install is broken.") - endif () - - # set include and library variables needed to create targets below - set (petsc_includes_needed ${PETSC_INCLUDES}) - set (petsc_libraries_needed ${PETSC_LIBRARIES}) - - else () - - include (ResolveCompilerPaths) - # Extract include paths and libraries from compile command line - resolve_includes (petsc_includes_all "${petsc_cpp_line}") - - # On windows we need to make sure we're linking against the right - # runtime library - if (WIN32) - if (petsc_cc_flags MATCHES "-MT") - - set (using_md False) - foreach(flag_var - 
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE - CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) - if(${flag_var} MATCHES "/MD") - set (using_md True) - endif(${flag_var} MATCHES "/MD") - endforeach(flag_var) - if(${using_md} MATCHES "True") - string(CONCAT msg "PETSC was built with /MT, but /MD is currently set.\n" - "See http://www.cmake.org/Wiki/CMake_FAQ#How_can_I_build_my_MSVC_application_with_a_static_runtime.3F") - message(WARNING ${msg}) - endif(${using_md} MATCHES "True") - - endif (petsc_cc_flags MATCHES "-MT") - endif (WIN32) - - include (CorrectWindowsPaths) - convert_cygwin_path(petsc_lib_dir) - - # Look for petscvec first, if it doesn't exist, we must be using single-library - petsc_find_library (VEC petscvec) - if (PETSC_LIBRARY_VEC) - - petsc_find_library (SYS "petscsys;petsc") # libpetscsys is called libpetsc prior to 3.1 (when single-library was introduced) - petsc_find_library (MAT petscmat) - petsc_find_library (DM petscdm) - petsc_find_library (KSP petscksp) - petsc_find_library (SNES petscsnes) - petsc_find_library (TS petscts) - petsc_join (VEC SYS) - petsc_join (MAT VEC) - petsc_join (DM MAT) - petsc_join (KSP DM) - petsc_join (SNES KSP) - petsc_join (TS SNES) - - set (PETSC_LIBRARY_ALL ${PETSC_LIBRARY_TS} CACHE INTERNAL "All PETSC libraries" FORCE) - set (PETSC_LIBRARIES_ALL ${PETSC_LIBRARIES_TS} CACHE INTERNAL "All PETSC libraries" FORCE) - - message (STATUS "Recognized PETSC install with separate libraries for each package") - - else () - - # There is no libpetscvec - set (PETSC_LIBRARY_VEC "NOTFOUND" CACHE INTERNAL "Cleared" FORCE) - - petsc_find_library (SINGLE petsc) - # Debian 9/Ubuntu 16.04 uses _real and _complex extensions when using libraries in /usr/lib/petsc. 
- if (NOT PETSC_LIBRARY_SINGLE) - petsc_find_library (SINGLE petsc_real) - endif() - if (NOT PETSC_LIBRARY_SINGLE) - petsc_find_library (SINGLE petsc_complex) - endif() - - foreach (pkg SYS VEC MAT DM KSP SNES TS ALL) - set (PETSC_LIBRARIES_${pkg} "${PETSC_LIBRARY_SINGLE}" CACHE INTERNAL "PETSC ${pkg} libraries" FORCE) - endforeach () - - message (STATUS "Recognized PETSC install with single library for all packages") - - endif () - - # determine the include and library variables needed to create targets below - - find_path (PETSC_INCLUDE_CONF petscconf.h HINTS "${PETSC_INCLUDE_DIR}" "${PETSC_DIR_}/bmake/${PETSC_ARCH_}" NO_DEFAULT_PATH) - mark_as_advanced (PETSC_INCLUDE_CONF) - - set (petsc_includes_minimal ${PETSC_INCLUDE_CONF} ${PETSC_INCLUDE_DIR}) - - petsc_test_runs ("${petsc_includes_minimal}" "${PETSC_LIBRARIES_TS}" petsc_works_minimal) - if (petsc_works_minimal) - - message (STATUS "Minimal PETSC includes and libraries work. This probably means we are building with shared libs.") - set (petsc_includes_needed "${petsc_includes_minimal}") - - else (petsc_works_minimal) # Minimal includes fail, see if just adding full includes fixes it - - petsc_test_runs ("${petsc_includes_all}" "${PETSC_LIBRARIES_TS}" petsc_works_allincludes) - if (petsc_works_allincludes) # It does, we just need all the includes - - string (CONCAT msg "PETSC requires extra include paths, but links correctly with only interface libraries.\n" - "This is an unexpected configuration (but it seems to work fine).") - message (STATUS ${msg}) - set (petsc_includes_needed ${petsc_includes_all}) - - else (petsc_works_allincludes) # We are going to need to link the external libs explicitly - - resolve_libraries (petsc_libraries_external "${petsc_libs_external}") - foreach (pkg SYS VEC MAT DM KSP SNES TS ALL) - list (APPEND PETSC_LIBRARIES_${pkg} ${petsc_libraries_external}) - # since list APPEND creates a new local variable in the current scope we need - # to set the cache variable value to propagate 
the changes upwards - set (PETSC_LIBRARIES_${pkg} ${PETSC_LIBRARIES_${pkg}} CACHE INTERNAL "PETSC ${pkg} libraries" FORCE) - endforeach (pkg) - - petsc_test_runs ("${petsc_includes_minimal}" "${PETSC_LIBRARIES_TS}" petsc_works_alllibraries) - if (petsc_works_alllibraries) - - string (CONCAT msg "PETSC only need minimal includes, but requires explicit linking to all dependencies.\n" - "This is expected when PETSC is built with static libraries.") - message(STATUS ${msg}) - set (petsc_includes_needed ${petsc_includes_minimal}) - - else (petsc_works_alllibraries) - - # It looks like we really need everything, should have listened to Matt - set (petsc_includes_needed ${petsc_includes_all}) - petsc_test_runs ("${petsc_includes_all}" "${PETSC_LIBRARIES_TS}" petsc_works_all) - if (petsc_works_all) # We fail anyways - string (CONCAT msg "PETSC requires extra include paths and explicit linking to all dependencies.\n" - "This probably means you have static libraries and something unexpected in PETSC headers.") - message (STATUS ${msg}) - else (petsc_works_all) # We fail anyways - message (STATUS "PETSC could not be used, maybe the install is broken.") - endif (petsc_works_all) - - endif (petsc_works_alllibraries) - - endif (petsc_works_allincludes) - - endif (petsc_works_minimal) - - set (petsc_libraries_needed ${PETSC_LIBRARIES_ALL}) - - endif () - - # ---------------------------------------------------------------------------- - # Now we set all of the variables needed to build targets. - # ---------------------------------------------------------------------------- - - # If PETSC_WORKS is set override the executable test results. This variable - # can be manually set to ON to force CMake to accept a given PETSC - # configuration, but this will almost always result in a broken build. 
- if (PETSC_WORKS) - message (STATUS "Overwriting PETSc test results with PETSC_WORKS = ${PETSC_WORKS}") - set (PETSC_EXECUTABLE_RUNS ${PETSC_WORKS} CACHE INTERNAL "Overwritten by PETSC_WORKS" FORCE) - endif () - - # We do an out-of-source build so __FILE__ will be an absolute path, hence __INSDIR__ is superfluous - if (${PETSC_VERSION} VERSION_LESS 3.1) - set (PETSC_DEFINITIONS "-D__SDIR__=\"\"" CACHE STRING "PETSC definitions" FORCE) - else () - set (PETSC_DEFINITIONS "-D__INSDIR__=\"\"" CACHE STRING "PETSC definitions" FORCE) - endif () - - # Sometimes this can be used to assist FindMPI.cmake - set (PETSC_COMPILER ${petsc_cc} CACHE FILEPATH "PETSC compiler" FORCE) - set (PETSC_MPIEXEC ${petsc_mpiexec} CACHE FILEPATH "Executable for running PETSC MPI programs" FORCE) - - # Internal variables needed for configuring targets - set (PETSC_INDEX_SIZE ${petsc_index_size} CACHE INTERNAL "PETSC index size" FORCE) - set (PETSC_PRECISION ${petsc_precision} CACHE INTERNAL "PETSC real type precision" FORCE) - set (PETSC_INCLUDES_ ${petsc_includes_needed} CACHE INTERNAL "PETSC include paths to be used" FORCE) - set (PETSC_LIBRARIES_ ${petsc_libraries_needed} CACHE INTERNAL "PETSC libraries to be used" FORCE) - - # Note that we have forced values for all these choices. If you - # change these, you are telling the system to trust you that they - # work. It is likely that you will end up with a broken build. 
- mark_as_advanced (PETSC_CURRENT PETSC_COMPILER PETSC_DEFINITIONS PETSC_MPIEXEC PETSC_EXECUTABLE_RUNS) - -endif () - -include (FindPackageHandleStandardArgs) -find_package_handle_standard_args (PETSC - REQUIRED_VARS PETSC_EXECUTABLE_RUNS - VERSION_VAR PETSC_VERSION - FAIL_MESSAGE "PETSC could not be found.") - -# Create targets -if (PETSC_FOUND) - if (PETSC_LIBRARY_SINGLE) - foreach (suffix SYS VEC MAT DM KSP SNES TS ALL) - if (NOT TARGET PETSC::${suffix}) - add_library (PETSC::${suffix} UNKNOWN IMPORTED) - # add properties one-by-one for easier debugging - set_target_properties (PETSC::${suffix} PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${PETSC_INCLUDES_}") - set_target_properties (PETSC::${suffix} PROPERTIES - INTERFACE_LINK_LIBRARIES "${PETSC_LIBRARIES_}") - set_target_properties (PETSC::${suffix} PROPERTIES - INTERFACE_COMPILE_OPTIONS ${PETSC_DEFINITIONS}) - set_target_properties (PETSC::${suffix} PROPERTIES - IMPORTED_LOCATION ${PETSC_LIBRARY_SINGLE}) - endif () - endforeach () - else () - foreach (suffix SYS VEC MAT DM KSP SNES TS ALL) - if (PETSC_LIBRARY_${suffix} AND (NOT TARGET PETSC::${suffix})) - add_library (PETSC::${suffix} UNKNOWN IMPORTED) - # add properties one-by-one for easier debugging - set_target_properties (PETSC::${suffix} PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${PETSC_INCLUDES_}") - set_target_properties (PETSC::${suffix} PROPERTIES - INTERFACE_LINK_LIBRARIES "${PETSC_LIBRARIES_${suffix}}") - set_target_properties (PETSC::${suffix} PROPERTIES - INTERFACE_COMPILE_OPTIONS ${PETSC_DEFINITIONS}) - set_target_properties (PETSC::${suffix} PROPERTIES - IMPORTED_LOCATION ${PETSC_LIBRARY_${suffix}}) - endif () - endforeach () - endif () -endif (PETSC_FOUND) diff --git a/cmake/FindPETSc.cmake b/cmake/FindPETSc.cmake new file mode 100644 index 0000000..74960bc --- /dev/null +++ b/cmake/FindPETSc.cmake @@ -0,0 +1,421 @@ +# FindPETSc +# --------- +# +# Locates the PETSc library using pkg-config +# +# Imported Targets +# ^^^^^^^^^^^^^^^^ +# +# 
This module defines the following IMPORTED target: +# +# PETSc::PETSc - the PETSc library +# +# Result Variables +# ^^^^^^^^^^^^^^^^ +# +# This module will set the following variables in your project: +# +# PETSc_FOUND - if false, do not try to link to PETSc +# PETSc_LIBRARIES - a list of the full paths to all libraries +# PETSc_INCLUDE_DIRS - a list of all include directories +# PETSc_VERSION - the full version of PETSc MAJOR.MINOR.PATCH +# PETSc_VERSION_MAJOR - the MAJOR part of PETSc_VERSION +# PETSc_VERSION_MINOR - the MINOR part of PETSc_VERSION +# PETSc_VERSION_PATCH - the PATCH part of PETSc_VERSION +# +# Variables for locating PETSc +# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# +# Additional CMake variables for locating PETSc +# PETSc_DIR - the path to the root directory of PETSc +# PETSc_ARCH - the PETSc architecture +# PETSc_NO_ENV - instructs the module not to use the environment variables 'PETSC_DIR' and 'PETSC_ARCH' to find PETSc +# +# Environment Variables for locating PETSc +# PETSC_DIR - the path to the root directory of PETSc, part of the PETSc installation process +# PETSC_ARCH - the PETSc architecture, part of the PETSc installation process +# +# +# The original author is Frédéric Simonis @fsimonis. +# This file is derived from https://github.com/precice/precice/blob/develop/cmake/modules/FindPETSc.cmake, +# as such it falls under the original license, copied below: +# +# GNU LESSER GENERAL PUBLIC LICENSE +# Version 3, 29 June 2007 +# +# Copyright (C) 2007 Free Software Foundation, Inc. +# Everyone is permitted to copy and distribute verbatim copies +# of this license document, but changing it is not allowed. +# +# +# This version of the GNU Lesser General Public License incorporates +# the terms and conditions of version 3 of the GNU General Public +# License, supplemented by the additional permissions listed below. +# +# 0. Additional Definitions. 
+# +# As used herein, "this License" refers to version 3 of the GNU Lesser +# General Public License, and the "GNU GPL" refers to version 3 of the GNU +# General Public License. +# +# "The Library" refers to a covered work governed by this License, +# other than an Application or a Combined Work as defined below. +# +# An "Application" is any work that makes use of an interface provided +# by the Library, but which is not otherwise based on the Library. +# Defining a subclass of a class defined by the Library is deemed a mode +# of using an interface provided by the Library. +# +# A "Combined Work" is a work produced by combining or linking an +# Application with the Library. The particular version of the Library +# with which the Combined Work was made is also called the "Linked +# Version". +# +# The "Minimal Corresponding Source" for a Combined Work means the +# Corresponding Source for the Combined Work, excluding any source code +# for portions of the Combined Work that, considered in isolation, are +# based on the Application, and not on the Linked Version. +# +# The "Corresponding Application Code" for a Combined Work means the +# object code and/or source code for the Application, including any data +# and utility programs needed for reproducing the Combined Work from the +# Application, but excluding the System Libraries of the Combined Work. +# +# 1. Exception to Section 3 of the GNU GPL. +# +# You may convey a covered work under sections 3 and 4 of this License +# without being bound by section 3 of the GNU GPL. +# +# 2. Conveying Modified Versions. 
+# +# If you modify a copy of the Library, and, in your modifications, a +# facility refers to a function or data to be supplied by an Application +# that uses the facility (other than as an argument passed when the +# facility is invoked), then you may convey a copy of the modified +# version: +# +# a) under this License, provided that you make a good faith effort to +# ensure that, in the event an Application does not supply the +# function or data, the facility still operates, and performs +# whatever part of its purpose remains meaningful, or +# +# b) under the GNU GPL, with none of the additional permissions of +# this License applicable to that copy. +# +# 3. Object Code Incorporating Material from Library Header Files. +# +# The object code form of an Application may incorporate material from +# a header file that is part of the Library. You may convey such object +# code under terms of your choice, provided that, if the incorporated +# material is not limited to numerical parameters, data structure +# layouts and accessors, or small macros, inline functions and templates +# (ten or fewer lines in length), you do both of the following: +# +# a) Give prominent notice with each copy of the object code that the +# Library is used in it and that the Library and its use are +# covered by this License. +# +# b) Accompany the object code with a copy of the GNU GPL and this license +# document. +# +# 4. Combined Works. +# +# You may convey a Combined Work under terms of your choice that, +# taken together, effectively do not restrict modification of the +# portions of the Library contained in the Combined Work and reverse +# engineering for debugging such modifications, if you also do each of +# the following: +# +# a) Give prominent notice with each copy of the Combined Work that +# the Library is used in it and that the Library and its use are +# covered by this License. +# +# b) Accompany the Combined Work with a copy of the GNU GPL and this license +# document. 
+# +# c) For a Combined Work that displays copyright notices during +# execution, include the copyright notice for the Library among +# these notices, as well as a reference directing the user to the +# copies of the GNU GPL and this license document. +# +# d) Do one of the following: +# +# 0) Convey the Minimal Corresponding Source under the terms of this +# License, and the Corresponding Application Code in a form +# suitable for, and under terms that permit, the user to +# recombine or relink the Application with a modified version of +# the Linked Version to produce a modified Combined Work, in the +# manner specified by section 6 of the GNU GPL for conveying +# Corresponding Source. +# +# 1) Use a suitable shared library mechanism for linking with the +# Library. A suitable mechanism is one that (a) uses at run time +# a copy of the Library already present on the user's computer +# system, and (b) will operate properly with a modified version +# of the Library that is interface-compatible with the Linked +# Version. +# +# e) Provide Installation Information, but only if you would otherwise +# be required to provide such information under section 6 of the +# GNU GPL, and only to the extent that such information is +# necessary to install and execute a modified version of the +# Combined Work produced by recombining or relinking the +# Application with a modified version of the Linked Version. (If +# you use option 4d0, the Installation Information must accompany +# the Minimal Corresponding Source and Corresponding Application +# Code. If you use option 4d1, you must provide the Installation +# Information in the manner specified by section 6 of the GNU GPL +# for conveying Corresponding Source.) +# +# 5. Combined Libraries. 
+# +# You may place library facilities that are a work based on the +# Library side by side in a single library together with other library +# facilities that are not Applications and are not covered by this +# License, and convey such a combined library under terms of your +# choice, if you do both of the following: +# +# a) Accompany the combined library with a copy of the same work based +# on the Library, uncombined with any other library facilities, +# conveyed under the terms of this License. +# +# b) Give prominent notice with the combined library that part of it +# is a work based on the Library, and explaining where to find the +# accompanying uncombined form of the same work. +# +# 6. Revised Versions of the GNU Lesser General Public License. +# +# The Free Software Foundation may publish revised and/or new versions +# of the GNU Lesser General Public License from time to time. Such new +# versions will be similar in spirit to the present version, but may +# differ in detail to address new problems or concerns. +# +# Each version is given a distinguishing version number. If the +# Library as you received it specifies that a certain numbered version +# of the GNU Lesser General Public License "or any later version" +# applies to it, you have the option of following the terms and +# conditions either of that published version or of any later version +# published by the Free Software Foundation. If the Library as you +# received it does not specify a version number of the GNU Lesser +# General Public License, you may choose any version of the GNU Lesser +# General Public License ever published by the Free Software Foundation. +# +# If the Library as you received it specifies that a proxy can decide +# whether future versions of the GNU Lesser General Public License shall +# apply, that proxy's public statement of acceptance of any version is +# permanent authorization for you to choose that version for the +# Library. 
+# + + +cmake_policy(VERSION 3.10) + + +# Macro to print the search context used by pkg-config +macro(_petsc_print_pkg_env) + if(NOT PETSc_FIND_QUIETLY) + set(_env_mess "pkg-config will search the following paths:") + if(DEFINED ENV{PKG_CONFIG_PATH}) + set(_env_mess "${_env_mess}\nPKG_CONFIG_PATH") + string(REPLACE ":" ";" _env_pkg_list "$ENV{PKG_CONFIG_PATH}") + foreach(p IN LISTS _env_pkg_list) + set(_env_mess "${_env_mess}\n ${p}") + endforeach() + unset(_env_pkg_list) + endif() + if(CMAKE_PREFIX_PATH) + set(_env_mess "${_env_mess}\nCMAKE_PREFIX_PATH") + foreach(p IN LISTS CMAKE_PREFIX_PATH) + set(_env_mess "${_env_mess}\n ${p}") + endforeach() + endif() + if(CMAKE_FRAMEWORK_PATH) + set(_env_mess "${_env_mess}\nCMAKE_FRAMEWORK_PATH") + foreach(p IN LISTS CMAKE_FRAMEWORK_PATH) + set(_env_mess "${_env_mess}\n ${p}") + endforeach() + endif() + if(CMAKE_APPBUNDLE_PATH) + set(_env_mess "${_env_mess}\nCMAKE_APPBUNDLE_PATH") + foreach(p IN LISTS CMAKE_APPBUNDLE_PATH) + set(_env_mess "${_env_mess}\n ${p}") + endforeach() + endif() + message(STATUS "${_env_mess}") + unset(_env_mess) + endif() +endmacro() + + +# Message macro which respects the QUIET argument of the package +macro(_message) + if(NOT PETSc_FIND_QUIETLY) + message(${ARGV}) + endif() +endmacro() + +set(_petsc_quiet_arg "") +if(PETSc_FIND_QUIETLY) + set(_petsc_quiet_arg "QUIET") +endif() +find_package(PkgConfig ${_petsc_quiet_arg}) + +if(PKG_CONFIG_FOUND) + # Detect additional prefix paths + set(_petsc_detected_prefixes "") + if(DEFINED PETSc_DIR) + list(APPEND _petsc_detected_prefixes "${PETSc_DIR}") + if(DEFINED PETSc_ARCH) + list(APPEND _petsc_detected_prefixes "${PETSc_DIR}/${PETSc_ARCH}") + endif() + endif() + + if(DEFINED ENV{PETSC_DIR} AND NOT PETSc_NO_ENV) + list(APPEND _petsc_detected_prefixes "$ENV{PETSC_DIR}") + if(DEFINED ENV{PETSC_ARCH}) + list(APPEND _petsc_detected_prefixes "$ENV{PETSC_DIR}/$ENV{PETSC_ARCH}") + endif() + endif() + list(REMOVE_DUPLICATES _petsc_detected_prefixes) + +
set(_petsc_prefixes "") + set(_petsc_skipped_prefixes "") + _message(STATUS "Detecting additional PETSc prefixes") + foreach(prefix IN LISTS _petsc_detected_prefixes ) + if(EXISTS "${prefix}/lib/pkgconfig") + _message(STATUS "Detected ${prefix}") + list(APPEND _petsc_prefixes "${prefix}") + else() + list(APPEND _petsc_skipped_prefixes "${prefix}") + endif() + endforeach() + if(_petsc_skipped_prefixes) + _message(STATUS "Skipped the following invalid prefixes: ${_petsc_skipped_prefixes}") + endif() + unset(_petsc_detected_prefixes) + + # Remember the previous state of CMAKE_PREFIX_PATH + set(_petsc_prefix_unset True) + if(DEFINED CMAKE_PREFIX_PATH) + set(_petsc_prefix_unset False) + set(_petsc_prefix_old ${CMAKE_PREFIX_PATH}) + endif() + + list(APPEND CMAKE_PREFIX_PATH ${_petsc_prefixes}) + _petsc_print_pkg_env() + + # Build the pkg-config version spec + set(_pkg_version_spec "") + if(DEFINED PETSc_FIND_VERSION) + if(PETSc_FIND_VERSION_EXACT) + set(_pkg_version_spec "=${PETSc_FIND_VERSION}") + else() + set(_pkg_version_spec ">=${PETSc_FIND_VERSION}") + endif() + endif() + + # Set PKG_CONFIG_ALLOW_SYSTEM_CFLAGS + set(_petsc_prev_allow_system_cflags $ENV{PKG_CONFIG_ALLOW_SYSTEM_CFLAGS}) + set(ENV{PKG_CONFIG_ALLOW_SYSTEM_CFLAGS} 1) + + # Use pkg-config to find PETSc + set(PKG_CONFIG_USE_CMAKE_PREFIX_PATH "YES") + pkg_check_modules(PC_PETSc ${_petsc_quiet_arg} "PETSc${_pkg_version_spec}") + + # Restore/Reset PKG_CONFIG_USE_CMAKE_PREFIX_PATH + set(ENV{PKG_CONFIG_ALLOW_SYSTEM_CFLAGS} ${_petsc_prev_allow_system_cflags}) + + unset(PKG_CONFIG_USE_CMAKE_PREFIX_PATH) + unset(_pkg_version_spec) + + # Restore the previous state of CMAKE_PREFIX_PATH + if(_petsc_prefix_unset) + unset(CMAKE_PREFIX_PATH) + else() + set(CMAKE_PREFIX_PATH "${_petsc_prefix_old}") + endif() + + # Set straight forward result variables + set(PETSc_FOUND ${PC_PETSc_FOUND}) + set(PETSc_INCLUDE_DIRS ${PC_PETSc_INCLUDE_DIRS}) + + # libm is always required + set(_petsc_libs "m") + set(_petsc_missing_libs "") + 
+ # Find main PETSc libraries + foreach(_next_lib IN LISTS PC_PETSc_LIBRARIES) + find_library(_petsc_lib_${_next_lib} NAMES ${_next_lib} HINTS ${PC_PETSc_LIBRARY_DIRS}) + if(_petsc_lib_${_next_lib}) + list(APPEND _petsc_libs "${_petsc_lib_${_next_lib}}") + else() + list(APPEND _petsc_missing_libs "${_next_lib}") + endif() + endforeach() + + # Link against MPI if it is used. + # This adds all required link directories. + foreach(_next_lib IN LISTS PC_PETSc_STATIC_LIBRARIES) + if(_next_lib STREQUAL "mpi") + find_package(MPI ${_petsc_quiet_arg}) + if(MPI_FOUND) + # Prefer to use the CXX dependencies if enabled otherwise use the C + if(DEFINED CMAKE_CXX_COMPILER) + list(APPEND _petsc_libs "MPI::MPI_CXX") + else() + enable_language(C) + list(APPEND _petsc_libs "MPI::MPI_C") + endif() + break() + else() + list(APPEND _petsc_missing_libs "MPI") + endif() + endif() + endforeach() + + # Check if everything was detected + if(_petsc_missing_libs AND NOT PETSc_FIND_QUIETLY) + message("The following libraries were not detected: ${_petsc_missing_libs}") + elseif(NOT _petsc_missing_libs) + # Set the visible variable. This will let the module to succeed. 
+ set(PETSc_LIBRARIES "${_petsc_libs}") + endif() + unset(_petsc_libs) + unset(_petsc_missing_libs) + + # Extract version parts from the version information + if(PC_PETSc_VERSION) + set(_petsc_versions "") + string(REGEX MATCHALL "[0-9]+" _petsc_versions ${PC_PETSc_VERSION}) + list(GET _petsc_versions 0 _petsc_version_major) + list(GET _petsc_versions 1 _petsc_version_minor) + list(GET _petsc_versions 2 _petsc_version_patch) + + set(PETSc_VERSION ${PC_PETSc_VERSION} CACHE STRING "Full version of PETSc") + set(PETSc_VERSION_MAJOR ${_petsc_version_major} CACHE INTERNAL "Major version of PETSc") + set(PETSc_VERSION_MINOR ${_petsc_version_minor} CACHE INTERNAL "Minor version of PETSc") + set(PETSc_VERSION_PATCH ${_petsc_version_patch} CACHE INTERNAL "Patch version of PETSc") + + unset(_petsc_versions) + unset(_petsc_version_major) + unset(_petsc_version_minor) + unset(_petsc_version_patch) + endif() +endif() +unset(_petsc_quiet_arg) + +include (FindPackageHandleStandardArgs) +find_package_handle_standard_args (PETSc + REQUIRED_VARS PETSc_FOUND PETSc_INCLUDE_DIRS PETSc_LIBRARIES + VERSION_VAR PETSc_VERSION + ) + +if(NOT TARGET XSDK::PETSc) + add_library(XSDK::PETSc INTERFACE IMPORTED) + set_target_properties(XSDK::PETSc PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${PETSc_INCLUDE_DIRS}" + INTERFACE_LINK_LIBRARIES "${PETSc_LIBRARIES}" + ) +endif() + +mark_as_advanced(PETSc_INCLUDE_DIRS PETSc_LIBRARIES PETSc_VERSION_MAJOR PETSc_VERSION_MINOR PETSc_VERSION_PATCH VERSION_VAR PETSc_VERSION) diff --git a/cmake/FindPLASMA.cmake b/cmake/FindPLASMA.cmake new file mode 100644 index 0000000..5f180bd --- /dev/null +++ b/cmake/FindPLASMA.cmake @@ -0,0 +1,50 @@ +# find the PLASMA include path +find_path(PLASMA_INCLUDE_DIR plasma.h + NAMES plasma.h + HINTS ${PLASMA_DIR} $ENV{PLASMA_DIR} ${CMAKE_PREFIX_PATH} + PATH_SUFFIXES include + NO_DEFAULT_PATH + DOC "Directory with PLASMA header" +) + +# find the main PLASMA library +find_library(PLASMA_LIBRARY + NAMES plasma + HINTS ${PLASMA_DIR} 
$ENV{PLASMA_DIR} ${CMAKE_PREFIX_PATH} + PATH_SUFFIXES lib lib64 + NO_DEFAULT_PATH + DOC "The PLASMA library." +) + +find_library(PLASMA_CORE_BLAS_LIBRARY + NAMES plasma_core_blas + HINTS ${PLASMA_DIR} $ENV{PLASMA_DIR} ${CMAKE_PREFIX_PATH} + PATH_SUFFIXES lib lib64 + NO_DEFAULT_PATH + DOC "The PLASMA core blas library." +) + +set(PLASMA_LIBRARIES "${PLASMA_LIBRARY};${PLASMA_CORE_BLAS_LIBRARY}") +# Require each library on its own: a list whose first entry is -NOTFOUND still evaluates as true. +find_package_handle_standard_args(PLASMA + REQUIRED_VARS + PLASMA_LIBRARY PLASMA_CORE_BLAS_LIBRARY + PLASMA_INCLUDE_DIR + VERSION_VAR + PLASMA_VERSION +) + +# Create target for PLASMA +if(PLASMA_FOUND) + if(NOT TARGET XSDK::PLASMA) + add_library(XSDK::PLASMA INTERFACE IMPORTED) + endif() + + message(STATUS "Created XSDK::PLASMA target") + message(STATUS " INTERFACE_INCLUDE_DIRECTORIES: ${PLASMA_INCLUDE_DIR}") + message(STATUS " INTERFACE_LINK_LIBRARIES: ${PLASMA_LIBRARIES}") + + set_target_properties(XSDK::PLASMA PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${PLASMA_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "${PLASMA_LIBRARIES}") +endif() diff --git a/cmake/FindPackageMultipass.cmake b/cmake/FindPackageMultipass.cmake deleted file mode 100644 index fbf06a7..0000000 --- a/cmake/FindPackageMultipass.cmake +++ /dev/null @@ -1,106 +0,0 @@ -# PackageMultipass - this module defines two macros -# -# FIND_PACKAGE_MULTIPASS (Name CURRENT -# STATES VAR0 VAR1 ... -# DEPENDENTS DEP0 DEP1 ...) -# -# This function creates a cache entry _CURRENT which -# the user can set to "NO" to trigger a reconfiguration of the package. -# The first time this function is called, the values of -# _VAR0, ... are saved. If _CURRENT -# is false or if any STATE has changed since the last time -# FIND_PACKAGE_MULTIPASS() was called, then CURRENT will be set to "NO", -# otherwise CURRENT will be "YES". IF not CURRENT, then -# _DEP0, ... will be FORCED to NOTFOUND.
-# Example: -# find_path (FOO_DIR include/foo.h) -# FIND_PACKAGE_MULTIPASS (Foo foo_current -# STATES DIR -# DEPENDENTS INCLUDES LIBRARIES) -# if (NOT foo_current) -# # Make temporary files, run programs, etc, to determine FOO_INCLUDES and FOO_LIBRARIES -# endif (NOT foo_current) -# -# MULTIPASS_SOURCE_RUNS (Name INCLUDES LIBRARIES SOURCE RUNS LANGUAGE) -# Always runs the given test, use this when you need to re-run tests -# because parent variables have made old cache entries stale. The LANGUAGE -# variable is either C or CXX indicating which compiler the test should -# use. -# MULTIPASS_C_SOURCE_RUNS (Name INCLUDES LIBRARIES SOURCE RUNS) -# DEPRECATED! This is only included for backwards compatability. Use -# the more general MULTIPASS_SOURCE_RUNS instead. -# Always runs the given test, use this when you need to re-run tests -# because parent variables have made old cache entries stale. - -macro (FIND_PACKAGE_MULTIPASS _name _current) - string (TOUPPER ${_name} _NAME) - set (_args ${ARGV}) - list (REMOVE_AT _args 0 1) - - set (_states_current "YES") - list (GET _args 0 _cmd) - if (_cmd STREQUAL "STATES") - list (REMOVE_AT _args 0) - list (GET _args 0 _state) - while (_state AND NOT _state STREQUAL "DEPENDENTS") - # The name of the stored value for the given state - set (_stored_var PACKAGE_MULTIPASS_${_NAME}_${_state}) - if (NOT "${${_stored_var}}" STREQUAL "${${_NAME}_${_state}}") - set (_states_current "NO") - endif (NOT "${${_stored_var}}" STREQUAL "${${_NAME}_${_state}}") - set (${_stored_var} "${${_NAME}_${_state}}" CACHE INTERNAL "Stored state for ${_name}." FORCE) - list (REMOVE_AT _args 0) - list (GET _args 0 _state) - endwhile (_state AND NOT _state STREQUAL "DEPENDENTS") - endif (_cmd STREQUAL "STATES") - - set (_stored ${_NAME}_CURRENT) - if (NOT ${_stored}) - set (${_stored} "YES" CACHE BOOL "Is the configuration for ${_name} current? Set to \"NO\" to reconfigure." 
FORCE) - set (_states_current "NO") - endif (NOT ${_stored}) - - set (${_current} ${_states_current}) - if (NOT ${_current} AND PACKAGE_MULTIPASS_${_name}_CALLED) - message (STATUS "Clearing ${_name} dependent variables") - # Clear all the dependent variables so that the module can reset them - list (GET _args 0 _cmd) - if (_cmd STREQUAL "DEPENDENTS") - list (REMOVE_AT _args 0) - foreach (dep ${_args}) - set (${_NAME}_${dep} "NOTFOUND" CACHE INTERNAL "Cleared" FORCE) - endforeach (dep) - endif (_cmd STREQUAL "DEPENDENTS") - set (${_NAME}_FOUND "NOTFOUND" CACHE INTERNAL "Cleared" FORCE) - endif () - set (PACKAGE_MULTIPASS_${name}_CALLED YES CACHE INTERNAL "Private" FORCE) -endmacro (FIND_PACKAGE_MULTIPASS) - - -macro (MULTIPASS_SOURCE_RUNS includes libraries source runs language) - include (Check${language}SourceRuns) - # This is a ridiculous hack. CHECK_${language}_SOURCE_* thinks that if the - # *name* of the return variable doesn't change, then the test does - # not need to be re-run. We keep an internal count which we - # increment to guarantee that every test name is unique. If we've - # gotten here, then the configuration has changed enough that the - # test *needs* to be rerun. - if (NOT MULTIPASS_TEST_COUNT) - set (MULTIPASS_TEST_COUNT 00) - endif (NOT MULTIPASS_TEST_COUNT) - math (EXPR _tmp "${MULTIPASS_TEST_COUNT} + 1") # Why can't I add to a cache variable? 
- set (MULTIPASS_TEST_COUNT ${_tmp} CACHE INTERNAL "Unique test ID") - set (testname MULTIPASS_TEST_${MULTIPASS_TEST_COUNT}_${runs}) - set (CMAKE_REQUIRED_INCLUDES ${includes}) - set (CMAKE_REQUIRED_LIBRARIES ${libraries}) - if(${language} STREQUAL "C") - check_c_source_runs ("${source}" ${testname}) - elseif(${language} STREQUAL "CXX") - check_cxx_source_runs ("${source}" ${testname}) - endif() - set (${runs} "${${testname}}") -endmacro (MULTIPASS_SOURCE_RUNS) - -macro (MULTIPASS_C_SOURCE_RUNS includes libraries source runs) - multipass_source_runs("${includes}" "${libraries}" "${source}" ${runs} "C") -endmacro (MULTIPASS_C_SOURCE_RUNS) diff --git a/cmake/FindSLATE.cmake b/cmake/FindSLATE.cmake new file mode 100644 index 0000000..44e5046 --- /dev/null +++ b/cmake/FindSLATE.cmake @@ -0,0 +1,42 @@ +# find the SLATE include path +find_path(SLATE_INCLUDE_DIR slate.hh + NAMES slate/slate.hh + HINTS ${SLATE_DIR} $ENV{SLATE_DIR} ${CMAKE_PREFIX_PATH} + PATH_SUFFIXES include + NO_DEFAULT_PATH + DOC "Directory with SLATE header" +) + +# find the main SLATE library +find_library(SLATE_LIBRARIES + NAMES slate + HINTS ${SLATE_DIR} $ENV{SLATE_DIR} ${CMAKE_PREFIX_PATH} + PATH_SUFFIXES lib lib64 + NO_DEFAULT_PATH + DOC "The SLATE library." 
+) + +find_package_handle_standard_args(SLATE + REQUIRED_VARS + SLATE_LIBRARIES + SLATE_INCLUDE_DIR + VERSION_VAR + SLATE_VERSION +) + +# Create target for SLATE +if(SLATE_FOUND) + + if(NOT TARGET XSDK::SLATE) + add_library(XSDK::SLATE INTERFACE IMPORTED) + endif() + + message(STATUS "Created XSDK::SLATE target") + message(STATUS " INTERFACE_INCLUDE_DIRECTORIES: ${SLATE_INCLUDE_DIR}") + message(STATUS " INTERFACE_LINK_LIBRARIES: ${SLATE_LIBRARIES}") + + set_target_properties(XSDK::SLATE PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${SLATE_INCLUDE_DIR}" + INTERFACE_LINK_LIBRARIES "${SLATE_LIBRARIES}") + +endif() diff --git a/cmake/FindSUNDIALS.cmake b/cmake/FindSUNDIALS.cmake index 0a80d31..d74bdf7 100644 --- a/cmake/FindSUNDIALS.cmake +++ b/cmake/FindSUNDIALS.cmake @@ -12,12 +12,6 @@ # SUNDIALS Copyright End # --------------------------------------------------------------- -# check if the SUNDIALS path is set -if(NOT SUNDIALS_DIR) - message(FATAL_ERROR "Error: SUNDIALS_DIR not set!") - set(SUNDIALS_DIR "" CACHE PATH "SUNDIALS install directory") -endif() - # determine SUNDIALS components needed if(NOT SUNDIALS_FIND_COMPONENTS) set(SUNDIALS_FIND_COMPONENTS @@ -47,39 +41,14 @@ if(ENABLE_SUPERLU) list(APPEND SUNDIALS_FIND_COMPONENTS "sunlinsolsuperludist") endif() -# find the library for each component -foreach(component ${SUNDIALS_FIND_COMPONENTS}) - find_library(${component}_LIBRARY sundials_${component} - PATHS ${SUNDIALS_DIR}/lib ${SUNDIALS_DIR}/lib64 +find_package(SUNDIALS REQUIRED COMPONENTS ${SUNDIALS_FIND_COMPONENTS} + HINTS ${SUNDIALS_DIR} $ENV{SUNDIALS_DIR} ${CMAKE_PREFIX_PATH} NO_DEFAULT_PATH) - if(${component}_LIBRARY) - list(APPEND SUNDIALS_LIBRARIES ${${component}_LIBRARY}) - list(APPEND SUNDIALS_REQUIRED_VARS ${component}_LIBRARY) - set(SUNDIALS_${component}_FOUND TRUE) - endif() -endforeach() - -find_package_handle_standard_args(SUNDIALS - REQUIRED_VARS ${SUNDIALS_REQUIRED_VARS} - HANDLE_COMPONENTS) -# create a target for each component 
-if(SUNDIALS_FOUND) - foreach(component ${SUNDIALS_FIND_COMPONENTS}) - if(NOT TARGET SUNDIALS::${component}) - add_library(SUNDIALS::${component} UNKNOWN IMPORTED) - set_target_properties(SUNDIALS::${component} - PROPERTIES - IMPORTED_LOCATION ${${component}_LIBRARY} - INTERFACE_INCLUDE_DIRECTORIES ${SUNDIALS_DIR}/include) - endif() +if(NOT TARGET XSDK::SUNDIALS) + add_library(XSDK_SUNDIALS INTERFACE) + foreach(_component ${SUNDIALS_FIND_COMPONENTS}) + target_link_libraries(XSDK_SUNDIALS INTERFACE SUNDIALS::${_component}) endforeach() - if(NOT TARGET XSDK::SUNDIALS) - add_library(XSDK::SUNDIALS UNKNOWN IMPORTED) - set_target_properties(XSDK::SUNDIALS - PROPERTIES - IMPORTED_LOCATION "${nvecserial_LIBRARY}" - INTERFACE_LINK_LIBRARIES "${SUNDIALS_LIBRARIES}" - INTERFACE_INCLUDE_DIRECTORIES ${SUNDIALS_DIR}/include) - endif() + add_library(XSDK::SUNDIALS ALIAS XSDK_SUNDIALS) endif() diff --git a/cmake/FindSUPERLUDIST.cmake b/cmake/FindSUPERLUDIST.cmake index ef630ac..a521db0 100644 --- a/cmake/FindSUPERLUDIST.cmake +++ b/cmake/FindSUPERLUDIST.cmake @@ -23,14 +23,14 @@ ### find include dir find_path(SUPERLUDIST_INCLUDE_DIR superlu_defs.h - HINTS ${SUPERLU_DIR} $ENV{SUPERLU_DIR} + HINTS ${SUPERLU_DIR} $ENV{SUPERLU_DIR} ${CMAKE_PREFIX_PATH} PATH_SUFFIXES include NO_DEFAULT_PATH DOC "Directory with the SuperLU DIST header") ### find library find_library(SUPERLUDIST_LIBRARY superlu_dist - HINTS ${SUPERLU_DIR} $ENV{SUPERLU_DIR} + HINTS ${SUPERLU_DIR} $ENV{SUPERLU_DIR} ${CMAKE_PREFIX_PATH} PATH_SUFFIXES lib lib64 NO_DEFAULT_PATH DOC "The SuperLU DIST library") diff --git a/cmake/ResolveCompilerPaths.cmake b/cmake/ResolveCompilerPaths.cmake deleted file mode 100644 index 1007820..0000000 --- a/cmake/ResolveCompilerPaths.cmake +++ /dev/null @@ -1,105 +0,0 @@ -# ResolveCompilerPaths - this module defines two macros -# -# RESOLVE_LIBRARIES (XXX_LIBRARIES LINK_LINE) -# This macro is intended to be used by FindXXX.cmake modules. 
-# It parses a compiler link line and resolves all libraries -# (-lfoo) using the library path contexts (-L/path) in scope. -# The result in XXX_LIBRARIES is the list of fully resolved libs. -# Example: -# -# RESOLVE_LIBRARIES (FOO_LIBRARIES "-L/A -la -L/B -lb -lc -ld") -# -# will be resolved to -# -# FOO_LIBRARIES:STRING="/A/liba.so;/B/libb.so;/A/libc.so;/usr/lib/libd.so" -# -# if the filesystem looks like -# -# /A: liba.so libc.so -# /B: liba.so libb.so -# /usr/lib: liba.so libb.so libc.so libd.so -# -# and /usr/lib is a system directory. -# -# Note: If RESOLVE_LIBRARIES() resolves a link line differently from -# the native linker, there is a bug in this macro (please report it). -# -# RESOLVE_INCLUDES (XXX_INCLUDES INCLUDE_LINE) -# This macro is intended to be used by FindXXX.cmake modules. -# It parses a compile line and resolves all includes -# (-I/path/to/include) to a list of directories. Other flags are ignored. -# Example: -# -# RESOLVE_INCLUDES (FOO_INCLUDES "-I/A -DBAR='\"irrelevant -I/string here\"' -I/B") -# -# will be resolved to -# -# FOO_INCLUDES:STRING="/A;/B" -# -# assuming both directories exist. 
-# Note: as currently implemented, the -I/string will be picked up mistakenly (cry, cry) -include (CorrectWindowsPaths) - -macro (RESOLVE_LIBRARIES LIBS LINK_LINE) - string (REGEX MATCHALL "((-L|-l|-Wl)([^\" ]+|\"[^\"]+\")|[^\" ]+\\.(a|so|dll|lib))" _all_tokens "${LINK_LINE}") - set (_libs_found "") - set (_directory_list "") - foreach (token ${_all_tokens}) - if (token MATCHES "-L([^\" ]+|\"[^\"]+\")") - # If it's a library path, add it to the list - string (REGEX REPLACE "^-L" "" token ${token}) - string (REGEX REPLACE "//" "/" token ${token}) - convert_cygwin_path(token) - list (APPEND _directory_list ${token}) - elseif (token MATCHES "^(-l([^\" ]+|\"[^\"]+\")|[^\" ]+\\.(a|so|dll|lib))") - # It's a library, resolve the path by looking in the list and then (by default) in system directories - if (WIN32) #windows expects "libfoo", linux expects "foo" - string (REGEX REPLACE "^-l" "lib" token ${token}) - else (WIN32) - string (REGEX REPLACE "^-l" "" token ${token}) - endif (WIN32) - set (_root "") - if (token MATCHES "^/") # We have an absolute path - #separate into a path and a library name: - string (REGEX MATCH "[^/]*\\.(a|so|dll|lib)$" libname ${token}) - string (REGEX MATCH ".*[^${libname}$]" libpath ${token}) - convert_cygwin_path(libpath) - set (_directory_list ${_directory_list} ${libpath}) - set (token ${libname}) - endif (token MATCHES "^/") - set (_lib "NOTFOUND" CACHE FILEPATH "Cleared" FORCE) - find_library (_lib ${token} HINTS ${_directory_list} ${_root}) - if (_lib) - string (REPLACE "//" "/" _lib ${_lib}) - list (APPEND _libs_found ${_lib}) - else (_lib) - message (STATUS "Unable to find library ${token}") - endif (_lib) - endif (token MATCHES "-L([^\" ]+|\"[^\"]+\")") - endforeach (token) - set (_lib "NOTFOUND" CACHE INTERNAL "Scratch variable" FORCE) - # only the LAST occurence of each library is required since there should be no circular dependencies - if (_libs_found) - list (REVERSE _libs_found) - list (REMOVE_DUPLICATES _libs_found) - list 
(REVERSE _libs_found) - endif (_libs_found) - set (${LIBS} "${_libs_found}") -endmacro (RESOLVE_LIBRARIES) - -macro (RESOLVE_INCLUDES INCS COMPILE_LINE) - string (REGEX MATCHALL "-I([^\" ]+|\"[^\"]+\")" _all_tokens "${COMPILE_LINE}") - set (_incs_found "") - foreach (token ${_all_tokens}) - string (REGEX REPLACE "^-I" "" token ${token}) - string (REGEX REPLACE "//" "/" token ${token}) - convert_cygwin_path(token) - if (EXISTS ${token}) - list (APPEND _incs_found ${token}) - else (EXISTS ${token}) - message (STATUS "Include directory ${token} does not exist") - endif (EXISTS ${token}) - endforeach (token) - list (REMOVE_DUPLICATES _incs_found) - set (${INCS} "${_incs_found}") -endmacro (RESOLVE_INCLUDES) diff --git a/cmake/XsdkAddTest.cmake b/cmake/XsdkAddTest.cmake new file mode 100644 index 0000000..60b91a7 --- /dev/null +++ b/cmake/XsdkAddTest.cmake @@ -0,0 +1,28 @@ +macro(xsdk_add_test) + + set(options) + + # MPI_NPROCS = number of mpi tasks to use in parallel tests + set(oneValueArgs "NAME" "MPI_NPROCS") + + set(multiValueArgs "COMMAND" "ENVIRONMENT") + + # parse inputs and create variables xsdk_add_test_NAME, _MPI_NPROCS, _COMMAND and _ENVIRONMENT + cmake_parse_arguments(xsdk_add_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + if(xsdk_add_test_MPI_NPROCS) + add_test(NAME ${xsdk_add_test_NAME} + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${xsdk_add_test_MPI_NPROCS} + ${MPIEXEC_PREFLAGS} ${xsdk_add_test_COMMAND} ${MPIEXEC_POSTFLAGS} + ) + else() + add_test(NAME ${xsdk_add_test_NAME} COMMAND ${xsdk_add_test_COMMAND}) + endif() + # Quote the list so several VAR=value entries stay one ENVIRONMENT property value + if(xsdk_add_test_ENVIRONMENT) + set_tests_properties( + ${xsdk_add_test_NAME} PROPERTIES ENVIRONMENT "${xsdk_add_test_ENVIRONMENT}" + ) + endif() + +endmacro(xsdk_add_test) diff --git a/hypre/CMakeLists.txt b/hypre/CMakeLists.txt index 74e3d01..3a5e823 100644 --- a/hypre/CMakeLists.txt +++ b/hypre/CMakeLists.txt @@ -1,28 +1,44 @@ -cmake_minimum_required(VERSION 3.12) -project(hypre-superludist - DESCRIPTION "HYPRE + SuperLU_DIST Example" -
LANGUAGES C) +if(ENABLE_SUPERLU AND (NOT ENABLE_CUDA)) + add_executable(ij_laplacian ij_laplacian.c) + target_link_libraries(ij_laplacian PRIVATE XSDK::HYPRE XSDK::SUPERLU MPI::MPI_C) -set(CMAKE_C_COMPILER ${MPI_C_COMPILER}) -set(CMAKE_CXX_COMPILER ${MPI_CXX_COMPILER}) + if(MATH_LIBRARY) + target_link_libraries(ij_laplacian PRIVATE ${MATH_LIBRARY}) + endif() -add_executable(ij_laplacian ij_laplacian.c) + xsdk_add_test( + NAME + HYPRE-ij_laplacian + MPI_NPROCS + 4 + COMMAND + $<TARGET_FILE:ij_laplacian> + -dslu_th + 50 + ) -target_link_libraries(ij_laplacian PRIVATE XSDK::HYPRE XSDK::SUPERLU) -if(MATH_LIBRARY) - target_link_libraries(ij_laplacian PRIVATE ${MATH_LIBRARY}) -endif() - -install(TARGETS ij_laplacian RUNTIME DESTINATION bin) + install(TARGETS ij_laplacian RUNTIME DESTINATION bin) -# Copy glvis helper script and directory -file(COPY vis DESTINATION .) - -install(DIRECTORY vis - DESTINATION share/xsdk-examples/hypre - PATTERN vis/* - PERMISSIONS - OWNER_EXECUTE OWNER_WRITE OWNER_READ - GROUP_EXECUTE GROUP_READ - WORLD_READ WORLD_EXECUTE) + # Copy glvis helper script and directory + file(COPY vis DESTINATION .) + install( + DIRECTORY vis + DESTINATION share/xsdk-examples/hypre + PATTERN + vis/* + PERMISSIONS + OWNER_EXECUTE + OWNER_WRITE + OWNER_READ + GROUP_EXECUTE + GROUP_READ + WORLD_READ + WORLD_EXECUTE + ) +else() + message( + STATUS + "SKIPPED HYPRE-ij_laplacian example.
Requires ENABLE_SUPERLU=ON and ENABLE_CUDA=OFF but got ${ENABLE_SUPERLU} and ${ENABLE_CUDA}" + ) +endif() diff --git a/mfem/CMakeLists.txt b/mfem/CMakeLists.txt index 208ff23..1aa0527 100644 --- a/mfem/CMakeLists.txt +++ b/mfem/CMakeLists.txt @@ -1,11 +1,31 @@ -cmake_minimum_required(VERSION 3.12) -project(mfem-examples - DESCRIPTION "MFEM Examples" - LANGUAGES CXX) +# Ginkgo is included at the upper level when MFEM is enabled +if(ENABLE_GINKGO) + add_subdirectory(ginkgo) +endif() +if(ENABLE_HYPRE) + add_subdirectory(hypre) +endif() +if(ENABLE_SUPERLU AND ENABLE_HYPRE) + add_subdirectory(hypre-superlu) +endif() +if(ENABLE_PETSC) + add_subdirectory(petsc) +endif() +if(ENABLE_SUNDIALS AND ENABLE_HYPRE) + add_subdirectory(sundials) +endif() +if(ENABLE_STRUMPACK AND ENABLE_HYPRE) + add_subdirectory(strumpack) +endif() -add_subdirectory(hypre-superlu) -add_subdirectory(ginkgo) -add_subdirectory(petsc) -add_subdirectory(sundials) +# Copy the data directory from the source tree to the build tree so that the examples can be run +# from in the build tree. +add_custom_command( + OUTPUT data_is_copied + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/data data + COMMAND ${CMAKE_COMMAND} -E touch data_is_copied + COMMENT "Copying the MFEM data directory ..." 
+) +add_custom_target(copy_mfem_data ALL DEPENDS data_is_copied) -install(FILES data/star.mesh data/beam-hex.mesh DESTINATION data) +install(FILES data/star.mesh data/beam-hex.mesh data/periodic-hexagon.mesh DESTINATION data) diff --git a/mfem/ginkgo/CMakeLists.txt b/mfem/ginkgo/CMakeLists.txt index dc364e1..ca97ab6 100644 --- a/mfem/ginkgo/CMakeLists.txt +++ b/mfem/ginkgo/CMakeLists.txt @@ -1,16 +1,23 @@ -cmake_minimum_required(VERSION 3.8) -project(mfem-gko LANGUAGES CXX) - -find_package(OpenMP) - -set(CMAKE_C_COMPILER ${MPI_C_COMPILER}) -set(CMAKE_CXX_COMPILER ${MPI_CXX_COMPILER}) - -option(ENABLE_MFEM "Enable MFEM" ON) - add_executable(mfem_ex1_gko mfem_ex1_gko.cpp) -target_sources(mfem_ex1_gko PRIVATE mfem_ex1_gko.cpp mfem_wrapper.cpp) +target_sources(mfem_ex1_gko PRIVATE mfem_wrapper.cpp) + +target_link_libraries( + mfem_ex1_gko + PRIVATE XSDK::MFEM + XSDK::SUNDIALS + XSDK::HYPRE + XSDK::PETSc + Ginkgo::ginkgo + ZLIB::ZLIB + MPI::MPI_CXX +) -target_link_libraries(mfem_ex1_gko PRIVATE XSDK::MFEM XSDK::SUNDIALS XSDK::HYPRE Ginkgo::ginkgo PETSC::ALL ${METIS_LIBRARY} ZLIB::ZLIB) +xsdk_add_test(NAME MFEM-mfem_ex1_gko COMMAND $<TARGET_FILE:mfem_ex1_gko> --no-partial-assembly) +if(ENABLE_CUDA) + xsdk_add_test( + NAME MFEM-mfem_ex1_gko--gpu + COMMAND $<TARGET_FILE:mfem_ex1_gko> + --partial-assembly --device cuda) +endif() install(TARGETS mfem_ex1_gko RUNTIME DESTINATION bin) diff --git a/mfem/ginkgo/README.md b/mfem/ginkgo/README.md index 6cb6aaa..c7eb2c7 100644 --- a/mfem/ginkgo/README.md +++ b/mfem/ginkgo/README.md @@ -12,7 +12,11 @@ Ginkgo's use. This example is built to run in serial, so launch it with your desired options: ``` -./mfem-gko --no-partial-assembly +./mfem_ex1_gko --no-partial-assembly +``` +When CUDA is enabled, this example can be run on GPU, e.g.
with +``` +./mfem_ex1_gko --partial-assembly --device cuda ``` Useful non-default options: diff --git a/mfem/hypre-superlu/CMakeLists.txt b/mfem/hypre-superlu/CMakeLists.txt index 1ed94f7..4685ed2 100644 --- a/mfem/hypre-superlu/CMakeLists.txt +++ b/mfem/hypre-superlu/CMakeLists.txt @@ -1,11 +1,36 @@ -cmake_minimum_required(VERSION 3.8) -project( convdiff - DESCRIPTION "Convective diffusion using MFEM + SUPERLU + HYPRE" - LANGUAGES CXX) - -set(CMAKE_CXX_COMPILER ${MPI_CXX_COMPILER}) - add_executable(convdiff convdiff.cpp) -target_link_libraries(convdiff PRIVATE XSDK::MFEM PETSC::ALL XSDK::SUNDIALS XSDK::HYPRE XSDK::SUPERLU ZLIB::ZLIB) +target_link_libraries( + convdiff + PRIVATE XSDK::MFEM + XSDK::PETSc + XSDK::HYPRE + XSDK::SUPERLU + XSDK::SUNDIALS + ZLIB::ZLIB + MPI::MPI_CXX +) + +xsdk_add_test( + NAME + MFEM-convdiff--hypre-boomeramg + MPI_NPROCS + 4 + COMMAND + $<TARGET_FILE:convdiff> + --no-superlu + --velocity + 100.0 +) +xsdk_add_test( + NAME + MFEM-convdiff--superlu + MPI_NPROCS + 4 + COMMAND + $<TARGET_FILE:convdiff> + --superlu + --velocity + 100.0 +) install(TARGETS convdiff RUNTIME DESTINATION bin) diff --git a/mfem/hypre-superlu/README.md b/mfem/hypre-superlu/README.md index 56e8937..a5308ef 100644 --- a/mfem/hypre-superlu/README.md +++ b/mfem/hypre-superlu/README.md @@ -16,14 +16,33 @@ be solved. This example is built to run in parallel, so launch it with mpirun and your desired options: ``` mpirun -np 4 ./convdiff --velocity 100.0 --no-superlu +mpirun -np 4 ./convdiff --velocity 100.0 --superlu ``` -Useful non-default options: +Available options: | Flag | Meaning | |:----------------------| :-----------------------------------------------------| +| --help | Print a help message and exit. | | --refine n | Number of times to uniformly refine the initial mesh. | | --order n | Set the polynomial order of the discretization. | | --velocity n | Velocity of the flow field. | | --superlu | Use the SuperLU_Disy direct solver. | -| --no-superlu | Use the interative HYPRE BoomerAMG solver.
| +| --no-superlu | Use the GMRES + HYPRE BoomerAMG solver. (default) | +| --slu-colperm n | Set the SuperLU Column permutation algorithm: | +| | 0 - natural | +| | 1 - mmd-ata | +| | 2 - mmd_at_plus_a | +| | 3 - colamd | +| | 4 - metis_at_plus_a (default) | +| | 5 - parmetis | +| | 6 - zoltan | +| --slu-rowperm n | Set the SuperLU Row permutation algorithm: | +| | 0 - NoRowPerm | +| | 1 - LargeDiag (default) | +| | 2 - MyPermR | +| --one-matrix | Solve with one matrix. (default) | +| --two-matrix | Solve with two different matrices. | +| --one-rhs | Solve with one rhs. (default) | +| --two-rhs | Solve with two different rhs. | | --visit | Output VisIt files for visualation of the solution. | +| --no-visit | Do not output VisIt files. (default) | diff --git a/mfem/hypre-superlu/convdiff.cpp b/mfem/hypre-superlu/convdiff.cpp index 769df01..1e9dff9 100644 --- a/mfem/hypre-superlu/convdiff.cpp +++ b/mfem/hypre-superlu/convdiff.cpp @@ -51,7 +51,7 @@ int main(int argc, char *argv[]) args.AddOption(&two_matrix, "-2mat", "--two-matrix", "-1mat", "--one-matrix", "Solve with 1 or two different matrices."); args.AddOption(&two_rhs, "-2rhs", "--two-rhs", "-1rhs", "--one-rhs", - "Solve with 1 or two different rhs."); + "Solve with 1 or two different rhs."); args.Parse(); if (!args.Good()) { @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) } MPI_Finalize(); return 1; - } + } if (myid == 0) { args.PrintOptions(cout); @@ -70,15 +70,16 @@ int main(int argc, char *argv[]) // 3. Read the (serial) mesh from the given mesh file on all processors. We // can handle triangular, quadrilateral, tetrahedral, hexahedral, surface // and volume meshes with the same code. - Mesh *mesh = new Mesh(100, 100, Element::QUADRILATERAL, 1, 1.0, 1.0); - int dim = mesh->Dimension(); + Mesh mesh = Mesh::MakeCartesian2D(100, 100, Element::QUADRILATERAL, + true, 1.0, 1.0); + int dim = mesh.Dimension(); // 5. Define a parallel mesh by a partitioning of the serial mesh.
Refine // this mesh further in parallel to increase the resolution (1 time by // default, or specified on the command line with -rp). Once the parallel // mesh is defined, the serial mesh can be deleted. - ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh); - delete mesh; + ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, mesh); + mesh.Clear(); for (int lev = 0; lev < ref_levels; lev++) { pmesh->UniformRefinement(); @@ -160,7 +161,9 @@ int main(int argc, char *argv[]) } else { + CD.HostRead(); SLUCD = new SuperLURowLocMatrix(CD); + CD.HypreRead(); superlu = new SuperLUSolver(MPI_COMM_WORLD); superlu->SetPrintStatistics(true); superlu->SetSymmetricPattern(false); @@ -237,9 +240,10 @@ int main(int argc, char *argv[]) } Vector R(B); // R = B CD.Mult(1.0, X, -1.0, R); // R = CD X - B + double res_final = sqrt(InnerProduct(pmesh->GetComm(), R, R)); if (myid == 0) { - cout << "Final L2 norm of residual: " << sqrt(R*R) << endl << endl; + cout << "Final L2 norm of residual: " << res_final << endl << endl; } if (two_rhs) { @@ -252,10 +256,11 @@ int main(int argc, char *argv[]) } Vector R(B); // R = B CD.Mult(1.0, X, -1.0, R); // R = CD X - B + res_final = sqrt(InnerProduct(pmesh->GetComm(), R, R)); if (myid == 0) { - cout << "Final L2 norm of residual: " << sqrt(R*R) << endl << endl; - } + cout << "Final L2 norm of residual: " << res_final << endl << endl; + } } // 9b. 
Complete the solve a second time with another matrix to show off the saved @@ -277,9 +282,11 @@ int main(int argc, char *argv[]) solver = gmres; } else - { + { delete SLUCD; + CD.HostRead(); SLUCD = new SuperLURowLocMatrix(CD); + CD.HypreRead(); superlu->SetOperator(*SLUCD); } tic(); @@ -290,9 +297,10 @@ int main(int argc, char *argv[]) } R = B; CD.Mult(1.0, X, -1.0, R); // R = CD X - B + res_final = sqrt(InnerProduct(pmesh->GetComm(), R, R)); if (myid == 0) { - cout << "Final L2 norm of residual: " << sqrt(R*R) << endl; + cout << "Final L2 norm of residual: " << res_final << endl; } } cd->RecoverFEMSolution(X, *b, x); diff --git a/mfem/hypre-superlu/makefile b/mfem/hypre-superlu/makefile deleted file mode 100644 index 770e20a..0000000 --- a/mfem/hypre-superlu/makefile +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at the -# Lawrence Livermore National Laboratory. LLNL-CODE-443211. All Rights reserved. -# See file COPYRIGHT for details. -# -# This file is part of the MFEM library. For more information and source code -# availability see http://mfem.org. -# -# MFEM is free software; you can redistribute it and/or modify it under the -# terms of the GNU Lesser General Public License (as published by the Free -# Software Foundation) version 2.1 dated February 1999. - -# Use the MFEM build directory -MFEM_DIR ?= ../../.. -MFEM_BUILD_DIR ?= ../../.. 
-SRC = $(if $(MFEM_DIR:../../..=),$(MFEM_DIR)/examples/atpesc/superlu,) -CONFIG_MK = $(MFEM_BUILD_DIR)/config/config.mk - -MFEM_LIB_FILE = mfem_is_not_built --include $(CONFIG_MK) - -SEQ_EXAMPLES = -PAR_EXAMPLES = convdiff -ifeq ($(MFEM_USE_MPI),NO) - EXAMPLES = $(SEQ_EXAMPLES) -else - EXAMPLES = $(PAR_EXAMPLES) $(SEQ_EXAMPLES) -endif - -.SUFFIXES: -.SUFFIXES: .o .cpp .mk -.PHONY: all clean clean-build clean-exec - -# Remove built-in rule -%: %.cpp - -# Replace the default implicit rule for *.cpp files -%: $(SRC)%.cpp $(MFEM_LIB_FILE) $(CONFIG_MK) - $(MFEM_CXX) $(MFEM_FLAGS) $< -o $@ $(MFEM_LIBS) - -all: $(EXAMPLES) - -# Generate an error message if the MFEM library is not built and exit -$(MFEM_LIB_FILE): - $(error The MFEM library is not built) - -clean: clean-build clean-exec - -clean-build: - rm -f *.o *~ $(SEQ_EXAMPLES) $(PAR_EXAMPLES) - rm -rf *.dSYM *.TVD.*breakpoints - -clean-exec: - rm -rf dump_* mesh.* sol.* *.core diff --git a/mfem/hypre/CMakeLists.txt b/mfem/hypre/CMakeLists.txt new file mode 100644 index 0000000..97942a9 --- /dev/null +++ b/mfem/hypre/CMakeLists.txt @@ -0,0 +1,59 @@ +set(example magnetic-diffusion) + +add_executable(${example} ${example}.cpp) +target_link_libraries( + ${example} + PRIVATE XSDK::MFEM + XSDK::PETSc + XSDK::HYPRE + XSDK::SUPERLU + XSDK::SUNDIALS + ZLIB::ZLIB + MPI::MPI_CXX +) + +xsdk_add_test( + NAME + MFEM-magnetic-diffusion--cpu + MPI_NPROCS + 4 + COMMAND + $ + -m + ../data/star.mesh +) +# TODO: add the other CPU sample runs from README.md +if(ENABLE_CUDA) + xsdk_add_test( + NAME + MFEM-magnetic-diffusion--gpu + MPI_NPROCS + 4 + COMMAND + $ + -m + ../data/star.mesh + -pa + -d + cuda + ) + # TODO: add the other GPU sample runs from README.md +endif() +if(ENABLE_HIP) + xsdk_add_test( + NAME + MFEM-magnetic-diffusion--gpu + MPI_NPROCS + 4 + COMMAND + $ + -m + ../data/star.mesh + -pa + -d + hip + ) + # TODO: add the other GPU sample runs from README.md +endif() + +install(TARGETS ${example} RUNTIME DESTINATION bin) diff 
--git a/mfem/hypre/README.md b/mfem/hypre/README.md new file mode 100644 index 0000000..ce0134d --- /dev/null +++ b/mfem/hypre/README.md @@ -0,0 +1,29 @@ +# MFEM-HYPRE example + +This example demonstrates the integration of MFEM with HYPRE on both CPUs and +GPUs. + +This example code utilizes MFEM to discretize a steady state magnetic diffusion +problem, curl curl E + E = f, with suitable boundary condition. The problem is +discretized using H(curl)-conforming finite elements of arbitrary specified +order on any given mesh. The discretized problem is then solved using PCG with +HYPRE's AMS preconditioner. + +Sample CPU runs: +``` +mpirun -np 4 ./magnetic-diffusion -m ../data/star.mesh +mpirun -np 4 ./magnetic-diffusion -m ../data/beam-hex.mesh +mpirun -np 4 ./magnetic-diffusion -m ../data/beam-hex.mesh -o 2 -pa +``` + +Sample GPU runs, replace `<dev>` with `cuda` or `hip`: +``` +mpirun -np 4 ./magnetic-diffusion -m ../data/star.mesh -pa -d <dev> +mpirun -np 4 ./magnetic-diffusion -m ../data/star.mesh -no-pa -d <dev> +mpirun -np 4 ./magnetic-diffusion -m ../data/beam-hex.mesh -pa -d <dev> +``` + +For a full list of options, see +``` +./magnetic-diffusion -h +``` diff --git a/mfem/hypre/magnetic-diffusion.cpp b/mfem/hypre/magnetic-diffusion.cpp new file mode 100644 index 0000000..eafbe4d --- /dev/null +++ b/mfem/hypre/magnetic-diffusion.cpp @@ -0,0 +1,313 @@ +// xSDK Example based on +// MFEM Example 3 - Parallel Version +// +// Sample CPU runs: +// mpirun -np 4 ./magnetic-diffusion -m ../data/star.mesh +// mpirun -np 4 ./magnetic-diffusion -m ../data/beam-hex.mesh +// mpirun -np 4 ./magnetic-diffusion -m ../data/beam-hex.mesh -o 2 -pa +// +// Sample GPU runs, replace <dev> with cuda or hip: +// mpirun -np 4 ./magnetic-diffusion -m ../data/star.mesh -pa -d <dev> +// mpirun -np 4 ./magnetic-diffusion -m ../data/star.mesh -no-pa -d <dev> +// mpirun -np 4 ./magnetic-diffusion -m ../data/beam-hex.mesh -pa -d <dev> +// +// Description: This xSDK example demonstrates the integration of MFEM with +// HYPRE on both CPUs
and GPUs. +// +// This example code solves a simple electromagnetic diffusion +// problem corresponding to the second order definite Maxwell +// equation curl curl E + E = f with boundary condition +// E x n = . Here, we use a given exact +// solution E and compute the corresponding r.h.s. f. +// We discretize with Nedelec finite elements in 2D or 3D. +// +// The example demonstrates the use of H(curl) finite element +// spaces with the curl-curl and the (vector finite element) mass +// bilinear form, as well as the computation of discretization +// error when the exact solution is known. Static condensation is +// also illustrated. +// +// We recommend viewing examples 1-2 before viewing this example. + +#include "mfem.hpp" +#include +#include + +using namespace std; +using namespace mfem; + +// Exact solution, E, and r.h.s., f. See below for implementation. +void E_exact(const Vector &, Vector &); +void f_exact(const Vector &, Vector &); +double freq = 1.0, kappa; +int dim; + +int main(int argc, char *argv[]) +{ + // 1. Initialize MPI. + int num_procs, myid; + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + + // 2. Parse command-line options. 
+ const char *mesh_file = "../data/star.mesh"; + int order = 1; + bool static_cond = false; + bool pa = false; + const char *device_config = "cpu"; + bool visualization = true; + + OptionsParser args(argc, argv); + args.AddOption(&mesh_file, "-m", "--mesh", + "Mesh file to use."); + args.AddOption(&order, "-o", "--order", + "Finite element order (polynomial degree)."); + args.AddOption(&freq, "-f", "--frequency", "Set the frequency for the exact" + " solution."); + args.AddOption(&static_cond, "-sc", "--static-condensation", "-no-sc", + "--no-static-condensation", "Enable static condensation."); + args.AddOption(&pa, "-pa", "--partial-assembly", "-no-pa", + "--no-partial-assembly", "Enable Partial Assembly."); + args.AddOption(&device_config, "-d", "--device", + "Device configuration string, see Device::Configure()."); + args.AddOption(&visualization, "-vis", "--visualization", "-no-vis", + "--no-visualization", + "Enable or disable GLVis visualization."); + + args.Parse(); + if (!args.Good()) + { + if (myid == 0) + { + args.PrintUsage(cout); + } + // HYPRE_Finalize(); + MPI_Finalize(); + return 1; + } + if (myid == 0) + { + args.PrintOptions(cout); + } + kappa = freq * M_PI; + + // 3. Enable hardware devices such as GPUs, and programming models such as + // CUDA, OCCA, RAJA and OpenMP based on command line options. + Device device(device_config); + if (myid == 0) { device.Print(); } + + // 4. Read the (serial) mesh from the given mesh file on all processors. We + // can handle triangular, quadrilateral, tetrahedral, hexahedral, surface + // and volume meshes with the same code. + Mesh *mesh = new Mesh(mesh_file, 1, 1); + dim = mesh->Dimension(); + int sdim = mesh->SpaceDimension(); + + // 5. Refine the serial mesh on all processors to increase the resolution. In + // this example we do 'ref_levels' of uniform refinement. We choose + // 'ref_levels' to be the largest number that gives a final mesh with no + // more than 1,000 elements. 
+ { + int ref_levels = (int)floor(log(1000./mesh->GetNE())/log(2.)/dim); + for (int l = 0; l < ref_levels; l++) + { + mesh->UniformRefinement(); + } + } + + // 6. Define a parallel mesh by a partitioning of the serial mesh. Refine + // this mesh further in parallel to increase the resolution. Once the + // parallel mesh is defined, the serial mesh can be deleted. Tetrahedral + // meshes need to be reoriented before we can define high-order Nedelec + // spaces on them. + ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh); + delete mesh; + { + int par_ref_levels = 2; + for (int l = 0; l < par_ref_levels; l++) + { + pmesh->UniformRefinement(); + } + } + pmesh->ReorientTetMesh(); + + // 7. Define a parallel finite element space on the parallel mesh. Here we + // use the Nedelec finite elements of the specified order. + FiniteElementCollection *fec = new ND_FECollection(order, dim); + ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec); + HYPRE_BigInt size = fespace->GlobalTrueVSize(); + if (myid == 0) + { + cout << "Number of finite element unknowns: " << size << endl; + } + + // 8. Determine the list of true (i.e. parallel conforming) essential + // boundary dofs. In this example, the boundary conditions are defined + // by marking all the boundary attributes from the mesh as essential + // (Dirichlet) and converting them to a list of true dofs. + Array ess_tdof_list; + Array ess_bdr; + if (pmesh->bdr_attributes.Size()) + { + ess_bdr.SetSize(pmesh->bdr_attributes.Max()); + ess_bdr = 1; + fespace->GetEssentialTrueDofs(ess_bdr, ess_tdof_list); + } + + // 9. Set up the parallel linear form b(.) which corresponds to the + // right-hand side of the FEM linear system, which in this case is + // (f,phi_i) where f is given by the function f_exact and phi_i are the + // basis functions in the finite element fespace. 
+ VectorFunctionCoefficient f(sdim, f_exact); + ParLinearForm *b = new ParLinearForm(fespace); + b->AddDomainIntegrator(new VectorFEDomainLFIntegrator(f)); + b->Assemble(); + + // 10. Define the solution vector x as a parallel finite element grid function + // corresponding to fespace. Initialize x by projecting the exact + // solution. Note that only values from the boundary edges will be used + // when eliminating the non-homogeneous boundary condition to modify the + // r.h.s. vector b. + ParGridFunction x(fespace); + VectorFunctionCoefficient E(sdim, E_exact); + x.ProjectCoefficient(E); + + // 11. Set up the parallel bilinear form corresponding to the EM diffusion + // operator curl muinv curl + sigma I, by adding the curl-curl and the + // mass domain integrators. + Coefficient *muinv = new ConstantCoefficient(1.0); + Coefficient *sigma = new ConstantCoefficient(1.0); + ParBilinearForm *a = new ParBilinearForm(fespace); + if (pa) { a->SetAssemblyLevel(AssemblyLevel::PARTIAL); } + a->AddDomainIntegrator(new CurlCurlIntegrator(*muinv)); + a->AddDomainIntegrator(new VectorFEMassIntegrator(*sigma)); + + // 12. Assemble the parallel bilinear form and the corresponding linear + // system, applying any necessary transformations such as: parallel + // assembly, eliminating boundary conditions, applying conforming + // constraints for non-conforming AMR, static condensation, etc. + if (static_cond) { a->EnableStaticCondensation(); } + a->Assemble(); + + OperatorPtr A; + Vector B, X; + a->FormLinearSystem(ess_tdof_list, x, *b, A, X, B); + + // 13. Solve the system AX=B using PCG with an AMS preconditioner. 
+ if (pa) + { + MatrixFreeAMS ams(*a, *A, *fespace, muinv, sigma, NULL, ess_bdr); + CGSolver cg(MPI_COMM_WORLD); + cg.SetRelTol(1e-12); + cg.SetMaxIter(1000); + cg.SetPrintLevel(1); + cg.SetOperator(*A); + cg.SetPreconditioner(ams); + cg.Mult(B, X); + } + else + { + if (myid == 0) + { + cout << "Size of linear system: " + << A.As()->GetGlobalNumRows() << endl; + } + + ParFiniteElementSpace *prec_fespace = + (a->StaticCondensationIsEnabled() ? a->SCParFESpace() : fespace); + HypreAMS ams(*A.As(), prec_fespace); + HyprePCG pcg(*A.As()); + pcg.SetTol(1e-12); + pcg.SetMaxIter(500); + pcg.SetPrintLevel(2); + pcg.SetPreconditioner(ams); + pcg.Mult(B, X); + } + + // 14. Recover the parallel grid function corresponding to X. This is the + // local finite element solution on each processor. + a->RecoverFEMSolution(X, *b, x); + + // 15. Compute and print the L^2 norm of the error. + { + double err = x.ComputeL2Error(E); + if (myid == 0) + { + cout << "\n|| E_h - E ||_{L^2} = " << err << '\n' << endl; + } + } + + // 16. Save the refined mesh and the solution in parallel. This output can + // be viewed later using GLVis: "glvis -np -m mesh -g sol". + { + ostringstream mesh_name, sol_name; + mesh_name << "mesh." << setfill('0') << setw(6) << myid; + sol_name << "sol." << setfill('0') << setw(6) << myid; + + ofstream mesh_ofs(mesh_name.str().c_str()); + mesh_ofs.precision(8); + pmesh->Print(mesh_ofs); + + ofstream sol_ofs(sol_name.str().c_str()); + sol_ofs.precision(8); + x.Save(sol_ofs); + } + + // 17. Send the solution by socket to a GLVis server. + if (visualization) + { + char vishost[] = "localhost"; + int visport = 19916; + socketstream sol_sock(vishost, visport); + sol_sock << "parallel " << num_procs << " " << myid << "\n"; + sol_sock.precision(8); + sol_sock << "solution\n" << *pmesh << x << flush; + } + + // 18. Free the used memory. 
+ delete a; + delete sigma; + delete muinv; + delete b; + delete fespace; + delete fec; + delete pmesh; + + MPI_Finalize(); + + return 0; +} + + +void E_exact(const Vector &x, Vector &E) +{ + if (dim == 3) + { + E(0) = sin(kappa * x(1)); + E(1) = sin(kappa * x(2)); + E(2) = sin(kappa * x(0)); + } + else + { + E(0) = sin(kappa * x(1)); + E(1) = sin(kappa * x(0)); + if (x.Size() == 3) { E(2) = 0.0; } + } +} + +void f_exact(const Vector &x, Vector &f) +{ + if (dim == 3) + { + f(0) = (1. + kappa * kappa) * sin(kappa * x(1)); + f(1) = (1. + kappa * kappa) * sin(kappa * x(2)); + f(2) = (1. + kappa * kappa) * sin(kappa * x(0)); + } + else + { + f(0) = (1. + kappa * kappa) * sin(kappa * x(1)); + f(1) = (1. + kappa * kappa) * sin(kappa * x(0)); + if (x.Size() == 3) { f(2) = 0.0; } + } +} diff --git a/mfem/petsc/CMakeLists.txt b/mfem/petsc/CMakeLists.txt index ad5d0aa..5184b69 100644 --- a/mfem/petsc/CMakeLists.txt +++ b/mfem/petsc/CMakeLists.txt @@ -1,12 +1,20 @@ -cmake_minimum_required(VERSION 3.8) -project( obstacle - DESCRIPTION "Obstacle problem using MFEM + PETSC/Tao" - LANGUAGES CXX) +add_executable(obstacle obstacle.cpp) -set(CMAKE_C_COMPILER ${MPI_C_COMPILER}) -set(CMAKE_CXX_COMPILER ${MPI_CXX_COMPILER}) +target_link_libraries( + obstacle PRIVATE XSDK::MFEM XSDK::HYPRE XSDK::PETSc XSDK::SUNDIALS ZLIB::ZLIB MPI::MPI_CXX +) -add_executable(obstacle obstacle.cpp) -target_link_libraries(obstacle PRIVATE XSDK::MFEM XSDK::HYPRE XSDK::SUNDIALS PETSC::ALL ZLIB::ZLIB) +# This is a serial example linked with MPI, so launch it with 'mpirun' or +# equivalent to avoid warnings on some platforms. 
+xsdk_add_test( + NAME + MFEM-obstacle + MPI_NPROCS + 1 + COMMAND + $ + --order + 2 +) install(TARGETS obstacle RUNTIME DESTINATION bin) diff --git a/mfem/petsc/README.md b/mfem/petsc/README.md index 3aadb6b..cc73661 100644 --- a/mfem/petsc/README.md +++ b/mfem/petsc/README.md @@ -2,8 +2,8 @@ This example solves the classical obstacle problem which models an edge clamped elastic membrane pulled over a rigid obstacle. MFEM is used to discretize the underlying Poisson equation and PETSC-TAO is used the solve to optimization -problem. This proble also demonstrates the how MFEM and PETSc can share -the data from vectors of each type. +problem. This problem also demonstrates the how MFEM and PETSc can share the +data from vectors of each type. This example is built to run in serial, so launch it wth your desired options: ``` @@ -14,4 +14,4 @@ Useful non-default options: | Flag | Meaning | |:----------------------| :-----------------------------------------------------| | --order n | Set the polynomial order of the discretization. | -| --visit | Output VisIt files for visualation of the solution. | \ No newline at end of file +| --visit | Output VisIt files for visualation of the solution. | diff --git a/mfem/petsc/makefile b/mfem/petsc/makefile deleted file mode 100644 index 583aeb7..0000000 --- a/mfem/petsc/makefile +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at the -# Lawrence Livermore National Laboratory. LLNL-CODE-443211. All Rights reserved. -# See file COPYRIGHT for details. -# -# This file is part of the MFEM library. For more information and source code -# availability see http://mfem.org. -# -# MFEM is free software; you can redistribute it and/or modify it under the -# terms of the GNU Lesser General Public License (as published by the Free -# Software Foundation) version 2.1 dated February 1999. - -# Use the MFEM build directory -MFEM_DIR ?= ../../.. -MFEM_BUILD_DIR ?= ../../.. 
-SRC = $(if $(MFEM_DIR:../../..=),$(MFEM_DIR)/examples/atpesc/tao,) -CONFIG_MK = $(MFEM_BUILD_DIR)/config/config.mk - -MFEM_LIB_FILE = mfem_is_not_built --include $(CONFIG_MK) - -SEQ_EXAMPLES = -PAR_EXAMPLES = obstacle -ifeq ($(MFEM_USE_MPI),NO) - EXAMPLES = $(SEQ_EXAMPLES) -else - EXAMPLES = $(PAR_EXAMPLES) $(SEQ_EXAMPLES) -endif - -.SUFFIXES: -.SUFFIXES: .o .cpp .mk -.PHONY: all clean clean-build clean-exec - -# Remove built-in rule -%: %.cpp - -# Replace the default implicit rule for *.cpp files -%: $(SRC)%.cpp $(MFEM_LIB_FILE) $(CONFIG_MK) - $(MFEM_CXX) $(MFEM_FLAGS) $< -o $@ $(MFEM_LIBS) - -all: $(EXAMPLES) - -ifeq ($(MFEM_USE_PETSC),NO) -$(EXAMPLES): - $(error MFEM is not configured with PETSC) -endif - -# Generate an error message if the MFEM library is not built and exit -$(MFEM_LIB_FILE): - $(error The MFEM library is not built) - -clean: clean-build clean-exec - -clean-build: - rm -f *.o *~ $(SEQ_EXAMPLES) $(PAR_EXAMPLES) - rm -rf *.dSYM *.TVD.*breakpoints - -clean-exec: - rm -rf obstacle_* *.core diff --git a/mfem/strumpack/CMakeLists.txt b/mfem/strumpack/CMakeLists.txt new file mode 100644 index 0000000..4931764 --- /dev/null +++ b/mfem/strumpack/CMakeLists.txt @@ -0,0 +1,53 @@ +add_executable(diffusion-eigen diffusion-eigen.cpp) + +target_link_libraries( + diffusion-eigen + PRIVATE XSDK::MFEM + XSDK::PETSc + XSDK::HYPRE + XSDK::SUPERLU + XSDK::SUNDIALS + STRUMPACK::strumpack + ZLIB::ZLIB +) + +xsdk_add_test( + NAME + MFEM-diffusion-eigen--strumpack + MPI_NPROCS + 4 + COMMAND + $ + -m + ../data/star.mesh + -ls + strumpack +) +if(ENABLE_SUPERLU) + xsdk_add_test( + NAME + MFEM-diffusion-eigen--superlu + MPI_NPROCS + 4 + COMMAND + $ + -m + ../data/star.mesh + -ls + superlu + ) +endif() +xsdk_add_test( + NAME + MFEM-diffusion-eigen--hypre-boomeramg + MPI_NPROCS + 4 + COMMAND + $ + -m + ../data/star.mesh + -ls + hypre +) + +install(TARGETS diffusion-eigen RUNTIME DESTINATION bin) diff --git a/mfem/strumpack/README.md b/mfem/strumpack/README.md new file mode 
100644 index 0000000..a4dc8dc --- /dev/null +++ b/mfem/strumpack/README.md @@ -0,0 +1,29 @@ +# MFEM-STRUMPACK-SuperLU-HYPRE example + +This example code utilizes MFEM to discretize a diffusion eigenvalue problem +-Delta u = lambda u, with homogeneous Dirichlet boundary conditions. The problem +is discretized using continuous finite elements of arbitrary specified order on +any given mesh. The discretized eigenvalue problem is solved using HYPRE's +LOBPCG eigenvalue solver with linear system preconditioner/solver using +STRUMPACK, SuperLU, or HYPRE's BoomerAMG. + +This example is built to run in parallel, so launch it with mpirun and your +desired options, e.g. using STRUMPACK: +``` +mpirun -np 4 ./diffusion-eigen -m ../data/star.mesh -ls strumpack +``` +SuperLU: +``` +mpirun -np 4 ./diffusion-eigen -m ../data/star.mesh -ls superlu +``` +or HYPRE: +``` +mpirun -np 4 ./diffusion-eigen -m ../data/star.mesh -ls hypre +``` + +For a full list of options, see +``` +./diffusion-eigen -h +``` +Note that when using STRUMPACK (the default) command-line parameters are also +passed to STRUMPACK to support STRUMPACK-specific parameters. diff --git a/mfem/strumpack/diffusion-eigen.cpp b/mfem/strumpack/diffusion-eigen.cpp new file mode 100644 index 0000000..7c36589 --- /dev/null +++ b/mfem/strumpack/diffusion-eigen.cpp @@ -0,0 +1,360 @@ +// xSDK Example based on +// MFEM Example 11 - Parallel Version +// +// Sample runs: +// mpirun -np 4 ./diffusion-eigen -m ../data/star.mesh -ls strumpack +// mpirun -np 4 ./diffusion-eigen -m ../data/star.mesh -ls superlu +// mpirun -np 4 ./diffusion-eigen -m ../data/star.mesh -ls hypre +// +// Description: This xSDK example demonstrates the integration of MFEM with +// STRUMPACK, as well as HYPRE and SuperLU. +// +// This example code demonstrates the use of MFEM to solve the +// eigenvalue problem -Delta u = lambda u with homogeneous +// Dirichlet boundary conditions.
+// +// We compute a number of the lowest eigenmodes by discretizing +// the Laplacian and Mass operators using a FE space of the +// specified order, or an isoparametric/isogeometric space if +// order < 1 (quadratic for quadratic curvilinear mesh, NURBS for +// NURBS mesh, etc.) +// +// The example highlights the use of the LOBPCG eigenvalue solver +// together with the BoomerAMG preconditioner in HYPRE, as well as +// optionally the SuperLU or STRUMPACK parallel direct solvers. +// Reusing a single GLVis visualization window for multiple +// eigenfunctions is also illustrated. +// +// We recommend viewing Example 1 before viewing this example. + +#include "mfem.hpp" +#include +#include + +#ifndef MFEM_USE_STRUMPACK +#error This example requires MFEM built with MFEM_USE_STRUMPACK=YES. +#endif + +using namespace std; +using namespace mfem; + +int main(int argc, char *argv[]) +{ + // 1. Initialize MPI. + int num_procs, myid; + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + + // 2. Parse command-line options. 
+ const char *mesh_file = "../data/star.mesh"; + int ser_ref_levels = 2; + int par_ref_levels = 1; + int order = 1; + int nev = 5; + int seed = 75; + const char *linear_solver = "strumpack"; + bool visualization = false; + + OptionsParser args(argc, argv); + args.AddOption(&mesh_file, "-m", "--mesh", + "Mesh file to use."); + args.AddOption(&ser_ref_levels, "-rs", "--refine-serial", + "Number of times to refine the mesh uniformly in serial."); + args.AddOption(&par_ref_levels, "-rp", "--refine-parallel", + "Number of times to refine the mesh uniformly in parallel."); + args.AddOption(&order, "-o", "--order", + "Finite element order (polynomial degree) or -1 for" + " isoparametric space."); + args.AddOption(&nev, "-n", "--num-eigs", + "Number of desired eigenmodes."); + args.AddOption(&seed, "-s", "--seed", + "Random seed used to initialize LOBPCG."); + args.AddOption(&linear_solver, "-ls", "--linear-solver", + "Linear solver to use: " + "'strumpack' (default), 'hypre', or 'superlu'."); + args.AddOption(&visualization, "-vis", "--visualization", "-no-vis", + "--no-visualization", + "Enable or disable GLVis visualization."); + args.Parse(); + const string lin_solver(linear_solver); + if (lin_solver != "strumpack" && lin_solver != "superlu" && + lin_solver != "hypre") + { + if (myid == 0) + { + cout << "Unknown linear solver: " << lin_solver << endl; + } + MPI_Finalize(); + return 2; + } +#ifndef MFEM_USE_SUPERLU + if (lin_solver == "superlu") + { + if (myid == 0) + { + cout << "Linear solver \"superlu\" requires MFEM_USE_SUPERLU=YES." + << endl; + } + MPI_Finalize(); + return 3; + } +#endif + // The command line options are also passed to the STRUMPACK + // solver. So do not exit if some options are not recognized. + if (lin_solver != "strumpack" || args.Help()) + { + if (!args.Good()) + { + if (myid == 0) + { + args.PrintUsage(cout); + } + MPI_Finalize(); + return 1; + } + } + if (myid == 0) + { + args.PrintOptions(cout); + } + + // 3. 
Read the (serial) mesh from the given mesh file on all processors. We + // can handle triangular, quadrilateral, tetrahedral, hexahedral, surface + // and volume meshes with the same code. + Mesh *mesh = new Mesh(mesh_file, 1, 1); + int dim = mesh->Dimension(); + + // 4. Refine the serial mesh on all processors to increase the resolution. In + // this example we do 'ref_levels' of uniform refinement (2 by default, or + // specified on the command line with -rs). + for (int lev = 0; lev < ser_ref_levels; lev++) + { + mesh->UniformRefinement(); + } + + // 5. Define a parallel mesh by a partitioning of the serial mesh. Refine + // this mesh further in parallel to increase the resolution (1 time by + // default, or specified on the command line with -rp). Once the parallel + // mesh is defined, the serial mesh can be deleted. + ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, *mesh); + delete mesh; + for (int lev = 0; lev < par_ref_levels; lev++) + { + pmesh->UniformRefinement(); + } + + // 6. Define a parallel finite element space on the parallel mesh. Here we + // use continuous Lagrange finite elements of the specified order. If + // order < 1, we instead use an isoparametric/isogeometric space. + FiniteElementCollection *fec; + if (order > 0) + { + fec = new H1_FECollection(order, dim); + } + else if (pmesh->GetNodes()) + { + fec = pmesh->GetNodes()->OwnFEC(); + } + else + { + fec = new H1_FECollection(order = 1, dim); + } + ParFiniteElementSpace *fespace = new ParFiniteElementSpace(pmesh, fec); + HYPRE_BigInt size = fespace->GlobalTrueVSize(); + if (myid == 0) + { + cout << "Number of unknowns: " << size << endl; + } + + // 7. Set up the parallel bilinear forms a(.,.) and m(.,.) on the finite + // element space. The first corresponds to the Laplacian operator -Delta, + // while the second is a simple mass matrix needed on the right hand side + // of the generalized eigenvalue problem below. 
The boundary conditions + // are implemented by elimination with special values on the diagonal to + // shift the Dirichlet eigenvalues out of the computational range. After + // serial and parallel assembly we extract the corresponding parallel + // matrices A and M. + ConstantCoefficient one(1.0); + Array ess_bdr; + if (pmesh->bdr_attributes.Size()) + { + ess_bdr.SetSize(pmesh->bdr_attributes.Max()); + ess_bdr = 1; + } + + ParBilinearForm *a = new ParBilinearForm(fespace); + a->AddDomainIntegrator(new DiffusionIntegrator(one)); + if (pmesh->bdr_attributes.Size() == 0) + { + // Add a mass term if the mesh has no boundary, e.g. periodic mesh or + // closed surface. + a->AddDomainIntegrator(new MassIntegrator(one)); + } + a->Assemble(); + a->EliminateEssentialBCDiag(ess_bdr, 1.0); + a->Finalize(); + + ParBilinearForm *m = new ParBilinearForm(fespace); + m->AddDomainIntegrator(new MassIntegrator(one)); + m->Assemble(); + // shift the eigenvalue corresponding to eliminated dofs to a large value + m->EliminateEssentialBCDiag(ess_bdr, numeric_limits::min()); + m->Finalize(); + + HypreParMatrix *A = a->ParallelAssemble(); + HypreParMatrix *M = m->ParallelAssemble(); + + Operator * Arow = NULL; + A->HostRead(); + if (lin_solver == "strumpack") + { + Arow = new STRUMPACKRowLocMatrix(*A); + } +#ifdef MFEM_USE_SUPERLU + if (lin_solver == "superlu") + { + Arow = new SuperLURowLocMatrix(*A); + } +#endif + A->HypreRead(); + + delete a; + delete m; + + // 8. Define and configure the LOBPCG eigensolver and the BoomerAMG + // preconditioner for A to be used within the solver. Set the matrices + // which define the generalized eigenproblem A x = lambda M x. 
+ Solver * precond = NULL; + if (lin_solver == "strumpack") + { + STRUMPACKSolver * strumpack = new STRUMPACKSolver(argc, argv, MPI_COMM_WORLD); + strumpack->SetPrintFactorStatistics(true); + strumpack->SetPrintSolveStatistics(false); + strumpack->SetKrylovSolver(strumpack::KrylovSolver::DIRECT); + strumpack->SetReorderingStrategy(strumpack::ReorderingStrategy::METIS); + strumpack->DisableMatching(); + strumpack->SetOperator(*Arow); + strumpack->SetFromCommandLine(); + precond = strumpack; + } + else if (lin_solver != "superlu") + { + HypreBoomerAMG * amg = new HypreBoomerAMG(*A); + amg->SetPrintLevel(0); + precond = amg; + } + else // lin_solver == "superlu" + { +#ifdef MFEM_USE_SUPERLU + SuperLUSolver * superlu = new SuperLUSolver(MPI_COMM_WORLD); + superlu->SetPrintStatistics(false); + superlu->SetSymmetricPattern(true); + superlu->SetColumnPermutation(superlu::PARMETIS); + superlu->SetOperator(*Arow); + precond = superlu; +#endif + } + + HypreLOBPCG * lobpcg = new HypreLOBPCG(MPI_COMM_WORLD); + lobpcg->SetNumModes(nev); + lobpcg->SetRandomSeed(seed); + lobpcg->SetPreconditioner(*precond); + lobpcg->SetMaxIter(200); + lobpcg->SetTol(1e-8); + lobpcg->SetPrecondUsageMode(1); + lobpcg->SetPrintLevel(1); + lobpcg->SetMassMatrix(*M); + lobpcg->SetOperator(*A); + + // 9. Compute the eigenmodes and extract the array of eigenvalues. Define a + // parallel grid function to represent each of the eigenmodes returned by + // the solver. + Array eigenvalues; + lobpcg->Solve(); + lobpcg->GetEigenvalues(eigenvalues); + ParGridFunction x(fespace); + + // 10. Save the refined mesh and the modes in parallel. This output can be + // viewed later using GLVis: "glvis -np -m mesh -g mode". + { + ostringstream mesh_name, mode_name; + mesh_name << "mesh." 
<< setfill('0') << setw(6) << myid; + + ofstream mesh_ofs(mesh_name.str().c_str()); + mesh_ofs.precision(8); + pmesh->Print(mesh_ofs); + + for (int i=0; iGetEigenvector(i); + + mode_name << "mode_" << setfill('0') << setw(2) << i << "." + << setfill('0') << setw(6) << myid; + + ofstream mode_ofs(mode_name.str().c_str()); + mode_ofs.precision(8); + x.Save(mode_ofs); + mode_name.str(""); + } + } + + // 11. Send the solution by socket to a GLVis server. + if (visualization) + { + char vishost[] = "localhost"; + int visport = 19916; + socketstream mode_sock(vishost, visport); + mode_sock.precision(8); + + for (int i=0; i " << flush; + cin >> c; + } + MPI_Bcast(&c, 1, MPI_CHAR, 0, MPI_COMM_WORLD); + + if (c != 'c') + { + break; + } + } + mode_sock.close(); + } + + // 12. Free the used memory. + delete lobpcg; + delete precond; + delete M; + delete A; + delete Arow; + + delete fespace; + if (order > 0) + { + delete fec; + } + delete pmesh; + + MPI_Finalize(); + + return 0; +} diff --git a/mfem/sundials/CMakeLists.txt b/mfem/sundials/CMakeLists.txt index a94bfda..ba764bf 100644 --- a/mfem/sundials/CMakeLists.txt +++ b/mfem/sundials/CMakeLists.txt @@ -1,15 +1,66 @@ -cmake_minimum_required(VERSION 3.12) -project(transient-heat - DESCRIPTION "Transient Heat Conduction using MFEM + SUNDIALS + HYPRE" - LANGUAGES C CXX) - -set(CMAKE_C_COMPILER ${MPI_C_COMPILER}) -set(CMAKE_CXX_COMPILER ${MPI_CXX_COMPILER}) - add_executable(transient-heat transient-heat.cpp) -target_link_libraries(transient-heat PRIVATE XSDK::MFEM XSDK::SUNDIALS XSDK::HYPRE PETSC::ALL ZLIB::ZLIB) +target_link_libraries( + transient-heat PRIVATE XSDK::SUNDIALS XSDK::PETSc XSDK::HYPRE XSDK::MFEM ZLIB::ZLIB + MPI::MPI_CXX +) +if(NOT ENABLE_CUDA) + # This example fails (as of 2022/05/16) when HYPRE is built with CUDA, so + # it is disabled in that case for now. 
+ xsdk_add_test( + NAME + MFEM-transient-heat + MPI_NPROCS + 4 + COMMAND + $ + --mesh + ../data/star.mesh + --kappa + 0.5 + --alpha + 0.01 + --ode-solver + 8 + ) +endif() +install(TARGETS transient-heat RUNTIME DESTINATION bin) add_executable(advection advection.cpp) -target_link_libraries(advection PRIVATE XSDK::MFEM XSDK::SUNDIALS XSDK::HYPRE PETSC::ALL ZLIB::ZLIB) - -install(TARGETS transient-heat RUNTIME DESTINATION bin) +target_link_libraries( + advection PRIVATE XSDK::PETSc XSDK::SUNDIALS XSDK::HYPRE XSDK::MFEM ZLIB::ZLIB MPI::MPI_CXX +) +if(NOT ENABLE_CUDA) + # This example fails (as of 2022/05/16) when CUDA is enabled, so we disable + # it here for now. + xsdk_add_test( + NAME + MFEM-advection--cpu + MPI_NPROCS + 4 + COMMAND + $ + --device + cpu + --partial-assembly + --ode-solver + 8 + --no-visualization + ) +endif() +if(ENABLE_CUDA) + xsdk_add_test( + NAME + MFEM-advection--gpu + MPI_NPROCS + 4 + COMMAND + $ + --device + cuda + --partial-assembly + --ode-solver + 8 + --no-visualization + ) +endif() +install(TARGETS advection RUNTIME DESTINATION bin) diff --git a/mfem/sundials/README.md b/mfem/sundials/README.md index 4447940..a83a779 100644 --- a/mfem/sundials/README.md +++ b/mfem/sundials/README.md @@ -43,7 +43,8 @@ demonstrates MFEM integration with the SUNDIALS CVODE and ARKODE solvers for CUD This example is built to run in parallel, so launch it with mpirun and your desired options: ``` -mpirun -np 4 ./advection --device cuda --partial-assembly --ode-solver 9 +mpirun -np 4 ./advection --device cuda --partial-assembly --ode-solver 8 +mpirun -np 4 ./advection --device cpu --partial-assembly --ode-solver 8 ``` Useful non-default options: diff --git a/mfem/sundials/advection.cpp b/mfem/sundials/advection.cpp index 79b498a..50e27ee 100644 --- a/mfem/sundials/advection.cpp +++ b/mfem/sundials/advection.cpp @@ -1,28 +1,20 @@ +// xSDK Example is based on // MFEM Example 9 - Parallel Version // SUNDIALS Modification // -// Compile with: make ex9p -// // 
Sample runs: -// mpirun -np 4 ex9p -m ../../data/periodic-segment.mesh -p 1 -rp 1 -s 7 -dt 0.0025 -// mpirun -np 4 ex9p -m ../../data/periodic-square.mesh -p 1 -rp 1 -s 8 -dt 0.0025 -tf 9 -// mpirun -np 4 ex9p -m ../../data/periodic-hexagon.mesh -p 0 -rp 1 -s 7 -dt 0.0009 -vs 25 -// mpirun -np 4 ex9p -m ../../data/periodic-hexagon.mesh -p 0 -rp 1 -s 9 -dt 0.005 -vs 15 -// mpirun -np 4 ex9p -m ../../data/amr-quad.mesh -p 1 -rp 1 -s 9 -dt 0.001 -tf 9 -// mpirun -np 4 ex9p -m ../../data/star-q3.mesh -p 1 -rp 1 -s 9 -dt 0.0025 -tf 9 -// mpirun -np 4 ex9p -m ../../data/disc-nurbs.mesh -p 1 -rp 2 -s 7 -dt 0.0025 -tf 9 -// mpirun -np 4 ex9p -m ../../data/periodic-cube.mesh -p 0 -rp 1 -s 8 -dt 0.01 -tf 8 -o 2 +// mpirun -np 4 ./advection -m ../data/periodic-hexagon.mesh -p 0 -rp 1 -s 7 -dt 0.0009 -vs 25 +// mpirun -np 4 ./advection -m ../data/periodic-hexagon.mesh -p 0 -rp 1 -s 9 -dt 0.005 -vs 15 // // Device sample runs: -// mpirun -np 4 ex9p -pa -// mpirun -np 4 ex9p -ea -// mpirun -np 4 ex9p -fa -// mpirun -np 4 ex9p -pa -m ../../data/periodic-cube.mesh -// mpirun -np 4 ex9p -pa -m ../../data/periodic-cube.mesh -d cuda -// mpirun -np 4 ex9p -ea -m ../../data/periodic-cube.mesh -d cuda -// mpirun -np 4 ex9p -fa -m ../../data/periodic-cube.mesh -d cuda +// mpirun -np 4 ./advection --device cuda -pa -s 8 +// mpirun -np 4 ./advection --device cuda -ea -s 8 +// mpirun -np 4 ./advection --device cuda -fa -s 8 +// +// Description: This xSDK example demonstrates the integration of MFEM with +// SUNDIALS. // -// Description: This example code solves the time-dependent advection equation +// This example code solves the time-dependent advection equation // du/dt + v.grad(u) = 0, where v is a given fluid velocity, and // u0(x)=u(0,x) is a given initial condition. // @@ -160,7 +152,7 @@ int main(int argc, char *argv[]) // 2. Parse command-line options. 
problem = 0; - const char *mesh_file = "../../data/periodic-hexagon.mesh"; + const char *mesh_file = "../data/periodic-hexagon.mesh"; int ser_ref_levels = 2; int par_ref_levels = 0; int order = 3; diff --git a/mfem/sundials/makefile b/mfem/sundials/makefile deleted file mode 100644 index e86ccf7..0000000 --- a/mfem/sundials/makefile +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) 2010-2020, Lawrence Livermore National Security, LLC. Produced -# at the Lawrence Livermore National Laboratory. All Rights reserved. See files -# LICENSE and NOTICE for details. LLNL-CODE-806117. -# -# This file is part of the MFEM library. For more information and source code -# availability visit https://mfem.org. -# -# MFEM is free software; you can redistribute it and/or modify it under the -# terms of the BSD-3 license. We welcome feedback and contributions, see file -# CONTRIBUTING.md for details. - -# Use the MFEM build directory -MFEM_DIR ?= ../.. -MFEM_BUILD_DIR ?= ../.. -SRC = $(if $(MFEM_DIR:../..=),$(MFEM_DIR)/examples/sundials/,) -CONFIG_MK = $(MFEM_BUILD_DIR)/config/config.mk -# Use the MFEM install directory -# MFEM_INSTALL_DIR = ../../mfem -# CONFIG_MK = $(MFEM_INSTALL_DIR)/share/mfem/config.mk - -MFEM_LIB_FILE = mfem_is_not_built --include $(CONFIG_MK) - -SEQ_EXAMPLES = ex9 ex10 ex16 -PAR_EXAMPLES = ex9p ex10p ex16p -ifeq ($(MFEM_USE_MPI),NO) - EXAMPLES = $(SEQ_EXAMPLES) -else - EXAMPLES = $(PAR_EXAMPLES) $(SEQ_EXAMPLES) -endif - -.SUFFIXES: -.SUFFIXES: .o .cpp .mk -.PHONY: all clean clean-build clean-exec - -# Remove built-in rule -%: %.cpp - -# Replace the default implicit rule for *.cpp files -%: $(SRC)%.cpp $(MFEM_LIB_FILE) $(CONFIG_MK) - $(MFEM_CXX) $(MFEM_FLAGS) $< -o $@ $(MFEM_LIBS) - -all: $(EXAMPLES) - -ifeq ($(MFEM_USE_SUNDIALS),NO) -$(EXAMPLES): - $(error MFEM is not configured with SUNDIALS) -endif - -MFEM_TESTS = EXAMPLES -include $(MFEM_TEST_MK) - -# Testing: Parallel vs. 
serial runs -RUN_MPI = $(MFEM_MPIEXEC) $(MFEM_MPIEXEC_NP) $(MFEM_MPI_NP) -SERIAL_NAME := Serial SUNDIALS example -PARALLEL_NAME := Parallel SUNDIALS example -%-test-par: % - @$(call mfem-test,$<, $(RUN_MPI), $(PARALLEL_NAME)) -%-test-seq: % - @$(call mfem-test,$<,, $(SERIAL_NAME)) - -# Testing: Specific execution options: -# Example 9: test CVODE with CV_ADAMS (non-stiff implicit) time stepping -EX9_COMMON_ARGS := -m ../../data/periodic-hexagon.mesh -p 0 -s 7 -EX9_ARGS := $(EX9_COMMON_ARGS) -r 2 -dt 0.0018 -vs 25 -EX9P_ARGS := $(EX9_COMMON_ARGS) -rp 1 -dt 0.0009 -vs 50 -ex9-test-seq: ex9 - @$(call mfem-test,$<,, $(SERIAL_NAME),$(EX9_ARGS)) -ex9p-test-par: ex9p - @$(call mfem-test,$<, $(RUN_MPI), $(PARALLEL_NAME),$(EX9P_ARGS)) -# Example 10: test CVODE with CV_BDF (stiff implicit) time stepping -EX10_COMMON_ARGS := -m ../../data/beam-quad.mesh -o 2 -s 5 -dt 0.15 -tf 6 -vs 10 -EX10_ARGS := $(EX10_COMMON_ARGS) -r 2 -EX10P_ARGS := $(EX10_COMMON_ARGS) -rp 1 -ex10-test-seq: ex10 - @$(call mfem-test,$<,, $(SERIAL_NAME),$(EX10_ARGS)) -ex10p-test-par: ex10p - @$(call mfem-test,$<, $(RUN_MPI), $(PARALLEL_NAME),$(EX10P_ARGS)) - -# Testing: "test" target and mfem-test* variables are defined in config/test.mk - -# Generate an error message if the MFEM library is not built and exit -$(MFEM_LIB_FILE): - $(error The MFEM library is not built) - -clean: clean-build clean-exec - -clean-build: - rm -f *.o *~ $(SEQ_EXAMPLES) $(PAR_EXAMPLES) - rm -rf *.dSYM *.TVD.*breakpoints - -clean-exec: - @rm -f ex9.mesh ex9-mesh.* ex9-init.* ex9-final.* Example9* - @rm -f deformed.* velocity.* elastic_energy.* - @rm -f ex16.mesh ex16-mesh.* ex16-init.* ex16-final.* Example16* diff --git a/mfem/sundials/transient-heat.cpp b/mfem/sundials/transient-heat.cpp index 8ee8c12..f0ffe8c 100644 --- a/mfem/sundials/transient-heat.cpp +++ b/mfem/sundials/transient-heat.cpp @@ -1,21 +1,13 @@ +// xSDK Example based on // MFEM Example 16 - Parallel Version -// SUNDIALS Modification -// -// Compile with: make 
ex16p +// SUNDIALS Modification // // Sample runs: -// mpirun -np 4 ex16p -// mpirun -np 4 ex16p -m ../../data/inline-tri.mesh -// mpirun -np 4 ex16p -m ../../data/disc-nurbs.mesh -tf 2 -// mpirun -np 4 ex16p -s 12 -a 0.0 -k 1.0 -// mpirun -np 4 ex16p -s 8 -a 1.0 -k 0.0 -dt 4e-6 -tf 2e-2 -vs 50 -// mpirun -np 8 ex16p -s 9 -a 0.5 -k 0.5 -o 4 -dt 8e-6 -tf 2e-2 -vs 50 -// mpirun -np 4 ex16p -s 10 -dt 2.0e-4 -tf 4.0e-2 -// mpirun -np 16 ex16p -m ../../data/fichera-q2.mesh -// mpirun -np 16 ex16p -m ../../data/escher-p2.mesh -// mpirun -np 8 ex16p -m ../../data/beam-tet.mesh -tf 10 -dt 0.1 -// mpirun -np 4 ex16p -m ../../data/amr-quad.mesh -o 4 -rs 0 -rp 0 -// mpirun -np 4 ex16p -m ../../data/amr-hex.mesh -o 2 -rs 0 -rp 0 +// mpirun -np 4 ./transient-heat +// mpirun -np 4 ./transient-heat -s 12 -a 0.0 -k 1.0 +// mpirun -np 4 ./transient-heat -s 8 -a 1.0 -k 0.0 -dt 4e-6 -tf 2e-2 -vs 50 +// mpirun -np 8 ./transient-heat -s 9 -a 0.5 -k 0.5 -o 4 -dt 8e-6 -tf 2e-2 -vs 50 +// mpirun -np 4 ./transient-heat -s 10 -dt 2.0e-4 -tf 4.0e-2 // // Description: This example solves a time dependent nonlinear heat equation // problem of the form du/dt = C(u), with a non-linear diffusion @@ -179,7 +171,7 @@ int main(int argc, char *argv[]) args.PrintOptions(cout); } - // check for vaild ODE solver option + // check for valid ODE solver option if (ode_solver_type < 1 || ode_solver_type > 12) { if (myid == 0) diff --git a/petsc/CMakeLists.txt b/petsc/CMakeLists.txt index e90e484..ce3d828 100644 --- a/petsc/CMakeLists.txt +++ b/petsc/CMakeLists.txt @@ -1,14 +1,101 @@ -cmake_minimum_required(VERSION 3.0) -project(petsc-example DESCRIPTION "PETSc Example" LANGUAGES C) +add_executable(ex19 ex19.c) +target_link_libraries(ex19 PRIVATE XSDK::PETSc MPI::MPI_C) +if (TARGET OpenMP::OpenMP_C) + target_link_libraries(ex19 PRIVATE OpenMP::OpenMP_C) +endif() -set(CMAKE_C_COMPILER ${MPI_C_COMPILER}) +# The makefile to run examples requires rm command +find_program(RM rm) -add_executable(ex19 ex19.c) 
-target_link_libraries(ex19 PRIVATE PETSC::ALL) -target_include_directories(ex19 PRIVATE ${PETSC_DIR}/include) +# We use the makefile to launch the tests +configure_file(makefile ${CMAKE_CURRENT_BINARY_DIR}/makefile-run) +file(COPY output DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/output) if(MATH_LIBRARY) target_link_libraries(ex19 PRIVATE ${MATH_LIBRARY}) endif() +xsdk_add_test( + NAME + PETSc-ex19_1 + COMMAND + make + -f + makefile-run + runex19 + ENVIRONMENT + PETSC_DIR=${PETSc_DIR} +) +if(ENABLE_CUDA) + xsdk_add_test( + NAME + PETSc-ex19_cuda + COMMAND + make + -f + makefile-run + runex19_cuda + ENVIRONMENT + PETSC_DIR=${PETSc_DIR} + ) +endif() +if(ENABLE_HYPRE) + if(ENABLE_CUDA) + xsdk_add_test( + NAME + PETSc-ex19_hypre_cuda + COMMAND + make + -f + makefile-run + runex19_hypre_cuda + ENVIRONMENT + PETSC_DIR=${PETSc_DIR} + ) + else() + xsdk_add_test( + NAME + PETSc-ex19_hypre + COMMAND + make + -f + makefile-run + runex19_hypre + ENVIRONMENT + PETSC_DIR=${PETSc_DIR} + ) + endif() +endif() +if(ENABLE_SUPERLU) + xsdk_add_test( + NAME + PETSc-ex19_superlu_dist + COMMAND + make + -f + makefile-run + runex19_superlu_dist + ENVIRONMENT + PETSC_DIR=${PETSc_DIR} + ) +endif() +if(ENABLE_TRILINOS) + xsdk_add_test( + NAME + PETSc-ex19_ml + MPI_NPROC + 2 + COMMAND + make + -f + makefile-run + runex19_ml + ENVIRONMENT + PETSC_DIR=${PETSc_DIR} + ) +endif() + +install(FILES output/ex19_1.testout output/ex19_cuda_1.out output/ex19_hypre.out + output/ex19_ml.out output/ex19_superlu.out DESTINATION output +) install(TARGETS ex19 RUNTIME DESTINATION bin) diff --git a/petsc/README.md b/petsc/README.md index 49668b3..db53a11 100644 --- a/petsc/README.md +++ b/petsc/README.md @@ -18,11 +18,11 @@ mpirun -np 2 ./ex19 -da_grid_x 20 -da_grid_y 20 -pc_type lu -pc_factor_mat_solve ``` To run with ML from Trilinos: ``` -mpirun -np 2 ./ex19 -da_refine 3 -snes_monitor_short -pc_type hypre +mpirun -np 2 ./ex19 -da_refine 3 -snes_monitor_short -pc_type ml ``` Useful non-default options: | Flag | 
Meaning | |:----------------------| :-----------------------------------------------------| | -pctype [type] | Set the pc solver type for example: hypre, ml, lu. | -| -da_refine n | Set the number of times to refine the mesh. | \ No newline at end of file +| -da_refine n | Set the number of times to refine the mesh. | diff --git a/petsc/ex19.c b/petsc/ex19.c index 3f37c7c..faa8799 100644 --- a/petsc/ex19.c +++ b/petsc/ex19.c @@ -10,7 +10,7 @@ The flow can be driven with the lid or with bouyancy or both:\n\ /* in HTML, '<' = '<' and '>' = '>' */ /* - See src/ksp/ksp/examples/tutorials/ex45.c + See src/ksp/ksp/tutorials/ex45.c */ /*T @@ -20,7 +20,6 @@ The flow can be driven with the lid or with bouyancy or both:\n\ Processors: n T*/ - /*F----------------------------------------------------------------------- We thank David E. Keyes for contributing the driven cavity discretization within this example code. @@ -151,7 +150,6 @@ int main(int argc,char **argv) ierr = SNESSetFromOptions(snes);CHKERRQ(ierr); ierr = PetscPrintf(comm,"lid velocity = %g, prandtl # = %g, grashof # = %g\n",(double)user.lidvelocity,(double)user.prandtl,(double)user.grashof);CHKERRQ(ierr); - /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Solve the nonlinear system - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ @@ -262,7 +260,6 @@ PetscErrorCode FormFunctionLocal(DMDALocalInfo *info,Field **x,Field **f,void *p Note: FD formulae below are normalized by multiplying through by local volume element (i.e. hx*hy) to obtain coefficients O(1) in two dimensions. 
- */ dhx = (PetscReal)(info->mx-1); dhy = (PetscReal)(info->my-1); hx = 1.0/dhx; hy = 1.0/dhy; @@ -371,7 +368,7 @@ PetscErrorCode FormFunctionLocal(DMDALocalInfo *info,Field **x,Field **f,void *p } /* - Performs sweeps of point block nonlinear Gauss-Seidel on all the local grid points + Performs sweeps of point block nonlinear Gauss-Seidel on all the local grid points */ PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, void *ctx) { @@ -648,7 +645,6 @@ PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, void *ctx) PetscFunctionReturn(0); } - /*TEST test: @@ -700,7 +696,7 @@ PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, void *ctx) test: suffix: 18 - args: -ksp_monitor_snes_lg -ksp_pc_side right + args: -snes_monitor_ksp draw::draw_lg -ksp_pc_side right requires: x !single test: @@ -889,7 +885,7 @@ PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, void *ctx) test: suffix: fieldsplit_hypre nsize: 2 - requires: hypre mumps !complex + requires: hypre mumps !complex !defined(PETSC_HAVE_HYPRE_DEVICE) args: -pc_type fieldsplit -pc_fieldsplit_block_size 4 -pc_fieldsplit_type SCHUR -pc_fieldsplit_0_fields 0,1,2 -pc_fieldsplit_1_fields 3 -fieldsplit_0_pc_type lu -fieldsplit_0_pc_factor_mat_solver_type mumps -fieldsplit_1_pc_type hypre -fieldsplit_1_pc_hypre_type boomeramg -snes_monitor_short -ksp_monitor_short test: @@ -909,9 +905,10 @@ PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, void *ctx) test: suffix: hypre nsize: 2 - requires: hypre !complex - args: -da_refine 3 -snes_monitor_short -pc_type hypre + requires: hypre !complex !defined(PETSC_HAVE_HYPRE_DEVICE) + args: -da_refine 3 -snes_monitor_short -pc_type hypre -ksp_norm_type unpreconditioned + # ibcgs is broken when using device vectors test: suffix: ibcgs nsize: 2 @@ -933,7 +930,7 @@ PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, void *ctx) test: suffix: klu_2 requires: suitesparse - args: -da_grid_x 20 -da_grid_y 20 -pc_type lu -pc_factor_mat_solver_type klu -mat_klu_ordering PETSC + args: 
-da_grid_x 20 -da_grid_y 20 -pc_type lu -pc_factor_mat_solver_type klu -pc_factor_mat_ordering_type nd output_file: output/ex19_superlu.out test: @@ -1034,7 +1031,7 @@ PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, void *ctx) test: suffix: umfpack requires: suitesparse - args: -da_refine 2 -pc_type lu -pc_factor_mat_solver_type umfpack -snes_view -snes_monitor_short -ksp_monitor_short + args: -da_refine 2 -pc_type lu -pc_factor_mat_solver_type umfpack -snes_view -snes_monitor_short -ksp_monitor_short -pc_factor_mat_ordering_type external test: suffix: tut_1 @@ -1052,7 +1049,7 @@ PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, void *ctx) test: suffix: tut_3 nsize: 4 - requires: hypre !single !complex + requires: hypre !single !complex !defined(PETSC_HAVE_HYPRE_DEVICE) args: -da_refine 5 -snes_monitor -ksp_monitor -snes_view -pc_type hypre test: @@ -1099,7 +1096,6 @@ PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, void *ctx) requires: cuda args: -snes_monitor -dm_mat_type seqaijcusparse -dm_vec_type seqcuda -pc_type gamg -ksp_monitor -mg_levels_ksp_max_it 3 - test: suffix: cuda_2 nsize: 3 @@ -1109,13 +1105,13 @@ PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, void *ctx) test: suffix: seqbaijmkl nsize: 1 - requires: define(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) + requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) args: -dm_mat_type baij -snes_monitor -ksp_monitor -snes_view test: suffix: mpibaijmkl nsize: 2 - requires: define(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) + requires: defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) args: -dm_mat_type baij -snes_monitor -ksp_monitor -snes_view test: @@ -1126,8 +1122,34 @@ PetscErrorCode NonlinearGS(SNES snes, Vec X, Vec B, void *ctx) test: suffix: logviewmemory - requires: define(PETSC_USE_LOG) !define(PETSC_HAVE_VALGRIND) + requires: defined(PETSC_USE_LOG) !defined(PETSCTEST_VALGRIND) args: -log_view -log_view_memory -da_refine 4 filter: grep MatFDColorSetUp | wc -w | xargs -I % sh -c "expr % \> 21" + test: + suffix: fs + args: 
-pc_type fieldsplit -da_refine 3 -all_ksp_monitor -fieldsplit_y_velocity_pc_type lu -fieldsplit_temperature_pc_type lu -fieldsplit_x_velocity_pc_type lu -snes_view + + test: + suffix: asm_matconvert + args: -mat_type aij -pc_type asm -pc_asm_sub_mat_type dense -snes_view + + test: + suffix: euclid + nsize: 2 + requires: hypre !single !complex !defined(PETSC_HAVE_HYPRE_MIXEDINT) !defined(PETSC_HAVE_HYPRE_DEVICE) + args: -da_refine 2 -ksp_monitor -snes_monitor -snes_view -pc_type hypre -pc_hypre_type euclid + + test: + suffix: euclid_bj + nsize: 2 + requires: hypre !single !complex !defined(PETSC_HAVE_HYPRE_MIXEDINT) !defined(PETSC_HAVE_HYPRE_DEVICE) + args: -da_refine 2 -ksp_monitor -snes_monitor -snes_view -pc_type hypre -pc_hypre_type euclid -pc_hypre_euclid_bj + + test: + suffix: euclid_droptolerance + nsize: 1 + requires: hypre !single !complex !defined(PETSC_HAVE_HYPRE_MIXEDINT) !defined(PETSC_HAVE_HYPRE_DEVICE) + args: -da_refine 2 -ksp_monitor -snes_monitor -snes_view -pc_type hypre -pc_hypre_type euclid -pc_hypre_euclid_droptolerance .1 + TEST*/ diff --git a/petsc/makefile b/petsc/makefile index 302ea6c..b458522 100644 --- a/petsc/makefile +++ b/petsc/makefile @@ -1,30 +1,48 @@ -CFLAGS = +CFLAGS = FFLAGS = -CPPFLAGS = -FPPFLAGS = +CPPFLAGS = +FPPFLAGS = -include ${PETSC_DIR}/lib/petsc/conf/variables -include ${PETSC_DIR}/lib/petsc/conf/rules +include @PETSc_DIR@/lib/petsc/conf/variables +include @PETSc_DIR@/lib/petsc/conf/rules #------------------------------------------------------------------------- +runex19: + -@@MPIEXEC_EXECUTABLE@ @MPIEXEC_NUMPROC_FLAG@ 2 ./ex19 -da_refine 3 -pc_type mg -ksp_type fgmres > ex19_1.tmp 2>&1; \ + if (${DIFF} output/ex19_1.testout ex19_1.tmp) then \ + echo "C/C++ example src/snes/tutorials/ex19 run successfully with 2 MPI processes"; \ + else printf "${PWD}\nPossible problem with ex19, diffs above\n=========================================\n"; fi; \ + @RM@ -f ex19_1.tmp +runex19_cuda: + -@@MPIEXEC_EXECUTABLE@ 
@MPIEXEC_NUMPROC_FLAG@ 2 ./ex19 -snes_monitor -dm_mat_type seqaijcusparse -dm_vec_type seqcuda -pc_type gamg -ksp_monitor -mg_levels_ksp_max_it 3 > ex19_1.tmp 2>&1; \ + if (${DIFF} output/ex19_cuda_1.out ex19_1.tmp) then \ + echo "C/C++ example src/snes/tutorials/ex19 run successfully with cuda"; \ + else printf "${PWD}\nPossible problem with ex19 running with cuda, diffs above\n=========================================\n"; fi; \ + @RM@ -f ex19_1.tmp runex19_hypre: - -@${MPIEXEC} -n 2 ./ex19 -da_refine 3 -snes_monitor_short -pc_type hypre > ex19_1.tmp 2>&1; \ + -@@MPIEXEC_EXECUTABLE@ @MPIEXEC_NUMPROC_FLAG@ 2 ./ex19 -da_refine 3 -snes_monitor_short -pc_type hypre > ex19_1.tmp 2>&1; \ if (${DIFF} output/ex19_hypre.out ex19_1.tmp) then \ echo "C/C++ example src/snes/examples/tutorials/ex19 run successfully with hypre"; \ else printf "${PWD}\nPossible problem with ex19 running with hypre, diffs above\n=========================================\n"; fi; \ - ${RM} -f ex19_1.tmp + @RM@ -f ex19_1.tmp +runex19_hypre_cuda: + -@@MPIEXEC_EXECUTABLE@ @MPIEXEC_NUMPROC_FLAG@ 2 ./ex19 -dm_vec_type cuda -dm_mat_type aijcusparse -da_refine 3 -snes_monitor_short -ksp_norm_type unpreconditioned -pc_type hypre > ex19_1.tmp 2>&1; \ + if (${DIFF} output/ex19_hypre.out ex19_1.tmp) then \ + echo "C/C++ example src/snes/tutorials/ex19 run successfully with hypre/cuda"; \ + else printf "${PWD}\nPossible problem with ex19 running with hypre, diffs above\n=========================================\n"; fi; \ + @RM@ -f ex19_1.tmp runex19_ml: - -@${MPIEXEC} -n 2 ./ex19 -da_refine 3 -snes_monitor_short -pc_type ml > ex19_1.tmp 2>&1; \ + -@@MPIEXEC_EXECUTABLE@ @MPIEXEC_NUMPROC_FLAG@ 2 ./ex19 -da_refine 3 -snes_monitor_short -pc_type ml > ex19_1.tmp 2>&1; \ if (${DIFF} output/ex19_ml.out ex19_1.tmp) then \ echo "C/C++ example src/snes/examples/tutorials/ex19 run successfully with ml"; \ else printf "${PWD}\nPossible problem with ex19 running with ml, diffs 
above\n=========================================\n"; fi; \ - ${RM} -f ex19_1.tmp + @RM@ -f ex19_1.tmp runex19_superlu_dist: - -@${MPIEXEC} -n 1 ./ex19 -da_grid_x 20 -da_grid_y 20 -pc_type lu -pc_factor_mat_solver_type superlu_dist > ex19.tmp 2>&1; \ + -@@MPIEXEC_EXECUTABLE@ @MPIEXEC_NUMPROC_FLAG@ 2 ./ex19 -da_grid_x 20 -da_grid_y 20 -pc_type lu -pc_factor_mat_solver_type superlu_dist > ex19.tmp 2>&1; \ if (${DIFF} output/ex19_superlu.out ex19.tmp) then \ echo "C/C++ example src/snes/examples/tutorials/ex19 run successfully with superlu_dist"; \ else printf "${PWD}\nPossible problem with ex19 running with superlu_dist, diffs above\n=========================================\n"; fi; \ - ${RM} -f ex19.tmp + @RM@ -f ex19.tmp -include ${PETSC_DIR}/lib/petsc/conf/test +include @PETSc_DIR@/lib/petsc/conf/test diff --git a/petsc/output/ex19_1.testout b/petsc/output/ex19_1.testout new file mode 100644 index 0000000..be22b30 --- /dev/null +++ b/petsc/output/ex19_1.testout @@ -0,0 +1,2 @@ +lid velocity = 0.0016, prandtl # = 1., grashof # = 1. +Number of SNES iterations = 2 diff --git a/petsc/output/ex19_cuda_1.out b/petsc/output/ex19_cuda_1.out new file mode 100644 index 0000000..7de53a1 --- /dev/null +++ b/petsc/output/ex19_cuda_1.out @@ -0,0 +1,15 @@ +lid velocity = 0.0625, prandtl # = 1., grashof # = 1. 
+ 0 SNES Function norm 2.391552133017e-01 + 0 KSP Residual norm 2.325621076120e-01 + 1 KSP Residual norm 1.654206318674e-02 + 2 KSP Residual norm 7.202836119880e-04 + 3 KSP Residual norm 1.796861424199e-05 + 4 KSP Residual norm 2.461332992052e-07 + 1 SNES Function norm 6.826585648929e-05 + 0 KSP Residual norm 2.347339172985e-05 + 1 KSP Residual norm 8.356798075993e-07 + 2 KSP Residual norm 1.844045309619e-08 + 3 KSP Residual norm 5.336386977405e-10 + 4 KSP Residual norm 2.662608472862e-11 + 2 SNES Function norm 6.549682264799e-11 +Number of SNES iterations = 2 diff --git a/plasma/CMakeLists.txt b/plasma/CMakeLists.txt new file mode 100644 index 0000000..9e76f9d --- /dev/null +++ b/plasma/CMakeLists.txt @@ -0,0 +1,19 @@ +add_executable(ex1solve ex1solve.cpp) +if(NOT ENABLE_CUDA) + target_compile_definitions(ex1solve PRIVATE SLATE_NO_CUDA) +endif() +target_compile_definitions(ex1solve PRIVATE SLATE_NO_HIP) + +target_link_libraries( + ex1solve PRIVATE XSDK::PLASMA XSDK::SLATE XSDK::LAPACKPP XSDK::BLASPP MPI::MPI_CXX +) +if(MATH_LIBRARY) + target_link_libraries(ex1solve PRIVATE ${MATH_LIBRARY}) +endif() +if(ENABLE_CUDA) + target_link_libraries(ex1solve PRIVATE CUDA::cudart) +endif() + +xsdk_add_test(NAME PLASMA-ex1solve COMMAND $ --n=1000) # ex1solve only parses the --n=<size> form +install(TARGETS ex1solve RUNTIME DESTINATION bin) diff --git a/plasma/README.md b/plasma/README.md new file mode 100644 index 0000000..7ff8a44 --- /dev/null +++ b/plasma/README.md @@ -0,0 +1,11 @@ +# PLASMA examples +Example codes demonstrating the use of PLASMA using other XSDK packages. + +## Using SLATE and its compute layers +`ex1solve` solves a system of linear equations by using Level 3 BLAS from +SLATE's BLAS++ instead of PLASMA's internal interface. The example accepts an +optional command line parameter to specify linear system size like so: +``` +./ex1solve [--n=1000] +``` +If the size is not specified then 1000 is used. 
diff --git a/plasma/ex1solve.cpp b/plasma/ex1solve.cpp new file mode 100644 index 0000000..2911f83 --- /dev/null +++ b/plasma/ex1solve.cpp @@ -0,0 +1,118 @@ +/* ex1solve.cpp */ + +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +static double +wtime() { +struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + 1e-6 * tv.tv_usec; +} + + +static inline blas::Op +t_p2bpp(plasma_enum_t trans) { + return PlasmaNoTrans == trans ? blas::Op::NoTrans : (PlasmaTrans == trans ? blas::Op::Trans : blas::Op::ConjTrans); +} + +static inline blas::Side +s_p2bpp(plasma_enum_t side) { + return PlasmaLeft == side ? blas::Side::Left : blas::Side::Right; +} + +static inline blas::Uplo +u_p2bpp(plasma_enum_t uplo) { + return PlasmaUpper == uplo ? blas::Uplo::Upper : blas::Uplo::Lower; +} + +static inline blas::Diag +d_p2bpp(plasma_enum_t diag) { + return PlasmaUnit == diag ? blas::Diag::Unit : blas::Diag::NonUnit; +} + +static std::atomic Counter_Gemm; + +extern "C" void +plasma_core_dgemm(plasma_enum_t transa, plasma_enum_t transb, int m, int n, int k, double alpha, const double *A, int lda, const double *B, int ldb, double beta, double *C, int ldc) { + ++Counter_Gemm; + blas::gemm(blas::Layout::ColMajor, t_p2bpp(transa), t_p2bpp(transb), m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +} + +static std::atomic Counter_Trsm; + +extern "C" void +plasma_core_dtrsm(plasma_enum_t side, plasma_enum_t uplo, plasma_enum_t trans, plasma_enum_t diag, int m, int n, double alpha, const double *A, int lda, double *B, int ldb) { + ++Counter_Trsm; + blas::trsm(blas::Layout::ColMajor, s_p2bpp(side), u_p2bpp(uplo), t_p2bpp(trans), d_p2bpp(diag), m, n, alpha, A, lda, B, ldb); +} + +void +drndset(int m, int n, double *A, int A_ld, int seed) { + double rcp = 1.0-1.0/RAND_MAX; + + srand(seed); + + for (int j = 0; j < n; ++j) + for (int i = 0; i < m; ++i) + A[i + j * A_ld] = rand() * rcp; +} + +int +main(int argc, char *argv[]) { + int n, 
nrhs, nb, ib, A_ld, B_ld, *piv, info; + double *A, *B; + + n = 1000; + nrhs = 1; + + for (int i = 1; i < argc && argv[i]; ++i) + if (strncmp("--n=", argv[i], 2+1+1) == 0 && (sscanf(argv[i]+2+1+1, "%d", &n) <= 0 || n < 1)) // only a malformed or non-positive --n= value resets the default; other arguments are ignored + n = 1000; + + A_ld = n; + B_ld = n; + + A = (double *)malloc( sizeof *A * A_ld * n ); + B = (double *)malloc( sizeof *B * B_ld * nrhs ); + piv = (int *)malloc( sizeof *piv * n ); + + drndset(n, n, A, A_ld, 1313); + drndset(n, nrhs, B, B_ld, 1313); + + plasma_init(); + + plasma_get(PlasmaNb, &nb); + plasma_get(PlasmaIb, &ib); + + Counter_Gemm = 0; + Counter_Trsm = 0; + double t = -wtime(); + info = plasma_dgesv(n, nrhs, A, A_ld, piv, B, B_ld); + t += wtime(); + int cntgemm = Counter_Gemm; + int cnttrsm = Counter_Trsm; + + plasma_finalize(); + + free(piv); + free(B); + free(A); + + printf("n=%d nrhs=%d t=%g gflop/s=%g", n, nrhs, t, lapack::Gflop::gesv(n, nrhs) / t); + printf(" nb=%d ib=%d gemm=%d trsm=%d", nb, ib, cntgemm, cnttrsm); // argument order matches the gemm=/trsm= labels + printf("\n"); + + return 0; +} diff --git a/strumpack/CMakeLists.txt b/strumpack/CMakeLists.txt new file mode 100644 index 0000000..20744d0 --- /dev/null +++ b/strumpack/CMakeLists.txt @@ -0,0 +1,24 @@ +add_executable(sparse sparse.cpp) + +target_link_libraries(sparse PRIVATE STRUMPACK::strumpack) +if(MATH_LIBRARY) + target_link_libraries(sparse PRIVATE ${MATH_LIBRARY}) +endif() + +xsdk_add_test( + NAME + STRUMPACK-sparse + MPI_NPROCS + 4 + COMMAND + $ + 24 + --sp_compression + hodlr + --hodlr_butterfly_levels + 10 + ENVIRONMENT + OMP_NUM_THREADS=4 +) + +install(TARGETS sparse RUNTIME DESTINATION bin) diff --git a/strumpack/README.md b/strumpack/README.md new file mode 100644 index 0000000..8241666 --- /dev/null +++ b/strumpack/README.md @@ -0,0 +1,101 @@ + +# STRUMPACK examples + +Example codes demonstrating the use of [STRUMPACK](https://portal.nersc.gov/project/sparse/strumpack/) using other XSDK packages ([ButterflyPACK](https://github.com/liuyangzhuan/ButterflyPACK)). 
+ +## STRUMPACK + ButterflyPACK + +The `sparse.cpp` example demonstrates STRUMPACK's algebraic sparse +direct solver and preconditioners for solving the 3-D Laplacian +problem with zero boundary conditions on an n x n x n grid. The +number of unknowns is N=n^3. The standard 7-point stencil is used, +and we solve for the interior nodes only. + +STRUMPACK implements multifrontal sparse LU factorization, with the +option of compression of the larger frontal matrices. Compression +algorithms include HODLR (Hierarchically Off-Diagonal Low Rank) and +HODBF (Hierarchically Off-Diagonal Butterfly), BLR (Block Low Rank), +HSS (Hierarchically Semi Separable), lossy or lossless. + +After factorization, the linear system can be solved using forward and +backward substitution with the lower and upper triangular factors +respectively. Without compression, the solver behaves as a sparse +direct method. The sparse direct solver still uses iterative +refinement, but typically only needs a single iteration. When +compression is enabled, the LU factorization is only approximate, and +the solver is used as a preconditioner for GMRES (or BiCGStab). + + +### Usage + +**Sample run**: `OMP_NUM_THREADS=4 mpirun -np 4 ./sparse 100 --sp_compression hodlr --hodlr_butterfly_levels 10` + + - This run solves a system corresponding to a discretization + of the Laplace equation -Delta u = f with zero boundary + conditions on a 100 x 100 x 100 grid, using STRUMPACK's + distributed LU factorization, and with HODBF compression + for the largest fronts (dense sub-blocks in the sparse factors). + - The output of the example is various information regarding the + solver and solver performance. + +Options for the compression algorithm include `none` (direct solver), +`blr`, `lossy`/`lossless` (needs the ZFP library), `hodlr` (needs +ButterflyPACK), `blr_hodlr` (needs ButterflyPACK). `blr_hodlr` +combines both BLR (on medium sized blocks) and HODLR (on the largest +blocks). 
+ +The thresholds for using the compression schemes can be set using +`--sp_compression_min_sep_size 1000` or, for specific formats +``--sp_blr_min_sep_size 1000``. The compression tolerance can be +tuned using `--blr_rel_tol 1e-6` or `--hodlr_rel_tol 1e-6` + + +To see the full list of command-line options, run: `./sparse --help` + +For more information on how to tune the preconditioners, see +[here](https://portal.nersc.gov/project/sparse/strumpack/master/prec.html). + +## License +STRUMPACK -- STRUctured Matrix PACKage, Copyright (c) 2014-2022, The +Regents of the University of California, through Lawrence Berkeley +National Laboratory (subject to receipt of any required approvals from +the U.S. Dept. of Energy). All rights reserved. + +If you have questions about your rights to use or distribute this +software, please contact Berkeley Lab's Technology Transfer Department +at TTD@lbl.gov. + +NOTICE. This software is owned by the U.S. Department of Energy. As +such, the U.S. Government has been granted for itself and others +acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide +license in the Software to reproduce, prepare derivative works, and +perform publicly and display publicly. Beginning five (5) years after +the date permission to assert copyright is obtained from the +U.S. Department of Energy, and subject to any subsequent five (5) year +renewals, the U.S. 
Government is granted for itself and others acting +on its behalf a paid-up, nonexclusive, irrevocable, worldwide license +in the Software to reproduce, prepare derivative works, distribute +copies to the public, perform publicly and display publicly, and to +permit others to do so diff --git a/strumpack/sparse.cpp b/strumpack/sparse.cpp new file mode 100644 index 0000000..b86df2e --- /dev/null +++ b/strumpack/sparse.cpp @@ -0,0 +1,158 @@ +/* + * STRUMPACK -- STRUctured Matrix PACKage, Copyright (c) 2014-2022, + * The Regents of the University of California, through Lawrence + * Berkeley National Laboratory (subject to receipt of any required + * approvals from the U.S. Dept. of Energy). All rights reserved. + * + * If you have questions about your rights to use or distribute this + * software, please contact Berkeley Lab's Technology Transfer + * Department at TTD@lbl.gov. + * + * NOTICE. This software is owned by the U.S. Department of Energy. As + * such, the U.S. Government has been granted for itself and others + * acting on its behalf a paid-up, nonexclusive, irrevocable, + * worldwide license in the Software to reproduce, prepare derivative + * works, and perform publicly and display publicly. Beginning five + * (5) years after the date permission to assert copyright is obtained + * from the U.S. Department of Energy, and subject to any subsequent + * five (5) year renewals, the U.S. Government is granted for itself + * and others acting on its behalf a paid-up, nonexclusive, + * irrevocable, worldwide license in the Software to reproduce, + * prepare derivative works, distribute copies to the public, perform + * publicly and display publicly, and to permit others to do so. + * + * Developers: Pieter Ghysels, and others. + * (Lawrence Berkeley National Lab, Computational Research + * Division). 
+ * + */ +#include +#include "StrumpackSparseSolverMPIDist.hpp" +#include "sparse/CSRMatrix.hpp" +#include "misc/TaskTimer.hpp" + +typedef double scalar; +// typedef int64_t integer; // to use 64 bit integers +typedef int integer; + +using namespace strumpack; + +int main(int argc, char* argv[]) { + int thread_level, myrank; + MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &thread_level); + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + if (thread_level != MPI_THREAD_FUNNELED && myrank == 0) + std::cout << "MPI implementation does not support MPI_THREAD_FUNNELED" + << std::endl; + { + int n = 30, nrhs = 1; + if (argc > 1) n = atoi(argv[1]); // get grid size + else std::cout << "# please provide grid size" << std::endl; + // get number of right-hand sides + if (argc > 2) nrhs = std::max(1, atoi(argv[2])); + if (!myrank) + std::cout << "solving 3D " << n << "^3 Poisson problem" + << " with " << nrhs << " right hand sides" << std::endl; + + // Create the main solver object, using an MPI communicator. + StrumpackSparseSolverMPIDist spss(MPI_COMM_WORLD); + + // The matching phase finds a column permutation that maximizes + // the diagonal elements. Since the 3D Poisson problem is already + // diagonally dominant, we can disable this matching. + spss.options().set_matching(MatchingJob::NONE); + + // A fill reducing ordering is a symmetric permutation of + // the matrix that minimizes the fill in the sparse triangular + // factors. Since the problem here is defined on a regular mesh, + // we can use a simple geometric nested dissection algorithm (see + // also below spss.reorder(n, n, n) where the mesh dimensions are + // specified). For general sparse matrices, use any other option, + // such as the default ReorderingStrategy::METIS, or the parallel + // ReorderingStrategy::PARMETIS (if STRUMPACK was configured with + // PARMETIS support). 
+ spss.options().set_reordering_method(ReorderingStrategy::GEOMETRIC); + spss.options().set_from_command_line(argc, argv); + + // construct a sparse matrix from a simple 7 point stencil, with + // zero boundary conditions + CSRMatrix A; + if (!myrank) { + int n2 = n * n; + int N = n * n2; + int nnz = 7 * N - 6 * n2; + A = CSRMatrix(N, nnz); + integer* col_ptr = A.ptr(); + integer* row_ind = A.ind(); + scalar* val = A.val(); + + nnz = 0; + col_ptr[0] = 0; + for (integer xdim=0; xdim 0) { val[nnz] = -1.0; row_ind[nnz++] = ind-1; } // left + if (zdim < n-1){ val[nnz] = -1.0; row_ind[nnz++] = ind+1; } // right + if (ydim > 0) { val[nnz] = -1.0; row_ind[nnz++] = ind-n; } // front + if (ydim < n-1){ val[nnz] = -1.0; row_ind[nnz++] = ind+n; } // back + if (xdim > 0) { val[nnz] = -1.0; row_ind[nnz++] = ind-n2; } // up + if (xdim < n-1){ val[nnz] = -1.0; row_ind[nnz++] = ind+n2; } // down + col_ptr[ind+1] = nnz; + } + A.set_symm_sparse(); + } + // This scatters the sparse matrix A from the root over all the + // ranks, using a 1D block row distribution, see + // https://portal.nersc.gov/project/sparse/strumpack/master/sparse_example_usage.html#autotoc_md9 + CSRMatrixMPI Adist(&A, MPI_COMM_WORLD, true); + // delete sequential sparse matrix (on the root) + A = CSRMatrix(); + + auto n_local = Adist.local_rows(); + DenseMatrix b(n_local, nrhs), x(n_local, nrhs), + x_exact(n_local, nrhs); + + // construct a random exact solution + x_exact.random(); + + // compute a right hand-side corresponding to exact solution + // x_exact + Adist.spmv(x_exact, b); + + + // One can also directly pass the CSR rowptr, colind, and value + // arrays, see + // https://portal.nersc.gov/project/sparse/strumpack/master/classstrumpack_1_1SparseSolver.html + spss.set_matrix(Adist); + + // For geometric nested dissection, the mesh dimensions n x n + // x n (and separator width if not 1) need to be provided. 
For + // other fill-reducing orderings, such as the default + // ReorderingStrategy::METIS, just call spss.reorder(); + spss.reorder(n, n, n); + + // the actual numerical factorization phase. If reorder() was not + // already called, it will be called by factor internally. + spss.factor(); + + // solve a linear system Ax=b for x. If factor was not already + // called, then it will be called by solve internally. + spss.solve(b, x); + + auto scaled_res = Adist.max_scaled_residual(x, b); + x.scaled_add(-1., x_exact); + auto relerr = x.normF() / x_exact.normF(); + if (!myrank) { + std::cout << "# COMPONENTWISE SCALED RESIDUAL = " + << scaled_res << std::endl; + std::cout << "# relative error = ||x-x_exact||_F/||x_exact||_F = " + << relerr << std::endl; + } + } + scalapack::Cblacs_exit(1); + MPI_Finalize(); + return 0; +} diff --git a/sundials/CMakeLists.txt b/sundials/CMakeLists.txt index 763d4dd..387548a 100644 --- a/sundials/CMakeLists.txt +++ b/sundials/CMakeLists.txt @@ -1,48 +1,59 @@ -# SUNDIALS Copyright Start -# Copyright (c) 2002-2019, Lawrence Livermore National Security -# and Southern Methodist University. -# All rights reserved. +# SUNDIALS Copyright Start Copyright (c) 2002-2019, Lawrence Livermore National Security and +# Southern Methodist University. All rights reserved. # # See the top-level LICENSE and NOTICE files for details. 
# -# SPDX-License-Identifier: BSD-3-Clause -# SUNDIALS Copyright End +# SPDX-License-Identifier: BSD-3-Clause SUNDIALS Copyright End # --------------------------------------------------------------- -cmake_minimum_required(VERSION 3.12) - -# set the project name and default languages -project(sundials-xsdk-examples CXX C) - -set(CMAKE_C_COMPILER ${MPI_C_COMPILER}) -set(CMAKE_CXX_COMPILER ${MPI_CXX_COMPILER}) -set(CMAKE_CUDA_HOST_COMPILER ${MPI_CXX_COMPILER}) - # add the executables if(ENABLE_PETSC) - add_executable(cv_petsc_ex7 cv_petsc_ex7.c) - target_link_libraries(cv_petsc_ex7 XSDK::SUNDIALS PETSC::ALL) - if(MATH_LIBRARY) - target_link_libraries(cv_petsc_ex7 ${MATH_LIBRARY}) - endif() - install(TARGETS cv_petsc_ex7 RUNTIME DESTINATION bin) + add_executable(cv_petsc_ex7 cv_petsc_ex7.c) + target_link_libraries( + cv_petsc_ex7 PRIVATE XSDK::SUNDIALS XSDK::PETSc MPI::MPI_C + ) + if(MATH_LIBRARY) + target_link_libraries(cv_petsc_ex7 PRIVATE ${MATH_LIBRARY}) + endif() + if (TARGET OpenMP::OpenMP_C) + target_link_libraries(cv_petsc_ex7 PRIVATE OpenMP::OpenMP_C) + endif() + xsdk_add_test(NAME SUNDIALS-cv_petsc_ex7_1 COMMAND $ MPI_NPROCS 4) + xsdk_add_test( + NAME + SUNDIALS-cv_petsc_ex7_2 + COMMAND + $ + --snes_type + anderson + MPI_NPROCS + 4 + ) + install(TARGETS cv_petsc_ex7 RUNTIME DESTINATION bin) endif() if(ENABLE_SUPERLU) - add_executable(ark_brusselator1D_FEM_sludist ark_brusselator1D_FEM_sludist.cpp) - target_link_libraries(ark_brusselator1D_FEM_sludist PRIVATE XSDK::SUNDIALS XSDK::SUPERLU) - if(MATH_LIBRARY) - target_link_libraries(ark_brusselator1D_FEM_sludist PRIVATE ${MATH_LIBRARY}) - endif() - install(TARGETS ark_brusselator1D_FEM_sludist RUNTIME DESTINATION bin) + set(tgt ark_brusselator1D_FEM_sludist) + add_executable(${tgt} ark_brusselator1D_FEM_sludist.cpp) + target_link_libraries(${tgt} PRIVATE XSDK::SUNDIALS XSDK::SUPERLU MPI::MPI_C) + if(MATH_LIBRARY) + target_link_libraries(ark_brusselator1D_FEM_sludist PRIVATE ${MATH_LIBRARY}) + endif() + 
xsdk_add_test(NAME SUNDIALS-${tgt} COMMAND $ MPI_NPROCS 1) + install(TARGETS ${tgt} RUNTIME DESTINATION bin) endif() if(ENABLE_MAGMA) - set_source_files_properties(cvRoberts_blockdiag_magma.cpp PROPERTIES LANGUAGE CUDA) - add_executable(cvRoberts_blockdiag_magma cvRoberts_blockdiag_magma.cpp) - target_link_libraries(cvRoberts_blockdiag_magma PRIVATE XSDK::SUNDIALS XSDK::MAGMA) - target_compile_definitions(cvRoberts_blockdiag_magma PRIVATE SUNDIALS_MAGMA_BACKENDS_CUDA) - if(MATH_LIBRARY) - target_link_libraries(cvRoberts_blockdiag_magma PRIVATE ${MATH_LIBRARY}) - endif() - install(TARGETS cvRoberts_blockdiag_magma RUNTIME DESTINATION bin) + if(ENABLE_CUDA) + set_source_files_properties(cvRoberts_blockdiag_magma.cpp PROPERTIES LANGUAGE CUDA) + elseif(ENABLE_HIP) + set_source_files_properties(cvRoberts_blockdiag_magma.cpp PROPERTIES LANGUAGE HIP) + endif() + add_executable(cvRoberts_blockdiag_magma cvRoberts_blockdiag_magma.cpp) + target_link_libraries(cvRoberts_blockdiag_magma PRIVATE XSDK::SUNDIALS XSDK::MAGMA) + if(MATH_LIBRARY) + target_link_libraries(cvRoberts_blockdiag_magma PRIVATE ${MATH_LIBRARY}) + endif() + xsdk_add_test( + NAME SUNDIALS-cvRoberts_blockdiag_magma COMMAND $ + ) + install(TARGETS cvRoberts_blockdiag_magma RUNTIME DESTINATION bin) endif() - diff --git a/tools/package.py b/tools/package.py deleted file mode 100644 index 73e333a..0000000 --- a/tools/package.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright 2013-2021 Lawrence Livermore National Security, LLC and other -# Spack Project Developers. See the top-level COPYRIGHT file for details. 
-# -# SPDX-License-Identifier: (Apache-2.0 OR MIT) - - -from spack import * - -class XsdkExamples(CMakePackage): - """xSDK Examples show usage of libraries in the xSDK package.""" - - homepage = 'http://xsdk.info' - url = 'https://github.com/xsdk-project/xsdk-examples/archive/v0.1.0.tar.gz' - git = "https://github.com/xsdk-project/xsdk-examples" - - maintainers = ['acfisher', 'balay', 'balos1', 'luszczek'] - - version('develop', branch='master') - version('0.2.0', sha256='cf26e3a16a83eba6fb297fb106b0934046f17cf978f96243b44d9d17ad186db6') - version('0.1.0', sha256='d24cab1db7c0872b6474d69e598df9c8e25d254d09c425fb0a6a8d6469b8018f') - - variant('cuda', default=False, description='Compile CUDA examples') - - depends_on('xsdk+cuda ^mfem+cuda', when='+cuda') - depends_on('xsdk@0.6.0', when='@0.2.0') - depends_on('xsdk@0.5.0', when='@0.1.0') - - def cmake_args(self): - spec = self.spec - args = [ - '-DCMAKE_C_COMPILER=%s' % spec['mpi'].mpicc, - '-DMETIS_INCLUDE_DIRS=%s' % spec['metis'].prefix.include, - '-DMETIS_LIBRARY=%s' % spec['metis'].libs, - '-DMPI_DIR=%s' % spec['mpi'].prefix, - '-DSUNDIALS_DIR=%s' % spec['sundials'].prefix, - '-DHYPRE_DIR=%s' % spec['hypre'].prefix, - '-DHYPRE_INCLUDE_DIR=%s' % spec['hypre'].prefix.include, - '-DPETSC_DIR=%s' % spec['petsc'].prefix, - '-DPETSC_INCLUDE_DIR=%s' % spec['petsc'].prefix.include, - '-DPETSC_LIBRARY_DIR=%s' % spec['petsc'].prefix.lib, - '-DSUPERLUDIST_DIR=%s' % spec['superlu-dist'].prefix, - '-DSUPERLUDIST_INCLUDE_DIR=%s' % - spec['superlu-dist'].prefix.include, - '-DSUPERLUDIST_LIBRARY_DIR=%s' % spec['superlu-dist'].prefix.lib, - '-DSUPERLUDIST_LIBRARY=%s' % spec['superlu-dist'].libs, - '-DMFEM_DIR=%s' % spec['mfem'].prefix, - '-DMFEM_INCLUDE_DIR=%s' % spec['mfem'].prefix.include, - '-DMFEM_LIBRARY_DIR=%s' % spec['mfem'].prefix.include.lib, - '-DGINKGO_DIR=%s' % spec['ginkgo'].prefix, - '-DGINKGO_INCLUDE_DIR=%s' % spec['ginkgo'].prefix.include, - '-DGINKGO_LIBRARY_DIR=%s' % spec['ginkgo'].prefix.include.lib, - # allow 
use of default `find_package(Ginkgo)` - '-DCMAKE_PREFIX_PATH=%s/cmake' % spec['ginkgo'].prefix.include.lib - ] - if 'trilinos' in spec: # if trilinos variant was activated for xsdk - args.extend([ - '-DTRILINOS_DIR_PATH=%s' % spec['trilinos'].prefix, - ]) - if 'zlib' in spec: # if zlib variant was activated for MFEM - args.extend([ - '-DZLIB_LIBRARY_DIR=%s' % spec['zlib'].prefix.lib, - ]) - return args diff --git a/trilinos/CMakeLists.txt b/trilinos/CMakeLists.txt index 36ba973..57de7d7 100644 --- a/trilinos/CMakeLists.txt +++ b/trilinos/CMakeLists.txt @@ -1,7 +1,9 @@ cmake_minimum_required(VERSION 3.12) -project( amesos-superlu - DESCRIPTION "Trilinos Examples" - LANGUAGES CXX) +project( + amesos-superlu + DESCRIPTION "Trilinos Examples" + LANGUAGES CXX +) set(CMAKE_CXX_COMPILER ${MPI_CXX_COMPILER}) @@ -9,35 +11,65 @@ set(TRILINOS_INCLUDE_DIRS ${TRILINOS_DIR}/include) set(TRILINOS_LIBRARIES ${TRILINOS_DIR}/lib) find_package(OpenMP) -if (OPENMP_FOUND) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" ) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" ) -endif() -add_executable(SimpleSolve_WithParameters SimpleSolve_WithParameters.cpp) -target_compile_options(SimpleSolve_WithParameters PRIVATE ${OPENMP_C_FLAGS}) -target_link_libraries(SimpleSolve_WithParameters PRIVATE XSDK::SUPERLU METIS) +find_library( + AMESOS2_LIB + NAMES amesos2 + PATHS ${TRILINOS_LIBRARIES} +) +find_library( + TPETRA_LIB + NAMES tpetra + PATHS ${TRILINOS_LIBRARIES} +) +find_library( + TEUCHOSCOMM_LIB + NAMES teuchoscomm + PATHS ${TRILINOS_LIBRARIES} +) +find_library( + TEUCHOSCORE_LIB + NAMES teuchoscore + PATHS ${TRILINOS_LIBRARIES} +) +find_library( + TEUCHOSLIST_LIB + NAMES teuchosparameterlist + PATHS ${TRILINOS_LIBRARIES} +) +find_library( + KOKKOSCORE_LIB + NAMES kokkoscore + PATHS ${TRILINOS_LIBRARIES} +) -set(CMAKE_PREFIX_PATH ${TRILINOS_LIBRARIES}) -find_library(AMESOS2_LIB NAMES amesos2 PATHS ${TRILINOS_LIBRARIES}) -find_library(TPETRA_LIB NAMES tpetra PATHS 
${TRILINOS_LIBRARIES}) -find_library(TEUCHOSCOMM_LIB NAMES teuchoscomm PATHS ${TRILINOS_LIBRARIES}) -find_library(TEUCHOSCORE_LIB NAMES teuchoscore PATHS ${TRILINOS_LIBRARIES}) -find_library(TEUCHOSLIST_LIB NAMES teuchosparameterlist PATHS ${TRILINOS_LIBRARIES}) -find_library(KOKKOSCORE_LIB NAMES kokkoscore PATHS ${TRILINOS_LIBRARIES}) +if(ENABLE_SUPERLU) + add_executable(SimpleSolve_WithParameters SimpleSolve_WithParameters.cpp) + target_link_libraries(SimpleSolve_WithParameters PRIVATE XSDK::SUPERLU METIS) + if (TARGET OpenMP::OpenMP_CXX) + target_link_libraries(SimpleSolve_WithParameters PRIVATE OpenMP::OpenMP_CXX) + endif() -target_link_libraries(SimpleSolve_WithParameters PRIVATE - ${AMESOS2_LIB} ${TPETRA_LIB} ${TEUCHOSCOMM_LIB} ${TEUCHOSCORE_LIB} ${TEUCHOSLIST_LIB} ${KOKKOSCORE_LIB}) + target_link_libraries( + SimpleSolve_WithParameters PRIVATE ${AMESOS2_LIB} ${TPETRA_LIB} ${TEUCHOSCOMM_LIB} + ${TEUCHOSCORE_LIB} ${TEUCHOSLIST_LIB} ${KOKKOSCORE_LIB} + ) -target_include_directories(SimpleSolve_WithParameters PRIVATE ${TRILINOS_INCLUDE_DIRS}) + target_include_directories(SimpleSolve_WithParameters PRIVATE ${TRILINOS_INCLUDE_DIRS}) -if(MATH_LIBRARY) - target_link_libraries(SimpleSolve_WithParameters PRIVATE ${MATH_LIBRARY}) -endif() + if(MATH_LIBRARY) + target_link_libraries(SimpleSolve_WithParameters PRIVATE ${MATH_LIBRARY}) + endif() -find_library(DYNLNK_LIBRARY NAMES dl) -if(DYNLNK_LIBRARY) - target_link_libraries(SimpleSolve_WithParameters PRIVATE ${DYNLNK_LIBRARY}) -endif() + find_library(DYNLNK_LIBRARY NAMES dl) + if(DYNLNK_LIBRARY) + target_link_libraries(SimpleSolve_WithParameters PRIVATE ${DYNLNK_LIBRARY}) + endif() -install(TARGETS SimpleSolve_WithParameters RUNTIME DESTINATION bin) + xsdk_add_test( + NAME TRILINOS-SimpleSolve_WithParameters MPI_NPROCS 1 COMMAND + $ + ) + + install(TARGETS SimpleSolve_WithParameters RUNTIME DESTINATION bin) +endif()