diff --git a/Advanced_Tutorial/00-Kernel-Fusion/00-Kernel-Fusion.cpp b/Advanced_Tutorial/00-Kernel-Fusion/00-Kernel-Fusion.cpp
new file mode 100644
index 0000000..f3a0a2d
--- /dev/null
+++ b/Advanced_Tutorial/00-Kernel-Fusion/00-Kernel-Fusion.cpp
@@ -0,0 +1,10 @@
+#include
+
+int main(int argc, char *argv[])
+{
+
+  std::cout<<"TO DO"<
+
+// Tutorial driver: obtains a pooled allocation through Umpire, then issues
+// two RAJA::launch kernels whose loop bodies are still placeholders, and
+// finally hands the allocation back to the pool.
+//
+// NOTE(review): in this view of the patch the contents of every <...> pair
+// are missing (include targets and the template arguments of makeAllocator,
+// cuda_launch_t, LaunchPolicy, LoopPolicy, launch and loop). Only the
+// static_cast target below could be recovered, from the declared type of
+// `cnt`; restore the others from the original file.
+// NOTE(review): `width` is not declared anywhere in the visible text --
+// confirm it is defined before main.
+int main(int argc, char *argv[])
+{
+
+  auto& rm = umpire::ResourceManager::getInstance();
+  unsigned char *cnt{nullptr};
+
+  // Build the pool "qpool" on top of the "UM" allocator and take
+  // width * width bytes from it.
+  auto allocator = rm.getAllocator("UM");
+  auto pool = rm.makeAllocator("qpool", allocator);
+  cnt = static_cast<unsigned char *>(pool.allocate(width * width * sizeof(unsigned char)));
+
+  using device_launch = RAJA::cuda_launch_t;
+  using launch_policy = RAJA::LaunchPolicy;
+
+  //Example 1. Global Indexing:
+  //GPU programming models such as CUDA and HIP follow a thread/block(team) programming model
+  //in which a predefined compute grid of teams (blocks) and threads executes the kernel body.
+  {
+    const int N_x = 10000;
+    const int N_y = 20000;
+    const int block_sz = 256;
+    // NOTE(review): this yields one or two more teams per dimension than
+    // ceil(N / block_sz); the usual round-up is (N + block_sz - 1) / block_sz.
+    // Left unchanged because the loop policies are not visible here --
+    // confirm whether the surplus is intended.
+    const int n_blocks_x = (N_x + block_sz) / block_sz + 1;
+    const int n_blocks_y = (N_y + block_sz) / block_sz + 1;
+
+    using loop_pol_x = RAJA::LoopPolicy;
+
+    // n_blocks_x by n_blocks_y teams, block_sz threads per team.
+    RAJA::launch
+      (RAJA::LaunchParams(RAJA::Teams(n_blocks_x, n_blocks_y), RAJA::Threads(block_sz)),
+       [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {
+
+         // Nested loops over [0, N_y) and [0, N_x); the body is a placeholder.
+         RAJA::loop(ctx, RAJA::RangeSegment(0, N_y), [&] (int gy) {
+           RAJA::loop(ctx, RAJA::RangeSegment(0, N_x), [&] (int gx) {
+
+             //populate
+
+           });
+         });
+
+       });
+
+  }
+
+  //Iteration Space:
+  {
+    const int n_blocks = 50000;
+    const int block_sz = 64;
+
+    // n_blocks teams of block_sz threads; the loop over [0, width) is empty for now.
+    RAJA::launch
+      ( RAJA::LaunchParams(RAJA::Teams(n_blocks),
+                           RAJA::Threads(block_sz)),
+        [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {
+
+          RAJA::loop(ctx, RAJA::RangeSegment(0, width), [&] (int col) {
+
+          });
+
+        });
+  }
+
+  // Return the buffer to the pool it came from; the original never released it.
+  pool.deallocate(cnt);
+  cnt = nullptr;
+
+  return 0;
+}
diff --git a/Advanced_Tutorial/01-GPU-Threads/CMakeLists.txt b/Advanced_Tutorial/01-GPU-Threads/CMakeLists.txt
new file mode 100644
index 0000000..7179d6a
--- /dev/null
+++ 
b/Advanced_Tutorial/01-GPU-Threads/CMakeLists.txt
@@ -0,0 +1,11 @@
+###############################################################################
+# Copyright (c) 2016-23, Lawrence Livermore National Security, LLC
+# and RAJA project contributors. See the RAJA/LICENSE file for details.
+#
+# SPDX-License-Identifier: (BSD-3-Clause)
+###############################################################################
+
+blt_add_executable(  # executable target for the 01-GPU-Threads tutorial
+  NAME 01-GPU-Threads
+  SOURCES 01-GPU-Threads.cpp
+  DEPENDS_ON cuda umpire RAJA)  # the source uses umpire:: and RAJA:: APIs
diff --git a/Advanced_Tutorial/CMakeLists.txt b/Advanced_Tutorial/CMakeLists.txt
new file mode 100644
index 0000000..9bfb736
--- /dev/null
+++ b/Advanced_Tutorial/CMakeLists.txt
@@ -0,0 +1,10 @@
+###############################################################################
+# Copyright (c) 2016-23, Lawrence Livermore National Security, LLC
+# and RAJA project contributors. See the RAJA/LICENSE file for details.
+#
+# SPDX-License-Identifier: (BSD-3-Clause)
+###############################################################################
+
+add_subdirectory(00-Kernel-Fusion)  # placeholder program that prints "TO DO"
+
+add_subdirectory(01-GPU-Threads)  # RAJA::launch teams/threads examples
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 49d50ed..8d0cb19 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,3 +25,4 @@ endif()
 add_subdirectory(tpl)
 add_subdirectory(Intro_Tutorial)
 add_subdirectory(Intermediate_Tutorial)
+add_subdirectory(Advanced_Tutorial)  # pulls in the two Advanced_Tutorial subdirectories