Skip to content

Commit

Permalink
advanced tutorial
Browse files Browse the repository at this point in the history
  • Loading branch information
artv3 committed Jul 8, 2024
1 parent cb66f04 commit b58bc24
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 0 deletions.
10 changes: 10 additions & 0 deletions Advanced_Tutorial/00-Kernel-Fusion/00-Kernel-Fusion.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#include <iostream>

/// Placeholder entry point for the kernel-fusion lesson.
///
/// Writes the line "TO DO" to standard output, flushes the stream, and
/// reports success to the caller. The command-line arguments are not read.
int main(int argc, char *argv[])
{
  // Newline followed by an explicit flush.
  std::cout << "TO DO" << '\n' << std::flush;
  return 0;
}
11 changes: 11 additions & 0 deletions Advanced_Tutorial/00-Kernel-Fusion/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
###############################################################################
# Copyright (c) 2016-23, Lawrence Livermore National Security, LLC
# and RAJA project contributors. See the RAJA/LICENSE file for details.
#
# SPDX-License-Identifier: (BSD-3-Clause)
###############################################################################

# Executable for the kernel-fusion lesson: built from a single source file and
# declared as depending on the cuda, umpire and RAJA targets.
blt_add_executable(
  NAME       00-Kernel-Fusion
  SOURCES    00-Kernel-Fusion.cpp
  DEPENDS_ON cuda umpire RAJA)
79 changes: 79 additions & 0 deletions Advanced_Tutorial/01-GPU-Threads/01-GPU-Threads.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#include <iostream>

#include "RAJA/RAJA.hpp"
#include "umpire/Umpire.hpp"
#include "umpire/strategy/QuickPool.hpp"

/// Entry point of the GPU-threads lesson.
///
/// Allocates a byte buffer from an Umpire QuickPool layered on the "UM"
/// allocator, issues two RAJA::launch kernels whose loop bodies are still
/// empty placeholders, and finally returns the buffer to the pool.
///
/// @param argc not read
/// @param argv not read
/// @return 0 once both launches have been issued and the buffer released
int main(int argc, char *argv[])
{
  // NOTE(review): the original sketch used `width` without declaring it.
  // 1024 is a placeholder so the lesson builds -- confirm the intended size.
  const int width = 1024;

  auto& rm = umpire::ResourceManager::getInstance();
  auto allocator = rm.getAllocator("UM");
  auto pool = rm.makeAllocator<umpire::strategy::QuickPool>("qpool", allocator);

  // sizeof() comes first so the whole product is evaluated in size_t rather
  // than int. The buffer is not referenced by the kernels below yet.
  unsigned char *cnt = static_cast<unsigned char*>(
      pool.allocate(sizeof(unsigned char) * width * width));

  // RAJA::launch is parameterised on a LaunchPolicy wrapper, not on the raw
  // backend policy; `false` selects a synchronous CUDA launch.
  using device_launch = RAJA::cuda_launch_t<false>;
  using launch_policy = RAJA::LaunchPolicy<device_launch>;

  // Example 1. Global indexing:
  // GPU programming models such as CUDA and HIP follow a thread/block (team)
  // programming model in which a predefined compute grid of blocks is
  // launched and every thread derives a global index from its block and
  // thread ids. With a direct global mapping each thread handles at most one
  // iteration per dimension, so the grid must cover the full N_x by N_y range.
  {
    const int N_x = 10000;
    const int N_y = 20000;

    // 16 x 16 = 256 threads per block, arranged as a 2D tile so that both the
    // x and the y range are covered by blocks * threads.
    const int block_sz_x = 16;
    const int block_sz_y = 16;

    // Ceiling division: the smallest block count whose threads cover the range.
    const int n_blocks_x = (N_x + block_sz_x - 1) / block_sz_x;
    const int n_blocks_y = (N_y + block_sz_y - 1) / block_sz_y;

    // NOTE(review): policy names follow RAJA's documented
    // cuda_global_thread_{x,y}; adjust if the targeted RAJA release spells
    // them differently.
    using loop_pol_x = RAJA::LoopPolicy<RAJA::cuda_global_thread_x>;
    using loop_pol_y = RAJA::LoopPolicy<RAJA::cuda_global_thread_y>;

    RAJA::launch<launch_policy>
      (RAJA::LaunchParams(RAJA::Teams(n_blocks_x, n_blocks_y),
                          RAJA::Threads(block_sz_x, block_sz_y)),
       [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {

         RAJA::loop<loop_pol_y>(ctx, RAJA::RangeSegment(0, N_y), [&] (int gy) {
           RAJA::loop<loop_pol_x>(ctx, RAJA::RangeSegment(0, N_x), [&] (int gx) {

             //populate

           });
         });

       });
  }

  // Example 2. Iteration space:
  {
    const int n_blocks = 50000;
    const int block_sz = 64;

    // NOTE(review): `col_loop` was used but never defined in the original
    // sketch; a global x mapping is assumed here -- confirm the intended
    // policy for this example.
    using col_loop = RAJA::LoopPolicy<RAJA::cuda_global_thread_x>;

    RAJA::launch<launch_policy>
      (RAJA::LaunchParams(RAJA::Teams(n_blocks),
                          RAJA::Threads(block_sz)),
       [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {

         RAJA::loop<col_loop>(ctx, RAJA::RangeSegment(0, width), [&] (int col) {

         });

       });
  }

  // Hand the buffer back to the pool it came from.
  pool.deallocate(cnt);

  return 0;
}
11 changes: 11 additions & 0 deletions Advanced_Tutorial/01-GPU-Threads/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
###############################################################################
# Copyright (c) 2016-23, Lawrence Livermore National Security, LLC
# and RAJA project contributors. See the RAJA/LICENSE file for details.
#
# SPDX-License-Identifier: (BSD-3-Clause)
###############################################################################

# Executable for the GPU-threads lesson: built from a single source file and
# declared as depending on the cuda, umpire and RAJA targets.
blt_add_executable(
  NAME       01-GPU-Threads
  SOURCES    01-GPU-Threads.cpp
  DEPENDS_ON cuda umpire RAJA)
10 changes: 10 additions & 0 deletions Advanced_Tutorial/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
###############################################################################
# Copyright (c) 2016-23, Lawrence Livermore National Security, LLC
# and RAJA project contributors. See the RAJA/LICENSE file for details.
#
# SPDX-License-Identifier: (BSD-3-Clause)
###############################################################################

# Lesson directories of the advanced tutorial, added in lesson order.
add_subdirectory(00-Kernel-Fusion)
add_subdirectory(01-GPU-Threads)
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ endif()
add_subdirectory(tpl)
add_subdirectory(Intro_Tutorial)
add_subdirectory(Intermediate_Tutorial)
add_subdirectory(Advanced_Tutorial)

0 comments on commit b58bc24

Please sign in to comment.