Skip to content

Commit

Permalink
Remove block-wide search code
Browse files Browse the repository at this point in the history
Always use the binary search code
  • Loading branch information
MrBurmark committed Nov 21, 2023
1 parent 3c6b399 commit 9906d4c
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 38 deletions.
19 changes: 0 additions & 19 deletions src/stream/TRIAD_PARTED_FUSED-Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ __launch_bounds__(block_size)
__global__ void triad_parted_fused_scan_aos(scan_index_type* first_blocks, scan_index_type num_fused,
triad_holder* triad_holders)
{
#if 0
scan_index_type min_j = 0;
scan_index_type max_j = num_fused-1;
scan_index_type j = (min_j + max_j + 1) / 2;
Expand All @@ -119,24 +118,6 @@ __global__ void triad_parted_fused_scan_aos(scan_index_type* first_blocks, scan_
j = (min_j + max_j + 1) / 2;
first_block = first_blocks[j];
}
#elif 1
__shared__ scan_index_type s_j;
__shared__ scan_index_type s_first_block;
for (scan_index_type j = threadIdx.x; j < num_fused; j += block_size) {
scan_index_type first_block = first_blocks[j];
if (first_block <= blockIdx.x) {
if (j+1 == num_fused || first_blocks[j+1] > blockIdx.x) {
s_j = j;
s_first_block = first_block;
}
} else {
break;
}
}
__syncthreads();
scan_index_type j = s_j;
scan_index_type first_block = s_first_block;
#endif

Index_type len = triad_holders[j].len;
Real_ptr a = triad_holders[j].a;
Expand Down
19 changes: 0 additions & 19 deletions src/stream/TRIAD_PARTED_FUSED-Hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,6 @@ __launch_bounds__(block_size)
__global__ void triad_parted_fused_scan_aos(scan_index_type* first_blocks, scan_index_type num_fused,
triad_holder* triad_holders)
{
#if 1
scan_index_type min_j = 0;
scan_index_type max_j = num_fused-1;
scan_index_type j = (min_j + max_j + 1) / 2;
Expand All @@ -119,24 +118,6 @@ __global__ void triad_parted_fused_scan_aos(scan_index_type* first_blocks, scan_
j = (min_j + max_j + 1) / 2;
first_block = first_blocks[j];
}
#elif 0
__shared__ scan_index_type s_j;
__shared__ scan_index_type s_first_block;
for (scan_index_type j = threadIdx.x; j < num_fused; j += block_size) {
scan_index_type first_block = first_blocks[j];
if (first_block <= blockIdx.x) {
if (j+1 == num_fused || first_blocks[j+1] > blockIdx.x) {
s_j = j;
s_first_block = first_block;
}
} else {
break;
}
}
__syncthreads();
scan_index_type j = s_j;
scan_index_type first_block = s_first_block;
#endif

Index_type len = triad_holders[j].len;
Real_ptr a = triad_holders[j].a;
Expand Down

0 comments on commit 9906d4c

Please sign in to comment.