Update params for KernelConfig and add basic tests for getThreadId
Some checks are pending
Build and Test / build-and-test (push) Waiting to run

This commit is contained in:
Alex Selimov 2025-09-18 23:47:40 -04:00
parent 9825c0d14d
commit 8dec472929
Signed by: aselimov
GPG key ID: 3DDB9C3E023F1F31
5 changed files with 63 additions and 12 deletions

View file

@ -28,17 +28,19 @@ struct KernelConfig {
* @param max_blocks_per_dim Maximum blocks per grid dimension (default: 65535)
* @return KernelConfig with optimal grid and block dimensions
*/
// Declared once, with size_t for every count, so the parameter types match
// n_elements and a call that relies on the default arguments resolves to a
// single candidate instead of being ambiguous between an int and a size_t
// overload.
KernelConfig get_launch_config(size_t n_elements,
                               size_t threads_per_block = 256,
                               size_t max_blocks_per_dim = 65535);
/**
 * Calculate the flat 1D thread index for kernels launched with
 * get_launch_config(). Use this inside your CUDA kernels.
 *
 * Blocks are linearised across the grid with blockIdx.x varying fastest, then
 * blockIdx.y, then blockIdx.z. Within a block only threadIdx.x and blockDim.x
 * are used; threadIdx.y/z and blockDim.y/z do not contribute, so this assumes
 * one-dimensional blocks.
 *
 * @return Linear index of the calling thread as a size_t.
 */
__device__ inline size_t get_thread_id() {
  // Cast first so that every sum and product below is carried out in size_t.
  const size_t block_id =
      ((size_t)blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
  return block_id * blockDim.x + threadIdx.x;
}
/**