Update params for KernelConfig and add basic tests for getThreadId
Some checks are pending
Build and Test / build-and-test (push) Waiting to run
Some checks are pending
Build and Test / build-and-test (push) Waiting to run
This commit is contained in:
parent
9825c0d14d
commit
8dec472929
5 changed files with 63 additions and 12 deletions
|
@ -28,17 +28,19 @@ struct KernelConfig {
|
|||
* @param max_blocks_per_dim Maximum blocks per grid dimension (default: 65535)
|
||||
* @return KernelConfig with optimal grid and block dimensions
|
||||
*/
|
||||
KernelConfig get_launch_config(size_t n_elements, int threads_per_block = 256,
|
||||
int max_blocks_per_dim = 65535);
|
||||
KernelConfig get_launch_config(size_t n_elements,
|
||||
size_t threads_per_block = 256,
|
||||
size_t max_blocks_per_dim = 65535);
|
||||
|
||||
/**
|
||||
* Calculate the flattened 1D global thread index (across all grid dimensions) for kernels launched with get_launch_config()
|
||||
* Use this inside your CUDA kernels; only blockDim.x and threadIdx.x are read, so the index is unique per thread only for 1D blocks
|
||||
*/
|
||||
__device__ inline size_t get_thread_id() {
|
||||
return (size_t)blockIdx.z * gridDim.x * gridDim.y * blockDim.x +
|
||||
(size_t)blockIdx.y * gridDim.x * blockDim.x +
|
||||
(size_t)blockIdx.x * blockDim.x + threadIdx.x;
|
||||
size_t index = (size_t)blockIdx.z * gridDim.x * gridDim.y * blockDim.x +
|
||||
(size_t)blockIdx.y * gridDim.x * blockDim.x +
|
||||
(size_t)blockIdx.x * blockDim.x + threadIdx.x;
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue