Update params for KernelConfig and add basic tests for getThreadId
Some checks are pending
Build and Test / build-and-test (push) Waiting to run
Some checks are pending
Build and Test / build-and-test (push) Waiting to run
This commit is contained in:
parent
9825c0d14d
commit
8dec472929
5 changed files with 63 additions and 12 deletions
|
@@ -13,12 +13,12 @@ void KernelConfig::print() const {
|
|||
total_threads());
|
||||
}
|
||||
|
||||
KernelConfig get_launch_config(size_t n_elements, int threads_per_block,
|
||||
int max_blocks_per_dim) {
|
||||
KernelConfig get_launch_config(size_t n_elements, size_t threads_per_block,
|
||||
size_t max_blocks_per_dim) {
|
||||
|
||||
// Ensure threads_per_block is valid
|
||||
threads_per_block = std::min(threads_per_block, 1024);
|
||||
threads_per_block = std::max(threads_per_block, 32);
|
||||
threads_per_block = std::min(threads_per_block, (size_t)1024);
|
||||
threads_per_block = std::max(threads_per_block, (size_t)32);
|
||||
|
||||
// Calculate total blocks needed
|
||||
size_t total_blocks =
|
||||
|
@@ -66,8 +66,8 @@ KernelConfig get_launch_config_advanced(size_t n_elements, int device_id) {
|
|||
cudaDeviceProp prop;
|
||||
cudaGetDeviceProperties(&prop, device_id);
|
||||
|
||||
int threads_per_block = get_optimal_block_size(device_id);
|
||||
int max_blocks_per_dim = prop.maxGridSize[0];
|
||||
size_t threads_per_block = get_optimal_block_size(device_id);
|
||||
size_t max_blocks_per_dim = prop.maxGridSize[0];
|
||||
|
||||
return get_launch_config(n_elements, threads_per_block, max_blocks_per_dim);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue