diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c411db..618c891 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,11 +16,11 @@ set(CMAKE_CXX_STANDARD 17) # Add debug configuration if(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0 --extended-lambda") + set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0") elseif(CMAKE_BUILD_TYPE STREQUAL "Release") - set(CMAKE_CUDA_FLAGS_RELEASE "-O3 --use_fast_math --extended-lambda") + set(CMAKE_CUDA_FLAGS_RELEASE "-O3 --use_fast_math") elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") - set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O2 --extended-lambda") + set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo -O2") endif() # Set C++ debug/release flags globally diff --git a/kernels/neighbor_list.cuh b/kernels/neighbor_list.cuh index 9708348..73039be 100644 --- a/kernels/neighbor_list.cuh +++ b/kernels/neighbor_list.cuh @@ -6,8 +6,6 @@ #include "utils.cuh" #include #include -#include -#include #include #include #include @@ -53,6 +51,14 @@ struct CellList { cudaFree(particle_cells); } + // Get cell index from 3D coordinates + // TODO; Maybe update this to use Morton Encodings in the future to improve + // locality of particle indices. Unclear how much of a benefit this will add, + // but would be cool to do + __host__ __device__ int get_cell_index(int x, int y, int z) const { + return z * grid_size.x * grid_size.y + y * grid_size.x + x; + } + std::pair calc_grid_and_cell_size(Box &box, float r_cutoff) const { int3 grid_size = {utils::max((int)(box.xhi - box.xlo) / r_cutoff, 1), @@ -68,62 +74,12 @@ struct CellList { return std::make_pair(grid_size, cell_size); } - // Get cell index from 3D coordinates - // TODO; Maybe update this to use Morton Encodings in the future to improve - // locality of particle indices. Unclear how much of a benefit this will add, - // but would be cool to do - __host__ __device__ static int - get_cell_index_from_cell_coords(int3 cell_coords, int3 grid_size) { - return cell_coords.z * grid_size.x * grid_size.y + - cell_coords.y * grid_size.x + cell_coords.x; - } - - __host__ __device__ static int3 - get_cell_coords_from_position(float3 pos, float3 box_min, float3 cell_size) { + // Get cell coordinates from position + __device__ int3 get_cell_coords(float3 pos) const { return make_int3((int)((pos.x - box_min.x) / cell_size.x), (int)((pos.y - box_min.y) / cell_size.y), (int)((pos.z - box_min.z) / cell_size.z)); } - - __host__ __device__ static int - get_cell_index_from_position(float3 pos, int3 grid_size, float3 box_min, - float3 cell_size) { - return get_cell_index_from_cell_coords( - get_cell_coords_from_position(pos, box_min, cell_size), grid_size); - } - - void assign_particles_to_cells(float3 *d_positions) { - thrust::device_ptr particle_cells_ptr(particle_cells); - thrust::device_ptr sorted_particles_ptr(sorted_particles); - thrust::device_ptr cell_starts_ptr(cell_starts); - thrust::device_ptr cell_count_ptr(cell_count); - - thrust::sequence(sorted_particles_ptr, sorted_particles_ptr + n_particles); - - int3 grid_size = this->grid_size; - float3 box_min = this->box_min; - float3 cell_size = this->cell_size; - - thrust::transform(d_positions, d_positions + n_particles, - particle_cells_ptr, - [grid_size, box_min, cell_size] __device__(auto pos) { - return CellList::get_cell_index_from_position( - pos, grid_size, box_min, cell_size); - }); - - thrust::sort_by_key(particle_cells_ptr, particle_cells_ptr + n_particles, - sorted_particles_ptr); - - thrust::fill(cell_starts_ptr, cell_starts_ptr + total_cells, 0); - thrust::fill(cell_count_ptr, cell_count_ptr + total_cells, 0); - - thrust::reduce_by_key(particle_cells_ptr, particle_cells_ptr + n_particles, - thrust::constant_iterator(1), - thrust::discard_iterator(), cell_count_ptr); - - thrust::exclusive_scan(cell_count_ptr, cell_count_ptr + total_cells, - cell_starts_ptr); - } }; #endif diff --git a/tests/cuda_unit_tests/test_neighbor_list.cu b/tests/cuda_unit_tests/test_neighbor_list.cu index 3534697..9818075 100644 --- a/tests/cuda_unit_tests/test_neighbor_list.cu +++ b/tests/cuda_unit_tests/test_neighbor_list.cu @@ -2,8 +2,6 @@ #include "box.hpp" #include "neighbor_list.cuh" #include "gtest/gtest.h" -#include -#include TEST(CellListTest, Constructor) { // Test case 1: Simple case @@ -29,68 +27,5 @@ TEST(CellListTest, GetCellIndex) { int expected_index = z * cell_list.grid_size.x * cell_list.grid_size.y + y * cell_list.grid_size.x + x; - EXPECT_EQ( - CellList::get_cell_index_from_cell_coords({x, y, z}, cell_list.grid_size), - expected_index); -} - -TEST(CellListTest, AssignParticlesToCells) { - Box box(0, 2, 0, 1, 0, 1); // Create a 2x1x1 box - float cutoff = 1.0; - CellList cell_list(4, box, cutoff); // 4 particles - - // Particle positions - thrust::host_vector h_positions(4); - h_positions[0] = make_float3(0.1, 0.1, 0.1); // Cell 0 - h_positions[1] = make_float3(1.1, 0.1, 0.1); // Cell 1 - h_positions[2] = make_float3(0.2, 0.1, 0.1); // Cell 0 - h_positions[3] = make_float3(1.2, 0.1, 0.1); // Cell 1 - - thrust::device_vector d_positions = h_positions; - - cell_list.assign_particles_to_cells( - thrust::raw_pointer_cast(d_positions.data())); - - // Copy results back to host - thrust::host_vector h_particle_cells(cell_list.n_particles); - thrust::host_vector h_sorted_particles(cell_list.n_particles); - thrust::host_vector h_cell_starts(cell_list.total_cells); - thrust::host_vector h_cell_count(cell_list.total_cells); - - cudaMemcpy(h_particle_cells.data(), cell_list.particle_cells, - cell_list.n_particles * sizeof(int), cudaMemcpyDeviceToHost); - cudaMemcpy(h_sorted_particles.data(), cell_list.sorted_particles, - cell_list.n_particles * sizeof(int), cudaMemcpyDeviceToHost); - cudaMemcpy(h_cell_starts.data(), cell_list.cell_starts, - cell_list.total_cells * sizeof(int), cudaMemcpyDeviceToHost); - cudaMemcpy(h_cell_count.data(), cell_list.cell_count, - cell_list.total_cells * sizeof(int), cudaMemcpyDeviceToHost); - - // Expected values - // Particles 0 and 2 are in cell 0, particles 1 and 3 are in cell 1 - // Sorted by cell: (0, 2, 1, 3) - // Cell 0: particles 0, 2 - // Cell 1: particles 1, 3 - - // Verify particle_cells (after sorting by key, this will be sorted) - // Original particle_cells: [0, 1, 0, 1] - // After sort_by_key, particle_cells will be sorted: [0, 0, 1, 1] - thrust::host_vector expected_particle_cells = {0, 0, 1, 1}; - EXPECT_EQ(h_particle_cells, expected_particle_cells); - - // Verify sorted_particles (original indices sorted by cell) - thrust::host_vector expected_sorted_particles = {0, 2, 1, 3}; - EXPECT_EQ(h_sorted_particles, expected_sorted_particles); - - // Verify cell_starts - // Cell 0 starts at index 0 in sorted_particles - // Cell 1 starts at index 2 in sorted_particles - thrust::host_vector expected_cell_starts = {0, 2}; - EXPECT_EQ(h_cell_starts, expected_cell_starts); - - // Verify cell_count - // Cell 0 has 2 particles - // Cell 1 has 2 particles - thrust::host_vector expected_cell_count = {2, 2}; - EXPECT_EQ(h_cell_count, expected_cell_count); + EXPECT_EQ(cell_list.get_cell_index(x, y, z), expected_index); }