cudaCAC/kernels/neighbor_list.cuh

118 lines
4.1 KiB
Text

#ifndef NEIGHBOR_LIST_CUH
#define NEIGHBOR_LIST_CUH
#include "box.hpp"
#include "kernel_config.cuh"
#include "utils.cuh"
#include <cuda_runtime.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/pair.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>
/**
* Cell list structure for spatial hashing
*/
struct CellList {
int *cell_starts; // Size: total_cells, start index in sorted_particles
int *cell_count; // Size: total_cells, end index in sorted_particles
int *sorted_particles; // Size: n_particles, particle IDs sorted by cell
int *particle_cells; // Size: n_particles, which cell each particle belongs to
int3 grid_size; // Number of cells in each dimension
float3 cell_size; // Size of each cell
float3 box_min; // Minimum corner of simulation box
size_t total_cells;
size_t n_particles;
CellList(size_t n_particles, Box &box, float r_cutoff)
: n_particles(n_particles) {
box_min.x = box.xlo;
box_min.y = box.ylo;
box_min.z = box.zlo;
auto [grid_size, cell_size] = calc_grid_and_cell_size(box, r_cutoff);
this->grid_size = grid_size;
this->cell_size = cell_size;
total_cells = grid_size.x * grid_size.y * grid_size.z;
cudaMalloc(&cell_starts, total_cells * sizeof(int));
cudaMalloc(&cell_count, total_cells * sizeof(int));
cudaMalloc(&sorted_particles, n_particles * sizeof(int));
cudaMalloc(&particle_cells, n_particles * sizeof(int));
}
~CellList() {
cudaFree(cell_starts);
cudaFree(cell_count);
cudaFree(sorted_particles);
cudaFree(particle_cells);
}
// Get cell index from 3D coordinates
// TODO; Maybe update this to use Morton Encodings in the future to improve
// locality of particle indices. Unclear how much of a benefit this will add,
// but would be cool to do
__host__ __device__ int
get_cell_index_from_cell_coords(int3 cell_coords) const {
return cell_coords.z * grid_size.x * grid_size.y +
cell_coords.y * grid_size.x + cell_coords.x;
}
std::pair<int3, float3> calc_grid_and_cell_size(Box &box,
float r_cutoff) const {
int3 grid_size = {utils::max((int)(box.xhi - box.xlo) / r_cutoff, 1),
utils::max((int)(box.yhi - box.ylo) / r_cutoff, 1),
utils::max((int)(box.zhi - box.zlo) / r_cutoff, 1)};
float3 cell_size = {
(box.xhi - box.xlo) / grid_size.x,
(box.yhi - box.ylo) / grid_size.y,
(box.zhi - box.zlo) / grid_size.z,
};
return std::make_pair(grid_size, cell_size);
}
__host__ __device__ int3 get_cell_coords_from_position(float3 pos) const {
return make_int3((int)((pos.x - box_min.x) / cell_size.x),
(int)((pos.y - box_min.y) / cell_size.y),
(int)((pos.z - box_min.z) / cell_size.z));
}
__host__ __device__ int get_cell_index_from_position(float3 pos) const {
return get_cell_index_from_cell_coords(get_cell_coords_from_position(pos));
}
void assign_particles_to_cells(float3 *positions) {
thrust::device_ptr<int> particle_cells_ptr(particle_cells);
thrust::device_ptr<int> sorted_particles_ptr(sorted_particles);
thrust::device_ptr<int> cell_starts_ptr(cell_starts);
thrust::device_ptr<int> cell_count_ptr(cell_count);
thrust::sequence(sorted_particles_ptr, sorted_particles_ptr + n_particles);
for (size_t i = 0; i < n_particles; i++) {
particle_cells[i] = get_cell_index_from_position(positions[i]);
}
thrust::sort_by_key(particle_cells_ptr, particle_cells_ptr + n_particles,
sorted_particles_ptr);
thrust::fill(cell_starts_ptr, cell_starts_ptr + total_cells, 0);
thrust::fill(cell_count_ptr, cell_count_ptr + total_cells, 0);
thrust::reduce_by_key(particle_cells_ptr, particle_cells_ptr + n_particles,
thrust::constant_iterator<int>(1),
thrust::discard_iterator(), cell_count_ptr);
thrust::exclusive_scan(cell_count_ptr, cell_count_ptr + total_cells,
cell_starts_ptr);
}
};
#endif