cudaCAC/kernels/neighbor_list.cuh

133 lines
4.6 KiB
Text
Raw Normal View History

2025-09-13 23:11:53 -04:00
#ifndef NEIGHBOR_LIST_CUH
#define NEIGHBOR_LIST_CUH
#include "kernel_config.cuh" // From previous artifact
#include <cuda_runtime.h>
#include <thrust/device_vector.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>
/**
* Simple kernel to initialize neighbor offsets
*/
__global__ void init_neighbor_offsets(int *offsets, size_t n_particles,
int max_neighbors) {
size_t i = get_thread_id();
if (i <= n_particles) { // Note: <= because we need n_particles + 1 elements
offsets[i] = i * max_neighbors;
}
}
/**
* Neighbor list data structure
* Uses a compact format similar to CSR sparse matrices
*/
struct NeighborList {
int *neighbor_offsets; // Size: n_particles + 1, offset into neighbor_indices
int *neighbor_indices; // Size: total_neighbors, actual neighbor particle IDs
int *neighbor_counts; // Size: n_particles, number of neighbors per particle
size_t max_neighbors; // Maximum neighbors allocated per particle
size_t n_particles;
// Constructor
NeighborList(size_t n_particles, size_t max_neighbors_per_particle)
: max_neighbors(max_neighbors_per_particle), n_particles(n_particles) {
cudaMalloc(&neighbor_offsets, (n_particles + 1) * sizeof(int));
cudaMalloc(&neighbor_indices, n_particles * max_neighbors * sizeof(int));
cudaMalloc(&neighbor_counts, n_particles * sizeof(int));
// Initialize offsets
auto kernel_config = get_launch_config(n_particles);
init_neighbor_offsets<<<kernel_config.blocks, kernel_config.threads>>>(
neighbor_offsets, n_particles, max_neighbors);
}
~NeighborList() {
cudaFree(neighbor_offsets);
cudaFree(neighbor_indices);
cudaFree(neighbor_counts);
}
// Get neighbors for particle i (device function)
__device__ int *get_neighbors(int i) const {
return &neighbor_indices[neighbor_offsets[i]];
}
__device__ int get_neighbor_count(int i) const { return neighbor_counts[i]; }
};
/**
* Cell list structure for spatial hashing
*/
struct CellList {
int *cell_starts; // Size: total_cells, start index in sorted_particles
int *cell_ends; // Size: total_cells, end index in sorted_particles
int *sorted_particles; // Size: n_particles, particle IDs sorted by cell
int *particle_cells; // Size: n_particles, which cell each particle belongs to
int3 grid_size; // Number of cells in each dimension
float3 cell_size; // Size of each cell
float3 box_min; // Minimum corner of simulation box
size_t total_cells;
size_t n_particles;
CellList(size_t n_particles, float3 box_size, float cutoff)
: n_particles(n_particles) {
// Calculate grid dimensions (cell size slightly larger than cutoff)
float cell_margin = 1.001f; // Small safety margin
cell_size = make_float3(cutoff * cell_margin, cutoff * cell_margin,
cutoff * cell_margin);
grid_size.x = (int)ceilf(box_size.x / cell_size.x);
grid_size.y = (int)ceilf(box_size.y / cell_size.y);
grid_size.z = (int)ceilf(box_size.z / cell_size.z);
total_cells = (size_t)grid_size.x * grid_size.y * grid_size.z;
box_min = make_float3(0.0f, 0.0f, 0.0f); // Assume box starts at origin
// Allocate memory
cudaMalloc(&cell_starts, total_cells * sizeof(int));
cudaMalloc(&cell_ends, total_cells * sizeof(int));
cudaMalloc(&sorted_particles, n_particles * sizeof(int));
cudaMalloc(&particle_cells, n_particles * sizeof(int));
}
~CellList() {
cudaFree(cell_starts);
cudaFree(cell_ends);
cudaFree(sorted_particles);
cudaFree(particle_cells);
}
// Get cell index from 3D coordinates
__device__ int get_cell_index(int x, int y, int z) const {
return z * grid_size.x * grid_size.y + y * grid_size.x + x;
}
// Get cell coordinates from position
__device__ int3 get_cell_coords(float3 pos) const {
return make_int3((int)((pos.x - box_min.x) / cell_size.x),
(int)((pos.y - box_min.y) / cell_size.y),
(int)((pos.z - box_min.z) / cell_size.z));
}
};
// Forward declarations
void build_cell_list(const float4 *positions, CellList &cell_list,
const float3 *box_lengths);
void build_neighbor_list(const float4 *positions, const CellList &cell_list,
NeighborList &neighbor_list, float cutoff_squared,
const float3 *box_lengths);
/**
* High-level interface - builds complete neighbor list
*/
void build_neighbor_list_optimized(const float4 *positions, size_t n_particles,
const float3 *box_lengths, float cutoff,
NeighborList &neighbor_list);
#endif