133 lines
4.6 KiB
Text
133 lines
4.6 KiB
Text
|
#ifndef NEIGHBOR_LIST_CUH
|
||
|
#define NEIGHBOR_LIST_CUH
|
||
|
|
||
|
#include "kernel_config.cuh" // From previous artifact
|
||
|
#include <cuda_runtime.h>
|
||
|
#include <thrust/device_vector.h>
|
||
|
#include <thrust/sequence.h>
|
||
|
#include <thrust/sort.h>
|
||
|
|
||
|
/**
|
||
|
* Simple kernel to initialize neighbor offsets
|
||
|
*/
|
||
|
__global__ void init_neighbor_offsets(int *offsets, size_t n_particles,
|
||
|
int max_neighbors) {
|
||
|
size_t i = get_thread_id();
|
||
|
if (i <= n_particles) { // Note: <= because we need n_particles + 1 elements
|
||
|
offsets[i] = i * max_neighbors;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Neighbor list data structure
|
||
|
* Uses a compact format similar to CSR sparse matrices
|
||
|
*/
|
||
|
struct NeighborList {
|
||
|
int *neighbor_offsets; // Size: n_particles + 1, offset into neighbor_indices
|
||
|
int *neighbor_indices; // Size: total_neighbors, actual neighbor particle IDs
|
||
|
int *neighbor_counts; // Size: n_particles, number of neighbors per particle
|
||
|
size_t max_neighbors; // Maximum neighbors allocated per particle
|
||
|
size_t n_particles;
|
||
|
|
||
|
// Constructor
|
||
|
NeighborList(size_t n_particles, size_t max_neighbors_per_particle)
|
||
|
: max_neighbors(max_neighbors_per_particle), n_particles(n_particles) {
|
||
|
|
||
|
cudaMalloc(&neighbor_offsets, (n_particles + 1) * sizeof(int));
|
||
|
cudaMalloc(&neighbor_indices, n_particles * max_neighbors * sizeof(int));
|
||
|
cudaMalloc(&neighbor_counts, n_particles * sizeof(int));
|
||
|
|
||
|
// Initialize offsets
|
||
|
auto kernel_config = get_launch_config(n_particles);
|
||
|
init_neighbor_offsets<<<kernel_config.blocks, kernel_config.threads>>>(
|
||
|
neighbor_offsets, n_particles, max_neighbors);
|
||
|
}
|
||
|
|
||
|
~NeighborList() {
|
||
|
cudaFree(neighbor_offsets);
|
||
|
cudaFree(neighbor_indices);
|
||
|
cudaFree(neighbor_counts);
|
||
|
}
|
||
|
|
||
|
// Get neighbors for particle i (device function)
|
||
|
__device__ int *get_neighbors(int i) const {
|
||
|
return &neighbor_indices[neighbor_offsets[i]];
|
||
|
}
|
||
|
|
||
|
__device__ int get_neighbor_count(int i) const { return neighbor_counts[i]; }
|
||
|
};
|
||
|
|
||
|
/**
|
||
|
* Cell list structure for spatial hashing
|
||
|
*/
|
||
|
struct CellList {
|
||
|
int *cell_starts; // Size: total_cells, start index in sorted_particles
|
||
|
int *cell_ends; // Size: total_cells, end index in sorted_particles
|
||
|
int *sorted_particles; // Size: n_particles, particle IDs sorted by cell
|
||
|
int *particle_cells; // Size: n_particles, which cell each particle belongs to
|
||
|
|
||
|
int3 grid_size; // Number of cells in each dimension
|
||
|
float3 cell_size; // Size of each cell
|
||
|
float3 box_min; // Minimum corner of simulation box
|
||
|
size_t total_cells;
|
||
|
size_t n_particles;
|
||
|
|
||
|
CellList(size_t n_particles, float3 box_size, float cutoff)
|
||
|
: n_particles(n_particles) {
|
||
|
|
||
|
// Calculate grid dimensions (cell size slightly larger than cutoff)
|
||
|
float cell_margin = 1.001f; // Small safety margin
|
||
|
cell_size = make_float3(cutoff * cell_margin, cutoff * cell_margin,
|
||
|
cutoff * cell_margin);
|
||
|
|
||
|
grid_size.x = (int)ceilf(box_size.x / cell_size.x);
|
||
|
grid_size.y = (int)ceilf(box_size.y / cell_size.y);
|
||
|
grid_size.z = (int)ceilf(box_size.z / cell_size.z);
|
||
|
|
||
|
total_cells = (size_t)grid_size.x * grid_size.y * grid_size.z;
|
||
|
box_min = make_float3(0.0f, 0.0f, 0.0f); // Assume box starts at origin
|
||
|
|
||
|
// Allocate memory
|
||
|
cudaMalloc(&cell_starts, total_cells * sizeof(int));
|
||
|
cudaMalloc(&cell_ends, total_cells * sizeof(int));
|
||
|
cudaMalloc(&sorted_particles, n_particles * sizeof(int));
|
||
|
cudaMalloc(&particle_cells, n_particles * sizeof(int));
|
||
|
}
|
||
|
|
||
|
~CellList() {
|
||
|
cudaFree(cell_starts);
|
||
|
cudaFree(cell_ends);
|
||
|
cudaFree(sorted_particles);
|
||
|
cudaFree(particle_cells);
|
||
|
}
|
||
|
|
||
|
// Get cell index from 3D coordinates
|
||
|
__device__ int get_cell_index(int x, int y, int z) const {
|
||
|
return z * grid_size.x * grid_size.y + y * grid_size.x + x;
|
||
|
}
|
||
|
|
||
|
// Get cell coordinates from position
|
||
|
__device__ int3 get_cell_coords(float3 pos) const {
|
||
|
return make_int3((int)((pos.x - box_min.x) / cell_size.x),
|
||
|
(int)((pos.y - box_min.y) / cell_size.y),
|
||
|
(int)((pos.z - box_min.z) / cell_size.z));
|
||
|
}
|
||
|
};
|
||
|
|
||
|
// Forward declarations
|
||
|
void build_cell_list(const float4 *positions, CellList &cell_list,
|
||
|
const float3 *box_lengths);
|
||
|
|
||
|
void build_neighbor_list(const float4 *positions, const CellList &cell_list,
|
||
|
NeighborList &neighbor_list, float cutoff_squared,
|
||
|
const float3 *box_lengths);
|
||
|
|
||
|
/**
|
||
|
* High-level interface - builds complete neighbor list
|
||
|
*/
|
||
|
void build_neighbor_list_optimized(const float4 *positions, size_t n_particles,
|
||
|
const float3 *box_lengths, float cutoff,
|
||
|
NeighborList &neighbor_list);
|
||
|
|
||
|
#endif
|