Some checks failed
Build and Test / build-and-test (push) Failing after 5m3s
194 lines
6.9 KiB
Text
194 lines
6.9 KiB
Text
#include "neighbor_list.cuh"
|
|
|
|
/**
|
|
* Step 1: Assign particles to cells
|
|
*/
|
|
__global__ void assign_particles_to_cells_kernel(const float4 *positions,
|
|
int *particle_cells,
|
|
const CellList cell_list,
|
|
size_t n_particles) {
|
|
size_t i = get_thread_id();
|
|
if (i >= n_particles)
|
|
return;
|
|
|
|
float4 pos = positions[i];
|
|
float3 pos3 = make_float3(pos.x, pos.y, pos.z);
|
|
|
|
int3 cell_coords = cell_list.get_cell_coords(pos3);
|
|
|
|
// Clamp to valid range (handle edge cases)
|
|
cell_coords.x = max(0, min(cell_coords.x, cell_list.grid_size.x - 1));
|
|
cell_coords.y = max(0, min(cell_coords.y, cell_list.grid_size.y - 1));
|
|
cell_coords.z = max(0, min(cell_coords.z, cell_list.grid_size.z - 1));
|
|
|
|
particle_cells[i] =
|
|
cell_list.get_cell_index(cell_coords.x, cell_coords.y, cell_coords.z);
|
|
}
|
|
|
|
/**
|
|
* Step 2: Find cell boundaries after sorting
|
|
*/
|
|
__global__ void find_cell_boundaries_kernel(const int *sorted_particle_cells,
|
|
int *cell_starts, int *cell_ends,
|
|
size_t n_particles,
|
|
size_t total_cells) {
|
|
size_t i = get_thread_id();
|
|
if (i >= n_particles)
|
|
return;
|
|
|
|
int cell = sorted_particle_cells[i];
|
|
|
|
// Check if this is the start of a new cell
|
|
if (i == 0 || sorted_particle_cells[i - 1] != cell) {
|
|
cell_starts[cell] = i;
|
|
}
|
|
|
|
// Check if this is the end of a cell
|
|
if (i == n_particles - 1 || sorted_particle_cells[i + 1] != cell) {
|
|
cell_ends[cell] = i + 1;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Step 3: Build actual neighbor lists using cell lists
|
|
*/
|
|
__global__ void
|
|
build_neighbor_lists_kernel(const float4 *positions, const CellList cell_list,
|
|
NeighborList neighbor_list, float cutoff_squared,
|
|
const float3 *box_lengths, size_t n_particles) {
|
|
size_t i = get_thread_id();
|
|
if (i >= n_particles)
|
|
return;
|
|
|
|
float4 pos_i = positions[i];
|
|
float3 my_pos = make_float3(pos_i.x, pos_i.y, pos_i.z);
|
|
|
|
int3 my_cell = cell_list.get_cell_coords(my_pos);
|
|
int neighbor_count = 0;
|
|
int max_neighbors = neighbor_list.max_neighbors;
|
|
int *my_neighbors = neighbor_list.get_neighbors(i);
|
|
|
|
// Search neighboring cells (3x3x3 = 27 cells including self)
|
|
for (int dz = -1; dz <= 1; dz++) {
|
|
for (int dy = -1; dy <= 1; dy++) {
|
|
for (int dx = -1; dx <= 1; dx++) {
|
|
|
|
int3 neighbor_cell =
|
|
make_int3(my_cell.x + dx, my_cell.y + dy, my_cell.z + dz);
|
|
|
|
// Check bounds
|
|
if (neighbor_cell.x < 0 || neighbor_cell.x >= cell_list.grid_size.x ||
|
|
neighbor_cell.y < 0 || neighbor_cell.y >= cell_list.grid_size.y ||
|
|
neighbor_cell.z < 0 || neighbor_cell.z >= cell_list.grid_size.z) {
|
|
continue;
|
|
}
|
|
|
|
int cell_idx = cell_list.get_cell_index(
|
|
neighbor_cell.x, neighbor_cell.y, neighbor_cell.z);
|
|
int start = cell_list.cell_starts[cell_idx];
|
|
int end = cell_list.cell_ends[cell_idx];
|
|
|
|
// Check all particles in this cell
|
|
for (int idx = start; idx < end; idx++) {
|
|
int j = cell_list.sorted_particles[idx];
|
|
|
|
if (i >= j)
|
|
continue; // Avoid double counting and self-interaction
|
|
|
|
float4 pos_j = positions[j];
|
|
float3 other_pos = make_float3(pos_j.x, pos_j.y, pos_j.z);
|
|
|
|
// Calculate distance with periodic boundary conditions
|
|
float dx = my_pos.x - other_pos.x;
|
|
float dy = my_pos.y - other_pos.y;
|
|
float dz = my_pos.z - other_pos.z;
|
|
|
|
// Apply PBC
|
|
dx -= box_lengths->x * roundf(dx / box_lengths->x);
|
|
dy -= box_lengths->y * roundf(dy / box_lengths->y);
|
|
dz -= box_lengths->z * roundf(dz / box_lengths->z);
|
|
|
|
float r_squared = dx * dx + dy * dy + dz * dz;
|
|
|
|
if (r_squared <= cutoff_squared && neighbor_count < max_neighbors) {
|
|
my_neighbors[neighbor_count] = j;
|
|
neighbor_count++;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
neighbor_list.neighbor_counts[i] = neighbor_count;
|
|
}
|
|
|
|
// =============================================================================
|
|
// HOST FUNCTIONS
|
|
// =============================================================================
|
|
|
|
void build_cell_list(const float4 *positions, CellList &cell_list,
|
|
const float3 *box_lengths) {
|
|
|
|
// Step 1: Assign particles to cells
|
|
auto config = get_launch_config(cell_list.n_particles);
|
|
|
|
assign_particles_to_cells_kernel<<<config.blocks, config.threads>>>(
|
|
positions, cell_list.particle_cells, cell_list, cell_list.n_particles);
|
|
|
|
// Step 2: Sort particles by cell index using Thrust
|
|
thrust::device_ptr<int> particle_cells_ptr(cell_list.particle_cells);
|
|
thrust::device_ptr<int> sorted_particles_ptr(cell_list.sorted_particles);
|
|
|
|
// Initialize particle indices 0, 1, 2, ...
|
|
thrust::sequence(sorted_particles_ptr,
|
|
sorted_particles_ptr + cell_list.n_particles);
|
|
|
|
// Sort particle indices by their cell assignments
|
|
thrust::sort_by_key(particle_cells_ptr,
|
|
particle_cells_ptr + cell_list.n_particles,
|
|
sorted_particles_ptr);
|
|
|
|
// Step 3: Initialize cell boundaries to -1
|
|
cudaMemset(cell_list.cell_starts, -1, cell_list.total_cells * sizeof(int));
|
|
cudaMemset(cell_list.cell_ends, -1, cell_list.total_cells * sizeof(int));
|
|
|
|
// Step 4: Find cell boundaries
|
|
config = get_launch_config(cell_list.n_particles);
|
|
find_cell_boundaries_kernel<<<config.blocks, config.threads>>>(
|
|
cell_list.particle_cells, cell_list.cell_starts, cell_list.cell_ends,
|
|
cell_list.n_particles, cell_list.total_cells);
|
|
}
|
|
|
|
void build_neighbor_list(const float4 *positions, const CellList &cell_list,
|
|
NeighborList &neighbor_list, float cutoff_squared,
|
|
const float3 *box_lengths) {
|
|
|
|
// Initialize neighbor counts to 0
|
|
cudaMemset(neighbor_list.neighbor_counts, 0,
|
|
neighbor_list.n_particles * sizeof(int));
|
|
|
|
auto config = get_launch_config(neighbor_list.n_particles);
|
|
|
|
build_neighbor_lists_kernel<<<config.blocks, config.threads>>>(
|
|
positions, cell_list, neighbor_list, cutoff_squared, box_lengths,
|
|
neighbor_list.n_particles);
|
|
}
|
|
|
|
void build_neighbor_list_optimized(const float4 *positions, size_t n_particles,
|
|
const float3 *box_lengths, float cutoff,
|
|
NeighborList &neighbor_list) {
|
|
|
|
// Get box size for cell list construction
|
|
float3 box_size;
|
|
cudaMemcpy(&box_size, box_lengths, sizeof(float3), cudaMemcpyDeviceToHost);
|
|
|
|
// Create cell list
|
|
CellList cell_list(n_particles, box_size, cutoff);
|
|
|
|
// Build cell list
|
|
build_cell_list(positions, cell_list, box_lengths);
|
|
|
|
// Build neighbor list using cell list
|
|
build_neighbor_list(positions, cell_list, neighbor_list, cutoff * cutoff,
|
|
box_lengths);
|
|
}
|