commit c8be709be9bbc589ec6def986c9286f7fe54424b
Author: Alex Selimov
Date:   Sat Feb 17 21:51:18 2024 -0500

    Initial commit of CAC code

diff --git a/docs/Commands/boundary.md b/docs/Commands/boundary.md
new file mode 100644
index 0000000..fdf9133
--- /dev/null
+++ b/docs/Commands/boundary.md
@@ -0,0 +1,22 @@
+# boundary
+
+```sh
+boundary xyz
+```
+
+## Inputs
+
+`xyz` - string describing the boundary conditions in which `x`, `y`, and `z` can have values of either `p` for periodic or `s` for shrink-wrapped.
+
+## Examples
+
+```sh
+boundary ppp
+boundary sps
+```
+
+## Description
+
+The boundary command sets the boundary conditions of the model to either periodic or shrink-wrapped.
+Because boundary data is included within the restart file format, this command is not strictly required.
+If included, this command overrides the boundary conditions defined within the data file.
diff --git a/docs/Commands/dump.md b/docs/Commands/dump.md
new file mode 100644
index 0000000..43d294f
--- /dev/null
+++ b/docs/Commands/dump.md
@@ -0,0 +1,23 @@
+# dump
+
+```sh
+dump id group style n fname
+```
+
+## Inputs
+
+`id` - id for the dump command
+
+`group` - group to be dumped
+
+`style` - dump style, currently only accepts `out`
+
+`n` - dump every `n` steps. If `n` is `0` then the dump file is only written at the beginning and end of a minimize or run.
+
+`fname` - file name to write to. The extension is stripped and replaced with the timestep and `.out`.
+
+## Example
+
+```
+dump 1 all out 100 dump_file.out
+```
diff --git a/docs/Commands/group.md b/docs/Commands/group.md
new file mode 100644
index 0000000..c657604
--- /dev/null
+++ b/docs/Commands/group.md
@@ -0,0 +1,52 @@
+# group
+
+```sh
+group id selection shape ...
+```
+
+## Inputs
+
+`id` - assigned id for the group.
+
+`selection` - either `atoms`, `elements`, or `all`.
+
+`shape` - either `block`, `sphere`, or `type`.
+
+**Additional arguments depend on the group shape passed and are listed below:**
+
+```sh
+group id selection block xlo xhi ylo yhi zlo zhi
+```
+where `{x,y,z}lo` are the lower bounds along `x,y,z` and `{x,y,z}hi` are the upper bounds along `x,y,z`.
+
+```sh
+group id selection sphere x y z r
+```
+where `x y z` define the centroid of the sphere and `r` defines the radius of the sphere.
+
+```sh
+group id selection type n
+```
+where `n` is a number which selects the `n`th atom type.
+
+## Examples
+```sh
+group tophalf all block -inf inf -inf inf inf*0.5 inf
+group precip atoms sphere 100 78 78 10
+group Ni atoms type 2
+```
+
+## Description
+
+The group command is used in conjunction with other commands which require a group input.
+**A default `all` group is defined which contains all atoms and elements.**
+A subsection of all atoms or elements can be selected using the group command.
+`id` is used to refer to the group in other commands.
+`selection` determines whether atoms, elements, or both are selected within the group.
+A group can be defined containing only `atoms`, only `elements`, or `all` of both.
+Atoms or elements that lie within the bounds of the group but do not match `selection` are not members of the group.
+
+All positions for the group command can be specified using position specifications described [here](../Misc/position.md).
+
+
+
diff --git a/docs/Commands/min_style.md b/docs/Commands/min_style.md
new file mode 100644
index 0000000..496c675
--- /dev/null
+++ b/docs/Commands/min_style.md
@@ -0,0 +1,25 @@
+# min_style
+
+```sh
+min_style minimizer
+```
+
+## Inputs
+
+`minimizer` - Either `fire` or `cg`.
+
+## Examples
+
+```sh
+min_style fire
+min_style cg
+```
+
+## Description
+
+This command sets the minimizer for future `minimize` commands.
+Currently CAC has implemented both conjugate gradient minimization, `cg`, and the fast inertial relaxation engine [1], `fire`.
+
+## References
+
+[1] Guénolé, Julien et al. "Assessment and optimization of the fast inertial relaxation engine (FIRE) for energy minimization in atomistic simulations and its implementation in LAMMPS" Computational Materials Science (2020)
diff --git a/docs/Commands/minimize.md b/docs/Commands/minimize.md
new file mode 100644
index 0000000..bf52281
--- /dev/null
+++ b/docs/Commands/minimize.md
@@ -0,0 +1,31 @@
+# minimize
+
+```sh
+minimize ftol etol max_iter
+```
+
+## Inputs
+
+`ftol` - force tolerance
+
+`etol` - energy tolerance
+
+`max_iter` - maximum number of iterations to calculate before exiting.
+
+## Examples
+
+```sh
+minimize 10d-10 10d-10 10000
+```
+
+## Description
+
+The minimize command is used to perform energy minimization on the CAC model.
+The minimization method is chosen by the [min_style](./min_style.md) command.
+Minimization stops when any one of the following conditions is met:
+
+1. \( \frac{(E_i - E_{i-1})}{E_i} < etol \), where \( i \) is the current iteration.
+2. No atom has a force component greater than `ftol`.
+3. The number of iterations exceeds `max_iter`.
+
+
diff --git a/docs/Commands/neighbor.md b/docs/Commands/neighbor.md
new file mode 100644
index 0000000..b9752b8
--- /dev/null
+++ b/docs/Commands/neighbor.md
@@ -0,0 +1,21 @@
+# neighbor
+
+```sh
+neighbor skin
+```
+
+## Inputs
+
+`skin` - The skin distance in angstroms added to the cutoff radius for construction of Verlet lists.
+
+## Examples
+
+```sh
+neighbor 1.5
+```
+
+## Description
+
+This command specifies the additional skin distance which is used when constructing the Verlet neighbor lists.
+When an atom has moved more than half of the skin distance, the neighbor lists are reconstructed.
+
diff --git a/docs/Commands/potential.md b/docs/Commands/potential.md
new file mode 100644
index 0000000..63c3d2d
--- /dev/null
+++ b/docs/Commands/potential.md
@@ -0,0 +1,30 @@
+# potential
+
+```sh
+potential pot_type file elements ...
+```
+
+## Inputs
+
+`pot_type` - Potential type, currently accepts either `eam` for setfl-formatted eam/alloy potentials or `fs` for eam Finnis-Sinclair style potentials.
+
+`file` - Potential file, currently only accepts .eam.alloy style potential files.
+
+`elements ...` - List of atomic elements mapping the atom type number to atomic species. Can be excluded, in which case atom types are mapped based on the element ordering within the potential file.
+
+## Examples
+
+```sh
+potential eam CuNi.eam.alloy Ni Cu
+potential eam Cu_mishin1.eam.alloy
+```
+
+## Description
+
+This command specifies the potential information.
+**Currently only .eam.alloy style potentials are accepted.**
+The `elements ...` portion of the command is used to map the numeric atom types to the potential file.
+For the first example, `Ni Cu` maps atoms with type 1 to Ni and atoms with type 2 to Cu.
+Inputting `Cu Ni` would instead reverse the order and map atoms with type 1 to Cu and atoms with type 2 to Ni.
+This can be excluded, as in the second example.
+If the CuNi.eam.alloy file is ordered with Ni first and Cu second, then `potential eam CuNi.eam.alloy Ni Cu` is equivalent to `potential eam CuNi.eam.alloy`.
diff --git a/docs/Commands/read_data.md b/docs/Commands/read_data.md
new file mode 100644
index 0000000..94a972a
--- /dev/null
+++ b/docs/Commands/read_data.md
@@ -0,0 +1,24 @@
+# read_data
+
+```
+read_data filename
+```
+
+## Inputs
+
+`filename` - Input file to be read in restart format.
+
+## Examples
+
+```sh
+read_data model_1.restart
+```
+
+## Description
+
+Model building capabilities are not included in the CAC simulation tool.
+Instead, the [CAC model builder (CACmb)](https://gitlab.com/aselimov/cacmb) must be used to create models in the .restart format.
+The read_data command reads in one .restart formatted data file and distributes the model across all processors.
+
+
+
diff --git a/docs/Commands/run.md b/docs/Commands/run.md
new file mode 100644
index 0000000..f510f4a
--- /dev/null
+++ b/docs/Commands/run.md
@@ -0,0 +1,10 @@
+# run
+```sh
+run ensemble timesteps
+```
+
+## Inputs
+
+`ensemble` - currently either `nve` or `qd`.
+
+`timesteps` - The number of timesteps of dynamics to run.
diff --git a/docs/Commands/setforce.md b/docs/Commands/setforce.md
new file mode 100644
index 0000000..4fae7cc
--- /dev/null
+++ b/docs/Commands/setforce.md
@@ -0,0 +1,11 @@
+# setforce
+
+```
+setforce group_id fx fy fz
+```
+
+## Inputs
+
+`group_id` - id of the group to apply the force to.
+
+`fx fy fz` - force components to set.
diff --git a/docs/Commands/temp.md b/docs/Commands/temp.md
new file mode 100644
index 0000000..de673ce
--- /dev/null
+++ b/docs/Commands/temp.md
@@ -0,0 +1,26 @@
+# temp
+
+```sh
+temp command group_id args
+```
+
+## Inputs
+
+`command` - Command for the temp option, either `create` or `control`.
+
+`group_id` - Group id to apply temperature control to.
+
+**Additional arguments depend on the command passed and are listed below:**
+
+```sh
+temp create group_id T_target
+```
+
+- `T_target` - Target temperature
+
+```sh
+temp control group_id T_target time_constant
+```
+
+- `T_target` - Target temperature
+- `time_constant` - Time constant for the temperature control
diff --git a/docs/Commands/thermo.md b/docs/Commands/thermo.md
new file mode 100644
index 0000000..da6c308
--- /dev/null
+++ b/docs/Commands/thermo.md
@@ -0,0 +1,22 @@
+# thermo
+
+```sh
+thermo step
+```
+
+## Inputs
+
+`step` - Output thermodynamic information every `step` steps.
+
+## Examples
+
+```
+thermo 50
+```
+
+## Description
+
+Currently the thermo output is fixed.
+For energy minimization, the thermo output contains only the energy and the 2-norm of the global force vector.
+For dynamics, the thermo output also contains the kinetic energy and the temperature.
+
diff --git a/docs/Commands/thermo_style.md b/docs/Commands/thermo_style.md
new file mode 100644
index 0000000..256ce89
--- /dev/null
+++ b/docs/Commands/thermo_style.md
@@ -0,0 +1,32 @@
+# thermo_style
+
+```sh
+thermo_style args
+```
+
+## Inputs
+
+`args` - List of thermo_style options. Acceptable options are listed in the description below.
+
+## Examples
+
+```
+thermo_style pe fnorm temp
+```
+
+## Description
+
+The `thermo_style` command sets the thermo output for CAC.
+By default the `thermo_style` command only outputs the potential energy and the global force norm.
+Acceptable options for thermo_style are listed below: + +- `pe`: potential energy +- `fnorm`: global force norm +- `temp`: temperature +- `temp_c`: temperature in CG +- `temp_a`: temperature in atoms +- `ke`: kinetic energy +- `ke_c`: kinetic energy in CG +- `ke_a`: kinetic energy in atoms +- `lx`, `ly`, `lz`: Box dimensions in x, y, z respectively +- `px`, `py`, `pz`: Box pressure in x, y, z respectively diff --git a/docs/Misc/position.md b/docs/Misc/position.md new file mode 100644 index 0000000..e69de29 diff --git a/docs/img/CuNi_int.png b/docs/img/CuNi_int.png new file mode 100644 index 0000000..4f86ad5 Binary files /dev/null and b/docs/img/CuNi_int.png differ diff --git a/docs/img/CuNi_xyz.png b/docs/img/CuNi_xyz.png new file mode 100644 index 0000000..32c94cf Binary files /dev/null and b/docs/img/CuNi_xyz.png differ diff --git a/docs/img/cacmb.png b/docs/img/cacmb.png new file mode 100644 index 0000000..b91690c Binary files /dev/null and b/docs/img/cacmb.png differ diff --git a/docs/img/demo.gif b/docs/img/demo.gif new file mode 100644 index 0000000..4f09818 Binary files /dev/null and b/docs/img/demo.gif differ diff --git a/docs/img/demo.svg b/docs/img/demo.svg new file mode 100644 index 0000000..fec63e6 --- /dev/null +++ b/docs/img/demo.svg @@ -0,0 +1 @@ +[I]~cccacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmb***********************CACmb**********************CACmodelbuildingtoolkit**_______**//**//**/______/**_|__|__|_______**//**//**/______/*******************************************************STOPNothingtodo,pleaseruncacmbusinganavailablemodecacmb--createCufcc3.61515duplicate444Cu.mb-----------------------ModeCreate---------------------------Usingmodecreate,172elementsarecreatedand283500atomsarecreated.FileCu.mbalreadyexists.Wouldyouliketooverwrite?(Y/N)yWritingdataouttoCu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owOverwriteflagpassed,outputfileswillbeoverwritten-----------------------ModeMerge---------------------------Readin172elementsand283500atomsfromCu.mbNewboxdimensionsare:-0.90375000000000005215.99625000000000-0.90375000000000005215.99625000000000-0.90375000000000005215.99625000000000432.89625000000001Writingdataouttobi_Cu.mbexitcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmbcacmbcacmbcacmbcacmbcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmb--ccacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--createCufcc3.61515duplicate444Cu.mbcacmb--mcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-owcacmb--mergez2Cu.mbCu.mbbi_Cu.mb-oweexitexitexitexitexitexit \ No newline at end of file diff --git a/docs/img/efilled_vtk.png b/docs/img/efilled_vtk.png new file mode 100644 index 0000000..64edc6d Binary files 
/dev/null and b/docs/img/efilled_vtk.png differ diff --git a/docs/img/not_efilled_vtk.png b/docs/img/not_efilled_vtk.png new file mode 100644 index 0000000..271fc08 Binary files /dev/null and b/docs/img/not_efilled_vtk.png differ diff --git a/docs/img/rhomb.png b/docs/img/rhomb.png new file mode 100644 index 0000000..0d4d1df Binary files /dev/null and b/docs/img/rhomb.png differ diff --git a/docs/img/simple_example_1.png b/docs/img/simple_example_1.png new file mode 100644 index 0000000..158fea6 Binary files /dev/null and b/docs/img/simple_example_1.png differ diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..78ea320 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,16 @@ +# CAC: An implementation of the Concurrent Atomistic-Continuum method + +Copyright © 2017-2018 Georgia Institute of Technology. All Rights Reserved. + +CAC is an implementation of the concurrent atomistic-continuum (CAC) method [1] that allows for both dynamics and energy minimization of CAC models. +CAC is written in Fortran 2008. +For a standalone tool used to build and analyze models for simulation with CAC please see the [CAC model builder tool](https://gitlab.com/aselimov/cacmb). + +This user's manual is maintained by [Alex Selimov](https://alexselimov.xyz) and [Kevin Chu](https://kvnchv.github.io/cv/). Both are current Ph.D. students of Prof. David L. McDowell at the Georgia Institute of Technology. + + +Click [here](./intro/getstarted.md) for the getting started guide. + +## References + +[1] Chen, Youping et al. "Concurrent atomistic-continuum modeling of crystalline materials" Journal of Applied Physics (2019) diff --git a/docs/intro/getstarted.md b/docs/intro/getstarted.md new file mode 100644 index 0000000..bad5562 --- /dev/null +++ b/docs/intro/getstarted.md @@ -0,0 +1 @@ +# Getting Started diff --git a/src/Makefile b/src/Makefile new file mode 100644 index 0000000..d5ca8ce --- /dev/null +++ b/src/Makefile @@ -0,0 +1,122 @@ +.DEFAULT_GOAL := all + +FC=mpif90 +OBJDIR=obj +SRCS := $(wildcard *.f90) +OBJECTS := $(addprefix $(OBJDIR)/,$(SRCS:%.f90=%.o)) +FFLAGS=-Wall -mcmodel=large -O2 -g -fbacktrace -fcheck=all -ffpe-trap=invalid,zero,overflow,underflow,denormal -J$(OBJDIR) +# +#FFLAGS=-O3 -g -J$(OBJDIR) + +#----------------- DEPENDENCIES -----------------# +# GENERATED USING https://github.com/ZedThree/fort_depend.py **requires python3** +# > pip install fortdepend +# > fortdepend -o Makefile.dep -i mpi -b obj/ +include Makefile.dep + +#----------------- DEFAULTS -----------------# + +all: CAC + +.PHONY: deps +CAC: $(OBJECTS) $(OBJDIR)/main.o + $(FC) $(FFLAGS) $(OBJECTS) -o $@ + +$(OBJDIR)/%.o: %.f90 + @mkdir -p $(@D) + $(FC) $(FFLAGS) -c -o $@ $< + +.f90.o: + $(FC) $(FFLAGS) -c $< + + +#deps: +# @fortdepend -o Makefile.dep -i mpi -b obj -w + +deps: + @makedepf90 -b obj *.f90 > Makefile.dep +#----------------- TEST RULES -----------------# + +COMMS_TEST_OBJECTS := $(addprefix $(OBJDIR)/,$(COMMS_TEST)) + +MULTICOMM_TEST:=$(COMMS_TEST) +MULTICOMM_TEST_OBJECTS:=$(addprefix $(OBJDIR)/,$(MULTICOMM_TEST)) + +ELEMENTS_TEST=parameters.o elements.o box.o errors.o math.o logger.o +ELEMENTS_TEST_OBJECTS := $(addprefix $(OBJDIR)/,$(ELEMENTS_TEST)) + + +DUMP_TEST=$(POT_TEST) dump.o +DUMP_TEST_OBJECTS := $(addprefix $(OBJDIR)/,$(DUMP_TEST)) + + + +.PHONY: tests +tests: test_cohesive_energy test_gsf test_pair_force test_multi_pair_force test_fs_pair_force test_cg_calc test_mixed_potentials + +test_cohesive_energy: $(OBJECTS) + $(eval TESTOBJECTS := $(filter-out $(OBJDIR)/main.o,$(OBJECTS))) + @cp 
$(TESTOBJECTS) ./tests/cohesive_energy + @cp $(subst .o,.mod,$(TESTOBJECTS)) ./tests/cohesive_energy + $(MAKE) -C ./tests/cohesive_energy + +test_gsf: $(OBJECTS) + $(eval TESTOBJECTS := $(filter-out $(OBJDIR)/main.o,$(OBJECTS))) + @cp $(TESTOBJECTS) ./tests/gsf + @cp $(subst .o,.mod,$(TESTOBJECTS)) ./tests/gsf + $(MAKE) -C ./tests/gsf + +test_pair_force: $(OBJECTS) + $(eval TESTOBJECTS := $(filter-out $(OBJDIR)/main.o,$(OBJECTS))) + @cp $(TESTOBJECTS) ./tests/pair_force + @cp $(subst .o,.mod,$(TESTOBJECTS)) ./tests/pair_force + $(MAKE) -C ./tests/pair_force + +test_multi_pair_force: $(OBJECTS) + $(eval TESTOBJECTS := $(filter-out $(OBJDIR)/main.o,$(OBJECTS))) + @cp $(TESTOBJECTS) ./tests/multi_pair_force + @cp $(subst .o,.mod,$(TESTOBJECTS)) ./tests/multi_pair_force + $(MAKE) -C ./tests/multi_pair_force + +test_fs_pair_force: $(OBJECTS) + $(eval TESTOBJECTS := $(filter-out $(OBJDIR)/main.o,$(OBJECTS))) + @cp $(TESTOBJECTS) ./tests/fs_pair_force + @cp $(subst .o,.mod,$(TESTOBJECTS)) ./tests/fs_pair_force + $(MAKE) -C ./tests/fs_pair_force + +test_morse_force: $(OBJECTS) + $(eval TESTOBJECTS := $(filter-out $(OBJDIR)/main.o,$(OBJECTS))) + @cp $(TESTOBJECTS) ./tests/morse_force + @cp $(subst .o,.mod,$(TESTOBJECTS)) ./tests/morse_force + $(MAKE) -C ./tests/morse_force + +test_cg_calc: $(OBJECTS) + $(eval TESTOBJECTS := $(filter-out $(OBJDIR)/main.o,$(OBJECTS))) + @cp $(TESTOBJECTS) ./tests/cg_calc + @cp $(subst .o,.mod,$(TESTOBJECTS)) ./tests/cg_calc + $(MAKE) -C ./tests/cg_calc + + +test_mixed_potential: $(OBJECTS) + $(eval TESTOBJECTS := $(filter-out $(OBJDIR)/main.o,$(OBJECTS))) + @cp $(TESTOBJECTS) ./tests/mixed_potentials + @cp $(subst .o,.mod,$(TESTOBJECTS)) ./tests/mixed_potentials + $(MAKE) -C ./tests/mixed_potentials + +#----------------- CLEAN UP -----------------# + +.PHONY: clean +clean: + $(RM) *.mod *.o + $(RM) $(OBJDIR)/*.mod $(OBJDIR)/*.o CAC + @$(RM) -rf obj/ + +.PHONY: cleantest +cleantest: + $(RM) tests/*/*.o tests/*/*.mod tests/*/*_test + +.PHONY: clean-all +clean-all: clean cleantest + +# DEBUGGING VARIABLE PRINT +print-% : ; @echo $* = $($*) diff --git a/src/Makefile.dep b/src/Makefile.dep new file mode 100644 index 0000000..8d96f31 --- /dev/null +++ b/src/Makefile.dep @@ -0,0 +1,39 @@ +obj/atom_types.o : atom_types.f90 obj/str.o obj/logger.o obj/parameters.o +obj/berendsen.o : berendsen.f90 obj/time.o obj/forces.o obj/compute.o obj/parameters.o obj/elements.o +obj/box.o : box.f90 obj/errors.o obj/parameters.o +obj/cg.o : cg.f90 obj/min_arrays.o obj/integration.o obj/potential.o obj/neighbors.o obj/atom_types.o obj/time.o obj/forces.o obj/elements.o obj/comms.o obj/parameters.o +obj/comms.o : comms.f90 obj/math.o obj/errors.o obj/box.o obj/elements.o obj/parameters.o +obj/compute.o : compute.f90 obj/box.o obj/group.o obj/elements.o obj/forces.o obj/atom_types.o obj/comms.o obj/parameters.o +obj/debug.o : debug.f90 obj/str.o obj/elements.o obj/forces.o obj/parameters.o +obj/deform.o : deform.f90 obj/comms.o obj/time.o obj/neighbors.o obj/elements.o obj/box.o obj/parameters.o +obj/displace.o : displace.f90 obj/neighbors.o obj/time.o obj/str.o obj/group.o obj/elements.o obj/parameters.o obj/forces.o +obj/dump.o : dump.f90 obj/time.o obj/group.o obj/math.o obj/forces.o obj/errors.o obj/box.o obj/elements.o obj/comms.o obj/parameters.o obj/atom_types.o +obj/dynamics.o : dynamics.f90 obj/deform.o obj/berendsen.o obj/temp.o obj/time.o obj/thermo.o obj/dump.o obj/potential.o obj/comms.o obj/forces.o obj/quenched_dynamics.o obj/neighbors.o obj/vel_verlet.o obj/parameters.o 
+obj/eam.o : eam.f90 obj/atom_types.o obj/errors.o obj/comms.o obj/neighbors.o obj/integration.o obj/elements.o obj/forces.o obj/math.o obj/parameters.o +obj/elements.o : elements.f90 obj/math.o obj/box.o obj/errors.o obj/parameters.o +obj/errors.o : errors.f90 obj/logger.o obj/parameters.o +obj/fire.o : fire.f90 obj/debug.o obj/potential.o obj/neighbors.o obj/atom_types.o obj/time.o obj/forces.o obj/elements.o obj/comms.o obj/parameters.o +obj/force_mod.o : force_mod.f90 obj/time.o obj/str.o obj/group.o obj/elements.o obj/parameters.o obj/forces.o +obj/forces.o : forces.f90 obj/integration.o obj/elements.o obj/parameters.o +obj/group.o : group.f90 obj/comms.o obj/str.o obj/box.o obj/elements.o obj/parameters.o +obj/input_parser.o : input_parser.f90 obj/langevin.o obj/modify.o obj/deform.o obj/set.o obj/displace.o obj/force_mod.o obj/minimize.o obj/group.o obj/dump.o obj/logger.o obj/neighbors.o obj/read_data.o obj/eam.o obj/potential.o obj/parameters.o obj/comms.o +obj/integration.o : integration.f90 obj/errors.o obj/elements.o obj/comms.o obj/parameters.o +obj/langevin.o : langevin.f90 obj/group.o obj/atom_types.o obj/time.o obj/forces.o obj/elements.o obj/comms.o obj/parameters.o +obj/logger.o : logger.f90 obj/parameters.o +obj/main.o : main.f90 obj/langevin.o obj/deform.o obj/debug.o obj/potential.o obj/berendsen.o obj/temp.o obj/thermo.o obj/dump.o obj/group.o obj/time.o obj/minimize.o obj/logger.o obj/neighbors.o obj/integration.o obj/input_parser.o obj/parameters.o obj/comms.o +obj/math.o : math.f90 obj/parameters.o +obj/min_arrays.o : min_arrays.f90 obj/errors.o obj/elements.o obj/parameters.o +obj/minimize.o : minimize.f90 obj/debug.o obj/logger.o obj/errors.o obj/thermo.o obj/dump.o obj/potential.o obj/comms.o obj/forces.o obj/neighbors.o obj/cg.o obj/fire.o obj/parameters.o +obj/modify.o : modify.f90 obj/potential.o obj/elements.o obj/neighbors.o obj/parameters.o +obj/morse.o : morse.f90 obj/atom_types.o obj/errors.o obj/comms.o obj/neighbors.o obj/integration.o obj/elements.o obj/forces.o obj/math.o obj/parameters.o +obj/neighbors.o : neighbors.f90 obj/min_arrays.o obj/temp.o obj/group.o obj/elements.o obj/forces.o obj/logger.o obj/comms.o obj/math.o obj/integration.o obj/parameters.o +obj/parameters.o : parameters.f90 +obj/potential.o : potential.f90 obj/force_mod.o obj/atom_types.o obj/str.o obj/comms.o obj/forces.o obj/morse.o obj/eam.o obj/parameters.o +obj/quenched_dynamics.o : quenched_dynamics.f90 obj/vel_verlet.o obj/time.o obj/potential.o obj/neighbors.o obj/elements.o obj/comms.o obj/parameters.o +obj/read_data.o : read_data.f90 obj/time.o obj/str.o obj/logger.o obj/comms.o obj/dynamics.o obj/potential.o obj/integration.o obj/box.o obj/elements.o obj/parameters.o obj/math.o +obj/set.o : set.f90 obj/logger.o obj/group.o obj/elements.o +obj/str.o : str.f90 +obj/temp.o : temp.f90 obj/atom_types.o obj/time.o obj/compute.o obj/group.o obj/elements.o obj/comms.o obj/parameters.o +obj/thermo.o : thermo.f90 obj/str.o obj/compute.o obj/forces.o obj/elements.o +obj/time.o : time.f90 obj/errors.o obj/logger.o obj/parameters.o +obj/vel_verlet.o : vel_verlet.f90 obj/time.o obj/potential.o obj/neighbors.o obj/elements.o obj/comms.o obj/parameters.o diff --git a/src/atom_types.f90 b/src/atom_types.f90 new file mode 100644 index 0000000..8b6ab6f --- /dev/null +++ b/src/atom_types.f90 @@ -0,0 +1,87 @@ +module atom_types + !This module contains the atom type variables + use parameters + use logger + use str + implicit none + + + !Max number of atom types allowed + integer, parameter :: 
max_atom_types=10 + !number of different potential types, eam all count for the same + integer, parameter :: max_pot_types=2 + !Atom type variables + integer, save :: natom_types + real(kind=wp), dimension(max_atom_types), save :: masses + character(len=2), dimension(max_atom_types), save :: atom_names + !Have to move some potential variables here + integer, save, dimension(max_atom_types, max_atom_types) :: types_to_pot_type + logical, dimension(max_pot_types) :: potential_types + logical :: atom_types_set + + public + contains + + subroutine init_atom_types + !Initialization subroutine + atom_types_set = .false. + types_to_pot_type=0 + end subroutine init_atom_types + + subroutine set_atom_types(num,names) + !This subroutine sets the atom types for use with the potential functions + integer, intent(in) :: num + character(len=2), dimension(:) :: names + integer :: i + + natom_types = num + do i = 1, natom_types + atom_names(i) = names(i) + end do + atom_types_set = .true. + + return + end subroutine set_atom_types + + subroutine parse_types(line) + character(len=*), intent(in) :: line + character(len=read_len) :: tmptxt + integer :: i + + i = tok_count(line) + natom_types = i-1 + read(line,*) tmptxt, (atom_names(i), i = 1, natom_types) + + atom_types_set = .true. + return + end subroutine parse_types + + subroutine parse_mass(line) + character(len=*), intent(in) :: line + character(len=read_len) :: tmptxt + integer :: i + real(kind=wp) :: m + + read(line,*) tmptxt, i, m + + masses(i) = m + + write(tmptxt, *) "Masses are now: ", masses(1:natom_types) + call log_msg(tmptxt) + + return + end subroutine parse_mass + + subroutine log_types + !This command logs the atom_types to the log file + integer :: i + character(len = read_len) :: msg + msg = '' + do i = 1, natom_types + write(msg, *) trim(adjustl(msg)), i, atom_names(i) + end do + + call log_msg("Atom types are mapped as "//msg) + end subroutine log_types +end module atom_types + diff --git a/src/berendsen.f90 b/src/berendsen.f90 new file mode 100644 index 0000000..0aee55f --- /dev/null +++ b/src/berendsen.f90 @@ -0,0 +1,122 @@ +module berendsen + use elements + use parameters + use compute + use forces + use time + + implicit none + + real(kind=wp), save, private:: Ptarget(3), time_constant + logical, save :: pflag, need_barostat(3) + public + contains + + subroutine parse_berendsen(line) + !This subroutine parses the press command + character(len=*), intent(in) :: line + character(len = read_len) :: tmptxt, msg, press_targets(3) + integer :: iospara, i + + read(line, *, iomsg = msg, iostat = iospara) tmptxt, time_constant, (press_targets(i), i = 1, 3) + if(iospara > 0) call read_error(msg, iospara) + + need_barostat=.false. + do i = 1, 3 + if ((trim(adjustl(press_targets(i))) == 'null').or.(trim(adjustl(press_targets(i))) == 'NULL')) then + Ptarget(i)=0 + else + read(press_targets(i), *) Ptarget(i) + need_barostat(i)=.true. + end if + end do + pflag = .true. + + write(msg, *) "Using Berendsen barostat to target pressure ", Ptarget, " for dimensions", need_barostat + call log_msg(msg) + return + end subroutine parse_berendsen + + subroutine rescale_box + !This function actually rescales the box for barostatting. 
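+        !In outline (matching the code below): with P the current box pressure tensor and
+        !time_constant the barostat time constant, each periodic, barostatted direction i
+        !is rescaled by the Berendsen factor
+        !    mu(i,i) = (1 - (time_step/time_constant)*(Ptarget(i) - P(i,i)))**(1/3)
+        !and the box bounds, atomic positions, and nodal positions are scaled about the
+        !box center by mu before the processor boundaries are rebuilt.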
+ integer :: i, j + real(kind = wp) :: pressure(3,3), identity(3,3), mu(3,3), cntr(3), cdamp + + !First compute the box pressure + pressure = compute_box_press() + + !Initialize some needed variables + identity(:,:) = 0.0_wp + forall(i=1:3) identity(i,i) = 1.0_wp + mu(:,:) = 0.0_wp + + !Now calculate the scaling factor + cdamp = time_step/time_constant + forall(i=1:3)mu(i,i) = (identity(i,i) - cdamp*(Ptarget(i) - pressure(i,i)))**(1.0_wp/3.0_wp) + !We set the scaling factor to 1 in non-periodic dimensions + do i = 1, 3 + if ((.not.period(i)).or.(.not.need_barostat(i))) mu(i,i) = 1.0_wp + end do + !Now get the box center + cntr = 0.0_wp + do i = 1,3 + cntr(i) = box_length(i)/2 + box_bd(2*i-1) + end do + !Now scale the box boundaries + do i = 1, 6 + j = (i+1)/2 + box_bd(i) = cntr(j) + mu(j,j)*(box_bd(i)-cntr(j)) + end do + + !update box length + do i = 1, 3 + box_length(i) = box_bd(2*i) - box_bd(2*i-1) + end do + + !Now scale the atomic and nodal positions + if(atom_num > 0) then + do i = 1, atom_num_l + do j = 1, 3 + r_atom(j,i) = cntr(j) + mu(j,j)*(r_atom(j,i)-cntr(j)) + end do + end do + end if + if(ele_num > 0) then + do i = 1, node_num_l + do j=1, 3 + r(j,:,i) = cntr(j) + mu(j,j)*(r(j,:,i)-cntr(j)) + end do + end do + end if + + !Now scale the box matrix + box_mat = matmul(box_mat,mu) + + !Now update the processor boundaries + pro_length(:) = 0.0_wp + pro_bd(:) = 0.0_wp + + do i = 1, 3 + !Define processor boundaries + pro_length(i) = box_length(i) / num_pro(i) + pro_bd(2*i-1) = box_bd(2*i-1) + grid_coords(i) * pro_length(i) + pro_bd(2*i) = box_bd(2*i-1) + (grid_coords(i) + 1) * pro_length(i) + + if(grid_coords(i) == 0) then + !If the processor has coordinate 0 it's the bottom one so set the boundary equal to the box boundary + pro_bd(2*i-1) = box_bd(2*i-1) + end if + + if(grid_coords(i) == num_pro(i) - 1) then + !If the processor has coordinate num_pro(i) - 1 it's the top one so set the boundary equal to top boundary + pro_bd(2*i) = box_bd(2*i) + end if + + pro_length(i) = pro_bd(2*i) - pro_bd(2*i-1) + end do + call processor_bds + + return + end subroutine rescale_box + +end module berendsen diff --git a/src/box.f90 b/src/box.f90 new file mode 100644 index 0000000..56ecf98 --- /dev/null +++ b/src/box.f90 @@ -0,0 +1,197 @@ + +module box + !This module contains all information related to simulation box definitions + + use parameters + use errors + + implicit none + + !Box definition variables + real(kind=wp) :: box_bd(6), orig_box_bd(6), box_length(3), orig_box_length(3) + real(kind=wp) :: box_mat_ori(3,3), box_mat(3,3) + !Boundary definitions + character(len=3) :: boundary + logical :: periodic + logical, dimension(3) :: period + + real(kind=wp), dimension(3) :: center_mass, center_mass_ori, center_mass_disp + + public + contains + + subroutine parse_boundary(line) + !This subroutine parses the optional boundary command + character(len=*), intent(in) :: line + integer :: iospara, i + character(len=read_len) :: msg, tmptxt + read(line, *, iostat = iospara, iomsg = msg) tmptxt, boundary + if(iospara > 0) call read_error(msg, iospara) + + do i = 1, 3 + select case(boundary(i:i)) + case('s', 'S') + period(i) = .false. + case('p','P') + period(i) = .true. + case default + call command_error("Boundary must be either p or s") + end select + end do + + if(any(period)) then + periodic = .true. + if(.not.allocated(pb_node))then + allocate(pb_node(3,max_basisnum, node_num_lr)) + end if + else + periodic = .false. 
+ end if + + + end subroutine parse_boundary + + subroutine cross_pb(r, info) + !This subroutine sets which periodic boundaries the nodes have crossed + real(kind = wp), dimension(3), intent(inout) :: r + integer, dimension(3), intent(out) :: info + + integer :: i + + info(:) = 0 + do i = 1, 3 + if(period(i).eqv..true.) then + if(r(i) < box_bd(i*2-1)) then + r(i) = r(i) + box_length(i) + info(i) = 1 + + else if(r(i) > box_bd(i*2)) then + r(i) = r(i) - box_length(i) + info(i) = -1 + + end if + end if + end do + return + end subroutine cross_pb + + subroutine restore_pb(r_in, pb_move, ipb) + + real(kind = wp), dimension(3), intent(inout) :: r_in + integer, dimension(3), intent(in) :: pb_move + logical, optional, intent(out) :: ipb + + integer :: i + + if(present(ipb)) ipb = .false. + do i = 1, 3 + if((period(i).eqv..true.).and. (pb_move(i) /= 0)) then + r_in(i) = r_in(i) - pb_move(i) * box_length(i) + if(present(ipb)) ipb = .true. + end if + end do + + return + end subroutine restore_pb + + function in_box_bd(r) + !this function just checks to make sure position r is within the overall simulation box + real(kind=wp), intent(in) :: r(3) + logical :: in_box_bd + + integer :: i + in_box_bd = .true. + do i = 1,3 + if (r(i) < box_bd(2*i-1)) then + !Lower bound + in_box_bd = .false. + return + else if (r(i) > box_bd(2*i)) then + !Upper bound + in_box_bd = .false. + return + end if + end do + + return + end function in_box_bd + + recursive subroutine parse_pos(i, pos_string, pos) + !This subroutine parses the pos command allowing for command which include inf + integer, intent(in) :: i !The dimension of the position + character(len=*), intent(in) :: pos_string !The position string + real(kind=dp), intent(out) :: pos !The output parsed position value + + integer :: iospara + real(kind=dp) :: rand, rone, rtwo + character(len=100) :: cone, ctwo + + iospara = 0 + + if(trim(adjustl(pos_string)) == 'inf') then + pos=box_bd(2*i) + + else if(trim(adjustl(pos_string)) == '-inf') then + pos=box_bd(2*i-1) + + else if (trim(adjustl(pos_string)) == 'rand') then + call random_number(rand) + pos = (box_bd(2*i)-box_bd(2*i-1))*rand + box_bd(2*i-1) + + else if (index(pos_string,'rand')>0) then + call random_number(rand) + cone = pos_string(index(pos_string, '[')+1:index(pos_string,':')-1) + call parse_pos(i, cone, rone) + ctwo = pos_string(index(pos_string, ':')+1:index(pos_string,']')-1) + call parse_pos(i, ctwo, rtwo) + pos = (rtwo - rone)*rand + rone + + else if ((index(pos_string,'-') > 0).and.(index(pos_string,'inf')>0)) then + !Now extract the number we are reducing from infinity + if(index(pos_string,'inf') < index(pos_string,'-')) then + read(pos_string(index(pos_string,'-')+1:), *, iostat=iospara) pos + else + read(pos_string(1:index(pos_string,'-')-1), *, iostat=iospara) pos + end if + pos = box_bd(2*i) - pos + + else if ((index(pos_string,'+') > 0).and.(index(pos_string,'inf')>0)) then + !Now extract the number we are reducing from infinity + if(index(pos_string,'inf') < index(pos_string,'+')) then + read(pos_string(index(pos_string,'+')+1:), *, iostat=iospara) pos + else + read(pos_string(1:index(pos_string,'+')-1), *, iostat=iospara) pos + end if + pos = box_bd(2*i-1) + pos + + else if ((index(pos_string,'*') > 0).and.(index(pos_string,'inf')>0)) then + !Now extract the number we are reducing from infinity + if(index(pos_string,'inf') < index(pos_string,'*')) then + read(pos_string(index(pos_string,'*')+1:), *, iostat=iospara) pos + else + read(pos_string(1:index(pos_string,'*')-1), *, iostat=iospara) 
pos + end if + pos = (box_bd(2*i)-box_bd(2*i-1))*pos + box_bd(2*i-1) + + else + read(pos_string, *, iostat=iospara) pos + + end if + + if (iospara > 0) then + print *, "Error reading position argument ", trim(adjustl(pos_string)), ". Please reformat and try again." + end if + end subroutine parse_pos + + subroutine update_box_mat + !This subroutine updates the box matrix + integer :: i + + box_mat = 0.0_wp + do i = 1, 3 + box_mat(i,i) = box_length(i) + end do + + return + end subroutine update_box_mat +end module box diff --git a/src/cg.f90 b/src/cg.f90 new file mode 100644 index 0000000..a694582 --- /dev/null +++ b/src/cg.f90 @@ -0,0 +1,560 @@ +module cg + + use parameters + use comms + use elements + use forces + use time + use atom_types + use neighbors + use potential + use integration + use min_arrays + + implicit none + + integer :: neval, nlimit + real(kind=wp), private, save :: alpha_max, alpha_reduce, backtrack_slope, quadratic_tol, emach, eps_quad, dmax, gg + + + public + contains + + subroutine cg_defaults + alpha_max = 1.0_wp + alpha_reduce = 0.25_wp + backtrack_slope = 0.4_wp + quadratic_tol = 0.1_wp + emach = 1.0d-8 + eps_quad = 1.0d-28 + dmax = 0.1 + nlimit = min(huge(1),atom_num+node_num) + end subroutine cg_defaults + + subroutine cg_init + integer :: ia, ibasis, ip, ie, inod + real(kind = wp) :: ggme + + !Initialize cg variables + call alloc_min_arrays + if(atom_num > 0) then + hatom = force_atom(:,1:atom_num_l) + gatom = hatom + end if + if(ele_num > 0) then + hnode = force_eq(:,:,1:node_num_l) + gnode = hnode + end if + + !Initialize number of evaluations to 0 + neval = 0 + + !Initialize force norm_squared + ggme = 0 + do ia = 1, atom_num_l + ggme = ggme + force_atom(1, ia)*force_atom(1,ia) + force_atom(2,ia)*force_atom(2,ia) & + + force_atom(3,ia)*force_atom(3,ia) + end do + do ie = 1, ele_num_l + if(who_has_ele(ie)) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod,ie) + do ibasis = 1, basis_num(node_cg(ip)) + ggme = ggme + force_eq(1, ibasis,ip)*force_eq(1,ibasis,ip) & + + force_eq(2,ibasis,ip)*force_eq(2,ibasis,ip) & + + force_eq(3,ibasis,ip)*force_eq(3,ibasis,ip) + end do + end do + end if + end do + + call mpi_allreduce(ggme, gg, 1, mpi_wp, mpi_sum, world, ierr) + end subroutine cg_init + + subroutine cg_clean + if(atom_num > 0) deallocate(hatom, gatom, rzeroatom) + if(ele_num > 0) deallocate(hnode, gnode, rzeronode) + end subroutine cg_clean + + subroutine reset_cg_dir + !This simple subroutine just resets the cg search direction + if(atom_num > 0) then + hatom = force_atom(:,1:atom_num_l) + gatom = hatom + end if + if(ele_num > 0) then + hnode = force_eq(:,:,1:node_num_l) + gnode = hnode + end if + end subroutine reset_cg_dir + + subroutine resize_min_array + !This subroutine resizes the min arrays if needed + real(kind=wp), allocatable :: gatom_array(:,:), hatom_array(:,:), gnode_array(:,:,:), hnode_array(:,:,:) + if (atom_num > 0) then + allocate(gatom_array(3, atom_num_l), hatom_array(3, atom_num_l)) + gatom_array=0.0_wp + call move_alloc(gatom_array, gatom) + hatom_array=0.0_wp + call move_alloc(hatom_array, hatom) + + hatom = force_atom(:,1:atom_num_l) + gatom = hatom + end if + + if (ele_num > 0) then + allocate(gnode_array(3, max_basisnum, node_num_l), hnode_array(3, max_basisnum, node_num_l)) + gnode_array=0.0_wp + call move_alloc(gnode_array, gnode) + hnode_array=0.0_wp + call move_alloc(hnode_array, hnode) + + hnode = force_eq(:,:,1:node_num_l) + gnode = hnode + end if + + end subroutine resize_min_array + + + subroutine cg_iterate(code) + 
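+        !One iteration of nonlinear CG, in outline: a backtracking line search along the
+        !current search direction h, followed by the Polak-Ribiere update
+        !    beta = max(0, (f.f - f.g_old)/(g_old.g_old)),   g = f,   h = g + beta*h
+        !with beta reset to zero every nlimit iterations and h reset to g whenever the
+        !new direction is not downhill (g.h <= 0).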
integer, intent(out) :: code + + integer :: ie, inod, ibasis, ip, ia + real(kind= wp) :: beta, dotme(2), dotall(2), fdotf, eprevious + + !Line minimization along direction h from current position + eprevious = compute_pe(1) + call linesearch_backtrack(code) + + if(code > 0) return + + !Calculate force dot products + dotme(:) = 0 + do ia = 1, atom_num_l + dotme(1) = dotme(1) + force_atom(1, ia)*force_atom(1,ia) + force_atom(2,ia)*force_atom(2,ia) & + + force_atom(3,ia)*force_atom(3,ia) + + dotme(2) = dotme(2) + force_atom(1, ia)*gatom(1,ia) + force_atom(2,ia)*gatom(2,ia) & + + force_atom(3,ia)*gatom(3,ia) + end do + do ie = 1, ele_num_l + if(who_has_ele(ie)) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(node_cg(ip)) + dotme(1) = dotme(1) + force_eq(1, ibasis,ip)*force_eq(1,ibasis,ip) & + + force_eq(2,ibasis,ip)*force_eq(2,ibasis,ip) & + + force_eq(3,ibasis,ip)*force_eq(3,ibasis,ip) + + dotme(2) = dotme(2) + force_eq(1, ibasis,ip)*gnode(1,ibasis,ip) & + + force_eq(2,ibasis,ip)*gnode(2,ibasis,ip) & + + force_eq(3,ibasis,ip)*gnode(3,ibasis,ip) + end do + end do + end if + end do + call mpi_allreduce(dotme, dotall, 2, mpi_wp, mpi_sum, world, ierr) + fdotf = dotall(1) + + ! update new search direction h from new f = -Grad(x) and old g + ! this is Polak-Ribieri formulation + ! beta = dotall[0]/gg would be Fletcher-Reeves + ! reinitialize CG every ndof iterations by setting beta = 0.0 + beta = max(0.0_wp,(dotall(1)-dotall(2))/gg) + if( mod((iter+1),nlimit) == 0) beta = 0.0_wp + gg = dotall(1) + + !Update g and h + do ia = 1, atom_num_l + gatom(:,ia) = force_atom(:,ia) + hatom(:,ia) = gatom(:, ia) + beta*hatom(:,ia) + end do + do ip = 1, node_num_l + do ibasis = 1, basis_num(node_cg(ip)) + gnode(:,ibasis,ip) = force_eq(:,ibasis,ip) + hnode(:,ibasis,ip) = gnode(:, ibasis,ip) + beta*hnode(:,ibasis,ip) + end do + end do + + !Reinitialize CG if new search direction is not downhill + dotme(1) = 0.0_wp + do ia = 1, atom_num_l + dotme(1) = dotme(1) + gatom(1,ia)*hatom(1,ia) + gatom(2,ia)*hatom(2,ia) + gatom(3,ia)*hatom(3,ia) + end do + do ie = 1, ele_num_l + if(who_has_ele(ie)) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + dotme(1) = dotme(1) + gnode(1,ibasis,ip)*hnode(1,ibasis,ip) + gnode(2,ibasis,ip)*hnode(2,ibasis,ip) & + + gnode(3,ibasis,ip)*hnode(3,ibasis,ip) + end do + end do + end if + end do + call mpi_allreduce(dotme(1), dotall(1), 1, mpi_wp, mpi_sum, world, ierr) + if(dotall(1) <= 0.0) then + if(atom_num > 0) hatom=gatom + if(ele_num > 0) hnode=gnode + end if + + end subroutine cg_iterate + + subroutine linesearch_backtrack(code) + !linemin: backtracking line search (Proc 3.1, p 41 in Nocedal and Wright) + !uses no gradient info, but should be very robust + !start at maxdist, backtrack until energy decrease is sufficient + integer, intent(out) :: code + + integer :: ia, ip, ibasis, inod, ie + real(kind=wp) :: fdothall, fdothme, hme, hmaxall, de_ideal, de, ecurrent, eoriginal, alpha + + !First calculate fdothall which is the projection of search dir along downhill gradient + fdothme = 0.0_wp + do ia = 1, atom_num_l + fdothme = fdothme + force_atom(1,ia)*hatom(1,ia) + force_atom(2,ia)*hatom(2,ia) + force_atom(3, ia)*hatom(3,ia) + end do + do ie = 1, ele_num_l + if(who_has_ele(ie)) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + fdothme = fdothme + force_eq(1,ibasis,ip)*hnode(1,ibasis,ip) + force_eq(2,ibasis,ip)*hnode(2,ibasis,ip) & + + 
force_eq(3,ibasis,ip)*hnode(3,ibasis,ip) + end do + end do + end if + end do + call mpi_allreduce(fdothme, fdothall, 1, mpi_wp, mpi_sum, world, ierr) + !If search direction isn't downhill return with error + if(fdothall <= 0.0_wp) then + code = 4 + return + end if + + ! set alpha so no dof is changed by more than max allowed amount + ! for atom coords, max amount = dmax + ! for extra per-atom dof, max amount = extra_max[] + ! for extra global dof, max amount is set by fix + ! also insure alpha <= ALPHA_MAX + ! else will have to backtrack from huge value when forces are tiny + ! if all search dir components are already 0.0, exit with error + hme = 0.0_wp + do ia = 1, atom_num_l + hme = max(hme, hatom(1,ia), hatom(2,ia), hatom(3,ia)) + end do + do ie = 1, ele_num_l + if(who_has_ele(ie)) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(node_cg(ip)) + hme = max(hme, hnode(1,ibasis,ip), hnode(2,ibasis,ip), hnode(3,ibasis,ip)) + end do + end do + end if + end do + call mpi_allreduce(hme, hmaxall, 1, mpi_wp, mpi_max, world, ierr) + if(is_equal(hmaxall, 0.0_wp)) then + code = 5 + return + end if + !Set starting alpha + alpha = min(alpha_max, dmax/hmaxall) + + !Save initial coordinates + if(atom_num_l > 0) rzeroatom(:,1:atom_num_l) = r_atom(:,1:atom_num_l) + if(node_num_l > 0) rzeronode(:,:,1:node_num_l) = r(:,:,1:node_num_l) + eoriginal = compute_pe(1) + + !Backtrack until decrease in energy is sufficient + do while(.true.) + !Step forward along h + if (alpha > 0.0_wp) then + do ia = 1, atom_num_l + r_atom(:,ia) = rzeroatom(:,ia) + alpha*hatom(:,ia) + end do + + do ip = 1, node_num_l + do ibasis = 1, basis_num(node_cg(ip)) + r(:,ibasis,ip) = rzeronode(:,ibasis, ip) + alpha*hnode(:,ibasis, ip) + end do + end do + neval = neval + 1 + call update_neighbor(iter, .true.) + call update_force + ecurrent=compute_pe(1) + + end if + + !Quit with success if energy change is better than ideal + de_ideal = -backtrack_slope*alpha*fdothall + de = ecurrent-eoriginal + if(de <= de_ideal) then + code = 0 + return + end if + + !Reduce alpha + alpha = alpha * alpha_reduce + !Backtracked too much, reset to starting point + ! if de is positive exit with error' + ! if de is negative than we have reached energy tol + if((alpha <= lim_zero) .or. (de_ideal >= EMACH)) then + if(atom_num_l > 0) r_atom(:, 1:atom_num_l) = rzeroatom(:, 1:atom_num_l) + if(ele_num_l > 0) r(:,:,1:node_num_l) = rzeronode(:,:,1:node_num_l) + call update_neighbor(iter, .true.) + call update_force + if(de < 0.0) then + code = 1 + return + else + code = 6 + return + end if + end if + end do + end subroutine linesearch_backtrack + + + + subroutine linesearch_quadratic(code) + !---------------------------------------------------------------------- + ! linemin: quadratic line search (adapted from Dennis and Schnabel) + ! The objective function is approximated by a quadratic + ! function in alpha, for sufficiently small alpha. + ! This idea is the same as that used in the well-known secant + ! method. However, since the change in the objective function + ! (difference of two finite numbers) is not known as accurately + ! as the gradient (which is close to zero), all the expressions + ! are written in terms of gradients. In this way, we can converge + ! the LAMMPS forces much closer to zero. + ! + ! We know E,Eprev,fh,fhprev. The Taylor series about alpha_prev + ! truncated at the quadratic term is: + ! + ! E = Eprev - del_alpha*fhprev + (1/2)del_alpha^2*Hprev + ! + ! and + ! + ! fh = fhprev - del_alpha*Hprev + ! 
+ ! where del_alpha = alpha-alpha_prev + ! + ! We solve these two equations for Hprev and E=Esolve, giving: + ! + ! Esolve = Eprev - del_alpha*(f+fprev)/2 + ! + ! We define relerr to be: + ! + ! relerr = |(Esolve-E)/Eprev| + ! = |1.0 - (0.5*del_alpha*(f+fprev)+E)/Eprev| + ! + ! If this is accurate to within a reasonable tolerance, then + ! we go ahead and use a secant step to fh = 0: + ! + ! alpha0 = alpha - (alpha-alphaprev)*fh/delfh; + ! + !------------------------------------------------------------------------- + integer, intent(out) :: code + + integer :: ia, ip, ibasis, ie, inod + real(kind=wp) :: fdothall, fdothme, hme, hmaxall, de_ideal, de, ecurrent, eoriginal, alpha, alphaprev, fhprev, & + engprev, dotme(2), dotall(2), fh, ff, delfh, relerr, alpha0, alphamax + + !First calculate fdothall which is the projection of search dir along downhill gradient + fdothme = 0.0_wp + do ia = 1, atom_num_l + fdothme = fdothme + force_atom(1,ia)*hatom(1,ia) + force_atom(2,ia)*hatom(2,ia) + force_atom(3, ia)*hatom(3,ia) + end do + do ie = 1, ele_num_l + if(who_has_ele(ie)) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + fdothme = fdothme + force_eq(1,ibasis,ip)*hnode(1,ibasis,ip) & + + force_eq(2,ibasis,ip)*hnode(2,ibasis,ip) & + + force_eq(3,ibasis,ip)*hnode(3,ibasis,ip) + end do + end do + end if + end do + call mpi_allreduce(fdothme, fdothall, 1, mpi_wp, mpi_sum, world, ierr) + !If search direction isn't downhill return with error + if(fdothall <= 0.0_wp) then + code = 4 + return + end if + + + ! set alpha so no dof is changed by more than max allowed amount + ! for atom coords, max amount = dmax + ! for extra per-atom dof, max amount = extra_max[] + ! for extra global dof, max amount is set by fix + ! also insure alpha <= ALPHA_MAX + ! else will have to backtrack from huge value when forces are tiny + ! if all search dir components are already 0.0, exit with error + hme = 0.0_wp + do ia = 1, atom_num_l + hme = max(hme, hatom(1,ia), hatom(2,ia), hatom(3,ia)) + end do + do ie = 1, ele_num_l + if(who_has_ele(ie)) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + hme = max(hme, hnode(1,ibasis,ip), hnode(2,ibasis,ip), hnode(3,ibasis,ip)) + end do + end do + end if + end do + call mpi_allreduce(hme, hmaxall, 1, mpi_wp, mpi_max, world, ierr) + if(is_equal(hmaxall, 0.0_wp)) then + code = 5 + return + end if + !Get alphamax + alphamax = min(alpha_max, dmax/hmaxall) + + !Save initial coordinates + if(atom_num_l > 0) rzeroatom(:,1:atom_num_l) = r_atom(:,1:atom_num_l) + if(node_num_l > 0) rzeronode(:,:,1:node_num_l) = r(:,:,1:node_num_l) + eoriginal = compute_pe(1) + alpha = alphamax + fhprev = fdothall + engprev = eoriginal + alphaprev = 0.0_wp + + !Backtrack until decrease in energy is sufficient + do while(.true.) + + + !Step forward along h + if (alpha > 0.0_wp) then + do ia = 1, atom_num_l + r_atom(:,ia) = rzeroatom(:,ia) + alpha*hatom(:,ia) + end do + + do ip = 1, node_num_l + do ibasis = 1, basis_num(node_cg(ip)) + r(:,ibasis,ip) = rzeronode(:,ibasis, ip) + alpha*hnode(:,ibasis, ip) + end do + end do + neval = neval + 1 + call update_neighbor(iter, .true.) + call update_force + ecurrent=compute_pe(1) + end if + + ! 
Compute new fh, alpha, delfh + dotme(:) = 0 + do ia = 1, atom_num_l + dotme(1) = dotme(1) + force_atom(1, ia)*force_atom(1,ia) + force_atom(2,ia)*force_atom(2,ia) & + + force_atom(3,ia)*force_atom(3,ia) + + dotme(2) = dotme(2) + force_atom(1, ia)*hatom(1,ia) + force_atom(2,ia)*hatom(2,ia) & + + force_atom(3,ia)*hatom(3,ia) + end do + do ie = 1, ele_num_l + if(who_has_ele(ie)) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + dotme(1) = dotme(1) + force_eq(1, ibasis,ip)*force_eq(1,ibasis,ip) & + + force_eq(2,ibasis,ip)*force_eq(2,ibasis,ip) & + + force_eq(3,ibasis,ip)*force_eq(3,ibasis,ip) + + dotme(2) = dotme(2) + force_eq(1, ibasis,ip)*hnode(1,ibasis,ip) & + + force_eq(2,ibasis,ip)*hnode(2,ibasis,ip) & + + force_eq(3,ibasis,ip)*hnode(3,ibasis,ip) + end do + end do + end if + end do + call mpi_allreduce(dotme, dotall, 2, mpi_wp, mpi_sum, world, ierr) + + ff = dotall(1) + fh = dotall(2) + delfh = fh-fhprev + + !If fh or delfh is epsilon, reset to starting point and exit with error + if((abs(fh) < eps_quad) .or. (abs(delfh) < eps_quad)) then + if(atom_num_l > 0) r_atom(:, 1:atom_num_l) = rzeroatom(:, 1:atom_num_l) + if(node_num_l > 0) r(:,:,1:node_num_l) = rzeronode(:, :, 1:node_num_l) + call update_neighbor(iter, .true.) + call update_force + + code = 11 + return + end if + + !!Check if ready for quadratic projection, quivalent to secant method + !alpha0 = projected alpha + relerr = abs(1.0_wp-(0.5_wp*(alpha-alphaprev)*(fh+fhprev)+ecurrent)/engprev) + alpha0 = alpha - (alpha-alphaprev)*fh/delfh + if((relerr <= quadratic_tol).and.(alpha0 > 0.0).and.(alpha0= EMACH)) then + if(atom_num_l > 0) r_atom(:, 1:atom_num_l) = rzeroatom + if(ele_num_l > 0) r(:,:,1:node_num_l) = rzeronode + + call update_neighbor(iter, .true.) 
+ call update_force + + if(de < 0.0) then + code = 1 + return + else + code = 6 + return + end if + end if + end do + end subroutine linesearch_quadratic + + pure function get_cg_neval() + integer :: get_cg_neval + + get_cg_neval = neval + return + end function get_cg_neval + +end module cg diff --git a/src/comms.f90 b/src/comms.f90 new file mode 100644 index 0000000..124bfa3 --- /dev/null +++ b/src/comms.f90 @@ -0,0 +1,3392 @@ +module comms + !This is code in charge of most interprocessor communications + + use mpi + use parameters + use elements + use box + use errors + use math + + implicit none + + !Communicator parameters + integer, save :: grank, pro_num, grid_comm, mpi_wp, & + nreplica, ireplica, world, universe, uroots_comm, & + urank, upro_num + + character(len=100) :: rank_string + + !Processor arrays + integer :: & + num_pro(3), & + grid_coords(3), & + list_atom_num(2, 3), & + list_atomap_num(2, 3) + + integer, allocatable, save :: & + list_atom(:, :, :), list_atomap(:, :, :) + + real(kind = wp), save :: & + pro_bd(6), & + pro_bd_pre(6), & + pro_bd_out(6), & + pro_bd_in(6), & + pro_bd_kj(6, 2, 3), & + pro_length(3), & + pro_length_out(3) + + logical, save :: & + pro_gather(3), pro_newton(2), & + pro_atomap(2), & + list_atom_logic(2, 3), & + list_atomap_logic(2, 3) + + + !Scatter arrays for atoms + integer, allocatable, save :: tag_atom_scatter(:), type_atom_scatter(:) + real(kind=wp), allocatable, save :: r_atom_scatter(:,:), vel_atom_scatter(:,:) + !Scatter arrays for elements + integer, allocatable, save :: tag_ele_scatter(:), size_ele_scatter(:), etype_scatter(:), & + basis_num_scatter(:), basis_type_scatter(:,:) + real(kind=wp), allocatable, save :: r_scatter(:,:,:), vel_scatter(:,:,:) + + !arrays for sharing elements + integer :: ele_shared_num + integer, allocatable :: pro_shared_num(:), ele_id_shared(:) + logical, allocatable :: who_has_ele(:) + + public + contains + subroutine processor_array + + implicit none + + allocate( & + list_atom(seg_num, 2, 3), & + stat = allostat & + ) + + return + end subroutine processor_array + + subroutine scatter_cg_array + !Allocate scatter arrays for cg + if(allocated(tag_ele_scatter)) then + deallocate(tag_ele_scatter, r_scatter, vel_scatter, size_ele_scatter, etype_scatter, & + basis_num_scatter, basis_type_scatter) + end if + allocate( & + r_scatter(3, max_basisnum, node_num), & + vel_scatter(3, max_basisnum, node_num), & + tag_ele_scatter(ele_num), & + size_ele_scatter(ele_num), & + etype_scatter(ele_num), & + basis_num_scatter(ele_num), & + basis_type_scatter(max_basisnum, ele_num), & + stat = allostat & + ) + + end subroutine scatter_cg_array + + subroutine dealloc_scatter_cg + !Deallocate scatter arrays for cg + deallocate( r_scatter, vel_scatter, tag_ele_scatter, size_ele_scatter, etype_scatter, basis_num_scatter, & + basis_type_scatter, stat = allostat ) + end subroutine dealloc_scatter_cg + + subroutine scatter_at_array + !Allocate scatter arrays for at + if(allocated(tag_atom_scatter)) then + deallocate(tag_atom_scatter, type_atom_scatter, r_atom_scatter, vel_atom_scatter) + end if + allocate( & + tag_atom_scatter(atom_num), type_atom_scatter(atom_num), & + r_atom_scatter(3,atom_num), vel_atom_scatter(3,atom_num), & + stat = allostat & + ) + end subroutine scatter_at_array + + subroutine dealloc_scatter_at + !Deallocate scatter arrays for at + deallocate( & + tag_atom_scatter, type_atom_scatter, & + r_atom_scatter, vel_atom_scatter, & + stat = allostat & + ) + end subroutine dealloc_scatter_at + + subroutine precision_init + 
!This function defines the mpi precison to be used + + select case(wp) + case(-1, -2, -3) + if(rank == root) then + print *, 'Error: The processor does not support the desired', & + ' real data type precision' + call mpi_abort(universe, 1, ierr) + end if + + case(selected_real_kind(15, 307)) + mpi_wp = mpi_double_precision + + case(selected_real_kind(33, 4931)) + mpi_wp = mpi_real16 + + case default + if(rank == root) then + print *, 'Error: Wp', wp, ' is not accepted' + call mpi_abort(universe, 1, ierr) + end if + end select + + return + end subroutine precision_init + + subroutine comm_init + !This is a simple subroutine which initializes the communicator information + + call mpi_init(ierr) + universe=mpi_comm_world + world=universe + call mpi_comm_rank(mpi_comm_world, urank, ierr) + write(rank_string, *) "urank ", urank !rank_string is just to make it easier to pass the rank to error subroutines + call mpi_comm_size(mpi_comm_world, upro_num, ierr) + call precision_init + + universe = mpi_comm_world + world = universe + uroots_comm = universe + pro_num = upro_num + rank = urank + + end subroutine + + subroutine multicomm_init + ! initializes a multicomm communicator setup with nreplica sub-worlds + + ! check to make sure we can actually split the communicator + if ( mod(upro_num, nreplica) /= 0 ) then + print *, 'Error: Cannot evenly divide ', upro_num, ' processors into', & + nreplica, ' replica images.' + call mpi_abort(universe, 1, ierr) + endif + + ! split comm world, make a universe communicator and save parameters + ireplica = mod(urank, nreplica) + call mpi_comm_split(universe, ireplica, urank, world, ierr) + call mpi_comm_rank(world, rank, ierr) + call mpi_comm_size(world, pro_num, ierr) + + ! create a communicator with all the world roots for easy bcasting + call mpi_comm_split(universe, rank, urank, uroots_comm, ierr) + + end subroutine + + subroutine divide_domain + !This subroutine divides the simulation domain among processors. 
It does rectilinear domain decomposition, + !but attempts to max the calculation points + + + integer :: i, k, ix, iy, iz, pro_num_try, i_this + logical :: ireo + integer, dimension(3) :: num_index + integer, allocatable :: & + num_pro_try(:, :), & + num_pro_try_array(:, :) + real(kind = wp), allocatable :: & + area_try(:) + + if(pro_num == 1) then + !If only one processor then the whole domain is assigned to the root processor + num_pro(:) = 1 + grid_coords(:) = 0 + grank = 0 + do i = 1, 3 + pro_length(i) = box_length(i) + pro_bd(2*i-1) = box_bd(2*i-1) + pro_bd(2*i) = box_bd(2*i) + end do + else + !Otherwise we have to split the domain + + pro_num_try = pro_num + allocate(num_pro_try(pro_num_try, 3), stat = allostat) + if(allostat /= 0) call alloc_error(rank_string//" failed to allocate num_pro_try in divide domain",allostat) + + num_pro_try(:, :) = 0 + k = 0 + do ix = 1, pro_num + if(mod(pro_num, ix) == 0) then + do iy = 1, pro_num / ix + if(mod(pro_num, ix * iy) == 0) then + iz = pro_num / (ix * iy) + k = k + 1 + + if(k > pro_num_try) then + allocate( & + num_pro_try_array(pro_num_try+20, 3), & + stat = allostat & + ) + + if(allostat /= 0) call alloc_error(rank_string// & + " failed to allocate num_pro_try_array in divide_domain", & + allostat) + + num_pro_try_array(:, :) = 0 + num_pro_try_array(1:pro_num_try, :) = num_pro_try(:, :) + call move_alloc(num_pro_try_array, num_pro_try) + pro_num_try = pro_num_try + 20 + end if + + num_pro_try(k, :) = [ ix, iy, iz ] + end if + end do + end if + end do + + !compare all possible set of num_pro_try + allocate(area_try(k),stat = allostat) + if(allostat /= 0) call alloc_error(rank_string // " failed to allocate area_try in divide_domain ", allostat) + + area_try(:) = 0.0_wp + + do i = 1, k + + num_index(:) = num_pro_try(i, :) + + if(period(1).eqv..true.) then + area_try(i) = area_try(i) + (num_index(1) + 1) & + * box_length(2) * box_length(3) + else + area_try(i) = area_try(i) + (num_index(1) - 1) & + * box_length(2) * box_length(3) + + end if + + if(period(2).eqv..true.) then + area_try(i) = area_try(i) + (num_index(2) + 1) & + * box_length(3) * box_length(1) + + else + area_try(i) = area_try(i) + (num_index(2) - 1) & + * box_length(3) * box_length(1) + + end if + + if(period(3).eqv..true.) then + area_try(i) = area_try(i) + (num_index(3) + 1) & + * box_length(1) * box_length(2) + + else + area_try(i) = area_try(i) + (num_index(3) - 1) & + * box_length(1) * box_length(2) + + end if + + end do + + i_this = minloc(area_try, 1) + + num_pro(:) = num_pro_try(i_this, :) + + !Check to make sure everything has been setup correctly + if(rank == root) then + !Check to make sure the number of domains equals the total number of processors + if(pro_num /= product(num_pro)) then + print *, 'Error: Wrong num_pro array, the product', & + product(num_pro), ' does not equal', pro_num + call mpi_abort(universe, 1, ierr) + end if + end if + + !create a cartesian grid + ireo = .false. 
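+            !Map the chosen num_pro decomposition onto a 3D Cartesian communicator
+            !(ireo = .false. disables rank reordering); grid_coords then gives this
+            !rank's position in the processor grid, which fixes its share of the box below.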
+ call mpi_cart_create(world, 3, num_pro, period, ireo, grid_comm, ierr) + call mpi_comm_rank(grid_comm, grank, ierr) + call mpi_cart_coords(grid_comm, grank, 3, grid_coords, ierr) + end if + + + pro_length(:) = 0.0_wp + pro_bd(:) = 0.0_wp + + do i = 1, 3 + !Define processor boundaries + pro_length(i) = box_length(i) / num_pro(i) + pro_bd(2*i-1) = box_bd(2*i-1) + grid_coords(i) * pro_length(i) + pro_bd(2*i) = box_bd(2*i-1) + (grid_coords(i) + 1) * pro_length(i) + + if(grid_coords(i) == 0) then + !If the processor has coordinate 0 it's the bottom one so set the boundary equal to the box boundary + pro_bd(2*i-1) = box_bd(2*i-1) + end if + + if(grid_coords(i) == num_pro(i) - 1) then + !If the processor has coordinate num_pro(i) - 1 it's the top one so set the boundary equal to top boundary + pro_bd(2*i) = box_bd(2*i) + end if + + pro_length(i) = pro_bd(2*i) - pro_bd(2*i-1) + end do + + return + end subroutine divide_domain + + subroutine processor_bds + !This subroutine defines processor boundaries + integer :: i, j, k + + do i = 1, 3 + !Calculate the pro_length in each dimension and make sure it's larger than 2*rc_neigh + pro_length(i) = box_length(i) / num_pro(i) + if(pro_length(i) < 2.0_wp * rc_neigh) then + print *, 'Error: pro_length along the', i, ' direction', & + pro_length(i), ' of rank', rank, ' is smaller than 2*rc_neigh:', & + 2.0_wp * rc_neigh + + call mpi_abort(universe, 1, ierr) + end if + end do + + !define inner boundaries + !Inner boundaries are used to check if atoms are within cutoff radius of processor + !boundary requiring creation of ghost atoms. + + pro_bd_in(:) = pro_bd(:) + do i = 1, 3 + pro_bd_in(2*i-1) = pro_bd(2*i-1) + rc_neigh + pro_bd_in(2*i) = pro_bd(2*i) - rc_neigh + end do + + !define outer boundaries + pro_bd_out(:) = pro_bd(:) + do i = 1, 3 + pro_bd_out(2*i-1) = pro_bd(2*i-1) - rc_neigh + pro_bd_out(2*i) = pro_bd(2*i) + rc_neigh + if(period(i).eqv..false.) then + + if(grid_coords(i) == 0) then + pro_bd_out(2*i-1) = box_bd(2*i-1) + end if + + if(grid_coords(i) == num_pro(i) - 1) then + pro_bd_out(2*i) = box_bd(2*i) + end if + end if + + pro_length_out(i) = pro_bd_out(2*i) - pro_bd_out(2*i-1) + + end do + + !Now get processor kj boundaries. These are the inner and outer boundaries for every step in the process of sharing ghost + !atoms/elemenmts + do j = 1 ,3 + do k = 1, 2 + pro_bd_kj(:, k, j) = pro_bd(:) + + if(k == 1) then + pro_bd_kj(2*j-1, k, j) = pro_bd_in(2*j) + + else + pro_bd_kj(2*j, k, j) = pro_bd_in(2*j-1) + end if + if(j == 2) then + pro_bd_kj(1:2, k, j) = pro_bd_out(1:2) + + else if(j == 3) then + pro_bd_kj(1:4, k, j) = pro_bd_out(1:4) + end if + end do + end do + return + end subroutine processor_bds + + subroutine update_proc_bd(opt1) + !This subroutine updates processor boundaries if needed + logical, intent(in), optional :: opt1 + + integer :: i, ip, ibasis + real(kind = wp) :: max_posme(3), max_posall(3), min_posme(3), min_posall(3) + logical :: resize_box, resize_all + + if(present(opt1)) then + resize_all=opt1 + else + resize_all = .true. + end if + + max_posme(:) = 0 + min_posme(:) = 0 + resize_box = .false. 
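+    !A note on the two branches below: when resize_all is .true. the global
+    !box is grown along shrink-wrapped directions to enclose every atom and
+    !node and, if it changed, the whole processor decomposition is rebuilt at
+    !the end of this subroutine; when resize_all is .false. only the box
+    !bounds and the domain boundaries of the edge processors are extended to
+    !the new box, without rebuilding the full decomposition.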
+ + if(resize_all) then + do i = 1, 3 + !Only need to resize if shrink wrapped in that boundary + if(.not.period(i)) then + !Get atomic max and min + if(atom_num_l > 0) then + max_posme(i) = maxval(r_atom(i, 1:atom_num_l)) + min_posme(i) = minval(r_atom(i, 1:atom_num_l)) + end if + + !Now get nodal max and min + do ip = 1, node_num_l + do ibasis = 1, basis_num(node_cg(ip)) + max_posme(i) = max(max_posme(i), r(i, ibasis, ip)) + min_posme(i) = min(min_posme(i), r(i, ibasis, ip)) + end do + end do + end if + end do + + call mpi_allreduce(max_posme, max_posall, 3, mpi_wp, mpi_max, world, ierr) + call mpi_allreduce(min_posme, min_posall, 3, mpi_wp, mpi_min, world, ierr) + + do i = 1, 3 + if (.not.period(i)) then + if (max_posall(i) > box_bd(2*i)) then + box_bd(2*i) = max_posall(i)+10D-4 + resize_box = .true. + end if + if (min_posall(i) < box_bd(2*i-1)) then + box_bd(2*i-1) = min_posall(i)-10D-4 + resize_box = .true. + end if + end if + box_length(i) = box_bd(2*i) - box_bd(2*i-1) + end do + + + else + !If not then we only check to resize the outer boundary + do i = 1, 3 + !Only need to resize if shrink wrapped in that boundary + if(.not.period(i)) then + !Get atomic max and min + if(atom_num_l > 0) then + max_posme(i) = maxval(r_atom(i, 1:atom_num_l)) + min_posme(i) = minval(r_atom(i, 1:atom_num_l)) + end if + + !Now get nodal max and min + do ip = 1, node_num_l + do ibasis = 1, basis_num(node_cg(ip)) + max_posme(i) = max(max_posme(i), r(i, ibasis, ip)) + min_posme(i) = min(min_posme(i), r(i, ibasis, ip)) + end do + end do + end if + end do + + call mpi_allreduce(max_posme, max_posall, 3, mpi_wp, mpi_max, world, ierr) + call mpi_allreduce(min_posme, min_posall, 3, mpi_wp, mpi_min, world, ierr) + + !Now check to see if the max and min are greater than the box boundaries for shrink wrapped directions + do i = 1, 3 + if(.not.period(i)) then + if(min_posall(i) < box_bd(2*i-1)) then + box_bd(2*i-1) = min_posall(i) - zerotol + !If the processor has coordinate 0 it's the bottom one so set the boundary equal to the box boundary + if(grid_coords(i) == 0) then + pro_bd(2*i-1) = box_bd(2*i-1) + end if + end if + + if(max_posall(i) > box_bd(2*i)) then + box_bd(2*i) = max_posall(i) + zerotol + if(grid_coords(i) == num_pro(i) - 1) then + !If the processor has coordinate num_pro(i) - 1 it's the top one so set the boundary equal to top boundary + pro_bd(2*i) = box_bd(2*i) + end if + end if + end if + box_length(i) = box_bd(2*i) - box_bd(2*i-1) + end do + end if + + !If we change the box size then resize processor boundaries + if(resize_box) then + pro_length(:) = 0.0_wp + pro_bd(:) = 0.0_wp + + do i = 1, 3 + !Define processor boundaries + pro_length(i) = box_length(i) / num_pro(i) + pro_bd(2*i-1) = box_bd(2*i-1) + grid_coords(i) * pro_length(i) + pro_bd(2*i) = box_bd(2*i-1) + (grid_coords(i) + 1) * pro_length(i) + + if(grid_coords(i) == 0) then + !If the processor has coordinate 0 it's the bottom one so set the boundary equal to the box boundary + pro_bd(2*i-1) = box_bd(2*i-1) + end if + + if(grid_coords(i) == num_pro(i) - 1) then + !If the processor has coordinate num_pro(i) - 1 it's the top one so set the boundary equal to top boundary + pro_bd(2*i) = box_bd(2*i) + end if + + pro_length(i) = pro_bd(2*i) - pro_bd(2*i-1) + end do + call processor_bds + end if + return + end subroutine update_proc_bd + + subroutine scatter_cg + !Scatter coarse-grained elements to the different processors + integer :: i, j, k, ie, je, le, jp, ip, inod, lp, iatomap, jatomap, latomap, & + seg_real, iatom, irank, jrank, & + ibasis, 
etype_buff, basis_num_buff, size_ele_buff, & + ele_atomap_num, tag_ele_buff + + integer, dimension(3) :: info + integer, dimension(max_basisnum) :: basis_type_buff + integer, dimension(3,max_basisnum,ng_max_node) :: pb_node_buff + integer, dimension(atomap_max) :: tag_atomap_buff, type_atomap_buff, & + iatom_array, jatom_array + + !Add integer variables for setting up interpolation + integer :: max_esize, unique_esizes(100), unique_enum, send_array(2) + + integer, dimension(pro_num) :: irank_array + integer, dimension(3, max_basisnum, ng_max_node) :: pb_in + + real(kind = wp), dimension(3, max_basisnum) :: r_interp + real(kind=wp), dimension(3, max_basisnum, ng_max_node) :: r_nodes, vel_nodes + real(kind = wp), dimension(3,atomap_max) :: r_atomap_buff + + logical(kind = wp), dimension(atomap_max) :: need_ele + logical(kind = wp), dimension(pro_num) :: logic_array + + !Variables for communication + integer :: send_ni, send_nr + integer, dimension(4+max_basisnum+(1+3*max_basisnum)*ng_max_node+atomap_max) :: send_int + real(kind=wp), dimension(2*3*max_basisnum*ng_max_node + 3*atomap_max) :: send_real + + integer, allocatable :: & + who_ele(:), who_ele_all(:), who_element(:), who_element_all(:), who_ele_long(:), who_ele_long_all(:), & + ele_id_buff(:) + + + + !First setup the atom interpolation + max_esize = 0 + unique_enum = 0 + if (rank == root) then + eleloop:do i = 1, ele_num + !Figure out max esize + if(size_ele_scatter(i) > max_esize) max_esize = size_ele_scatter(i) + do j = 1, unique_enum + if (unique_esizes(j) == size_ele_scatter(i)) cycle eleloop + end do + unique_enum = unique_enum + 1 + unique_esizes(unique_enum) = size_ele_scatter(i) + end do eleloop + send_array(1) = unique_enum + send_array(2) = max_esize + call mpi_bcast(send_array, 2, mpi_integer, root, world, ierr) + call mpi_bcast(unique_esizes(1:unique_enum), unique_enum, mpi_integer, root, & + world, ierr) + else + call mpi_bcast(send_array, 2, mpi_integer, root, world, ierr) + unique_enum = send_array(1) + max_esize = send_array(2) + call mpi_bcast(unique_esizes(1:unique_enum), unique_enum, mpi_integer, root, & + world, ierr) + end if + + !Now setup the interpolation arrays + call setup_interpolation(max_esize, unique_enum, unique_esizes(1:unique_enum)) + + !Allocate some variables + allocate(who_ele(ele_num), who_ele_long(pro_num*ele_num), stat=allostat) + if (allostat > 0) call alloc_error("Failure allocating who_ele in scatter_cg", allostat) + who_ele(:) = 0 + who_ele_long(:) = 0 + + !If we only have one processor the code is simple + if (pro_num == 1) then + + le=0 + lp=0 + latomap=0 + + do ie = 1, ele_num + + le = le + 1 + ele_glob_id(ie) = ie + tag_ele(ie) = tag_ele_scatter(ie) + size_ele(ie) = size_ele_scatter(ie) + etype(ie) = etype_scatter(ie) + basis_num(ie) = basis_num_scatter(ie) + basis_type(:,ie) = basis_type_scatter(:,ie) + who_ele(ie) = who_ele(ie) + 1 + who_ele_long(pro_num*(ie-1)+rank+1) = 1 + + do inod = 1, ng_node(etype(ie)) + + lp = lp + 1 + cg_node(inod, ie) = lp + node_cg(lp) = ie + + do ibasis = 1, basis_num(ie) + + if(periodic) then + call cross_pb(r_scatter(:,ibasis,lp), info) + pb_node(:, ibasis, lp) = info(:) + pb_in(:, ibasis, inod) = info(:) + end if + r(:, ibasis, lp) = r_scatter(:,ibasis,lp) + r_nodes(:,ibasis,inod) = r(:,ibasis, lp) + if(need_vel) then + vel(:, ibasis, lp) = vel_scatter(:, ibasis, lp) + end if + + end do + + end do + + do iatom = 1, (size_ele(ie)+1)**3 + !Interpolate atoms + call interp_atom(iatom, size_ele(ie), etype(ie), pb_in, basis_num(ie), r_nodes, r_interp) + + do 
ibasis = 1, basis_num(ie) + !Assign interpolated atoms to arrays + latomap = latomap + 1 + tag_atomap(latomap) = latomap + type_atomap(latomap) = basis_type(ibasis,ie) + cg_atomap(basis_num(ie)*(iatom-1) + ibasis, ie) = latomap + r_atomap(:, latomap) = r_interp(:,ibasis) + end do + end do + + end do + + if(le /= ele_num) then + print *, 'Error: Le', le, ' should equal', ele_num + call mpi_abort(universe, 1, ierr) + + else if(lp /= node_num) then + print *, 'Error: Lp', lp, ' should equal', node_num + call mpi_abort(universe, 1, ierr) + + else if(latomap /= atomap_num) then + print *, 'Error: Latomap', latomap, ' should equal', atomap_num + call mpi_abort(universe, 1, ierr) + end if + + else + !If we have more than one processor than we have to run more complex code + ip = 0 + lp = 0 + le = 0 + iatomap = 0 + jatomap = 0 + latomap = 0 + + !Initialize send counts for integer data and real data + if (periodic) then + send_ni = 4+max_basisnum+(1+3*max_basisnum)*ng_max_node+atomap_max + else + send_ni = 4+max_basisnum+ng_max_node+atomap_max + end if + + if(need_vel) then + send_nr = 2*3*max_basisnum*ng_max_node + 3*atomap_max + else + send_nr = 3*max_basisnum*ng_max_node+3*atomap_max + end if + + do ie = 1, ele_num + + tag_atomap_buff(:) = 0 + type_atomap_buff(:) = 0 + basis_type_buff(:) = 0 + pb_node_buff(:,:,:) = 0 + r_nodes(:,:,:) = 0.0_wp + vel_nodes(:,:,:) = 0.0_wp + r_atomap_buff(:,:) = 0.0_wp + + if(rank == root) then + + pb_in(:,:,:) = 0 + do inod = 1, ng_node(etype_scatter(ie)) + ip = ip + 1 + + do ibasis = 1, basis_num_scatter(ie) + !Now get all of the correct nodal positions + if(periodic.eqv..true.) then + call cross_pb(r_scatter(:,ibasis,ip), info) + pb_in(:, ibasis, inod) = info + end if + + !r_nodes will be used later interpolation + r_nodes(:,ibasis,inod) = r_scatter(:,ibasis,ip) + !vel nodes will be sent + vel_nodes(:,ibasis,inod) = vel_scatter(:, ibasis, ip) + + end do + end do + + do iatom = 1, (size_ele_scatter(ie)+1)**3 + !Now interpolate all of the atoms + !NOTE in this case iatom is the number of lattice points + call interp_atom(iatom, size_ele_scatter(ie), etype_scatter(ie), pb_in, basis_num_scatter(ie), & + r_nodes, r_interp) + + do ibasis = 1, basis_num_scatter(ie) + iatomap = iatomap + 1 + tag_atomap_buff((iatom-1)*basis_num_scatter(ie) + ibasis) = iatomap + do i = 1,3 + r_atomap_buff(i,(iatom-1)*basis_num_scatter(ie) + ibasis) = r_interp(i,ibasis) + end do + end do + end do + + if (need_vel) then + !Now pack the element data + call pack_ele_atomap(etype_scatter(ie),tag_ele_scatter(ie),size_ele_scatter(ie), basis_num_scatter(ie),& + basis_type_scatter(:,ie), pb_in, r_nodes, tag_atomap_buff, & + r_atomap_buff, send_int, send_real, vel_nodes) + else + call pack_ele_atomap(etype_scatter(ie),tag_ele_scatter(ie),size_ele_scatter(ie), basis_num_scatter(ie),& + basis_type_scatter(:,ie), pb_in, r_nodes, tag_atomap_buff, & + r_atomap_buff, send_int, send_real) + end if + end if + + + !Send integer data + call mpi_bcast(send_int(1:send_ni), send_ni, mpi_integer, root, world, ierr) + !Send real data + call mpi_bcast(send_real(1:send_nr), send_nr, mpi_wp, root, world, ierr) + + + !Now unpack the data into the buff arrays + if (need_vel) then + call unpack_ele_atomap(send_int, send_real, etype_buff, tag_ele_buff, size_ele_buff, basis_num_buff,& + basis_type_buff, pb_node_buff, r_nodes, tag_atomap_buff, & + type_atomap_buff, r_atomap_buff, vel_nodes) + else + call unpack_ele_atomap(send_int, send_real, etype_buff, tag_ele_buff, size_ele_buff, basis_num_buff,& + basis_type_buff, 
pb_node_buff, r_nodes, tag_atomap_buff, & + type_atomap_buff, r_atomap_buff) + end if + + need_ele(:) = .false. + iatom_array(:) = 0 + + !Loops through all interpolated atoms in the element and tag which ones belong to use + ele_atomap_num = basis_num_buff*(size_ele_buff+1)**3 + do iatom = 1, ele_atomap_num + !NOTE: In this case iatom is each individual interpolated atom + if(in_block_bd(r_atomap_buff(:,iatom), pro_bd)) then + !If it's in the processor boundaries than mark it + need_ele(iatom) = .true. + iatom_array(iatom) = 1 + end if + end do + + !Check to make sure all interpolated atoms have been grabbed + seg_real = count(need_ele) + + call mpi_reduce(seg_real, i, 1, mpi_integer, & + mpi_sum, root, world, ierr) + jatom_array(:) = 0 + call mpi_reduce(iatom_array(1:ele_atomap_num), jatom_array(1:ele_atomap_num), ele_atomap_num, mpi_integer, & + mpi_sum, root, world, ierr) + + if(rank == root) then + do iatom = 1, ele_atomap_num + if(jatom_array(iatom) /= 1) then + print *, 'Error: Jatom_array', jatom_array(iatom), & + ' for iatom', iatom, ' should be 1 with position ', r_atomap_buff(:,iatom) + call mpi_abort(universe, 1, ierr) + end if + end do + + if(i /= (ele_atomap_num)) then + print *, 'Error: Total number of atomaps', i, ' of element', & + ie, ' should equal', ele_atomap_num + call mpi_abort(universe, 1, ierr) + end if + end if + +! If the current element has atoms belonging to the processor than save information +! regarding those atoms + if(any(need_ele).eqv..true.) then + le = le + 1 + + !Grow cg arrays if needed + if(le > ele_num_lr) call grow_cg_arrays(1) + + lp = lp + ng_node(etype_buff) + + if(lp > node_num_lr) call grow_cg_arrays(2, max(seg_num, seg_real)) + + latomap = latomap + seg_real + + if(latomap > atomap_num_lr) call grow_cg_arrays(3, max(seg_num, seg_real)) + + + !Save the element + ele_glob_id(le) = ie + tag_ele(le) = tag_ele_buff + size_ele(le) = size_ele_buff + etype(le) = etype_buff + basis_num(le) = basis_num_buff + basis_type(:,le) = basis_type_buff(:) + + !Mark that we need it + who_ele(ie) = who_ele(ie) + 1 + who_ele_long(pro_num*(ie-1)+rank+1) = 1 + + do inod = 1, ng_node(etype(le)) + !Save the nodes of the elements + jp = lp - ng_node(etype(le)) + inod + node_cg(jp) = le + cg_node(inod, le) = jp + + do ibasis = 1, basis_num(le) + do i = 1, 3 + + r(i, ibasis, jp) = r_nodes(i, ibasis, inod) + + if(periodic) then + pb_node(i, ibasis, jp) = pb_node_buff(i,ibasis,inod) + end if + + if(need_vel) then + vel(i, ibasis, jp) = vel_nodes(i, ibasis, inod) + end if + + end do + end do + + end do + + !Runs through all atoms in the element, if the atom belongs to the current processor save + !the atom number to tag_atomap and the position to r_atomap, + + do iatom = 1, ele_atomap_num + if(need_ele(iatom).eqv..true.) then + + !Save the atom information + jatomap = jatomap + 1 + tag_atomap(jatomap) = tag_atomap_buff(iatom) + type_atomap(jatomap) = type_atomap_buff(iatom) + cg_atomap(iatom, le) = jatomap + r_atomap(:, jatomap) = r_atomap_buff(:,iatom) + + end if + end do + end if + end do + +! 
debug + + if(jatomap /= latomap) then + print *, 'Error: Total number of atomaps', jatomap, ' of rank', & + rank, ' should equal latomap', latomap + call mpi_abort(universe, 1, ierr) + end if + + !Total element count + call mpi_reduce(le, i, 1, mpi_integer, mpi_sum, root, world, ierr) + + !Total node count + call mpi_reduce(lp, j, 1, mpi_integer, mpi_sum, root, world, ierr) + + !total atomap count + call mpi_reduce(latomap, k, 1, mpi_integer, mpi_sum, root, world, ierr) + + if(rank == root) then + + if(i < ele_num) then + print *, 'Error: Total number of elements', i, ' should not be', & + ' smaller than ele_num', ele_num + call mpi_abort(universe, 1, ierr) + + else if(j < node_num) then + print *, 'Error: Total number of nodes', j, ' should not be', & + ' smaller than node_num', node_num + call mpi_abort(universe, 1, ierr) + + else if(k /= atomap_num) then + + print *, 'Error: Total number of atomaps', k, ' should equal', & + ' atomap_num', atomap_num + call mpi_abort(universe, 1, ierr) + end if + + end if + end if + + ele_num_l = le + node_num_l = lp + atomap_num_l = latomap + + !debug who_ele + + if(sum(who_ele) /= ele_num_l) then + print *, 'Error: sum(who_ele)', sum(who_ele), & + ' should equal', ele_num_l + call mpi_abort(universe, 1, ierr) + + else if(maxval(who_ele) > 1) then + print *, 'Error: Some elements are accounted for', & + ' more than once by rank', rank + call mpi_abort(universe, 1, ierr) + end if + + !debug who_ele_all + + allocate( who_ele_all(ele_num), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate who_ele_all in scatter_cg",allostat) + + who_ele_all(:) = 0 + + if(pro_num == 1) then + who_ele_all(:) = who_ele(:) + + else + call mpi_allreduce(who_ele, who_ele_all, ele_num, mpi_integer, mpi_sum, world, ierr) + + end if + + if(rank == root) then + if(sum(who_ele_all) < ele_num) then + print *, 'Error: sum(who_ele_all)', sum(who_ele_all), & + ' should not be smaller than', ele_num + call mpi_abort(universe, 1, ierr) + + end if + + end if + + !who_ele_long_all + allocate( who_ele_long_all(pro_num*ele_num), stat = allostat ) + if(allostat>0) call alloc_error('Failure to allocate who_ele_long_all', allostat) + + who_ele_long_all(:) = 0 + + if(pro_num == 1) then + who_ele_long_all(:) = who_ele_long(:) + + else + call mpi_allreduce(who_ele_long, who_ele_long_all, pro_num*ele_num, & + mpi_integer, mpi_sum, world, ierr) + end if + + !ele_shared_num and tag_ele_buff + + allocate( ele_id_buff(ele_num), stat = allostat ) + if(allostat /= 0) call alloc_error("Failure to allocate tag_ele_buff in scatter_cg", allostat) + + !Figure out which elements are shared + ele_id_buff(:) = 0 + je = 0 + do ie = 1, ele_num + if(who_ele_all(ie) > 1) then + je = je + 1 + ele_id_buff(ie) = je + end if + end do + ele_shared_num = je + + !ele_id_shared is an array which marks which elements are shared and among + !how many other processors. + + allocate(ele_id_shared(ele_num_l), pro_shared_num(ele_num_l), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate ele_id_shared in scatter_cg", allostat) + + ele_id_shared(:) = 0 + pro_shared_num(:) = 0 + + do ie = 1, ele_num_l + le = ele_glob_id(ie) + je = ele_id_buff(le) + if(je /= 0) then + ele_id_shared(ie) = je + end if + end do + + !who_has_ele distributes the elements. who_has_ele(ie) = true for the processor which is + !in charge of calculating element number ie. 
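+    !For instance (a hypothetical case): if element le = 7 is shared by ranks
+    !0 and 2 out of four processors, then logic_array = (/ .true., .false.,
+    !.true., .false. /), maxloc over irank_array returns 3, and rank 2
+    !(= 3 - 1) takes ownership in the loop below, so exactly one processor
+    !computes each shared element.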
+ allocate( who_has_ele(ele_num_l), who_element(ele_num), who_element_all(ele_num), stat = allostat) + if(allostat>0) call alloc_error("Failure allocating who_element arrays in scatter_cg", allostat) + + who_has_ele(:) = .false. + who_element(:) = 0 + who_element_all(:) = 0 + irank_array(:) = [ (irank, irank = 1, pro_num) ] + + do ie = 1, ele_num_l + + le = ele_glob_id(ie) + logic_array(:) = .false. + + do irank = 1, pro_num + if(who_ele_long_all(pro_num*(le-1)+irank) == 1) then + logic_array(irank) = .true. + end if + end do + + pro_shared_num(ie) = count(logic_array) + + if((pro_shared_num(ie) > 1).and.(ele_id_shared(ie) == 0)) then + print *, 'Error: When more than one processors share element', le, & + ' ele_id_shared(ie) should not be zero' + call mpi_abort(universe, 1, ierr) + end if + + jrank = maxloc(irank_array, 1, logic_array) + if(rank == jrank - 1) then + who_has_ele(ie) = .true. + who_element(le) = 1 + end if + + end do + + !Get who_element_all + if(pro_num == 1) then + who_element_all(:) = who_element(:) + else + call mpi_reduce(who_element, who_element_all, ele_num, mpi_integer, mpi_sum, root, world, ierr) + end if + + if(rank == root) then + do ie = 1, ele_num + + if(who_element_all(ie) > 1) then + print *, 'Error: Element', ie, ' is included', & + ' in more than one,', who_element_all(ie), ' processors' + call mpi_abort(universe, 1, ierr) + + else if(who_element_all(ie) < 0) then + print *, 'Error: Element', ie, ' has a negative', & + ' who_element_all,', who_element_all(ie) + call mpi_abort(universe, 1, ierr) + + else if(who_element_all(ie) == 0) then + print *, 'Error: Element', ie, ' does not', & + ' belong to any processor' + call mpi_abort(universe, 1, ierr) + + end if + end do + end if + + if(pro_num == 1) then + if(all(who_has_ele).eqv..false.) then + print *, 'Error: All(who_has_ele) should be true', & + ' when there is only one processor' + call mpi_abort(universe, 1, ierr) + end if + end if + + return + + end subroutine scatter_cg + + + subroutine scatter_at + !This subroutine scatters atomistic regions + + integer :: i, ia, ja, ka, la, seg_real, iatom, send_ni, send_nr, sent_nums, packet_num + integer, dimension(3) :: info + integer, dimension(seg_num) :: tag_buff, type_buff, iatom_array, jatom_array + real(kind = wp), dimension(3,seg_num) :: r_buff, vel_buff + integer, dimension(2*seg_num) :: send_int + real(kind=wp), dimension(2*3*seg_num) :: send_real + + + atom_num_lr = atom_num_l + + if(pro_num == 1) then + !Code is only run on one processor (no MPI needed) + type_atom(1:atom_num) = type_atom_scatter(:) + + do ia = 1, atom_num + + tag_atom(ia) = tag_atom_scatter(ia) + + if(periodic.eqv..true.) call cross_pb(r_atom_scatter(:,ia), info) + r_atom(:, ia) = r_atom_scatter(:, ia) + + end do + + if(need_vel) then + vel_atom(:, 1:atom_num) = vel_atom_scatter(:, :) + end if + + atom_num_l = atom_num + + else + !Code is run on multiple processors, MPI needed. + + ka = 0 + la = 0 + ia = 0 + tag_buff(:) = 0 + type_buff(:) = 0 + r_buff(:,:) = 0.0_wp + vel_buff(:,:) = 0.0_wp + atom_num_l = 0 + + send_ni = 2*seg_num + send_nr = 2*3*seg_num + send_int=0 + send_real=0 + sent_nums = 0 + + do while(sent_nums < atom_num) + + if(rank == root) then + + ia = ia +1 + ka = ka + 1 + tag_buff(ka) = tag_atom_scatter(ia) + type_buff(ka) = type_atom_scatter(ia) + + + r_buff(:,ka) = r_atom_scatter(:,ia) + if(periodic.eqv..true.) 
call cross_pb(r_buff(:,ka), info)
+          if(need_vel) vel_buff(:,ka) = vel_atom_scatter(:, ia)
+
+          if(ka == seg_num) then
+            seg_real = seg_num
+
+          else if(ia == atom_num) then
+            seg_real = mod(ia, seg_num)
+
+          else
+            cycle
+          end if
+
+          !Pack atom arrays
+          if (need_vel) then
+            call pack_atoms(seg_num, tag_buff, type_buff, r_buff, send_int, send_real, vel_buff)
+          else
+            call pack_atoms(seg_num, tag_buff, type_buff, r_buff, send_int, send_real)
+          end if
+
+        end if
+
+        !Broadcast send arrays
+        call mpi_bcast(send_int, send_ni, mpi_integer, root, world, ierr)
+        call mpi_bcast(send_real, send_nr, mpi_wp, root, world, ierr)
+
+        if(need_vel) then
+          call unpack_atoms(seg_num, send_int, send_real, tag_buff, type_buff, r_buff, vel_buff)
+        else
+          call unpack_atoms(seg_num, send_int, send_real, tag_buff, type_buff, r_buff)
+        end if
+
+        packet_num = 0
+        iatom_array(:) = 0
+        ja = 0
+        do ka = 1, seg_num
+
+          sent_nums = sent_nums + 1
+
+          !First check to make sure the atom type is valid
+          if(sent_nums>atom_num) then
+            exit
+          else if(type_buff(ka) == 0) then
+            print *, "Atom_type cannot be 0 in scatter_at for atom number ", sent_nums-seg_num+ka
+            call mpi_abort(universe, 1, ierr)
+          end if
+
+          packet_num = packet_num + 1
+          !checks to see if the atom is within the processor boundaries
+
+          if(in_block_bd(r_buff(:,ka), pro_bd)) then
+
+            ja = ja + 1
+            la = la + 1
+            if(la > atom_num_lr) call grow_at_arrays
+
+            !Pull out values of properties belonging to atoms in the current processor
+            tag_atom(la) = tag_buff(ka)
+            type_atom(la) = type_buff(ka)
+            r_atom(:, la) = r_buff(:,ka)
+            if(need_vel) vel_atom(:, la) = vel_buff(:,ka)
+
+            !Iatom array just makes sure everything is only assigned once
+            iatom_array(ka) = 1
+
+            atom_num_l = atom_num_l + 1
+          end if
+        end do
+
+        !Check to make sure everything was sent out correctly
+        call mpi_reduce(ja, i, 1, mpi_integer, mpi_sum, root, world, ierr)
+        call mpi_reduce(iatom_array(1:packet_num), jatom_array(1:packet_num), packet_num, mpi_integer, mpi_sum, root,&
+                        world, ierr)
+
+        if(rank == root) then
+          do iatom = 1, packet_num
+            if (jatom_array(iatom) /= 1) then
+              print *, box_bd, r_buff(:,iatom)
+              print *, "Error: Jatom array should be 1 not ", jatom_array(iatom), " for iatom ", iatom
+              call mpi_abort(universe, 1, ierr)
+            end if
+          end do
+          if (i/=packet_num) then
+            print *, packet_num, " atoms were sent but ", i, " atoms were claimed by the processors."
+            call mpi_abort(universe, 1, ierr)
+          end if
+
+        end if
+
+
+        if(rank == root) ka = 0
+
+      end do
+
+    end if
+
+
+    !Now make sure everything has been properly distributed
+    if(pro_num == 1) then
+      i = atom_num_l
+    else
+      call mpi_reduce(atom_num_l, i, 1, mpi_integer, mpi_sum, root, world, ierr)
+
+    end if
+
+    if(rank == root) then
+      if(i /= atom_num) then
+        print *, 'Error: Total number of atoms', i, ' should equal', &
+                 ' atom_num', atom_num
+        call mpi_abort(universe, 1, ierr)
+      end if
+
+    end if
+
+    return
+  end subroutine scatter_at
+
+  subroutine ghost_cg
+    !Share ghost interpolated atoms from elements
+
+    integer :: i, j, k, send_n, recv_n, iatomap, jatomap, latomap, &
+               seg_num_real, itag_t, itag_r, itag_n, itag_ty, &
+               ireq_t, ireq_r, ireq_n, ireq_ty, &
+               send_rank, recv_rank, send_ori, &
+               atomap_num_s, atomap_num_r, accept_count, accept_count_r
+
+    logical :: send_l, recv_l
+
+    integer, dimension(3) :: send_coords, recv_coords
+
+    integer, dimension(mpi_status_size) :: mstatus
+
+    real(kind = wp), dimension(3) :: r_in
+
+    integer, allocatable :: &
+      dir_atomap(:), dir_atomap_array(:), &
+      tag_send_buff_array(:), tag_send_buff_ori(:), &
+      tag_send_buff(:), tag_recv_buff(:), &
+      tag_atomap_array(:), tag_send_array(:), tag_recv_array(:), &
+      type_send_buff_array(:), type_send_buff_ori(:), &
+      type_send_buff(:), type_recv_buff(:), &
+      type_atomap_array(:), type_send_array(:), type_recv_array(:), &
+      list_atomap_array(:, :, :), pos_send_buff_ori(:), &
+      pos_send_buff(:), pos_send_array(:), accept_send(:), accept_recv(:), &
+      accept_array(:)
+
+    real(kind = wp), allocatable :: &
+      r_send_buff_array(:), r_send_buff_ori(:), &
+      r_send_buff(:), r_recv_buff(:), &
+      r_atomap_array(:, :), r_send_array(:), r_recv_array(:)
+
+    !Set seg_num_real, the current send-buffer capacity (grown in increments of seg_num)
+    seg_num_real = seg_num
+
+    !Allocate original send_buff arrays
+    allocate(tag_send_buff_ori(seg_num_real), &
+             type_send_buff_ori(seg_num_real), &
+             pos_send_buff_ori(seg_num_real), &
+             r_send_buff_ori(3*seg_num_real), &
+             stat = allostat)
+    if(allostat /= 0) call alloc_error("Failure to allocate tag/r_send_buff_ori",allostat)
+
+    !Initialize some variables
+    send_n = 0
+    tag_send_buff_ori(:) = 0
+    pos_send_buff_ori(:) = 0
+    type_send_buff_ori(:) = 0
+    r_send_buff_ori(:) = 0.0_wp
+
+    !Figure out which atomaps need to be sent
+    do iatomap = 1, atomap_num_l
+      r_in(:) = r_atomap(:, iatomap)
+
+      !Check position of the atomap to see if it needs to be sent
+      if(.not.in_block_bd(r_in, pro_bd_in)) then
+        send_n = send_n + 1
+
+        !Grow arrays if needed
+        if(send_n > seg_num_real) then
+          allocate( tag_send_buff_array(seg_num_real+seg_num), &
+                    pos_send_array(seg_num_real + seg_num), &
+                    type_send_buff_array(seg_num_real+seg_num), &
+                    r_send_buff_array(3*(seg_num_real+seg_num)), &
+                    stat = allostat )
+          if(allostat /= 0) call alloc_error("Failure to allocate tag/r_send_buff_array", allostat)
+
+          tag_send_buff_array(1:seg_num_real) = tag_send_buff_ori(:)
+          tag_send_buff_array(seg_num_real+1:) = 0
+          call move_alloc(tag_send_buff_array, tag_send_buff_ori)
+
+          pos_send_array(1:seg_num_real) = pos_send_buff_ori(:)
+          pos_send_array(seg_num_real+1:) = 0
+          call move_alloc(pos_send_array, pos_send_buff_ori)
+
+          type_send_buff_array(1:seg_num_real) = type_send_buff_ori(:)
+          type_send_buff_array(seg_num_real+1:) = 0
+          call move_alloc(type_send_buff_array, type_send_buff_ori)
+
+          r_send_buff_array(1:3*seg_num_real) = r_send_buff_ori(:)
+          r_send_buff_array(3*seg_num_real+1:) = 0.0_wp
+          call move_alloc(r_send_buff_array, r_send_buff_ori)
seg_num_real = seg_num_real + seg_num + end if + + !Build send_buff_ori + tag_send_buff_ori(send_n) = tag_atomap(iatomap) + pos_send_buff_ori(send_n) = iatomap + type_send_buff_ori(send_n) = type_atomap(iatomap) + do i = 1, 3 + r_send_buff_ori(3*(send_n-1)+i) = r_in(i) + end do + end if + end do + + allocate(tag_send_buff(send_n), type_send_buff(send_n), r_send_buff(3*send_n), pos_send_buff(send_n), stat = allostat) + if (allostat /= 0) call alloc_error("Failure to allocate tag/r_send_buff", allostat) + + !Initialize allocated buffs and recv_n + recv_n = 0 + tag_send_buff(:) = 0 + type_send_buff(:) = 0 + r_send_buff(:) = 0.0_wp + + !Set recv_num + if(pro_num == 1) then + recv_n = send_n + else + call mpi_allreduce(send_n, recv_n, 1, mpi_integer, mpi_max, grid_comm, ierr) + end if + + !Allocate arrays for receiving data + allocate(tag_recv_buff(recv_n), type_recv_buff(recv_n), r_recv_buff(3*recv_n), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate tag/r_recv_buff", allostat) + + !Also allocate arrays for communicating which atomaps were received + allocate(accept_send(recv_n), accept_recv(recv_n), stat=allostat) + if(allostat /= 0) call alloc_error("Failure to allocate accept arrays", allostat) + + !Allocate receive buff + tag_recv_buff(:) = 0 + type_recv_buff(:) = 0 + r_recv_buff(:) = 0.0_wp + accept_send(:) = 0 + accept_recv(:) = 0 + + !Initialize tags used for communications + itag_r = 1 + itag_n = 2 + itag_t = 3 + itag_ty = 4 + ireq_r = 0 + ireq_n = 0 + ireq_t = 0 + ireq_ty = 0 + + !Initialize some more variables + send_rank = grank + recv_rank = grank + list_atomap_logic(:, :) = .true. + + !Allocate variables and initialize + allocate(dir_atomap(atomap_num_lr), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate dir_atomap", allostat) + + !dir_atomap is the direction to send each atomap + dir_atomap(:) = 4 + + !Initialize the atomap counts where atomap_num_s is the send_count and atomap_num_r is the recv count + jatomap = atomap_num_l + atomap_num_s = send_n + send_ori = send_n + atomap_num_r = recv_n + + !For these loops, j is the box direction to communicate along and k is to direct either positive or negative comm. + !The ghost approach here is described in https://doi.org/10.1016/j.commatsci.2017.11.051 + + do j = 1, 3 + do k = 1, 2 + + send_coords(:) = grid_coords(:) + recv_coords(:) = grid_coords(:) + send_l = .true. + recv_l = .true. + + if(num_pro(j) == 1) then + + !If we have one processor in the dimension then we don't do a normal send. + send_l = .false. + !Only send if periodic in that dimension + if(period(j).eqv..true.) then + recv_l = .true. 
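+            !With a single processor along this direction the periodic images
+            !are built locally: the send buffers assembled above are copied
+            !straight into the receive buffers, any ghosts received in earlier
+            !passes (dir_atomap < j) are appended, and every received position
+            !is shifted by one box length along direction j, so no MPI
+            !exchange is needed for this step.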
+ tag_recv_buff(1:send_ori) = tag_send_buff_ori(1:send_ori) + type_recv_buff(1:send_ori) = type_send_buff_ori(1:send_ori) + r_recv_buff(1:3*send_ori) = r_send_buff_ori(1:3*send_ori) + pos_send_buff(1:send_ori) = pos_send_buff_ori(1:send_ori) + latomap = send_ori + + !Now loop over all the ghost atoms + do iatomap = atomap_num_l+1, jatomap + !If the ghost was received in the previous communication step add it to the send_Buff + if(dir_atomap(iatomap) < j) then + latomap = latomap + 1 + !Resize arrays if needed + if(latomap > atomap_num_r) then + + allocate(tag_recv_array(atomap_num_r+seg_num), & + type_recv_array(atomap_num_r+seg_num), & + r_recv_array(3*(atomap_num_r+seg_num)), & + stat = allostat) + + if(allostat /= 0) call alloc_error("Failure to allocate tag/r_recv_array", allostat) + + tag_recv_array(:) = 0 + tag_recv_array(1:atomap_num_r) = tag_recv_buff(:) + call move_alloc(tag_recv_array, tag_recv_buff) + + type_recv_array(:) = 0 + type_recv_array(1:atomap_num_r) = type_recv_buff(:) + call move_alloc(type_recv_array, type_recv_buff) + + r_recv_array(:) = 0.0_wp + r_recv_array(1:3*atomap_num_r) = r_recv_buff(:) + call move_alloc(r_recv_array, r_recv_buff) + atomap_num_r = atomap_num_r + seg_num + + end if + + !Save the send atoms to the recieved array + tag_recv_buff(latomap) = tag_atomap(iatomap) + type_recv_buff(latomap) = type_atomap(iatomap) + do i = 1, 3 + r_recv_buff(3*(latomap-1)+i) = r_atomap(i, iatomap) + end do + + !Add values to pos_send_buff + if(latomap > size(pos_send_buff)) then + allocate(pos_send_array(size(pos_send_buff)+ seg_num)) + pos_send_array(1:size(pos_send_buff)) = pos_send_buff + pos_send_array(size(pos_send_buff)+1:) = 0 + call move_alloc(pos_send_array, pos_send_buff) + end if + pos_send_buff(latomap) = iatomap + end if + end do + + recv_n = latomap + !Apply periodic boundaries + do iatomap = 1, recv_n + r_recv_buff(3*(iatomap-1)+j) = r_recv_buff(3*(iatomap-1)+j) + (-1)**k * box_length(j) + end do + + else + !Otherwise + recv_l = .false. + recv_n = 0 + end if + + + !If we have more than one processor in the dimension + else + + send_coords(j) = grid_coords(j) - (-1)**k + recv_coords(j) = grid_coords(j) + (-1)**k + !If periodic then we send to the next processor in every situation + if(period(j).eqv..true.) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + !Otherwise the first and last ones along each dimension have special rules as they don't send to one processro + !and don't receive from one processor at a specific step + else + if(k == 1) then + !Don't send if it's the last one and k = 1 + if(grid_coords(j) < num_pro(j) - 1) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + else + send_l = .false. + send_rank = -1 + end if + + !Don't receive if first one and k = 1 + if(grid_coords(j) > 0) then + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + recv_l = .false. + recv_rank = -1 + end if + + else if(k == 2) then + !Don't send if first one and k = 2 + if(grid_coords(j) > 0) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + else + send_l = .false. + send_rank = -1 + end if + + !Don't receive if last one and k = 2 + if(grid_coords(j) < num_pro(j) - 1) then + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + recv_l = .false. + recv_rank = -1 + end if + end if + end if + + !prepare send_buff + if(send_l.eqv..true.) 
then + + tag_send_buff(1:send_ori) = tag_send_buff_ori(1:send_ori) + pos_send_buff(1:send_ori) = pos_send_buff_ori(1:send_ori) + type_send_buff(1:send_ori) = type_send_buff_ori(1:send_ori) + r_send_buff(1:3*send_ori) = r_send_buff_ori(1:3*send_ori) + latomap = send_ori + + !Loop over all received ghost atoms + do iatomap = atomap_num_l+1, jatomap + + !if we previously received the ghosts than we add them to the send_buff + if(dir_atomap(iatomap) < j) then + + latomap = latomap + 1 + !Grow arrays if eneded + if(latomap > atomap_num_s) then + + allocate( & + tag_send_array(atomap_num_s+seg_num), & + type_send_array(atomap_num_s+seg_num), & + r_send_array(3*(atomap_num_s+seg_num)), & + stat = allostat & + ) + + if(allostat /= 0) call alloc_error("Failure to allocate tag/r_send_array", allostat) + + tag_send_array(:) = 0 + tag_send_array(1:atomap_num_s) = tag_send_buff(:) + call move_alloc(tag_send_array, tag_send_buff) + + type_send_array(:) = 0 + type_send_array(1:atomap_num_s) = type_send_buff(:) + call move_alloc(type_send_array, type_send_buff) + + r_send_array(:) = 0.0_wp + r_send_array(1:3*atomap_num_s) = r_send_buff(:) + call move_alloc(r_send_array, r_send_buff) + + atomap_num_s = atomap_num_s + seg_num + + end if + + tag_send_buff(latomap) = tag_atomap(iatomap) + type_send_buff(latomap) = type_atomap(iatomap) + do i = 1, 3 + r_send_buff(3*(latomap-1)+i) = r_atomap(i, iatomap) + end do + + if(latomap > size(pos_send_buff)) then + + allocate(pos_send_array(size(pos_send_buff)+seg_num)) + pos_send_array(1:size(pos_send_buff)) = pos_send_buff + pos_send_array(size(pos_send_buff)+1:) = 0 + call move_alloc(pos_send_array, pos_send_buff) + end if + pos_send_buff(latomap) = iatomap + + end if + end do + + send_n = latomap + +! update r_send_buff for pbc + if(period(j).eqv..true.) then + do iatomap = 1, send_n + if(k == 1) then + if(grid_coords(j) == num_pro(j) - 1) then + r_send_buff(3*(iatomap-1)+j) = r_send_buff(3*(iatomap-1)+j) - box_length(j) + end if + end if + + if(k == 2) then + + if(grid_coords(j) == 0) then + + r_send_buff(3*(iatomap-1)+j) = r_send_buff(3*(iatomap-1)+j) + box_length(j) + end if + end if + end do + end if + end if + + !send/recv number + if(recv_l.eqv..true.) then + call mpi_irecv(recv_n, 1, mpi_integer, recv_rank, itag_n, grid_comm, ireq_n, ierr) + end if + + if(send_l.eqv..true.) then + call mpi_send(send_n, 1, mpi_integer, send_rank, itag_n, grid_comm, ierr) + end if + + if(recv_l.eqv..true.) then + call mpi_wait(ireq_n, mstatus, ierr) + end if + + !resize recv_buff if needed + if(recv_l.eqv..true.) then + if(recv_n > atomap_num_r) then + deallocate( tag_recv_buff, type_recv_buff, r_recv_buff, stat = deallostat) + if(deallostat /= 0) call alloc_error("Failure to deallocate tag/r_recv_buff", allostat) + allocate( tag_recv_buff(recv_n), r_recv_buff(3*recv_n), type_recv_buff(recv_n), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate tag/r_recv_buff", allostat) + !Initialize arrays + tag_recv_buff(:) = 0 + type_recv_buff(:) = 0 + r_recv_buff(:) = 0.0_wp + atomap_num_r = recv_n + + !Reallocate accept arrays + deallocate(accept_send) + allocate(accept_send(recv_n)) + accept_send(:) = 0 + end if + end if + + !send/recv tag + if(recv_l.eqv..true.) then + tag_recv_buff=0 + call mpi_irecv(tag_recv_buff(1:recv_n), recv_n, mpi_integer, recv_rank, itag_t, grid_comm, ireq_t, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(tag_send_buff(1:send_n), send_n, mpi_integer, send_rank, itag_t, grid_comm, ierr) + end if + if(recv_l.eqv..true.) 
then + call mpi_wait(ireq_t, mstatus, ierr) + end if + + !send/recv type + if(recv_l.eqv..true.) then + type_recv_buff=0 + call mpi_irecv(type_recv_buff(1:recv_n), recv_n, mpi_integer, recv_rank, itag_ty, grid_comm, ireq_ty, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(type_send_buff(1:send_n), send_n, mpi_integer, send_rank, itag_ty, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_ty, mstatus, ierr) + end if + + !Send and receive atomap positions + if(recv_l.eqv..true.) then + r_recv_buff=0.0_wp + call mpi_irecv(r_recv_buff(1:3*recv_n), 3*recv_n, mpi_wp, recv_rank, itag_r, grid_comm, ireq_r, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(r_send_buff(1:3*send_n), 3*send_n, mpi_wp, send_rank, itag_r, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_r, mstatus, ierr) + end if + end if + + !check if each core needs the received atomaps + if(recv_l.eqv..true.) then + + accept_count = 0 + accept_send(:) = 0 + do iatomap = 1, recv_n + + do i = 1, 3 + r_in(i) = r_recv_buff(3*(iatomap-1)+i) + end do + + !If it's in our processor boundaries then add it + if(in_block_bd(r_in, pro_bd_out)) then + + jatomap = jatomap + 1 + + !Grow arrays if needed + if(jatomap > atomap_num_lr) then + + allocate(tag_atomap_array(atomap_num_lr+seg_num), & + type_atomap_array(atomap_num_lr+seg_num), & + r_atomap_array(3, atomap_num_lr+seg_num), & + dir_atomap_array(atomap_num_lr+seg_num), & + stat = allostat) + + if(allostat /= 0) call alloc_error("Failure to allocate tag/r/dir_atomap_array", allostat) + + + tag_atomap_array(1:atomap_num_lr) = tag_atomap(:) + tag_atomap_array(atomap_num_lr+1:) = 0 + call move_alloc(tag_atomap_array, tag_atomap) + + type_atomap_array(1:atomap_num_lr) = type_atomap(:) + type_atomap_array(atomap_num_lr+1:) = 0 + call move_alloc(type_atomap_array, type_atomap) + + r_atomap_array(:, 1:atomap_num_lr) = r_atomap(:, :) + r_atomap_array(:, atomap_num_lr+1:) = 0.0_wp + call move_alloc(r_atomap_array, r_atomap) + + dir_atomap_array(1:atomap_num_lr) = dir_atomap(:) + dir_atomap_array(atomap_num_lr+1:) = 0 + call move_alloc(dir_atomap_array, dir_atomap) + + atomap_num_lr = atomap_num_lr + seg_num + end if + + tag_atomap(jatomap) = tag_recv_buff(iatomap) + type_atomap(jatomap) = type_recv_buff(iatomap) + r_atomap(:, jatomap) = r_in(:) + dir_atomap(jatomap) = j + + !Now mark this atom as accepted + accept_count = accept_count + 1 + if(accept_count > size(accept_send)) then + allocate(accept_array(size(accept_send) + seg_num)) + accept_array(1:size(accept_send)) = accept_send + accept_array(size(accept_send)+1:) = 0 + call move_alloc(accept_array, accept_send) + end if + + accept_send(accept_count) = iatomap + end if + end do + end if + list_atomap_logic(k, j) = send_l + + if((num_pro(j) == 1).and.(period(j))) then + !Resize if needed + if(accept_count > size(accept_recv)) then + deallocate(accept_recv) + allocate(accept_recv(accept_count)) + end if + accept_recv(1:accept_count) = accept_send(1:accept_count) + accept_count_r = accept_count + else + !Now if we sent atoms this turn we have to receive the accepted ghost atoms to build our list, + !First get the accepted counts + if(send_l) call mpi_irecv(accept_count_r, 1, mpi_integer, send_rank, 15, grid_comm, ireq_n, ierr) + if(recv_l) call mpi_send(accept_count, 1, mpi_integer, recv_rank, 15, grid_comm, ierr) + if(send_l) call mpi_wait(ireq_n, mstatus, ierr) + + if(send_l) then + !Resize if needed + if(accept_count_r > size(accept_recv)) then + deallocate(accept_recv) + 
allocate(accept_recv(accept_count_r)) + end if + + !Now receive the accepted atomaps + call mpi_irecv(accept_recv, accept_count_r, mpi_integer, send_rank, 16, grid_comm, ireq_n, ierr) + end if + + if(recv_l) call mpi_send(accept_send(1:accept_count), accept_count, mpi_integer, & + recv_rank, 16, grid_comm, ierr) + + if(send_l) call mpi_wait(ireq_n, mstatus, ierr) + + end if + + if(send_l.or.((num_pro(j)==1).and.period(j))) then + !Now build the list of sent atoms + list_atomap_num(k,j) = accept_count_r + + !initialize list_atomap which contains information on which atoms were sent on every dimension + if(.not.allocated(list_atomap)) then + allocate(list_atomap(accept_count, 2, 3), stat = allostat) + if (allostat>0) call alloc_error("Failure to allocate list_atomap", allostat) + end if + + + if (accept_count_r > size(list_atomap(:,k,j))) then + allocate(list_atomap_array(accept_count_r, 2, 3)) + list_atomap_array(1:size(list_atomap(:,k,j)),:,:) = list_atomap(1:size(list_atomap(:,k,j)),:,:) + list_atomap_array(size(list_atomap(:,k,j))+1,:,:) = 0 + call move_alloc(list_atomap_array, list_atomap) + end if + + do i = 1, accept_count_r + list_atomap(i, k, j) = pos_send_buff(accept_recv(i)) + end do + else + list_atomap_num(k,j) = 0 + end if + end do + end do + + atomap_num_lg = jatomap + + !debug to make sure code is correct + if(atomap_num_lr /= size(r_atomap, 2)) then + print *, 'Error: Wrong atomap_num_lr', atomap_num_lr, ' which should equal size(r_atomap, 2)', size(r_atomap, 2) + call mpi_abort(mpi_comm_world, 1, ierr) + + else if(atomap_num_lr /= size(tag_atomap)) then + print *, 'Error: Wrong atomap_num_lr', atomap_num_lr, ' which should equal size(tag_atomap)', size(tag_atomap) + call mpi_abort(mpi_comm_world, 1, ierr) + + else if(atomap_num_lr /= size(dir_atomap)) then + print *, 'Error: Wrong atomap_num_lr', atomap_num_lr, ' which should equal size(dir_atomap)', size(dir_atomap) + call mpi_abort(mpi_comm_world, 1, ierr) + + else if(atomap_num_lg > atomap_num_lr) then + print *, 'Error: Wrong atomap_num_lg', atomap_num_lg, ' which is larger than', atomap_num_lr + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + +! list_atomap(:, :, :) = 0 +! list_atomap_num(:, :) = 0 +! seg_num_atomap = size(list_atomap, 1) +! +! do j = 1, 3 +! do k = 1, 2 +! +! send_n = 0 +! pro_bd_temp(:) = pro_bd_kj(:, k, j) +! do iatomap = 1, atomap_num_lg +! +! !If we send the atom then +! r_in(:) = r_atomap(:, iatomap) +! if(in_block_bd(r_in, pro_bd_temp)) then +! +! send_n = send_n + 1 +! +! if(send_n > seg_num_atomap) then +! allocate(list_atomap_array(seg_num_atomap+seg_num, 2, 3), stat = allostat) +! if(allostat /= 0) call alloc_error("Failure to allocate list_atomap_array", allostat) +! +! list_atomap_array(1:seg_num_atomap, :, :) = list_atomap(:, :, :) +! list_atomap_array(seg_num_atomap+1:, :, :) = 0 +! call move_alloc(list_atomap_array, list_atomap) +! seg_num_atomap = seg_num_atomap + seg_num +! end if +! +! list_atomap(send_n, k, j) = iatomap +! end if +! end do +! list_atomap_num(k, j) = send_n +! end do +! 
end do + + return + end subroutine ghost_cg + + subroutine ghost_at + !This code send ghost atoms to processors that need them + integer :: i, j, k, send_n, recv_n, ia, ja, la, & + seg_num_real, itag_t, itag_r, itag_n, itag_ty,& + ireq_t, ireq_r, ireq_n, ireq_ty,& + send_rank, recv_rank, send_ori, & + atom_num_s, atom_num_r, atom_num_ln, & + accept_count, accept_count_r + + logical :: send_l, recv_l + + integer, dimension(3) :: send_coords, recv_coords + + integer, dimension(mpi_status_size) :: mstatus + + real(kind = wp), dimension(3) :: r_in + + integer, allocatable :: dir_atom(:), dir_atom_array(:), & + tag_send_buff_array(:), tag_send_buff_ori(:), & + tag_send_buff(:), tag_recv_buff(:), & + tag_send_array(:), tag_recv_array(:), & + type_send_buff_array(:), type_send_buff_ori(:), & + type_send_buff(:), type_recv_buff(:), & + type_send_array(:), type_recv_array(:), & + list_atom_array(:, :, :), accept_array(:), & + pos_send_buff_ori(:), pos_send_buff(:), pos_send_array(:), & + accept_send(:), accept_recv(:) + + real(kind = wp), allocatable :: r_send_buff_array(:), r_send_buff_ori(:), & + r_send_buff(:), r_recv_buff(:), & + r_send_array(:), r_recv_array(:), & + vel_array(:, :) + + !Initialize size variables and allocate variables + seg_num_real = seg_num + allocate(tag_send_buff_ori(seg_num_real), & + type_send_buff_ori(seg_num_real),& + pos_send_buff_ori(seg_num_real), & + r_send_buff_ori(3*seg_num_real), & + stat = allostat ) + if(allostat /= 0) call alloc_error("Failure to allocate tag/r_send_buff_ori", allostat) + + !Initialize other variables + send_n = 0 + tag_send_buff_ori(:) = 0 + type_send_buff_ori(:) = 0 + pos_send_buff_ori(:) = 0 + r_send_buff_ori(:) = 0.0_wp + + !Build atom send lists + do ia = 1, atom_num_l + + r_in(:) = r_atom(:, ia) + + !Check to see if this needs to be sent + if(.not.in_block_bd(r_in, pro_bd_in)) then + + !If we need to save it then check if we need to resize the arrays + send_n = send_n + 1 + if(send_n > seg_num_real) then + allocate(tag_send_buff_array(seg_num_real+seg_num), & + type_send_buff_array(seg_num_real+seg_num), & + pos_send_array(seg_num_real + seg_num), & + r_send_buff_array(3*(seg_num_real+seg_num)), & + stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate tag/r_send_buff_array", allostat) + + tag_send_buff_array(1:seg_num_real) = tag_send_buff_ori(:) + tag_send_buff_array(seg_num_real+1:) = 0 + call move_alloc(tag_send_buff_array, tag_send_buff_ori) + + pos_send_array(1:seg_num_real) = pos_send_buff_ori(:) + pos_send_array(seg_num_real+1:) = 0 + call move_alloc(pos_send_array, pos_send_buff_ori) + + type_send_buff_array(1:seg_num_real) = type_send_buff_ori(:) + type_send_buff_array(seg_num_real+1:) = 0 + call move_alloc(type_send_buff_array, type_send_buff_ori) + + r_send_buff_array(1:3*seg_num_real) = r_send_buff_ori(:) + r_send_buff_array(3*seg_num_real+1:) = 0.0_wp + call move_alloc(r_send_buff_array, r_send_buff_ori) + + seg_num_real = seg_num_real + seg_num + end if + + tag_send_buff_ori(send_n) = tag_atom(ia) + type_send_buff_ori(send_n)= type_atom(ia) + pos_send_buff_ori(send_n) = ia + do i = 1, 3 + r_send_buff_ori(3*(send_n-1)+i) = r_in(i) + end do + end if + end do + + !Allocate send_buff variable which changes upon each send step + allocate(tag_send_buff(send_n), & + type_send_buff(send_n),& + pos_send_buff(send_n), & + r_send_buff(3*send_n), & + stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate tag/r_send_buff", allostat) + + !Initialize new arrays + tag_send_buff(:) = 0 + 
type_send_buff(:)= 0 + pos_send_buff(:) = 0 + r_send_buff(:) = 0.0_wp + + if(pro_num == 1) then + recv_n = send_n + else + call mpi_allreduce(send_n, recv_n, 1, mpi_integer, mpi_max, grid_comm, ierr) + end if + + !Allocate arrays for receiving data + allocate(tag_recv_buff(recv_n), type_recv_buff(recv_n), r_recv_buff(3*recv_n), stat = allostat ) + if(allostat /= 0) call alloc_error("Failure to allocate tag/r_recv_buff", allostat) + + !Also allocate arrays for communicating which atoms were received + allocate(accept_send(recv_n), accept_recv(recv_n), stat=allostat) + if(allostat /= 0) call alloc_error("Failure to allocate accept arrays", allostat) + accept_send = 0 + accept_recv = 0 + + + !Initalize new variables and tags used for communications + tag_recv_buff(:) = 0 + type_recv_buff(:)= 0 + r_recv_buff(:) = 0.0_wp + itag_r = 1 + itag_n = 2 + itag_t = 3 + itag_ty= 4 + ireq_r = 0 + ireq_n = 0 + ireq_t = 0 + ireq_ty= 0 + send_rank = grank + recv_rank = grank + list_atom_logic(:, :) = .true. + + allocate(dir_atom(atom_num_lr), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate dir_atom", allostat) + + dir_atom(:) = 4 + ja = atom_num_l + atom_num_s = send_n + send_ori = send_n + atom_num_r = recv_n + + ! Check comments on ghost_cg for more information on the ghost scheme + ! k == 1: positive + ! k == 2: negative + + do j = 1, 3 + do k = 1, 2 + !Get send and receive coordinates + send_coords(:) = grid_coords(:) + recv_coords(:) = grid_coords(:) + send_l = .true. + recv_l = .true. + + !If only one processor in the dimension then we don't send ghosts unless periodic in that dim + if(num_pro(j) == 1) then + + send_l = .false. + if(period(j).eqv..true.) then + + recv_l = .true. + tag_recv_buff(1:send_ori) = tag_send_buff_ori(1:send_ori) + type_recv_buff(1:send_ori)= type_send_buff_ori(1:send_ori) + r_recv_buff(1:3*send_ori) = r_send_buff_ori(1:3*send_ori) + pos_send_buff(1:send_ori) = pos_send_buff_ori(1:send_ori) + la = send_ori + + do ia = atom_num_l+1, ja + !Check to add atoms which were previously sent + if(dir_atom(ia) < j) then + + la = la + 1 + + !Grow arrays if needed + if(la > atom_num_r) then + allocate( tag_recv_array(atom_num_r+seg_num), & + type_recv_array(atom_num_r+seg_num), & + r_recv_array(3*(atom_num_r+seg_num)), & + stat = allostat ) + if(allostat /= 0) call alloc_error("Failure to allocate tag/r_recv_array", allostat) + + tag_recv_array(1:atom_num_r) = tag_recv_buff(:) + tag_recv_array(atom_num_r+1:) = 0 + call move_alloc(tag_recv_array, tag_recv_buff) + + type_recv_array(1:atom_num_r) = type_recv_buff(:) + type_recv_array(atom_num_r+1:) = 0 + call move_alloc(type_recv_array, type_recv_buff) + + r_recv_array(1:3*atom_num_r) = r_recv_buff(:) + r_recv_array(3*atom_num_r+1:) = 0.0_wp + call move_alloc(r_recv_array, r_recv_buff) + + atom_num_r = atom_num_r + seg_num + end if + + tag_recv_buff(la) = tag_atom(ia) + type_recv_buff(la)= type_atom(ia) + do i = 1, 3 + r_recv_buff(3*(la-1)+i) = r_atom(i, ia) + end do + + !Add values to pos_send_buff + if(la > size(pos_send_buff)) then + allocate(pos_send_array(size(pos_send_buff)+ seg_num)) + pos_send_array(1:size(pos_send_buff)) = pos_send_buff + pos_send_array(size(pos_send_buff)+1:) = 0 + call move_alloc(pos_send_array, pos_send_buff) + end if + pos_send_buff(la) = ia + end if + end do + + recv_n = la + do ia = 1, recv_n + r_recv_buff(3*(ia-1)+j) = r_recv_buff(3*(ia-1)+j) + (-1)**k * box_length(j) + end do + !If not periodic in that dimension then we don't send + else + recv_l = .false. 
+ recv_n = 0 + end if + + else + !Get Actual send and recv coords + send_coords(j) = grid_coords(j) - (-1)**k + recv_coords(j) = grid_coords(j) + (-1)**k + + if(period(j).eqv..true.) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + if(k == 1) then + if(grid_coords(j) < num_pro(j) - 1) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + else + send_l = .false. + send_rank = -1 + end if + + if(grid_coords(j) > 0) then + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + recv_l = .false. + recv_rank = -1 + end if + + else if(k == 2) then + + if(grid_coords(j) > 0) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + else + send_l = .false. + send_rank = -1 + end if + + if(grid_coords(j) < num_pro(j) - 1) then + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + recv_l = .false. + recv_rank = -1 + end if + end if + end if + + !prepare send_buff + if(send_l.eqv..true.) then + + tag_send_buff(1:send_ori) = tag_send_buff_ori(1:send_ori) + pos_send_buff(1:send_ori) = pos_send_buff_ori(1:send_ori) + type_send_buff(1:send_ori)= type_send_buff_ori(1:send_ori) + r_send_buff(1:3*send_ori) = r_send_buff_ori(1:3*send_ori) + + la = send_ori + do ia = atom_num_l+1, ja + if(dir_atom(ia) < j) then + + la = la + 1 + if(la > atom_num_s) then + + allocate( tag_send_array(atom_num_s+seg_num), & + type_send_array(atom_num_s+seg_num), & + r_send_array(3*(atom_num_s+seg_num)), & + stat = allostat) + + if(allostat /= 0) call alloc_error("Failure to allocate tag/r_send_array", allostat) + + tag_send_array(1:atom_num_s) = tag_send_buff(:) + tag_send_array(atom_num_s+1:) = 0 + call move_alloc(tag_send_array, tag_send_buff) + + type_send_array(1:atom_num_s) = type_send_buff(:) + type_send_array(atom_num_s+1:) = 0 + call move_alloc(type_send_array, type_send_buff) + + r_send_array(1:3*atom_num_s) = r_send_buff(:) + r_send_array(3*atom_num_s+1:) = 0.0_wp + call move_alloc(r_send_array, r_send_buff) + + atom_num_s = atom_num_s + seg_num + + end if + + tag_send_buff(la) = tag_atom(ia) + type_send_buff(la) = type_atom(ia) + do i = 1, 3 + r_send_buff(3*(la-1)+i) = r_atom(i, ia) + end do + + if(la > size(pos_send_buff)) then + allocate(pos_send_array(size(pos_send_buff)+seg_num)) + pos_send_array(1:size(pos_send_buff)) = pos_send_buff + pos_send_array(size(pos_send_buff)+1:) = 0 + call move_alloc(pos_send_array, pos_send_buff) + end if + pos_send_buff(la) = ia + end if + end do + + send_n = la + !update r_send_buff for pbc + if(period(j).eqv..true.) then + do ia = 1, send_n + if(k == 1) then + if(grid_coords(j) == num_pro(j) - 1) then + r_send_buff(3*(ia-1)+j) = r_send_buff(3*(ia-1)+j) - box_length(j) + end if + end if + + if(k == 2) then + if(grid_coords(j) == 0) then + r_send_buff(3*(ia-1)+j) = r_send_buff(3*(ia-1)+j) + box_length(j) + end if + end if + end do + end if + end if + + !send/recv number + if(recv_l.eqv..true.) then + call mpi_irecv(recv_n, 1, mpi_integer, recv_rank, itag_n, grid_comm, ireq_n, ierr) + end if + + if(send_l.eqv..true.) then + call mpi_send(send_n, 1, mpi_integer, send_rank, itag_n, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_n, mstatus, ierr) + end if + + !update recv_buff for size + if(recv_l.eqv..true.) 
then + if(recv_n > atom_num_r) then + + deallocate(tag_recv_buff, type_recv_buff,r_recv_buff, stat = deallostat) + if(deallostat /= 0) call alloc_error("Failure to deallocate tag/r_recv_buff", deallostat) + + allocate(tag_recv_buff(recv_n), type_recv_buff(recv_n), r_recv_buff(3*recv_n), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate tag/r_recv_buff", allostat) + + tag_recv_buff(:) = 0 + type_recv_buff(:)= 0 + r_recv_buff(:) = 0.0_wp + atom_num_r = recv_n + + !Reallocate accept arrays + deallocate(accept_send) + allocate(accept_send(recv_n)) + end if + end if + + !send/recv tag + if(recv_l.eqv..true.) then + call mpi_irecv(tag_recv_buff, recv_n, mpi_integer, recv_rank, itag_t, grid_comm, ireq_t, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(tag_send_buff, send_n, mpi_integer, send_rank, itag_t, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_t, mstatus, ierr) + end if + !send/recv type + if(recv_l.eqv..true.) then + call mpi_irecv(type_recv_buff, recv_n, mpi_integer, recv_rank, itag_ty, grid_comm, ireq_ty, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(type_send_buff, send_n, mpi_integer, send_rank, itag_ty, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_t, mstatus, ierr) + end if + + !send/recv r + if(recv_l.eqv..true.) then + call mpi_irecv(r_recv_buff, 3*recv_n, mpi_wp, recv_rank, itag_r, grid_comm, ireq_r, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(r_send_buff, 3*send_n, mpi_wp, send_rank, itag_r, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_r, mstatus, ierr) + end if + end if + + !check if each core needs the received atoms + if(recv_l.eqv..true.) then + + accept_count = 0 + accept_send = 0 + do ia = 1, recv_n + + do i = 1, 3 + r_in(i) = r_recv_buff(3*(ia-1)+i) + end do + + !We need this atom as a ghost + if(in_block_bd(r_in, pro_bd_out)) then + + ja = ja + 1 + !Resize if needed + if(ja > atom_num_lr) then + + allocate(dir_atom_array(atom_num_lr+seg_num), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate dir_atom_array", allostat) + dir_atom_array(1:atom_num_lr) = dir_atom(:) + dir_atom_array(atom_num_lr+1:) = 0 + call move_alloc(dir_atom_array, dir_atom) + + call grow_at_arrays + + end if + + tag_atom(ja) = tag_recv_buff(ia) + type_atom(ja)= type_recv_buff(ia) + r_atom(:, ja) = r_in(:) + dir_atom(ja) = j + + !Now mark this atom as accepted + accept_count = accept_count +1 + if(accept_count > size(accept_send)) then + allocate(accept_array(size(accept_send) + seg_num)) + accept_array(1:size(accept_send)) = accept_send + accept_array(size(accept_send)+1:) = 0 + call move_alloc(accept_array, accept_send) + end if + + accept_send(accept_count) = ia + + end if + end do + end if + list_atom_logic(k, j) = send_l + if((num_pro(j) == 1).and.(period(j))) then + !Resize if needed + if(accept_count > size(accept_recv)) then + deallocate(accept_recv) + allocate(accept_recv(accept_count)) + end if + accept_recv(1:accept_count) = accept_send(1:accept_count) + accept_count_r = accept_count + else + !Now if we sent atoms this turn we have to receive the accepted ghost atoms to build our list, + !First get the accepted counts + if(send_l) call mpi_irecv(accept_count_r, 1, mpi_integer, send_rank, 15, grid_comm, ireq_n, ierr) + if(recv_l) call mpi_send(accept_count, 1, mpi_integer, recv_rank, 15, grid_comm, ierr) + if(send_l) call mpi_wait(ireq_n, mstatus, ierr) + + if(send_l) then + !Resize if needed + if(accept_count_r > 
size(accept_recv)) then + deallocate(accept_recv) + allocate(accept_recv(accept_count_r)) + end if + + !Now receive the accepted atoms + call mpi_irecv(accept_recv, accept_count_r, mpi_integer, send_rank, 16, grid_comm, ireq_n, ierr) + end if + + if(recv_l) call mpi_send(accept_send(1:accept_count), accept_count, mpi_integer, & + recv_rank, 16, grid_comm, ierr) + + if(send_l) call mpi_wait(ireq_n, mstatus, ierr) + + end if + + if(send_l.or.((num_pro(j)==1).and.period(j))) then + !Now build the list of sent atoms + list_atom_num(k,j) = accept_count_r + + !initialize list_atom which contains information on which atoms were sent on every dimension + if(.not.allocated(list_atom)) then + allocate(list_atom(accept_count_r, 2, 3), stat = allostat) + if (allostat>0) call alloc_error("Failure to allocate list_atom", allostat) + end if + + + if (accept_count_r > size(list_atom(:,k,j))) then + allocate(list_atom_array(accept_count_r, 2, 3)) + list_atom_array(1:size(list_atom(:,k,j)),:,:) = list_atom(1:size(list_atom(:,k,j)),:,:) + list_atom_array(size(list_atom(:,k,j))+1,:,:) = 0 + call move_alloc(list_atom_array, list_atom) + end if + + do i = 1, accept_count_r + list_atom(i, k, j) = pos_send_buff(accept_recv(i)) + end do + else + list_atom_num(k,j) = 0 + end if + end do + end do + atom_num_lg = ja + + + !debug + if(atom_num_lr /= size(r_atom, 2)) then + print *, 'Error: Wrong atom_num_lr', atom_num_lr, & + ' which should equal size(r_atom, 2)', size(r_atom, 2) + call mpi_abort(mpi_comm_world, 1, ierr) + + else if(atom_num_lr /= size(tag_atom)) then + print *, 'Error: Wrong atom_num_lr', atom_num_lr, & + ' which should equal size(tag_atom)', size(tag_atom) + call mpi_abort(mpi_comm_world, 1, ierr) + + else if(atom_num_lr /= size(dir_atom)) then + print *, 'Error: Wrong atom_num_lr', atom_num_lr, & + ' which should equal size(dir_atom)', size(dir_atom) + call mpi_abort(mpi_comm_world, 1, ierr) + + else if(atom_num_lg > atom_num_lr) then + print *, 'Error: Wrong atom_num_lg', atom_num_lg, & + ' which is larger than', atom_num_lr + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !resize vel_atom + if(need_vel) then + atom_num_ln = size(vel_atom,2) + allocate( vel_array(3, atom_num_lr), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate vel_array", allostat) + vel_array(:, 1:atom_num_ln) = vel_atom(:, :) + vel_array(:, atom_num_ln+1:) = 0.0_wp + call move_alloc(vel_array, vel_atom) + end if + + !list_atom +! if(.not.allocated(list_atom)) allocate(list_atom(seg_num, 2, 3), stat = allostat) +! list_atom(:, :, :) = 0 +! list_atom_num(:, :) = 0 +! seg_num_atom = size(list_atom, 1) +! +! do j = 1, 3 +! do k = 1, 2 +! +! send_n = 0 +! pro_bd_temp(:) = pro_bd_kj(:, k, j) +! +! do ia = 1, atom_num_lg +! +! r_in(:) = r_atom(:, ia) +! if(in_block_bd(r_in, pro_bd_temp)) then +! send_n = send_n + 1 +! if(send_n > seg_num_atom) then +! +! allocate(list_atom_array(seg_num_atom+seg_num, 2, 3), stat = allostat) +! if(allostat /= 0) call alloc_error("Failure to allocate list_atom_array", allostat) +! list_atom_array(1:seg_num_atom, :, :) = list_atom(:, :, :) +! list_atom_array(seg_num_atom+1:, :, :) = 0 +! call move_alloc(list_atom_array, list_atom) +! seg_num_atom = seg_num_atom + seg_num +! end if +! list_atom(send_n, k, j) = ia +! end if +! end do +! list_atom_num(k, j) = send_n +! end do +! 
end do + + return + end subroutine ghost_at + + subroutine processor_atomap + + integer :: i, j, k, send_n, recv_n, iatomap, jatomap, latomap, send_n_max, & + itag_r, itag_t, itag_n, ireq_r, ireq_t, ireq_n, & + send_rank, recv_rank, atomap_num_r + + logical :: send_l, recv_l + + integer, dimension(3) :: send_coords, recv_coords + + integer, dimension(mpi_status_size) :: mstatus + + integer, allocatable :: & + tag_send_buff(:), tag_recv_buff(:) + + real(kind = wp), allocatable :: & + r_send_buff(:), r_recv_buff(:) + + send_n_max = maxval(list_atomap_num) + + allocate(tag_send_buff(send_n_max), tag_recv_buff(send_n_max), r_send_buff(3*send_n_max), & + r_recv_buff(3*send_n_max), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate send_buff arrays in processor_atomap", allostat) + + !Initialize variables + tag_send_buff(:) = 0 + tag_recv_buff(:) = 0 + r_send_buff(:) = 0.0_wp + r_recv_buff(:) = 0.0_wp + itag_t = 1 + itag_r = 2 + itag_n = 3 + ireq_t = 0 + ireq_r = 0 + ireq_n = 0 + send_rank = grank + recv_rank = grank + atomap_num_r = send_n_max + jatomap = atomap_num_l + + !Check the ghost code to see how this looping works + ! k == 1: positive + ! k == 2: negative + do j = 1, 3 + do k = 1, 2 + + send_coords(:) = grid_coords(:) + recv_coords(:) = grid_coords(:) + send_l = .true. + recv_l = .true. + + if(num_pro(j) == 1) then + send_l = .false. + + if(period(j).eqv..true.) then + + recv_l = .true. + send_n = list_atomap_num(k, j) + recv_n = send_n + + if(recv_n > atomap_num_r) then + deallocate(tag_recv_buff, r_recv_buff, stat = deallostat) + if(deallostat /= 0) call alloc_error("Failure to deallocate tag/r_recv_buff", allostat) + allocate(tag_recv_buff(recv_n), r_recv_buff(3*recv_n), stat = allostat) + + tag_recv_buff(:) = 0 + r_recv_buff(:) = 0.0_wp + atomap_num_r = recv_n + + end if + + do iatomap = 1, recv_n + latomap = list_atomap(iatomap, k, j) + tag_recv_buff(iatomap) = tag_atomap(latomap) + do i = 1, 3 + r_recv_buff(3*(iatomap-1)+i) = r_atomap(i, latomap) + end do + r_recv_buff(3*(iatomap-1)+j) = r_recv_buff(3*(iatomap-1)+j) & + + (-1)**k * box_length(j) + end do + + else + recv_l = .false. + recv_n = 0 + end if + else + send_coords(j) = grid_coords(j) - (-1)**k + recv_coords(j) = grid_coords(j) + (-1)**k + + if(period(j).eqv..true.) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + + else + if(k == 1) then + if(grid_coords(j) < num_pro(j) - 1) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + else + send_l = .false. + send_rank = -1 + end if + + if(grid_coords(j) > 0) then + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + recv_l = .false. + recv_rank = -1 + end if + + else if(k == 2) then + if(grid_coords(j) > 0) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + else + send_l = .false. + send_rank = -1 + end if + if(grid_coords(j) < num_pro(j) - 1) then + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + recv_l = .false. + recv_rank = -1 + end if + end if + end if + + !check send_l + if(send_l.neqv.list_atomap_logic(k, j)) then + print *, 'Error: send_l', send_l, ' should equal list_atomap_logic', & + list_atomap_logic(k, j), ' for k', k, ' and j', j + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !prepare send_buff + if(send_l.eqv..true.) 
then + send_n = list_atomap_num(k, j) + do iatomap = 1, send_n + latomap = list_atomap(iatomap, k, j) + tag_send_buff(iatomap) = tag_atomap(latomap) + do i = 1, 3 + r_send_buff(3*(iatomap-1)+i) = r_atomap(i, latomap) + end do + + !update send buff for pbc + if(period(j).eqv..true.) then + if(k == 1) then + if(grid_coords(j) == num_pro(j) - 1) then + r_send_buff(3*(iatomap-1)+j) = r_send_buff(3*(iatomap-1)+j) - box_length(j) + end if + end if + if(k == 2) then + if(grid_coords(j) == 0) then + r_send_buff(3*(iatomap-1)+j) = r_send_buff(3*(iatomap-1)+j) + box_length(j) + end if + end if + end if + end do + end if + + !send/recv number + if(recv_l.eqv..true.) then + call mpi_irecv(recv_n, 1, mpi_integer, recv_rank, itag_n, grid_comm, ireq_n, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(send_n, 1, mpi_integer, send_rank, itag_n, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_n, mstatus, ierr) + end if + + !update recv_buff + if(recv_l.eqv..true.) then + + if(recv_n > atomap_num_r) then + + deallocate(tag_recv_buff, r_recv_buff, stat = deallostat) + if(deallostat /= 0) call alloc_error("Failure to deallocate recvbuff in processor_atomap", allostat) + + allocate(tag_recv_buff(recv_n), r_recv_buff(3*recv_n), stat = allostat) + + if(allostat /= 0) call alloc_error("Failure to allocate recv_buff in processor_atomap", allostat) + + tag_recv_buff(:) = 0 + r_recv_buff(:) = 0.0_wp + atomap_num_r = recv_n + end if + end if + + !send/recv tag and r + if(recv_l.eqv..true.) then + call mpi_irecv(tag_recv_buff, recv_n, mpi_integer, recv_rank, itag_t, grid_comm, ireq_t, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(tag_send_buff, send_n, mpi_integer, send_rank, itag_t, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_t, mstatus, ierr) + end if + + if(recv_l.eqv..true.) then + call mpi_irecv(r_recv_buff, 3*recv_n, mpi_wp, recv_rank, itag_r, grid_comm, ireq_r, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(r_send_buff, 3*send_n, mpi_wp, send_rank, itag_r, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_r, mstatus, ierr) + end if + end if + + !check if the tags match + if(recv_l.eqv..true.) 
then + do iatomap = 1, recv_n + + jatomap = jatomap + 1 + if(tag_recv_buff(iatomap) /= tag_atomap(jatomap)) then + print *, 'Error: Rank', rank, ' received atomap tag', & + tag_recv_buff(iatomap), ' of iatomap', iatomap, & + ' from rank', recv_rank, ' which should equal tag_atomap', & + tag_atomap(jatomap), ' of jatomap', jatomap, & + ' when j is', j, ' and k is', k, ' in atomap' + + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + do i = 1, 3 + r_atomap(i, jatomap) = r_recv_buff(3*(iatomap-1)+i) + end do + end do + end if + end do + end do + + !debug + + if(jatomap /= atomap_num_lg) then + print *, 'Error: Wrong jatomap', jatomap, & + ' which should equal atomap_num_lg', atomap_num_lg + call mpi_abort(mpi_comm_world, 1, ierr) + end if + return + + end subroutine processor_atomap + + subroutine processor_atomistic + + integer :: i, j, k, send_n, recv_n, ia, ja, la, send_n_max, & + itag_r, itag_t, itag_n, ireq_r, ireq_t, ireq_n, & + send_rank, recv_rank, atom_num_r + logical :: send_l, recv_l + integer, dimension(3) :: send_coords, recv_coords + integer, dimension(mpi_status_size) :: mstatus + integer, allocatable :: tag_send_buff(:), tag_recv_buff(:) + real(kind = wp), allocatable :: r_send_buff(:), r_recv_buff(:) + + send_n_max = maxval(list_atom_num) + allocate(tag_send_buff(send_n_max), tag_recv_buff(send_n_max), r_send_buff(3*send_n_max), & + r_recv_buff(3*send_n_max), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate tag/r_send/recv_buff", allostat) + + !Initialize variables + tag_send_buff(:) = 0 + tag_recv_buff(:) = 0 + r_send_buff(:) = 0.0_wp + r_recv_buff(:) = 0.0_wp + itag_t = 1 + itag_r = 2 + itag_n = 3 + ireq_t = 0 + ireq_r = 0 + ireq_n = 0 + send_rank = grank + recv_rank = grank + atom_num_r = send_n_max + ja = atom_num_l + + !Check ghost code for more comments + ! k == 1: positive + ! k == 2: negative + do j = 1, 3 + do k = 1, 2 + + send_coords(:) = grid_coords(:) + recv_coords(:) = grid_coords(:) + send_l = .true. + recv_l = .true. + + if(num_pro(j) == 1) then + + send_l = .false. + if(period(j).eqv..true.) then + + recv_l = .true. + send_n = list_atom_num(k, j) + recv_n = send_n + + if(recv_n > atom_num_r) then + + deallocate(tag_recv_buff, r_recv_buff, stat = deallostat) + if(deallostat /= 0) call alloc_error("Failure to deallocate tag/r_recv_buff", deallostat) + + allocate(tag_recv_buff(recv_n), r_recv_buff(3*recv_n), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate tag/r_recv_buff", allostat) + + tag_recv_buff(:) = 0 + r_recv_buff(:) = 0.0_wp + atom_num_r = recv_n + end if + + do ia = 1, recv_n + la = list_atom(ia, k, j) + tag_recv_buff(ia) = tag_atom(la) + do i = 1, 3 + r_recv_buff(3*(ia-1)+i) = r_atom(i, la) + end do + + r_recv_buff(3*(ia-1)+j) = r_recv_buff(3*(ia-1)+j) + (-1)**k * box_length(j) + end do + else + recv_l = .false. + recv_n = 0 + end if + + else + + send_coords(j) = grid_coords(j) - (-1)**k + recv_coords(j) = grid_coords(j) + (-1)**k + if(period(j).eqv..true.) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + if(k == 1) then + if(grid_coords(j) < num_pro(j) - 1) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + else + send_l = .false. + send_rank = -1 + end if + if(grid_coords(j) > 0) then + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + recv_l = .false. 
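+                            !recv_rank = -1 below is only a placeholder: recv_l is
+                            !.false. on this shrink-wrapped boundary, so no receive is
+                            !posted and the value is never passed to an MPI call.
+                            !send_rank is treated the same way above when this rank has
+                            !no neighbour to send to.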
+ recv_rank = -1 + end if + + else if(k == 2) then + if(grid_coords(j) > 0) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + else + send_l = .false. + send_rank = -1 + end if + if(grid_coords(j) < num_pro(j) - 1) then + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + recv_l = .false. + recv_rank = -1 + end if + end if + end if + !check send_l + if(send_l.neqv.list_atom_logic(k, j)) then + print *, 'Error: send_l', send_l, ' should equal list_atom_logic', & + list_atom_logic(k, j), ' for k', k, ' and j', j + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !prepare send_buff + if(send_l.eqv..true.) then + + send_n = list_atom_num(k, j) + + do ia = 1, send_n + + la = list_atom(ia, k, j) + tag_send_buff(ia) = tag_atom(la) + + do i = 1, 3 + r_send_buff(3*(ia-1)+i) = r_atom(i, la) + end do + + !update send buff for pbc + if(period(j).eqv..true.) then + if(k == 1) then + if(grid_coords(j) == num_pro(j) - 1) then + r_send_buff(3*(ia-1)+j) = r_send_buff(3*(ia-1)+j) - box_length(j) + end if + end if + if(k == 2) then + if(grid_coords(j) == 0) then + r_send_buff(3*(ia-1)+j) = r_send_buff(3*(ia-1)+j) + box_length(j) + end if + end if + end if + end do + end if + + !send/recv number + if(recv_l.eqv..true.) then + call mpi_irecv(recv_n, 1, mpi_integer, recv_rank, itag_n, grid_comm, ireq_n, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(send_n, 1, mpi_integer, send_rank, itag_n, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_n, mstatus, ierr) + end if + + !update recv_buff + if(recv_l.eqv..true.) then + + if(recv_n > atom_num_r) then + + deallocate(tag_recv_buff, r_recv_buff, stat = deallostat) + if(deallostat /= 0) call alloc_error("Failure to deallocate recv_buff in processor_atomistic",allostat) + + allocate(tag_recv_buff(recv_n), r_recv_buff(3*recv_n), stat = allostat) + + tag_recv_buff(:) = 0 + r_recv_buff(:) = 0.0_wp + atom_num_r = recv_n + end if + end if + + !send/recv tag and r + if(recv_l.eqv..true.) then + call mpi_irecv(tag_recv_buff, recv_n, mpi_integer, recv_rank, itag_t, grid_comm, ireq_t, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(tag_send_buff, send_n, mpi_integer, send_rank, itag_t, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_t, mstatus, ierr) + end if + itag_t = itag_t + 1 + + if(recv_l.eqv..true.) then + call mpi_irecv(r_recv_buff, 3*recv_n, mpi_wp, recv_rank, itag_r, grid_comm, ireq_r, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(r_send_buff, 3*send_n, mpi_wp, send_rank, itag_r, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_r, mstatus, ierr) + end if + + itag_r = itag_r + 1 + end if + + !check if the tags match + if(recv_l.eqv..true.) 
then + do ia = 1, recv_n + ja = ja + 1 + if(tag_recv_buff(ia) /= tag_atom(ja)) then + print *, 'Error: Rank', rank, ' received atom tag', & + tag_recv_buff(ia), ' of ia', ia, & + ' from rank', recv_rank, ' which should equal tag_atom', & + tag_atom(ja), ' of ja', ja, & + ' when j is', j, ' and k is', k, ' in atomistic' + call mpi_abort(mpi_comm_world, 1, ierr) + end if + do i = 1, 3 + r_atom(i, ja) = r_recv_buff(3*(ia-1)+i) + end do + end do + end if + end do + end do + !debug + if(ja /= atom_num_lg) then + print *, 'Error: Wrong ja', ja, ' which should equal atom_num_lg', atom_num_lg + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + return + end subroutine processor_atomistic + + subroutine pack_ele_dump(send_etype, send_tag_ele, send_esize, mask, send_basis_num, send_basis_type, & + send_pb_node, send_r_nodes, send_energy, send_force, send_virial, send_vel, send_int, send_real) + !This subroutine packs all data for one element into 2 arrays, a send_int array and a send_real array + !containing all information needed for elements + + integer, intent(in) :: send_etype, send_tag_ele, send_esize, send_basis_num, send_basis_type(max_basisnum),& + send_pb_node(3, max_basisnum, ng_max_node), mask + real(kind=wp), intent(in) :: send_r_nodes(3,max_basisnum, ng_max_node), send_energy(max_basisnum, ng_max_node), & + send_force(3, max_basisnum, ng_max_node), send_virial(3,3, max_basisnum, ng_max_node), & + send_vel(3, max_basisnum, ng_max_node) + integer, dimension(:), intent(out) :: send_int + real(kind=wp), dimension(:), intent(out) :: send_real + + integer i, j, k, inod, ibasis + real(kind=wp) :: r_out(3) + + !Initialize variables + send_int(:) = 0 + send_real(:) = 0.0_wp + + !Calculate send counts + !First pack the send_int variable + send_int(1) = send_etype + send_int(2) = send_tag_ele + send_int(3) = send_esize + send_int(4) = send_basis_num + send_int(5) = mask + j = 5+send_basis_num + send_int(6:j) = send_basis_type(1:send_basis_num) + + !Now pack the send_real variable + j = 1 + do inod = 1, ng_node(send_etype) + do ibasis = 1, send_basis_num + send_real(j) = send_energy(ibasis,inod) + j=j+1 + !Apply periodic boundaries if needed + r_out = send_r_nodes(:, ibasis, inod) + call restore_pb(r_out, send_pb_node(:,ibasis,inod)) + do i =1, 3 + send_real(j) = r_out(i) + j = j+1 + end do + do i = 1, 3 + send_real(j) = send_force(i, ibasis, inod) + j = j+1 + end do + do i = 1, 3 + do k = 1,3 + send_real(j) = send_virial(i,k,ibasis,inod) + j=j+1 + end do + end do + + if(need_vel) then + do i = 1, 3 + send_real(j) = send_vel(i, ibasis, inod) + j=j+1 + end do + end if + end do + end do + + return + end subroutine pack_ele_dump + + subroutine unpack_ele_dump(recv_int, recv_real, recv_etype, recv_tag_ele, recv_esize, mask, recv_basis_num, & + recv_basis_type, recv_r_nodes, recv_energy, recv_force, recv_virial, recv_vel) + !This subroutine unpacks all data for one element into 2 arrays, a recv_int array and a recv_real array + !containing all information needed for elements + integer, dimension(:), intent(in) :: recv_int + real(kind=wp), dimension(:), intent(in) :: recv_real + integer, intent(out) :: recv_etype, recv_tag_ele, recv_esize, mask, recv_basis_num, recv_basis_type(max_basisnum) + real(kind=wp), intent(out) :: recv_r_nodes(3,max_basisnum, ng_max_node), recv_energy(max_basisnum, ng_max_node), & + recv_force(3, max_basisnum, ng_max_node), recv_virial(3,3, max_basisnum, ng_max_node), & + recv_vel(3, max_basisnum, ng_max_node) + + integer i, j, k, inod, ibasis + + !Calculate recv counts + 
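+        !The layout below mirrors pack_ele_dump exactly.
+        !recv_int : (1) etype, (2) element tag, (3) esize, (4) basis_num, (5) mask,
+        !           (6 : 5+basis_num) basis types.
+        !recv_real: for each node inod and each basis ibasis, in order:
+        !           energy, r(1:3), force(1:3), virial(1:3,1:3) and, when need_vel is
+        !           set, vel(1:3), i.e. 16 (or 19) reals per (ibasis, inod) pair.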
!First pack the recv_int variable + recv_etype = recv_int(1) + recv_tag_ele = recv_int(2) + recv_esize = recv_int(3) + recv_basis_num = recv_int(4) + mask = recv_int(5) + j = 5+recv_basis_num + recv_basis_type(1:recv_basis_num) = recv_int(6:j) + + !Now pack the recv_real variable + j = 1 + do inod = 1, ng_node(recv_etype) + do ibasis = 1, recv_basis_num + recv_energy(ibasis,inod) = recv_real(j) + j=j+1 + !Apply periodic boundaries if needed + do i =1, 3 + recv_r_nodes(i, ibasis, inod) = recv_real(j) + j = j+1 + end do + do i = 1, 3 + recv_force(i, ibasis, inod) = recv_real(j) + j = j+1 + end do + do i = 1, 3 + do k = 1,3 + recv_virial(i,k,ibasis,inod) = recv_real(j) + j=j+1 + end do + end do + if(need_vel) then + do i = 1, 3 + recv_vel(i, ibasis, inod) = recv_real(j) + j=j+1 + end do + end if + end do + end do + + return + end subroutine unpack_ele_dump + + subroutine pack_atom_dump(send_tag, send_type, mask, send_r, send_energy, send_force, send_virial, send_vel, & + send_int, send_real) + !Pack the atom information into the dump + !This subroutine packs all data for one atom into 2 arrays, a send_int array and a send_real array + !containing all information needed for the atoms + + integer, intent(in) :: send_tag, send_type, mask + real(kind=wp), intent(in) :: send_r(3), send_energy, send_force(3), send_virial(3,3), send_vel(3) + + integer, dimension(:), intent(out) :: send_int + real(kind=wp), dimension(:), intent(out) :: send_real + + integer i, j, k + + !Initialize variables + send_int(:) = 0 + send_real(:) = 0.0_wp + + !First pack the send_int variable + send_int(1) = send_tag + send_int(2) = send_type + send_int(3) = mask + + !Now pack the send_real variable + j = 1 + send_real(j:j+2) = send_r(:) + j =j+3 + send_real(j) = send_energy + j = j+1 + send_real(j:j+2) = send_force(:) + j = j+3 + do i = 1, 3 + do k = 1, 3 + send_real(j) = send_virial(i,k) + j = j+1 + end do + end do + + if(need_vel) send_real(j:j+2) = send_vel + return + end subroutine pack_atom_dump + + subroutine unpack_atom_dump(recv_int, recv_real, recv_tag, recv_type, mask, recv_r, recv_energy, recv_force, recv_virial, & + recv_vel) + !Unpack the atom information into the dump + !This subroutine unpacks all data for one atom from 2 arrays, a send_int array and a send_real array + integer, dimension(:), intent(in) :: recv_int + real(kind=wp), dimension(:), intent(in) :: recv_real + + integer, intent(out) :: recv_tag, recv_type , mask + real(kind=wp), intent(out) :: recv_r(3), recv_energy, recv_force(3), recv_virial(3,3), recv_vel(3) + + integer i, j, k + !Initialize variables + !First pack the recv_int variable + recv_tag = recv_int(1) + recv_type = recv_int(2) + mask = recv_int(3) + + !Now pack the recv_real variable + j = 1 + recv_r(:) = recv_real(j:j+2) + j =j+3 + recv_energy = recv_real(j) + j = j+1 + recv_force = recv_real(j:j+2) + j = j+3 + do i = 1, 3 + do k = 1, 3 + recv_virial(i,k) = recv_real(j) + j = j+1 + end do + end do + + if(need_vel) recv_vel = recv_real(j:j+2) + return + end subroutine unpack_atom_dump + + subroutine pack_ele_atomap(send_etype, send_tag_ele, send_esize, send_basis_num, send_basis_type, & + send_pb_node, send_r_nodes, send_tag_atomap, send_r_atomap, send_int, send_real, send_vel_nodes) + + !This subroutine packs the element and interpolated atom information into one send array + !NOTE: For this code to interface properly with the unpack ele_atomap code, the atomaps should be arranged such that + !All basis atoms belonging to one lattice point should be grouped together. 
So if we have 2 basis atoms + !of different types, index 1 should be type 1, index 2 should be type 2, index 3 should be type 1, and + !index 4 should be type 4. That way we can get the types from the basis_type variable + + integer, intent(in) :: send_etype, send_tag_ele, send_esize, send_basis_num, send_basis_type(max_basisnum),& + send_pb_node(3, max_basisnum ,ng_max_node), send_tag_atomap(atomap_max) + real(kind=wp), intent(in) :: send_r_nodes(3,max_basisnum, ng_max_node), send_r_atomap(3,atomap_max) + + integer, dimension(:), intent(out) :: send_int + real(kind=wp), dimension(:), intent(out) :: send_real + real(kind=wp), intent(in), optional :: send_vel_nodes(3, max_basisnum, ng_max_node) + + integer :: ia, i, j, inod, ibasis, send_atomap_num + + logical :: pack_vel + pack_vel=.false. + if (present(send_vel_nodes)) pack_vel = .true. + + !Initialize variables + send_int(:) = 0 + send_real(:) = 0.0_wp + send_atomap_num = send_basis_num*(send_esize+1)**3 + + !Calculate send counts + !First pack the send_int variable + send_int(1) = send_etype + send_int(2) = send_tag_ele + send_int(3) = send_esize + send_int(4) = send_basis_num + j = 4+send_basis_num + send_int(5:j) = send_basis_type(1:send_basis_num) + i=1 + if(periodic) then + do inod = 1, ng_node(send_etype) + do ibasis =1, send_basis_num + do i = 1,3 + j=j+1 + send_int(j) = send_pb_node(i,ibasis,inod) + end do + end do + end do + end if + !Now add the tag_atomap + send_int(j+1:j+send_atomap_num) = send_tag_atomap(1:send_atomap_num) + + !Now pack the send_real variable + j = 1 + do inod = 1,ng_node(send_etype) + do ibasis = 1, send_basis_num + do i =1, 3 + send_real(j) = send_r_nodes(i, ibasis, inod) + j = j+1 + if(pack_vel) then + send_real(j) = send_vel_nodes(i, ibasis,inod) + j = j+1 + end if + end do + end do + end do + + !Now add r_atomap + do ia = 1, send_atomap_num + do i = 1, 3 + send_real(j) = send_r_atomap(i,ia) + j=j+1 + end do + end do + + return + end subroutine pack_ele_atomap + + subroutine unpack_ele_atomap(recv_int, recv_real, recv_etype, recv_tag_ele, recv_esize, recv_basis_num, & + recv_basis_type, recv_pb_node, recv_r_nodes, recv_tag_atomap, & + recv_type_atomap, recv_r_atomap, recv_vel_nodes) + !This subroutine unpacks the arrays that are communicated + + integer, dimension(4+max_basisnum+(1+3*max_basisnum)*ng_max_node + atomap_max), intent(in) :: recv_int + real(kind=wp), dimension(2*3*max_basisnum*ng_max_node + 3*atomap_max), intent(in) :: recv_real + + integer, intent(out) :: recv_etype, recv_tag_ele, recv_esize, recv_basis_num, recv_basis_type(max_basisnum),& + recv_pb_node(3, max_basisnum, ng_max_node), & + recv_type_atomap(atomap_max), recv_tag_atomap(atomap_max) + real(kind=wp), intent(out) :: recv_r_nodes(3,max_basisnum, ng_max_node), recv_r_atomap(3,atomap_max) + + real(kind=wp), intent(out), optional :: recv_vel_nodes(3, max_basisnum, ng_max_node) + + + integer :: ia, i, j, inod, ibasis, recv_atomap_num + + logical :: pack_vel + pack_vel = .false. + if (present(recv_vel_nodes)) pack_vel = .true. 
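+        !The layout mirrors pack_ele_atomap.
+        !recv_int : etype, element tag, esize, basis_num, basis_type(1:basis_num),
+        !           then (only when periodic) 3*basis_num*ng_node(etype) periodic-image
+        !           flags, then the basis_num*(esize+1)**3 interpolated-atom tags.
+        !recv_real: r (and vel, when packed) interleaved component by component for
+        !           every (node, basis) pair, followed by 3 coordinates per
+        !           interpolated atom.
+        !Atom types are not communicated: they are rebuilt from basis_type, relying on
+        !the packer grouping the atomaps of one lattice point consecutively.  The index
+        !expression used below is equivalent to mod(ia-1, recv_basis_num) + 1.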
+ + recv_basis_type(:) = 0 + recv_pb_node(:,:,:) = 0 + recv_r_nodes(:,:,:) = 0.0_wp + if(pack_vel) recv_vel_nodes(:,:,:) = 0.0_wp + + !First unpack the recv_int variable + recv_etype = recv_int(1) + recv_tag_ele = recv_int(2) + recv_esize = recv_int(3) + recv_basis_num = recv_int(4) + j = 4+recv_basis_num + recv_basis_type(1:recv_basis_num) = recv_int(5:j) + if(periodic) then + do inod = 1, ng_node(recv_etype) + do ibasis =1, recv_basis_num + do i =1,3 + j=j+1 + recv_pb_node(i, ibasis, inod) = recv_int(j) + end do + end do + end do + end if + + !Now unpack the interpolated atom tags + recv_atomap_num = recv_basis_num*(recv_esize+1)**3 + recv_tag_atomap(1:recv_atomap_num) = recv_int(j+1:j+recv_atomap_num) + + !Now unpack the recv_real variable + j = 1 + do inod = 1,ng_node(recv_etype) + do ibasis = 1, recv_basis_num + do i =1, 3 + recv_r_nodes(i, ibasis, inod) = recv_real(j) + j = j+1 + if(pack_vel) then + recv_vel_nodes(i, ibasis,inod) = recv_real(j) + j = j+1 + end if + end do + end do + end do + + !Now unpack r_atomap and assign the atom types + do ia = 1, recv_atomap_num + recv_type_atomap(ia) = recv_basis_type(ia-int((ia-1)/recv_basis_num)*recv_basis_num) + do i = 1, 3 + recv_r_atomap(i,ia) = recv_real(j) + j = j+1 + end do + end do + return + end subroutine unpack_ele_atomap + + subroutine pack_atoms(num_pack, tag_buff, type_buff, r_buff, send_int, send_real, vel_buff) + !This subroutine packs the atom information into send_int and send_real + integer, intent(in) :: num_pack + integer, dimension(num_pack), intent(in) :: tag_buff, type_buff + real(kind=wp), dimension(3,num_pack), intent(in) :: r_buff + real(kind=wp), intent(in), optional :: vel_buff(3, num_pack) + integer, dimension(2*num_pack), intent(out) :: send_int + real(kind=wp), dimension(2*3*num_pack), intent(out) :: send_real + + integer :: i, j, k + logical :: pack_vel + + pack_vel = .false. + if (present(vel_buff)) pack_vel = .true. + + !Check to make sure vel_buff is provided if need_vel + !First pack send_int + send_int(1:num_pack) = tag_buff + send_int(num_pack+1:) = type_buff + + !Now pack send_real + k = 1 + do j = 1, num_pack + do i = 1,3 + send_real(k) = r_buff(i,j) + k=k+1 + if(pack_vel) then + send_real(k) = vel_buff(i,j) + k=k+1 + end if + end do + end do + + return + + end subroutine pack_atoms + + subroutine unpack_atoms(num_pack, recv_int, recv_real, tag_buff, type_buff, r_buff, vel_buff) + !This subroutine packs the atom information into recv_int and recv_real + integer, intent(in) :: num_pack + integer, dimension(2*num_pack), intent(in) :: recv_int + real(kind=wp), dimension(2*3*num_pack), intent(in) :: recv_real + integer, dimension(num_pack), intent(out) :: tag_buff, type_buff + real(kind=wp), dimension(3,num_pack), intent(out) :: r_buff + real(kind=wp), dimension(3,num_pack), intent(out), optional :: vel_buff + + integer :: i, j, k + logical :: pack_vel + + !Figure out if we are packing the velocity + pack_vel = .false. + if (present(vel_buff)) pack_vel = .true. 
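+        !Layout, mirroring pack_atoms: recv_int holds the num_pack tags followed by the
+        !num_pack types; recv_real holds, per atom and per component, the position and
+        !(when velocities were packed) the velocity interleaved.  When vel_buff is not
+        !present only the first 3*num_pack reals are read.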
+ + !First pack recv_int + tag_buff=recv_int(1:num_pack) + type_buff=recv_int(num_pack+1:) + + !Now pack recv_real + k = 1 + do j = 1, num_pack + do i = 1,3 + r_buff(i,j)=recv_real(k) + k=k+1 + if(pack_vel) then + vel_buff(i,j)=recv_real(k) + k=k+1 + end if + end do + end do + + return + + end subroutine unpack_atoms + +end module comms diff --git a/src/compute.f90 b/src/compute.f90 new file mode 100644 index 0000000..1a4f43a --- /dev/null +++ b/src/compute.f90 @@ -0,0 +1,255 @@ +module compute + + !This module contains all of the compute subroutines + use mpi + use parameters + use comms + use atom_types + use forces + use elements + use group + use box + + implicit none + + public + contains + + function compute_pe(group, arg1) + !This function returns the summed energy for all atoms and elements, for elements it adds the contribution for all + !interpolated atoms + integer, intent(in) :: group + logical, intent(in), optional :: arg1 + real(kind=wp) :: compute_pe + integer :: ia, ie, ip, ibasis, inod + real(kind=wp) :: pe, peall + logical :: weigh_ele + + if(present(arg1)) then + weigh_ele=arg1 + else + weigh_ele=.true. + end if + + pe = 0 + do ia = 1, atom_num_l + if (btest(a_mask(ia), group)) then + pe = pe + energy_atom(ia) + end if + end do + + if(weigh_ele) then + do ie = 1, ele_num_l + if(btest(e_mask(ie), group).and.who_has_ele(ie)) then + do inod =1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis=1, basis_num(ie) + pe = pe + energy_eq(ibasis, ip)*mass_mat_coeff(size_ele(ie), etype(ie)) + end do + end do + end if + end do + else + do ie = 1, ele_num_l + if(btest(e_mask(ie), group).and.who_has_ele(ie)) then + do inod =1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis=1, basis_num(ie) + pe = pe + energy_eq(ibasis, ip) + end do + end do + end if + end do + end if + + call mpi_allreduce(pe, peall, 1, mpi_wp, mpi_sum, world, ierr) + compute_pe = peall + return + end function compute_pe + + function compute_fnorm(group) + !This function returns the global norm of the force vector for all atoms and elements, + !for elements it adds the contribution for all interpolated atoms + integer, intent(in) :: group + real(kind=wp) :: compute_fnorm + integer :: ia, ie, ip, ibasis, inod + real(kind=wp) :: fme, f(3), fall + + fme = 0.0_wp + do ia = 1, atom_num_l + if (btest(a_mask(ia), group)) then + fme = fme + force_atom(1,ia)*force_atom(1,ia) + force_atom(2, ia)*force_atom(2,ia) & + +force_atom(3,ia)*force_atom(3,ia) + end if + end do + + do ie = 1, ele_num_l + if(btest(e_mask(ie), group).and.who_has_ele(ie)) then + do inod =1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis=1, basis_num(ie) + f = force_eq(:,ibasis, ip) + fme = fme + (f(1)*f(1) + f(2)*f(2) + f(3)*f(3)) + end do + end do + end if + end do + + call mpi_allreduce(fme, fall, 1, mpi_wp, mpi_sum, world, ierr) + compute_fnorm = sqrt(fall) + return + end function compute_fnorm + + function compute_avgvel(group) + !This function returns the average velocity of the system + integer, intent(in) :: group + real(kind=wp) :: compute_avgvel(3) + integer :: ia, ie, ip, ibasis, inod + real(kind=wp) :: vme(3), vall(3) + + vme=0.0_wp + do ia =1, atom_num_l + if (btest(a_mask(ia), group)) then + vme = vme + vel_atom(:,ia) + end if + end do + + do ie = 1, ele_num_l + if(btest(e_mask(ie), group).and.who_has_ele(ie)) then + do inod =1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis=1, basis_num(ie) + vme = vme + vel(:, ibasis,ip) + end do + end do + end if + end do + + call mpi_allreduce(vme(:), vall(:), 3, mpi_wp, 
mpi_sum, world, ierr) + + compute_avgvel = vall/(group_counts(1,group)+group_counts(3,group)) + + return + end function compute_avgvel + + function compute_ke(group, in_avg_vel) + !This subroutine calculates the kinetic energy of the group + integer, intent(in) :: group + real(kind=wp), intent(in), optional :: in_avg_vel(3) + + real(kind=wp) :: compute_ke(2) + integer :: ia, j, ie, ip, ibasis, inod + real(kind=wp) :: kme(2), avgvel(3), kall(2) + + + if(present(in_avg_vel)) then + avgvel = in_avg_vel + else + avgvel = compute_avgvel(group) + end if + + kme = 0.0_wp + do ia = 1, atom_num_l + if (btest(a_mask(ia), group)) then + do j = 1,3 + kme(1) = kme(1) + 0.5_wp*masses(type_atom(ia))*const_motion*(vel_atom(j,ia) - avgvel(j))**2.0_wp + end do + end if + end do + + do ie = 1, ele_num_l + if(btest(e_mask(ie), group).and.who_has_ele(ie)) then + do inod =1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis=1, basis_num(ie) + do j = 1,3 + kme(2) = kme(2) + mass_mat_coeff(size_ele(ie), etype(ie))*0.5_wp*masses(basis_type(ibasis,ie)) & + *const_motion*(vel(j,ibasis,ip) - avgvel(j))**2.0_wp + end do + end do + end do + end if + end do + + call mpi_allreduce(kme, kall, 2, mpi_wp, mpi_sum, world, ierr) + + compute_ke = kall + return + end function compute_ke + + function compute_temp(group, in_ke) + !This function calculates the temperature for the current model + integer, intent(in) :: group + real(kind=wp), intent(in), optional :: in_ke(2) + + real(kind=wp) :: compute_temp(3) + real(kind=wp) :: ke(2) + + if(present(in_ke)) then + ke = in_ke + else + ke = compute_ke(group) + end if + + compute_temp = 0 + + compute_temp(1) = (2.0_wp*(ke(1)+ke(2)))/(3*(group_counts(1,group)+group_counts(4,group))*boltzmann) + if (atom_num > 0) compute_temp(2) = (2.0_wp*(ke(1)))/(3*(group_counts(1,group))*boltzmann) + if (ele_num > 0) compute_temp(3) = (2.0_wp*(ke(2)))/(3*group_counts(4,group)*boltzmann) + return + end function compute_temp + + function compute_box_press() + !This calculates the box pressure. 
Can only be done on the full box which is why it takes no inputs + real(kind=wp) :: compute_box_press(3,3) + + integer :: ia, ibasis, ip, inod, ie, i, j, k + real(kind=wp) :: vir_buffme(9), vir_buffall(9), box_volume + + vir_buffme(:) = 0.0_wp + !Sum virial stress in the atom region + if(atom_num_l > 0) then + do ia = 1, atom_num_l + k = 1 + do j = 1,3 + do i = 1,3 + vir_buffme(k) = vir_buffme(k) - virial_atom(i,j,ia) + k = k+1 + end do + end do + end do + end if + + if(ele_num_l > 0) then + do ie = 1, ele_num_l + if(who_has_ele(ie)) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod,ie) + do ibasis = 1, basis_num(ie) + k = 1 + do j = 1,3 + do i = 1,3 + vir_buffme(k) = vir_buffme(k) - virial_eq(i,j,ibasis,ip)*mass_mat_coeff(size_ele(ie), itype(ie)) + k = k + 1 + end do + end do + end do + end do + end if + end do + end if + + !Now sum the virial over all processors + call mpi_allreduce(vir_buffme, vir_buffall, 9, mpi_wp, mpi_sum, world, ierr) + + box_volume = box_length(1)*box_length(2)*box_length(3) + k = 1 + do j = 1,3 + do i = 1,3 + compute_box_press(i,j) = nktv2p*vir_buffall(k)/box_volume + end do + end do + return + end function compute_box_press + +end module compute diff --git a/src/debug.f90 b/src/debug.f90 new file mode 100644 index 0000000..6870470 --- /dev/null +++ b/src/debug.f90 @@ -0,0 +1,119 @@ +module debug + !This module is used to run various debugging commands which can be used during simulations for additional correctness checks + use parameters + use forces + use elements + use str + + implicit none + + logical, private :: check_ele_flag + logical :: dflag + public + contains + + + subroutine parse_debug(line) + !This subroutine parses the debug command + character(len=*), intent(in) :: line + integer :: i, j, iospara + character(len=read_len) :: tmptxt, commands(10), msg + + j = tok_count(line)-1 + dflag=.true. + + !Initialize flags + check_ele_flag=.false. + + read(line, *, iostat=iospara, iomsg = msg) tmptxt, (commands(i), i =1, j) + + !Now set the debug flags + do i = 1, j + select case(commands(i)) + case('check_ele') + check_ele_flag= .true. + end select + end do + + end subroutine parse_debug + + subroutine run_debug + !This is the main running function which calls all of the sub debug subroutines + if(check_ele_flag) call check_ele + end subroutine run_debug + + subroutine check_ele + !This code goes over every single element and compares the positions among all elements. + !This code is very slow + integer :: ie, ke, inod, i, j, k + real(kind=wp) :: rbuff(3*ng_max_node), rbuffall(3*ng_max_node*pro_num) + real(kind=wp) :: velbuff(3*ng_max_node), velbuffall(3*ng_max_node*pro_num) + logical :: have_ele, have_ele_all(pro_num) + character(len=read_len) :: msg + + rbuff=0.0_wp + velbuff=0.0_wp + !Loop over all global tags + do ke = 1, ele_num + !Loop over all local tags to find the matching element + do ie = 1, ele_num_l + if(ele_glob_id(ie) == ke) exit + end do + + if (ie > ele_num_l) then + have_ele=.false. + rbuff = 0.0_wp + else + have_ele=.true. 
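+                !This rank owns a copy of element ke: flatten its nodal positions (and
+                !velocities, when tracked) for basis index 1 into rbuff/velbuff at
+                !index 3*(inod-1)+i, so that root can compare the copies gathered from
+                !every rank reporting have_ele and flag any mismatch via misc_error.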
+ do inod =1, ng_node(etype(ie)) + do i = 1, 3 + rbuff(3*(inod-1)+i) = r(i, 1, cg_node(inod,ie)) + if(need_vel) velbuff(3*(inod-1)+i) = vel(i, 1, cg_node(inod,ie)) + end do + end do + end if + + rbuffall=0.0_wp + velbuffall=0.0_wp + !Now communicate this element + call mpi_gather(have_ele, 1, mpi_logical, have_ele_all, 1, mpi_logical, root, world, ierr) + call mpi_gather(rbuff, 3*ng_max_node, mpi_wp, rbuffall, 3*ng_max_node, mpi_wp, root, world, ierr) + if(need_vel) call mpi_gather(velbuff, 3*ng_max_node, mpi_wp, velbuffall, 3*ng_max_node, mpi_wp, root, world, ierr) + + !Now check the elements + if(rank==root) then + if(count(have_ele_all) > 1) then + do i = 1, pro_num + if(have_ele_all(i)) then + do j=1, pro_num + if((j>i) .and. (have_ele_all(j))) then + do k = 1, 3*ng_max_node + if(.not.is_equal(rbuffall((i-1)*ng_max_node*3 + k), & + rbuffall((j-1)*ng_max_node*3 + k))) then + write(msg, *) "Element with global tag ", ke, " does not have matching nodal ", & + "positions for processors ", i-1, " and ", j-1, " with values ", & + rbuffall((i-1)*ng_max_node*3 + k), " and ", & + rbuffall((j-1)*ng_max_node*3 + k) + call misc_error(msg) + + end if + if(need_vel) then + if(.not.is_equal(velbuffall((i-1)*ng_max_node*3 + k), & + velbuffall((j-1)*ng_max_node*3 + k))) then + write(msg, *) "Element with global tag ", ke, " does not have matching nodal ", & + "velocities for processors ", i-1, " and ", j-1, " with values ", & + velbuffall((i-1)*ng_max_node*3 + k), " and ", & + velbuffall((j-1)*ng_max_node*3 + k) + call misc_error(msg) + end if + end if + end do + end if + end do + end if + end do + end if + end if + end do + end subroutine check_ele +end module debug diff --git a/src/deform.f90 b/src/deform.f90 new file mode 100644 index 0000000..f4f80fd --- /dev/null +++ b/src/deform.f90 @@ -0,0 +1,176 @@ +module deform + !This module applies deformations to the box which is then applied to all of the atoms + + use parameters + use box + use elements + use neighbors + use time + use comms + + implicit none + + integer :: deform_num + integer, private, parameter :: max_deform_num = 10 + real(kind=wp),private :: rate(max_deform_num) + integer, private :: ddim(max_deform_num), nnevery(max_deform_num), exclude_gnum(max_deform_num) + + public + contains + + subroutine parse_deform(line) + !Parse the deform command + character(len=*), intent(in) :: line + character(len=read_len) :: tmptxt, deform_dim, msg, excludeg, args(20) + integer :: iospara, i, d, j, now_excludeg + real(kind = dp) :: strain, def + real(kind=dp), allocatable :: r_zero_atom(:, :), r_zero_node(:,:,:) + + + read(line, *, iostat = iospara, iomsg = msg) tmptxt, tmptxt + if(iospara > 0) call read_error(msg, iospara) + select case(tmptxt) + case('now') + now_excludeg=0 + read(line, *, iostat= iospara, iomsg = msg) tmptxt, tmptxt, deform_dim, strain + if(iospara > 0) call read_error(msg, iospara) + j=tok_count(line) + if (j>4) then + read(line, *) args(1:j) + i=5 + do while(i<=j) + select case(args(i)) + case('exclude') + i=i+1 + read(args(i), *) excludeg + i=i+1 + now_excludeg = get_group_index(excludeg) + allocate(r_zero_atom(3,atom_num_l), r_zero_node(3, max_basisnum, node_num_l)) + if(atom_num_l > 0) r_zero_atom = r_atom + if(ele_num_l > 0) r_zero_node = r + write(msg, *) "Excluding group ", trim(adjustl(excludeg)), ' from deform command' + call log_msg(msg) + case default + write(msg, *) "Keyword ", trim(adjustl(args(i))), " is not accepted in neighbor command" + call command_error(msg) + end select + end do + end if + !Now convert the 
dimension to an integer for use later + select case(deform_dim) + case('x','X') + d = 1 + case('y','Y') + d = 2 + case('z','Z') + d = 3 + end select + !First we convert atoms and nodes to fractional coordinates if necessary + call x2frac + + def = strain*orig_box_length(d) + box_bd(2*d-1) = box_bd(2*d-1) - 0.5_wp*def + box_bd(2*d) = box_bd(2*d) + 0.5_wp*def + box_length(d) = box_bd(2*d) - box_bd(2*d-1) + !Now update proc bds + do i = 1, 3 + pro_length(i) = box_length(i) / num_pro(i) + pro_bd(2*i-1) = box_bd(2*i-1) + grid_coords(i) * pro_length(i) + pro_bd(2*i) = box_bd(2*i-1) + (grid_coords(i) + 1) * pro_length(i) + end do + call processor_bds + !Convert back to real coordinates if we applied a deformation + call frac2x + + if(now_excludeg > 0) then + do i = 1, atom_num_l + if(btest(a_mask(i),now_excludeg)) then + r_atom(:, i)=r_zero_atom(:,i) + end if + end do + do i = 1, ele_num_l + if(btest(e_mask(i), now_excludeg)) then + r(:,:,i) = r_zero_node(:,:,i) + end if + end do + end if + + write(msg, *) "Increasing box bounds in ", trim(adjustl(deform_dim)), " by ", def + call log_msg(msg) + + call update_neighbor(iter, .false.,.true.) + + case default + deform_num=deform_num + 1 + read(line, *, iostat= iospara, iomsg = msg)tmptxt, nnevery(deform_num), deform_dim, rate(deform_num) + if(iospara > 0) call read_error(msg, iospara) + + !Now convert the dimension to an integer for use later + select case(deform_dim) + case('x','X') + ddim(deform_num) = 1 + case('y','Y') + ddim(deform_num) = 2 + case('z','Z') + ddim(deform_num) = 3 + end select + end select + + return + + end subroutine parse_deform + + subroutine deform_box + !This subroutine updates the box boundaries and then rescales the atomic positions + + integer :: i + real(kind=wp) :: def + logical :: convert + character(len=1) :: def_dim(3) + character(len=read_len) :: msg + + def_dim = ['x','y','z'] + + + !First update the box length + !Now update the box boundaries + convert = .false. + do i = 1, deform_num + + if(mod(iter, nnevery(i)) == 0) then + !First c + if(.not.convert) then + !First we convert atoms and nodes to factional coordinates if necessary + call x2frac + convert = .true. + end if + + def = time_step*rate(i)*orig_box_length(ddim(i)) + box_bd(2*ddim(i)-1) = box_bd(2*ddim(i)-1) - 0.5_wp*def + box_bd(2*(ddim(i))) = box_bd(2*(ddim(i))) + 0.5_wp*def + box_length(ddim(i)) = box_bd(2*ddim(i)) - box_bd(2*ddim(i)-1) + + write(msg, *) "Increasing box bounds in ", trim(adjustl(def_dim(ddim(1)))), " by ", def, " for deform ", i + call log_msg(msg) + end if + end do + + + !Now update proc bds + if(convert) then + do i = 1, 3 + pro_length(i) = box_length(i) / num_pro(i) + pro_bd(2*i-1) = box_bd(2*i-1) + grid_coords(i) * pro_length(i) + pro_bd(2*i) = box_bd(2*i-1) + (grid_coords(i) + 1) * pro_length(i) + end do + call processor_bds + + !Convert back to real coordinates if we applied a deformation + call frac2x + + call update_neighbor(iter, .false.,.true.) 
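+            !Positions were converted to fractional coordinates before the box was
+            !stretched and converted back afterwards, so atom and nodal positions are
+            !rescaled with the box before the neighbor lists are rebuilt.  Each
+            !triggered entry grows the box by def = time_step*rate(i)*orig_box_length(ddim(i))
+            !per application, split evenly between the two boundaries.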
+ end if + return + end subroutine deform_box + +end module deform diff --git a/src/displace.f90 b/src/displace.f90 new file mode 100644 index 0000000..9674ec3 --- /dev/null +++ b/src/displace.f90 @@ -0,0 +1,140 @@ +module displace + !This module contains subroutines which apply displacements to atoms and elements + use forces + use parameters + use elements + use group + use str + use time + use neighbors + + implicit none + + public + contains + subroutine displace_points(line) + !This subroutine parses the displace command + character(len = *), intent(in) :: line + character(len=read_len) :: tmptxt, g, txtvec(3) + integer :: iospara, i, ia, ip, ibasis, ie, gnum + real(kind=wp) :: disp(3) + + if((ele_num == 0).and.(atom_num == 0)) call misc_error("Model must be read in before calling displace command") + + read(line, *, iostat=iospara) tmptxt, g, (txtvec(i), i = 1, 3) + if(iospara > 0) call read_error("Failure to read displace command", iospara) + + !Get group number + gnum = get_group_index(g) + if(gnum == 0) then + call misc_error("Group "//trim(adjustl(g))// " in displace command has not been defined. "// & + "Please define group before use") + end if + + !Read displacement vector + do i =1, 3 + read(txtvec(i), *, iostat = iospara) disp(i) + if(iospara > 0) call read_error("Failure to read disp vector component "//trim(adjustl(txtvec(i))), iospara) + end do + + !Now displace points + do ia = 1, atom_num_l + if(btest(a_mask(ia), gnum)) then + r_atom(:,ia) = r_atom(:, ia) + disp + end if + end do + do ip = 1, node_num_l + ie = node_cg(ip) + if(btest(e_mask(ie), gnum)) then + do ibasis = 1, basis_num(ie) + r(:, ibasis, ip) = r(:, ibasis, ip) + disp + end do + end if + end do + + call update_neighbor(iter, .false., .true.) + + end subroutine displace_points + + subroutine ramp_displace(line) + !This subroutine parses the displace command + character(len = *), intent(in) :: line + character(len=read_len) :: tmptxt, g, txtvec(4), rdim, ddim + integer :: iospara, ia, ip, ibasis, ie, gnum, j, k + real(kind=wp) :: rhi, rlo, disp, hi, lo, frac + + if((ele_num == 0).and.(atom_num == 0)) call misc_error("Model must be read in before calling ramp command") + + read(line, *, iostat=iospara) tmptxt, g, ddim, txtvec(1), txtvec(2), rdim, rlo, rhi + if(iospara > 0) call read_error("Failure to read ramp command", iospara) + + !Get the ramp dimensions + select case(ddim) + case('x','X') + j = 1 + case('y','Y') + j = 2 + case('z','Z') + j = 3 + end select + + select case(rdim) + case('x','X') + k = 1 + case('y','Y') + k = 2 + case('z','Z') + k = 3 + end select + + !Get the displacement magnitude + disp = rhi-rlo + + !Parse the displacement hi and lo positions + call parse_pos(j, txtvec(1), lo) + call parse_pos(j, txtvec(2), hi) + + !Get group number + gnum = get_group_index(g) + if(gnum == 0) then + call misc_error("Group "//trim(adjustl(g))// " in ramp command has not been defined. 
"// & + "Please define group before use") + end if + + !Now displace points + do ia = 1, atom_num_l + if(btest(a_mask(ia), gnum)) then + if(r_atom(j,ia) > hi) then + frac = 1.0_wp + else if(r_atom(j,ia) < lo) then + frac = 0.0_wp + else + frac = (r_atom(j,ia) - lo)/(hi-lo) + end if + r_atom(k,ia) = r_atom(k, ia) + frac*disp + rlo + end if + end do + do ip = 1, node_num_l + ie = node_cg(ip) + if(btest(e_mask(ie), gnum)) then + do ibasis = 1, basis_num(ie) + if(r(j,ibasis,ip) > hi) then + frac = 1.0_wp + else if(r(j,ibasis,ip) < lo) then + frac = 0.0_wp + else + frac = (r(j, ibasis, ip) - lo)/(hi-lo) + end if + r(k, ibasis, ip) = r(k, ibasis, ip) + frac*disp + rlo + end do + end if + end do + + if(nei_init) then + call update_neighbor(iter, .false., .true.) + end if + + return + end subroutine ramp_displace + +end module displace diff --git a/src/dump.f90 b/src/dump.f90 new file mode 100644 index 0000000..6d8e5c2 --- /dev/null +++ b/src/dump.f90 @@ -0,0 +1,831 @@ +module dump + ! + use atom_types + use parameters + use comms + use elements + use box + use errors + use forces + use math + use group + use time + + implicit none + + !Dump command parameters + integer, parameter :: max_dumps = 20 + integer, private :: dump_num + integer, dimension(max_dumps), private :: dump_every, dump_type, dump_group + character(len = read_len), dimension(max_dumps), private :: base_filename, dump_name + + + !Gather arrays for atoms + integer, parameter :: max_send_count = 1024 + integer, allocatable, save :: tag_atom_gather(:), type_atom_gather(:), a_mask_gather(:) + real(kind=wp), allocatable, save :: r_atom_gather(:,:), energy_atom_gather(:), force_atom_gather(:,:), & + virial_atom_gather(:,:,:), vel_atom_gather(:,:) + !Gather arrays for elements + integer, allocatable, save :: tag_ele_gather(:), size_ele_gather(:), etype_gather(:), & + basis_num_gather(:), basis_type_gather(:,:), e_mask_gather(:) + real(kind=wp), allocatable, save :: r_gather(:,:,:), energy_gather(:,:), force_gather(:,:,:), virial_gather(:,:,:,:), & + vel_gather(:,:,:) + + !Gather arrays for interpolated atoms + integer, allocatable :: atomap_mask_gather(:), type_atomap_gather(:) + real(kind = wp), allocatable, save :: r_atomap_gather(:,:) + + + public + contains + + subroutine dump_defaults + dump_every = huge(1) + dump_num = 0 + end subroutine dump_defaults + + subroutine parse_dump(line) + !Parse the dump command + character(len = *), intent(in) :: line + character(len = read_len) :: label, tmptxt, msg, dtype, dgroup + integer :: iospara, ppos + + dump_num = dump_num + 1 + read(line,*, iostat=iospara) label, dump_name(dump_num), dgroup, dtype, dump_every(dump_num), base_filename(dump_num) + if(iospara > 0) call read_error("Failure to read dump command", iospara) + + !Check to make sure dump file format is correct + select case(dtype) + case('out') + dump_type(dump_num) = 1 + case('lmp') + dump_type(dump_num) = 2 + case default + write(tmptxt, *) "dump file format ", trim(adjustl(dtype)), " is not accepted" + call command_error(tmptxt) + end select + + write(msg, *) "Writing output to ", trim(adjustl(base_filename(dump_num))), " using ", trim(adjustl(dtype)), " file format" + call log_msg(msg) + + !Now get rid of the extension on the filename + ppos = scan(base_filename(dump_num), ".") + if(ppos > 0) then + base_filename(dump_num)(ppos:ppos+3) = '' + end if + + !Parse the dump group + dump_group(dump_num) = get_group_index(dgroup) + if(dump_group(dump_num) > group_num) then + write(msg, *) "Group ", dgroup, " has not been created, 
please select an existing group to dump" + call command_error(msg) + end if + + !Now if dump every = 0 set it to a very large number + if (dump_every(dump_num) == 0) then + dump_every(dump_num) = (huge(1)) + end if + + return + end subroutine parse_dump + + subroutine parse_undump(line) + !This deletes a dump command from memory + character(len=*) :: line + + integer :: i, iospara + character(len=read_len) :: undump_name, msg, tmp + + !Parse the command to get the undump name + read(line, *, iostat = iospara, iomsg=msg) tmp, undump_name + if(iospara > 0) call read_error(msg, iospara) + + !Now loop over all dump commands and delete the one that matches the name + do i = 1, dump_num + if(dump_name(i) == undump_name) then + dump_name(i:dump_num-1) = dump_name(i+1:dump_num) + dump_every(i:dump_num-1) = dump_every(i+1:dump_num) + dump_type(i:dump_num-1) = dump_type(i+1:dump_num) + dump_group(i:dump_num-1) = dump_group(i+1:dump_num) + base_filename(i:dump_num-1) = base_filename(i+1:dump_num) + dump_num = dump_num -1 + exit + end if + end do + return + end subroutine parse_undump + + subroutine parse_write_out(line) + !Parse the dump command + integer :: type_for_dump, dg + character(len = *), intent(in) :: line + character(len = read_len) :: label, tmptxt, msg, dtype, dgroup, fname + integer :: iospara, ppos + + fname='' + read(line,*, iostat=iospara) label, dgroup, dtype, fname + if(iospara > 0) call read_error("Failure to read write_out command", iospara) + if(fname=='') call misc_error('Missing filename for write_out command' ) + + !Check to make sure dump file format is correct + select case(dtype) + case('out') + type_for_dump=1 + case('lmp') + type_for_dump=2 + case default + write(tmptxt, *) "dump file format ", trim(adjustl(dtype)), " is not accepted" + call command_error(tmptxt) + end select + + write(msg, *) "Writing output to ", trim(adjustl(fname)), " using ", trim(adjustl(dtype)), " file format" + call log_msg(msg) + + !Parse the dump group + dg = get_group_index(dgroup) + if(dg > group_num) then + write(msg, *) "Group ", dgroup, " has not been created, please select an existing group to dump" + call command_error(msg) + end if + + !Now write it out + if(atom_num > 0) call gather_at + if((ele_num > 0).and.(type_for_dump == 1)) call gather_cg + if((ele_num > 0).and.(type_for_dump == 2)) call gather_atomap + !Loop over all dumps and check to see if any need to be written + if(rank == root) then + select case (type_for_dump) + case(1) + call write_pycac(iter, trim(adjustl(fname)), dg) + case(2) + call write_lmp(trim(adjustl(fname)), dg) + end select + end if + + call dealloc_gather + + return + end subroutine parse_write_out + + subroutine write_dump(timestep, force_dump) + !This subroutine converts the timestep to the filename and writes the correctly formatted dump file + integer, intent(in) :: timestep + logical, intent(in), optional :: force_dump + + integer :: i + character(len = read_len) :: fname + logical :: gathered, want_dump + + if(present(force_dump)) then + want_dump = force_dump + else + want_dump = .false. + end if + + gathered = .false. + do i = 1, dump_num + if((mod(timestep, dump_every(i)) == 0).or.want_dump) then + !Gather arrays if needed + if(.not.gathered) then + if(atom_num > 0) call gather_at + if((ele_num > 0).and.any(dump_type == 1)) call gather_cg + if((ele_num > 0).and.any(dump_type == 2)) call gather_atomap + gathered = .true. 
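+                !The gathers above are collective over all ranks, so they run at most
+                !once per write_dump call (guarded by the gathered flag) even when
+                !several dumps trigger on the same timestep; root then writes each
+                !requested format from the gathered copies and dealloc_gather releases
+                !them afterwards.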
+ end if + !Loop over all dumps and check to see if any need to be written + write(fname, *) iter + + if(rank == root) then + select case (dump_type(i)) + case(1) + call write_pycac(iter, trim(adjustl(base_filename(i)))//trim(adjustl(fname))//".out", dump_group(i)) + case(2) + call write_lmp(trim(adjustl(base_filename(i)))//trim(adjustl(fname))//".lmp", dump_group(i)) + end select + end if + end if + end do + + if(gathered) call dealloc_gather + + end subroutine write_dump + + subroutine dealloc_gather + !Deallocate gather arrays when no longer needed + if(allocated(tag_ele_gather)) then + deallocate(tag_ele_gather, size_ele_gather, etype_gather, basis_num_gather, basis_type_gather, e_mask_gather, & + r_gather, energy_gather, force_gather, virial_gather, stat=allostat) + if(allostat > 0) call alloc_error("Failure to deallocate cg gather arrays", allostat) + if(need_vel) deallocate(vel_gather) + end if + + if(allocated(tag_atom_gather)) then + deallocate(tag_atom_gather, type_atom_gather, a_mask_gather, r_atom_gather, energy_atom_gather, & + force_atom_gather, virial_atom_gather, stat = allostat) + if(allostat > 0) call alloc_error("Failure to deallocate at gather arrays", allostat) + if(need_vel) deallocate(vel_atom_gather) + + if(allocated(type_atomap_gather)) then + deallocate(r_atomap_gather, type_atomap_gather, atomap_mask_gather, stat = allostat) + if(allostat > 0) call alloc_error("Failure to deallocate atomap gather arrays", allostat) + end if + end if + end subroutine dealloc_gather + + subroutine gather_cg + integer :: n_ints, n_reals, ntot_ints, ntot_reals, packet_num, ie, ip, ireal, iint, & + n, je, jp, pro, inod, i, pb_in(3,max_basisnum, ng_max_node) + real(kind=wp) :: r_nodes(3, max_basisnum, ng_max_node), force_nodes(3, max_basisnum, ng_max_node), & + eng_nodes(max_basisnum, ng_max_node), virial_nodes(3,3,max_basisnum,ng_max_node),& + vel_nodes(3, max_basisnum, ng_max_node) + integer, allocatable :: gather_ints(:), send_ints(:) + real(kind=wp), allocatable :: gather_reals(:), send_reals(:) + + !Allocate gather arrays if necessary + if(rank == root) then + allocate(tag_ele_gather(ele_num), size_ele_gather(ele_num), etype_gather(ele_num), e_mask_gather(ele_num), & + basis_num_gather(ele_num), basis_type_gather(max_basisnum, ele_num), & + r_gather(3, max_basisnum, node_num), energy_gather(max_basisnum, node_num), & + force_gather(3, max_basisnum, node_num), virial_gather(3,3,max_basisnum,node_num), stat=allostat) + if (allostat > 0) call alloc_error("Failure to allocate gather arrays", allostat) + if(need_vel) allocate(vel_gather(3, max_basisnum, node_num), stat=allostat) + if (allostat > 0) call alloc_error("Failure to allocate vel gather arrays", allostat) + + end if + + !Now allocate the send_arrays + n_ints = 5+max_basisnum + ntot_ints = max_send_count*n_ints + + if(need_vel) then + n_reals = 22*max_basisnum*ng_max_node + else + n_reals = 19*max_basisnum*ng_max_node + end if + ntot_reals = max_send_count*n_reals + + allocate(send_ints(ntot_ints), send_reals(ntot_reals), stat = allostat) + if (allostat > 0 ) call alloc_error("Failure to allocate send arrays for cg comm in gather_in_root", allostat) + !Only allocate gather arrays on root + if(rank == root) then + allocate(gather_ints(pro_num*ntot_ints), gather_reals(pro_num*ntot_reals)) + if (allostat > 0 ) call alloc_error("Failure to allocate gather arrays for cg comm in gather_cg", allostat) + else + allocate(gather_ints(0), gather_reals(0)) + if (allostat > 0 ) call alloc_error("Failure to allocate gather arrays for cg 
comm in gather_cg", allostat) + end if + + !Now build the local send buff array + vel_nodes=0.0_wp + packet_num = 0 + ip = 0 + ie = 0 + je = 0 + jp = 0 + iint = 0 + ireal = 0 + send_ints(:) = 0 + send_reals(:) = 0.0_wp + pb_in = 0 + + if (rank == root) then + gather_ints(:) = 0 + gather_reals(:) = 0.0_wp + end if + do while (ie < ele_num) + if( (je == ele_num_l).or.(packet_num == max_send_count) ) then + !If we have sent/packed all of our elements or if this packet is equal to send_counts then we communicate + !Gather integer data + call mpi_gather(send_ints, ntot_ints, mpi_integer, gather_ints, ntot_ints, mpi_integer, root, world, ierr) + !Gather real data + call mpi_gather(send_reals, ntot_reals, mpi_wp, gather_reals, ntot_reals, mpi_wp, root, world, ierr) + !If root then loop over all received values and unpack + if (rank == root) then + outerloop:do pro= 1, pro_num + innerloop:do i = 1, max_send_count + !Index the start point for the current element in the array + ireal=n_reals*((pro-1)*max_send_count + (i-1)) + iint = n_ints*((pro-1)*max_send_count + (i-1)) + + !If this buffer in the array is 0 then we move it ahead to where the next processor beings + if (ie == ele_num) then + exit outerloop + !Exit if ie == ele_num + else if(gather_ints(iint+1) == 0) then + exit innerloop + end if + + ie = ie + 1 + + !Unpack the received buffer for the current element + call unpack_ele_dump(gather_ints(iint+1:iint+n_ints), gather_reals(ireal+1:ireal+n_reals), & + etype_gather(ie), tag_ele_gather(ie), size_ele_gather(ie), e_mask_gather(ie), & + basis_num_gather(ie), basis_type_gather(:,ie), r_nodes, eng_nodes, & + force_nodes, virial_nodes, vel_nodes) + + !Set nodal quantities + n = ng_node(etype_gather(ie)) + r_gather(:,:,ip+1:ip+n) = r_nodes(:,:,1:n) + energy_gather(:,ip+1:ip+n) = eng_nodes(:,1:n) + force_gather(:,:,ip+1:ip+n) = force_nodes(:,:,1:n) + virial_gather(:,:,:,ip+1:ip+n) = virial_nodes(:,:, :,1:n) + + if(need_vel) vel_gather(:,:,ip+1:ip+n) = vel_nodes(:,:,1:n) + + ip = ip + n + end do innerloop + end do outerloop + end if + + !broadcast gathered num to see if we need to continue in the loop + call mpi_bcast(ie, 1, mpi_int, root, world, ierr) + + !gathered_num can't be more than ele_num + if(ie > ele_num) then + call misc_error("Gathered_num can't be larger than ele_num ") + end if + !Zero out arrays to reset for next round of communications + packet_num = 0 + send_ints(:) = 0 + send_reals(:) = 0.0_wp + ireal = 0 + iint = 0 + else + !We aren't ready to communicate so keep packing + je = je+1 + + if(je > ele_num_l) call misc_error("je greater than ele_num_l in gather_cg") + if(who_has_ele(je)) then + !get the node positions + do inod = 1, ng_node(etype(je)) + jp = cg_node(inod, je) + r_nodes(:,:,inod) = r(:,:,jp) + eng_nodes(:,inod) = energy_eq(:,jp) + force_nodes(:,:,inod) = force_eq(:, :, jp) + virial_nodes(:,:,:,inod) = virial_eq(:,:,:,jp) + if(periodic) then + pb_in(:,:,inod) = pb_node(:,:,jp) + end if + if(need_vel) vel_nodes(:,:,inod) = vel(:,:,jp) + end do + + !Get the start position for the data in the send arrays + ireal=n_reals*(packet_num) + iint = n_ints*(packet_num) + + call pack_ele_dump(etype(je), tag_ele(je), size_ele(je), e_mask(je), basis_num(je), basis_type(:, je), & + pb_in, r_nodes, eng_nodes, force_nodes, virial_nodes, vel_nodes,& + send_ints(iint+1:iint+n_ints), send_reals(ireal+1:ireal+n_reals)) + !Increment the packet num + packet_num = packet_num + 1 + end if + end if + end do + return + end subroutine gather_cg + + subroutine gather_at + !Gather all atoms to root 
processor + integer :: ia, ja, n_ints, n_reals, ntot_ints, ntot_reals, packet_num, ireal, iint, pro, i + + integer, allocatable :: gather_ints(:), send_ints(:) + real(kind=wp) :: v(3) + real(kind=wp), allocatable :: gather_reals(:), send_reals(:) + + !Allocate gather arrays if necessary + if(rank == root) then + allocate(tag_atom_gather(atom_num), type_atom_gather(atom_num), a_mask_gather(atom_num), r_atom_gather(3, atom_num), & + energy_atom_gather(atom_num), force_atom_gather(3,atom_num), virial_atom_gather(3,3,atom_num), & + stat = allostat) + if (allostat > 0) call alloc_error("Failure to allocate gather at arrays", allostat) + + if(need_vel) allocate(vel_atom_gather(3,atom_num), stat = allostat) + if (allostat > 0) call alloc_error("Failure to allocate vel at arrays", allostat) + + end if + + !Now allocate the send_arrays + n_ints = 3 + ntot_ints = max_send_count*n_ints + + if(need_vel) then + n_reals=22 + else + n_reals = 19 + end if + ntot_reals = max_send_count*n_reals + + allocate(send_ints(ntot_ints), send_reals(ntot_reals), stat = allostat) + if (allostat > 0 ) call alloc_error("Failure to allocate send arrays for cg comm in gather_in_root", allostat) + + !Only allocate gather arrays on root + if(rank == root) then + allocate(gather_ints(pro_num*ntot_ints), gather_reals(pro_num*ntot_reals)) + if (allostat > 0 ) call alloc_error("Failure to allocate gather arrays for cg comm in gather_cg", allostat) + else + allocate(gather_ints(0), gather_reals(0)) + if (allostat > 0 ) call alloc_error("Failure to allocate gather arrays for cg comm in gather_cg", allostat) + end if + + !Now build the local send buff array + packet_num = 0 + iint = 0 + ireal = 0 + ia = 0 + ja = 0 + v(:) = 0.0_wp + send_ints(:) = 0 + send_reals(:) = 0.0_wp + + if (rank == root) then + gather_ints(:) = 0 + gather_reals(:) = 0.0_wp + end if + do while (ia < atom_num) + if( (ja == atom_num_l).or.(packet_num == max_send_count) ) then + !If we have sent/packed all of our atoms or if this packet is equal to send_counts then we communicate + !Gather integer data + call mpi_gather(send_ints, ntot_ints, mpi_integer, gather_ints, ntot_ints, mpi_integer, root, world, ierr) + !Gather real data + call mpi_gather(send_reals, ntot_reals, mpi_wp, gather_reals, ntot_reals, mpi_wp, root, world, ierr) + !If root then loop over all received values and unpack + if (rank == root) then + outerloop:do pro= 1, pro_num + innerloop:do i = 1, max_send_count + !Index the start point for the current element in the array + ireal=n_reals*((pro-1)*max_send_count + (i-1)) + iint = n_ints*((pro-1)*max_send_count + (i-1)) + + !If this buffer in the array is 0 then we move it ahead to where the next processor beings + !Exit if ia == atom_num + if (ia == atom_num) then + exit outerloop + else if(gather_ints(iint+1) == 0) then + exit innerloop + end if + + ia = ia + 1 + + !Unpack the received buffer for the current element + call unpack_atom_dump(gather_ints(iint+1:iint+n_ints), gather_reals(ireal+1:ireal+n_reals), & + tag_atom_gather(ia), type_atom_gather(ia), a_mask_gather(ia), & + r_atom_gather(:,ia), energy_atom_gather(ia), force_atom_gather(:,ia),& + virial_atom_gather(:,:,ia), v(1:3)) + + if(need_vel) vel_atom_gather(:,ia) = v(1:3) + + end do innerloop + end do outerloop + end if + + !broadcast gathered num to see if we need to continue in the loop + call mpi_bcast(ia, 1, mpi_int, root, world, ierr) + + !ia can't be more than atom_num + if(ia > atom_num) then + call misc_error("Gathered_num can't be larger than atom_num ") + end if + !Zero out 
arrays to reset for next round of communications + packet_num = 0 + send_ints(:) = 0 + send_reals(:) = 0.0_wp + ireal = 0 + iint = 0 + else + !We aren't ready to communicate so keep packing + ja = ja+1 + + if(ja > atom_num_l) call misc_error("ja greater than atom_num_l in gather_cg") + !Get the start position for the data in the send arrays + ireal=n_reals*(packet_num) + iint = n_ints*(packet_num) + + if(need_vel) v(:) = vel_atom(:,ja) + call pack_atom_dump(tag_atom(ja), type_atom(ja), a_mask(ja), r_atom(:,ja), energy_atom(ja), force_atom(:,ja), & + virial_atom(:,:, ja), v, send_ints(iint+1:iint+n_ints), send_reals(ireal+1:ireal+n_reals)) + + !Increment the packet num + packet_num = packet_num + 1 + end if + end do + return + end subroutine gather_at + + + subroutine gather_atomap + !This subroutine gathers atomap information. This is required to get the correct atomistic positions for + !NVT simulations + integer :: ia, ja, ntot_ints, ntot_reals, packet_num, ireal, iint, pro, i, & + atomap_mask(atomap_num_l), ie, iatom, iatomap, virt_count, ibasis + + integer, allocatable :: gather_ints(:), send_ints(:) + real(kind=wp), allocatable :: gather_reals(:), send_reals(:) + + !Allocate gather arrays if necessary + ntot_ints = 2*max_send_count + ntot_reals = 3*max_send_count + + if(rank == root) then + allocate(r_atomap_gather(3, atomap_num), type_atomap_gather(atomap_num), atomap_mask_gather(atomap_num), & + stat = allostat) + if (allostat > 0) call alloc_error("Failure to allocate gather atomap arrays", allostat) + + allocate(gather_ints(pro_num*ntot_ints), gather_reals(pro_num*ntot_reals), stat=allostat) + if(allostat > 0) call alloc_error("Failure to allocate gather account", allostat) + else + allocate(gather_ints(0), gather_reals(0), stat=allostat) + if(allostat > 0) call alloc_error("Failure to allocate gather account", allostat) + + end if + + !Now allocate the send_arrays + allocate(send_ints(ntot_ints), send_reals(ntot_reals), stat = allostat) + if (allostat > 0 ) call alloc_error("Failure to allocate send arrays for cg comm in gather_in_root", allostat) + + !First get the group positions for all atomaps + do ie = 1, ele_num_l + select case(etype(ie)) + case(1,2,3) + virt_count=(size_ele(ie)+1)**3 + end select + do iatom= 1, virt_count + do ibasis = 1, basis_num(ie) + iatomap = cg_atomap(basis_num(ie)*(iatom-1) + ibasis, ie) + if(iatomap /= 0) then + atomap_mask(iatomap) = e_mask(ie) + end if + end do + end do + end do + + + packet_num = 0 + iint = 0 + ireal = 0 + ia = 0 + ja = 0 + send_ints(:) = 0 + send_reals(:) = 0.0_wp + + if(rank == root) then + gather_ints(:) = 0 + gather_reals(:) = 0.0_wp + end if + do while (ia < atomap_num) + if( (ja == atomap_num_l).or.(packet_num == max_send_count) ) then + !If we have sent/packed all of our atoms or if this packet is equal to send_counts then we communicate + !Gather integer data + call mpi_gather(send_ints, ntot_ints, mpi_integer, gather_ints, ntot_ints, mpi_integer, root, world, ierr) + !Gather real data + call mpi_gather(send_reals, ntot_reals, mpi_wp, gather_reals, ntot_reals, mpi_wp, root, world, ierr) + !If root then loop over all received values and unpack + if (rank == root) then + outerloop:do pro= 1, pro_num + innerloop:do i = 1, max_send_count + !Index the start point for the current element in the array + ireal=3*((pro-1)*max_send_count + (i-1)) + iint =2*((pro-1)*max_send_count + (i-1)) + + !If this buffer in the array is 0 then we move it ahead to where the next processor beings + !Exit if ia == atom_num + if (ia == atomap_num) 
then + exit outerloop + else if(gather_ints(iint+1) == 0) then + exit innerloop + end if + + ia = ia + 1 + + !Unpack the received buffer for the current element + type_atomap_gather(ia) = gather_ints(iint+1) + atomap_mask_gather(ia) = gather_ints(iint+2) + r_atomap_gather(:, ia) = gather_reals(ireal+1:ireal+3) + end do innerloop + end do outerloop + end if + + !broadcast gathered num to see if we need to continue in the loop + call mpi_bcast(ia, 1, mpi_int, root, world, ierr) + + !ia can't be more than atom_num + if(ia > atomap_num) then + call misc_error("Gathered_num can't be larger than atom_num ") + end if + !Zero out arrays to reset for next round of communications + packet_num = 0 + send_ints(:) = 0 + send_reals(:) = 0.0_wp + ireal = 0 + iint = 0 + else + !We aren't ready to communicate so keep packing + ja = ja+1 + + if(ja > atomap_num_l) call misc_error("ja greater than atom_num_l in gather_cg") + !Get the start position for the data in the send arrays + ireal=3*(packet_num) + iint = 2*(packet_num) + + + send_ints(iint+1) = type_atomap(ja) + send_ints(iint+2) = atomap_mask(ja) + send_reals(ireal+1:ireal+3) = r_atomap(:,ja) + !Increment the packet num + packet_num = packet_num + 1 + end if + end do + return + end subroutine gather_atomap + + subroutine write_pycac(timestep, filename, g) + integer, intent(in) :: timestep, g + character(len=*), intent(in) :: filename + integer :: i, ia, ie, ip, ibasis, inod, write_atoms, write_eles + real(kind=wp) :: v_vec(6) + + !Write pycac dump file +1 format("#This is a pycac dump file created using the quasi-static CAC code"/ & + "Can be converted using cacmb code from https://gitlab.com/aselimov/cacmb") +2 format("Atoms: ", i16, " Elements:", i16) +3 format("Atom format: id atom_type x y z pe fx fy fz v11 v22 v33 v32 v31 v21") +13 format("Atom format: id atom_type x y z pe fx fy fz v11 v22 v33 v32 v31 v21 velx vely velz") +4 format('Element format: id num_node num_basis esize') +5 format('Node format: ip ib basis_type x y z pe fx fy fz v11 v22 v33 v32 v31 v21') +15 format('Node format: ip ib basis_type x y z pe fx fy fz v11 v22 v33 v32 v31 v21 velx vely velz') +6 format("Timestep: ", i16) +7 format("Box_bd ", 3a, 6f23.15) + + !First get counts of atoms/elements in write group + !If group is all then it's just atom_num and ele_num + if (g == 1) then + write_atoms = atom_num + write_eles = ele_num + else + write_atoms = 0 + write_eles = 0 + do i = 1, atom_num + if(btest(a_mask_gather(i),g)) write_atoms = write_atoms + 1 + end do + do i =1, ele_num + if (btest(e_mask_gather(i), g)) write_eles = write_eles + 1 + end do + end if + !Open file if root + open(unit=11, file=trim(adjustl(filename)), action='write', status='replace', position='rewind') + !Write header information + write(11, 1) + write(11, 6) timestep + write(11, 2) write_atoms, write_eles + write(11, 7) (boundary(i:i)//" ", i = 1, 3), (box_bd(i), i = 1, 6) + + !get the atom type number into the form string + write(11, '(a, 999(f23.15,1x))') "Masses: ", (masses(i), i = 1, natom_types) + + !First write the atom section if needed + if (atom_num > 0) then + !Write atom header + if(need_vel) then + write(11,13) + else + write(11,3) + end if + do ia = 1, atom_num + if(btest(a_mask_gather(ia), g)) then + !Assign virial vector + v_vec(1) = virial_atom_gather(1,1,ia) + v_vec(2) = virial_atom_gather(2,2,ia) + v_vec(3) = virial_atom_gather(3,3,ia) + v_vec(4) = virial_atom_gather(3,2,ia) + v_vec(5) = virial_atom_gather(3,1,ia) + v_vec(6) = virial_atom_gather(2,1,ia) + + if (need_vel) then + write(11, 
*) tag_atom_gather(ia), type_atom_gather(ia), r_atom_gather(:,ia), & + energy_atom_gather(ia), force_atom_gather(:,ia), v_vec(:), & + vel_atom_gather(:,ia) + else + write(11, *) tag_atom_gather(ia), type_atom_gather(ia), r_atom_gather(:,ia), & + energy_atom_gather(ia), force_atom_gather(:,ia), v_vec(:) + end if + end if + end do + end if + + if(ele_num > 0) then + !Write cg header + write(11,4) + if(need_vel) then + write(11,15) + else + write(11,5) + end if + ip=0 + do ie = 1, ele_num + if(btest(e_mask_gather(ie), g)) then + !Write element information + write(11,'(4i16)' ) tag_ele_gather(ie), ng_node(etype_gather(ie)), basis_num_gather(ie), size_ele_gather(ie) + !write node information + do inod = 1, ng_node(etype_gather(ie)) + ip = ip + 1 + do ibasis = 1, basis_num_gather(ie) + v_vec(1) = virial_gather(1, 1, ibasis, ip) + v_vec(2) = virial_gather(2, 2, ibasis, ip) + v_vec(3) = virial_gather(3, 3, ibasis, ip) + v_vec(4) = virial_gather(3, 2, ibasis, ip) + v_vec(5) = virial_gather(3, 1, ibasis, ip) + v_vec(6) = virial_gather(2, 1, ibasis, ip) + if(need_vel) then + write(11, *) inod, ibasis, basis_type_gather(ibasis, ie), r_gather(:,ibasis,ip), & + energy_gather(ibasis,ip), force_gather(:, ibasis, ip), v_vec(:), & + vel_gather(:, ibasis,ip) + else + write(11, *) inod, ibasis, basis_type_gather(ibasis, ie), r_gather(:,ibasis,ip), & + energy_gather(ibasis,ip), force_gather(:, ibasis, ip), v_vec(:) + end if + end do + end do + end if + end do + end if + + close(11) + return + end subroutine write_pycac + + subroutine write_lmp(filename, g) + integer, intent(in) :: g + character(len = *) :: filename + + integer :: i, write_num =0, max_atom_tag + + open(unit=11, file=trim(adjustl(filename)), action='write', status='replace', position='rewind') + + !Write comment line + write(11, '(a)') '# lmp file made using CAC code' + write(11, '(a)') + + !Calculate total atom number to write + write_num = 0 + do i = 1, atom_num + if (btest(a_mask_gather(i), g)) write_num = write_num + 1 + end do + do i = 1, atomap_num + if (btest(atomap_mask_gather(i), g)) write_num = write_num + 1 + end do + + !Write total number of atoms + interpolated atoms + write(11, '(i16, a)') write_num, ' atoms' + !Write number of atom types + write(11, '(i16, a)') natom_types, ' atom types' + + write(11, '(a)') + !Write box boundaries + write(11, '(2f23.15, a)') box_bd(1:2), ' xlo xhi' + write(11, '(2f23.15, a)') box_bd(3:4), ' ylo yhi' + write(11, '(2f23.15, a)') box_bd(5:6), ' zlo zhi' + + !Masses + write(11, '(a)') 'Masses' + + write(11, '(a)') + do i = 1, natom_types + write(11, '(i16, f23.15)') i, masses(i) + end do + write(11, '(a)') + + !Now first write atom positions + write(11, '(a)') 'Atoms' + write(11, '(a)') + + do i = 1, atom_num + if (btest(a_mask_gather(i), g)) then + write(11, '(2i16, 3f23.15)') tag_atom_gather(i), type_atom_gather(i), r_atom_gather(:,i) + end if + end do + + max_atom_tag = maxval(tag_atom_gather) + do i = 1, atomap_num + if (btest(atomap_mask_gather(i), g)) then + write(11, '(2i16, 3f23.15)') max_atom_tag+i, type_atomap_gather(i), r_atomap_gather(:,i) + end if + end do + + close(11) + return + end subroutine write_lmp + + pure function need_dump(time) + integer, intent(in) :: time + logical :: need_dump + integer :: i + + need_dump = .false. + do i = 1, dump_num + if(mod(time, dump_every(i)) == 0) then + need_dump = .true. 
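+ !A dump is due: return as soon as any registered dump interval divides the current timestep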
+ return + end if + end do + return + end function need_dump +end module dump diff --git a/src/dynamics.f90 b/src/dynamics.f90 new file mode 100644 index 0000000..9b51d63 --- /dev/null +++ b/src/dynamics.f90 @@ -0,0 +1,195 @@ +module dynamics + + !This code contains the code needed to run the various types of dynamics + use parameters + use vel_verlet + use neighbors + use quenched_dynamics + use forces + use comms + use potential + use dump + use thermo + use time + use temp + use berendsen + use deform + + implicit none + + real(kind=wp), save :: itime + character(len = 100) :: ensemble + integer :: ensemble_option + + private :: pre_step, post_step, step + public + contains + + subroutine dynamics_defaults + ensemble_option = 0 + ensemble = "none" + end subroutine dynamics_defaults + + subroutine parse_dynamics(line) + character(len = *), intent(in) :: line + + character(len=read_len) tmptxt + integer :: iospara + + read(line, *, iostat = iospara) tmptxt, ensemble + if (iospara > 0) call read_error("Invalid read of dynamics command", iospara) + + select case(ensemble) + case("none") + ensemble_option = 0 + if(rank==root) call log_msg("Warning: no dynamics specified. Atom positions will not evolve") + case('NVE', 'nve') + ensemble_option = 1 + case('qd') + ensemble_option = 2 + case('eu') + ensemble_option = 3 + case('ld') + ensemble_option = 4 + case default + call command_error("Ensemble "//trim(adjustl(ensemble))//" is not currently accepted as an option for dynamics") + end select + + return + end subroutine parse_dynamics + + subroutine parse_run(line) + !parse the run command + character(len = *), intent(in) :: line + character(len=read_len) :: tmptxt + integer :: iospara + + begin_step = iter + read(line, *, iostat = iospara) tmptxt, run_steps + if (iospara > 0) call read_error("Invalid read of run command", iospara) + call run_dynamics(run_steps) + + end subroutine parse_run + + subroutine run_dynamics(num_steps) + !This subroutine actually runs the dynamics steps + integer, intent(in) :: num_steps + + integer ::i + + !Check to make sure timestep is greater than 0 + if(time_step <= 0.0_wp) then + call misc_error("Time_step was never set, please add command time_step val before run command") + end if + !Initialize the force calculation and the first dump + + if(nei_init) then + call update_neighbor(iter, .false., .true.) + else + if(ele_num > 0) call ghost_cg + if(atom_num> 0) call ghost_at + call neighbor_lists + end if + call update_force + if(first_run) call write_dump(iter, .true.) + call write_thermo_style + call write_thermo_out(iter) + + do i = 1, num_steps + iter = iter + 1 + t = t+time_step + + call pre_step(iter) + call step + call post_step(iter) + end do + + need_virial = .true. + call update_force + call write_dump(iter, .true.) + call write_thermo_out(iter) + + call log_neighbor_info + first_run = .false. + + end subroutine run_dynamics + + subroutine pre_step(i) + !This subroutine is run before each dynamics timestep is calculated + + !i is the current step number + integer, intent(in) ::i + + !If we are dumping this timestep then we need to calculate the virial stress + if(need_dump(i).or.pflag) need_virial = .true. + + !Check to see if we need virial for the thermo command + if((mod(i,thermo_every)==0).and.need_p) need_virial = .true. 
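+ !Apply any box deformation scheduled for this step (handled by the deform module)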
+ + call deform_box + + return + end subroutine pre_step + + subroutine post_step(i) + !This subroutine is run after the dynamics timestep is calculated + !i is the current step number + integer, intent(in) ::i + + !Check to see if we need to rescale velocity + if(tflag) call rescale_v + + !Check to see if we need to dilate box + if(pflag) call rescale_box + + !Dump check + call write_dump(i) + + !Thermo if we need it + if(mod(i,thermo_every)==0) call write_thermo_out(i) + + + need_virial = .false. + + return + end subroutine post_step + + subroutine step + !This subroutine calls the correct dynamics to run + !i is the current step number + select case(ensemble_option) + case(0) + continue + case(1) + call verlet(iter) + case(2) + call qd(iter) + case(3) + call euler(iter) + end select + + end subroutine step + + subroutine euler(iter) + integer, intent(in) :: iter + + integer :: ip, ibasis, ia + + do ip = 1, node_num_l + do ibasis=1, basis_num(node_cg(ip)) + vel(:, ibasis, ip) = vel(:, ibasis, ip) + time_step*ftm2v/masses(basis_type(ibasis,node_cg(ip))) & + *force_eq(:,ibasis,ip) + r(:, ibasis, ip) = r(:, ibasis, ip) + time_step*vel(:, ibasis,ip) + end do + end do + + do ia = 1, atom_num_l + vel_atom(:,ia) = vel_atom(:,ia) + time_step*ftm2v/masses(type_atom(ia))*force_atom(:,ia) + r_atom(:,ia) = r_atom(:,ia) + time_step*vel_atom(:,ia) + end do + + call update_neighbor(iter) + call update_force + end subroutine euler + +end module dynamics diff --git a/src/eam.f90 b/src/eam.f90 new file mode 100644 index 0000000..7a450f9 --- /dev/null +++ b/src/eam.f90 @@ -0,0 +1,1607 @@ +module eam + !Subroutines for eam potential style + + use parameters + use math + use forces + use elements + use integration + use neighbors + use comms + use errors + use atom_types + + implicit none + + integer, save :: numpts_p, numpts_d, numpts_e, pot_numf, pot_atom_types, eammpot_map(max_atom_types), & + eamtype_to_pair(max_atom_types, max_atom_types), eamtype_to_dens(max_atom_types, max_atom_types), & + pair_numf + + + integer :: nr, nrho + real(kind = wp), save :: drho, dr, rdrho, rdr + real(kind = wp), save :: edens_host_atom_sum, edens_host_atom_max, edens_host_atom_min, edens_host_intpo_sum, & + edens_host_intpo_max, edens_host_intpo_min, edensmax + + real(kind = wp), save :: p_start, p_finish, d_start, d_finish, e_start, e_finish + + real(kind = wp), allocatable, save :: edens_host_atom(:), edens_host_intpo(:, :), edens_host_atomap(:), & + embed_deriv_atom(:), embed_deriv_intpo(:,:), embed_deriv_atomap(:) + + real(kind = wp), allocatable, save :: zr(:) + real(kind = wp), allocatable, save :: pair_tab(:,:), edens_tab(:,:), embed_tab(:,:) + + + real(kind=wp), allocatable, private, save :: pair_spline(:,:,:), edens_spline(:,:,:), embed_spline(:,:,:) + real(kind=wp), allocatable, private, save :: pair_cutoff(:) + + integer, save :: type_to_pair(max_atom_types, max_atom_types), type_to_dens(max_atom_types, max_atom_types), & + pot_map(max_atom_types) + + character(len=2), dimension(max_atom_types) :: pair_atom_type + !fs + logical :: fsflag + + public + contains + + subroutine eam_lammps(filename, alloy_flag, fs_flag) + ! read a DYNAMO setfl formatted EAM(/ALLOY) file as used by LAMMPS + character(len=*), intent(in) :: filename + integer, intent(in) :: alloy_flag + logical, intent(in), optional :: fs_flag + + integer :: n, idx, i, j, k, ncol, nline_rho, nline_r, eloop, e, ei + real(kind =wp) :: val, mass + character(len=5) :: lattice + character(len=read_len) line + real(kind = wp), dimension(10) :: test_cols ! 
expected tokens per line <= 10 + logical :: fs + + if(present(fs_flag)) then + fs=fs_flag + else + fs=.false. + end if + open(21, file = filename, status = 'old', action = 'read', position = 'rewind') + ! check if we are reading single eam or eam/alloy to determine header settings + n = 4 + if (alloy_flag == 1) then + ! setfl header + ! skip 3 comment header lines and extract atom type info + do i=1, 3 + read(21,*) + end do + read(21, *, iostat = error) pot_numf, (pair_atom_type(i), i = 1, pot_numf) + read(21,*) numpts_e, drho, numpts_d, dr, rc_off + + else + + read(21,*) ! skip over first line comment info + + ! standard funcfl header + read(21,*) val, mass, val, lattice + + ! initialize fixed vals for single element + pot_numf = 1 + pair_numf = ((pot_numf**2 + pot_numf)/2) + end if + + + read(21,*) ! skip first header section + + ! determine the number of tokens per line + read(21, '(a)', iostat = error) line + do i = 1, 10 + read(line, *, iostat=error) test_cols(1:i) + if (error==-1) exit + enddo + ncol = i - 1 + + ! setup loop variables, array allocations and setup + nline_rho = numpts_e / ncol + nline_r = numpts_d / ncol + numpts_p = numpts_d + pair_numf = ((pot_numf**2 + pot_numf)/2) + + allocate( pair_tab(numpts_p+n, pair_numf), stat = allostat) + if(allostat /= 0) call alloc_error('failure to allocate pair_tab', allostat) + + p_start = 0 + p_finish = (numpts_d-1) * dr + pair_tab(:,:) = 0.0_wp + pair_tab(1,:) = real(numpts_p+n, wp) + pair_tab(4,:) = (p_finish - p_start) / real(numpts_p-1, wp) + pair_tab(2,:) = p_start - 5.0_wp * pair_tab(4,:) + pair_tab(3,:) = p_finish + + if(fs) then + allocate(edens_tab(numpts_d+n,pot_numf*pot_numf), stat = allostat) + if(allostat /= 0) call alloc_error('failure to allocate edens_tab', allostat) + else + allocate(edens_tab(numpts_d+n,pot_numf), stat = allostat) + if(allostat /= 0) call alloc_error('failure to allocate edens_tab', allostat) + end if + d_start = 0 + d_finish = (numpts_d-1) * dr + edens_tab(:,:) = 0.0_wp + edens_tab(1,:) = real(numpts_d+n, wp) + edens_tab(4,:) = (d_finish-d_start)/real(numpts_d-1, wp) + edens_tab(2,:) = d_start-5.0_wp*edens_tab(4,:) + edens_tab(3,:) = d_finish + + allocate( embed_tab(numpts_e+n, pot_numf), stat = allostat ) + if(allostat /= 0) call alloc_error("failure to allocate embed_tab", allostat) + e_start = 0 + e_finish = (numpts_e-1) * drho + embed_tab(:,:) = 0.0_wp + embed_tab(1,:) = real(numpts_e+n, wp) + embed_tab(4,:) = (e_finish-e_start)/real(numpts_e-1, wp) + embed_tab(2,:) = e_start-5.0_wp*embed_tab(4,:) + embed_tab(3,:) = e_finish + edensmax=e_finish + + ! reset to start of embedding data (two lines for alloy) + backspace 21 + if (alloy_flag == 1) backspace 21 + + if(fs) then + eloop = pot_numf + else + eloop = 1 + end if + + ei = 1 + do i = 1, pot_numf + ! read header info + read(21,*, iostat = error) val, val, val, lattice + if (error == -1) call read_error("failed to read element header", allostat) + ! read embedding data + do j = 1, nline_rho + idx = (j-1) * ncol + 1 + n + read(21, '(a)', iostat = error) line + if (ncol /= 1) then + read(line, *, iostat = error) (embed_tab(k, i), k = idx, idx + ncol -1) + else + read(line, *, iostat = error) embed_tab(idx, i) + endif + end do + + ! 
read electron density data + do e = 1, eloop + do j = 1, nline_r + idx = (j-1) * ncol + 1 + n + read(21, '(a)', iostat = error) line + if (ncol /= 1) then + read(line, *, iostat = error) (edens_tab(k, ei), k = idx, idx + ncol -1) + else + read(line, *, iostat = error) edens_tab(idx,ei) + endif + end do + ei = ei + 1 + end do + end do + + do i = 1, pair_numf + do j = 1, nline_r + idx = (j-1) * ncol + 1 + n + read(21, '(a)', iostat = error) line + if (ncol /= 1) then + read(line, *, iostat = error) (pair_tab(k, i), k = idx, idx + ncol -1) + else + read(line, *, iostat = error) pair_tab(idx,i) + endif + end do + + end do + + !Setup permutation lists for pair potential + eamtype_to_pair(:, :) = 0 + k = 1 + do i = 1, pot_numf + do j = 1, pot_numf + if (i >= j) then + eamtype_to_pair(i,j) = k + eamtype_to_pair(j,i) = k + k = k + 1 + else + exit + end if + end do + end do + + eamtype_to_dens(:,:) = 0 + if (fs) then + fsflag = .true. + ei = 1 + do i = 1, pot_numf + do j = 1, pot_numf + eamtype_to_dens(i,j) = ei + ei = ei + 1 + end do + end do + else + fsflag = .false. + do i = 1, pot_numf + eamtype_to_dens(i,:) = i + end do + end if + + close(21) + p_finish = min(rc_off, p_finish) + + end subroutine eam_lammps + + subroutine alloc_eam_arrays + !Allocate eam arrays + if(ele_num > 0) then + if(allocated(edens_host_atomap)) then + deallocate(edens_host_atomap, edens_host_intpo, embed_deriv_atomap, embed_deriv_intpo, stat = allostat) + if(allostat>0) call alloc_error("Failure to deallocate cg_eam arrays", allostat) + end if + allocate(edens_host_atomap(atomap_num_lg), edens_host_intpo(max_basisnum*max_intpo_num, ele_num_l), & + embed_deriv_atomap(atomap_num_lg), embed_deriv_intpo(max_basisnum*max_intpo_num, ele_num_l), stat = allostat) + if(allostat > 0) call alloc_error("Failure to alloc cg_eam arrays", allostat) + end if + + if(atom_num > 0 ) then + if(allocated(edens_host_atom)) then + deallocate(edens_host_atom, embed_deriv_atom, stat = allostat) + if(allostat > 0) call alloc_error("Failure to alloc at_eam arrays", allostat) + end if + allocate(edens_host_atom(atom_num_lg), embed_deriv_atom(atom_num_lg), stat=allostat) + if(allostat > 0) call alloc_error("Failure to alloc at_eam arrays", allostat) + end if + end subroutine alloc_eam_arrays + + subroutine comm_edens_intpo + !Now communicate edens for all integration points to make sure every processor has the host density for + !all integration points in the elements that it is in charge of + integer :: ie, je, iep, jep, ibasis + real(kind=wp), dimension(max_intpo_num*ele_shared_num*max_basisnum) :: edens_host_array, edens_host_buff + + edens_host_array(:) = 0.0_wp + do ie = 1, ele_num_l + je = ele_id_shared(ie) + + if(je /= 0) then + do iep = 1, intpo_count(itype(ie)) + do ibasis = 1, basis_num(ie) + jep = basis_num(ie)*(iep-1) + ibasis + + if(who_has_intpo(jep, ie)) then + + !First check to make sure the index fits within the element + if(max_intpo_num*max_basisnum*(je-1)+ jep > max_intpo_num*max_basisnum*ele_shared_num) then + print *, 'Error: Index of edens_host_array', & + max_intpo_num*max_basisnum*(je-1)+ jep, ' is larger than', & + ' array size', max_intpo_num*max_basisnum*ele_shared_num + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + edens_host_array(max_intpo_num*max_basisnum*(je-1)+jep) = edens_host_intpo(jep, ie) + end if + end do + end do + end if + end do + !Gather all edens host by summing them into edens_host_buff + edens_host_buff(:) = 0.0_wp + call mpi_allreduce(edens_host_array, edens_host_buff, 
max_intpo_num*ele_shared_num*max_basisnum, & + mpi_wp, mpi_sum, world, ierr) + !Now set edenshost for intpo that we don't own + do ie = 1, ele_num_l + je = ele_id_shared(ie) + if(je /= 0) then + do iep = 1, intpo_count(itype(ie)) + do ibasis = 1, basis_num(ie) + jep = basis_num(ie)*(iep-1) + ibasis + if(.not.who_has_intpo(jep, ie)) then + edens_host_intpo(jep, ie) = edens_host_buff(max_intpo_num*max_basisnum*(je-1)+jep) + end if + end do + end do + end if + end do + end subroutine comm_edens_intpo + + subroutine comm_edens_ghost_cg + !Now communicate edenhost for all ghost atomaps + + integer :: j, k, send_n, recv_n, iatomap, jatomap, latomap, send_n_max, & + ireq_e, ireq_t, ireq_n, send_rank, recv_rank, atomap_num_r + logical :: send_l, recv_l + integer, dimension(3) :: send_coords, recv_coords + integer, dimension(mpi_status_size) :: mstatus + integer, allocatable :: tag_send_buff(:), tag_recv_buff(:) + real(kind = wp), allocatable :: ed_send_buff(:), ed_recv_buff(:), em_send_buff(:), em_recv_buff(:) + + send_n_max = maxval(list_atomap_num) + allocate(tag_send_buff(send_n_max), tag_recv_buff(send_n_max), & + ed_send_buff(send_n_max), ed_recv_buff(send_n_max), & + em_send_buff(send_n_max), em_recv_buff(send_n_max), stat = allostat) + + if(allostat > 0) call alloc_error("Failure to allocate tag/ed_send/recv_buff", allostat) + tag_send_buff(:) = 0 + tag_recv_buff(:) = 0 + ed_send_buff(:) = 0 + ed_recv_buff(:) = 0 + em_send_buff(:) = 0 + em_recv_buff(:) = 0 + send_rank = grank + recv_rank = grank + atomap_num_r = send_n_max + jatomap = atomap_num_l + + !Loop over all of the send directions + do j = 1, 3 + do k = 1, 2 + send_coords(:) = grid_coords(:) + recv_coords(:) = grid_coords(:) + send_l = .true. + recv_l = .true. + + if(num_pro(j) == 1) then + send_l = .false. + !IF period than we may need to match some ghost atoms to local atoms + if(period(j).eqv..true.) then + recv_l = .true. + send_n = list_atomap_num(k, j) + recv_n = send_n + + !Resize if needed + if(recv_n > atomap_num_r) then + deallocate(tag_recv_buff, ed_recv_buff, stat = deallostat ) + if(deallostat /= 0) call alloc_error("Failure to deallocate tag/ed_recv_buff", deallostat) + allocate(tag_recv_buff(recv_n), ed_recv_buff(recv_n), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate tag/ed_recv_buff", allostat) + tag_recv_buff(:) = 0 + ed_recv_buff(:) = 0.0_wp + atomap_num_r = recv_n + end if + + !Add the needed information to the receive buffs + do iatomap = 1, send_n + latomap = list_atomap(iatomap, k, j) + tag_recv_buff(iatomap) = tag_atomap(latomap) + ed_recv_buff(iatomap) = edens_host_atomap(latomap) + em_recv_buff(iatomap) = embed_deriv_atomap(latomap) + end do + else + !If not periodic then we don't need to do anything + recv_l = .false. + recv_n = 0 + end if + !If more than one proc per side than we have to send it + else + send_coords(j) = grid_coords(j) - (-1)**k + recv_coords(j) = grid_coords(j) + (-1)**k + if(period(j).eqv..true.) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + !If not periodic then we have to make sure that the first/last processors don't send/recv when unneeded + if(k == 1) then + if(grid_coords(j) < num_pro(j) - 1) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + else + send_l = .false. + send_rank = -1 + end if + + if(grid_coords(j) > 0) then + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + recv_l = .false. 
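+ !This rank sits on the lower edge of a non-periodic direction, so there is no neighbor to receive from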
+ recv_rank = -1 + end if + + else if(k == 2) then + if(grid_coords(j) > 0) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + else + send_l = .false. + send_rank = -1 + end if + if(grid_coords(j) < num_pro(j) - 1) then + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + recv_l = .false. + recv_rank = -1 + end if + end if + end if + + !check send_l + if(send_l.neqv.list_atomap_logic(k, j)) then + print *, 'Error: send_l', send_l, ' should equal list_atomap_logic', & + list_atomap_logic(k, j), ' for k', k, ' and j', j + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !prepare send_buff + if(send_l.eqv..true.) then + send_n = list_atomap_num(k, j) + do iatomap = 1, send_n + latomap = list_atomap(iatomap, k, j) + tag_send_buff(iatomap) = tag_atomap(latomap) + ed_send_buff(iatomap) = edens_host_atomap(latomap) + em_send_buff(iatomap) = embed_deriv_atomap(latomap) + end do + end if + + !send/recv number + if(recv_l.eqv..true.) then + call mpi_irecv(recv_n, 1, mpi_integer, recv_rank, 1, grid_comm, ireq_n, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(send_n, 1, mpi_integer, send_rank, 1, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_n, mstatus, ierr) + end if + + !grow recv_buff if needed + if(recv_l.eqv..true.) then + if(recv_n > atomap_num_r) then + deallocate(tag_recv_buff, ed_recv_buff, em_recv_buff, stat = deallostat ) + if(deallostat /= 0) call alloc_error("Failure to deallocate tag/ed_recv_buff",deallostat) + + allocate(tag_recv_buff(recv_n), ed_recv_buff(recv_n), em_recv_buff(recv_n), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate tag/ed_recv_buff", allostat) + + tag_recv_buff(:) = 0 + ed_recv_buff(:) = 0.0_wp + em_recv_buff(:) = 0.0_wp + atomap_num_r = recv_n + end if + end if + + !send/recv tag and ed + if(recv_l.eqv..true.) then + call mpi_irecv(tag_recv_buff, recv_n, mpi_integer, recv_rank, 2, grid_comm, ireq_t, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(tag_send_buff, send_n, mpi_integer, send_rank, 2, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_t, mstatus, ierr) + end if + + if(recv_l.eqv..true.) then + call mpi_irecv(ed_recv_buff, recv_n, mpi_wp, recv_rank, 3, grid_comm, ireq_e, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(ed_send_buff, send_n, mpi_wp, send_rank, 3, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_e, mstatus, ierr) + end if + + if(recv_l.eqv..true.) then + call mpi_irecv(em_recv_buff, recv_n, mpi_wp, recv_rank, 3, grid_comm, ireq_e, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(em_send_buff, send_n, mpi_wp, send_rank, 3, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_e, mstatus, ierr) + end if + end if + + !check to make sure the tags match for ghost atoms + if(recv_l.eqv..true.) 
then + do iatomap = 1, recv_n + jatomap = jatomap + 1 + if(tag_recv_buff(iatomap) /= tag_atomap(jatomap)) then + print *, 'Error: Rank', rank, ' received atomap tag', & + tag_recv_buff(iatomap), ' of iatomap', iatomap, & + ' from rank', recv_rank, ' which should equal tag_atomap', & + tag_atomap(jatomap), ' of jatomap', jatomap, & + ' when j is', j, ' and k is', k, ' in edenshost' + call mpi_abort(mpi_comm_world, 1, ierr) + end if + edens_host_atomap(jatomap) = ed_recv_buff(iatomap) + embed_deriv_atomap(jatomap) = em_recv_buff(iatomap) + end do + end if + end do + end do + + !debug + + if(jatomap /= atomap_num_lg) then + print *, 'Error: Wrong jatomap', jatomap, & + ' which should equal atomap_num_lg', atomap_num_lg + call mpi_abort(mpi_comm_world, 1, ierr) + end if + end subroutine comm_edens_ghost_cg + + subroutine comm_edens_ghost_at + + integer :: j, k, send_n, recv_n, ia, ja, la, send_n_max, & + ireq_e, ireq_t, ireq_n, send_rank, recv_rank, atom_num_r, ireq_ee + logical :: send_l, recv_l + integer, dimension(3) :: send_coords, recv_coords + integer, dimension(mpi_status_size) :: mstatus + integer, allocatable :: tag_send_buff(:), tag_recv_buff(:) + real(kind = wp), allocatable :: ed_send_buff(:), ed_recv_buff(:), em_send_buff(:), em_recv_buff(:) + + send_n_max = maxval(list_atom_num) + allocate( tag_send_buff(send_n_max), tag_recv_buff(send_n_max), & + ed_send_buff(send_n_max), ed_recv_buff(send_n_max), & + em_send_buff(send_n_max), em_recv_buff(send_n_max), stat = allostat ) + if(allostat /= 0) call alloc_error("Failure to allocate tag/ed_send/recv_buff", allostat) + + !Initialize some variables + tag_send_buff(:) = 0 + tag_recv_buff(:) = 0 + ed_send_buff(:) = 0.0_wp + ed_recv_buff(:) = 0.0_wp + em_send_buff = 0.0_wp + em_recv_buff = 0.0_wp + ireq_t = 0 + ireq_e = 0 + ireq_n = 0 + ireq_ee = 0 + send_rank = grank + recv_rank = grank + atom_num_r = send_n_max + ja = atom_num_l + + ! k == 1: positive + ! k == 2: negative + !Do same looping strategy as in ghost code, this code is almost exactly the same + !as the code in comm_edens_ghost_cg so check that code or the ghost code itself for + !more indepth description + do j = 1, 3 + do k = 1, 2 + + send_coords(:) = grid_coords(:) + recv_coords(:) = grid_coords(:) + send_l = .true. + recv_l = .true. + + if(num_pro(j) == 1) then + + send_l = .false. + if(period(j).eqv..true.) then + recv_l = .true. + send_n = list_atom_num(k, j) + recv_n = send_n + + if(recv_n > atom_num_r) then + deallocate(tag_recv_buff, ed_recv_buff, em_recv_buff, stat = deallostat) + + if(deallostat /= 0) call alloc_error("Failure to deallocate tag/ed_recv_buff", deallostat) + + allocate(tag_recv_buff(recv_n), ed_recv_buff(recv_n), em_recv_buff(recv_n), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate tag/ed_recv_buff", allostat) + + tag_recv_buff(:) = 0 + ed_recv_buff(:) = 0.0_wp + em_recv_buff(:) = 0.0_wp + atom_num_r = recv_n + + end if + !Build send_array + do ia = 1, send_n + la = list_atom(ia, k, j) + tag_recv_buff(ia) = tag_atom(la) + ed_recv_buff(ia) = edens_host_atom(la) + em_recv_buff(ia) = embed_deriv_atom(la) + end do + + else + recv_l = .false. + recv_n = 0 + end if + else + send_coords(j) = grid_coords(j) - (-1) ** k + recv_coords(j) = grid_coords(j) + (-1) ** k + + if(period(j).eqv..true.) 
then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + if(k == 1) then + if(grid_coords(j) < num_pro(j) - 1) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + else + send_l = .false. + send_rank = -1 + end if + if(grid_coords(j) > 0) then + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + recv_l = .false. + recv_rank = -1 + end if + + else if(k == 2) then + if(grid_coords(j) > 0) then + call mpi_cart_rank(grid_comm, send_coords, send_rank, ierr) + else + send_l = .false. + send_rank = -1 + end if + + if(grid_coords(j) < num_pro(j) - 1) then + call mpi_cart_rank(grid_comm, recv_coords, recv_rank, ierr) + else + recv_l = .false. + recv_rank = -1 + end if + end if + end if + !check send_l + if(send_l.neqv.list_atom_logic(k, j)) then + print *, 'Error: Send_l', send_l, ' should equal list_atom_logic', & + list_atom_logic(k, j), ' for k', k, ' and j', j + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !prepare send_buff + if(send_l.eqv..true.) then + send_n = list_atom_num(k, j) + do ia = 1, send_n + la = list_atom(ia, k, j) + tag_send_buff(ia) = tag_atom(la) + ed_send_buff(ia) = edens_host_atom(la) + em_send_buff(ia) = embed_deriv_atom(la) + end do + end if + + !send/recv number + if(recv_l.eqv..true.) then + call mpi_irecv(recv_n, 1, mpi_integer, recv_rank, 0, grid_comm, ireq_n, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(send_n, 1, mpi_integer, send_rank, 0, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_n, mstatus, ierr) + end if + + !update recv_buff + if(recv_l.eqv..true.) then + if(recv_n > atom_num_r) then + deallocate( tag_recv_buff, ed_recv_buff, em_recv_buff, stat = deallostat) + if(deallostat /= 0) call alloc_error("Failure to deallocate tag/ed_recv_buff", deallostat) + + allocate(tag_recv_buff(recv_n), ed_recv_buff(recv_n), em_recv_buff(recv_n), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate tag/ed_recv_buff", allostat) + + tag_recv_buff(:) = 0 + ed_recv_buff(:) = 0.0_wp + em_recv_buff(:) = 0.0_wp + atom_num_r = recv_n + end if + end if + !send/recv tag and ed + + if(recv_l.eqv..true.) then + call mpi_irecv(tag_recv_buff, recv_n, mpi_integer, recv_rank, 1, grid_comm, ireq_t, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(tag_send_buff, send_n, mpi_integer, send_rank, 1, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_t, mstatus, ierr) + end if + + if(recv_l.eqv..true.) then + call mpi_irecv(ed_recv_buff, recv_n, mpi_wp, recv_rank, 2, grid_comm, ireq_e, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(ed_send_buff, send_n, mpi_wp, send_rank, 2, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_e, mstatus, ierr) + end if + + if(recv_l.eqv..true.) then + call mpi_irecv(em_recv_buff, recv_n, mpi_wp, recv_rank, 3, grid_comm, ireq_ee, ierr) + end if + if(send_l.eqv..true.) then + call mpi_send(em_send_buff, send_n, mpi_wp, send_rank, 3, grid_comm, ierr) + end if + if(recv_l.eqv..true.) then + call mpi_wait(ireq_ee, mstatus, ierr) + end if + + end if + + !check if the tags match + if(recv_l.eqv..true.) 
then + do ia = 1, recv_n + ja = ja + 1 + if(tag_recv_buff(ia) /= tag_atom(ja)) then + print *, 'Error: Rank', rank, ' received atom tag', & + tag_recv_buff(ia), ' of ia', ia, & + ' from rank', recv_rank, ' which should equal tag_atom', & + tag_atom(ja), ' of ja', ja, & + ' when j is', j, ' and k is', k, ' in edenshost' + + call mpi_abort(mpi_comm_world, 1, ierr) + end if + edens_host_atom(ja) = ed_recv_buff(ia) + embed_deriv_atom(ja) = em_recv_buff(ia) + end do + end if + end do + end do + !debug + if(ja /= atom_num_lg) then + print *, 'Error: Wrong ja', ja, & + ' which should equal atom_num_lg', atom_num_lg + + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + return + end subroutine comm_edens_ghost_at + + subroutine eamspline_arrays(nd, ne, np, dn, en, pn ) + !This subroutine allocates the spline arrays + integer, intent(in) :: nd, ne, np, dn, en, pn + + integer :: i, j, k, l, m, n + real(kind = wp), allocatable :: ptmp(:,:,:), etmp(:,:,:), dtmp(:,:,:), ctmp(:) + + if (allocated(pair_spline)) then + i = size(pair_spline(1,1,:)) + j = size(embed_spline(1,1,:)) + k = size(edens_spline(1,1,:)) + + l = max(np, size(pair_spline(1,:,1))) + m = max(ne, size(embed_spline(1,:,1))) + n = max(nd, size(edens_spline(1,:,1))) + + allocate(ptmp(7, l, i+pn), ctmp(i+pn), etmp(7, m, j + en), dtmp(7, n, k + dn)) + ptmp = 0.0_wp + ctmp = 0 + etmp = 0.0_wp + dtmp = 0.0_wp + + !Move allocation + ptmp(:, 1:size(pair_spline(1,:,1)), 1:size(pair_spline(1,1,:))) = pair_spline + call move_alloc(ptmp, pair_spline) + + ctmp(1:size(pair_cutoff)) = pair_cutoff + call move_alloc(ctmp, pair_cutoff) + + etmp(:, 1:size(embed_spline(1,:,1)), 1:size(embed_spline(1,1,:))) = embed_spline + call move_alloc(etmp, embed_spline) + + dtmp(:, 1:size(edens_spline(1,:,1)), 1:size(edens_spline(1,1,:))) = edens_spline + call move_alloc(dtmp, edens_spline) + else + allocate(pair_spline(7, np, pn), pair_cutoff(pn), embed_spline(7, ne, en), edens_spline(7, nd, dn)) + pair_spline = 0.0_wp + embed_spline = 0.0_wp + edens_spline = 0.0_wp + pair_cutoff = 0.0_wp + + end if + end subroutine eamspline_arrays + + + subroutine eamarray2spline + !This assumes that all of the potential arrays are equally spaced, get the calculation coefficients for + !the splines + integer :: i, eloop, pstart, estart, dstart + + if(allocated(pair_spline)) then + pstart = size(pair_spline(1,1,:)) + estart = size(embed_spline(1,1,:)) + dstart = size(edens_spline(1,1,:)) + else + pstart = 0 + estart = 0 + dstart = 0 + end if + + if(fsflag) then + eloop = pot_numf*pot_numf + else + eloop = pot_numf + end if + + call eamspline_arrays(numpts_d, numpts_e, numpts_p, eloop, pot_numf, pair_numf) + + + rdrho = 1.0/embed_tab(4,1) + rdr = 1.0/edens_tab(4,1) + nr = numpts_d + nrho = numpts_e + + do i =1, pot_numf + call interpolate(numpts_e, embed_tab(4,i), embed_tab(5:,i), embed_spline(:,:,estart+i)) + end do + + do i=1, eloop + call interpolate(numpts_d, edens_tab(4,i), edens_tab(5:,i), edens_spline(:,:,dstart+i)) + end do + + if (edens_tab(4,1) /= pair_tab(4,1)) then + call misc_error("Pair tab spacing must be the same as the electron density tab spacing") + end if + + do i = 1, pair_numf + call interpolate(numpts_p, pair_tab(4,i), pair_tab(5:,i), pair_spline(:,:,pstart+i)) + end do + + + return + end subroutine eamarray2spline + + subroutine set_eam_map_arrays + character(len=read_len) :: tmptxt + integer :: i, j + !Now map the type arrays from the eam code + pot_map=0 + if(atom_types_set) then + typeloop: do i = 1, natom_types + do j = 1, pot_numf + if (atom_names(i) == 
trim(adjustl(pair_atom_type(j)))) then + pot_map(i) = j + cycle typeloop + end if + end do + !If we don't get a match than log a warning + write(tmptxt,*) "Warning: ", atom_names(i), " not defined in eam potential file" + call log_msg(tmptxt) + end do typeloop + + !Now map the pair types and density types to the real ones + do i = 1, natom_types + do j = 1, natom_types + if((pot_map(i) > 0).and.(pot_map(j) > 0)) then + type_to_pair(i,j) = eamtype_to_pair(pot_map(i), pot_map(j)) + types_to_pot_type(i,j) = ibset(types_to_pot_type(i,j), 1) + pair_cutoff(type_to_pair(i,j)) = rc_off + end if + end do + end do + + !Now do the same thing to type to the edens maps + do i = 1, natom_types + do j = 1, natom_types + if((pot_map(i) > 0).and.(pot_map(j) > 0)) then + type_to_dens(i,j) = eamtype_to_dens(pot_map(i), pot_map(j)) + end if + end do + end do + + !If we don't have atom types defined then we can set them based on the elements within the potential file + else + call set_atom_types(pot_numf, pair_atom_type) + do i = 1, natom_types + pot_map(i) = i + end do + type_to_pair = eamtype_to_pair + type_to_dens = eamtype_to_dens + + end if + + + end subroutine set_eam_map_arrays + + subroutine update_force_eam + !This subroutine updates the force for all atoms and elements + + integer :: i,ie, iep, jep, ia, ja, iatom, iatomap, katomtype, latomtype, iatom_counts, ibasis, nei, & + ic, inod, ip, m, num_nei, en_list + real(kind=wp) :: rl(3), rk(3), flk(3), rlk(4), rsq, edens, edens_l, edens_k, f_intpo,d_embed_l, d_embed_k, & + eshape, f_atom, talliesme(3), recip, d_edens_l, & + d_edens_k, z2, z2p, phi, phip, psip, p, energy_sum, sum_e + real(kind=wp), dimension(max_basisnum*max_intpo_num) :: energy_intpo + real(kind=wp) :: embed_atom, coef(7) + real(kind=wp), dimension(max_basisnum*max_intpo_num, ele_num_l) :: pair_intpo, embed_intpo + real(kind=wp), dimension(3, max_basisnum*max_intpo_num, ele_num_l) :: force_intpo + real(kind=wp), dimension(3,3, max_basisnum*max_intpo_num, ele_num_l) :: virial_intpo + real(kind=wp), dimension(size(pair_cutoff)) :: pair_cutoffsq + + + !eam is potential type one so figure out which neighbor list is potential type one + en_list=1 !en_list is always one because eam is the first pair style defined + num_nei = 0 + pair_cutoffsq = pair_cutoff*pair_cutoff + if(.not.allocated(edens_host_atomap)) then + call alloc_eam_arrays + else if((atomap_num_lg > size(edens_host_atomap))) then + call alloc_eam_arrays + else if((ele_num_l > size(edens_host_intpo,2))) then + call alloc_eam_arrays + else if(.not.allocated(edens_host_atom)) then + call alloc_eam_arrays + else if((atom_num_lg > size(edens_host_atom))) then + call alloc_eam_arrays + end if + coef(:) = 0.0_wp + + if(ele_num > 0) then + edens_host_intpo = 0.0_wp + edens_host_atomap = 0.0_wp + end if + + !First we update electron density for the atoms + if(atom_num > 0) then + edens_host_atom = 0.0_wp + do ia = 1, atom_num_l + do i = 1, 3 + rl(i) = r_atom(i, ia) + end do + + !Loop over all neighbor atoms + do nei = 1, n_at_at(ia, en_list) + + ja = at_nei_at(nei, ia, en_list) + !Use symmetry of electron density + do i = 1, 3 + rk(i) = r_atom(i,ja) + end do + katomtype = type_atom(ja) + !Update edens host for current atom + do i = 1, 3 + rlk(i) = rk(i) - rl(i) + end do + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + + if(rsq < pair_cutoffsq(type_to_pair(katomtype, type_atom(ia)))) then + rlk(4) = sqrt(rsq) + p = rlk(4)*rdr+1.0_wp + m = int(p) + m = min(m, nr-1) + p = p - m + p = min(p, 1.0) + coef = 
edens_spline(:,m,type_to_dens(katomtype, type_atom(ia))) + edens = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + edens_host_atom(ia) = edens_host_atom(ia) + edens + + if(ja <= atom_num_l) then + coef = edens_spline(:,m,type_to_dens(type_atom(ia), katomtype)) + edens = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + edens_host_atom(ja) = edens_host_atom(ja) + edens + end if + end if + end do + !Loop over all neighbor atomaps + do nei = 1, n_at_cg(ia, en_list) + + ja = at_nei_cg(nei, ia, en_list) + katomtype = type_atomap(ja) + !Update edens host for current atom + rlk(1:3) = r_atomap(:,ja) - rl + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + + if(rsq < pair_cutoffsq(type_to_pair(katomtype, type_atom(ia)))) then + rlk(4) = sqrt(rsq) + p = rlk(4)*rdr+1.0_wp + m = int(p) + m = min(m, nr-1) + p = p - m + p = min(p, 1.0) + coef = edens_spline(:,m,type_to_dens(katomtype, type_atom(ia))) + edens = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + edens_host_atom(ia) = edens_host_atom(ia) + edens + + if(ja <=atomap_num_l) then + if(atomap_to_intpo(1,ja) /= 0) then + coef = edens_spline(:,m,type_to_dens(type_atom(ia), katomtype)) + edens = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + jep = atomap_to_intpo(1,ja) + ie = atomap_to_intpo(2,ja) + edens_host_intpo(jep, ie) = edens_host_intpo(jep, ie) + edens + edens_host_atomap(ja) = edens_host_atomap(ja) + edens + else if (needed_atomap(ja)) then + coef = edens_spline(:,m,type_to_dens(type_atom(ia), katomtype)) + edens = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + edens_host_atomap(ja) = edens_host_atomap(ja) + edens + end if + end if + end if + end do + + end do + !Now calculate all embedding energies and embedding energy derivatives + embed_deriv_atom = 0.0_wp + do ia = 1, atom_num_l + if(pot_map(type_atom(ia)) > 0) then + !First calculate the embedding energy contribution to the energy + p = edens_host_atom(ia)*rdrho+1.0_wp + m = int(p) + m = max(1, min(m, nrho-1)) + p = p -m + p = min(p, 1.0) + coef = embed_spline(:,m,pot_map(type_atom(ia))) + embed_atom = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + d_embed_l = (coef(1)*p + coef(2))*p + coef(3) + !Extra linear term to conserve energy when atoms are too close + if(edens_host_atom(ia) > edensmax) then + embed_atom = embed_atom + d_embed_l*(edens_host_atom(ia) - edensmax) + end if + energy_atom(ia) = energy_atom(ia) + embed_atom + embed_deriv_atom(ia) = d_embed_l + end if + end do + + !Now communicate ghost atom edens and embed_deriv + call comm_edens_ghost_at + + end if + + + !Now we have to update the electron host densities for all integration points + if (ele_num > 0) then + do ie = 1, ele_num_l + do iep = 1, intpo_count(itype(ie)) + do ibasis = 1, basis_num(ie) + jep = basis_num(ie)*(iep-1) + ibasis + if(who_has_intpo(jep, ie)) then + + iatom = atom_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + iatomap = cg_atomap((basis_num(ie) * (iatom-1)) + ibasis, ie) + do i = 1, 3 + rl(i) = r_atomap(i, iatomap) + end do + + !Loop over all atomaps + do nei = 1, n_cg_cg(jep, ie, en_list) + ja = cg_nei_cg(nei, jep, ie, en_list) + katomtype = type_atomap(ja) + rk = r_atomap(:,ja) + do i = 1, 3 + rlk(i) = rk(i) - rl(i) + end do + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + + if(rsq < pair_cutoffsq(type_to_pair(basis_type(ibasis,ie), katomtype))) then + rlk(4) = sqrt(rsq) + p = rlk(4)*rdr+1.0_wp + m = int(p) + m = min(m, nr-1) + p = p - m + p = min(p, 1.0) + coef = edens_spline(:,m,type_to_dens(katomtype, basis_type(ibasis, ie))) + edens = ((coef(4)*p + coef(5))*p + 
coef(6))*p + coef(7) + edens_host_intpo(jep, ie) = edens_host_intpo(jep, ie) + edens + end if + end do + + !Loop over all ghost atoms + do nei = 1, n_cg_at(jep,ie,en_list) + ja = cg_nei_at(nei, jep, ie, en_list) + katomtype = type_atom(ja) + rk = r_atom(:,ja) + do i = 1, 3 + rlk(i) = rk(i) - rl(i) + end do + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < pair_cutoffsq(type_to_pair(basis_type(ibasis,ie), katomtype))) then + rlk(4) = sqrt(rsq) + p = rlk(4)*rdr+1.0_wp + m = int(p) + m = min(m, nr-1) + p = p - m + p = min(p, 1.0) + coef = edens_spline(:,m,type_to_dens(katomtype, basis_type(ibasis, ie))) + edens = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + edens_host_intpo(jep, ie) = edens_host_intpo(jep, ie) + edens + end if + end do + end if + end do + end do + end do + + !Now communicate the edens for the integration points + call comm_edens_intpo + + embed_intpo = 0.0_wp + !Now discretely calculate the densities for all the needed atomaps if desired + if(atomap_neighbors) then + do iatomap = 1, atomap_num_l + !If it's an integration point then we already have the electron density + if(atomap_to_intpo(1, iatomap) /= 0) then + jep = atomap_to_intpo(1,iatomap) + ie = atomap_to_intpo(2,iatomap) + edens_host_atomap(iatomap) = edens_host_intpo(jep, ie) + !Otherwise if we need this density + else if (needed_atomap(iatomap)) then + do i = 1, 3 + rl(i) = r_atomap(i, iatomap) + end do + + !Loop over all atomaps + do nei = 1, n_atomap_cg(iatomap, en_list) + ja = atomap_nei_cg(nei, iatomap, en_list) + katomtype = type_atomap(ja) + rk = r_atomap(:,ja) + do i = 1, 3 + rlk(i) = rk(i) - rl(i) + end do + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + + if(rsq < pair_cutoffsq(type_to_pair(type_atomap(iatomap), katomtype))) then + rlk(4) = sqrt(rsq) + p = rlk(4)*rdr+1.0_wp + m = int(p) + m = min(m, nr-1) + p = p - m + p = min(p, 1.0) + coef = edens_spline(:,m,type_to_dens(katomtype, type_atomap(iatomap))) + edens = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + edens_host_atomap(iatomap) = edens_host_atomap(iatomap) + edens + end if + end do + + !Loop over all ghost atoms + do nei = 1, n_atomap_at(iatomap, en_list) + ja = atomap_nei_at(nei, iatomap, en_list) + katomtype = type_atom(ja) + rk = r_atom(:,ja) + do i = 1, 3 + rlk(i) = rk(i) - rl(i) + end do + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < pair_cutoffsq(type_to_pair(type_atomap(iatomap), katomtype))) then + rlk(4) = sqrt(rsq) + p = rlk(4)*rdr+1.0_wp + m = int(p) + m = min(m, nr-1) + p = p - m + p = min(p, 1.0) + coef = edens_spline(:,m,type_to_dens(katomtype, type_atomap(iatomap))) + edens = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + edens_host_atomap(iatomap) = edens_host_atomap(iatomap) + edens + end if + end do + end if + end do + + !Now loop over all atomaps and calculate the embedding energy derivative if needed + do ia = 1, atomap_num_l + if(needed_atomap(ia)) then + !First calculate the embedding energy contribution to the energy + p = edens_host_atomap(ia)*rdrho+1.0_wp + m = int(p) + m = max(1, min(m, nrho-1)) + p = p -m + p = min(p, 1.0) + coef = embed_spline(:,m,pot_map(type_atomap(ia))) + embed_atom = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + d_embed_l = (coef(1)*p + coef(2))*p + coef(3) + !Extra linear term to conserve energy when atoms are too close + if(edens_host_atomap(ia) > edensmax) then + embed_atom = embed_atom + d_embed_l*(edens_host_atomap(ia) - edensmax) + end if + embed_deriv_atomap(ia) = d_embed_l + + if(atomap_to_intpo(1,ia) /= 0) then + jep = 
atomap_to_intpo(1,ia) + ie = atomap_to_intpo(2,ia) + embed_intpo(jep,ie) = embed_atom + embed_deriv_intpo(jep,ie) = d_embed_l + end if + end if + end do + else + !If not doing it discretely then set the edenshost for all atomaps based on the integration point values + !First we add the embedding energy to integration point energy and calculate the erivative of the embedding energy + + do ie = 1, ele_num_l + do iep = 1, intpo_count(itype(ie)) + do ibasis = 1, basis_num(ie) + if(pot_map(basis_type(ibasis, ie)) > 0) then + jep = basis_num(ie)*(iep-1) + ibasis + !First calculate the embedding energy contribution to the energy + p = edens_host_intpo(jep,ie)*rdrho+1.0_wp + m = int(p) + m = max(1, min(m, nrho-1)) + p = p -m + p = min(p, 1.0) + coef = embed_spline(:,m,pot_map(basis_type(ibasis,ie))) + embed_atom = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + d_embed_l = (coef(1)*p + coef(2))*p + coef(3) + !Extra linear term to conserve energy when atoms are too close + if(edens_host_intpo(jep,ie) > edensmax) then + embed_atom = embed_atom + d_embed_l*(edens_host_intpo(jep,ie) - edensmax) + end if + embed_intpo(jep,ie) = embed_atom + embed_deriv_intpo(jep,ie) = d_embed_l + end if + end do + end do + end do + do ie = 1, ele_num_l + select case(etype(ie)) + case(1,2,3) + iatom_counts = (size_ele(ie)+1)**3 + end select + do iatom = 1, iatom_counts + do ibasis = 1, basis_num(ie) + iatomap = cg_atomap(basis_num(ie)*(iatom-1)+ibasis, ie) + if(iatomap /= 0) then + iep = who_rep_atomap(iatom, size_to_shape(size_ele(ie)), itype(ie)) + jep = basis_num(ie)*(iep-1) + ibasis + edens_host_atomap(iatomap) = edens_host_intpo(jep, ie) + embed_deriv_atomap(iatomap) = embed_deriv_intpo(jep,ie) + end if + end do + end do + end do + end if + + !Now communicate ghost virtual atom edens + call comm_edens_ghost_cg + end if + + + + talliesme(:) = 0 + + if(ele_num>0) then + force_intpo(:,:,:) = 0.0_wp + pair_intpo(:,:) = 0.0_wp + if(need_virial) then + virial_intpo(:,:,:,:) = 0.0_wp + end if + end if + + !Update atomistic region if needed + + if(atom_num > 0) then + !Loop over all atoms + do ia = 1, atom_num_l + + rl(:) = r_atom(:, ia) + latomtype = type_atom(ia) + edens_l = edens_host_atom(ia) + d_embed_l = embed_deriv_atom(ia) + + + !Now calculate the pair potential from atoms + do nei = 1, n_at_at(ia, en_list) + + ja = at_nei_at(nei, ia, en_list) + rk(:) = r_atom(:, ja) + katomtype = type_atom(ja) + edens_k = edens_host_atom(ja) + + rlk(1:3) = rk - rl + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < pair_cutoffsq(type_to_pair(latomtype, katomtype))) then + rlk(4) = sqrt(rsq) + !Get derivatives for edens + p = rlk(4)*rdr+1.0_wp + m = int(p) + m = min(m, nr-1) + p = p -m + p = min(p, 1.0) + coef = edens_spline(:,m, type_to_dens(latomtype, katomtype)) + d_edens_l = (coef(1)*p + coef(2))*p + coef(3) + coef = edens_spline(:,m, type_to_dens(katomtype, latomtype)) + d_edens_k = (coef(1)*p + coef(2))*p + coef(3) + + + d_embed_k = embed_deriv_atom(ja) + + !Now calculate pair energy and pair force + coef=pair_spline(:, m, type_to_pair(latomtype, katomtype)) + z2p = (coef(1)*p + coef(2))*p + coef(3) + z2 = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + + + !Now calculate force and energy + recip = 1.0/rlk(4) + phi = z2*recip + phip = z2p*recip - phi*recip + psip = d_embed_l*d_edens_k + d_embed_k*d_edens_l + phip + f_atom = psip*recip + + !Update force + flk(:) = f_atom * rlk(1:3) + force_atom(:, ia) = force_atom(:, ia) + flk(:) + + if(need_virial) then + do ic = 1, 3 + virial_atom(:, ic, ia) = virial_atom(:, 
ic, ia) + flk(:) * rlk(ic) / 2.0_wp + end do + end if + + !Update pair + energy_atom(ia) = energy_atom(ia) + phi/2 + + !Newtons law and symmetric pair potential + if(ja <= atom_num_l) then + force_atom(:, ja) = force_atom(:, ja) - flk(:) + + if(need_virial) then + do ic = 1, 3 + virial_atom(:, ic, ja) = virial_atom(:, ic, ja) + flk(:) * rlk(ic) / 2.0_wp + end do + end if + + !Update pair + energy_atom(ja) = energy_atom(ja) + phi/2 + end if + end if + end do + !Now calculate the pair potential from virtual atoms + do nei = 1, n_at_cg(ia, en_list) + + ja = at_nei_cg(nei, ia, en_list) + rk(:) = r_atomap(:, ja) + katomtype = type_atomap(ja) + edens_k = edens_host_atomap(ja) + + rlk(1:3) = rk - rl + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < pair_cutoffsq(type_to_pair(latomtype, katomtype))) then + rlk(4) = sqrt(rsq) + !Get derivatives for edens + p = rlk(4)*rdr+1.0_wp + m = int(p) + m = min(m, nr-1) + p = p -m + p = min(p, 1.0) + coef = edens_spline(:,m, type_to_dens(latomtype, katomtype)) + d_edens_l = (coef(1)*p + coef(2))*p + coef(3) + coef = edens_spline(:,m, type_to_dens(katomtype, latomtype)) + d_edens_k = (coef(1)*p + coef(2))*p + coef(3) + + !Now calculate pair energy and pair force + coef=pair_spline(:, m, type_to_pair(latomtype, katomtype)) + z2p = (coef(1)*p + coef(2))*p + coef(3) + z2 = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + + !Get embedding energy derivative for neighbor + d_embed_k = embed_deriv_atomap(ja) + + !Now calculate force and energy + recip = 1.0/rlk(4) + phi = z2*recip + phip = z2p*recip - phi*recip + psip = d_embed_l*d_edens_k + d_embed_k*d_edens_l + phip + f_atom = psip*recip + + !Update force + flk(:) = f_atom * rlk(1:3) + force_atom(:, ia) = force_atom(:, ia) + flk(:) + + if(need_virial) then + do ic = 1, 3 + virial_atom(:, ic, ia) = virial_atom(:, ic, ia) + flk(:) * rlk(ic) / 2.0_wp + end do + end if + + !Update pair + energy_atom(ia) = energy_atom(ia) + phi/2 + + + !Update for intpo neighbors + if(ja <= atomap_num_l) then + if(atomap_to_intpo(1,ja)/=0) then + jep = atomap_to_intpo(1,ja) + ie = atomap_to_intpo(2,ja) + + force_intpo(:,jep,ie) = force_intpo(:,jep,ie) - flk(:) + if(need_virial)then + do ic = 1, 3 + virial_intpo(:, ic, jep,ie) = virial_intpo(:, ic, jep,ie) + flk(:) * rlk(ic) / 2.0_wp + end do + end if + pair_intpo(jep,ie) = pair_intpo(jep,ie) + phi/2.0_wp + end if + end if + end if + end do + end do + end if + + + !Now actually update the force for the elements + if(ele_num > 0) then + + energy_intpo=0.0_wp + !Loop over all elements + do ie = 1, ele_num_l + !Initialize integration point variables + do iep = 1, intpo_count(itype(ie)) + + do ibasis = 1, basis_num(ie) + jep = basis_num(ie)*(iep-1)+ibasis + !If we have the integration point then calculate + if(who_has_intpo(iep, ie).eqv..true.) 
then + + iatom = atom_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + iatomap = cg_atomap(basis_num(ie)*(iatom-1) + ibasis, ie) + latomtype = type_atomap(iatomap) + rl(:) = r_atomap(:, iatomap) + edens_l = edens_host_intpo(jep,ie) + d_embed_l = embed_deriv_atomap(iatomap) + + !Now loop over all virtual atoms + do nei = 1, n_cg_cg(jep, ie, en_list) + + ja = cg_nei_cg(nei, jep, ie, en_list) + + rk(:) = r_atomap(:, ja) + katomtype = type_atomap(ja) + edens_k = edens_host_atomap(ja) + + rlk(1:3) = rk - rl + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < pair_cutoffsq(type_to_pair(latomtype,katomtype))) then + + + rlk(4) = sqrt(rsq) + + !Get derivatives for edens + p = rlk(4)*rdr+1.0_wp + m = int(p) + m = min(m, nr-1) + p = p -m + p = min(p, 1.0) + coef = edens_spline(:,m, type_to_dens(latomtype, katomtype)) + d_edens_l = (coef(1)*p + coef(2))*p + coef(3) + coef = edens_spline(:,m, type_to_dens(katomtype, latomtype)) + d_edens_k = (coef(1)*p + coef(2))*p + coef(3) + + !Now calculate pair energy and pair force + coef=pair_spline(:, m, type_to_pair(latomtype, katomtype)) + z2p = (coef(1)*p + coef(2))*p + coef(3) + z2 = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + + !Get embedding energy derivative for neighbor + d_embed_k = embed_deriv_atomap(ja) + + !Now calculate force and energy + recip = 1.0/rlk(4) + phi = z2*recip + phip = z2p*recip - phi*recip + psip = d_embed_l*d_edens_k + d_embed_k*d_edens_l + phip + f_intpo = psip*recip + + !Update force + flk(:) = f_intpo * rlk(1:3) + + force_intpo(:, jep, ie) = force_intpo(:, jep, ie) + flk(:) + + if(need_virial) then + do ic = 1, 3 + virial_intpo(:, ic, jep, ie) = virial_intpo(:, ic, jep, ie) & + + flk(:) * rlk(ic) / 2.0_wp + end do + end if + + !Update pair + pair_intpo(jep, ie) = pair_intpo(jep, ie) + phi/2.0_wp + end if + end do + + !Now calculate the pair potential from the atoms + do nei = 1, n_cg_at(jep, ie, en_list) + + ja = cg_nei_at(nei, jep, ie, en_list) + + rk(:) = r_atom(:, ja) + katomtype = type_atom(ja) + edens_k = edens_host_atom(ja) + + rlk(1:3) = rk - rl + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < pair_cutoffsq(type_to_pair(latomtype,katomtype))) then + + rlk(4) = sqrt(rsq) + + !Get derivatives for edens + p = rlk(4)*rdr+1.0_wp + m = int(p) + m = min(m, nr-1) + p = p -m + p = min(p, 1.0) + coef = edens_spline(:,m, type_to_dens(latomtype, katomtype)) + d_edens_l = (coef(1)*p + coef(2))*p + coef(3) + coef = edens_spline(:,m, type_to_dens(katomtype, latomtype)) + d_edens_k = (coef(1)*p + coef(2))*p + coef(3) + + !Now calculate pair energy and pair force + coef=pair_spline(:, m, type_to_pair(latomtype, katomtype)) + z2p = (coef(1)*p + coef(2))*p + coef(3) + z2 = ((coef(4)*p + coef(5))*p + coef(6))*p + coef(7) + + !Get embedding energy derivative for neighbor + d_embed_k = embed_deriv_atom(ja) + + !Now calculate force and energy + recip = 1.0/rlk(4) + phi = z2*recip + phip = z2p*recip - phi*recip + psip = d_embed_l*d_edens_k + d_embed_k*d_edens_l + phip + f_intpo = psip*recip + + !Update force + flk(:) = f_intpo * rlk(1:3) + force_intpo(:, jep, ie) = force_intpo(:, jep, ie) + flk(:) + + if(need_virial) then + do ic = 1, 3 + virial_intpo(:, ic, jep, ie) = virial_intpo(:, ic, jep, ie) & + + flk(:) * rlk(ic) / 2.0_wp + end do + end if + + !Update pair + pair_intpo(jep, ie) = pair_intpo(jep, ie) + phi/2.0_wp + end if + end do + end if + end do + end do + + !Now get the integration point energy + do iep = 1, intpo_count(itype(ie)) + do ibasis = 1, basis_num(ie) + jep = basis_num(ie)*(iep-1)+ibasis 
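+  !energy_intpo combines the pair-energy tally accumulated above with the
+  !embedding energy of this integration point; entries are indexed per basis
+  !atom as jep = basis_num(ie)*(iep-1) + ibasis, matching pair_intpo/embed_intpo.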
+ if(who_has_intpo(iep, ie).eqv..true.) then + energy_intpo(jep) = pair_intpo(jep, ie) + embed_intpo(jep,ie) + end if + end do + end do + + + !update integration point values using weight + + do iep = 1, intpo_count(itype(ie)) + do ibasis = 1, basis_num(ie) + jep = basis_num(ie)*(iep-1)+ibasis + iatom = atom_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + iatomap = cg_atomap(basis_num(ie)*(iatom-1) + ibasis, ie) + + if(who_has_intpo(jep, ie).eqv..true.) then + force_intpo(:, jep, ie) = force_intpo(:, jep, ie) & + * weight_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + + if(need_virial) then + virial_intpo(:, :, jep, ie) = virial_intpo(:, :, jep, ie) & + * weight_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + end if + energy_intpo(jep) = energy_intpo(jep) * weight_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + end if + end do + end do + + !calculate force, virial, and energy for nodes from integration point values + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do iep = 1, intpo_count(itype(ie)) + do ibasis = 1, basis_num(ie) + jep = basis_num(ie)*(iep-1) + ibasis + if(who_has_intpo(iep, ie).eqv..true.) then + + iatom = atom_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + eshape = a_interpo(inod, iatom, size_to_shape(size_ele(ie)), etype(ie)) + force(:, ibasis, ip ) = force(:, ibasis, ip) + eshape * force_intpo(:, jep, ie) + + if(need_virial) then + virial(:, :, ibasis, ip) = virial(:, :, ibasis, ip) & + + eshape * virial_intpo(:, :, jep, ie) + end if + + energy(ibasis, ip) = energy(ibasis, ip) + eshape*energy_intpo(jep) + + end if + end do + end do + end do + end do + + end if + + return + end subroutine update_force_eam + +end module eam diff --git a/src/elements.f90 b/src/elements.f90 new file mode 100644 index 0000000..5995470 --- /dev/null +++ b/src/elements.f90 @@ -0,0 +1,662 @@ +module elements + + use parameters + use errors + use box + use math + + implicit none + !This module contains all arrays and subroutines for processing atom and element data + + !Variables to track the various global values + integer, save :: ele_num, node_num, atom_num, ng_max_node, max_basisnum, atomap_num, atomap_max + + logical :: need_vel !this variable denotes if we need velocity arrays + + !Variables to track various local values + integer, save :: ele_num_l, node_num_l, atom_num_l, atomap_num_l, ele_num_lr, node_num_lr, atom_num_lr, atomap_num_lr, & + atomap_num_lg, atom_num_lg + + !Current accepted element type definitions + integer, parameter :: defined_element_types=3 + character(len=5), parameter, dimension(defined_element_types) :: element_types = (/ '1NN', '2NN', '20C' /) + logical, dimension(defined_element_types) :: etype_present + integer, parameter, dimension(defined_element_types) :: ng_node = (/ 8, 8, 20 /) !Number of nodes per element + integer :: max_size !Max of size_ele + + !Coarse-grained elements + integer, allocatable, save :: tag_ele(:), size_ele(:), etype(:), basis_num(:), basis_type(:,:), & + cg_node(:,:), node_cg(:), pb_node(:,:,:), cg_atomap(:,:), ele_glob_id(:), e_mask(:) + real(kind=wp), allocatable, save :: r(:,:,:), vel(:,:,:) !allocate to 3, max_basis_num, node_num_l, so it's nodal not + ! 
by element + + !Interpolated atoms + integer, allocatable, save :: tag_atomap(:), type_atomap(:), atomap_to_intpo(:,:) + real(kind=wp), allocatable, save :: r_atomap(:,:) + + !Interpolation variable + real(kind=wp), allocatable, save :: a_interpo(:,:,:,:) + integer :: unique_sizes + integer, allocatable :: size_to_shape(:), shape_sizes(:) + logical, allocatable :: needed_atomap(:) + + !Real atoms + integer, allocatable :: tag_atom(:), type_atom(:), a_mask(:) + real(kind=wp), allocatable :: r_atom(:,:), vel_atom(:,:) + public + contains + + subroutine alloc_at_arrays + !Allocate atom arrays + integer :: allostat + + if(allocated(tag_atom)) deallocate(tag_atom, type_atom, a_mask, r_atom) + + allocate( tag_atom(atom_num_l), & + type_atom(atom_num_l), & + a_mask(atom_num_l), & + r_atom(3,atom_num_l), & + stat=allostat) + if(allostat > 0) call alloc_error("Failed to allocate at_arrays", allostat) + + + tag_atom(:) = 0 + type_atom(:) = 0 + a_mask(:) = 0 + r_atom = 0.0_wp + + if(need_vel) then + if(allocated(vel_atom)) deallocate(vel_atom) + allocate(vel_atom(3,atom_num_l), stat=allostat) + if(allostat > 0) call alloc_error("Failed to allocate vel_atom", allostat) + vel_atom = 0.0_wp + end if + return + + end subroutine alloc_at_arrays + + subroutine dealloc_at_arrays + !deallocate atom arrays + integer :: allostat + + deallocate( tag_atom, type_atom, a_mask, r_atom, stat=allostat) + if(allostat > 0) call alloc_error("Failed to deallocate at_arrays", allostat) + + if(need_vel) then + deallocate(vel_atom, stat=allostat) + if(allostat > 0) call alloc_error("Failed to deallocate vel_atom", allostat) + end if + return + + end subroutine dealloc_at_arrays + + subroutine alloc_velocity_arrays + if((ele_num > 0) .and..not.allocated(vel)) then + allocate(vel(3, max_basisnum, node_num_lr), stat = allostat) + if (allostat > 0) call alloc_error("Failure to allocate vel in alloc_velocity array", allostat) + vel(:,:,:) = 0.0_wp + end if + if((atom_num > 0).and..not.allocated(vel_atom)) then + allocate(vel_atom(3, atom_num_lr), stat = allostat) + if (allostat > 0) call alloc_error("Failure to allocate vel_atom in alloc_velocity array", allostat) + vel_atom(:,:) = 0.0_wp + end if + end subroutine alloc_velocity_arrays + + subroutine dealloc_velocity_arrays + if((ele_num > 0) .and..not.allocated(vel)) then + deallocate(vel, stat = allostat) + if (allostat > 0) call alloc_error("Failure to deallocate vel in alloc_velocity array", allostat) + end if + if((atom_num > 0).and..not.allocated(vel_atom)) then + deallocate(vel_atom, stat = allostat) + if (allostat > 0) call alloc_error("Failure to deallocate vel_atom in alloc_velocity array", allostat) + end if + end subroutine dealloc_velocity_arrays + + subroutine grow_at_arrays(seg_real) + !Grow atom arrays + integer, intent(in), optional :: seg_real + + integer :: grow_by + integer, allocatable :: tag_array(:), type_array(:), mask_array(:) + real(kind=wp), allocatable :: r_array(:,:), vel_array(:,:) + + if (present(seg_real)) then + grow_by = seg_real + else + grow_by = seg_num + end if + + allocate( tag_array(atom_num_lr+grow_by), & + type_array(atom_num_lr+grow_by),& + mask_array(atom_num_lr+grow_by),& + r_array(3,atom_num_lr+grow_by), & + stat=allostat) + if(allostat > 0) call alloc_error("Failure allocating atom arrays in grow_at_arrays", allostat) + + tag_array(1:atom_num_lr) = tag_atom + tag_array(atom_num_lr+1:) = 0 + call move_alloc(tag_array, tag_atom) + + type_array(1:atom_num_lr) = type_atom + type_array(atom_num_lr+1:) = 0 + call move_alloc(type_array, 
type_atom) + + mask_array(1:atom_num_lr) = a_mask + mask_array(atom_num_lr+1:) = 0 + call move_alloc(mask_array, a_mask) + + r_array(:,1:atom_num_lr) = r_atom + r_array(:,atom_num_lr+1:) = 0.0_wp + call move_alloc(r_array, r_atom) + + if(need_vel) then + allocate(vel_array(3,atom_num_lr+grow_by), stat=allostat) + if(allostat > 0) call alloc_error("Failed allocating vel_array in grow_at_arrays", allostat) + vel_array(:,1:atom_num_lr) = vel_atom + vel_array(:,atom_num_lr+1:) = 0.0_wp + call move_alloc(vel_array, vel_atom) + end if + + atom_num_lr = atom_num_lr +grow_by + return + end subroutine grow_at_arrays + + subroutine alloc_cg_arrays + !Allocate arrays needed for coarse-grained elements + integer :: allostat + + if(allocated(tag_ele)) deallocate(tag_ele, size_ele, ele_glob_id, etype, cg_node, cg_atomap, basis_num, basis_type, & + node_cg, r) + allocate( & + tag_ele(ele_num_l), & + size_ele(ele_num_l), & + ele_glob_id(ele_num_l), & + etype(ele_num_l), & + e_mask(ele_num_l), & + cg_node(ng_max_node, ele_num_l), & + cg_atomap(atomap_max, ele_num_l), & + basis_num(ele_num_l), & + basis_type(max_basisnum, ele_num_l), & + node_cg(node_num_l), & + r(3,max_basisnum, node_num_l), & + stat=allostat ) + if (allostat > 0) call alloc_error("Failed allocating cg_arrays", allostat) + tag_ele(:) = 0 + size_ele(:) = 0 + etype(:) = 0 + e_mask(:) = 0 + ele_glob_id(:) = 0 + cg_node(:,:) =0 + cg_atomap(:,:) = 0 + basis_num(:) = 0 + basis_type(:,:) = 0 + node_cg(:) = 0 + r(:,:,:) = 0.0_wp + + + if(periodic) then + if (allocated(pb_node)) deallocate(pb_node) + allocate(pb_node(3, max_basisnum, node_num_l), stat=allostat) + if (allostat > 0) call alloc_error("Failed allcoating pb_node in alloc_cg_arrays", allostat) + pb_node(:,:,:) = 0 + end if + + if(need_vel) then + if (allocated(vel)) deallocate(vel) + allocate(vel(3, max_basisnum, node_num_l)) + if (allostat > 0) call alloc_error("Failed allocating cg vel array", allostat) + vel(:,:,:) = 0.0_wp + end if + + !Interpolation arrays + if(allocated(tag_atomap)) deallocate(tag_atomap, type_atomap, atomap_to_intpo, r_atomap) + allocate (& + tag_atomap(atomap_num_l), & + type_atomap(atomap_num_l), & + atomap_to_intpo(2, atomap_num_l), & + r_atomap(3, atomap_num_l), & + stat=allostat & + ) + + if (allostat > 0) call alloc_error("Failed allocating interp_arrays", allostat) + tag_atomap(:) = 0 + type_atomap = 0 + atomap_to_intpo = 0 + r_atomap(:,:) = 0.0_wp + + end subroutine alloc_cg_arrays + + subroutine dealloc_cg_arrays + !Deallocate arrays needed for coarse-grained elements + integer :: allostat + deallocate( tag_ele, size_ele, ele_glob_id, etype, e_mask, cg_node, cg_atomap, basis_num, basis_type, & + node_cg, r, stat=allostat ) + if (allostat > 0) call alloc_error("Failed deallocating cg_arrays", allostat) + if(periodic) then + deallocate(pb_node, stat=allostat) + if (allostat > 0) call alloc_error("Failed deallcoating pb_node in alloc_cg_arrays", allostat) + end if + + if(need_vel) then + deallocate(vel) + if (allostat > 0) call alloc_error("Failed deallocating cg vel array", allostat) + end if + + !Interpolation arrays + deallocate(tag_atomap, type_atomap, r_atomap, atomap_to_intpo, stat=allostat) + + if (allostat > 0) call alloc_error("Failed allocating interp_arrays", allostat) + + return + end subroutine dealloc_cg_arrays + + + subroutine grow_cg_arrays(opt, seg_real) + !This subroutine just grows the element arrays if needed + + !opt is 1 for growing element arrays, 2 for growing node arrays, 3 for growing atomap arrays + !seg_real is the growth size, 
usually compared to + integer, intent(in) :: opt + integer, intent(in), optional :: seg_real + + !temp element arrays + integer, allocatable ::tag_ele_array(:), size_ele_array(:), etype_array(:), cg_node_array(:,:), basis_num_array(:), & + basis_type_array(:,:), cg_atomap_array(:,:), mask_array(:) + + !Temp node arrays + integer, allocatable :: node_cg_array(:), pb_node_array(:,:,:), ele_glob_id_array(:) + real(kind=wp), allocatable :: r_array(:,:,:), vel_array(:,:,:) + + !temp atomap arrays + integer, allocatable :: tag_atomap_array(:), type_atomap_array(:), atomap_to_intpo_array(:,:) + real(kind=wp), allocatable :: r_atomap_array(:,:) + + integer :: grow_by + + if (present(seg_real)) then + grow_by = seg_real + else + grow_by = seg_num + end if + + if (opt == 1) then + allocate( & + tag_ele_array(ele_num_lr+grow_by), & + ele_glob_id_array(ele_num_lr+grow_by), & + size_ele_array(ele_num_lr+grow_by), & + etype_array(ele_num_lr+grow_by), & + mask_array(ele_num_lr+grow_by), & + cg_node_array(ng_max_node, ele_num_lr+grow_by), & + cg_atomap_array(atomap_max, ele_num_lr+grow_by), & + basis_num_array(ele_num_lr+grow_by), & + basis_type_array(max_basisnum, ele_num_lr+grow_by), & + stat=allostat & + ) + if(allostat > 0) call alloc_error("Failed allocation of temp ele arrays in grow_cg_arrays", allostat) + + tag_ele_array(1:ele_num_lr) = tag_ele(:) + tag_ele_array(ele_num_lr+1:) = 0 + call move_alloc(tag_ele_array, tag_ele) + + ele_glob_id_array(1:ele_num_lr) = ele_glob_id(:) + ele_glob_id_array(ele_num_lr+1:) = 0 + call move_alloc(ele_glob_id_array, ele_glob_id) + + size_ele_array(1:ele_num_lr) = size_ele(:) + size_ele_array(ele_num_lr+1:) = 0 + call move_alloc(size_ele_array, size_ele) + + etype_array(1:ele_num_lr) = etype(:) + etype_array(ele_num_lr+1:) = 0 + call move_alloc(etype_array, etype) + + mask_array(1:ele_num_lr) = e_mask + mask_array(ele_num_lr+1:)= 0 + call move_alloc(mask_array, e_mask) + + cg_node_array(:,1:ele_num_lr) = cg_node(:,:) + cg_node_array(:,ele_num_lr+1:) = 0 + call move_alloc(cg_node_array, cg_node) + + cg_atomap_array(:,1:ele_num_lr) = cg_atomap(:,:) + cg_atomap_array(:,ele_num_lr+1:) = 0 + call move_alloc(cg_atomap_array, cg_atomap) + + basis_num_array(1:ele_num_lr) = basis_num(:) + basis_num_array(ele_num_lr+1:) = 0 + call move_alloc(basis_num_array, basis_num) + + basis_type_array(:,1:ele_num_lr) = basis_type(:,:) + basis_type_array(:,ele_num_lr+1:) = 0 + call move_alloc(basis_type_array, basis_type) + + ele_num_lr = ele_num_lr + grow_by + else if (opt==2) then + allocate( node_cg_array(node_num_lr + grow_by), & + r_array(3, max_basisnum, node_num_lr + grow_by), & + stat=allostat) + + node_cg_array(1:node_num_lr) = node_cg + node_cg_array(node_num_lr+1:) = 0 + call move_alloc(node_cg_array, node_cg) + + r_array(:,:,1:node_num_lr) = r(:,:,1:node_num_lr) + r_array(:,:,node_num_lr+1:) = 0.0_wp + call move_alloc(r_array, r) + + if(need_vel) then + allocate( vel_array(3, max_basisnum, node_num_lr + grow_by), & + stat=allostat) + + if(allostat > 0) call alloc_error("Failure allocating vel in grow_cg_arrays",allostat) + vel_array(:,:,1:node_num_lr) = vel(:,:,1:node_num_lr) + vel_array(:,:,node_num_lr+1:) = 0.0_wp + call move_alloc(vel_array, vel) + end if + + if(periodic) then + allocate( pb_node_array(3, max_basisnum, node_num_lr + grow_by), & + stat=allostat) + if(allostat > 0) call alloc_error("Failure allocating pb_node in grow_cg_arrays",allostat) + pb_node_array(:,:,1:node_num_lr) = pb_node + pb_node_array(:,:,node_num_lr+1:) = 0.0_wp + call move_alloc(pb_node_array, 
pb_node) + end if + + node_num_lr = node_num_lr + grow_by + + else if(opt==3) then + !Resize atomap arrays + allocate(tag_atomap_array(atomap_num_lr + grow_by), & + type_atomap_array(atomap_num_lr + grow_by), & + atomap_to_intpo_array(2, size(atomap_to_intpo,2)+grow_by), & + r_atomap_array(3,atomap_num_lr + grow_by), & + stat = allostat) + if(allostat > 0) call alloc_error("Failure allocating temp atomap arrays in grow_cg_arrays", allostat) + + tag_atomap_array(1:atomap_num_lr) = tag_atomap(:) + tag_atomap_array(atomap_num_lr+1:) = 0 + call move_alloc(tag_atomap_array, tag_atomap) + + type_atomap_array(1:atomap_num_lr) = type_atomap(:) + type_atomap_array(atomap_num_lr+1:) = 0 + call move_alloc(type_atomap_array, type_atomap) + + atomap_to_intpo_array = 0 + atomap_to_intpo_array(:, 1:size(atomap_to_intpo,2)) = atomap_to_intpo + call move_alloc(atomap_to_intpo_array, atomap_to_intpo) + + r_atomap_array(:,1:atomap_num_lr) = r_atomap(:,:) + r_atomap_array(:,atomap_num_lr+1:) = 0.0_wp + call move_alloc(r_atomap_array, r_atomap) + + atomap_num_lr = atomap_num_lr + grow_by + end if + + return + end subroutine grow_cg_arrays + + subroutine setup_interpolation(max_esize, unique_enum, unique_esize) + !This subroutine sets up the interpolation arrays + integer, intent(in) :: max_esize, unique_enum + integer, dimension(unique_enum), intent(in) :: unique_esize + + integer :: i, ix, iy, iz, esize, iatom + real(kind = wp) :: dr, ds, dt + + !First allocate important arrays + allocate( a_interpo(maxval(ng_node), atomap_max, unique_enum, defined_element_types), & + size_to_shape(max_esize), & + shape_sizes(max_esize), & + stat = allostat) + if (allostat > 0) call alloc_error("Failed to allocate shape arrays in setup_interpolation", allostat) + + !Save the maximum esize as we need it later + max_size = max_esize + + !Now loop over all unique elements. 
We duplicate the interpolation shape function for all element types, this leads to some + !extra memory usage, but it shoudl be small enough to where it doesn't matter + size_to_shape(:) = 0 + unique_sizes = unique_enum + a_interpo(:,:,:,:) = 0.0_wp + do i = 1, unique_enum + esize = unique_esize(i) + shape_sizes(i) =unique_esize(i) + !size_to_shape maps the element size to the shape function index + size_to_shape(esize) = i + iatom=0 + + !Cube element shape functions + do iz = 1, esize+1 + dt=-1.0_dp +(iz-1)*(2.0_dp/(esize)) + do iy = 1, esize+1 + ds=-1.0_dp +(iy-1)*(2.0_dp/(esize)) + do ix = 1, esize+1 + dr=-1.0_dp +(ix-1)*(2.0_dp/(esize)) + iatom = iatom + 1 + a_interpo(1,iatom,i,1:2) = (1.0_wp-dr)*(1.0_wp-ds)*(1.0_wp-dt)/8.0_wp + a_interpo(2,iatom,i,1:2) = (1.0_wp+dr)*(1.0_wp-ds)*(1.0_wp-dt)/8.0_wp + a_interpo(3,iatom,i,1:2) = (1.0_wp+dr)*(1.0_wp+ds)*(1.0_wp-dt)/8.0_wp + a_interpo(4,iatom,i,1:2) = (1.0_wp-dr)*(1.0_wp+ds)*(1.0_wp-dt)/8.0_wp + a_interpo(5,iatom,i,1:2) = (1.0_wp-dr)*(1.0_wp-ds)*(1.0_wp+dt)/8.0_wp + a_interpo(6,iatom,i,1:2) = (1.0_wp+dr)*(1.0_wp-ds)*(1.0_wp+dt)/8.0_wp + a_interpo(7,iatom,i,1:2) = (1.0_wp+dr)*(1.0_wp+ds)*(1.0_wp+dt)/8.0_wp + a_interpo(8,iatom,i,1:2) = (1.0_wp-dr)*(1.0_wp+ds)*(1.0_wp+dt)/8.0_wp + end do + end do + end do + + !serendipity 20 node element shape function + iatom = 0 + do iz = 1, esize+1 + dt = (iz - ((real(esize,wp)/2.0_wp) + 1.0_wp)) / (real(esize,wp) / 2.0_wp) + do iy = 1, esize+1 + ds = (iy - ((real(esize,wp)/2.0_wp) + 1.0_wp)) / (real(esize,wp) / 2.0_wp) + do ix = 1, esize+1 + dr = (ix - ((real(esize,wp) / 2.0_wp) + 1.0_wp)) / (real(esize,wp) / 2.0_wp) + iatom = iatom + 1 + + !Corner nodes + a_interpo(1,iatom,i,3) = (1.0_dp-dr)*(1.0_dp-ds)*(1.0_dp-dt)*(-dr-ds-dt-2)/8.0_dp + a_interpo(2, iatom,i,3) = (1.0_dp+dr)*(1.0_dp-ds)*(1.0_dp-dt)*(dr-ds-dt-2)/8.0_dp + a_interpo(3, iatom,i,3) = (1.0_dp+dr)*(1.0_dp+ds)*(1.0_dp-dt)*(dr+ds-dt-2)/8.0_dp + a_interpo(4, iatom,i,3) = (1.0_dp-dr)*(1.0_dp+ds)*(1.0_dp-dt)*(-dr+ds-dt-2)/8.0_dp + a_interpo(5, iatom,i,3) = (1.0_dp-dr)*(1.0_dp-ds)*(1.0_dp+dt)*(-dr-ds+dt-2)/8.0_dp + a_interpo(6, iatom,i,3) = (1.0_dp+dr)*(1.0_dp-ds)*(1.0_dp+dt)*(dr-ds+dt-2)/8.0_dp + a_interpo(7, iatom,i,3) = (1.0_dp+dr)*(1.0_dp+ds)*(1.0_dp+dt)*(dr+ds+dt-2)/8.0_dp + a_interpo(8, iatom,i,3) = (1.0_dp-dr)*(1.0_dp+ds)*(1.0_dp+dt)*(-dr+ds+dt-2)/8.0_dp + + !Side nodes, first node r is zero + a_interpo(9, iatom,i,3) = (1-dr*dr)*(1-ds)*(1-dt)/4.0_dp + a_interpo(11, iatom,i,3) = (1-dr*dr)*(1+ds)*(1-dt)/4.0_dp + a_interpo(17, iatom,i,3) = (1-dr*dr)*(1-ds)*(1+dt)/4.0_dp + a_interpo(19, iatom,i,3) = (1-dr*dr)*(1+ds)*(1+dt)/4.0_dp + + !node s is zero + a_interpo(10, iatom,i,3) = (1+dr)*(1-ds*ds)*(1-dt)/4.0_dp + a_interpo(12, iatom,i,3) = (1-dr)*(1-ds*ds)*(1-dt)/4.0_dp + a_interpo(18, iatom,i,3) = (1+dr)*(1-ds*ds)*(1+dt)/4.0_dp + a_interpo(20, iatom,i,3) = (1-dr)*(1-ds*ds)*(1+dt)/4.0_dp + + !node t is zero + a_interpo(13, iatom,i,3) = (1-dr)*(1-ds)*(1-dt*dt)/4.0_dp + a_interpo(14, iatom,i,3) = (1+dr)*(1-ds)*(1-dt*dt)/4.0_dp + a_interpo(15, iatom,i,3) = (1+dr)*(1+ds)*(1-dt*dt)/4.0_dp + a_interpo(16, iatom,i,3) = (1-dr)*(1+ds)*(1-dt*dt)/4.0_dp + + end do + end do + end do + end do + + return + end subroutine setup_interpolation + + subroutine dealloc_shape_arrays + !this subroutine deallocates shape arrays. Will be needed if element sizes change for some reason. 
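+  !a_interpo caches every shape function value evaluated at each virtual lattice
+  !site for the unique element sizes, so after this deallocation
+  !setup_interpolation has to be run again before atoms can be interpolated.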
+ if(allocated(a_interpo)) then + deallocate( a_interpo, size_to_shape, shape_sizes, stat = allostat) + if (allostat > 0) call alloc_error("Failed deallocating shape arrays in dealloc_shape_arrays", allostat) + end if + + end subroutine + + subroutine interp_atom(iatom, esize, ele_type, pb_in, basisnum, r_nodes, r_iatom) + !This subroutine interpolates atom positions based on the iatom number + + !ARGUMENTS + integer, intent(in) :: iatom !Atom to interpolate + integer, intent(in) :: esize !Size of element, needed for getting the correct interpolation array + integer, intent(in) :: ele_type + integer, dimension(3, max_basisnum, ng_max_node), intent(in) :: pb_in !Periodic boundary info + integer, intent(in) :: basisnum !Number of basis atoms at nodes + real(kind=wp), dimension(3, max_basisnum, ng_max_node), intent(in) :: r_nodes !Contiguous array of nodal positions + real(kind=wp), dimension(3,max_basisnum), intent(out) :: r_iatom !Position of all interpolated atoms at lat point + + !INTERNAL VARIABLES + integer :: inod, ibasis, info(3), node_num + real(kind=wp) :: rout(3), shape_fun(ng_node(ele_type)) + logical :: ipb, ip_change(max_basisnum,ng_node(ele_type)) + + node_num = ng_node(ele_type) + !Initialization + r_iatom(:,:) = 0.0_wp + ipb =.false. + ip_change(:,:) = .false. + shape_fun = a_interpo(1:node_num, iatom, size_to_shape(esize), ele_type) + + do inod = 1, node_num + do ibasis = 1, basisnum + !Figure out periodic boundaries for the nodes + rout=r_nodes(:,ibasis, inod) + if(periodic) then + !Restore periodic boundaries if needed + call restore_pb(rout, pb_in(:,ibasis,inod), ipb) + ip_change(ibasis,inod) = ipb + end if + + r_iatom(:,ibasis) = r_iatom(:,ibasis) + rout*shape_fun(inod) + end do + end do + + !Now apply periodic boundaries if needed + + if(any(ip_change)) then + do ibasis = 1, basisnum + call cross_pb(r_iatom(:,ibasis), info) + if(.not. in_box_bd(r_iatom(:, ibasis))) then + call bounds_error("Atom incorrectly interpolated", r_iatom, box_bd) + end if + end do + end if + + return + end subroutine interp_atom + + + + subroutine update_virtual_atoms(update_all) + !This subroutine updates the virtual atoms from nodal positions + logical, intent(in) :: update_all + + integer :: ie, virt_count, iatom, ibasis, jatomap, iatomap, inod, ip, pb_in(3, max_basisnum, ng_max_node) + real(kind = wp) :: r_interp(3, max_basisnum), r_nodes(3, max_basisnum, ng_max_node) + + !Loop over all atoms + jatomap = 0 + do ie = 1, ele_num_l + select case(etype(ie)) + case(1,2,3) + virt_count = (size_ele(ie)+1)**3 + end select + + !Get the nodal values into a connected array + r_nodes(:,:,:) = 0.0_wp + pb_in(:,:,:) = 0 + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + r_nodes(:,:,inod) = r(:,:,ip) + if(periodic) pb_in(:,:,inod) = pb_node(:,:,ip) + end do + + do iatom = 1, virt_count + + !interpolate the atoms + call interp_atom(iatom, size_ele(ie), etype(ie), pb_in, basis_num(ie), r_nodes, r_interp) + do ibasis = 1, basis_num(ie) + !Now get the real atom position + iatomap=cg_atomap(basis_num(ie)*(iatom-1) + ibasis, ie) + + if(iatomap /= 0) then + jatomap = jatomap+1 + !This just checks to make sure that the cg_atomap is sorted, ie the first virtual atom we have + ! is first in r_atomap and the second is second etc. 
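+  !jatomap is a running counter over the locally owned virtual atoms, so a
+  !mismatch here means cg_atomap is not stored in sequential order and
+  !r_atomap cannot simply be filled in place.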
+ if(iatomap /= jatomap) then + print *, "Error: Iatomap", iatomap, " should equal", jatomap + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + r_atomap(:,iatomap) = r_interp(:,ibasis) + end if + end do + end do + end do + return + end subroutine update_virtual_atoms + + pure function get_virtual_count(et, esize) + !Return the number of virtual lattice points + integer, intent(in) :: et, esize + integer :: get_virtual_count + + select case(et) + case(1,2,3) + get_virtual_count = (esize+1)**3 + end select + return + end function get_virtual_count + + subroutine x2frac + !This subroutine changes r arrays to be fractional coordinates + + integer :: i, j, ibasis + + do i = 1, atom_num_l + do j = 1,3 + r_atom(j, i) = (r_atom(j,i)-box_bd(2*j-1))/(box_length(j)) + end do + end do + + do i = 1, node_num_l + do ibasis= 1, basis_num(node_cg(i)) + do j = 1, 3 + r(j, ibasis, i) = (r(j,ibasis,i)-box_bd(2*j-1))/(box_length(j)) + end do + end do + end do + return + end subroutine x2frac + + subroutine frac2x + !This subroutine changes the fractional coordinates back to real coordinates + integer :: i, j, ibasis + + do i = 1, atom_num_l + do j = 1,3 + r_atom(j,i) = r_atom(j,i)*box_length(j) + box_bd(2*j-1) + end do + end do + + do i = 1, node_num_l + do ibasis= 1, basis_num(node_cg(i)) + do j = 1, 3 + r(j,ibasis,i) = r(j, ibasis, i)*box_length(j) + box_bd(2*j-1) + end do + end do + end do + return + end subroutine frac2x + +end module elements diff --git a/src/errors.f90 b/src/errors.f90 new file mode 100644 index 0000000..3125922 --- /dev/null +++ b/src/errors.f90 @@ -0,0 +1,73 @@ +module errors + !This subroutines is in charge of handling error strings and exiting the application + use mpi + use parameters + use logger + + implicit none + public + contains + + subroutine alloc_error(error_string, allostat) + integer, intent(in) :: allostat + character(len=*), intent(in) :: error_string + character(len=read_len) :: msg + + write(msg, *) "Allocate Error: ", trim(adjustl(error_string)), " with allostat ", allostat + call exit_error(msg) + return + end subroutine alloc_error + + subroutine bounds_error(error_string, r, bounds) + !Atom unexpectedly outside boundary, usually box boundary + real(kind=wp), intent(in) :: r(3), bounds(6) + character(len=*), intent(in) :: error_string + character(len=read_len) :: msg + + write(msg,*) "Bounds Error: ", trim(adjustl(error_string)), " for position ", r, " and bounds", bounds + call exit_error(msg) + return + end subroutine bounds_error + + subroutine read_error(error_string, stat) + !Error when parsing string + character(len = *), intent(in) :: error_string + integer, intent(in) :: stat + character(len=read_len) :: msg + + write(msg,*) "Read Error: ", trim(adjustl(error_string)), " with stat ", stat + call exit_error(msg) + return + end subroutine read_error + + subroutine misc_error(error_string) + !Misc_error + character(len=*), intent(in) :: error_string + character(len=read_len) :: msg + + write(msg,*) "Error: ", trim(adjustl(error_string)) + call exit_error(msg) + return + end subroutine misc_error + + subroutine command_error(error_string) + !Error with incorrect command + character(len = *) :: error_string + character(len=read_len) :: msg + + write(msg,*) "Command Error: ", trim(adjustl(error_string)) + call exit_error(msg) + return + end subroutine command_error + + subroutine exit_error(msg) + !Exit code with error + character(len=*), intent(in) :: msg + + call log_msg(msg, 0, .true.) 
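+  !Close the log before aborting so the message above is (presumably) written
+  !out to the log file rather than lost when mpi_abort tears the run down.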
+ call close_log + call mpi_abort(mpi_comm_world, 1, ierr) + return + end subroutine exit_error + +end module errors diff --git a/src/fire.f90 b/src/fire.f90 new file mode 100644 index 0000000..a07e6a6 --- /dev/null +++ b/src/fire.f90 @@ -0,0 +1,284 @@ +module fire + + use parameters + use comms + use elements + use forces + use time + use atom_types + use neighbors + use potential + use debug + + implicit none + + real(kind=wp), private, save :: tmax, tmin, dtgrow, dtshrink, alpha0, alphashrink, alpha, last_negative, dmax, & + dtmax, dtmin + integer, private, save :: delaystep, vdfmax, vdotf_negatif, neval + logical, private, save :: initialdelay, flagv0 + + real(kind = wp), parameter :: & + f_inc_fire = 1.1_wp, & + f_dec_fire = 0.5_wp, & + f_alpha_fire = 0.99_wp, & + alpha_start_fire = 0.1_wp + + real(kind=wp), save :: alpha_fire + + + public + contains + + subroutine fire_defaults + !This sets the default parameters for fire. These are taken from the default parameters used in lammps + !Presented in https://doi.org/10.1016%2Fj.commatsci.2020.109584 + !Max timstep is tmax x delta t_start + tmax = 10.0_wp + !Minimum timestep is tmin x delta t_start + tmin = 0.02_wp + !Number of steps to wait after P<0 before increasing deltat + delaystep = 20 + !Factor by which del t increases/decrases + dtgrow = 1.1_wp + dtshrink = 0.5_wp + !Coefficient for mixing velocity and force vectors and the factor by which it decreases + alpha0 = 0.25_wp + alpha = alpha0 + alphashrink = 0.99_wp + !Exit after vdfmax consecutive iterations with P(t) < 0 + vdfmax = 2000 !Inertia correction halfstepback = .true. activates initial delay in modifying delta t and alpha + initialdelay = .true. + dmax = 0.1 + + end subroutine fire_defaults + + subroutine set_fire_param(param, val) + character(len=*), intent(in) :: param + real(kind=wp), intent(in) :: val + + select case(param) + case('alpha0') + alpha0=val + case('tmax') + tmax=val + case('tmin') + tmin=val + end select + + end subroutine set_fire_param + + subroutine fire_init + !Initialize fire variables + last_negative = 0 + vdotf_negatif = 0 + flagv0=.true. 
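+  !With flagv0 set, the next fire_iterate call rebuilds velocities directly
+  !from the forces (v = dt*F/m).  dtmax and dtmin then bound the adaptive
+  !timestep to [tmin, tmax] times the starting time_step.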
+ neval = 0 + alpha = alpha0 + dtmax = tmax*time_step + dtmin = tmin*time_step + end subroutine fire_init + + subroutine fire_iterate(i, code) + integer, intent(in) :: i + integer, intent(out) :: code + + integer :: ie, ibasis, ip, ia + real(kind= wp) :: vdotfme, vdotfall, vdotvme, vdotvall, fdotfme, fdotfall, scale1, scale2, dtf, dtfm, & + dtvone, vmax, dtv + logical :: delayflag + + !First calculate vdotfall + vdotfme = 0.0_wp + code=0 + do ip = 1, node_num_l + ie = node_cg(ip) + if(who_has_ele(ie)) then + do ibasis = 1, basis_num(ie) + !We multiply by the energy of all virtual atoms for the energy, which is force_eq times the nodal mass + vdotfme = vdotfme + vel(1,ibasis,ip)*force_eq(1,ibasis,ip)+vel(2,ibasis,ip)*force_eq(2,ibasis,ip) & + + vel(3,ibasis,ip)*force_eq(3,ibasis,ip) + end do + end if + end do + do ia = 1, atom_num_l + vdotfme = vdotfme + force_atom(1,ia)*vel_atom(1,ia) + force_atom(2,ia)*vel_atom(2,ia) & + + force_atom(3,ia)*vel_atom(3,ia) + end do + call mpi_allreduce(vdotfme, vdotfall, 1, mpi_wp, mpi_sum, world, ierr) + + + !if (v dot f) > 0: + !v = (1-alpha) v + alpha |v| Fhat + !|v| = length of v, Fhat = unit f + !Only: (1-alpha) and alpha |v| Fhat is calculated here + !the modificatin of v is made within the integration, after v update + !if more than delaystep since v dot f was negative: + !increase timestep, update global timestep and decrease alpha + if (vdotfall > 0) then + vdotvme = 0.0_wp + fdotfme = 0.0_wp + vdotf_negatif = 0 + do ip = 1, node_num_l + ie = node_cg(ip) + if (who_has_ele(ie)) then + do ibasis = 1, basis_num(ie) + !We multiply by the force/velocity of all virtual atoms for the energy, which is force_eq times the nodal mass + vdotvme = vdotvme + vel(1,ibasis,ip)*vel(1,ibasis,ip)+vel(2,ibasis,ip)*vel(2,ibasis,ip) & + + vel(3,ibasis,ip)*vel(3,ibasis,ip) + + fdotfme = fdotfme + force_eq(1,ibasis,ip)*force_eq(1,ibasis,ip) & + + force_eq(2,ibasis,ip)*force_eq(2,ibasis,ip) & + + force_eq(3,ibasis,ip)*force_eq(3,ibasis,ip) + + end do + end if + end do + do ia = 1, atom_num_l + vdotvme = vdotvme + vel_atom(1,ia)*vel_atom(1,ia) + vel_atom(2,ia)*vel_atom(2,ia) & + + vel_atom(3,ia)*vel_atom(3,ia) + fdotfme = fdotfme + force_atom(1,ia)*force_atom(1,ia) + force_atom(2,ia)*force_atom(2,ia) & + + force_atom(3,ia)*force_atom(3,ia) + end do + call mpi_allreduce(vdotvme, vdotvall, 1, mpi_wp, mpi_sum, world, ierr) + call mpi_allreduce(fdotfme, fdotfall, 1, mpi_wp, mpi_sum, world, ierr) + + !Calculate scaling factors + scale1 = 1.0 - alpha + if(fdotfall <= 1d-20) then + scale2 = 0.0_wp + else + scale2 = alpha * sqrt(vdotvall/fdotfall) + end if + + if(i - last_negative > delaystep) then + time_step = min(time_step*dtgrow, dtmax) + alpha = alpha * alphashrink + end if + else + last_negative = i + delayflag = .true. + if ((i - 1 < delaystep).and.initialdelay) delayflag = .false. + if(delayflag) then + alpha = alpha0 + if(time_step*dtshrink >= dtmin) time_step = time_step * dtshrink + end if + + !Check stopping criterion + vdotf_negatif = vdotf_negatif + 1 + if ((vdfmax > 0).and.(vdotf_negatif > vdfmax)) then + code = 3 + return + end if + + !Apply intertia correcection + do ip = 1, node_num_l + do ibasis = 1, basis_num(ie) + r(:,ibasis, ip) = r(:, ibasis, ip) - 0.5_wp*time_step*vel(:,ibasis,ip) + end do + end do + do ia = 1, atom_num_l + r_atom(:,ia) = r_atom(:, ia) - 0.5_wp*time_step*vel_atom(:,ia) + end do + + !Zero velocities + if(node_num_l > 0) vel(:,:,:) = 0.0_wp + if(atom_num_l > 0 ) vel_atom(:,:) = 0.0_wp + flagv0 = .true. 
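+  !End of the v.F <= 0 branch: positions were backtracked by half a step,
+  !velocities zeroed, and flagv0 requests a fresh v = dt*F/m evaluation below.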
+ end if + + !Evaluate velocity to determine whether dtv has to be limited. Required when v is reset + if(flagv0) then + dtf = time_step*ftm2v + call update_neighbor(iter) + call update_force + neval = neval + 1 + do ip = 1, node_num_l + ie = node_cg(ip) + do ibasis = 1, basis_num(ie) + dtfm = dtf/masses(basis_type(ibasis,ie)) + vel(:,ibasis, ip) = dtfm * force_eq(:, ibasis, ip) + end do + end do + do ia = 1, atom_num_l + dtfm = dtf/masses(type_atom(ia)) + vel_atom(:,ia) = dtfm*force_atom(:,ia) + end do + end if + + !Limit timestep so no particle moves further than dmax + dtvone = time_step + do ip = 1, node_num_l + if(who_has_ele(node_cg(ip))) then + vmax = maxval(vel(:,:,ip)) + if(dtvone*vmax > dmax) dtvone = dmax/vmax + end if + end do + do ia = 1, atom_num_l + vmax = maxval(vel_atom(:,ia)) + if(dtvone*vmax > dmax) dtvone = dmax/vmax + end do + call mpi_allreduce(dtvone, dtv, 1, mpi_wp, mpi_min, world, ierr) + + !Reset velocities if necessary + if(flagv0) then + if(node_num_l > 0) vel(:,:,:) = 0.0_wp + if(atom_num_l > 0) vel_atom(:,:) = 0.0_wp + end if + + !Now do the integration step, this is the lammps default semi-implicit Euler scheme + + dtf = dtv * ftm2v + do ip = 1, node_num_l + do ibasis = 1, basis_num(node_cg(ip)) + dtfm = dtf/masses(basis_type(ibasis,node_cg(ip))) + vel(:,ibasis, ip) = vel(:, ibasis, ip) + dtfm * force_eq(:, ibasis, ip) + if (vdotfall > 0.0_wp) vel(:,ibasis,ip) = scale1*vel(:,ibasis,ip) + scale2*force_eq(:,ibasis,ip) + r(:,ibasis,ip) = r(:, ibasis, ip) + dtv*vel(:, ibasis,ip) + end do + end do + + do ia = 1, atom_num_l + dtfm = dtf/masses(type_atom(ia)) + vel_atom(:,ia) = vel_atom(:,ia) + dtfm*force_atom(:,ia) + if(vdotfall > 0.0_wp) vel_atom(:,ia) = scale1*vel_atom(:,ia) + scale2*force_atom(:,ia) + r_atom(:,ia) = r_atom(:,ia) + dtv*vel_atom(:,ia) + end do + + call update_neighbor(iter) + call update_force + + neval = neval + 1 +! do ip = 1, node_num_l +! do ibasis = 1, basis_num(ie) +! dtfm = dtf/masses(basis_type(ibasis,ie)) +! vel(:,ibasis, ip) = vel(:, ibasis, ip) + dtfm * force_eq(:, ibasis, ip) +! end do +! end do +! do ia = 1, atom_num_l +! dtfm = dtf/masses(type_atom(ia)) +! vel_atom(:,ia) = vel_atom(:,ia) + dtfm*force_atom(:,ia) +! end do + + !Set the velocity evaluation flog + flagv0 = .false. + + end subroutine fire_iterate + + pure function get_fire_delaystep() + integer :: get_fire_delaystep + + get_fire_delaystep = delaystep + return + end function + + pure function get_fire_neval() + integer :: get_fire_neval + + get_fire_neval = neval + return + end function + + subroutine fire_clean + time_step = orig_time_step + end subroutine fire_clean +end module diff --git a/src/force_mod.f90 b/src/force_mod.f90 new file mode 100644 index 0000000..da8d069 --- /dev/null +++ b/src/force_mod.f90 @@ -0,0 +1,196 @@ +module force_mod + use forces + use parameters + use elements + use group + use str + use time + + implicit none + + integer, parameter, private :: max_force = 20 + integer :: set_force_num, add_force_num + + integer, private :: force_group(max_force), add_force_group(max_force), add_force_every(max_force) + logical, private :: force_mask(3, max_force), add_force_mask(3, max_force), add_force_norm(max_force), & + first_add(max_force) + real(kind=wp), private :: fvec(3, max_force), add_f(3, max_force) + + public + contains + + subroutine init_set_force + set_force_num = 0 + force_group = 0 + force_mask = .false. + first_add = .false. 
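+  !fvec stores the prescribed force components for each set_force command; only
+  !components whose force_mask entry is .true. get overwritten in run_set_force,
+  !the remaining components keep their computed values.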
+ fvec = 0.0_wp + end subroutine + + subroutine init_add_force + add_force_num = 0 + add_force_group = 0 + add_force_mask = .false. + add_force_every(:) = 0 + add_f = 0.0_wp + end subroutine init_add_force + + subroutine parse_set_force(line) + !This subroutine parses the set_force command + character(len = *), intent(in) :: line + character(len=read_len) :: tmptxt, g, txtvec(3) + integer :: iospara, i + + set_force_num = set_force_num + 1 + first_add(set_force_num) = .true. + read(line, *, iostat=iospara) tmptxt, g, (txtvec(i), i = 1, 3) + if(iospara > 0) call read_error("Failure to read set_force command", iospara) + + !Get group number + force_group(set_force_num) = get_group_index(g) + if(force_group(set_force_num) == 0) then + call misc_error("Group "//trim(adjustl(g))// " in set_force command has not been defined. "// & + "Please define group before use") + end if + + !Read force_vec and force_mask + do i =1, 3 + call to_lower(txtvec(i)) + if(.not.(txtvec(i) == 'null')) then + force_mask(i, set_force_num) = .true. + read(txtvec(i), *, iostat = iospara) fvec(i, set_force_num) + if(iospara > 0) call read_error("Failure to read force vector component "//trim(adjustl(txtvec(i))), iospara) + end if + end do + + end subroutine parse_set_force + + subroutine parse_add_force(line) + character(len=*), intent(in) :: line + character(len=read_len) :: tmptxt, g, txtvec(3), extra_commands(20), msg + integer :: iospara, i, j + + add_force_num = add_force_num + 1 + read(line, *, iostat=iospara) tmptxt, g, (txtvec(i), i = 1, 3) + if(iospara > 0) call read_error("Failure to read add_force command", iospara) + + !Get group number + add_force_group(add_force_num) = get_group_index(g) + if(add_force_group(add_force_num) == 0) then + call misc_error("Group "//trim(adjustl(g))// " in add_force command has not been defined. "// & + "Please define group before use") + end if + + !Read force_vec and force_mask + do i =1, 3 + call to_lower(txtvec(i)) + if(.not.(txtvec(i) == 'null')) then + add_force_mask(i, add_force_num) = .true. + read(txtvec(i), *, iostat = iospara) add_f(i, add_force_num) + if(iospara > 0) call read_error("Failure to read force vector component "//trim(adjustl(txtvec(i))), iospara) + end if + end do + + !Now get parse any extra optional commands + j = tok_count(line)-5 + if( j > 0) then + read(line, *, iostat=iospara, iomsg=msg) (tmptxt, i = 1, 5), (extra_commands(i), i = 1, j) + i = 1 + do while (i<=j) + select case(extra_commands(i)) + case('every') + i = i+1 + read(extra_commands(i), *, iostat=iospara, iomsg=msg) add_force_every(add_force_num) + if(iospara>0) call read_error(msg, iospara) + case('norm') + add_force_norm(add_force_num) = .true. + end select + i=i+1 + end do + end if + + end subroutine parse_add_force + + subroutine add_force + + integer :: i, ip, ia, ie, j + real(kind=wp) :: f(3) + logical :: do_now + + do i = 1, add_force_num + !Check to see if we need to add force now + do_now = .false. + if(first_add(i)) then + do_now = .true. + first_add(i) = .false. + else if(add_force_every(i) == 0) then + do_now = .false. + else if (mod(iter, add_force_every(i)) == 0) then + do_now = .true. 
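+  !The extra force is applied while first_add(i) is still set and afterwards
+  !every add_force_every(i) steps; with add_force_every(i) = 0 it is not
+  !reapplied on later iterations.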
+ end if + + if (do_now) then + f = add_f(:,i) + !Check to see if the force is normalized, if so make sure the group contains only atoms + if(add_force_norm(i)) then + if(group_counts(2,add_force_group(i)) > 0) then + call command_error("Currently cannot normalize force when addforce is used on a group containing elements") + end if + if(group_counts(1, add_force_group(i)) >0) then + f(:) = f(:)/group_counts(1,add_force_group(i)) + else + f=0.0_wp + end if + end if + do ia = 1, atom_num_l + if(btest(a_mask(ia), add_force_group(i))) then + do j = 1, 3 + if (add_force_mask(j, i)) then + force_atom(j,ia) = force_atom(j,ia) + f(j) + end if + end do + end if + end do + do ip = 1, node_num_l + ie = node_cg(ip) + if(btest(e_mask(ie),add_force_group(i))) then + do j = 1, 3 + if (add_force_mask(j,i)) then + force_eq(j,:, ip) = force_eq(j,:,ip) + f(j) + end if + end do + end if + end do + end if + end do + + end subroutine + + subroutine run_set_force + integer :: i, j, ia, ip, ie + + do i = 1, set_force_num + do ia = 1, atom_num_l + if(btest(a_mask(ia), force_group(i))) then + do j = 1, 3 + if (force_mask(j, i)) then + force_atom(j,ia) = fvec(j,i) + end if + end do + end if + end do + do ip = 1, node_num_l + ie = node_cg(ip) + if(btest(e_mask(ie),force_group(i))) then + do j = 1, 3 + if (force_mask(j,i)) then + force_eq(j,:, ip) = fvec(j,i) + end if + end do + end if + end do + end do + + end subroutine run_set_force + +end module force_mod diff --git a/src/forces.f90 b/src/forces.f90 new file mode 100644 index 0000000..19c674e --- /dev/null +++ b/src/forces.f90 @@ -0,0 +1,330 @@ +module forces + !This module is in charge of calculating energy, force, and virial + + use parameters + use elements + use integration + + implicit none + + real(kind = wp) :: energy_sum, energy_atom_sum, energy_tally, f_norm_tally, energy_equiv_tally + !Atom arrays + real(kind = wp), allocatable :: energy_atom(:), force_atom(:,:), force_atom_pre(:,:), virial_atom(:,:,:) + + !Element arrays + real(kind=wp), allocatable :: energy(:,:), energy_eq(:,:), & + force(:,:,:), force_eq(:,:,:), force_eq_pre(:,:,:), & + virial(:,:,:,:), virial_eq(:,:,:,:) + logical :: need_force_pre + + public + contains + + subroutine alloc_force_arrays + if(atom_num > 0) then + if( allocated(force_atom)) then + deallocate(energy_atom, force_atom, virial_atom, stat=allostat) + if (allostat > 0) call alloc_error("Failure to deallocate force_atom arrays", allostat) + end if + allocate(energy_atom(atom_num_l), force_atom(3, atom_num_l), & + virial_atom(3,3,atom_num_l), stat=allostat) + if (allostat > 0) call alloc_error("Failure to allocate force_atom arrays", allostat) + end if + if(ele_num> 0) then + if(allocated(force_eq)) then + deallocate(force_eq, energy_eq, virial_eq, stat=allostat) + if (allostat > 0) call alloc_error("Failure deallocating eq arrays", allostat) + end if + allocate(force_eq(3, max_basisnum, node_num_l), energy_eq(max_basisnum, node_num_l), & + virial_eq(3, 3, max_basisnum, node_num_l), stat = allostat) + if (allostat > 0) call alloc_error("Failure allocating init_force_arrays", allostat) + end if + end subroutine alloc_force_arrays + + subroutine alloc_pre_array + if(atom_num > 0) then + allocate(force_atom_pre(3, atom_num_lr)) + end if + if(ele_num>0) then + allocate(force_eq_pre(3, max_basisnum, node_num_lr)) + end if + end subroutine alloc_pre_array + + subroutine resize_force_arrays + !Resize the force arrays to match the node number + real(kind=wp), allocatable :: force_pre_array(:,:,:), force_atom_array(:,:) + + call 
alloc_force_arrays + !Force_eq_pre is the only array that we have to preserve data for because the other ones update prior to use + if(need_force_pre) then + if(ele_num > 0) then + allocate(force_pre_array(3,max_basisnum, node_num_l), stat = allostat) + if(allostat > 0) call alloc_error("Failure to allocate force_pre_array", allostat) + force_pre_array = force_eq_pre(:, :, 1:node_num_l) + call move_alloc(force_pre_array, force_eq_pre) + end if + + if(atom_num > 0) then + allocate(force_atom_array(3,atom_num_l), stat = allostat) + if(allostat > 0) call alloc_error("Failure to allocate force_atom_array", allostat) + force_atom_array = force_atom_pre(:,1:atom_num_l) + call move_alloc(force_atom_array, force_atom_pre) + end if + end if + + end subroutine resize_force_arrays + + subroutine update_equiv + !This subroutine calculates the normalized nodal quantities (referred to here as the equivalent) + integer :: ie, ip, i, j, je, inod, jnod, ibasis, ind + real(kind = wp), allocatable :: force_array(:), force_buff(:), virial_array(:), virial_buff(:), & + energy_array(:), energy_buff(:) + + + + + !If we have more than one processor than we need to share all this information for all elements that share data + if(pro_num > 1) then + allocate(force_array(3*ng_max_node*ele_shared_num*max_basisnum), & + energy_array(ng_max_node*ele_shared_num*max_basisnum), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate force/energy_array in update_equiv", allostat) + + force_array(:) = 0.0_wp + energy_array(:) = 0.0_wp + + if(need_virial) then + allocate(virial_array(9*ng_max_node*ele_shared_num*max_basisnum), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate virial_array in update_equiv", allostat) + virial_array(:) = 0.0_wp + end if + + !Assign the buffers for reducing to calculate the equivalent forces. + !The buffer will have empty space so the communications aren't completely + !efficient memory wise but it shouldn't cause very bad slowdowns. 
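+  !Packing convention for the flattened buffers: force component i of basis
+  !atom ibasis at node inod of shared element je is stored at
+  !  ind + i  with  ind = 3*ng_max_node*max_basisnum*(je-1)
+  !                       + 3*max_basisnum*(inod-1) + 3*(ibasis-1),
+  !and the matching energy entry at
+  !  ng_max_node*max_basisnum*(je-1) + max_basisnum*(inod-1) + ibasis.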
+ do ie = 1, ele_num_l + je = ele_id_shared(ie) + + if(je /= 0) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + + !Calculate the current index and put the force, virial, and energy into the send arrays + ind = 3*ng_max_node*max_basisnum*(je-1) + 3*max_basisnum*(inod-1) + 3*(ibasis-1) + do i = 1, 3 + if(ind + i > 3*ng_max_node*ele_shared_num*max_basisnum) then + print *, 'Error: Index of force_array', & + ind+i, ' is larger than', & + ' array size', 3*ng_max_node*ele_shared_num*max_basisnum + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + force_array(ind+i) = force(i, ibasis, ip) + + if(need_virial) then + do j = 1, 3 + if(3*ind + 3*(i-1)+j > 9*ng_max_node*ele_shared_num*max_basisnum) then + print *, 'Error: Index of virial_array', & + 3*ind + 3*(i-1)+j, ' is larger than', & + ' array size', 9*ng_max_node*ele_shared_num + call mpi_abort(mpi_comm_world, 1, ierr) + end if + virial_array(3*ind+3*(i-1)+j) = virial(j, i, ibasis, ip) + end do + end if + end do + + if(ng_max_node*max_basisnum*(je-1)+max_basisnum*(inod-1)+ibasis & + > ng_max_node*ele_shared_num*max_basisnum) then + print *, 'Error: Index of energy_array', & + ng_max_node*(je-1)+max_basisnum*(inod-1)+ibasis, ' is larger than', & + ' array size', ng_max_node*ele_shared_num*max_basisnum + call mpi_abort(mpi_comm_world, 1, ierr) + + end if + energy_array(ng_max_node*max_basisnum*(je-1)+max_basisnum*(inod-1)+ibasis) = energy(ibasis, ip) + end do + end do + end if + end do + + !allocate force_buff, virial_buff, and energy_buff + allocate(force_buff(3*ng_max_node*ele_shared_num*max_basisnum), & + energy_buff(ng_max_node*ele_shared_num*max_basisnum), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate force/energy_buff in update_equiv", allostat) + + !Reduce all force_buff, energy_buff, and virial_buff + force_buff(:) = 0.0_wp + call mpi_allreduce(force_array, force_buff, 3*ng_max_node*ele_shared_num*max_basisnum, & + mpi_wp, mpi_sum, world, ierr) + + energy_buff(:) = 0.0_wp + call mpi_allreduce(energy_array, energy_buff, ng_max_node*ele_shared_num*max_basisnum, & + mpi_wp, mpi_sum, world, ierr) + + if(need_virial) then + allocate(virial_buff(9*ng_max_node*ele_shared_num*max_basisnum), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate virial_buff in update_equiv", allostat) + + virial_buff(:) = 0.0_wp + call mpi_allreduce(virial_array, virial_buff, 9*ng_max_node*ele_shared_num*max_basisnum, & + mpi_wp, mpi_sum, world, ierr) + end if + + !update local energy_eq, force_eq, virial_eq from the buff received from all processors + do ie = 1, ele_num_l + je = ele_id_shared(ie) + if(je /= 0) then + do inod = 1, ng_node(etype(ie)) + + ip = cg_node(inod, ie) + + do ibasis = 1, basis_num(ie) + + ind = 3*ng_max_node * max_basisnum * (je-1) + 3*(inod-1)*max_basisnum + 3*(ibasis-1) + + do i = 1, 3 + force(i, ibasis, ip) = force_buff(ind+i) + + if(need_virial) then + do j = 1, 3 + virial(j, i, ibasis, ip) = virial_buff(3*ind + 3*(i-1)+j) + end do + end if + end do + energy(ibasis, ip) = energy_buff(ng_max_node*max_basisnum*(je-1)+max_basisnum*(inod-1) + ibasis) + end do + end do + end if + end do + end if + + !Reset values for eq arrays + force_eq(:, :, :) = 0.0_wp + if(need_virial) then + virial_eq(:, :, :, :) = 0.0_wp + end if + energy_eq(:, :) = 0.0_wp + + do ie = 1, ele_num_l + + do inod = 1, ng_node(etype(ie)) + + ip = cg_node(inod, ie) + + select case(mass_mat_ty) + case('lumped') + !Calculate equivalent values by dividing by the lumped mass 
matrix + do ibasis = 1, basis_num(ie) + force_eq(:,ibasis, ip) = force(:,ibasis, ip)/mass_mat_coeff(size_ele(ie), etype(ie)) + if(need_virial) then + virial_eq(:, :, ibasis, ip) = virial(:, :, ibasis, ip) / mass_mat_coeff(size_ele(ie), etype(ie)) + end if + energy_eq(ibasis, ip) = energy(ibasis, ip)/mass_mat_coeff(size_ele(ie), etype(ie)) + end do + + case('consistent') + !Use a consistent mass matrix + do jnod = 1, ng_node(etype(ie)) + do ibasis = 1, basis_num(ie) + force_eq(:, ibasis, ip) = force(:, ibasis, ip) * mass_mat_inv(inod, jnod) & + / mass_mat_coeff(size_ele(ie),etype(ie)) + if(need_virial) then + virial_eq(:, :, ibasis, ip) = virial(:, :, ibasis, ip) * mass_mat_inv(inod, jnod) & + / mass_mat_coeff(size_ele(ie),etype(ie)) + end if + energy_eq(ibasis, ip) = energy(ibasis, ip) * mass_mat_inv(inod, jnod) & + / mass_mat_coeff(size_ele(ie), etype(ie)) + end do + end do + + case default + print *, 'Error: Mass matrix type ', mass_mat_ty, ' is not accepted' + call mpi_abort(mpi_comm_world, 1, ierr) + end select + end do + end do + !Deallocate arrays we no longer need + deallocate(force, energy, stat = deallostat) + if(need_virial) then + deallocate(virial, stat = deallostat) + end if + if(deallostat /= 0) call alloc_error("Failure to deallocate force/energy/virial", deallostat) + + return + end subroutine update_equiv + + subroutine communicate_force_eq + !This subroutine communicates forces for all shared elements when needed (such as when using langevin dynamics + integer :: ie, ip, i, je, inod, ibasis, ind + real(kind = wp), allocatable :: force_array(:), force_buff(:) + + !If we have more than one processor than we need to share all this information for all elements that share data + if(pro_num > 1) then + allocate(force_array(3*ng_max_node*ele_shared_num*max_basisnum), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate force in communicate_force_eq", allostat) + + force_array(:) = 0.0_wp + + !Assign the buffers for reducing to calculate the equivalent forces. + !The buffer will have empty space so the communications aren't completely + !efficient memory wise but it shouldn't cause very bad slowdowns. 
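+  !Same flattened indexing as in update_equiv, but here only force_eq is packed
+  !and summed over all processors that hold a copy of each shared element.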
+ do ie = 1, ele_num_l + je = ele_id_shared(ie) + + if(je /= 0) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + !Calculate the current index and put the force, virial, and energy into the send arrays + ind = 3*ng_max_node*max_basisnum*(je-1) + 3*max_basisnum*(inod-1) + 3*(ibasis-1) + do i = 1, 3 + if(ind + i > 3*ng_max_node*ele_shared_num*max_basisnum) then + print *, 'Error: Index of force_array', & + ind+i, ' is larger than', & + ' array size', 3*ng_max_node*ele_shared_num*max_basisnum + call mpi_abort(mpi_comm_world, 1, ierr) + end if + force_array(ind+i) = force_eq(i, ibasis, ip) + end do + end do + end do + end if + end do + + !allocate force_buff, virial_buff, and energy_buff + allocate(force_buff(3*ng_max_node*ele_shared_num*max_basisnum), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate force_buff in update_equiv", allostat) + + !Reduce all force_buff, energy_buff, and virial_buff + force_buff(:) = 0.0_wp + call mpi_allreduce(force_array, force_buff, 3*ng_max_node*ele_shared_num*max_basisnum, & + mpi_wp, mpi_sum, world, ierr) + + !update local energy_eq, force_eq, virial_eq from the buff received from all processors + do ie = 1, ele_num_l + je = ele_id_shared(ie) + if(je /= 0) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + ind = 3*ng_max_node * max_basisnum * (je-1) + 3*(inod-1)*max_basisnum + 3*(ibasis-1) + do i = 1, 3 + force_eq(i, ibasis, ip) = force_buff(ind+i) + end do + end do + end do + end if + end do + end if + return + end subroutine communicate_force_eq + + subroutine reset_tallies + !Reset energy and force tallies + energy_tally = 0 + energy_equiv_tally = 0 + f_norm_tally = 0 + end subroutine + +end module forces diff --git a/src/group.f90 b/src/group.f90 new file mode 100644 index 0000000..db34b39 --- /dev/null +++ b/src/group.f90 @@ -0,0 +1,402 @@ +module group + + use parameters + use elements + use box + use str + use comms + + implicit none + + integer, parameter :: max_group_num = 20 + integer, save :: group_num, group_counts(4,max_group_num) + integer, dimension(10, max_group_num), save :: group_parameters_int + real(kind=wp), dimension(10, max_group_num) :: group_parameters_real + character(len=50), dimension(10, max_group_num) :: group_parameters_string + character(len=read_len), dimension(max_group_num) :: group_name, group_string, group_shape, which_in_group + + + public + contains + + + subroutine group_defaults + group_parameters_int = 0 + group_parameters_real=0.0_wp + group_counts = 0 + group_name(1) = 'all' + group_num = 1 + end subroutine group_defaults + + subroutine init_group_all + !This initializes the all group + integer :: i, group_countme(4) + group_countme = 0 + + if(ele_num > 0) e_mask=0 + if(atom_num > 0) a_mask=0 + group_countme(1) = atom_num_l + group_countme(2) = ele_num_l + do i = 1, atom_num_l + a_mask(i)= ibset(a_mask(i), 1) + end do + do i = 1, ele_num_l + e_mask(i)= ibset(e_mask(i), 1) + group_countme(3) = group_countme(3) + ng_node(etype(i))*basis_num(i) + group_countme(4) = group_countme(4) + basis_num(i)*(size_ele(i)+1)**3 + end do + call mpi_allreduce(group_countme, group_counts(:,1), 4, mpi_integer, mpi_sum, world, ierr) + end subroutine init_group_all + + function get_group_index(gname) + !This subroutine returns the index of a group + character(len=*), intent(in) :: gname + integer :: get_group_index + integer :: i + + get_group_index = 0 + do i = 1, group_num + if (trim(group_name(i)) == trim(gname)) then 
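+  !First match wins; the initial value of 0 is returned when the group name
+  !was never defined, which callers treat as an error.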
+ get_group_index=i + exit + end if + end do + return + end function get_group_index + + subroutine parse_group(line) + !This subroutine parses the group command + character(len=*), intent(in) :: line + integer :: iospara, i, j, shape_args + character(len=read_len) :: err, tmptxt + !First figure out what group_shape + group_num = group_num + 1 + + j = tok_count(line) + + read(line, *, iomsg=err, iostat=iospara) tmptxt, group_name(group_num), which_in_group(group_num), group_shape(group_num),& + (group_parameters_string(i,group_num), i = 1, j) + if(iospara > 0) call read_error(err, iospara) + + !Check to make sure which_in_group is correct + select case(which_in_group(group_num)) + case('elements', 'atoms', 'all') + continue + case default + call command_error("Group command doesn't accept selection based on "//trim(adjustl(which_in_group(group_num))) & + //" should be elements, atoms, or all") + end select + + !Now make sure that the group_shape is one of the acceptable options and check the inputted parameters + select case(group_shape(group_num)) + case('block', 'sphere', 'type', 'file') + continue + case default + call command_error("Group command doesn't accept shape "//trim(adjustl(group_shape(group_num))) & + //" should be block, sphere, or type") + end select + + + !Now parse the group parameters + select case(group_shape(group_num)) + case('block') + do i =1, 6 + call parse_pos(int((i+1)/2), group_parameters_string(i, group_num), group_parameters_real(i, group_num)) + end do + shape_args = 6 + + case('sphere') + do i =1, 3 + call parse_pos(i, group_parameters_string(i, group_num), group_parameters_real(i, group_num)) + end do + !Now get the radius + read(group_parameters_string(4, group_num), *, iostat = iospara, iomsg = err) group_parameters_real(i, group_num) + if(iospara > 0) call read_error(err, iospara) + group_parameters_real(4, group_num) = group_parameters_real(4, group_num) * group_parameters_real(4, group_num) + shape_args=4 + + case('type') + read(group_parameters_string(1,group_num), *, iostat = iospara, iomsg = err) group_parameters_int(1, group_num) + if(iospara > 0) call read_error(err, iospara) + shape_args = 1 + + case('file') + continue + end select + + call assign_group(group_num) + end subroutine parse_group + + subroutine assign_group(i) + !This subroutine finds all elements and/or atoms within the group boundaries + !specified by the user. 
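+        !Membership is stored as a bitmask: bit i of a_mask/e_mask is set with ibset
+        !when an atom/element belongs to group i and is later tested with btest.
+        !Illustrative example: an atom in group 1 ('all') and group 3 carries
+        !a_mask = 2**1 + 2**3 = 10.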
+ integer, intent(in) :: i + integer :: inod, ibasis, ia, ie, ip, group_countme(4), j, reada, reade, intag + integer, allocatable :: amap(:), emap(:) + real(kind=dp) :: rc(3) + character(len = read_len) :: msg + + group_countme = 0 + select case(trim(adjustl(group_shape(i)))) + case('block') + write(msg, *) "Group ", trim(group_name(i)), " has block shape with boundaries: ", group_parameters_real(1:6, i) + case('sphere') + write(msg, *) "Group ", trim(group_name(i)), " has sphere shape with centroid ", group_parameters_real(1:3, i), & + " and radius ", group_parameters_real(4, i) + case('type') + write(msg, *) "Group ", trim(group_name(i)), " selects all atoms of type ", group_parameters_int(1,i) + + case('file') + write(msg, *) "Group ", trim(group_name(i)), " is read in from file ", trim(adjustl(group_parameters_string(1,i))) + end select + call log_msg(msg) + + + !Assign atom groups + if(trim(adjustl(group_shape(i))) == "file") then + !First make a map for the tags to position in the array + + open(unit=11, file=trim(adjustl(group_parameters_string(1,i))), status='old', action='read', position='rewind') + !First read in number of atoms/elements to read + read(11,*) reada, reade + + !Now first read the atoms + if(reada > 0) then + !If we are actually interested + if( (trim(adjustl(which_in_group(i))) == 'atoms').or. (trim(adjustl(which_in_group(i))) == 'all'))then + allocate(amap(maxval(tag_atom))) + amap=0 + !First create the tag to index map + do j = 1, atom_num_l + amap(tag_atom(j)) = j + end do + + !Now read in the data and set the group mask + do j = 1, reada + read(11,*) intag + if(intag <= size(amap)) then + if(amap(intag) > 0) then + a_mask(amap(intag)) = ibset(a_mask(amap(intag)),i) + group_countme(1) = group_countme(1) + 1 + end if + end if + end do + else + !Otherwise just read until you are done with the atom reading + do j = 1, reada + read(11,*) intag + end do + end if + end if + + if(reade > 0) then + if( (trim(adjustl(which_in_group(i))) == 'elements').or. (trim(adjustl(which_in_group(i))) == 'all'))then + !First create the tag to index map + allocate(emap(maxval(tag_ele))) + emap =0 + do j = 1, ele_num_l + emap(tag_ele(j)) = j + end do + + !Now read in the data and set the group mask + do j=1, reade + read(11,*) intag + if(intag <= size(emap)) then + if(emap(intag) > 0) then + e_mask(emap(intag)) = ibset(e_mask(emap(intag)), i) + ie=emap(intag) + if(who_has_ele(ie)) then + group_countme(2) = group_countme(2) + 1 + group_countme(3) = group_countme(3) + ng_node(etype(ie))*basis_num(ie) + group_countme(4) = group_countme(4) + basis_num(ie) * (size_ele(ie)+1)**3 + end if + end if + end if + end do + end if + end if + close(11) + else + if((trim(adjustl(which_in_group(i))) == 'atoms').or. (trim(adjustl(which_in_group(i))) == 'all')) then + do ia = 1, atom_num_l + if( trim(adjustl(group_shape(i))) == 'type') then + if(type_atom(ia) == group_parameters_int(1,i)) then + a_mask(ia) = ibset(a_mask(ia), i) + group_countme(1) = group_countme(1) + 1 + end if + + else if (in_group(i, r_atom(:,ia))) then + a_mask(ia) = ibset(a_mask(ia), i) + group_countme(1) = group_countme(1) + 1 + end if + end do + end if + + !Assign element groups + if((trim(adjustl(which_in_group(i))) == 'elements').or. 
(trim(adjustl(which_in_group(i))) == 'all')) then + j=0 + do ie = 1, ele_num_l + if( trim(adjustl(group_shape(i))) == 'type') then + if(any(basis_type(:,ie) == group_parameters_int(1,i))) then + e_mask(ie) = ibset(e_mask(ie), i) + if(who_has_ele(ie)) then + group_countme(2) = group_countme(2) + 1 + group_countme(3) = group_countme(3) + ng_node(etype(ie))*basis_num(ie) + group_countme(4) = group_countme(4) + basis_num(ie)*(size_ele(ie)+1)**3 + end if + end if + + else + !Otherwise get the element centroid + rc = 0.0_wp + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + rc = rc + r(:,ibasis, ip) + end do + end do + rc = rc/(ng_node(etype(ie))*basis_num(ie)) + + !If centroid is in group then we set the mask + if (in_group(i, rc)) then + e_mask(ie) = ibset(e_mask(ie), i) + if(who_has_ele(ie)) then + group_countme(2) = group_countme(2) + 1 + group_countme(3) = group_countme(3) + ng_node(etype(ie))*basis_num(ie) + group_countme(4) = group_countme(4) + basis_num(ie)*(size_ele(ie)+1)**3 + end if + end if + end if + end do + end if + end if + + call mpi_allreduce(group_countme, group_counts(:,i), 4, mpi_integer, mpi_sum, world, ierr) + write(msg,*) "Group ", trim(group_name(i)), " with num ", i, " has ", group_counts(1,i), " atoms and ", & + group_counts(2,i), " elements" + call log_msg(msg) + + + end subroutine assign_group + + pure function in_group(g, r) + !Return true if in group + integer, intent(in) :: g + real(kind=wp), intent(in) :: r(3) + + real(kind=wp) :: rsq, rdiff(3) + logical :: in_group + + in_group = .false. + + select case(trim(adjustl(group_shape(g)))) + case('block') + in_group = in_block_bd(r(:), group_parameters_real(1:6, g)) + + case('sphere') + rdiff = r-group_parameters_real(1:3, g) + rsq = rdiff(1)*rdiff(1) + rdiff(2)*rdiff(2) + rdiff(3)*rdiff(3) + if (rsq < group_parameters_real(4, g)) in_group = .true. 
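+            !Note: group_parameters_real(4,g) already holds the squared radius
+            !(squared in parse_group), so no sqrt is needed here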
+ end select + + end function in_group + + subroutine write_group(line) + !This subroutine writes out the group information to a file named after the group + character(len=*),intent(in) :: line + + character(len=read_len) :: group_name, txtholder + integer :: g, at, el, a, e, i, counts_atom(pro_num), displs_atom(pro_num), counts_ele(pro_num), displs_ele(pro_num) + integer, allocatable :: gatoms(:), gelements(:), gatoms_gather(:), gelements_gather(:) + + read(line, *) txtholder, group_name + + g=get_group_index(group_name) + + !Now allocate the variables which will contain the tags + at=group_counts(1,g) + el=group_counts(2,g) + allocate(gatoms(at), gelements(el)) + + !Allocate gather variables on root + if(rank==root) then + allocate(gatoms_gather(at), gelements_gather(el)) + else + allocate(gatoms_gather(1), gelements_gather(1)) + end if + + !Now add the atoms and element tags within the group to the list + if(atom_num > 0) then + a=0 + do i = 1, atom_num_l + if (btest(a_mask(i), g)) then + a=a+1 + gatoms(a) = tag_atom(i) + end if + end do + end if + if(ele_num > 0) then + e=0 + do i = 1, ele_num_l + if(who_has_ele(i)) then + if(btest(e_mask(i), g)) then + e=e+1 + gelements(e) = tag_ele(i) + end if + end if + end do + end if + + !First gather counts and displacements for atoms + if(atom_num > 0 ) then + displs_atom=0 + counts_atom=0 + call mpi_gather(a, 1, mpi_integer, counts_atom, 1, mpi_integer, root, world, ierr) + if(rank==root) then + displs_atom=0 + do i =2, pro_num + displs_atom(i)=displs_atom(i-1)+counts_atom(i-1) + end do + if(at /= sum(counts_atom)) call misc_error("Sum of counts doesn't match atoms in group in write_group") + end if + end if + + !Now on root gather counts and displacements for elements + if(ele_num > 0) then + counts_ele=0 + displs_ele=0 + call mpi_gather(e, 1, mpi_integer, counts_ele, 1, mpi_integer, root, world, ierr) + if(rank==root) then + displs_ele=0 + do i =2, pro_num + displs_ele(i)=displs_ele(i-1)+counts_ele(i-1) + end do + if(el /= sum(counts_ele)) call misc_error("Sum of counts doesn't match eles in group in write_group") + end if + end if + + !Now gatherv the tags to root + call mpi_gatherv(gatoms(1:a), a, mpi_integer, gatoms_gather, counts_atom, displs_atom, mpi_integer, root, world, ierr) + call mpi_gatherv(gelements(1:e), e, mpi_integer, gelements_gather, & + counts_ele, displs_ele, mpi_integer, root, world, ierr) + + !Now write out the data if you are root + if(rank==root) then + open(unit=11, file=trim(adjustl(group_name))//"_tags.dat", action="write", status='replace', position='rewind') + write(11,*) at, el + + if(atom_num > 0) then + do i=1, at + write(11, *) gatoms_gather(i) + end do + end if + if(ele_num > 0) then + do i=1, el + write(11, *) gelements_gather(i) + end do + end if + close(11) + end if + end subroutine write_group + +end module group diff --git a/src/input_parser.f90 b/src/input_parser.f90 new file mode 100644 index 0000000..523fefa --- /dev/null +++ b/src/input_parser.f90 @@ -0,0 +1,280 @@ +module input_parser + !This code is in charge of reading the code and running functions that mattter + + use mpi + use comms + use parameters + use potential + use eam + use read_data + use neighbors + use logger + use dump + use group + use minimize + use force_mod + use displace + use set + use deform + use modify + use langevin + + implicit none + + logical, private :: initialized + + public + contains + + subroutine read_input + !Actually read the input, every processor should have access to the input arguments + !so we should be able 
to loop over these arguments + + character(len=read_len) :: line, label, req_commands(2), msg, loop_commands(100) + integer :: iosline, iospara, i, loop_times, nloopcom, nloop, j + logical :: req_flags(2), first_flag(2), read_loop_flag, run_loop_flag + + initialized = .false. + req_flags(:) = .false. + read_loop_flag = .false. + run_loop_flag = .false. + nloopcom = 0 + first_flag = .true. + req_commands(1) = 'read' + req_commands(2) = 'potential' + iosline = 0 + do while (iosline == 0) + if(run_loop_flag) then + if(j > nloopcom) then + j = 1 + nloop = nloop + 1 + if(nloop > loop_times) then + run_loop_flag = .false. + nloopcom = 0 + cycle + end if + end if + + line = loop_commands(j) + j = j+1 + else + if(rank == root) then + line='' + read(*, '(a)', iostat = iosline) line + if(iosline > 0) then + print *, 'Error: Wrong reading input file line ', line, & + ' because of', iosline + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !scan(line, '#' /= 1) is used to skip all lines starting with # + if((scan(line, '#') /= 1).and.(line /= '')) then + + !This communication needs to be done over mpi_comm_world to make sure every processor + !is running the commands + call mpi_bcast(line, read_len, mpi_character, root, mpi_comm_world, ierr) + else + cycle + end if + else + call mpi_bcast(line, read_len, mpi_character, root, mpi_comm_world, ierr) + !Check for the exit input loop code + if (line == "exit input loop") exit + end if + end if + + !Get the command + read(line, *, iostat = iospara) label + if(iospara > 0) then + print *, 'Error: Wrong reading input file label ', label, & + ' because of', iospara + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !Log the command + call log_msg(line,0) + + !run the command + if(read_loop_flag) then + if(label == 'endloop') then + read_loop_flag = .false. + run_loop_flag= .true. + nloop = 1 + j = 1 + else + nloopcom = nloopcom+1 + loop_commands(nloopcom) = trim(adjustl(line)) + end if + else + select case(label) + case('loop') + !Get the number of times to loop + read(line, *) label, loop_times + read_loop_flag = .true. + nloopcom = 0 + + case('read_data') + call parse_read(line) + initialized=.false. + need_vel=.false. + need_force_pre=.false. + req_flags(1) = .true. + case('potential') + call parse_potential(line) + req_flags(2) = .true. + case('neighbor') + call parse_neighbors(line) + case('write_out') + call parse_write_out(line) + case('dump') + call parse_dump(line) + case('undump') + call parse_undump(line) + case('timestep') + call parse_timestep(line) + case('modify') + call parse_modify(line) + case('group') + call parse_group(line) + case('mass') + call parse_mass(line) + case('debug') + call parse_debug(line) + case('langevin') + call parse_langevin(line) + case('setforce') + if(first_flag(1)) then + call init_set_force + first_flag(1) = .false. + end if + call parse_set_force(line) + case('addforce') + if(first_flag(2)) then + call init_add_force + first_flag(2) = .false. 
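+                        !init_add_force runs only on the first addforce command;
+                        !subsequent addforce commands skip this initialization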
+ end if + call parse_add_force(line) + case('displace') + call displace_points(line) + case('ramp') + call ramp_displace(line) + case('boundary') + call parse_boundary(line) + + !Now regenerate the grid comm + call mpi_cart_create(world, 3, num_pro, period, .false., grid_comm, ierr) + call mpi_comm_rank(grid_comm, grank, ierr) + call mpi_cart_coords(grid_comm, grank, 3, grid_coords, ierr) + + case('thermo') + call parse_thermo(line) + case('types') + call parse_types(line) + case('min_style') + call parse_min_style(line) + case('minimize') + select case(min_style) + case(1) + !If we are using fire than we need all of the info + call pre_calc(2) + case(2) + !If we are using cg than we only need the virial + call pre_calc(3) + end select + call parse_minimize(line) + case('run') + do i=1, 2 + if(.not.req_flags(i)) then + print *, "Error: must call ", req_commands(i), " before run command" + call mpi_abort(mpi_comm_world, 1, ierr) + end if + end do + !Prepare for calculation + call pre_calc(1) + call parse_run(line) + case('dynamics') + call parse_dynamics(line) + case('set') + call parse_set(line) + case('temp') + call parse_temp(line) + case('press') + call parse_berendsen(line) + case('unpress') + pflag=.false. + case('deform') + call parse_deform(line) + case('thermo_style') + call parse_thermo_style(line) + case('write_group') + call write_group(line) + case default + write(msg, *) 'Input parameter label ', trim(adjustl(label)), & + ' in line ', trim(adjustl(line)), ' is not accepted' + call misc_error(msg) + end select + end if + end do + + + !Because root will be the only processor that has access to stdin we have to broadcast a code to all the + !other processors for them to exit the input loop + if(rank == root) then + line = "exit input loop" + call mpi_bcast(line, read_len, mpi_character, root, mpi_comm_world, ierr) + end if + + return + end subroutine read_input + + subroutine pre_calc(runtype) + !This is the code which distributes the model and builds neighbor lists before calculation + + !Runtype dictates what the command is being called for calculation. The option for runtype changes how flags are set + integer, intent(in) :: runtype + + !Set flags and allocate variables + need_virial = .true. + select case(runtype) + case(1) + if(.not.need_vel) then + need_vel = .true. + call alloc_velocity_arrays + end if + + if(.not.need_force_pre) then + need_force_pre = .true. + call alloc_pre_array + end if + case(2) + if(.not.need_vel) then + need_vel = .true. + call alloc_velocity_arrays + end if + if(need_force_pre) then + need_force_pre=.false. + if(atom_num > 0) deallocate(force_atom_pre) + if(ele_num>0) deallocate(force_eq_pre) + end if + case(3) + if(need_vel) then + need_vel = .false. + call dealloc_velocity_arrays + end if + end select + + if (.not. initialized) then + !Set up the potential map + + !Allocate necessary force and potential arrays + call alloc_force_arrays + call alloc_potential_arrays + + initialized = .true. 
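+
+            !These allocations happen only once; later pre_calc calls only adjust
+            !the runtype-dependent velocity and previous-force flags set above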
+ + end if + + end subroutine pre_calc + +end module input_parser + diff --git a/src/integration.f90 b/src/integration.f90 new file mode 100644 index 0000000..e9038e7 --- /dev/null +++ b/src/integration.f90 @@ -0,0 +1,467 @@ +module integration + !This module contains the code required for integration point setup and virtual atom data + use parameters + use comms + use elements + use errors + + implicit none + + integer :: max_intpo_num, intpo_num, intpo_num_l + integer, save :: atomap_max_ele, etype_count, etype_to_itype(defined_element_types), & + intpo_nums(4, defined_element_types), intpo_count(defined_element_types) + integer, allocatable, save :: atom_intpo(:,:,:), who_rep_atomap(:,:,:) + real(kind = wp), allocatable, save :: weight_intpo(:,:,:) + real(kind = wp), allocatable, save :: mass_mat_inv(:,:) + character(len = 100) :: mass_mat_ty + logical, allocatable, save :: who_has_intpo(:,:) + + !Itype is the integration array type for the elements + integer, allocatable, save :: itype(:) + + public + contains + + subroutine integration_defaults + mass_mat_ty = 'lumped' + end subroutine integration_defaults + + subroutine init_integration + !Initialize the integration + + integer :: i, j, ie, iep, local_iep(2), global_iep(2) + real(kind=wp) :: weight_temp + real(kind=wp) :: v_inv_matrix(8**2) + + !First set up the mass arrays + !Allocate inverse mass arays + allocate(mass_mat_inv(8,8)) + + !Now calculate the inverse mass matrix, this is just for FCC 8 node elements + v_inv_matrix = [ 8.0_wp, -4.0_wp, 2.0_wp, -4.0_wp, -4.0_wp, 2.0_wp, -1.0_wp, 2.0_wp, & + -4.0_wp, 8.0_wp, -4.0_wp, 2.0_wp, 2.0_wp, -4.0_wp, 2.0_wp, -1.0_wp, & + 2.0_wp, -4.0_wp, 8.0_wp, -4.0_wp, -1.0_wp, 2.0_wp, -4.0_wp, 2.0_wp, & + -4.0_wp, 2.0_wp, -4.0_wp, 8.0_wp, 2.0_wp, -1.0_wp, 2.0_wp, -4.0_wp, & + -4.0_wp, 2.0_wp, -1.0_wp, 2.0_wp, 8.0_wp, -4.0_wp, 2.0_wp, -4.0_wp, & + 2.0_wp, -4.0_wp, 2.0_wp, -1.0_wp, -4.0_wp, 8.0_wp, -4.0_wp, 2.0_wp, & + -1.0_wp, 2.0_wp, -4.0_wp, 2.0_wp, 2.0_wp, -4.0_wp, 8.0_wp, -4.0_wp, & + 2.0_wp, -1.0_wp, 2.0_wp, -4.0_wp, -4.0_wp, 2.0_wp, -4.0_wp, 8.0_wp ] + + mass_mat_inv(:,:) = reshape(v_inv_matrix, [8,8]) + + !First communicate the etypes present, this array is only on the root processor as it's defined in read_restart + call mpi_bcast(etype_present, defined_element_types, mpi_logical, root, world, ierr) + + !Assign the etype_to_itype array, this just maps the etype to the correct index of the itype variables + !This is because generally all the element types won't be used in one simulation to save on memory + j = 0 + intpo_nums(:,:) = 0 + intpo_count(:) = 0 + etype_to_itype(:) = 0 + do i = 1, defined_element_types + if (etype_present(i)) then + j=j+1 + etype_to_itype(i) = j + select case(i) + !Now set up the integration point definitions for FCC elements (Will be expanded to other element types + !Intpo_nums has the integration point count for the integration schemes + !The first index si the corner integration points, the second is the number of edge integration points, + !the third is the surface integration points, and the 4th is the interior integration points + case(1) + !1NN element integration point count + intpo_nums(:,j) = (/ 8, 12, 6, 1 /) + intpo_count(j) = 27 + case(2,3) + !2NN element integration point count + intpo_nums(:,j) = (/ 64, 48, 12, 1 /) + intpo_count(j) = 125 + end select + end if + end do + + !Calculate max number of intpo nums for elements needed for model + max_intpo_num = maxval(intpo_count) + + !Allocate arrays + etype_count = count(etype_present) + 
allocate(who_rep_atomap((max_size+1)**3, unique_sizes, etype_count), & + weight_intpo(max_intpo_num, unique_sizes, etype_count), & + atom_intpo(max_intpo_num, unique_sizes, etype_count), & + stat = allostat) + if(allostat > 0) call alloc_error("Failure allocating weight arrays in init_integration", allostat) + + !Now initialize the integration points for all of the elements that we need + !At the moment this code creates the integration points for all possible combinations of esize and etype + if(etype_present(1)) call init_rhomb(1) + if (etype_present(2))call init_rhomb(2) + if (etype_present(3)) call init_rhomb(2, 3) + + !Now update the itypes for all the elements + call update_itype + !Now double check to make sure all of the weights for all elements add up correctly and count the number of integration + !points + local_iep = 0 + global_iep = 0 + do ie = 1, ele_num_l + weight_temp = 0.0_wp + do iep = 1, intpo_count(itype(ie)) + weight_temp = weight_temp + weight_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + end do + if(.not. is_equal(weight_temp, real((size_ele(ie)+1)**3,wp))) then + print *, "Error: weight_temp for element ", ie, " should equal ", (size_ele(ie)+1)**3 + call mpi_abort(mpi_comm_world, 1, ierr) + end if + if(who_has_ele(ie)) then + local_iep(1) = local_iep(1) + intpo_count(itype(ie)) + local_iep(2) = local_iep(2) + intpo_count(itype(ie))*basis_num(ie) + end if + end do + + !Sum to get total intpo_num + call mpi_allreduce(local_iep, global_iep, 2, mpi_integer, mpi_sum, world, ierr) + + !Now multiply by the maximum basis_num to get the right dimensions for the arrays + intpo_num = global_iep(2) + + !Now update integration point arrays + call update_intpo + + end subroutine init_integration + + subroutine init_rhomb(intpo_depth, etyp) + !Initialize one NN rhombohedral elements + + !intpo_depth is the integration point depth. The only difference between 1NN and 2NN elements is this integration point + !depth + integer, intent(in) :: intpo_depth + integer, intent(in), optional :: etyp + + integer :: i, j, k, ide, ix, iy, iz, ie, iep, esize, et, interp_count, intpo_set_num, iatom, & + i_edge, i_node, i_surf, i_inner + real(kind=wp) :: node_weight, edge_weight, surf_rect_weight, inner_weight, x, y, z + + !Variables needed for primitive unit cell definitions + real(kind=wp) :: weight_temp, cube_temp(24), cubic_mat(3,8), prim_cell(3,max_basisnum, ng_max_node) + !We need a pb_in variable for the interp function even though the pb doesn't matter + integer ::pb_in(3, max_basisnum, ng_max_node) + !arrays needed for assigning integration points to virtual atoms + integer :: pos_to_iatom(max_size+1, max_size+1, max_size+1), atom_rep_iatom((max_size+1)**3) + real(kind=wp) :: all_atom_set((max_size+1)), r_in_nat(3, (max_size+1)**3), rtemp(3,max_basisnum) + character :: msg + + !etype denotes the type of the element in the element type definition array. 
For the rhombohedral elements + !it's the same as the intpo_depth + if (present(etyp)) then + et=etype_to_itype(etyp) + else + et=etype_to_itype(intpo_depth) + end if + + !Initialize the matrix of node positions in natural coordinates + + cube_temp(:) = [ -1.0_wp, -1.0_wp, -1.0_wp, & + 1.0_wp, -1.0_wp, -1.0_wp, & + 1.0_wp, 1.0_wp, -1.0_wp, & + -1.0_wp, 1.0_wp, -1.0_wp, & + -1.0_wp, -1.0_wp, 1.0_wp, & + 1.0_wp, -1.0_wp, 1.0_wp, & + 1.0_wp, 1.0_wp, 1.0_wp, & + -1.0_wp, 1.0_wp, 1.0_wp ] + + prim_cell(:,1, 1:8) = reshape(cube_temp, [ 3, 8]) + pb_in(:,:,:) = 0 + + do ie = 1, unique_sizes + esize = shape_sizes(ie) + node_weight = 1.0_wp + edge_weight = esize + 1 - 2*intpo_depth + surf_rect_weight = (esize + 1 - 2*intpo_depth)**2.0_wp + inner_weight = (esize + 1 - 2 * intpo_depth) **3.0_wp + + if(rank == root) then + weight_temp = node_weight*intpo_nums(1, et) & + + edge_weight*intpo_nums(2, et) & + + surf_rect_weight*intpo_nums(3, et) & + + inner_weight*intpo_nums(4, et) + if(.not. is_equal(weight_temp, real((esize+1)**3, wp))) then + print *, "Error: Wrong total weight of integration points: ", weight_temp, ' for element type ', et, & + " with esize ", esize, " should be ", (esize+1)**3 + call mpi_abort(mpi_comm_world, 1, ierr) + end if + end if + + !Now figure out which iatoms are at integration point sites + + !Elements are defined from -esize/2 to esize/2 centered around 0. Here i has the number of interpolated atoms that are either + ! greater than or less than 0. So for esize=4, the atoms are at -2 -1 0 1 2 and i will be 2 + all_atom_set(:) = 0 + if(mod(esize,2) == 0) then + i = esize/2 + do ide =1, i + all_atom_set(2*ide) = -1.0_wp + 2.0_wp*(ide-1)/real(esize,wp) + all_atom_set(2*ide+1) = 1.0_wp - 2.0_wp*(ide-1)/real(esize,wp) + end do + else + i = (esize)/2 + all_atom_set(1)= 1/real(esize,wp) + all_atom_set(esize+1)= -1/real(esize,wp) + do ide =1, i + all_atom_set(2*ide) = -1.0_wp + 2.0_wp*(ide-1)/real(esize,wp) + all_atom_set(2*ide+1) = 1.0_wp - 2.0_wp*(ide-1)/real(esize,wp) + end do + end if + + !all_atom set contains only the positions which will have integration points. + !As an example, for esize =4 and intpo_depth=1 all_atom_set is (0, -2, 2). All integration points + !positions can be created from those 3 values in natural coordinates + + + !First get the position of the interpolated atoms in the natural coordinates and find the relevant position in + !all_atom_set + interp_count = (esize+1)**3 + do iatom = 1, interp_count + call interp_atom(iatom, esize, 1, pb_in, 1, prim_cell, rtemp) + r_in_nat(:,iatom) = rtemp(:,1) + end do + + !Now map the all_atom_set positions to iatoms + pos_to_iatom(:,:,:) = 0 + do k = 1, esize+1 + do j = 1, esize+1 + do i = 1, esize +1 + x = all_atom_set(i) + y = all_atom_set(j) + z = all_atom_set(k) + do iatom = 1, interp_count + if( is_equal(r_in_nat(1,iatom), real(x,wp)).and. & + is_equal(r_in_nat(2,iatom), real(y,wp)).and. & + is_equal(r_in_nat(3,iatom), real(z,wp))) then + + pos_to_iatom(i,j,k) = iatom + exit + end if + end do + end do + end do + end do + + intpo_set_num = 2*intpo_depth + 1 + + !Now loop over all integration point + iep=0 + i_node = 0 + i_edge = 0 + i_surf = 0 + i_inner= 0 + atom_rep_iatom=0 + do iz = 1, esize+1 + do iy = 1, esize+1 + do ix = 1, esize + 1 + + !Get the position for pos_to_iatom array + iatom = pos_to_iatom(ix,iy,iz) + + !These are node integration points + if(((ix > 1).and.(ix <= intpo_set_num)).and. & + ((iy >1).and.(iy <= intpo_set_num)).and. 
& + ((iz > 1).and.(iz <= intpo_set_num))) then + + iep = iep + 1 + atom_intpo(iep, ie, et) = iatom + weight_intpo(iep, ie, et) = node_weight + atom_rep_iatom(iatom) = iatom + i_node = i_node + 1 + + !These are edge integration points + else if (((ix == 1).and.((iy > 1).and.(iy<=intpo_set_num)).and.((iz > 1).and.(iz<=intpo_set_num))).or. & + ((iy == 1).and.((ix > 1).and.(ix<=intpo_set_num)).and.((iz > 1).and.(iz<=intpo_set_num))).or. & + ((iz == 1).and.((iy > 1).and.(iy<=intpo_set_num)).and.((ix > 1).and.(ix<=intpo_set_num)))) then + + iep = iep+1 + atom_intpo(iep, ie, et) = iatom + weight_intpo(iep, ie, et) = edge_weight + atom_rep_iatom(iatom) = iatom + i_edge = i_edge+1 + + !These are surface integration points + else if(((ix == 1).and.(iy == 1).and.((iz > 1).and.(iz<=intpo_set_num))).or. & + (((ix > 1).and.(ix<=intpo_set_num)).and.(iy == 1).and.(iz == 1)).or. & + ((ix == 1).and.((iy > 1).and.(iy<=intpo_set_num)).and.(iz == 1))) then + + iep = iep+1 + atom_intpo(iep, ie, et) = iatom + weight_intpo(iep, ie, et) = surf_rect_weight + atom_rep_iatom(iatom) = iatom + i_surf = i_surf+1 + + else if((ix == 1).and.(iy == 1).and.(iz == 1)) then + !Interior integration point + iep = iep+1 + atom_intpo(iep, ie, et) = iatom + weight_intpo(iep, ie, et) = inner_weight + atom_rep_iatom(iatom) = iatom + i_inner = i_inner + 1 + + !Now below are points which are represented by edge integration points + + else if(((ix == 1).or.(ix > intpo_set_num)).and. & + (iy > 1).and.(iy <= intpo_set_num).and. & + (iz > 1).and. (iz <= intpo_set_num)) then + + atom_rep_iatom(iatom) = pos_to_iatom(1, iy, iz) + i_edge = i_edge + 1 + + else if(((iy == 1).or.(iy > intpo_set_num)).and. & + (ix > 1).and. (ix <= intpo_set_num).and. & + (iz > 1).and. (iz <= intpo_set_num)) then + + atom_rep_iatom(iatom) = pos_to_iatom(ix, 1, iz) + i_edge = i_edge + 1 + + + else if(((iz == 1).or.(iz > intpo_set_num)).and. & + (ix > 1).and. (ix <= intpo_set_num).and. & + (iy > 1).and. (iy <= intpo_set_num)) then + + atom_rep_iatom(iatom) = pos_to_iatom(ix, iy, 1) + i_edge = i_edge + 1 + + !Now below are points which are represented by surface integration points + + else if(((ix == 1).or.(ix > intpo_set_num)).and. & + ((iy == 1).or.(iy > intpo_set_num)).and. & + (iz > 1).and.(iz <= intpo_set_num)) then + + atom_rep_iatom(iatom) = pos_to_iatom(1,1,iz) + i_surf = i_surf + 1 + + else if(((iy == 1).or.(iy > intpo_set_num)).and. & + ((iz == 1).or.(iz > intpo_set_num)).and. & + (ix > 1).and. (ix <= intpo_set_num)) then + + atom_rep_iatom(iatom) = pos_to_iatom(ix, 1, 1) + i_surf = i_surf + 1 + + else if(((ix == 1).or.(ix > intpo_set_num)).and. & + ((iz == 1).or.(iz > intpo_set_num)).and. & + (iy > 1).and. 
(iy <= intpo_set_num)) then + + atom_rep_iatom(iatom) = pos_to_iatom(1, iy, 1) + i_surf = i_surf + 1 + + + !Otherwise it is represented by the interior integration point + else + atom_rep_iatom(iatom) = pos_to_iatom(1,1,1) + i_inner = i_inner + 1 + + end if + end do + end do + end do + + !Now verify that all the counts match up + if ( (i_node+i_edge+i_surf+i_inner) /= interp_count) then + print *, "Error: total considered interpolated points should equal ", interp_count, & + " not ", (i_node+i_edge+i_surf+i_Inner), " in init_rhomb" + call mpi_abort(1, mpi_comm_world, ierr) + end if + + !We now form the mapping array which dictates where in atom_intpo our representative integration point lies + who_rep_atomap = 0 + do iatom = 1, interp_count + do iep = 1, intpo_count(et) + if (atom_rep_iatom(iatom) == atom_intpo(iep, ie, et)) then + who_rep_atomap(iatom,ie, et) = iep + end if + end do + if (atom_rep_iatom(iatom)==0) then + write(msg, *) "atom_rep_iatom should not be 0 for iatom", iatom, " and esize ", esize + call misc_error(msg) + end if + end do + end do + + end subroutine init_rhomb + + subroutine update_intpo + !This subroutine updates the ownership arrays for the integration points + + integer :: ie, iep, j, iatom, iatomap, ibasis, intpo_num_sum + + !First allocate who_has_intpo + if(allocated(who_has_intpo)) then + if (ele_num_l > size(who_has_intpo,2)) deallocate(who_has_intpo) + end if + if(.not.allocated(who_has_intpo)) allocate(who_has_intpo(max_intpo_num*max_basisnum, ele_num_l)) + + + intpo_num_l = 0 + if(atomap_num_l > size(atomap_to_intpo,2)) then + deallocate(atomap_to_intpo) + allocate(atomap_to_intpo(2,size(r_atomap,2))) + end if + atomap_to_intpo = 0 + who_has_intpo(:, :) = .false. + do ie = 1, ele_num_l + j = size_to_shape(size_ele(ie)) + + do iep = 1, intpo_count(itype(ie)) + iatom = atom_intpo(iep, j, itype(ie)) + + do ibasis = 1, basis_num(ie) + iatomap=cg_atomap(basis_num(ie)*(iatom-1) + ibasis, ie) + + !If iatomap /=0 then it's our iatomap + if(iatomap /= 0) then + if(in_block_bd(r_atomap(:,iatomap), pro_bd)) then + intpo_num_l = intpo_num_l + 1 + who_has_intpo(basis_num(ie)*(iep-1) + ibasis, ie) = .true. 
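+                            !Record the packed (integration point, basis) index and the
+                            !owning local element for this atomap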
+ atomap_to_intpo(1,iatomap) = basis_num(ie)*(iep-1) + ibasis + atomap_to_intpo(2,iatomap) = ie + else + print *, "Error: Atomap ", iatomap, " should belong to ", rank, " but doesn't" + print *, "Pos is ", r_atomap(:,iatomap), " and bd is ", pro_bd + call mpi_abort( mpi_comm_world, 1, ierr) + end if + end if + + end do + + end do + end do + + !Now check to make sure we have the right number of intpo + call mpi_reduce(intpo_num_l, intpo_num_sum, 1, mpi_integer, mpi_sum, root, mpi_comm_world, ierr) + + if(rank == root) then + if(intpo_num_sum /= intpo_num) then + print *, "Error: Total integration point number ", intpo_num_sum, " does not equal ", intpo_num + call mpi_abort(mpi_comm_world, 1, ierr) + end if + end if + + + end subroutine update_intpo + + pure function mass_mat_coeff(esize, etype) + integer, intent(in) :: esize, etype + real(kind=wp) :: mass_mat_coeff + + mass_mat_coeff = real(esize+1,wp)**3.0_wp/real(ng_node(etype),wp) + return + end function mass_mat_coeff + + subroutine update_itype + !This just updates the itype array for the local element num + integer :: i + + if (allocated(itype)) deallocate(itype) + allocate(itype(ele_num_l)) + + do i = 1, ele_num_l + itype(i) = etype_to_itype(etype(i)) + end do + end subroutine + +end module integration diff --git a/src/langevin.f90 b/src/langevin.f90 new file mode 100644 index 0000000..1af5eec --- /dev/null +++ b/src/langevin.f90 @@ -0,0 +1,119 @@ +module langevin + !This module contains all the code for running steps of the verlet algorithm + !This is equivalent to the NVE ensemble + use parameters + use comms + use elements + use forces + use time + use atom_types + use group + + implicit none + + !Arguments passed in by users + real(kind=wp), private, save :: Tb, damp, T0 + real(kind=wp), private, save :: gfactor1(max_atom_types), gfactor2(max_atom_types) + integer, save :: langevin_group(20), lnum + public + contains + + subroutine langevin_defaults + lnum = 0 + langevin_group = 0 + end subroutine langevin_defaults + + subroutine parse_langevin(line) + !Parse the langevin thermostat command + character(len=*), intent(in) :: line + + character(len=read_len) :: tmptxt, g + integer :: i + + if(tok_count(line) < 5) call command_error("Missing arguments for dynamics langevin command") + read(line, *) tmptxt, g, T0, Tb, damp + + !Get the group for this langevin call + lnum = lnum + 1 + langevin_group(lnum) = get_group_index(g) + if(langevin_group(lnum) == 0) then + write(tmptxt,*) "Group name ", g, " not defined in langevin command" + call command_error(tmptxt) + end if + + !Now get force prefactors for all atom types + do i = 1, natom_types + gfactor1(i) = -masses(i)/damp/ftm2v + gfactor2(i) = sqrt(masses(i)) * sqrt(24.0*boltzmann/damp/time_step/const_motion)/ftm2v + end do + + return + end subroutine parse_langevin + + subroutine langevin_post_force + integer :: i, ia, ie, ibasis, inod, ip + real(kind = wp) :: T_target, delta, tsqrt, fdrag(3), fran(3), rand, gam1, gam2 + + delta = (iter - begin_step)/(run_steps) + + T_target = T0 + delta*(Tb-T0) + tsqrt=sqrt(T_target) + + !update force with langevin for all atoms + !Loop over all langevin calls + do i = 1, lnum + do ia = 1, atom_num_l + if(btest(a_mask(ia), langevin_group(lnum))) then + gam1 = gfactor1(type_atom(i)) + gam2 = gfactor2(type_atom(i))*tsqrt + + call random_number(rand) + fran(1) = gam2*(rand-0.5) + call random_number(rand) + fran(2) = gam2*(rand-0.5) + call random_number(rand) + fran(3) = gam2*(rand-0.5) + + fdrag(1) = gam1*vel_atom(1,ia) + fdrag(2) = gam1*vel_atom(2,ia) 
+ fdrag(3) = gam1*vel_atom(3,ia) + + force_atom(1,ia) = force_atom(1,ia) + fdrag(1) + fran(1) + force_atom(2,ia) = force_atom(2,ia) + fdrag(2) + fran(2) + force_atom(3,ia) = force_atom(3,ia) + fdrag(3) + fran(3) + end if + end do + + do ie = 1, ele_num_l + if(btest(e_mask(ie), langevin_group(lnum)).and.(who_has_ele(ie))) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod,ie) + do ibasis = 1, basis_num(ie) + gam1 = gfactor1(basis_type(ibasis,ie)) + gam2 = gfactor2(basis_type(ibasis,ie))*tsqrt + + call random_number(rand) + fran(1) = gam2*(rand-0.5) + call random_number(rand) + fran(2) = gam2*(rand-0.5) + call random_number(rand) + fran(3) = gam2*(rand-0.5) + + fdrag(1) = gam1*vel_atom(1,ia) + fdrag(2) = gam1*vel_atom(2,ia) + fdrag(3) = gam1*vel_atom(3,ia) + + force_eq(1,ibasis,ip) = force_eq(1,ibasis,ip) + fdrag(1) + fran(1) + force_eq(2,ibasis,ip) = force_eq(2,ibasis,ip) + fdrag(2) + fran(2) + force_eq(3,ibasis,ip) = force_eq(3,ibasis,ip) + fdrag(3) + fran(3) + end do + end do + end if + end do + end do + + if(ele_num > 0) call communicate_force_eq + + end subroutine langevin_post_force + +end module langevin diff --git a/src/logger.f90 b/src/logger.f90 new file mode 100644 index 0000000..acff41b --- /dev/null +++ b/src/logger.f90 @@ -0,0 +1,70 @@ +module logger + !This is in charge of logging to stdout and to log files + use parameters + + logical,save :: log_on + + public + contains + + + subroutine log_defaults + log_on=.true. + end subroutine log_defaults + + subroutine init_log + !Open log file + if (log_on) then + if (rank == root) then + open(unit=51, file="cac.log", action='write', status='replace', position='rewind') + end if + end if + + end subroutine init_log + + subroutine log_msg(msg, isline, all_procs) + !Log to stdout and log file + character(len=*), intent(in) :: msg + integer, intent(in), optional :: isline !if 0 then no tab in front + logical, intent(in), optional :: all_procs !if .true. then all processors calling this write + integer :: tab + logical :: lognow + + if(log_on) then + lognow = .false. + if(present(all_procs)) then + lognow = all_procs + end if + if(rank == root) lognow = .true. + + if(present(isline)) then + tab = isline + else + tab = 1 + end if + + !write non-line information, which should be indented + if(lognow) then + if(tab == 1) then + print *, " ", trim(adjustl(msg)) + if(rank==root) write(51, "(a)") " ", trim(adjustl(msg)) + + !Write line information which isn't indented + else if(tab == 0) then + print *, trim(adjustl(msg)) + if(rank==root) write(51, "(a)") trim(adjustl(msg)) + + end if + end if + end if + + return + end subroutine log_msg + + subroutine close_log + !Close the log file + if (log_on) then + if(rank == root) close(51) + end if + end subroutine close_log +end module logger diff --git a/src/main.f90 b/src/main.f90 new file mode 100644 index 0000000..ddff3bb --- /dev/null +++ b/src/main.f90 @@ -0,0 +1,74 @@ +! Copyright (c) 2017-2018 Georgia Institute of Technology. All Rights Reserved +! Redistributing this source code is prohibited. This is a testing version of CAC. + +! This source code is provided as is, with no warranties or representations of accuracy +! or suitability for any application, and with no expectation of user support. + +! Please alert Alex Selimov (aselimov3@gatech.edu) with any bugs or changes made to source code. +! 
Written by Shuozhi Xu (shuozhixu@ucsb.edu) +program main + + use mpi + use comms + use parameters + use input_parser + use integration + use neighbors + use logger + use minimize + use time + use group + use dump + use thermo + use temp + use berendsen + use potential + use debug + use deform + use langevin + + implicit none + + !initialize mpi + call comm_init + !Initialize time + call time_defaults + + !Start timer + program_start = mpi_wtime() + + !Initialize log + call log_defaults + call init_log + + !Initialize some default values + deform_num = 0 + + !Initialize some flags + tflag = .false. + pflag = .false. + dflag = .false. + first_run = .true. + + !Now call all necessary default setting subroutines + call potential_defaults + call integration_defaults + call neighbors_defaults + call min_defaults + call group_defaults + call thermo_defaults + call dump_defaults + call langevin_defaults + call dynamics_defaults + + !Run input parsing code + call read_input + + !close log + call log_time + call close_log + + call mpi_finalize(ierr) + + stop + end program main diff --git a/src/math.f90 b/src/math.f90 new file mode 100644 index 0000000..734ca3c --- /dev/null +++ b/src/math.f90 @@ -0,0 +1,515 @@ +module math + !This contains math helper functions + use mpi + use parameters + + implicit none + + public + + contains + subroutine sort_array(array_in, n, array_out, array_order) + !sort 1d real array from minimum to maximum + + integer, intent(in) :: n + integer, dimension(n), intent(out) :: array_order + real(kind = wp), dimension(n), intent(in) :: array_in + real(kind = wp), dimension(n), intent(out) :: array_out + + + integer :: i, j, temp_int + real(kind = wp) :: temp + + array_out(:) = array_in(:) + do i = 1, n + array_order(i) = i + end do + + do i = 1, n + do j = i+1, n + if(array_out(i) > array_out(j)) then + temp = array_out(j) + array_out(j) = array_out(i) + array_out(i) = temp + temp_int = array_order(j) + array_order(j) = array_order(i) + array_order(i) = temp_int + end if + end do + end do + return + end subroutine sort_array + + pure function identity_mat(n) + !real identity matrix of n by n + + implicit none + + integer, intent(in) :: n + real(kind = wp), dimension(n, n) :: identity_mat + + integer :: i + + identity_mat(:, :) = 0.0_wp + + do i = 1, n + identity_mat(i, i) = 1.0_wp + end do + + return + end function identity_mat + + pure function cross_product(a, b) + !cross product between two 3*1 vectors + implicit none + real(kind = wp), dimension(3), intent(in) :: a, b + real(kind = wp), dimension(3) :: cross_product + + cross_product(1) = a(2) * b(3) - a(3) * b(2) + cross_product(2) = a(3) * b(1) - a(1) * b(3) + cross_product(3) = a(1) * b(2) - a(2) * b(1) + return + end function cross_product + + pure function triple_product(a, b, c) + !triple product between three 3*1 vectors + implicit none + real(kind = wp), dimension(3), intent(in) :: a, b, c + real(kind = wp) :: triple_product + + triple_product = dot_product(a, cross_product(b, c)) + + return + end function triple_product + + subroutine matrix_normal(a, n, a_nor) + !normalize an n by n matrix + implicit none + integer, intent(in) :: n + real(kind = wp), dimension(n, n), intent(in) :: a + real(kind = wp), dimension(n, n), intent(out) :: a_nor + + real(kind = wp), dimension(n) :: v + integer :: i + + a_nor(:, :) = a(:, :) + do i = 1, n + v(:) = a(:, i) + a_nor(:, i) = v(:) / norm2(v) + end do + return + end subroutine matrix_normal + + subroutine matrix_lattice(a, n, iorh) + !check if a matrix is orthogonal and obey 
right hand rule + implicit none + integer, intent(in) :: n + real(kind = wp), dimension(n, n), intent(in) :: a + logical, dimension(2), intent(out) :: iorh + + real(kind = wp), dimension(n) :: v, v_k + integer :: i, j + + iorh(:) = .true. + i_loop: do i = 1, n + do j = i + 1, n + + if(abs(dot_product(a(:, i), a(:, j))) > lim_small) then + iorh(1) = .false. + end if + + if(j == i + 1) then + v(:) = cross_product(a(:, i), a(:, j)) + v_k(:) = v(:) - a(:, mod(j, n)+1) + + else if((i == 1).and.(j == n)) then + v(:) = cross_product(a(:, j), a(:, i)) + v_k(:) = v(:) - a(:, i+1) + + end if + + if(norm2(v_k(:)) > lim_small) then + iorh(2) = .false. + end if + + if(all(iorh).eqv..false.) then + exit i_loop + end if + end do + end do i_loop + return + + end subroutine matrix_lattice + + subroutine matrix_inverse(a, n, a_inv) + !inverse an n by n matrix + implicit none + integer, intent(in) :: n + real(kind = wp), dimension(n, n), intent(in) :: a + real(kind = wp), dimension(n, n), intent(out) :: a_inv + + integer :: i, j, k, piv_loc + real(kind = wp) :: coeff, sum_l, sum_u + real(kind = wp), dimension(n) :: b, x, y, b_piv + real(kind = wp), dimension(n, n) :: l, u, p + real(kind = wp), allocatable :: v(:), u_temp(:), l_temp(:), p_temp(:) + + l(:, :) = identity_mat(n) + u(:, :) = a(:, :) + p(:, :) = identity_mat(n) + + !LU decomposition with partial pivoting + do j = 1, n-1 + + allocate( v(n-j+1), stat = allostat ) + if(allostat /=0 ) then + print *, 'Fail to allocate v in matrix_inverse' + call mpi_abort(1,mpi_comm_world,ierr) + end if + + v(:) = u(j:n, j) + if(maxval(abs(v)) < lim_zero) then + print *, 'Fail to inverse matrix', a + call mpi_abort(1,mpi_comm_world,ierr) + end if + + piv_loc = maxloc(abs(v), 1) + + deallocate( v, stat = deallostat ) + if(deallostat /=0 ) then + print *, 'Fail to deallocate v in matrix_inverse' + call mpi_abort(1,mpi_comm_world,ierr) + end if + + !partial pivoting + + if(piv_loc /= 1) then + + allocate( u_temp(n-j+1), p_temp(n), stat = allostat) + if(allostat /=0 ) then + print *, 'Fail to allocate p_temp and/or u_temp in matrix_inverse' + call mpi_abort(1,mpi_comm_world,ierr) + end if + + u_temp(:) = u(j, j:n) + u(j, j:n) = u(piv_loc+j-1, j:n) + u(piv_loc+j-1, j:n) = u_temp(:) + p_temp(:) = p(j, :) + p(j, :) = p(piv_loc+j-1, :) + p(piv_loc+j-1, :) = p_temp(:) + + deallocate( u_temp, p_temp, stat = deallostat ) + if(deallostat /=0 ) then + print *, 'Fail to deallocate p_temp and/or u_temp in matrix_inverse' + call mpi_abort(1,mpi_comm_world,ierr) + end if + + if(j > 1) then + allocate( l_temp(j-1), stat = allostat ) + if(allostat /= 0) then + print *, 'Fail to allocate l_temp in matrix_inverse' + call mpi_abort(1,mpi_comm_world,ierr) + end if + + l_temp(:) = l(j, 1:j-1) + l(j, 1:j-1) = l(piv_loc+j-1, 1:j-1) + l(piv_loc+j-1, 1:j-1) = l_temp(:) + + deallocate(l_temp, stat = deallostat) + if(deallostat /=0 ) then + print *, 'Fail to deallocate l_temp in matrix_inverse' + call mpi_abort(1,mpi_comm_world,ierr) + end if + end if + end if + + !LU decomposition + do i = j+1, n + coeff = u(i, j)/u(j, j) + l(i, j) = coeff + u(i, j:n) = u(i, j:n)-coeff*u(j, j:n) + end do + end do + + a_inv(:, :) = 0.0_wp + + do j = 1, n + b(:) = 0.0_wp + b(j) = 1.0_wp + b_piv(:) = matmul(p, b) + + !Now we have LUx = b_piv + !the first step is to solve y from Ly = b_piv + !forward substitution + do i = 1, n + if(i == 1) then + y(i) = b_piv(i)/l(i, i) + else + sum_l = 0 + do k = 1, i-1 + sum_l = sum_l+l(i, k)*y(k) + end do + y(i) = (b_piv(i)-sum_l)/l(i, i) + end if + end do + + !then we solve x 
from ux = y + !backward subsitution + do i = n, 1, -1 + if(i == n) then + x(i) = y(i)/u(i, i) + else + sum_u = 0 + do k = i+1, n + sum_u = sum_u+u(i, k)*x(k) + end do + x(i) = (y(i)-sum_u)/u(i, i) + end if + end do + +! put x into j column of a_inv + a_inv(:, j) = x(:) + + end do + + return + end subroutine matrix_inverse + +! three point interpolation + + function interp3(x, tab) + + !this comes from dl poly + implicit none + + real(kind = wp), intent(in) :: x + real(kind = wp), dimension(:), intent(in) :: tab + real(kind = wp) :: interp3 + + integer :: l + real(kind = wp) :: rdr, rrr, ppp, gk0, gk1, gk2, t1, t2 + + + interp3 = 0.0_wp + + if(x > tab(3)) then + interp3 = 0 + + else + + rdr = 1.0_wp / tab(4) + rrr = x - tab(2) + + l = min(nint(rrr*rdr), nint(tab(1))-1) + if(l < 5) then + interp3 = tab(5) + end if + + ppp = rrr * rdr - real(l, wp) + gk0 = tab(l-1) + gk1 = tab(l) + gk2 = tab(l+1) + t1 = gk1 + (gk1 - gk0) * ppp + t2 = gk1 + (gk2 - gk1) * ppp + + if(ppp < 0.0_wp) then + interp3 = t1 + 0.5_wp * (t2 - t1) * (ppp + 1.0_wp) + + else if(l == 5) then + interp3 = t2 + + else + interp3 = t2 + 0.5_wp * (t2 - t1) * (ppp - 1.0_wp) + + end if + end if + + return + end function interp3 + + + subroutine delete_duplicate(array_in, n, array_out, m) + !delete duplicates in an 1-d integer array, the others are 0 + implicit none + + integer, intent(in) :: n + integer, dimension(n), intent(in) :: array_in + integer, intent(out) :: m + integer, dimension(n), intent(out) :: array_out + + integer :: i, j + integer, dimension(n) :: array_temp + + + m = 0 + array_out(:) = 0 + array_temp(:) = array_in(:) + + do i = 1, n + if(array_temp(i) > 0) then + do j = i+1, n + if(array_temp(i) == array_temp(j)) then + array_temp(j) = 0 + end if + end do + + m = m+1 + array_out(m) = array_temp(i) + + end if + end do + + return + end subroutine delete_duplicate + + + subroutine normal_distribution(mean, dev, rand) + !return a normally distributed random number + implicit none + + real(kind = wp), intent(in) :: mean, dev + real(kind = wp), intent(out) :: rand + + real(kind = wp) :: rand_uf, rand_us + + rand_uf = -1.0_wp + + do while(rand_uf < lim_zero) + call random_number(rand_uf) + end do + + if(rand_uf <= 0.5_wp) then + call random_number(rand_us) + rand = sqrt(-2.0_wp * log(rand_uf)) * cos(2.0_wp * pi * rand_us) + + else + call random_number(rand_us) + rand = sqrt(-2.0_wp * log(rand_uf)) * sin(2.0_wp * pi * rand_us) + + end if + + rand = mean + dev * rand + + return + end subroutine normal_distribution + + function is_equal(A,B) + real(kind=dp), intent(in) :: A, B + logical :: is_equal + + if( (abs(A-B)) <= zerotol) then + is_equal = .true. + else + is_equal = .false. + end if + return + end function + + function dumb_interp(test_x, xlo, ylo, xhi, yhi) + !Simple interpolation used just for validation in unit tests + real(kind=dp), intent(in) :: test_x, xlo, ylo, xhi, yhi + real(kind=dp) :: dumb_interp + + dumb_interp = ((test_x - xlo)/(xhi-xlo))*(yhi-ylo) + ylo + return + end function + + pure function in_block_bd(v, block_bd, check_bd) + !This function determines whether a point is within a block in 3d + !Input/output + real(kind=dp), dimension(3), intent(in) :: v + real(kind=dp), dimension(6), intent(in) :: block_bd + logical, dimension(3), optional, intent(in) :: check_bd + logical :: in_block_bd + + !Other variables + integer :: i + logical :: bool_bd(3) + + in_block_bd = .true. + if(present(check_bd)) then + bool_bd = check_bd + else + bool_bd(:)=.true. 
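+            !When check_bd is not supplied, all three directions are checked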
+ end if + + + do i =1 ,3 + if(bool_bd(i)) then + !Check upper bound + if(v(i) >= (block_bd(2*i)) ) then + in_block_bd =.false. + exit + !Check lower bound + else if (v(i) < (block_bd(2*i-1))) then + in_block_bd = .false. + exit + end if + end if + end do + + end function in_block_bd + + pure function in_cutoff(rl, rk, rcmin, rcoff) + !This function returns true when rl and rk are separated by a distance between rcmin and rcoff + real(kind=wp), dimension(3), intent(in) :: rl, rk + real(kind=wp), intent(in) :: rcmin, rcoff + logical :: in_cutoff + + real(kind=wp) :: rlk + rlk=norm2(rk-rl) + + if (( rlk > rcoff).or.(rlk 0) then + if(allocated(rzeronode)) deallocate(rzeronode, hnode, gnode) + allocate(hnode(3, max_basisnum, node_num_l), gnode(3, max_basisnum, node_num_l), & + rzeronode(3, max_basisnum, node_num_l)) + end if + + if(atom_num > 0) then + if(allocated(rzeroatom)) deallocate(rzeroatom, hatom, gatom) + allocate(hatom(3,atom_num_l), gatom(3,atom_num_l), rzeroatom(3, atom_num_l)) + end if + + end subroutine alloc_min_arrays + + subroutine grow_at_min_arrays(seg_real) + !Grow atom min arrays + integer, intent(in), optional :: seg_real + + integer :: seg_num + real(kind=wp), allocatable :: r_array(:,:), g_array(:,:), h_array(:,:) + + + if(present(seg_real)) then + seg_num = seg_real + else + seg_num = 1024 + end if + + allocate(r_array(3, size(rzeroatom,2)+seg_num), g_array(3, size(gatom,2)+seg_num), h_array(3, size(hatom,2)+seg_num)) + + r_array=0.0_wp + g_array=0.0_wp + h_array=0.0_wp + + r_array(:,1:size(rzeroatom,2)) = rzeroatom + g_array(:,1:size(gatom,2)) = gatom + h_array(:,1:size(hatom,2)) = hatom + + call move_alloc(r_array, rzeroatom) + call move_alloc(g_array, gatom) + call move_alloc(h_array, hatom) + + return + end subroutine grow_at_min_arrays + + subroutine grow_ele_min_arrays(seg_real) + !Grow atom min arrays + integer, intent(in), optional :: seg_real + + integer :: seg_num + real(kind=wp), allocatable :: r_array(:,:,:), g_array(:,:,:), h_array(:,:,:) + + if(present(seg_real)) then + seg_num = seg_real + else + seg_num = 1024 + end if + + allocate(r_array(3, max_basisnum, size(rzeronode,3)+seg_num), & + g_array(3, max_basisnum, size(gnode,3)+seg_num), h_array(3, max_basisnum, size(hnode,3)+seg_num)) + + r_array=0.0_wp + g_array=0.0_wp + h_array=0.0_wp + + r_array(:,:,1:size(rzeronode,3)) = rzeronode + g_array(:,:,1:size(gnode,3)) = gnode + h_array(:,:,1:size(hnode,3)) = hnode + + call move_alloc(r_array, rzeronode) + call move_alloc(g_array, gnode) + call move_alloc(h_array, hnode) + + return + end subroutine grow_ele_min_arrays + + + + subroutine pack_atom_cg(r0,g,h, send_cg) + !This subroutine packs the atom information for conjugate gradient minimization + real(kind=wp), dimension(3), intent(in) :: r0, g, h + real(kind=wp), dimension(:), intent(out) :: send_cg + + integer :: i, k + + !Now pack send_cg + k = 1 + do i = 1,3 + send_cg(k) = r0(i) + k=k+1 + send_cg(k) = g(i) + k=k+1 + send_cg(k) = h(i) + k=k+1 + end do + + return + + end subroutine pack_atom_cg + + subroutine unpack_atom_cg(recv_cg, r0, g, h) + !This subroutine unpacks the atom information for cg minimization + real(kind=wp), dimension(:), intent(in) :: recv_cg + real(kind=wp), dimension(3), intent(out) :: r0, g, h + + integer :: i, k + + !Now unpack recv_cg + k = 1 + do i = 1,3 + r0(i) = recv_cg(k) + k=k+1 + g(i) = recv_cg(k) + k=k+1 + h(i) = recv_cg(k) + k=k+1 + end do + + return + + end subroutine unpack_atom_cg + + subroutine pack_ele_cg(n, b, r0,g,h, send_cg) + !This subroutine packs the atom 
information for conjugate gradient minimization + integer, intent(in) :: n, b + real(kind=wp), dimension(3, max_basisnum, ng_max_node), intent(in) :: r0, g, h + real(kind=wp), dimension(:), intent(out) :: send_cg + + integer :: i, k, inod, ibasis + + !Now pack send_cg + k = 1 + do inod = 1, n + do ibasis = 1, b + do i = 1,3 + send_cg(k) = r0(i,ibasis,inod) + k=k+1 + send_cg(k) = g(i,ibasis,inod) + k=k+1 + send_cg(k) = h(i,ibasis,inod) + k=k+1 + end do + end do + end do + + return + end subroutine pack_ele_cg + + subroutine unpack_ele_cg(n, b, recv_cg, r0, g, h) + !This subroutine packs the atom information for conjugate gradient minimization + integer, intent(in) :: n, b + real(kind=wp), dimension(:), intent(in) :: recv_cg + real(kind=wp), dimension(3, max_basisnum, ng_max_node), intent(out) :: r0, g, h + + integer :: i, k, inod, ibasis + + !Now pack send_cg + k = 1 + do inod = 1, n + do ibasis = 1, b + do i = 1,3 + r0(i,ibasis,inod)=recv_cg(k) + k=k+1 + g(i,ibasis,inod)=recv_cg(k) + k=k+1 + h(i,ibasis,inod)=recv_cg(k) + k=k+1 + end do + end do + end do + + return + end subroutine unpack_ele_cg +end module min_arrays diff --git a/src/minimize.f90 b/src/minimize.f90 new file mode 100644 index 0000000..e0f0d40 --- /dev/null +++ b/src/minimize.f90 @@ -0,0 +1,309 @@ +module minimize + + !This code contains the code needed to run the various types of dynamics + use parameters + use fire + use cg + use neighbors + use forces + use comms + use potential + use dump + use thermo + use errors + use logger + use debug + + implicit none + + real(kind=wp), save :: force_tol, energy_tol + integer :: min_style, max_iter, reset_num + real(kind=wp), parameter :: eps_energy = 1d-8 + + private :: pre_iter, post_iter, iterate + public + contains + + subroutine min_defaults + !Set defaults + min_style = 1 + end subroutine min_defaults + + subroutine parse_min_style(line) + !parse the run command + character(len = *), intent(in) :: line + character(len=read_len) :: tmptxt, min_string, args(20) + integer ::iospara, j, i + real(kind=wp) :: val + + read(line, *, iostat = iospara) tmptxt, min_string + if (iospara > 0) call read_error("Invalid read of min_string from min_style command", iospara) + + select case(min_string) + case('fire', 'FIRE') + call fire_defaults + min_style = 1 + j = tok_count(line) + if(j > 2) then + read(line, *) args(1:j) + i = 3 + do while(i <= j) + select case(args(i)) + case('alpha0', 'tmax', 'tmin') + i = i + 1 + !Read param val + read(args(i), *) val + !set param val + call set_fire_param(args(i-1),val) + case default + write(tmptxt, *) "Cannot pass argument ", trim(adjustl(args(i))), " to min_style with style cg" + call misc_error(tmptxt) + end select + i = i + 1 + end do + end if + case('cg', 'CG') + call command_error('Conjugate gradient not correctly working in this implementation of CAC') + call cg_defaults + min_style = 2 + !Now parse additional options for cg + j = tok_count(line) + if(j > 2) then + read(line, *) args(1:j) + i = 3 + do while(i <= j) + select case(args(i)) + case('reset') + i = i + 1 + !Read the number of iterations between resets + read(args(i), *) reset_num + case default + write(tmptxt, *) "Cannot pass argument ", trim(adjustl(args(i))), " to min_style with style cg" + call misc_error(tmptxt) + end select + i = i + 1 + end do + end if + case default + call command_error("Min_style "//trim(adjustl(min_string))//" is not currently accepted as an option for dynamics") + end select + + end subroutine parse_min_style + + subroutine parse_minimize(line) + 
character(len=*), intent(in) :: line + character(len=read_len) :: tmptxt + integer :: iospara + + read(line, *, iostat = iospara) tmptxt, energy_tol, force_tol, max_iter + if (iospara > 0) call read_error("Invalid read of minimize command", iospara) + + if ((force_tol < 0)) then + write(tmptxt, *) "Force tolerance ", force_tol, " should not be less than zero" + call misc_error(tmptxt) + end if + if ((energy_tol< 0)) then + write(tmptxt, *) "Energy tolerance ", energy_tol, " should not be less than zero" + call misc_error(tmptxt) + end if + if ((max_iter<= 0)) then + write(tmptxt, *) "max_iter", max_iter, " should not be less than or equal to zero" + call misc_error(tmptxt) + end if + + begin_step = iter + call run_min + end subroutine parse_minimize + + subroutine run_min + !This subroutine actually runs the minimization + integer ::i, exit_cond, delay, nevals + real(kind=wp) :: old_energy, old_f_norm, pe, fnorm + character(len=read_len) :: msg + + + !Init neighbor list and do initial calc + if (nei_init) then + call update_neighbor(iter, .false.,.true.) + else + if(ele_num > 0) call ghost_cg + if(atom_num> 0) call ghost_at + call neighbor_lists + end if + call update_force + + !Initialize minimizer + delay = 0 + select case(min_style) + case(1) + !Check to make sure that the timestep has been set + if (is_equal(time_step, 0.0_wp)) then + call misc_error("Time step must be set prior to calling minimize with min_style fire") + end if + call fire_init + delay = get_fire_delaystep() + !Initialize velocities to 0 + if(node_num_l > 0) vel(:,:,:) = 0 + if(atom_num_l > 0) vel_atom(:,:) = 0 + case(2) + call cg_init + end select + + if(first_run) call write_dump(iter, .true.) + call write_thermo_style + call write_thermo_out(iter) + + exit_cond = 0 + + pe = compute_pe(1, .true.) + fnorm = compute_fnorm(1) + do i = 1, max_iter + + iter = iter + 1 + !Save old energies + old_energy=pe + old_f_norm =fnorm + + !Call pre_iterate + call pre_iter(i) + + !Call iterate + call iterate(i, exit_cond) + + if(min_style == 2) then + call update_neighbor(iter, .true.) + call update_force + + !Now check to see if we want to reset the conjugate direction. This may slow down the convergance, but may be + !useful for finite elements + if(reset_num > 0) then + if(mod(i, reset_num) == 0) then + call reset_cg_dir + end if + end if + end if + + !Exit if the minimizer tells us to + if(exit_cond > 0) exit + + !Check tolerances + if (i > delay) then + pe = compute_pe(1, .true.) + fnorm = compute_fnorm(1) + if(abs(old_energy-pe)< energy_tol*0.5_wp*(abs(pe)+abs(old_energy)+eps_energy)) then + exit_cond = 1 + exit + else if (fnorm < force_tol) then + exit_cond = 2 + exit + end if + end if + + !Call post iterate + call post_iter(i) + end do + + !Compute virial + need_virial = .true. + call update_force + + !Post completion thermo and dump + call write_thermo_out(iter) + call write_dump(iter, .true.) 
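+
+        !exit_cond summary: 0 = max_iter reached, 1 = energy tolerance met,
+        !2 = force tolerance met, values >= 3 are minimizer-specific stopping
+        !conditions reported below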
+ + !Post minimizer message to user + !First print the exit conditions + if(exit_cond < 3) then + select case(exit_cond) + case(0) + write(msg, *) "Max Iterations" + case(1) + write(msg, *) "Energy Tolerance" + case(2) + write(msg, *) "Force Tolerance" + end select + else + select case(min_style) + case(1) + if(exit_cond == 3) write(msg, *) "Max P < 0 iterations" + case(2) + if(exit_cond == 4) write(msg, *) "Exit because search direction isn't downhill" + + if(exit_cond == 5) write(msg, *) "All search direction components are 0" + + if(exit_cond == 6) write(msg, *) "Alpha is equal to 0" + + if(exit_cond == 11) write(msg, *) "No change to force between line search iterations" + end select + end if + call log_msg("Minimizer out with stopping condition: "//trim(adjustl(msg))) + + !Now print number of evaluations and iterations and clean up + select case(min_style) + case(1) + nevals = get_fire_neval() + call fire_clean + case(2) + nevals = get_cg_neval() + call cg_clean + end select + write(msg,*) "Total number of iterations: ", i + call log_msg(msg) + write(msg, *) "Total number of force evals: ", nevals + call log_msg(msg) + + write(msg, *) "Final and second-to-last energies: ", pe, old_energy + call log_msg(msg) + + call log_neighbor_info + + first_run = .false. + end subroutine run_min + + subroutine pre_iter(i) + !This subroutine is run before each dynamics timeiter is calculated + + !i is the current iter number + integer, intent(in) ::i + + !If we are dumping this timeiter then we need to calculate the virial stress + if(need_dump(i)) need_virial = .true. + if((mod(i,thermo_every)==0).and.need_p) need_virial = .true. + return + end subroutine pre_iter + + subroutine post_iter(i) + integer, intent(in) :: i + + integer :: pre_atom_num + !This subroutine is run after the dynamics timeiter is calculated + !i is the current iter number + + !Dump if we need it + call write_dump(i) + + !Thermo if we need it + if(mod(i,thermo_every)==0) call write_thermo_out(iter) + + !Debug if we need it + if(dflag) call run_debug + + need_virial = .false. + return + end subroutine post_iter + + subroutine iterate(i, mincode) + !This subroutine calls the correct dynamics to run + !i is the current iter number + integer, intent(in) :: i + integer, intent(out) :: mincode + select case(min_style) + case(1) + call fire_iterate(i, mincode) + case(2) + call cg_iterate(mincode) + end select + + end subroutine iterate + +end module minimize diff --git a/src/modify.f90 b/src/modify.f90 new file mode 100644 index 0000000..cd1ed78 --- /dev/null +++ b/src/modify.f90 @@ -0,0 +1,48 @@ +module modify + !This module is in charge of code which changes various aspects of current running program, mainly regarding options + + use parameters + use neighbors + use elements + use potential + + implicit none + + public + contains + + subroutine parse_modify(line) + character(len=*), intent(in) :: line + + integer :: i, acount, iospara + character(len=read_len) :: args(20), msg + logical :: l + + acount = tok_count(line) + + read(line,*) (args(i), i=1, acount) + + i = 2 + do while (i < acount) + select case(args(i)) + case('atomap_neighbors') + i=i+1 + read(args(i), *, iostat=iospara, iomsg=msg) l + if(iospara > 0) call read_error(msg, iospara) + if(l) then + atomap_neighbors = .true. + else + atomap_neighbors = .false. 
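+                        !(the two branches above simply mirror the parsed logical, i.e. atomap_neighbors = l)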
+ end if + + case default + write(msg, *) "Argument ", args(i), " not accepted in modify command" + end select + + i = i + 1 + end do + + + end subroutine parse_modify + +end module modify diff --git a/src/morse.f90 b/src/morse.f90 new file mode 100644 index 0000000..f5d95da --- /dev/null +++ b/src/morse.f90 @@ -0,0 +1,268 @@ +module morse + + !subroutines for pairtab potential style + + use parameters + use math + use forces + use elements + use integration + use neighbors + use comms + use errors + use atom_types + + implicit none + + real(kind=wp), allocatable, private, save :: pair_spline(:,:,:) + real(kind=wp), private, dimension(max_atom_types, max_atom_types), save :: pair_cutoffsq, d0, alpha, r0, morse1 + + public + contains + + subroutine add_morse_potential(type1, type2, ind0, inalpha, inr0, incutoff) + integer, intent(in) :: type1, type2 + real(kind=wp), intent(in) :: ind0, inalpha, inr0, incutoff + + integer :: i,j, mn + + d0(type1, type2) = ind0 + d0(type2, type1) = ind0 + alpha(type1, type2) = inalpha + alpha(type2, type1) = inalpha + r0(type1, type2) = inr0 + r0(type2, type1) = inr0 + pair_cutoffsq(type1, type2)=incutoff*incutoff + pair_cutoffsq(type2, type1)=incutoff*incutoff + morse1(type1, type2) = 2*inalpha*ind0 + morse1(type2, type1) = 2*inalpha*ind0 + + if((types_to_pot_type(type1, type2) > 0).or.(types_to_pot_type(type2,type1) > 0)) then + call misc_error("Can't define multiple potentials for single pair interaction") + end if + types_to_pot_type(type1,type2)=ibset(types_to_pot_type(type1,type2),2) + types_to_pot_type(type2,type1)=ibset(types_to_pot_type(type2,type1),2) + + if(.not.def_nei) then + def_nei=.true. + rc_neigh=incutoff + else + rc_neigh=max(rc_neigh, incutoff) + end if + + end subroutine add_morse_potential + + subroutine update_force_morse + integer :: i, ia, ie, iep, jep, ja, iatomap, jatomap, iatom, latomtype, katomtype, nei, mn_list, ibasis, ic, inod, ip + real(kind=wp) :: rl(3), rk(3), rlk(4), dr, d_exp, pair, flk(3), eshape, rsq + + real(kind=wp), dimension(max_basisnum*max_intpo_num, ele_num_l) :: energy_intpo + real(kind=wp), dimension(3, max_basisnum*max_intpo_num, ele_num_l) :: force_intpo + real(kind=wp), dimension(3,3, max_basisnum*max_intpo_num, ele_num_l) :: virial_intpo + + mn_list=0 + do i = 1,2 + if(potential_types(i)) mn_list=mn_list+1 + end do + energy_intpo=0.0_wp + force_intpo=0.0_wp + virial_intpo=0.0_wp + + !First calculate energy/force for atoms + if (atom_num > 0) then + do ia=1, atom_num_l + rl(:)=r_atom(:, ia) + latomtype=type_atom(ia) + + do nei = 1, n_at_at(ia, mn_list) + ja= at_nei_at(nei,ia,mn_list) + rk=r_atom(:, ja) + katomtype=type_atom(ja) + + rlk(1:3)=rl-rk + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < pair_cutoffsq(latomtype, katomtype)) then + rlk(4)=sqrt(rsq) + dr=rlk(4)-r0(latomtype, katomtype) + d_exp=exp(-alpha(latomtype, katomtype)*dr) + flk=rlk(1:3)*(morse1(latomtype, katomtype)*(d_exp*d_exp-d_exp)/rlk(4)) + pair=d0(latomtype, katomtype)*(d_exp*d_exp-2.0_wp*d_exp) + + energy_atom(ia)=energy_atom(ia)+pair/2.0_wp + + force_atom(:, ia) = force_atom(:, ia) + flk + + if(need_virial) then + do ic = 1, 3 + virial_atom(:, ic, ia) = virial_atom(:, ic, ia) + flk(:) * rlk(ic) / 2.0_wp + end do + end if + if (ja <= atom_num_l) then + energy_atom(ja)=energy_atom(ja) + pair/2.0_wp + force_atom(:, ja) = force_atom(:, ja) - flk + if(need_virial) then + do ic = 1, 3 + virial_atom(:, ic, ja) = virial_atom(:, ic, ja) + flk(:) * rlk(ic) / 2.0_wp + end do + end if + end if + end if + end do + do nei=1, n_at_cg(ia, 
mn_list) + ja = at_nei_cg(nei, ia, mn_list) + rk(:) = r_atomap(:, ja) + katomtype = type_atomap(ja) + + rlk(1:3) = rl - rk + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < pair_cutoffsq(latomtype, katomtype)) then + rlk(4)=sqrt(rsq) + dr=rlk(4)-r0(latomtype, katomtype) + d_exp=exp(-alpha(latomtype, katomtype)*dr) + flk=rlk(1:3)*(morse1(latomtype, katomtype)*(d_exp*d_exp-d_exp)/rlk(4)) + pair=d0(latomtype, katomtype)*(d_exp*d_exp-2.0_wp*d_exp) + energy_atom(ia)=energy_atom(ia)+pair/2.0_wp + force_atom(:, ia) = force_atom(:, ia) + flk + + if(need_virial) then + do ic = 1, 3 + virial_atom(:, ic, ia) = virial_atom(:, ic, ia) + flk(:) * rlk(ic) / 2.0_wp + end do + end if + + if(ja <= atomap_num_l) then + if(atomap_to_intpo(1,ja)/=0) then + jep = atomap_to_intpo(1,ja) + ie = atomap_to_intpo(2,ja) + + force_intpo(:,jep,ie) = force_intpo(:,jep,ie) - flk(:) + if(need_virial)then + do ic = 1, 3 + virial_intpo(:, ic, jep,ie) = virial_intpo(:, ic, jep,ie) + flk(:) * rlk(ic) / 2.0_wp + end do + end if + energy_intpo(jep,ie) = energy_intpo(jep,ie) + pair/2.0_wp + end if + + end if + end if + end do + end do + end if + + if(ele_num_l > 0) then + do ie=1, ele_num_l + do iep=1, intpo_count(itype(ie)) + do ibasis=1, basis_num(ie) + jep=basis_num(ie)*(iep-1)+ibasis + if(who_has_intpo(iep, ie)) then + iatom = atom_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + iatomap = cg_atomap(basis_num(ie)*(iatom-1) + ibasis, ie) + latomtype = type_atomap(iatomap) + rl(:) = r_atomap(:, iatomap) + + do nei=1, n_cg_cg(jep, ie, mn_list) + ja=cg_nei_cg(nei, jep, ie, mn_list) + + rk(:) = r_atomap(:, ja) + katomtype = type_atomap(ja) + + rlk(1:3) = rl - rk + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < pair_cutoffsq(latomtype,katomtype)) then + + rlk(4) = sqrt(rsq) + + dr=rlk(4)-r0(latomtype, katomtype) + d_exp=exp(-alpha(latomtype, katomtype)*dr) + flk=rlk(1:3)*(morse1(latomtype, katomtype)*(d_exp*d_exp-d_exp)/rlk(4)) + pair=d0(latomtype, katomtype)*(d_exp*d_exp-2.0_wp*d_exp) + + force_intpo(:, jep, ie) = force_intpo(:, jep, ie) + flk(:) + if(need_virial) then + do ic = 1, 3 + virial_intpo(:, ic, jep, ie) = virial_intpo(:, ic, jep, ie) + flk(:) * rlk(ic) / 2.0_wp + end do + end if + energy_intpo(jep, ie) = energy_intpo(jep, ie)+ pair/2.0_wp + end if + end do + + do nei=1, n_cg_at(jep, ie, mn_list) + + ja = cg_nei_at(nei, jep, ie, mn_list) + rk(:) = r_atom(:, ja) + katomtype = type_atom(ja) + + rlk(1:3) = rl - rk + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < pair_cutoffsq(latomtype,katomtype)) then + rlk(4) = sqrt(rsq) + + dr=rlk(4)-r0(latomtype, katomtype) + d_exp=exp(-alpha(latomtype, katomtype)*dr) + flk=rlk(1:3)*(morse1(latomtype, katomtype)*(d_exp*d_exp-d_exp)/rlk(4)) + pair=d0(latomtype, katomtype)*(d_exp*d_exp-2.0_wp*d_exp) + + force_intpo(:, jep, ie) = force_intpo(:, jep, ie) + flk(:) + if(need_virial) then + do ic = 1, 3 + virial_intpo(:, ic, jep, ie) = virial_intpo(:, ic, jep, ie) + flk(:) * rlk(ic) / 2.0_wp + end do + end if + energy_intpo(jep, ie) = energy_intpo(jep, ie)+ pair/2.0_wp + + end if + end do + end if + end do + end do + + do iep= 1, intpo_count(itype(ie)) + do ibasis=1, basis_num(ie) + jep=basis_num(ie)*(iep-1)+ibasis + iatom=atom_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + iatomap=cg_atomap(basis_num(ie)*(iatom-1)+ ibasis, ie) + + if(who_has_intpo(jep, ie).eqv..true.) 
then + force_intpo(:, jep, ie) = force_intpo(:, jep, ie) & + * weight_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + + if(need_virial) then + virial_intpo(:, :, jep, ie) = virial_intpo(:, :, jep, ie) & + * weight_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + end if + energy_intpo(jep, ie) = energy_intpo(jep, ie) & + * weight_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + end if + end do + end do + + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do iep = 1, intpo_count(itype(ie)) + do ibasis = 1, basis_num(ie) + jep = basis_num(ie)*(iep-1) + ibasis + if(who_has_intpo(iep, ie).eqv..true.) then + + iatom = atom_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + eshape = a_interpo(inod, iatom, size_to_shape(size_ele(ie)), etype(ie)) + force(:, ibasis, ip ) = force(:, ibasis, ip) + eshape * force_intpo(:, jep, ie) + + if(need_virial) then + virial(:, :, ibasis, ip) = virial(:, :, ibasis, ip) & + + eshape * virial_intpo(:, :, jep, ie) + end if + + energy(ibasis, ip) = energy(ibasis, ip) + eshape*energy_intpo(jep, ie) + + end if + end do + end do + end do + + end do + end if + end subroutine update_force_morse + +end module morse diff --git a/src/neighbors.f90 b/src/neighbors.f90 new file mode 100644 index 0000000..b85fdfc --- /dev/null +++ b/src/neighbors.f90 @@ -0,0 +1,2413 @@ +module neighbors + !This contains all of the neighbor running code + use parameters + use integration + use math + use comms + use logger + use forces + use elements + use group + use temp + use min_arrays + + implicit none + + logical :: reneighbored, nei_init + + !Variables associated ith the actual linked cell list data structures + integer, private :: num_cell(3), cell_num, num_cell_max, cell_atomap_lim, cell_atom_lim, delay, last_reneighbor + integer, allocatable :: cell_atomap(:,:), cell_atom(:,:), num_cell_atomap(:), num_cell_atom(:), v_cell_nei(:), & + num_celldiv_atpo(:,:), cell_neighbor(:,:), which_cell_intpo(:,:), num_neighbor_cell(:), & + which_cell_atom(:), which_cell_atomap(:) + logical, allocatable :: bound_cell(:,:) + real(kind = wp) :: rc_bin + + !Variables for the actual neighbor lists + integer, private :: nei_lim, builds, nei_lists + integer, allocatable :: cg_nei_at(:,:,:,:), n_cg_at(:,:,:), cg_nei_cg(:,:,:,:), n_cg_cg(:,:,:), & + at_nei_at(:,:,:), n_at_at(:,:), at_nei_cg(:,:,:), n_at_cg(:,:), & + atomap_nei_cg(:,:,:), n_atomap_cg(:,:), atomap_nei_at(:,:,:), n_atomap_at(:, :) + + integer, save, dimension(max_pot_types):: neilist_to_pot + logical :: def_nei, neis_updated, atomap_neighbors, need_all_step + + !Keep position of atoms and elements last time they were checked + real(kind=wp), allocatable :: r_update(:,:,:), r_atom_update(:,:) + + public + contains + + +subroutine neighbors_defaults + !set default values + rc_neigh = 0.0_wp + def_nei = .false. + rc_bin = 1.0_wp + neis_updated = .false. + delay = 1 + last_reneighbor = 0 + builds = 0 + atomap_neighbors = .false. + nei_init=.false. + need_all_step=.true. 
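+    !Notes on the defaults above: rc_bin is the verlet skin distance (angstrom) that
+    !parse_neighbors adds to the potential cutoff via set_rc_neigh, delay is the minimum
+    !number of steps between re-neighboring checks in update_neighbor, and lists are
+    !rebuilt once something has moved by more than half of rc_bin since the last build.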
+end subroutine neighbors_defaults + +subroutine parse_neighbors(line) + !Parse neighbor command to get neighbor rc_neigh + character(len = *), intent(in) :: line + character(len = read_len) :: label, args(10), msg + integer :: iospara, i, j + + read(line, *, iostat = iospara) label, rc_bin + + if(.not.def_nei) then + call misc_error("Please define potential before running neighbor command") + else if (def_nei) then + call set_rc_neigh(rc_neigh+rc_bin) + end if + + !Read additional arguments + j = tok_count(line) + if (j > 2) then + read(line, *, iostat = iospara) args + i = 3 + do while(i <= j) + select case(args(i)) + case('delay') + i = i + 1 + read(args(i), *) delay + if(delay < 0) then + call command_error("Delay must be greater than 0 for neighbor command") + end if + i=i+1 + write(msg, *) "Neighbor delay set to ", delay + call log_msg(msg) + case('no_need_all_step') + need_all_step=.false. + case('need_all_step') + need_all_step=.true. + case default + write(msg, *) "Keyword ", trim(adjustl(args(i))), " is not accepted in neighbor command" + call command_error(msg) + end select + end do + end if + + return +end subroutine parse_neighbors + + +subroutine set_rc_neigh(val) + real(kind = wp), intent(in) :: val + character(len = read_len) :: msg + + rc_neigh = val + write(msg, *) "Neighbor list cutoff distance set to ", rc_neigh + call log_msg(msg) + + !Now call the processor boundary code to set up the ghost boundaries + call processor_bds +end subroutine set_rc_neigh + +subroutine print_neighbor_info + integer :: iep, ie + integer :: max_nei, min_nei, max_neime, min_neime + real(kind = wp) :: avg_neime, avg_nei + character(len=read_len) :: msg + !This subroutine just prints some information about the neighbor lists + + !Get min, max, and average number of neighbors + min_neime = minval(n_at_at(1:atom_num_l, :)+n_at_cg(1:atom_num_l, :)) + max_neime = maxval(n_at_at(1:atom_num_l, :)+n_at_cg(1:atom_num_l, :)) + avg_neime = sum(n_at_at(1:atom_num_l, :)+n_at_cg(1:atom_num_l, :)) + + do ie = 1, ele_num_l + do iep = 1, intpo_count(itype(ie))*basis_num(ie) + if(who_has_intpo(iep, ie) ) then + min_neime = min(min_neime, minval(n_cg_at(iep, ie, :)+n_cg_cg(iep,ie, :))) + max_neime = max(max_neime, maxval(n_cg_at(iep,ie, :)+n_cg_cg(iep,ie, :))) + avg_neime = avg_neime + sum(n_cg_at(iep, ie, :) + n_cg_cg(iep, ie, :)) + end if + end do + end do + + !Now reduce them + avg_neime = real(avg_neime,wp)/real(atom_num+intpo_num,wp) + + call mpi_reduce(min_neime, min_nei, 1, mpi_integer, mpi_min, root, world, ierr) + call mpi_reduce(max_neime, max_nei, 1, mpi_integer, mpi_max, root, world, ierr) + call mpi_reduce(avg_neime, avg_nei, 1, mpi_wp, mpi_sum, root, world, ierr) + + if(rank == root) then + write(msg, *) "Min, max, and average neighbor counts are:", min_nei, max_nei, avg_nei + call log_msg(msg) + + write(msg, *) "Neighbor list cutoff is: ", rc_neigh + call log_msg(msg) + end if +end subroutine print_neighbor_info + +subroutine update_arrays + !This subroutine allocates the update arrays if necessary + if(node_num_l > 0) then + !Deallocate if our arrays grew + if(allocated(r_update)) then + if(node_num_l > size(r_update,3)) deallocate(r_update) + end if + + !Allocate if necessary + if(.not.allocated(r_update)) then + allocate(r_update(3,max_basisnum, node_num_lr), stat=allostat) + if(allostat > 0 ) call alloc_error("Failure allocating r_update in update_arrays", allostat) + end if + + !Assign r_update + r_update(:,:,1:node_num_l) = r(:,:,1:node_num_l) + end if + + !This subroutine allocates the 
update arrays if necessary + if(atom_num_l > 0) then + !Deallocate if our arrays grew + if(allocated(r_atom_update)) then + if (atom_num_l > size(r_atom_update,2)) deallocate(r_atom_update) + end if + + !Allocate if necessary + if(.not.allocated(r_atom_update)) then + allocate(r_atom_update(3,atom_num_lr)) + !if(allostat > 0 ) call alloc_error("Failure allocating r_atom_update in update_arrays", allostat) + end if + + !Assign r_update + r_atom_update(:,1:atom_num_l) = r_atom(:,1:atom_num_l) + end if +end subroutine update_arrays + +subroutine neighbor_lists + real(kind = wp) :: t_start, t_end + !This code initializes neighbor lists + !If init == 0 then that means this is not the first time this was called + ! whereas if init == 1 then it is the first time this function is called + integer :: i, j + + !Start timer + t_start = mpi_wtime() + + !First call cell initialization functions + if(allocated(which_cell_intpo).or.allocated(which_cell_atom)) call dealloc_cell + call cell_init + call update_cell + + !Allocate arrays as needed + nei_lim = 100 + if (.not. nei_init) then + j=1 + neilist_to_pot=0 + do i = 1, max_pot_types + if(potential_types(i)) then + neilist_to_pot(j)=i + j=j+1 + end if + end do + end if + if(nei_init) call dealloc_nei_arrays + call alloc_nei_arrays + + if(ele_num_l > 0) then + needed_atomap=.false. + call neighbor_list_cg + end if + + if(atom_num_l> 0) call neighbor_list_at + + if(atomap_neighbors) then + call neighbor_list_atomap + else + !If we aren't fully calculating the cluster potential then we just set everything to false again + if(ele_num_l > 0) needed_atomap=.false. + end if + + + !Assign update arrays + call update_arrays + + !Get original centroid and log neighbor info + if(.not.nei_init) then + call get_centroid(center_mass_ori) + call print_neighbor_info + end if + + t_end = mpi_wtime() + + walltime(1) = walltime(1) + (t_end - t_start) + + !Increment neighbor list + builds = builds + 1 + nei_init=.true. 
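+
+    !At this point the linked cells and the verlet lists (atom/atomap lists, the
+    !integration-point lists and, optionally, the atomap lists) have been rebuilt;
+    !builds tracks the total number of rebuilds and nei_init records that at least
+    !one build has completed.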
+ + return +end subroutine neighbor_lists + +subroutine alloc_nei_arrays + !Allocate neighbor arrays + integer :: i, j + nei_lists=0 + j=1 + do i = 1, max_pot_types + if(potential_types(i)) then + nei_lists = nei_lists+1 + neilist_to_pot(j)=i + j=j+1 + end if + end do + + if(ele_num_l > 0) allocate( cg_nei_cg(100, max_basisnum*max_intpo_num, ele_num_l, nei_lists), & + n_cg_cg(max_basisnum*max_intpo_num, ele_num_l, nei_lists), & + cg_nei_at(100, max_basisnum*max_intpo_num, ele_num_l, nei_lists), & + n_cg_at(max_basisnum*max_intpo_num, ele_num_l, nei_lists), & + needed_atomap(atomap_num_l), & + stat = allostat) + + if(atom_num_l > 0) allocate(at_nei_at(100, atom_num_l, nei_lists), & + n_at_at(atom_num_l, nei_lists), & + at_nei_cg(100, atom_num_l, nei_lists), & + n_at_cg(atom_num_l, nei_lists), & + stat = allostat) + + if(atomap_neighbors) allocate(atomap_nei_cg(100, atomap_num_l, nei_lists), & + n_atomap_cg(atomap_num_l, nei_lists), & + atomap_nei_at(100, atomap_num_l, nei_lists), & + n_atomap_at(atomap_num_l, nei_lists), & + stat = allostat) + + return +end subroutine alloc_nei_arrays + +subroutine dealloc_nei_arrays + !deallocate neighbor arrays + + if(ele_num_l > 0) deallocate(cg_nei_cg, n_cg_cg, cg_nei_at, n_cg_at, needed_atomap, stat = allostat) + + if(atom_num_l > 0) deallocate(at_nei_at, n_at_at, at_nei_cg, n_at_cg, stat = allostat) + + if(atomap_neighbors) deallocate(atomap_nei_at, n_atomap_at, atomap_nei_cg, n_atomap_cg) + return +end subroutine dealloc_nei_arrays + +subroutine dealloc_cell + !Deallocate cell arrays + deallocate(cell_atomap, cell_atom, num_cell_atomap, num_cell_atom, num_celldiv_atpo, cell_neighbor, & + v_cell_nei, num_neighbor_cell, bound_cell, stat = allostat) + + !Deallocate atom/atomap location arrays + deallocate(which_cell_intpo, which_cell_atom, which_cell_atomap) + + return +end subroutine dealloc_cell + +subroutine cell_init + !This subroutine initializes various things required for the linked cell list neighbor listing code + integer :: i, v_cell_temp(3) + + !Calculate cell numbers along each dimension and total + do i = 1, 3 + num_cell(i) = int(pro_length_out(i)/rc_neigh) + if (num_cell(i) == 0) num_cell(i) = 1 + end do + cell_num = product(num_cell) + num_cell_max=maxval(num_cell)+1 + cell_atom_lim = 100 + cell_atomap_lim = 100 + + !Allocate cell arrays now + allocate( cell_atomap(100, cell_num), & + cell_atom(100, cell_num), & + num_cell_atomap(cell_num), & + num_cell_atom(cell_num), & + num_celldiv_atpo(num_cell_max, 3), & + cell_neighbor(27, cell_num), & + v_cell_nei(27), & + num_neighbor_cell(cell_num), & + bound_cell(6,27), & + stat = allostat) + + !Allocate atom/atomap location arrays + allocate( which_cell_intpo(max_intpo_num*max_basisnum, ele_num_l), & + which_cell_atom(atom_num_lg), which_cell_atomap(atomap_num_lg)) + + !NOw initialize some variables for listing the neighboring cells + v_cell_temp(1) = 1 + v_cell_temp(2) = num_cell(1) + v_cell_temp(3) = num_cell(2)*num_cell(1) + + v_cell_nei(:) = 0.0_wp + bound_cell(:,:) = .false. + + !Neighbors in x direction + v_cell_nei(1) = v_cell_temp(1) + bound_cell(2, 1) = .true. + + v_cell_nei(2) = -v_cell_temp(1) + bound_cell(1, 2) = .true. + + !y direction + + v_cell_nei(3) = v_cell_temp(2) + bound_cell(4, 3) = .true. + + v_cell_nei(4) = -v_cell_temp(2) + bound_cell(3, 4) = .true. + + !x-y plane + + v_cell_nei(5) = v_cell_nei(1)+v_cell_nei(3) + bound_cell(2, 5) = .true. + bound_cell(4, 5) = .true. + + v_cell_nei(6) = v_cell_nei(2)+v_cell_nei(3) + bound_cell(1, 6) = .true. + bound_cell(4, 6) = .true. 
+ + v_cell_nei(7) = v_cell_nei(1)+v_cell_nei(4) + bound_cell(2, 7) = .true. + bound_cell(3, 7) = .true. + + v_cell_nei(8) = v_cell_nei(2)+v_cell_nei(4) + bound_cell(1, 8) = .true. + bound_cell(3, 8) = .true. + + v_cell_nei(9) = v_cell_temp(3) + bound_cell(6, 9) = .true. + + v_cell_nei(10) = v_cell_nei(1)+v_cell_nei(9) + bound_cell(2, 10) = .true. + bound_cell(6, 10) = .true. + + v_cell_nei(11) = v_cell_nei(2)+v_cell_nei(9) + bound_cell(1, 11) = .true. + bound_cell(6, 11) = .true. + + v_cell_nei(12) = v_cell_nei(3)+v_cell_nei(9) + bound_cell(4, 12) = .true. + bound_cell(6, 12) = .true. + + v_cell_nei(13) = v_cell_nei(4)+v_cell_nei(9) + bound_cell(3, 13) = .true. + bound_cell(6, 13) = .true. + + v_cell_nei(14) = v_cell_nei(5)+v_cell_nei(9) + bound_cell(2, 14) = .true. + bound_cell(4, 14) = .true. + bound_cell(6, 14) = .true. + + v_cell_nei(15) = v_cell_nei(6)+v_cell_nei(9) + bound_cell(1, 15) = .true. + bound_cell(4, 15) = .true. + bound_cell(6, 15) = .true. + + v_cell_nei(16) = v_cell_nei(7)+v_cell_nei(9) + bound_cell(2, 16) = .true. + bound_cell(3, 16) = .true. + bound_cell(6, 16) = .true. + + v_cell_nei(17) = v_cell_nei(8)+v_cell_nei(9) + bound_cell(1, 17) = .true. + bound_cell(3, 17) = .true. + bound_cell(6, 17) = .true. + + !lower x-y plane + + v_cell_nei(18) = -v_cell_temp(3) + bound_cell(5, 18) = .true. + + v_cell_nei(19) = v_cell_nei(1)+v_cell_nei(18) + bound_cell(2, 19) = .true. + bound_cell(5, 19) = .true. + + v_cell_nei(20) = v_cell_nei(2)+v_cell_nei(18) + bound_cell(1, 20) = .true. + bound_cell(5, 20) = .true. + + v_cell_nei(21) = v_cell_nei(3)+v_cell_nei(18) + bound_cell(4, 21) = .true. + bound_cell(5, 21) = .true. + + v_cell_nei(22) = v_cell_nei(4)+v_cell_nei(18) + bound_cell(3, 22) = .true. + bound_cell(5, 22) = .true. + + v_cell_nei(23) = v_cell_nei(5)+v_cell_nei(18) + bound_cell(2, 23) = .true. + bound_cell(4, 23) = .true. + bound_cell(5, 23) = .true. + + v_cell_nei(24) = v_cell_nei(6)+v_cell_nei(18) + bound_cell(1, 24) = .true. + bound_cell(4, 24) = .true. + bound_cell(5, 24) = .true. + + v_cell_nei(25) = v_cell_nei(7)+v_cell_nei(18) + bound_cell(2, 25) = .true. + bound_cell(3, 25) = .true. + bound_cell(5, 25) = .true. + + v_cell_nei(26) = v_cell_nei(8)+v_cell_nei(18) + bound_cell(1, 26) = .true. + bound_cell(3, 26) = .true. + bound_cell(5, 26) = .true. 
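+
+    !Entries 1-26 of v_cell_nei now hold the face, edge and corner offsets of the 3x3x3
+    !cell stencil; entry 27 keeps its initialized value of 0 so that each cell is also
+    !included as its own neighbor when update_cell_neighbor assembles cell_neighbor.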
+ + call update_cell_neighbor +end subroutine cell_init + +subroutine update_cell_neighbor + !This code gets the neighboring cells to build the cell_neighbor list + + integer :: i, ice, jce, nei, nei_real + integer, dimension(27) :: cell_neighbor_temp, cell_neighbor_real + + !cell_neighbor and num_neighbor_cell + cell_neighbor(:, :) = 0 + num_neighbor_cell(:) = 0 + + do ice = 1, cell_num + + nei = 0 + cell_neighbor_temp(:) = 0 + + do i = 1, 27 + jce = ice + v_cell_nei(i) + + if((jce > 0).and.(jce <= cell_num)) then + nei = nei + 1 + cell_neighbor_temp(nei) = jce + end if + end do + + call delete_duplicate(cell_neighbor_temp, 27, cell_neighbor_real, nei_real) + + if(nei_real == 0) then + print *, 'Error: Cell', ice, ' does not have neighboring cell' + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + cell_neighbor(1:nei_real, ice) = cell_neighbor_real(1:nei_real) + num_neighbor_cell(ice) = nei_real + + end do + + return +end subroutine update_cell_neighbor + +subroutine update_cell + !Sort atom and atomaps to cells + + integer :: i, j, ia, ie, iep, ice, iatom, iatomap, ibasis + integer, dimension(3) :: num_index + integer, allocatable :: cell_atomp_array(:, :), num_cell_intpo(:) + + !coarse-grained domain + if(ele_num /= 0) then + + !put atomap into cells + cell_atomap(:, :) = 0 + which_cell_atomap(:) = 0 + num_cell_atomap(:) = 0 + + do iatomap = 1, atomap_num_lg + num_index(:) = 0 + + do i = 1, 3 + num_index(i) = int((r_atomap(i, iatomap) - pro_bd_out(2*i-1))/rc_neigh) + if(num_index(i) > num_cell(i)) then + + print *, 'Error: Atomap', iatomap, ' of rank', rank, ' with position', & + r_atomap(i, iatomap), ' has wrong cell index', & + num_index(i), ' along', i, & + ' direction which is larger than', num_cell(i), ' .', & + ' The cell boundaries are', pro_bd_out(2*i-1:2*i) + call mpi_abort(mpi_comm_world, 1, ierr) + + else if(num_index(i) == num_cell(i)) then + num_index(i) = num_cell(i) - 1 + + end if + end do + + ice = 1 + num_index(1) + num_index(2) * num_cell(1) + num_index(3) * num_cell(1) * num_cell(2) + + if((ice < 1).or. 
(ice > cell_num)) then + print *, 'Error: Wrong ice', ice, ' in update_cell', & + ' which should be between 1 and', cell_num + call mpi_abort(mpi_comm_world, 1, ierr) + + else + num_cell_atomap(ice) = num_cell_atomap(ice) + 1 + + if(num_cell_atomap(ice) > cell_atomap_lim) then + allocate(cell_atomp_array(cell_atomap_lim+20, cell_num), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate cell_atomp_array in update_cell", allostat) + + cell_atomp_array(1:cell_atomap_lim, :) = cell_atomap(:, :) + cell_atomp_array(cell_atomap_lim+1:, :) = 0 + call move_alloc(cell_atomp_array, cell_atomap) + cell_atomap_lim = cell_atomap_lim+20 + + end if + + cell_atomap(num_cell_atomap(ice), ice) = iatomap + which_cell_atomap(iatomap) = ice + + end if + end do + + !debug + + if(maxval(num_cell_atomap) > cell_atomap_lim) then + print *, 'Error: cell_atomap_lim, which is', cell_atomap_lim, & + ' should be at least', maxval(num_cell_atomap) + call mpi_abort(mpi_comm_world, 1, ierr) + + else if(sum(num_cell_atomap) /= atomap_num_lg) then + print *, 'Error: Wrong number of atomap in cell', sum(num_cell_atomap), & + ' which should be', atomap_num_lg, ' which suggests that', & + ' probably some atomap are not in any cell' + call mpi_abort(mpi_comm_world, 1, ierr) + + end if + + !put intpo into cells + allocate(num_cell_intpo(cell_num), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate num_cell_intpo in update_cell", allostat) + + num_cell_intpo(:) = 0 + which_cell_intpo(:, :) = 0 + + do ie = 1, ele_num_l + j = size_to_shape(size_ele(ie)) + + do iep = 1, intpo_count(itype(ie)) + do ibasis = 1, basis_num(ie) + if(who_has_intpo(basis_num(ie)*(iep-1)+ibasis, ie).eqv..true.) then + + !Get the atomap position + iatom = atom_intpo(iep, j, itype(ie)) + iatomap = cg_atomap(basis_num(ie)*(iatom-1) + ibasis, ie) + if(iatomap == 0) then + print *, 'Error: Iatomap', iatomap, ' can not be zero' + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !Get the atomap cell index + num_index(:) = 0 + do i = 1, 3 + num_index(i) = int((r_atomap(i,iatomap) - pro_bd_out(2*i-1))/rc_neigh) + !check to make sure that the index is correct + if(num_index(i) > num_cell(i)) then + print *, 'Error: Intpo', iep, ' of element', ie, & + ' has wrong cell index', num_index(i), ' along', & + i, ' direction which is larger than', num_cell(i) + call mpi_abort(mpi_comm_world, 1, ierr) + + else if(num_index(i) == num_cell(i)) then + num_index(i) = num_cell(i) - 1 + + end if + end do + + !ice is the cell index + ice = 1 + num_index(1) + num_index(2) * num_cell(1) + num_index(3) * num_cell(1) * num_cell(2) + + !Again check to make sure that the cell index is correct + if((ice < 1).or.(ice > cell_num)) then + print *, 'Error: Wrong ice', ice, ' in update_cell cg which should be between 1 and', cell_num + call mpi_abort(mpi_comm_world, 1, ierr) + + else + which_cell_intpo(basis_num(ie)*(iep-1)+ibasis, ie) = ice + num_cell_intpo(ice) = num_cell_intpo(ice) + 1 + end if + end if + end do + end do + + end do + + if(sum(num_cell_intpo) /= intpo_num_l) then + print *, 'Error: The number of integration point contained in all', & + ' cells', sum(num_cell_intpo), & + ' does not match intpo_num_l', intpo_num_l + + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !IF no elements then we don't do anything + else + cell_atomap(:, :) = 0 + num_cell_atomap(:) = 0 + + end if + + !put atom into cells + if(atom_num /= 0) then + cell_atom(:, :) = 0 + which_cell_atom(:) = 0 + num_cell_atom(:) = 0 + + do ia = 1, atom_num_lg + + 
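+            !Bin this real or ghost atom: along each direction the index is
+            !int((r_atom(i,ia) - pro_bd_out(2*i-1))/rc_neigh), clamped to num_cell(i)-1
+            !at the upper edge, and the flat cell index is
+            !ice = 1 + ix + iy*num_cell(1) + iz*num_cell(1)*num_cell(2).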
num_index(:) = 0 + + do i = 1, 3 + num_index(i) = int((r_atom(i, ia) - pro_bd_out(2*i-1)) / rc_neigh) + if(num_index(i) > num_cell(i)) then + print *, 'Error: Atom', ia, ' of rank', rank, ' with position', & + r_atom(i, ia), & + ' has wrong cell index', num_index(i), ' along', i, & + ' direction which is larger than', num_cell(i), ' .', & + ' The processor outer boundaries are', pro_bd_out(2*i-1:2*i) + call mpi_abort(mpi_comm_world, 1, ierr) + + else if(num_index(i) == num_cell(i)) then + num_index(i) = num_cell(i) - 1 + end if + end do + + ice = 1 + num_index(1) + num_index(2) * num_cell(1) + num_index(3) * num_cell(1) * num_cell(2) + + if((ice < 1).or.(ice > cell_num)) then + print *, 'Error: Wrong ice', ice, ' in update_cell atomistic', & + ' which should be between 1 and', cell_num + + call mpi_abort(mpi_comm_world, 1, ierr) + + else + num_cell_atom(ice) = num_cell_atom(ice) + 1 + + if(num_cell_atom(ice) > cell_atom_lim) then + + allocate( cell_atomp_array(cell_atom_lim+20, cell_num), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate cell_atom_array", allostat) + + cell_atomp_array(1:cell_atom_lim, :) = cell_atom(:, :) + cell_atomp_array(cell_atom_lim+1:, :) = 0 + call move_alloc(cell_atomp_array, cell_atom) + + cell_atom_lim = cell_atom_lim + 20 + + end if + + cell_atom(num_cell_atom(ice), ice) = ia + which_cell_atom(ia) = ice + end if + end do + + !debug + + if(maxval(num_cell_atom) > cell_atom_lim) then + + print *, 'Error: cell_atom_lim, which is', cell_atom_lim, & + ' should be at least', maxval(num_cell_atom) + + call mpi_abort(mpi_comm_world, 1, ierr) + + else if(sum(num_cell_atom) /= atom_num_lg) then + + print *, 'Error: The number of atoms contained in all cells', & + sum(num_cell_atom), ' does not match atom_num_lg', atom_num_lg + + call mpi_abort(mpi_comm_world, 1, ierr) + + end if + + else + num_cell_atom(:) = 0 + cell_atom(:, :) = 0 + + end if + + return + end subroutine update_cell + +subroutine neighbor_list_cg + + integer :: ie, iep, jep, ice, nei_ice, jce, iatom, jatom, iatomap, ja, jatomap, ibasis, n_at(nei_lists), n_cg (nei_lists), i + real(kind = wp) :: rsq, rc_neisq + real(kind = wp), dimension(3) :: rl, rk, rlk + integer, allocatable :: intpo_neighbor_array(:, :, :, :) + + !Initialize variables + cg_nei_cg = 0 + cg_nei_at = 0 + n_cg_cg = 0 + n_cg_at = 0 + + rc_neisq = rc_neigh*rc_neigh + do ie = 1, ele_num_l + + do iep = 1, intpo_count(itype(ie)) + !Get the adjusted index taking into account the basis counts + do ibasis = 1, basis_num(ie) + jep = basis_num(ie)*(iep-1)+ibasis + + if(who_has_intpo(jep, ie).eqv..true.) then + + iatom = atom_intpo(iep, size_to_shape(size_ele(ie)), itype(ie)) + + !The way the code is structured, atom_intpo contains only the position of integration point when + !counting the lattice sites of the finite elements. cg_atomap contains the full range of interpolated + !atoms, with atoms in the same basis next to each other.This code loops over both atoms at the + !integration point and calculates separate neighbor lists for them from the interpolated atom list. + + iatomap = cg_atomap(basis_num(ie) *(iatom-1) + ibasis, ie) + rl(:) = r_atomap(:, iatomap) + ice = which_cell_intpo(jep, ie) + + n_cg = 0 + n_at = 0 + !Loops over all neighboring cells. 
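+                    !One list is kept per active pair style (nei_lists of them, selected
+                    !through types_to_pot_type and neilist_to_pot): cg_nei_cg collects
+                    !interpolated-atom (atomap) neighbors and cg_nei_at collects ghost
+                    !real-atom neighbors of this integration point.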
+ do nei_ice = 1, num_neighbor_cell(ice) + jce = cell_neighbor(nei_ice, ice) + + if((jce < 1).or.(jce > cell_num)) then + print *, 'Error: Wrong neighboring cell index', jce, & + ' which should be between 1 and', cell_num + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !Loops over all the interpolated atoms within that neighboring cell + do jatom = 1, num_cell_atomap(jce) + jatomap = cell_atomap(jatom, jce) + + if(jatomap /= iatomap) then + + !Check to see if these are neighbors, if they are then add them to the list + rk(:) = r_atomap(:, jatomap) + rlk = rk - rl + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < rc_neisq) then + !Add neighbor to list + do i= 1, nei_lists + if(btest(types_to_pot_type(type_atomap(iatomap), type_atomap(jatomap)), & + neilist_to_pot(i))) then + n_cg(i) = n_cg(i) + 1 + !Check to see if we need to resize arrays + if(n_cg(i) > size(cg_nei_cg,1)) then + nei_lim = size(cg_nei_cg,1) + allocate(intpo_neighbor_array(nei_lim+20,max_basisnum*max_intpo_num,ele_num_l, & + nei_lists), stat = allostat) + + if(allostat /= 0) call alloc_error("Failure to allocate atom_neighbor/ac_array", & + allostat) + + intpo_neighbor_array(:, :, :, :) = 0 + intpo_neighbor_array(1:nei_lim, :, :, :) = cg_nei_cg + call move_alloc(intpo_neighbor_array, cg_nei_cg) + + end if + + cg_nei_cg(n_cg(i), jep, ie, i) = jatomap + end if + end do + end if + end if + end do + + !Now loop over all real atoms, we only need ghost atom neighbors here + do jatom = 1, num_cell_atom(jce) + ja = cell_atom(jatom, jce) + if(ja > atom_num_l) then + rk(:) = r_atom(:, ja) + + rlk = rk - rl + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < rc_neisq) then + do i= 1, nei_lists + if(btest(types_to_pot_type(type_atomap(iatomap), type_atom(ja)), & + neilist_to_pot(i))) then + n_at(i) = n_at(i) + 1 + !Check to see if we need to resize arrays + if(n_at(i) > size(cg_nei_at,1)) then + nei_lim = size(cg_nei_at,1) + allocate(intpo_neighbor_array(nei_lim+20,max_basisnum*max_intpo_num,ele_num_l, & + nei_lists), stat = allostat) + + if(allostat /= 0) call alloc_error("Failure to allocate atom_neighbor/ac_array", & + allostat) + + intpo_neighbor_array(:, :, :, :) = 0 + intpo_neighbor_array(1:nei_lim, :, :, :) = cg_nei_at + call move_alloc(intpo_neighbor_array, cg_nei_at) + + end if + + cg_nei_at(n_at(i), jep, ie, i) = ja + end if + end do + end if + end if + end do + end do + + n_cg_at(jep, ie,:) = n_at + n_cg_cg(jep, ie,:) = n_cg + + if((sum(n_at+n_cg))==0) then + print *, 'Warning: Integration point', iep, ' of element', ie, & + ' in cell', ice, ' of rank', rank, ' does not have neighbor' + print *, "Integration point has position ", rl, " for atomap ", iatomap + print *, "Nodes are at position:" + do i = 1, 8 + print *, r(:, 1, cg_node(i, ie)) + end do + print *, "Box bounds are: ", box_bd + call mpi_abort(mpi_comm_world, 1, ierr) + end if + end if + end do + end do + end do + + return +end subroutine neighbor_list_cg + +subroutine neighbor_list_at + !Build neighbor list for atomistic + integer :: ia, ja, ice, nei_ice, jce, jatom, jatomap, n_at(nei_lists), n_cg(nei_lists), i + real(kind=wp) :: rsq, rc_neisq + real(kind = wp), dimension(3) :: rl, rk, rlk + integer, allocatable :: atom_neighbor_array(:, :, :) + + !Initialize arrays + at_nei_at = 0 + n_at_at = 0 + at_nei_cg = 0 + n_at_cg = 0 + + rc_neisq = rc_neigh*rc_neigh + + do ia = 1, atom_num_l + + rl(:) = r_atom(:, ia) + ice = which_cell_atom(ia) + n_at = 0 + n_cg = 0 + + !Loop over all neighboring cells + do nei_ice = 1, 
num_neighbor_cell(ice) + jce = cell_neighbor(nei_ice, ice) + + if((jce < 1).or.(jce > cell_num)) then + print *, 'Error: Wrong neighboring cell index', jce, & + ' which should be between 1 and', cell_num + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !Loop over all atomaps in neighboring cells + do jatom = 1, num_cell_atomap(jce) + + jatomap = cell_atomap(jatom, jce) + rk(:) = r_atomap(:, jatomap) + + rlk = rk - rl + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < rc_neisq) then + + do i= 1, nei_lists + if(btest(types_to_pot_type(type_atom(ia), type_atomap(jatomap)), neilist_to_pot(i))) then + n_cg(i) = n_cg(i) + 1 + !Check to see if we need to resize arrays + if(n_cg(i) > size(at_nei_cg,1)) then + nei_lim = size(at_nei_cg,1) + allocate(atom_neighbor_array(nei_lim+20, atom_num_l, nei_lists), stat = allostat) + + if(allostat /= 0) call alloc_error("Failure to allocate atom_neighbor/ac_array", allostat) + + atom_neighbor_array(:, :, :) = 0 + atom_neighbor_array(1:nei_lim, :, :) = at_nei_cg + call move_alloc(atom_neighbor_array, at_nei_cg) + + end if + + at_nei_cg(n_cg(i), ia, i) = jatomap + if(jatomap<=atomap_num_l) needed_atomap(jatomap) = .true. + end if + end do + end if + end do + + !Loop over all atoms in neighboring cells + do jatom = 1, num_cell_atom(jce) + ja = cell_atom(jatom, jce) + + !Only save if ia < ja + if(ia < ja) then + + rk(:) = r_atom(:, ja) + + rlk = rk - rl + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < rc_neisq) then + + !Add neighbor to list + do i= 1, nei_lists + if(btest(types_to_pot_type(type_atom(ia), type_atom(ja)), neilist_to_pot(i))) then + n_at(i) = n_at(i) + 1 + !Check to see if we need to resize arrays + if(n_at(i) > size(at_nei_at,1)) then + nei_lim = size(at_nei_at,1) + allocate(atom_neighbor_array(nei_lim+20, atom_num_l, nei_lists), stat = allostat) + + if(allostat /= 0) call alloc_error("Failure to allocate atom_neighbor/ac_array", allostat) + + atom_neighbor_array(:, :, :) = 0 + atom_neighbor_array(1:nei_lim, :, :) = at_nei_at + call move_alloc(atom_neighbor_array, at_nei_at) + + end if + + at_nei_at(n_at(i), ia, i) = ja + end if + end do + end if + end if + end do + end do + + n_at_at(ia,:) = n_at + n_at_cg(ia,:) = n_cg + end do + + return +end subroutine neighbor_list_at + +subroutine neighbor_list_atomap + !Build neighbor list for virtual atoms, this is only used in case we want to fuully calculate the cluster potential correctly + !Instead of imposing uniform electron density on along element subdomains. + integer :: ia, ja, ice, nei_ice, jce, jatom, jatomap, n_at(nei_lists), n_cg(nei_lists), counts, i + real(kind=wp) :: rsq, rc_neisq + real(kind = wp), dimension(3) :: rl, rk, rlk + integer, allocatable :: atom_neighbor_array(:, :, :) + character(len=read_len) :: msg + + !Initialize arrays + atomap_nei_cg = 0 + atomap_nei_at = 0 + n_atomap_cg = 0 + n_atomap_at = 0 + + rc_neisq = rc_neigh*rc_neigh + + do ia = 1, atomap_num_l + + !Assume that an atomap that is sent as a ghost is needed, may need to change this for speed at some point + rl(:) = r_atomap(:, ia) + if(.not.in_block_bd(rl, pro_bd_in)) then + needed_atomap(ia) = .true. 
+ end if + if(needed_atomap(ia)) then + ice = which_cell_atomap(ia) + n_at = 0 + n_cg = 0 + + !Loop over all neighboring cells + do nei_ice = 1, num_neighbor_cell(ice) + jce = cell_neighbor(nei_ice, ice) + + if((jce < 1).or.(jce > cell_num)) then + print *, 'Error: Wrong neighboring cell index', jce, & + ' which should be between 1 and', cell_num + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !Loop over all atomaps in neighboring cells + do jatom = 1, num_cell_atomap(jce) + + jatomap = cell_atomap(jatom, jce) + rk(:) = r_atomap(:, jatomap) + + rlk = rk - rl + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < rc_neisq) then + + !Add neighbor to list + do i= 1, nei_lists + if(btest(types_to_pot_type(type_atomap(ia), type_atomap(jatomap)), neilist_to_pot(i))) then + n_cg(i) = n_cg(i) + 1 + !Check to see if we need to resize arrays + if(n_cg(i) > size(atomap_nei_cg,1)) then + nei_lim = size(atomap_nei_cg,1) + allocate(atom_neighbor_array(nei_lim+20, atom_num_l, nei_lists), stat = allostat) + + if(allostat /= 0) call alloc_error("Failure to allocate atom_neighbor/ac_array", allostat) + + atom_neighbor_array(:, :, :) = 0 + atom_neighbor_array(1:nei_lim, :, :) = atomap_nei_cg + call move_alloc(atom_neighbor_array, atomap_nei_cg) + + end if + + atomap_nei_cg(n_cg(i), ia, i) = jatomap + end if + end do + end if + end do + + !Now loop over all real atoms, we only need ghost atom neighbors here + do jatom = 1, num_cell_atom(jce) + ja = cell_atom(jatom, jce) + if(ja > atom_num_l) then + rk(:) = r_atom(:, ja) + + rlk = rk - rl + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq < rc_neisq) then + + !Add neighbor to list + do i= 1, nei_lists + if(btest(types_to_pot_type(type_atomap(ia), type_atomap(jatomap)), neilist_to_pot(i))) then + n_at(i) = n_at(i) + 1 + !Check to see if we need to resize arrays + if(n_at(i) > size(atomap_nei_at,1)) then + nei_lim = size(atomap_nei_at,1) + allocate(atom_neighbor_array(nei_lim+20, atom_num_l, nei_lists), stat = allostat) + + if(allostat /= 0) call alloc_error("Failure to allocate atom_neighbor/ac_array", allostat) + + atom_neighbor_array(:, :, :) = 0 + atom_neighbor_array(1:nei_lim, :, :) = atomap_nei_at + call move_alloc(atom_neighbor_array, atomap_nei_at) + + end if + + atomap_nei_at(n_at(i), ia, i) = jatomap + end if + end do + end if + end if + end do + + n_atomap_cg(ia, :) = n_cg + n_atomap_at(ia, :) = n_at + end do + end if + end do + + call mpi_reduce(count(needed_atomap), counts, 1, mpi_integer, mpi_sum, root, world, ierr) + write(msg, *) "Calculating electron density for ", counts, " out of ", atomap_num, " virtual atoms" + call log_msg(msg) + + return +end subroutine neighbor_list_atomap + +subroutine update_neighbor(time, arg1, arg2) + !This subroutine updates the neighbor lists when needed + integer, intent(in) :: time + !Arg 1 lets us know if we are calling this from the conjugate gradient code. Special code needs to be called when running cg + logical, intent(in), optional :: arg1, arg2 + integer :: ie, i, ip, info(3), ibasis, ia, inod + logical :: need_updateall, need_updateme, cg, force_reneighbor + real(kind=wp) :: center_mass_disp(3), r_old(3), rc_binsq, rlk(3), rsq, t_start, t_end + + character(len=read_len) :: msg + + !Start timer + t_start = mpi_wtime() + + if(present(arg1)) then + cg = arg1 + else + cg = .false. + end if + + if(present(arg2)) then + force_reneighbor = arg2 + else + force_reneighbor = .false. + end if + + !Reneighbored lets us know if we moved atoms/nodes between processors. 
This is important as some data + !structures may need to be reset if this happens + reneighbored = .false. + + !First check if we actually need to update the neighbor list by using the verlet list method + need_updateall = .false. + need_updateme = .false. + + !Get the movement of the centroid + call get_centroid(center_mass) + center_mass_disp = center_mass - center_mass_ori + + !Update pb_node and r when using periodic boundaries, only needed if we are exchanging + + if((ele_num > 0).and.(periodic)) then + do ie = 1, ele_num_l + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + call cross_pb(r(:, ibasis, ip), info) + pb_node(:, ibasis, ip) = pb_node(:, ibasis, ip) + info(:) + end do + end do + end do + end if + + if((atom_num > 0).and.periodic) then + do ia = 1, atom_num_l + call cross_pb(r_atom(:, ia),info) + end do + end if + + + if(force_reneighbor) then + need_updateall = .true. + else if(time-last_reneighbor > delay) then + r_old(:) = 0.0_wp + rc_binsq = rc_bin*rc_bin/4.0_wp + if (ele_num > 0) then + ip_loop: do ip = 1, node_num_l + do ibasis = 1, basis_num(node_cg(ip)) + !Check to see if nodse have moved by half the bin distance + rlk = r(:,ibasis, ip) - r_update(:, ibasis, ip) + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq > rc_binsq) then + need_updateme = .true. + exit ip_loop + end if + end do + end do ip_loop + end if + + if(atom_num > 0) then + !Only check atoms if the elements don't trigger reneighboring code + if(.not.need_updateme) then + do ia = 1, atom_num_l + rlk = r_atom(:,ia) - r_atom_update(:, ia) + rsq = rlk(1)*rlk(1) + rlk(2)*rlk(2) + rlk(3)*rlk(3) + if(rsq > rc_binsq) then + need_updateme = .true. + exit + end if + end do + end if + end if + + !Gather need_update + call mpi_allreduce(need_updateme, need_updateall, 1, mpi_logical, mpi_lor, world, ierr) + else + need_updateall = .false. + end if + + + + ! If we don't have to reneighbor then just update ghost atom/virtual atom positions + if(.not.need_updateall) then + !only update needed virtual atoms + call update_virtual_atoms(need_all_step) + call update_proc_bd(.false.) + if(ele_num > 0) then + !if(tflag) call apply_perturbation +! call ghost_cg + call processor_atomap + end if + + if(atom_num > 0) then + call processor_atomistic + end if + !Set the updated flag to false + neis_updated = .false. + else + !update all + call update_virtual_atoms(.true.) + !Now check to rescale processor boundaries if shrink-wrapped in that dimension + call update_proc_bd + + reneighbored = .true. + + + !write(msg,*) "Updating neighbor list at", time + !call log_msg(msg) + + !Update lists + !Check pro_length + do i = 1, 3 + if(pro_length(i) < 2.0_wp * rc_neigh) then + write(msg, *) "Pro_length along the ", i, " direction ", pro_length(i), " of rank ", rank, & + " is smaller than", 2.0_wp*rc_neigh + call misc_error(msg) + end if + end do + + if(ele_num /= 0) call update_neighbor_cg(cg) + if(atom_num /= 0) call update_neighbor_at(cg) + + !Resize force_arrays + call resize_force_arrays +! if(ele_num_l > 0) then +! if(node_num_l > size(r_update,3)) then +! deallocate(r_update) +! allocate(r_update(3, max_basisnum, node_num_l), stat = allostat) +! call alloc_error("Failure to allocate r_update in update_neighbor", allostat) +! end if +! r_update(:,:,1:node_num_l) = r(:,:, 1:node_num_l) +! end if +! +! if(atom_num_l > 0) then +! if(atom_num_l > size(r_atom_update,2)) then +! deallocate(r_atom_update) +! allocate(r_atom_update(3, atom_num_l), stat = allostat) +! 
call alloc_error("Failure to allocate r_atom_update in update_neighbor", allostat) +! end if +! r_atom_update(:,1:atom_num_l) = r_atom_update(:,1:atom_num_l) +! end if + + !Now if we are using temperature control, we apply the perturbation here + !if(tflag) call apply_perturbation + + !Update ghosts + if(ele_num > 0) call ghost_cg + if(atom_num > 0) call ghost_at + + !Now update cell lists + call neighbor_lists + + !Set the update nei flag to true + neis_updated = .true. + end if + t_end = mpi_wtime() + walltime(1) = walltime(1) + (t_end-t_start) + return +end subroutine update_neighbor + +subroutine update_neighbor_cg(arg1) + + logical, intent(in), optional :: arg1 + + logical :: cg + integer :: i, iatom, inod, ie, je, ke, ip, jp, delete_n, ibasis, send_n, send_n_sum, & + seg_num_real, irank, ele_num_ln, pb_in(3, max_basisnum, ng_max_node), n_ints, n_reals, & + iint, ireal, et, tag, id, esize, bnum, btype(max_basisnum), & + iatomap, node_num_ln, mask, n_cg, icg + integer, dimension(pro_num) :: displs, recv_counts, counts + real(kind=wp) :: r_interp(3,max_basisnum) + real(kind = wp), dimension(3, max_basisnum, ng_max_node) :: r_nodes, vel_nodes, force_nodes, r0, g, h + integer, allocatable :: delete_buff(:), delete_array(:), send_buff(:), send_array(:), & + recv_array(:), who_ele(:), who_ele_all(:) + + logical, allocatable :: ele_shared(:), who_has_array(:) + + real(kind = wp), allocatable :: send_reals(:), recv_reals(:), force_array(:,:,:), send_cg(:), recv_cg(:) + integer, allocatable :: send_ints(:), recv_ints(:), integer_buff(:) + character(len=read_len) :: msg + + if(present(arg1)) then + cg = arg1 + else + cg = .false. + end if + + allocate(who_ele(ele_num), ele_shared(ele_num), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate who_ele and ele_shared", allostat) + + who_ele(:) = 0 + ele_shared(:) = .false. + seg_num_real = seg_num + + !This series of loops checks to see whether any part of an element belongs to the current processor + do ie = 1, ele_num_l + + ke = ele_glob_id(ie) + ele_shared(ke) = .true. + + !Put position and pb node into continuous array + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + r_nodes(:,:,inod) = r(:,:,ip) + if(periodic) then + pb_in(:,:,inod) = pb_node(:,:,ip) + else + pb_in(:,:,inod) = 0 + end if + end do + !Check all atoms to get the count of virtual atoms that are within the processor boundaries + do iatom = 1, get_virtual_count(etype(ie), size_ele(ie)) + call interp_atom(iatom, size_ele(ie), etype(ie), pb_in, basis_num(ie), r_nodes, r_interp) + do ibasis = 1, basis_num(ie) + if(in_block_bd(r_interp(:,ibasis), pro_bd)) then + who_ele(ke) = who_ele(ke) + 1 + end if + end do + end do + end do + +! who_ele_all, this gathers the number of atomaps in for each element in each processor and then sums them. +! This is to ensure that all the interpolated atoms are correctly assigned. 
+ allocate(who_ele_all(ele_num), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate who_ele_all", allostat) + + + who_ele_all(:) = 0 + call mpi_allreduce(who_ele, who_ele_all, ele_num, mpi_integer, mpi_sum, mpi_comm_world, ierr) + + !Now check to make sure calculated virtual atom number isn't greater than the real virtual atom number + if(rank == root) then + if(sum(who_ele_all) > atomap_num) then + print *, 'Error: The sum of who_ele_all', sum(who_ele_all), & + ' should not be larger than atomap_num', atomap_num + call mpi_abort(mpi_comm_world, 1, ierr) + end if + end if + + !delete any element if none of its atomap is within pro_bd + allocate(delete_buff(seg_num), stat = allostat) + + if(allostat /= 0) call alloc_error("Failure to allocate delete_buff", allostat) + + delete_n = 0 + delete_buff(:) = 0 + seg_num_real = seg_num + + do ie = 1, ele_num_l + ke = ele_glob_id(ie) + !If we don't have any virtual atoms and the element doesn't belong to use then add it to the delete list + if((who_ele(ke) == 0).and.(who_has_ele(ie).eqv..false.)) then + + delete_n = delete_n + 1 + ele_shared(ke) = .false. + if(delete_n > seg_num_real) then + allocate(delete_array(seg_num_real+seg_num), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate delete array in update neighbor cg", allostat) + + delete_array(1:seg_num_real) = delete_buff(:) + delete_array(seg_num_real+1:) = 0 + call move_alloc(delete_array, delete_buff) + seg_num_real = seg_num_real + seg_num + end if + delete_buff(delete_n) = ie + end if + end do + + !prepare send_buff when who_has_ele is true + allocate(send_buff(seg_num), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate send buff", allostat) + + send_n = 0 + send_buff(:) = 0 + seg_num_real = seg_num + !This loop just counts the number of elements that you information to send for. 
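+    !(that is, elements owned by this rank whose interpolated atoms are not yet fully
+    !accounted for across all ranks, so their nodal data must be gathered by everyone)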
+ do ie = 1, ele_num_l + !If we own the element + if(who_has_ele(ie)) then + ke = ele_glob_id(ie) + + + if(who_ele_all(ke) > basis_num(ie)*get_virtual_count(etype(ie), size_ele(ie))) then + print *, 'Error: Who_ele_all', who_ele_all(ke), & + ' of global element', ke, ' of rank', rank, & + ' should not be larger than', get_virtual_count(etype(ie), size_ele(ie)) + call mpi_abort(mpi_comm_world, 1, ierr) + + !If the total counts is less than the real counts then that means the element is in the bounds of a + !processor that doesn't have it so we have to send it + else if(who_ele_all(ke) < basis_num(ie)*get_virtual_count(etype(ie), size_ele(ie))) then + send_n = send_n + 1 + + if(send_n > seg_num_real) then + + allocate(send_array(seg_num_real+seg_num), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate send_array in update_neighbor_cg", allostat) + + send_array(1:seg_num_real) = send_buff(:) + send_array(seg_num_real+1:) = 0 + call move_alloc(send_array, send_buff) + seg_num_real = seg_num_real + seg_num + end if + !Add this element to the send list + send_buff(send_n) = ie + end if + end if + end do + + !Get the total number of sends + call mpi_allreduce(send_n, send_n_sum, 1, mpi_integer, mpi_sum, mpi_comm_world, ierr) + + !Calculate counts of data to send per element + n_ints = 6+max_basisnum + n_reals = 3*max_basisnum*ng_max_node + if(periodic) then + n_ints = n_ints+3*max_basisnum*ng_max_node + end if + if(need_vel) then + n_reals = n_reals + 3*max_basisnum*ng_max_node + end if + if(need_force_pre) then + n_reals = n_reals + 3*max_basisnum*ng_max_node + end if + + !Allocate all send and receive buffs + allocate(send_ints(send_n*n_ints), send_reals(send_n*n_reals), & + recv_ints(send_n_sum*n_ints), recv_reals(send_n_sum*n_reals), & + recv_array(send_n_sum), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate send/recv_buff", allostat) + !Initialize allocated variables + send_ints(:) = 0 + recv_ints(:) = 0 + send_reals(:) = 0.0_wp + recv_reals(:) = 0.0_wp + recv_array = 0 + + !If needed then initialize cg arrays + if(cg) then + n_cg=3*3*max_basisnum*ng_max_node + allocate(send_cg(send_n*n_cg), recv_cg(send_n_sum*n_cg)) + end if + + do ie = 1, send_n + je = send_buff(ie) + iint = n_ints*(ie-1)+1 + ireal = n_reals*(ie-1)+1 + + !Get the nodal variables into sequential array + do inod = 1, ng_node(etype(je)) + ip = cg_node(inod, je) + r_nodes(:,:,inod) = r(:,:,ip) + if (periodic) pb_in(:,:,inod) = pb_node(:,:,ip) + if(need_vel) vel_nodes(:,:,inod) = vel(:,:,ip) + if(need_force_pre) force_nodes(:,:,inod) = force_eq_pre(:, :, ip) + end do + + + !Pack into to the send arrays + call pack_ele_neighbor(etype(je), tag_ele(je), ele_glob_id(je), size_ele(je), e_mask(je), basis_num(je), basis_type(:,je), & + r_nodes, pb_in, vel_nodes, force_nodes, send_ints(iint:iint+n_ints-1), send_reals(ireal:ireal+n_reals-1)) + + if(cg) then + !First get the nodal cg variables into a sequential array + icg = n_cg*(ie-1)+1 + do inod=1, ng_node(etype(je)) + ip = cg_node(inod,je) + r0(:,:,inod)=rzeronode(:, :, ip) + g(:,:,inod)=gnode(:, :, ip) + h(:,:,inod)=hnode(:, :, ip) + end do + + !Now pack into send_cg array + call pack_ele_cg(ng_node(etype(je)), basis_num(je), r0, g, h, send_cg(icg:icg+n_cg-1)) + + end if + end do + + recv_ints(:) = 0 + recv_reals(:) = 0.0_wp + + !If one proc then the recv_arrays are equal to the send_arrays + if(pro_num == 1) then + recv_ints = send_ints + recv_reals = send_reals + + !Otherwise we have to prepare all gatherv + else + !Get the number 
of elements being send by each processor + recv_counts(:) = 0 + call mpi_allgather(send_n, 1, mpi_integer, recv_counts, 1, mpi_integer, mpi_comm_world, ierr) + + !Get the displacements and the real data count for the integer data + displs(:) = 0 + do irank = 1, pro_num-1 + displs(irank+1) = displs(irank) + recv_counts(irank)*n_ints + end do + counts = recv_counts*n_ints + !allgatherv ints + call mpi_allgatherv(send_ints, send_n*n_ints, mpi_integer, & + recv_ints, counts, displs, & + mpi_integer, world, ierr) + + !Get the displacements and the real data count for the real data + displs(:) = 0 + do irank = 1, pro_num-1 + displs(irank+1) = displs(irank) + recv_counts(irank)*n_reals + end do + counts = recv_counts*n_reals + !allgatherv real + call mpi_allgatherv(send_reals, send_n*n_reals, mpi_wp, & + recv_reals, counts, displs, & + mpi_wp, world, ierr) + + if(cg) then + !Get the displacements and the real data count for the real data + displs(:) = 0 + do irank = 1, pro_num-1 + displs(irank+1) = displs(irank) + recv_counts(irank)*n_cg + end do + counts = recv_counts*n_cg + !allgatherv real + call mpi_allgatherv(send_cg, send_n*n_cg, mpi_wp, & + recv_cg, counts, displs, & + mpi_wp, world, ierr) + end if + end if + +! Now delete delete_buff from the original array, +! move the end of the array forward to fill the blank +! do not delete the element when who_has_ele is true, +! or new master proc needs to be assigned + + ele_num_ln = ele_num_l + node_num_ln = node_num_l + do ie = delete_n, 1, -1 + + je = delete_buff(ie) + node_num_ln = node_num_ln - ng_node(etype(je)) + if(je < ele_num_ln) then + + tag_ele(je) = tag_ele(ele_num_ln) + size_ele(je) = size_ele(ele_num_ln) + etype(je) = etype(ele_num_ln) + e_mask(je) = e_mask(ele_num_ln) + basis_num(je) = basis_num(ele_num_ln) + basis_type(:,je) = basis_type(:, ele_num_ln) + ele_glob_id(je) = ele_glob_id(ele_num_ln) + + !Reassign who has ele + if(who_has_ele(je).eqv..true.) then + print *, 'Error: Who_has_ele(je) of je', je, ' must be .false.' + call mpi_abort(mpi_comm_world, 1, ierr) + else + who_has_ele(je) = who_has_ele(ele_num_ln) + who_has_ele(ele_num_ln) = .false. + end if + + do inod = 1, ng_node(etype(je)) + + jp = cg_node(inod, je) + ip = cg_node(inod, ele_num_ln) + r(:, :, jp) = r(:, :, ip) + + if(periodic.eqv..true.) then + pb_node(:, :, jp) = pb_node(:, :, ip) + end if + if(need_vel) then + vel(:, :, jp) = vel(:, :, ip) + end if + if(need_force_pre) then + force_eq_pre(:, :, jp) = force_eq_pre(:, :, ip) + end if + end do + + if(cg) then + do inod = 1, ng_node(etype(je)) + jp = cg_node(inod, je) + ip = cg_node(inod, ele_num_ln) + + rzeronode(:, :, jp) = rzeronode(:,:, ip) + gnode(:, :, jp) = gnode(:,:, ip) + hnode(:, :, jp) = hnode(:,:, ip) + end do + end if + + else if(je > ele_num_ln) then + print *, 'Error: je', je, ' should not be larger than', & + ' ele_num_ln', ele_num_ln + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + ele_num_ln = ele_num_ln - 1 + end do + + if(ele_num_ln /= ele_num_l) then + reneighbored = .true. + !write(msg,*)'Rank', rank, ' deletes', ele_num_l - ele_num_ln, ' elements' + !call log_msg(msg, 1, .true.) 
+ end if + + !append recv_array to the original array, increase array size when necessary + je = ele_num_ln + jp = node_num_ln + do ie = 1, send_n_sum + ireal = n_reals*(ie-1)+1 + iint = n_ints*(ie-1)+1 + !Unpack the current element + call unpack_ele_neighbor(recv_ints(iint:iint+n_ints-1), recv_reals(ireal:ireal+n_reals-1), et, tag, id, esize, mask, & + bnum, btype, pb_in, r_nodes, vel_nodes, force_nodes) + + if(cg) then + icg = n_cg*(ie-1)+1 + + call unpack_ele_cg(ng_node(et), bnum, recv_cg(icg:icg+n_cg-1), r0, g, h) + end if + !If ele_shared then we already have this element + if(ele_shared(id)) then + recv_array(ie) = 1 + + !Otherwise we need to check to see if we need to add it to our list + else + !Loop over all the interpolated atoms + outloop: do iatom = 1, get_virtual_count(et, esize) + call interp_atom(iatom, esize, et, pb_in, bnum, r_nodes, r_interp) + do ibasis = 1, bnum + !Check if it's in our boundary, if it is then add it, if it isn't then go to the next one + if (in_block_bd(r_interp(:, ibasis), pro_bd)) then + if(recv_array(ie) == 1) then + write(msg, *) "Element ", ie, " has been taken by rank ", rank + call misc_error(msg) + end if + + !Add this element + je = je+1 + !Resize element arrays if needed + if(je > ele_num_lr) call grow_cg_arrays(1) + + !Add new element to arrays + recv_array(ie) = 1 + tag_ele(je) = tag + etype(je) = et + ele_glob_id(je) = id + e_mask(je) = mask + size_ele(je) = esize + basis_num(je) = bnum + basis_type(:, je) = btype + + !check to see if need to resize node arrays + if(jp + ng_node(et) > node_num_lr) call grow_cg_arrays(2) + + if(need_force_pre) then + if (jp+ng_node(et) > size(force_eq_pre,3)) then + allocate(force_array(3, max_basisnum, node_num_lr), stat = allostat) + if(allostat > 0) call alloc_error("Failure to allocate force array in update_nei_cg", allostat) + force_array(:,:,1:size(force_eq_pre,3)) = force_eq_pre + force_array(:,:,size(force_eq_pre,3)+1:) = 0.0_wp + call move_alloc(force_array, force_eq_pre) + end if + end if + !Now assign node arrays + do inod = 1, ng_node(et) + jp = jp + 1 + cg_node(inod, je) = jp + node_cg(jp) = je + r(:,:,jp) = r_nodes(:,:,inod) + if(periodic) pb_node(:, :, jp) = pb_in(:, :, inod) + if(need_vel) vel(:, :, jp) = vel_nodes(:, :, inod) + if(need_force_pre) force_eq_pre(:, :, jp) = force_nodes(:,:,inod) + end do + + if(cg) then + if(jp+ng_node(et) > size(rzeronode,3)) call grow_ele_min_arrays + do inod = 1, ng_node(et) + jp = cg_node(inod, je) + rzeronode(:,:,jp) = r0(:,:,inod) + gnode(:,:,jp) = g(:,:,inod) + hnode(:,:,jp) = h(:,:,inod) + end do + end if + + !If we added an element then we exit the loop over interpolated atoms and move to the next element + exit outloop + end if + end do + end do outloop + end if + end do + + if(je > ele_num_ln) then + reneighbored = .true. + !write(msg, *) "Rank ", rank, " adds ", je-ele_num_ln, " elements" + !call log_msg(msg,1, .true.) 
+ end if + + node_num_l = jp + ele_num_l = je + + !Rebuild itype array now that elements lists have changed + call update_itype + + !Double check that ele_num_l summed isn't smaller than ele_num + call mpi_reduce(ele_num_l, i, 1, mpi_integer, mpi_sum, root, mpi_comm_world, ierr) + if(rank == root) then + if( i < ele_num) then + write(msg, *) "Total number of elements ", i, " should not be smaller than ele_num ", ele_num + call misc_error(msg) + end if + end if + + !Resize tag_ele_shared, pro_shared_num, who_has_ele + allocate(integer_buff(ele_num_l), stat=allostat) + if (allostat>0) call alloc_error("Failure to resize tag_ele_shared in update_nei_cg", allostat) + integer_buff(:) = 0 + call move_alloc(integer_buff, ele_id_shared) + + allocate(integer_buff(ele_num_l), stat=allostat) + if (allostat>0) call alloc_error("Failure to resize pro_shared_num in update_nei_cg", allostat) + integer_buff(:) = 0 + call move_alloc(integer_buff, pro_shared_num) + + if(ele_num_l > size(who_has_ele)) then + allocate(who_has_array(ele_num_l), stat = allostat) + if (allostat > 0) call alloc_error("Failure to allocate who_has_array in update_nei_cg", allostat) + who_has_array(1:size(who_has_ele)) = who_has_ele(:) + who_has_array(size(who_has_ele)+1:) = .false. + call move_alloc(who_has_array, who_has_ele) + end if + + !Update the shared elements + who_ele(:) = 0 + do ie = 1,ele_num_l + ke = ele_glob_id(ie) + who_ele(ke) = 1 + end do + + who_ele_all(:) = 0 + call mpi_allreduce(who_ele, who_ele_all, ele_num, mpi_integer, mpi_sum, mpi_comm_world, ierr) + + if(rank == root) then + if(sum(who_ele_all) < ele_num) then + write(msg, *) "The sum of who_ele_all", sum(who_ele_all), " should not be smaller than ", ele_num + call misc_error(msg) + end if + end if + + allocate(integer_buff(ele_num), stat=allostat) + if(allostat > 0) call alloc_error("Failure to allocate integer_buff in update_nei_cg", allostat) + + integer_buff(:) = 0 + je = 0 + do ie = 1, ele_num + !If who_ele_all(ie) > 1 that means it's a shared element + if(who_ele_all(ie) > 1) then + je = je + 1 + integer_buff(ie) = je + end if + end do + + ele_shared_num = je + ele_id_shared=0 + do ie = 1, ele_num_l + ke = ele_glob_id(ie) + pro_shared_num(ie) = who_ele_all(ie) + je = integer_buff(ke) + if(je /= 0) then + ele_id_shared(ie) = je + end if + end do + + !Now resize the atomap arrays + if(ele_num_lr > size(cg_atomap,2)) then + deallocate(cg_atomap, stat = allostat) + allocate(cg_atomap(atomap_max_ele, ele_num_lr), stat = allostat) + if(allostat > 0) call alloc_error("Failure allocating cg_atomap in update_nei_cg", allostat) + end if + + who_ele(:) = 0 + r_atomap(:,:) = 0.0_wp + type_atomap(:) = 0 + cg_atomap(:,:) = 0 + atomap_num_l = 0 + !Loop over all elements + do ie = 1, ele_num_l + ke = ele_glob_id(ie) + + r_nodes = 0.0_wp + pb_in = 0 + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + r_nodes(:,:,inod) = r(:,:, ip) + if(periodic) pb_in(:,:,inod) = pb_node(:,:,ip) + end do + !Loop over all virtual atoms + do iatom = 1, get_virtual_count(etype(ie), size_ele(ie)) + call interp_atom(iatom, size_ele(ie), etype(ie), pb_in, basis_num(ie), r_nodes, r_interp) + do ibasis = 1, basis_num(ie) + !If it is in our boundaries then add it to the list + if(in_block_bd(r_interp(:,ibasis),pro_bd)) then + atomap_num_l = atomap_num_l + 1 + if (atomap_num_l > atomap_num_lr) call grow_cg_arrays(3) + + who_ele(ke) = who_ele(ke) + 1 + cg_atomap(basis_num(ie)*(iatom-1) + ibasis, ie) = atomap_num_l + r_atomap(:, atomap_num_l) = r_interp(:, ibasis) + 
type_atomap(atomap_num_l) = basis_type(ibasis,ie) + + !Debug statement to check whether it is in the box at all + else if(.not. in_block_bd(r_interp(:,ibasis), box_bd)) then + print *, "Interpolated atom ", iatom, " of element ", ie, " on rank ", rank, "is outside box boundaries" + print *, rank, ie, cg_node(1, ie), r(:, 1, cg_node(1, ie)) + call mpi_abort(mpi_comm_world, 1, ierr) + + end if + end do + end do + + end do + + !Now check to make sure all the values are correct + if(atomap_num_l > atomap_num_lr) call misc_error("Atomap_num_lr can't be greater than atomap_num_l in update_neigh_cg") + + who_ele_all(:) = 0 + call mpi_allreduce(who_ele, who_ele_all, ele_num, mpi_integer, mpi_sum, world, ierr) + !Check to make sure all the atomap counts are right + do ie = 1, ele_num_l + !If the total counts are greater than the real counts then exit with error + if(who_has_ele(ie)) then + ke = ele_glob_id(ie) + if(who_ele_all(ke) /= basis_num(ie)*get_virtual_count(etype(ie), size_ele(ie))) then + write(msg, *) "Who_ele_all ", who_ele_all(ke), " of global element ", ke, " should be ", & + basis_num(ie)*get_virtual_count(etype(ie), size_ele(ie)) + call misc_error(msg) + end if + end if + end do + + if(rank == root) then + if(sum(who_ele_all) /= atomap_num) then + write(msg, *) "The sum of who_ele_all ", sum(who_ele_all), " should equal atomap_num ", atomap_num + call misc_error(msg) + end if + end if + + call mpi_reduce(atomap_num_l, i, 1, mpi_integer, mpi_sum, root, world, ierr) + if(rank == root) then + if(i /= atomap_num) then + write(msg, *) "Total atomap number ", i, " should equal ", atomap_num + call misc_error(msg) + end if + end if + + !Now assign new tags + recv_counts(:) = 0 + call mpi_allgather(atomap_num_l, 1, mpi_integer, recv_counts, 1, mpi_integer, mpi_comm_world, ierr) + displs(:) = 0 + do irank = 1, pro_num-1 + displs(irank+1) = displs(irank) + recv_counts(irank) + end do + do iatomap = 1, atomap_num_l + tag_atomap(iatomap) = displs(rank+1)+iatomap + end do + + !Resize integration point arrays + if(ele_num_l > size(who_has_intpo, 2)) then + deallocate(who_has_intpo, which_cell_intpo, stat=allostat) + allocate(who_has_intpo(max_intpo_num, ele_num_l), which_cell_intpo(max_intpo_num, ele_num_l), stat = allostat) + if (allostat > 0) call alloc_error("Failure to allocate/deallocate who_has_intpo in update_nei_cg", allostat) + end if + + !update integration points + call update_intpo + return +end subroutine update_neighbor_cg + +subroutine update_neighbor_at(arg1) + !Transfer atoms between processors + logical, intent(in), optional :: arg1 + + logical :: cg + integer :: i, ix, iy, iz, ia, ja, send_n, send_n_sum, & + seg_num_real, irank, atom_num_r, atom_num_ln, n_ints, n_reals, iint, ireal, & + tag, typ, mask, n_cg, icg + real(kind=wp) :: rtemp(3), veltemp(3), forcetemp(3) + integer, dimension(3) :: index_array + integer, dimension(pro_num) :: displs, recv_counts + real(kind = wp), dimension(3) :: r_in, vel_in, force_in, r0, g, h + integer, allocatable :: send_buff(:), send_array(:), & + recv_array(:), & + send_ints(:), recv_ints(:) + real(kind = wp), allocatable :: send_reals(:), recv_reals(:), force_array(:,:), send_cg(:), recv_cg(:) + character(len=read_len) :: msg + +! update +! r_atom, tag_atom, grain_atom +! vel_atom (when vel_now.eqv..true.) +! force_atom_pre (when force_pre_now.eqv..true.) +! group_atom (when group_num /= 0) + + if(present(arg1)) then + cg = arg1 + else + cg = .false. 
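+
+    !cg is true only when the caller requests that the minimizer arrays
+    !(rzeroatom, gatom, hatom) be packed and transferred along with the atoms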
+ end if + + allocate(send_buff(seg_num), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate send_buff in update_nei_at", allostat) + + send_n = 0 + send_buff(:) = 0 + seg_num_real = seg_num + + !Figure out which atoms need to be sent + do ia = 1, atom_num_l + r_in(:) = r_atom(:, ia) + if(.not.in_block_bd(r_in, pro_bd)) then + + send_n = send_n + 1 + if(send_n > seg_num_real) then + + allocate(send_array(seg_num_real+seg_num), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate send_array ", allostat) + + send_array(1:seg_num_real) = send_buff(:) + send_array(seg_num_real+1:) = 0 + call move_alloc(send_array, send_buff) + + seg_num_real = seg_num_real + seg_num + end if + send_buff(send_n) = ia + end if + end do + + call mpi_allreduce(send_n, send_n_sum, 1, mpi_integer, mpi_sum, mpi_comm_world, ierr) + + !Calculate size of data + n_ints = 3 + n_reals = 3 + if(need_vel) n_reals = n_reals + 3 + if(need_force_pre) n_reals = n_reals + 3 + + !Allocate send and recv arrays + allocate(send_ints(n_ints*send_n), recv_ints(n_ints*send_n_sum), & + send_reals(n_reals*send_n), recv_reals(n_reals*send_n_sum), & + recv_array(send_n_sum), stat = allostat) + send_ints(:) = 0 + recv_ints(:) = 0 + send_reals(:) = 0.0_wp + recv_reals(:) = 0.0_wp + + !If running cg then allocate cg data arrays + if(cg) then + n_cg = 9 + allocate(send_cg(n_cg*send_n), recv_cg(n_cg*send_n_sum)) + send_cg = 0 + recv_cg=0 + end if + + !Pack atoms + do ia = 1, send_n + ja = send_buff(ia) + if(need_vel) vel_in = vel_atom(:,ja) + if(need_force_pre) force_in = force_atom_pre(:,ja) + + iint = n_ints*(ia-1)+1 + ireal = n_reals*(ia-1)+1 + call pack_atom_neighbor(tag_atom(ja), type_atom(ja), a_mask(ja), r_atom(:,ja), vel_in, force_in, & + send_ints(iint:iint+n_ints-1), send_reals(ireal:ireal+n_reals-1)) + if(cg) then + icg = n_cg*(ia-1)+1 + call pack_atom_cg(rzeroatom(:,ja), gatom(:, ja), hatom(:,ja), send_cg(icg:icg+n_cg-1)) + end if + end do + + !If one proc then the send arrays are equal to the recv arrays + if(pro_num == 1) then + recv_ints = send_ints + recv_reals = send_reals + if(cg) recv_cg=send_cg + !Otherwise we have to mpi_allgatherv the data + else + + recv_counts(:) = 0 + call mpi_allgather(send_n, 1, mpi_integer, recv_counts, 1, mpi_integer, mpi_comm_world, ierr) + + !Get displacement and offset for integer data + displs(:) = 0 + do irank = 1, pro_num-1 + displs(irank+1) = displs(irank) + n_ints*recv_counts(irank) + end do + !allgatherv ints + call mpi_allgatherv(send_ints, n_ints*send_n, mpi_integer, & + recv_ints, n_ints*recv_counts, displs, & + mpi_integer, mpi_comm_world, ierr) + + !Get displacement and offset for real data + displs(:) = 0 + do irank = 1, pro_num-1 + displs(irank+1) = displs(irank) + n_reals*recv_counts(irank) + end do + + !allgatherv reals + call mpi_allgatherv(send_reals, n_reals*send_n, mpi_wp, & + recv_reals, n_reals*recv_counts, displs, & + mpi_wp, mpi_comm_world, ierr) + if(cg) then + !Get displacement and offset for cg data + displs(:) = 0 + do irank = 1, pro_num-1 + displs(irank+1) = displs(irank) + n_cg*recv_counts(irank) + end do + + !allgatherv reals + call mpi_allgatherv(send_cg, n_cg*send_n, mpi_wp, & + recv_cg, n_cg*recv_counts, displs, & + mpi_wp, mpi_comm_world, ierr) + end if + + end if + + !delete send_buff from the original array, + !move the end of the array to fill the blank + atom_num_ln = atom_num_l + do ia = send_n, 1, -1 + + ja = send_buff(ia) + + if(ja < atom_num_ln) then + + tag_atom(ja) = tag_atom(atom_num_ln) + type_atom(ja) = 
type_atom(atom_num_ln) + a_mask(ja) = a_mask(atom_num_ln) + r_atom(:, ja) = r_atom(:, atom_num_ln) + if(need_vel) vel_atom(:, ja) = vel_atom(:, atom_num_ln) + if(need_force_pre) force_atom_pre(:, ja) = force_atom_pre(:, atom_num_ln) + + if(cg) then + rzeroatom(:,ja) = rzeroatom(:,atom_num_ln) + gatom(:,ja) = gatom(:, atom_num_ln) + hatom(:,ja) = hatom(:, atom_num_ln) + end if + + else if(ja > atom_num_ln) then + + print *, 'Error: ja', ja, ' should not be larger than', & + ' atom_num_ln', atom_num_ln + call mpi_abort(mpi_comm_world, 1, ierr) + + end if + atom_num_ln = atom_num_ln - 1 + end do + + if(atom_num_ln /= atom_num_l) then + reneighbored = .true. + end if + + ! Now add new atoms to the end of the array + ja = atom_num_ln + atom_num_r = atom_num_l + do ia = 1, send_n_sum + ireal = n_reals*(ia-1)+1 + iint = n_ints*(ia-1)+1 + !Unpack the current atom + call unpack_atom_neighbor(recv_ints(iint:iint+n_ints-1), recv_reals(ireal:ireal+n_reals-1), tag, typ, mask, rtemp, & + veltemp, forcetemp) + if(cg) then + icg = n_cg*(ia-1)+1 + call unpack_atom_cg(recv_cg(icg:icg+n_cg-1), r0, g, h) + end if + do ix = 0, 2 + do iy = 0, 2 + inloop: do iz = 0, 2 + + index_array(:) = [ ix, iy, iz ] + do i = 1, 3 + if(index_array(i) == 0) then + r_in(i) = rtemp(i) + + else if(period(i).eqv..true.) then + r_in(i) = rtemp(i) + (-1) ** index_array(i) * box_length(i) + + else + exit inloop + end if + end do + + !Add the atom if needed + if(in_block_bd(r_in, pro_bd)) then + recv_array(ia) = 1 + + ja = ja + 1 + !Resize arrays if needed + if (ja > atom_num_lr) call grow_at_arrays + + if(need_force_pre) then + if(ja > size(force_atom_pre,2)) then + allocate(force_array(3,atom_num_lr), stat = allostat) + if (allostat > 0) call alloc_error("Failure to allocate force array in update_neighbor_at", & + allostat) + force_array(:,1:ja-1) = force_atom_pre(:,:) + force_array(:,ja:) = 0.0_wp + call move_alloc(force_array, force_atom_pre) + end if + end if + tag_atom(ja) = tag + type_atom(ja) = typ + a_mask(ja) = mask + r_atom(:,ja) = r_in + if(need_vel) vel_atom(:,ja) = veltemp + if(need_force_pre) then + force_atom_pre(:,ja) = forcetemp + end if + + if(cg) then + if(ja > size(gatom,2)) call grow_at_min_arrays + rzeroatom(:,ja) = r0 + gatom(:,ja) = g + hatom(:, ja) = h + end if + end if + end do inloop + end do + end do + end do + + if(ja > atom_num_ln) then + reneighbored = .true. + !write(msg, *)'Rank', rank, ' adds', ja-atom_num_ln, ' atoms' + !call log_msg(msg, 1, .true.) 
+ end if + atom_num_l = ja + + !debug + if(atom_num_l > atom_num_lr) then + print *, 'Rank', rank, ' has atom_num_l', atom_num_l, & + ' which is larger than atom_num_lr', atom_num_lr + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !debug + call mpi_reduce(atom_num_l, i, 1, mpi_integer, mpi_sum, root, mpi_comm_world, ierr) + if(rank == root) then + if(i /= atom_num) then + write(msg, *) 'Total number of atoms', i, ' should equal atom_num', atom_num + call misc_error(msg) + end if + end if + + return +end subroutine update_neighbor_at + +subroutine get_centroid(centroidall) + !This subroutine gets the average position of all virtual and real atoms + real(kind=wp), intent(out) :: centroidall(3) + integer :: ie, inod, ip, ia, ibasis, atomp_num, eanum + real(kind=wp) :: centroidme(3) + + centroidme(:) = 0.0_wp + centroidall(:) = 0.0_wp + atomp_num = atom_num + atomap_num + !Get center of element positions + if(ele_num > 0) then + do ie = 1, ele_num_l + if (who_has_ele(ie)) then + select case(etype(ie)) + case(1,2) + eanum = basis_num(ie)*(size_ele(ie)+1)**3 + end select + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod,ie) + do ibasis = 1, basis_num(ie) + centroidme(:) = centroidme + r(:,ibasis,ip)*eanum/(ng_node(etype(ie))*atomp_num) + end do + end do + end if + end do + end if + + !Get atomistic centroid + if(atom_num > 0) then + do ia = 1, atom_num_l + centroidme(:) = centroidme(:) + r_atom(:, ia)/atomp_num + end do + end if + + !Sum this over all processors + call mpi_allreduce(centroidme, centroidall, 3, mpi_wp, mpi_sum, world, ierr) + + return +end subroutine get_centroid + +subroutine pack_ele_neighbor(send_etype, send_tag_ele, send_id, send_esize, mask, send_basis_num, send_basis_type, & + send_r_nodes, send_pb_nodes, send_vel_nodes, send_force_nodes, send_int,send_real) + !This subroutine packs all data for one element into 2 arrays, a send_int array and a send_real array + !containing all information needed for elements + + integer, intent(in) :: send_etype, send_tag_ele, send_esize, send_id, mask, send_basis_num, send_basis_type(max_basisnum),& + send_pb_nodes(3,max_basisnum,ng_max_node) + real(kind=wp), dimension(3, max_basisnum, ng_max_node), intent(in) :: send_r_nodes, send_vel_nodes, send_force_nodes + integer, dimension(:), intent(out) :: send_int + real(kind=wp), dimension(:), intent(out) :: send_real + + integer i, j, inod, ibasis + + logical :: pack_vel + pack_vel=.false. 
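+
+    !Buffer layout (mirrored by unpack_ele_neighbor):
+    !  send_int  - etype, element tag, global id, size, mask, basis_num,
+    !              basis_type(1:basis_num), then the periodic image flags
+    !              per component/basis/node when periodic is set
+    !  send_real - for every node, basis and component: the position, followed
+    !              by the velocity and pre-force when need_vel/need_force_pre are set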
+ + !Initialize variables + send_int(:) = 0 + send_real(:) = 0.0_wp + + !Calculate send counts + !First pack the send_int variable + send_int(1) = send_etype + send_int(2) = send_tag_ele + send_int(3) = send_id + send_int(4) = send_esize + send_int(5) = mask + send_int(6) = send_basis_num + j = 6+send_basis_num + send_int(7:j) = send_basis_type(1:send_basis_num) + if(periodic) then + do inod = 1, ng_node(send_etype) + do ibasis = 1, send_basis_num + do i = 1, 3 + j = j+1 + send_int(j) = send_pb_nodes(i, ibasis, inod) + end do + end do + end do + end if + !Now pack the send_real variable + j = 1 + do inod = 1, ng_node(send_etype) + do ibasis = 1, send_basis_num + do i =1, 3 + send_real(j) = send_r_nodes(i, ibasis, inod) + j = j+1 + if (need_vel) then + send_real(j) = send_vel_nodes(i, ibasis,inod) + j = j+1 + end if + if(need_force_pre) then + send_real(j) = send_force_nodes(i, ibasis, inod) + j=j+1 + end if + end do + end do + end do + + return +end subroutine pack_ele_neighbor + +subroutine unpack_ele_neighbor(recv_int, recv_real, recv_etype, recv_tag_ele, recv_id, recv_esize, mask, recv_basis_num, & + recv_basis_type, recv_pb_node, recv_r_nodes, recv_vel_nodes, recv_force_nodes) + !This subroutine unpacks the arrays that are communicated + + integer, dimension(:), intent(in) :: recv_int + real(kind=wp), dimension(:), intent(in) :: recv_real + + integer, intent(out) :: recv_etype, recv_tag_ele, recv_id, recv_esize, mask, recv_basis_num, recv_basis_type(max_basisnum),& + recv_pb_node(3,max_basisnum, ng_max_node) + real(kind=wp), dimension(3, max_basisnum, ng_max_node), intent(out) :: recv_r_nodes, recv_vel_nodes, recv_force_nodes + + + integer i, j, inod, ibasis + + recv_basis_type(:) = 0 + recv_pb_node(:,:,:) = 0 + recv_r_nodes(:,:,:) = 0.0_wp + recv_vel_nodes(:,:,:) = 0.0_wp + + !First pack the recv_int variable + recv_etype = recv_int(1) + recv_tag_ele = recv_int(2) + recv_id = recv_int(3) + recv_esize = recv_int(4) + mask = recv_int(5) + recv_basis_num = recv_int(6) + j = 6+recv_basis_num + recv_basis_type(1:recv_basis_num) = recv_int(7:j) + if(periodic) then + do inod = 1, ng_node(recv_etype) + do ibasis = 1, recv_basis_num + do i = 1, 3 + j = j+1 + recv_pb_node(i, ibasis, inod) = recv_int(j) + end do + end do + end do + end if + !Now pack the recv_real variable + j = 1 + do inod = 1,ng_node(recv_etype) + do ibasis = 1, recv_basis_num + do i =1, 3 + recv_r_nodes(i, ibasis, inod) = recv_real(j) + j = j+1 + if(need_vel) then + recv_vel_nodes(i, ibasis,inod) = recv_real(j) + j = j+1 + end if + if(need_force_pre) then + recv_force_nodes(i, ibasis, inod) = recv_real(j) + j = j+1 + end if + end do + end do + end do + + return +end subroutine unpack_ele_neighbor + +subroutine pack_atom_neighbor(tag_buff, type_buff, mask, r_buff, vel_buff, force_buff, send_int, send_real) + !This subroutine packs the atom information into send_int and send_real + integer, intent(in) :: tag_buff, type_buff, mask + real(kind=wp), dimension(3), intent(in) :: r_buff + real(kind=wp), intent(in), optional :: vel_buff(3) + real(kind=wp), intent(in), optional :: force_buff(3) + integer, dimension(:), intent(out) :: send_int + real(kind=wp), dimension(:), intent(out) :: send_real + + integer :: i, k + + !Check to make sure vel_buff is provided if need_vel + !First pack send_int + send_int(1) = tag_buff + send_int(2) = type_buff + send_int(3) = mask + + !Now pack send_real + k = 1 + do i = 1,3 + send_real(k) = r_buff(i) + k=k+1 + if(need_vel) then + send_real(k) = vel_buff(i) + k=k+1 + end if + if(need_force_pre) then 
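+                !The pre-force component follows the position (and velocity, if
+                !packed) for this Cartesian direction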
+ send_real(k) = force_buff(i) + k = k + 1 + end if + end do + + return + +end subroutine pack_atom_neighbor + +subroutine unpack_atom_neighbor(recv_int, recv_real, tag_buff, type_buff, mask, r_buff, vel_buff, force_buff ) + !This subroutine packs the atom information into recv_int and recv_real + integer, dimension(:), intent(in) :: recv_int + real(kind=wp), dimension(:), intent(in) :: recv_real + integer, intent(out) :: tag_buff, type_buff, mask + real(kind=wp), dimension(3), intent(out) :: r_buff + real(kind=wp), intent(out) :: vel_buff(3) + real(kind=wp), intent(out) :: force_buff(3) + + integer :: i, k + + !First unpack recv_int + tag_buff = recv_int(1) + type_buff= recv_int(2) + mask = recv_int(3) + + !Now unpack recv_real + k = 1 + do i = 1,3 + r_buff(i) = recv_real(k) + k=k+1 + if(need_vel) then + vel_buff(i) = recv_real(k) + k=k+1 + end if + if(need_force_pre) then + force_buff(i) = recv_real(k) + k = k + 1 + end if + end do + return +end subroutine unpack_atom_neighbor + + +subroutine log_neighbor_info + character(len=read_len) :: msg + + write(msg, *) "Neighbor list was built ", builds, " times in last run" + call log_msg(msg) + builds = 0 +end subroutine log_neighbor_info + +end module neighbors diff --git a/src/parameters.f90 b/src/parameters.f90 new file mode 100644 index 0000000..eacc787 --- /dev/null +++ b/src/parameters.f90 @@ -0,0 +1,50 @@ +module parameters + + implicit none + + !Precision defining parameters + integer, parameter :: dp = selected_real_kind(15, 307), & ! double real + qp = selected_real_kind(33, 4931), & ! quadrupole real + wp = dp + integer, parameter :: read_len = 500 !Read 500 characters for every input command + integer :: error + + !Integers for which hold error codes + integer, save :: allostat, deallostat, ierr + + !Chunk size for arrays + integer, parameter :: seg_num = 1024 + + !Various parameters used for tolerance checks + integer, save :: lim_large_int = huge(1) + real(kind = wp), parameter :: lim_zero = epsilon(1.0_wp), lim_small = epsilon(1.0), lim_large = huge(1.0) + real(kind=dp), parameter :: zerotol=1E-8 + + !constant in physics + real(kind = wp), parameter :: & + boltzmann = 8.6173324e-5_wp, & + avogadro = 6.02214129e23_wp, & + electron_volt = 1.602176565e-19_wp, & + amu = 1.6605402e-27_wp, & + pi = 3.14159265358979323846_wp, & + !nktv2p is a conversion that lammps uses to get virial stress to atm + nktv2p = 1602176.5_wp, & + !ftm2v is another conversion factor from lammps used in fire + ftm2v = 1.0_wp/1.0364269d-4 + + + !constant in motion equation and kinetic energy calculation, conversion from mv^2 to energy + real(kind = wp), parameter :: const_motion = 0.00010364269_wp + + !Parameters for simulator + character(len = 20), save :: simulator + logical,save :: need_virial = .false., first_run + + !Cutoff radiuses + real(kind=wp), save :: rc_off, rc_min, rc_neigh, rc_sq + + !Comm variables + integer, parameter :: root = 0 + integer :: rank + +end module parameters diff --git a/src/potential.f90 b/src/potential.f90 new file mode 100644 index 0000000..977d176 --- /dev/null +++ b/src/potential.f90 @@ -0,0 +1,217 @@ +module potential + + use parameters + use eam + use morse + use forces + use comms + use str + use atom_types + use force_mod + + implicit none + + !This code handles controlling which potentials are called and processes potential input + integer :: pnum, enum, fnum + + !Spline arrays + public + contains + + subroutine potential_defaults + potential_types=.false. 
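+        !Reset the tabulated potential parameters and the type-to-potential
+        !mapping arrays to their unset defaults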
+ rdr = 0.0_wp + rdrho = 0.0_wp + nr = 0.0_wp + nrho = 0.0_wp + pnum = 0 + enum = 0 + fnum = 0 + type_to_pair = 0 + type_to_dens = 0 + pot_map = 0 + end subroutine potential_defaults + + subroutine parse_potential(line) + character(len = *), intent(in) :: line + character(len = 100) :: tmptxt, tmptxt2 + character(len = 100) potential_type + character(len = read_len) :: filename + character(len=2), dimension(max_atom_types) :: names + integer :: iospara, i, j, mn, itype, jtype, lim + real(kind=wp) :: ind0, inalpha, inr0,cutoff + + real(kind=wp), allocatable :: mpair(:) + + + read(line, *, iostat = iospara) tmptxt, potential_type + select case(potential_type) + case('eam','fs') + + potential_types(1)=.true. + + read(line, *, iostat = iospara) tmptxt, potential_type, filename + if(iospara> 0) call read_error("Error parsing potential command", iospara) + !Now if we have more than 2 tokens we read them in as atom types unless we have already set the atom types + j = tok_count(line)-3 + if ((j > 0).and.(.not.atom_types_set)) then + read(line, *, iostat = iospara) tmptxt, potential_type, filename, (names(i), i = 1, j) + + !and pass it to atom_types + call set_atom_types(j, names) + end if + + if(potential_type == "eam") then + call eam_lammps(filename, 1) + else if(potential_type == "fs") then + call eam_lammps(filename, 1, .true.) + end if + + call eamarray2spline + + !Set cutoff radiuses + rc_off = min(d_finish, p_finish) + rc_sq = rc_off**2 + rc_min = max(d_start, p_start, lim_small) + if (.not.def_nei) then + def_nei = .true. + rc_neigh = rc_off + end if + + call set_eam_map_arrays + + case('morse') + potential_types(2)=.true. + !atom types have to be set before morse potential is called, it usually needs to be set with the types command + + if(.not.atom_types_set) call misc_error("Atom types need to be set using types command before morse is used") + + read(line, *) tmptxt, tmptxt2, itype, jtype, ind0, inalpha, inr0, cutoff + call add_morse_potential(itype, jtype, ind0, inalpha, inr0, cutoff) + + case default + call command_error(trim(adjustl(potential_type))//" is not an acceptable potential type") + end select + + !Log the atom types + !call log_types + + + end subroutine parse_potential + + subroutine alloc_potential_arrays + !Initialize potential arrays + if (potential_types(1)) then + call alloc_eam_arrays + end if + end subroutine alloc_potential_arrays + + subroutine update_force + + real(kind=wp) :: t_start, t_end + + call pre_force + t_start = mpi_wtime() + + !First zero arrays + if(ele_num > 0) then + if(allocated(force)) then + if(node_num_l > size(force, 3)) then + deallocate(force, energy) + allocate(force(3, max_basisnum, node_num_l), energy(max_basisnum, node_num_l), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate force/energy", allostat) + if(need_virial) then + deallocate(virial) + allocate(virial(3, 3, max_basisnum, node_num_l), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate virial", allostat) + end if + end if + else + allocate(force(3, max_basisnum, node_num_l), energy(max_basisnum, node_num_l), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate force/energy", allostat) + if(need_virial) then + allocate(virial(3, 3, max_basisnum, node_num_l), stat = allostat) + if(allostat /= 0) call alloc_error("Failure to allocate virial", allostat) + end if + end if + + !Initialize force and energy + force(:, :, :) = 0.0_wp + energy(:, :) = 0.0_wp + !Allocate virial if needed + if(need_virial) then + 
virial(:, :, :, :) = 0.0_wp + end if + + end if + + if(atom_num > 0) then + force_atom=0.0_wp + energy_atom=0.0_wp + if(need_virial) then + virial_atom=0.0_wp + end if + end if + + if(potential_types(1)) call update_force_eam + + if(potential_types(2)) call update_force_morse + + !Now call update_equiv to update equivalent node value arrays if there are finite elements + if(ele_num > 0) call update_equiv + t_end = mpi_wtime() + + walltime(2) = walltime(2) + (t_end-t_start) + call post_force + end subroutine update_force + + subroutine pre_force + return + end subroutine pre_force + + subroutine post_force + if(set_force_num > 0) call run_set_force + if((add_force_num > 0)) call add_force + end subroutine post_force + +! subroutine pairtab(line) +! !This subroutine reads in tabulated morse potential files +! character(len=*), intent(in) :: line +! +! integer :: i, itype, jtype, mn +! real(kind=wp) :: mdr +! real(kind=wp), allocatable :: mpair(:) +! character(len = read_len) :: tmptxt, filename +! +! read(line, *) tmptxt, itype, jtype, filename +! open(unit=21, file=trim(adjustl(filename)), status='old', action='read', position='rewind') +! !First read the step size and number of units, these start from the first step +! +! read(21, *) mn, mdr +! +! !Now check to make sure the morse step size is equivalent to the eam step size if the eam has already been defined +! if(rdr > 0.0_wp) then +! if(.not.is_equal(1/mdr, rdr)) call misc_error("Step size for morse potential must match step size for eam potential") +! else +! rdr = 0.0 +! end if +! +! !Now read in the potential table +! allocate(mpair(mn)) +! do i = 1, mn +! read(21,*) mpair(i) +! end do +! +! !Now add the pair potential to the spline arrays +! call spline_arrays(mn,mn,mn,1,1,1) +! ! +! embed_spline(:,:, size(embed_spline,3)) = 0.0_wp +! edens_spline(:,:, size(edens_spline,3)) = 0.0_wp +! +! call interpolate(mn, mdr, mpair, pair_spline(:,:,size(pair_spline,3))) +! +! return +! +! end subroutine pairtab + +end module potential diff --git a/src/quenched_dynamics.f90 b/src/quenched_dynamics.f90 new file mode 100644 index 0000000..0589b46 --- /dev/null +++ b/src/quenched_dynamics.f90 @@ -0,0 +1,110 @@ +module quenched_dynamics + + use parameters + use comms + use elements + use neighbors + use potential + use time + use vel_verlet + + implicit none + public + contains + + subroutine qd(i) + integer, intent(in) :: i + + call update_r + call update_neighbor(i) + call update_force + call quenched_vel + end subroutine qd + + subroutine quenched_vel + integer :: ia, ie, inod, ip, ibasis + real(kind = wp) :: pro_vel_p, pro_force_normsq, force_normsq, force_norm, vel_p + + + pro_vel_p = 0.0_wp + pro_force_normsq = 0.0_wp + + !coarse-grained domain + if(ele_num /= 0) then + do ie = 1, ele_num_l + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + vel(:, ibasis, ip) = vel(:, ibasis, ip) & + + 0.5_wp * (force_eq_pre(:, ibasis, ip) & + + force_eq(:, ibasis, ip)) / masses(basis_type(ibasis,ie)) & + * time_step / const_motion + + if(who_has_ele(ie).eqv..true.) 
then + pro_vel_p = pro_vel_p + dot_product(vel(:, ibasis, ip), force_eq(:, ibasis, ip)) + pro_force_normsq = pro_force_normsq + dot_product(force_eq(:, ibasis, ip), force_eq(:, ibasis, ip)) + end if + + end do + + end do + + end do + + end if + + !atomistic domain + if(atom_num /= 0) then + do ia = 1, atom_num_l + vel_atom(:, ia) = vel_atom(:, ia) & + + 0.5_wp * (force_atom_pre(:, ia) & + + force_atom(:, ia)) / masses(type_atom(ia)) & + * time_step / const_motion + + pro_vel_p = pro_vel_p + dot_product(vel_atom(:, ia), force_atom(:, ia)) + pro_force_normsq = pro_force_normsq + dot_product(force_atom(:, ia), force_atom(:, ia)) + + end do + + end if + + !sum pro_vel_p and pro_force_normsq up to get vel_p and force_normsq + if(pro_num == 1) then + vel_p = pro_vel_p + force_normsq = pro_force_normsq + + else + call mpi_allreduce(pro_vel_p, vel_p, 1, mpi_wp, mpi_sum, world, ierr) + call mpi_allreduce(pro_force_normsq, force_normsq, 1, mpi_wp, mpi_sum, world, ierr) + + end if + + force_norm = sqrt(force_normsq) +! decide if the nodes/atoms should be freezed +! coarse-grained domain + + if(ele_num /= 0) then + do ip = 1, node_num_l + if((vel_p < 0.0_wp).or. (abs(vel_p) < lim_zero)) then + vel(:, :, ip) = 0.0_wp + else + !only use the component of the velocity parallel to the force + vel(:, :, ip) = vel_p * force_eq(:, :, ip) / force_normsq + end if + end do + end if + + !atomistic domain + if(atom_num /= 0) then + do ia = 1, atom_num_l + if((vel_p < 0.0_wp).or. (abs(vel_p) < lim_zero)) then + vel_atom(:, ia) = 0.0_wp + else + !only use the component of the velocity parallel to the force + vel_atom(:, ia) = vel_p * force_atom(:, ia) / force_normsq + end if + end do + end if + end subroutine quenched_vel + +end module quenched_dynamics diff --git a/src/read_data.f90 b/src/read_data.f90 new file mode 100644 index 0000000..f8f0d81 --- /dev/null +++ b/src/read_data.f90 @@ -0,0 +1,339 @@ +module read_data + ! 
This code is in charge of all the reading and writing + use mpi + use math + use parameters + use elements + use box + use integration + use potential + use dynamics + use comms + use logger + use str + use time + + + public + contains + + subroutine parse_read(line) + character(len = *), intent(in) :: line + character(len=read_len) :: tmptxt, filename + integer :: iospara + + read(line, *, iostat = iospara) tmptxt, filename + if(iospara > 0) call read_error(" Invalid read of read_data command",iospara) + + if(rank == root) then + write(tmptxt,*) "Reading data from ", trim(adjustl(filename)) + call log_msg(tmptxt) + end if + call read_model(filename) + end subroutine + + subroutine read_model(filename) + !This subroutine is in charge of reading the model and distributing the model to the different processors + character(len=read_len), intent(in) :: filename + character(len=read_len) :: msg + integer :: numbers(8), i, j, k + real(kind=wp) :: box_mat_bcast(9) + + numbers(:) = 0 + if (rank == root) then + !First read the file if root and broadcast numbers, currently only accepts restart files + call read_restart(filename) + if (ele_num > 0) atomap_max = max_basisnum*(maxval(size_ele_scatter)+1)**3 + + numbers(1) = ele_num + numbers(2) = node_num + numbers(3) = atom_num + numbers(4) = atomap_num + numbers(5) = ng_max_node + numbers(6) = max_basisnum + numbers(7) = atomap_max + numbers(8) = iter + call mpi_bcast(numbers, 8, mpi_integer, root, world, ierr) + write(msg, *) "Read ", ele_num, " elements with ", node_num, " nodes and read ", atom_num, " atoms" + call log_msg(msg) + else + !If not root then wait to recieve files and then initialize numbers + call mpi_bcast(numbers, 8, mpi_integer, root, world, ierr) + ele_num = numbers(1) + node_num = numbers(2) + atom_num = numbers(3) + atomap_num = numbers(4) + ng_max_node = numbers(5) + max_basisnum= numbers(6) + atomap_max = numbers(7) + iter = numbers(8) + + end if + + !Communicate masses + call mpi_bcast(masses, max_atom_types, mpi_wp, root, world, ierr) + !Now we initialize the local variable nums + ele_num_l = nint((real(ele_num,wp) / pro_num) * 1.2_wp) + node_num_l = nint((real(node_num,wp) / pro_num) * 1.2_wp) + atom_num_l = nint((real(atom_num,wp) / pro_num) * 1.2_wp) + atomap_num_l = nint((real(atomap_num,wp) / pro_num) * 1.2_wp) + ele_num_lr = ele_num_l + node_num_lr = node_num_l + atom_num_lr = atom_num_l + atomap_num_lr = atomap_num_l + !Now get the largest number of atoms per element + + !Now broadcast and record box information + call mpi_bcast(box_bd, 6, mpi_wp, root, world, ierr) + !Calculate box length + do i = 1, 3 + box_length(i) = box_bd(2*i) - box_bd(2*i-1) + end do + + !Now set the original box bounds + orig_box_bd = box_bd + orig_box_length = box_length + + !Now broadcast box_bcs + call mpi_bcast(boundary, 3, mpi_character, root, world, ierr) + !Define logical periodic boundary variables + do i = 1,3 + if (boundary(i:i) == 'p') then + period(i) = .true. + else + period(i) = .false. 
+ end if + end do + periodic = any(period) + + !Now prepare box_mat buffer and broadcast + if(rank == root) then + k = 1 + do i = 1,3 + do j = 1, 3 + box_mat_bcast(k) = box_mat(i,j) + k = k+1 + end do + end do + end if + call mpi_bcast(box_mat_bcast, 9, mpi_wp, root, world, ierr) + if(rank /= root) then + k = 1 + do i = 1,3 + do j = 1, 3 + box_mat(i,j) = box_mat_bcast(k) + k = k+1 + end do + end do + end if + + !Now initialize original box_mat + box_mat_ori = box_mat + + !Now communicate the velocity flag + call mpi_bcast(need_vel, 1, mpi_logical, root, world, ierr) + + !Now scatter the models + call divide_domain + + if(ele_num > 0) then + call alloc_cg_arrays + call scatter_cg + call init_integration + end if + + if (atom_num > 0) then + call alloc_at_arrays + call scatter_at + end if + + !Init all group + call init_group_all + end subroutine read_model + + subroutine read_restart(filename) + !First we read the restart file headers + character(len=read_len), intent(in) :: filename + integer :: iospara, i, j, ip, ib, inod, ibasis, bnum + character(len=2) :: paraline + character(len=read_len) :: line, label + character(len=100) :: ioerror, msg + logical :: read_vel_cg, read_vel_at + + etype_present(:) = .false. + + !open the file for reading + open(1, file = trim(adjustl(filename)), status='old', action='read', position='rewind') + + !Read the time + read(1, '(a)', iostat=iospara) paraline + read(1, *, iostat=iospara, iomsg=ioerror) iter, t + if(iospara>0) call read_error(ioerror, iospara) + + !Read element number + read(1, '(a)', iostat=iosline) paraline + read(1, *, iostat=iospara) ele_num + if(iospara>0) call read_error("Invalid read of element_num ", iospara) + + !Read max interpolated atoms per element + read(1, '(a)', iostat=iosline) paraline + read(1, *, iostat=iospara) atomap_max_ele + if(iospara>0) call read_error("Invalid read of max interpo", iospara) + + !Read node number + read(1, '(a)', iostat=iosline) paraline + read(1, *, iostat=iospara) node_num + if(iospara>0) call read_error("Invalid read of node_num ", iospara) + + !Read max nodes per element and basisnum + read(1, '(a)', iostat=iosline) paraline + read(1, *, iostat=iospara) ng_max_node, max_basisnum + if(iospara>0) call read_error("Invalid read of ng_max_node/max_basisnum ", iospara) + + !Read number of atoms + read(1, '(a)', iostat=iosline) paraline + read(1, *, iostat=iospara) atom_num + if(iospara>0) call read_error("Invalid read of atom_num", iospara) + + !Now read the atom type masses and map them to the right potential + read(1, '(a)', iostat=iosline) paraline + read(1, '(a)', iostat=iosline) line + if(iospara>0) call read_error("Invalid read of atom_types ", iospara) + j = tok_count(line) + read(line, *, iostat=iospara) (masses(i), i = 1, j) + + !Now read the box boundary definition + read(1, '(a)', iostat=iosline) paraline + read(1, *, iostat=iospara) boundary + if(iospara>0) then + print *, "Error: Invalid read of boundary with error code", iospara + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !Read box_bd + box_bd(:) = 0.0_wp + read(1, '(a)', iostat=iosline) paraline + read(1, *, iostat=iospara) box_bd(1:6) + if(iospara>0) then + print *, "Error: Invalid read of box_bd with error code", iospara + print *, "Last line read was: ", paraline + call mpi_abort(mpi_comm_world, 1, ierr) + end if + + !Read box matrix + box_mat(:,:)=0.0_wp + read(1, '(a)', iostat=iosline) paraline + do i=1,3 + read( 1, *, iostat=iospara) box_mat(1:3, i) + if(iospara>0) then + print *, "Error: Invalid read of box_mat with error 
code", iospara + call mpi_abort(mpi_comm_world, 1, ierr) + end if + end do + + + read_vel_cg=.false. + read_vel_at=.false. + ! Now read the cg part of the restart file + if(ele_num > 0) then + !Initialize some coarse-grained variables and allocate scatter arrays + call scatter_cg_array + + atomap_num = 0 + !Read info lines + read(1, '(a)', iostat=iospara) line + read(1, '(a)', iostat=iospara) line + + !Figure out if velocity information is included in this restart file + read(1, '(a)', iostat=iospara) line + read(line, *, iostat=iospara) (label, i =1 ,7) + if(trim(adjustl(label)) == 'velx') then + read_vel_cg = .true. + end if + + !Now start reading the element information + do i=1,ele_num + read(1, '(a)', iostat=iospara) line + read(line, *, iostat=iospara, iomsg=msg) tag_ele_scatter(i), bnum, etype_scatter(i), size_ele_scatter(i) + if(iospara>0) call read_error(msg, iospara) + + !Set the element_type to true + etype_present(etype_scatter(i)) = .true. + + !Now read the nodes + do inod = 1, ng_node(etype_scatter(i)) + do ibasis =1 , bnum + if(read_vel_cg) then + read(1, *, iostat=iospara) ip,ib,basis_type_scatter(ib, i),r_scatter(:, ib, ip),vel_scatter(:, ib, ip) + if(iospara>0) then + print *, "Error: Invalid read of node information with error code", iospara + call mpi_abort(mpi_comm_world, 1, ierr) + end if + else + read(1, *, iostat=iospara) ip, ib, basis_type_scatter(ib,i), r_scatter(:, ib, ip) + if(iospara>0) then + print *, "Error: Invalid read of node information with error code", iospara + call mpi_abort(mpi_comm_world, 1, ierr) + end if + end if + end do + basis_num_scatter(i) = bnum + end do + + !Figure out the number of atoms needed + select case(etype_scatter(i)) + case(1,2,3) + atomap_num =atomap_num + bnum*((size_ele_scatter(i)+1)**3) + end select + end do + + end if + + !Now read the atomistic information from the restart files + if(atom_num > 0) then + !First allocate the data + call scatter_at_array + read(1, '(a)', iostat=iospara) line + + !Figure out if velocity information is included in this restart file + read(1, '(a)', iostat=iospara) line + read(line, *, iostat=iospara) (label, i =1 ,6) + if(trim(adjustl(label)) == 'velx') then + read_vel_at = .true. + end if + + !Now read the atomistic information + do i =1, atom_num + if(read_vel_at) then + read(1, *, iostat=iospara) tag_atom_scatter(i), type_atom_scatter(i), & + r_atom_scatter(1:3,i), vel_atom_scatter(1:3,i) + if(iospara>0) then + print *, "Error: Invalid read of atom information with error code", iospara + call mpi_abort(mpi_comm_world, 1, ierr) + end if + else + read(1, *, iostat=iospara) tag_atom_scatter(i), type_atom_scatter(i), r_atom_scatter(1:3,i) + if(iospara>0) then + print *, "Error: Invalid read of atom information with error code", iospara + call mpi_abort(mpi_comm_world, 1, ierr) + end if + end if + end do + + end if + + !Figure out if we need velocity, need_vel is only true if read_vel_cg and read_vel_at are both true + if ((ele_num > 0).and.(atom_num > 0)) then + need_vel = (read_vel_cg ).and. read_vel_at + else if(ele_num > 0) then + need_vel = read_vel_cg + else if(atom_num > 0) then + need_vel = read_vel_at + end if + + close(1) + return + end subroutine read_restart + + +end module read_data diff --git a/src/set.f90 b/src/set.f90 new file mode 100644 index 0000000..1ee411c --- /dev/null +++ b/src/set.f90 @@ -0,0 +1,140 @@ +module set + ! 
sets per-atom values as required for random alloys, temperature initializiation + use elements + use group + use logger + + implicit none + public + contains + + subroutine parse_set(line) + ! parse the 'set' command and set appropriate per-atom vals + character(len = *), intent(in) :: line + character(len=read_len) :: tmptxt, g, mode, msg + integer :: iospara, gnum, j, i + real(kind=wp) :: args(20) + + + if((ele_num == 0).and.(atom_num == 0)) call misc_error("Model must be read in before calling set command") + + read(line, *, iostat=iospara) tmptxt, g, mode + if(iospara > 0) call read_error("Failure to read set command", iospara) + + gnum = get_group_index(g) + if(gnum == 0) then + call misc_error("Group "//trim(adjustl(g))// " in set command has not been defined. "// & + "Please define group before use") + end if + + ! read in remaining arguments + j = tok_count(line)-3 + if (j > 0) then + read(line, *, iostat = iospara) tmptxt, g, mode, (args(i), i = 1, j) + end if + + select case(mode) + case('type') + write(msg,*) "set ", group_counts(1,gnum), " atoms and ", group_counts(2,gnum), " elements to type: ", args(1) + call log_msg(msg) + call set_type(gnum, int(args(1))) + case('fraction') + if(group_counts(2,gnum) > 0) then + call command_error("Set fraction cannot operate on group "//trim(adjustl(group_name(group_num))) & + //" containing elements") + endif + call set_fraction(gnum, int(args(1)), args(2)) + case('etype') + if(group_counts(1,gnum) > 0) call command_error("Set etype cannot operate on groups containing atoms") + call set_etype(gnum, int(args(1))) + case('vel') + if(allocated(vel_atom)) then + write(msg,*) "set ", group_counts(1,gnum), " atoms and ", group_counts(2,gnum), " elements to vel ", args(1:3) + call set_vel(gnum, real(args(1),wp),real(args(2),wp), real(args(3),wp)) + else + write(msg,*) "Velocity arrays are not allocated, velocity cannot be set" + call log_msg(msg) + end if + call log_msg(msg) + end select + end subroutine parse_set + + subroutine set_type(gnum, type) + integer, intent(in) :: gnum, type + integer :: ia, ip, ibasis, ie + + do ia = 1, atom_num_l + if(btest(a_mask(ia), gnum)) then + type_atom(ia) = type + end if + end do + do ip = 1, node_num_l + ie = node_cg(ip) + if(btest(e_mask(ie), gnum)) then + do ibasis = 1, basis_num(ie) + basis_type(ibasis,ie) = type + end do + end if + end do + end subroutine set_type + + subroutine set_vel(gnum, vx, vy, vz) + integer, intent(in) :: gnum + real(kind = wp), intent(in) :: vx, vy, vz + integer ::ia, ip, ibasis, ie + real(kind=wp) :: v(3) + + v(1) = vx + v(2) = vy + v(3) = vz + + do ia = 1, atom_num_l + if(btest(a_mask(ia), gnum)) then + vel_atom(:, ia) = v + end if + end do + do ip = 1, node_num_l + ie = node_cg(ip) + if(btest(e_mask(ie), gnum)) then + do ibasis = 1, basis_num(ie) + vel(:,ibasis,ip) = v + end do + end if + end do + end subroutine set_vel + + subroutine set_fraction(gnum, type, frac) + integer, intent(in) :: gnum, type + real(kind=wp), intent(in) :: frac + character(len = read_len) :: msg + real(kind=wp) :: rand + integer :: ia, count + ! 
allocate fraction of group_counts array for randomization + count = 0 + do ia = 1, atom_num_l + if(btest(a_mask(ia), gnum)) then + call random_number(rand) + if (rand > frac) cycle + type_atom(ia) = type + count = count + 1 + end if + end do + write(msg, *) "Set fraction swapped ", count, " atoms to type ", type + call log_msg(msg) + !group_counts(1,gnum) + end subroutine set_fraction + + subroutine set_etype(gnum, et) + integer, intent(in) :: gnum, et + + integer :: i + do i = 1, ele_num_l + if (btest(e_mask(i), gnum)) then + etype(i) = et + end if + end do + + return + end subroutine set_etype + +end module set diff --git a/src/str.f90 b/src/str.f90 new file mode 100644 index 0000000..93f48d7 --- /dev/null +++ b/src/str.f90 @@ -0,0 +1,46 @@ +module str + + !this module has some string manipulation commands + public + contains + + pure function tok_count(text) + !counts number of tokens in a string + character(len = *), intent(in) :: text + integer :: tok_count + integer :: i, j + logical :: in_tok + + j = len(trim(adjustl(text))) + in_tok = .false. + tok_count = 0 + do i = 1, j + !This checks if it is a white space character which is the delimiter + if(trim(adjustl(text(i:i))) == ' ') then + !If previously we were in token and the current character is the delimiter + !Then we are no longer in the token + if(in_tok) in_tok = .false. + + !If the character isn't a white space character and we previously weren't in the token then set in_tok + !to true and increment token count + else if(.not.in_tok) then + in_tok = .true. + tok_count = tok_count + 1 + end if + end do + return + end function tok_count + + subroutine to_lower(str) + character(*), intent(in out) :: str + integer :: i + + do i = 1, len(str) + select case(str(i:i)) + case("A":"Z") + str(i:i) = achar(iachar(str(i:i))+32) + end select + end do + end subroutine to_lower + +end module str diff --git a/src/temp.f90 b/src/temp.f90 new file mode 100644 index 0000000..007742f --- /dev/null +++ b/src/temp.f90 @@ -0,0 +1,427 @@ +module temp + + use parameters + use mpi + use comms + use elements + use group + use compute + use time + use atom_types + + implicit none + + integer :: Nf(2), gnum + real(kind=wp) :: Tinit=0.0_wp !This is the target temp for the velocity initialization subroutine + + !Target temp and kinetic energy for rescaling subroutines, we have three Ktarget values currently for testing + real(kind=wp) :: Ttarget = 0.0_wp, Ktarget(3), urms(max_atom_types) + + real(kind=wp) :: time_constant !The time constant used to calculate the rescaling coefficient + real(kind=wp) :: omega !This is the average frequency used for applying an oscillation to the interpolated atoms + + logical :: tflag + integer, private :: delay + + public + contains + + subroutine parse_temp(line) + !This subroutine parses the temperature command + character(len = *), intent(in) :: line + character(len = read_len) :: tmptxt, group_id, command, args(10) + integer :: iospara, i, g + real(kind = wp) :: K(3), T + + delay = 1.0_wp + read(line, *, iostat = iospara) tmptxt, command + + !Initialize random number generator + call random_seed() + + select case(command) + case('create') + !Read the temperature command + read(line, *, iostat = iospara) tmptxt, command, group_id, T + + case('control') + omega = 0.0_wp + !Read the temperature command + read(line, *, iostat = iospara) tmptxt, command, group_id, T, time_constant + + !Check for optional arguments + read(line, *, iostat = iospara) args + i = 5 + do while(i < tok_count(line)) + i = i + 1 + select 
case(args(i)) + case('delay') + i = i + 1 + read(args(i), *) delay + end select + end do + + end select + + g = get_group_index(group_id) + if(g == 0) then + call misc_error("Missing group name in temp command") + end if + !Nf(1) is atomic dof and Nf(2) is elemental dof + Nf(1) = 3*group_counts(1, g) + Nf(2) = 3*group_counts(4, g) + + !1st component is target energy for all degrees of freedom + K(1) = (Nf(1)+Nf(2))*boltzmann*T/2.0_wp + !2nd is for atomic dof + K(2) = (Nf(1))*boltzmann*T/2.0_wp + K(3) = (Nf(2))*boltzmann*T/2.0_wp + + select case(command) + case('create') + write(tmptxt, *) "Initializing velocities to match temperature", T, " with kinetic energy", K(1) + call log_msg(tmptxt) + call init_vel(g,K) + case('control') + Ktarget = K + Ttarget = T + gnum = g + + !if (Nf(2) > 0) then + ! !Check to see if we have elements, if we do then we have to pass a frequency suboption dictating the frequency of + ! !oscillation to apply to the elements + ! read(line, *, iostat = iospara, iomsg = msg) tmptxt, command, group_id, Ttarget, time_constant, tmptxt, om + ! if(iospara > 0) call read_error(msg, iospara) + ! !Inputted frequency must be converted to angular frequency, the input frequency should be given in THz + ! omega = omega*2*pi*10.0_wp**12 + + ! !Now calculate the root mean-squared displacement magnitude for each different atom type + + ! do i = 1, natom_types + ! urms(i) = sqrt((3*10d20*boltzmann*electron_volt*Ttarget)/(masses(i)*amu*omega**2)) + ! end do + !end if + + write(tmptxt, *) "Running temperature control: Target Ke is ", Ktarget(1), " target temp is ", & + Ttarget, " with a time", "constant of ", time_constant + call log_msg(tmptxt) + + if(omega>lim_small) then + write(tmptxt,*) "Applying a mean square displacement of ", (urms(i), i =1, natom_types), " for atom types ", & + (i, i =1, natom_types) + call log_msg(tmptxt) + end if + tflag = .true. + end select + + return + end subroutine parse_temp + + subroutine init_vel(g, Ktar) + !This subroutine initializes velocities to a random gaussian distribution around the desired temperature + real(kind=wp), intent(in) :: Ktar(3) + integer, intent(in) :: g + integer :: i, j, ibasis, id, inod, ip, ie + real(kind=wp) :: alpha, Kme, Kall, vel_buff(3*ele_num*max_basisnum*ng_max_node) + + Kme = 0.0_wp + + !Allocate velocity arrays if not already allocated + if(.not. 
need_vel) call alloc_velocity_arrays + + if(atom_num > 0) then + do i=1, atom_num_l + if (btest(a_mask(i), g)) then + do j = 1, 3 + vel_atom(j,i) = gasdev() + Kme = Kme + 0.5_wp * masses(type_atom(i))*const_motion*(vel_atom(j,i)*vel_atom(j,i)) + end do + end if + end do + end if + + if(ele_num > 0) then + do ie = 1, ele_num_l + if(who_has_ele(ie).and.btest(e_mask(ie), g)) then + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + do j = 1, 3 + vel(j,ibasis,ip) = gasdev() + Kme = Kme + 0.5_wp * masses(basis_type(ibasis,ie))*const_motion & + * mass_mat_coeff(size_ele(ie), etype(ie))*(vel(j,ibasis,ip)*vel(j,ibasis,ip)) + end do + end do + end do + end if + end do + end if + + !Now sum all of the kinetic energies from all processors, calculate the scaling factor and apply it to the velocities + call mpi_allreduce(Kme, Kall, 1, mpi_wp, mpi_sum, world, ierr) + alpha=sqrt(Ktar(1)/Kall) + if(atom_num > 0) vel_atom = alpha*vel_atom + + if(ele_num > 0) then + vel=alpha*vel + !Now communicate the element node velocities for shared elements + vel_buff = 0 + do ie = 1, ele_num_l + if(who_has_ele(ie)) then + id = ele_glob_id(ie) + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + do j = 1, 3 + vel_buff(3*(id-1)*ng_max_node*max_basisnum + 3*(inod-1)*max_basisnum + 3*(ibasis-1)+j) & + = vel(j, ibasis, ip) + end do + end do + end do + end if + end do + + call mpi_allreduce(mpi_in_place, vel_buff, 3*ng_max_node*ele_num*max_basisnum, mpi_wp, mpi_sum, world, ierr) + do ie = 1, ele_num_l + id = ele_glob_id(ie) + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + do j = 1, 3 + vel(j, ibasis, ip) & + =vel_buff(3*(id-1)*ng_max_node*max_basisnum + 3*(inod-1)*max_basisnum + 3*(ibasis-1)+j) + end do + end do + end do + end do + end if + + end subroutine init_vel + + + subroutine rescale_v + !rescale velocity + integer :: i, j, ibasis, ie + real(kind=wp) :: alpha, avg_vel(3), ke(2), rr, rsum, c1, c2, max_vel(3) + + + if (mod(iter, delay) == 0) then + !First calculate the kinetic energy + avg_vel = compute_avgvel(gnum) + ke = compute_ke(gnum, avg_vel) + + if(Nf(1) > 0) then + if(rank == root) then + !Sample the target kinetic energy from the canonical ensemble + if (is_equal(time_constant, 0.0_wp)) then + c1=0 + else + c1 = exp(-1/(time_constant)) + end if + c2 = (1.0-c1)*Ktarget(2)/(ke(1))/(Nf(1)) + rr = gasdev() + rsum = sumnoises(Nf(1)-1) + !This sampling formulation comes from: + !Bussi, Giovanni et al. "Canonical sampling through velocity rescaling" The Journal of Chemical Physics (2007) + + alpha = sqrt(c1 + c2*(rr*rr + rsum) + 2.0_wp*rr*sqrt(c1*c2)) + end if + + call mpi_bcast(alpha, 1, mpi_wp, root, world, ierr) + + + !Now rescale the velocities + + max_vel=0.0_wp + do i = 1, atom_num_l + do j = 1, 3 + vel_atom(j,i) = avg_vel(j) + alpha*(vel_atom(j,i)-avg_vel(j)) + end do + if(norm2(vel_atom(:, i)) > norm2(max_vel)) max_vel = vel_atom(:,i) + end do + + end if + + if (Nf(2) > 0) then + if(rank == root) then + !Sample the target kinetic energy from the canonical ensemble + if (is_equal(time_constant, 0.0_wp)) then + c1=0 + else + c1 = exp(-1/(time_constant)) + end if + c2 = (1.0-c1)*Ktarget(3)/(ke(2))/(Nf(2)) + rr = gasdev() + rsum = sumnoises(Nf(2)-1) + !This sampling formulation comes from: + !Bussi, Giovanni et al. 
"Canonical sampling through velocity rescaling" The Journal of Chemical Physics (2007) + + alpha = sqrt(c1 + c2*(rr*rr + rsum) + 2.0_wp*rr*sqrt(c1*c2)) + end if + + call mpi_bcast(alpha, 1, mpi_wp, root, world, ierr) + do i = 1, node_num_l + ie = node_cg(i) + do ibasis=1, basis_num(ie) + do j = 1, 3 + vel(j, ibasis, i) = avg_vel(j)+alpha*(vel(j,ibasis,i)-avg_vel(j)) + end do + end do + end do + end if + end if + return + end subroutine rescale_v + + + + function sumnoises(nn) + !Returns the sum of n independent gaussian noises squared. This is equivalent to a chi-squared dist + implicit none + integer, intent(in) :: nn + real(kind=wp) :: sumnoises + + if (nn==0) then + sumnoises=0.0_wp + else if (nn==1) then + sumnoises=gasdev()**2.0_wp + else if (modulo(nn,2)==0) then + sumnoises=2.0_wp*gamdev(nn/2) + else + sumnoises=2.0_wp*gamdev((nn-1)/2) + gasdev()**2.0_wp + end if + end function sumnoises + + function gamdev(ia) + !gamma-distributed random number, implemented as described in numerical recipes + + implicit none + integer, intent(in) :: ia + integer :: j + real(kind=wp) :: am, e, s, v1, v2, x, y, rand + real(kind=wp) :: gamdev + + if(ia < 1) then + print *, "Bad argument to function gamdev" + call mpi_abort(mpi_comm_world, 1, ierr) + else if (ia < 6) then + x = 1 + do j = 1, ia + call random_number(rand) + x = x*rand + end do + + if( x < lim_small) then + x = 708.4 + else + x = -log(x) + end if + else + do while(.true.) + call random_number(rand) + v1=rand + call random_number(rand) + v2=2.0_wp*rand-1.0_wp + if((v1*v1+v2*v2)>1.0_wp) cycle + y = v2/v1 + am=ia-1 + s=sqrt(2.0_wp*am+1.0_wp) + x=s*y+am + + if ( x <= 0) cycle + + if (((am*log(x/am)-s*y) < -700) .or.(v1<0.00001)) cycle + e = (1.0_wp + y**2.0_wp)*exp(am*log(x/am)-s*y) + call random_number(rand) + if(rand > e) cycle + exit + end do + end if + gamdev=x + end function gamdev + + function gasdev() + ! gaussian-distributed random number, implemented as described in numerical recipes + + implicit none + integer, save :: iset=0 + real(kind=wp), save :: gset + real(kind=wp) :: rand, fac, rsq, v1, v2 + real(kind=wp) gasdev + + if(iset ==0) then + do while(.true.) 
+ call random_number(rand) + v1=2.0_wp*rand-1.0_wp + call random_number(rand) + v2=2.0_wp*rand-1.0_wp + rsq = v1**2.0_wp + v2**2.0_wp + if((rsq>=1.0_wp).or.((rsq>-lim_small).and.(rsq < lim_small))) then + cycle + else + exit + end if + end do + fac=sqrt(-2.0_wp*log(rsq)/rsq) + gset=v1*fac + gasdev=v2*fac + iset=1 + else + gasdev=gset + iset=0 + end if + end function gasdev + + subroutine apply_perturbation + !This subroutine applies the appropriate perturbation to the interpolated atoms of an element + + integer :: ie, iatom, virt_count, iatomap, ibasis + real(kind = wp) :: bvec(3,3), r1, r2, r3, v(3), max_disp + + max_disp = 0 + do ie = 1, ele_num_l + select case(etype(ie)) + case(1,2) + virt_count = (size_ele(ie)+1)**3 + end select + + !First calculate the lattice basis vectors from the element nodal positions + bvec(:,1) = r(:,1,cg_node(2,ie)) - r(:,1, cg_node(1,ie)) + bvec(:,2) = r(:,1,cg_node(4,ie)) - r(:,1, cg_node(1,ie)) + bvec(:,3) = r(:,1,cg_node(5,ie)) - r(:,1, cg_node(1,ie)) + + !Now normalize the lattice vectors + bvec(:,1) = bvec(:,1)/norm2(bvec(:,1)) + bvec(:,2) = bvec(:,2)/norm2(bvec(:,2)) + bvec(:,3) = bvec(:,3)/norm2(bvec(:,3)) + + !Loop over all interpolated atoms + do iatom = 1, virt_count + do ibasis = 1, basis_num(ie) + iatomap = cg_atomap(basis_num(ie)*(iatom-1) + ibasis, ie) + if (iatomap /= 0) then + !Get 3 random numbers + r1 = gasdev() + r2 = gasdev() + r3 = gasdev() + + !Now scale the lattice vectors by the gaussian number + v = 0.0_wp + v = v+r1*bvec(:,1) + v = v+r2*bvec(:,2) + v = v+r3*bvec(:,3) + + !Add the vectors together to get the perturbation vector and then scale the magnitude to the mean sqaured + !oscillation magnitude + + v = v*(urms(type_atomap(iatomap))/norm2(v)) + max_disp = norm2(v) + + r_atomap(:, iatomap) = r_atomap(:, iatomap) + v + end if + end do + end do + end do + + end subroutine apply_perturbation +end module temp diff --git a/src/thermo.f90 b/src/thermo.f90 new file mode 100644 index 0000000..9c51b87 --- /dev/null +++ b/src/thermo.f90 @@ -0,0 +1,165 @@ +module thermo + !this is similar to the lammps thermo output which logs simulation information + use elements + use forces + use compute + use str + + implicit none + + + integer :: thermo_every, thermo_style_num + character(len=read_len) :: thermo_style(100) + logical :: need_k, need_t, need_p + + public + contains + + subroutine thermo_defaults + thermo_every = huge(1) + thermo_style_num = 2 + thermo_style(1) = "pe" + thermo_style(2) = "fnorm" + need_k = .false. + need_t = .false. + need_p = .false. + end subroutine thermo_defaults + + subroutine parse_thermo_style(line) + !This subroutine parses the thermo style command + character(len=*), intent(in) :: line + integer :: iospara, i + + character(len=read_len) :: tmptxt + + thermo_style_num = tok_count(line)-1 + + need_k = .false. + need_t = .false. + need_p = .false. + read(line, *, iostat=iospara) tmptxt, (thermo_style(i), i=1, thermo_style_num) + + !Now loop over all of the thermo_style items to make sure that they are acceptable + do i= 1, thermo_style_num + select case(thermo_style(i)) + case('pe','fnorm', 'lx', 'ly', 'lz') + continue + case('ke','ke_c','ke_a') + need_k = .true. + case('temp', 'temp_c', 'temp_a') + need_k = .true. + need_t = .true. + case('px', 'py', 'pz') + need_p = .true. + case default + write(tmptxt, *) trim(adjustl(thermo_style(i))), " is not an acceptable thermostyle command, please check " //& + " the documentation for acceptable commands." 
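+ !Anything not matched above falls through to this default branch; the need_k, need_t,
+ !and need_p flags recorded for the recognized keywords let write_thermo_out skip the
+ !kinetic-energy, temperature, and pressure computations when they are not requested.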
+ end select + end do + end subroutine parse_thermo_style + + subroutine parse_thermo(line) + !This subroutine parses the thermo command + character(len=*) :: line + character(len = read_len) :: tmptxt + integer :: iospara + + read(line, *, iostat = iospara) tmptxt, thermo_every + if (iospara > 0) call read_error("Failure to read thermo command", iospara) + + !Make sure thermo_every is greater than 0 + if(thermo_every < 0) call misc_error("Thermo_every should be greater than 0 in thermo command") + + if(thermo_every == 0) then + thermo_every = huge(1) + end if + + end subroutine parse_thermo + + subroutine write_thermo_style + integer :: i + character(len=read_len) :: style_out + + write(style_out, *) "thermo: ", (trim(adjustl(thermo_style(i)))//" ", i = 1, thermo_style_num) + call log_msg(style_out, 0) + + end subroutine write_thermo_style + + subroutine write_thermo_out(time) + !Preliminary write thermo out + integer, intent(in) :: time + integer :: i + real(kind=wp) :: ke(2), T(3), thermo_out(100), P(3,3) + character(len=read_len) :: msg + + thermo_out = 0.0_wp + + !Calculate temp, ke, and press if needed + if(need_k) then + if(need_vel) then + ke = compute_ke(1) + else + ke = 0.0_wp + end if + end if + if(need_t) then + if(need_vel) then + T = compute_temp(1, ke) + else + T = 0.0_wp + end if + end if + + if(need_p) P = compute_box_press() + + !Loop over all thermo_style commands and put the desired outputs into thermo_out + do i= 1, thermo_style_num + select case(thermo_style(i)) + case('pe') + !Get pe all + thermo_out(i) = compute_pe(1) + + case('fnorm') + !Get fnorm all + thermo_out(i) = compute_fnorm(1) + + case('ke') + thermo_out(i) = ke(1) + ke(2) + case('ke_a') + thermo_out(i) = ke(1) + case('ke_c') + thermo_out(i) = ke(2) + + case('temp') + thermo_out(i) = T(1) + case('temp_a') + thermo_out(i) = T(2) + case('temp_c') + thermo_out(i) = T(3) + + case('lx') + thermo_out(i) = box_bd(2) - box_bd(1) + case('ly') + thermo_out(i) = box_bd(4) - box_bd(3) + case('lz') + thermo_out(i) = box_bd(6) - box_bd(5) + + case('px') + thermo_out(i) = P(1,1) + case('py') + thermo_out(i) = P(2,2) + case('pz') + thermo_out(i) = P(3,3) + + end select + + end do + + if(rank == root) then + write(msg,*) time, thermo_out(1:thermo_style_num) + call log_msg(msg) + end if + + return + end subroutine write_thermo_out +end module thermo diff --git a/src/time.f90 b/src/time.f90 new file mode 100644 index 0000000..5c7b9d7 --- /dev/null +++ b/src/time.f90 @@ -0,0 +1,50 @@ +module time + use parameters + use logger + use errors + use mpi + + implicit none + !This subroutine contains variables and subroutines associated with timestepping + !also contains code for timing + + integer :: itime_start, iter, run_steps, begin_step + real(kind=wp) :: t, time_step, orig_time_step + real(kind=wp) :: walltime(2), program_start, program_end + + public + contains + + subroutine time_defaults + t = 0.0_wp + time_step = 0.001_wp + iter = 0 + orig_time_step = 0.001_wp + walltime=0.0_wp + end subroutine time_defaults + + subroutine parse_timestep(line) + character(len=*), intent(in) :: line + character(len=read_len) :: tmptxt + integer :: iospara + + read(line, *, iostat = iospara) tmptxt, time_step + + if(time_step <= 0.0_wp) then + call misc_error("time step must be greater than 0") + end if + + orig_time_step = time_step + end subroutine parse_timestep + + subroutine log_time + character(len=read_len) :: msg + + program_end = mpi_wtime() + write(msg, *) walltime(1), " spent neighboring and ", walltime(2), " spent computing 
forces" + call log_msg(msg) + write(msg, *) "Total time is:", program_end - program_start + call log_msg(msg) + end subroutine log_time + +end module time diff --git a/src/vel_verlet.f90 b/src/vel_verlet.f90 new file mode 100644 index 0000000..4dbdad4 --- /dev/null +++ b/src/vel_verlet.f90 @@ -0,0 +1,109 @@ +module vel_verlet + !This module contains all the code for running steps of the verlet algorithm + !This is equivalent to the NVE ensemble + use parameters + use comms + use elements + use neighbors + use potential + use time + + implicit none + + public + contains + + subroutine verlet(i) + integer, intent(in) :: i + !This subroutine advances the model by 1 timestep using the velocity verlet algorithm + !Update position at time i+1 + call update_r + !Update neighbor at time i+1 + call update_neighbor(i) + !Update neighbor at time i+1 + call update_force + !Update velocity at time i+1 + call update_vel(time_step) + end subroutine verlet + + subroutine update_r + !This subroutine updates the positions for all atoms and coarse-grained element nodes + + integer :: ia, ip, ibasis + + if(ele_num /= 0) then + do ip = 1, node_num_l + do ibasis = 1, basis_num(node_cg(ip)) + r(:,ibasis,ip) = r(:, ibasis, ip) + vel(:, ibasis, ip) * time_step & + + 0.5_wp * force_eq(:, ibasis, ip)/ masses(basis_type(ibasis,node_cg(ip))) & + * time_step**2.0_wp / const_motion + end do + end do + force_eq_pre(:,:,1:node_num_l) = force_eq(:,:,1:node_num_l) + end if + + if(atom_num /= 0) then + do ia = 1, atom_num_l + r_atom(:,ia) = r_atom(:,ia) + vel_atom(:, ia)*time_step + 0.5_wp*force_atom(:,ia)/masses(type_atom(ia)) & + * time_step**2.0_wp / const_motion + end do + force_atom_pre(:,1:atom_num_l) = force_atom(:, 1:atom_num_l) + + end if + + return + end subroutine update_r + + subroutine update_vel(time_step) + real(kind=wp), intent(in) :: time_step + !update the velocity + integer :: ie, inod, ip, ia, ibasis + real(kind = wp) :: pro_force_normsq, avg(3) + + pro_force_normsq = 0.0_wp + if(ele_num /= 0) then + do ie = 1, ele_num_l + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + vel(:, ibasis, ip) = vel(:, ibasis, ip) + 0.5_wp * (force_eq_pre(:, ibasis, ip) & + + force_eq(:, ibasis, ip)) / masses(basis_type(ibasis,ie)) & + * time_step / const_motion + end do + end do + end do + end if + + !atomistic domain + if(atom_num /= 0) then + do ia = 1, atom_num_l + vel_atom(:, ia) = vel_atom(:, ia) + 0.5_wp * (force_atom_pre(:, ia) & + + force_atom(:, ia)) / masses(type_atom(ia)) & + * time_step / const_motion + end do + + end if + + !Now take out the average velocity + avg = compute_avgvel(1) + if(ele_num > 0) then + do ie = 1, ele_num_l + do inod = 1, ng_node(etype(ie)) + ip = cg_node(inod, ie) + do ibasis = 1, basis_num(ie) + vel(:, ibasis, ip) = vel(:, ibasis, ip) - avg + end do + end do + end do + end if + + !atomistic domain + if(atom_num /= 0) then + do ia = 1, atom_num_l + vel_atom(:, ia) = vel_atom(:, ia) - avg + end do + + end if + + end subroutine update_vel +end module vel_verlet