diff --git a/gprMax/solvers.py b/gprMax/solvers.py
index baedc0c5..b73de79f 100644
--- a/gprMax/solvers.py
+++ b/gprMax/solvers.py
@@ -21,7 +21,8 @@ import gprMax.config as config
from .grid import CUDAGrid, FDTDGrid, OpenCLGrid
from .subgrids.updates import create_updates as create_subgrid_updates
from .updates.cpu_updates import CPUUpdates
-from .updates.updates import CUDAUpdates, OpenCLUpdates
+from .updates.cuda_updates import CUDAUpdates
+from .updates.opencl_updates import OpenCLUpdates
def create_G():
diff --git a/gprMax/updates/cuda_updates.py b/gprMax/updates/cuda_updates.py
new file mode 100644
index 00000000..1211725c
--- /dev/null
+++ b/gprMax/updates/cuda_updates.py
@@ -0,0 +1,584 @@
+import logging
+from importlib import import_module
+
+import humanize
+import numpy as np
+from jinja2 import Environment, PackageLoader
+
+from gprMax import config
+from gprMax.cuda_opencl import knl_fields_updates, knl_snapshots, knl_source_updates, knl_store_outputs
+from gprMax.receivers import dtoh_rx_array, htod_rx_arrays
+from gprMax.snapshots import Snapshot, dtoh_snapshot_array, htod_snapshot_array
+from gprMax.sources import htod_src_arrays
+from gprMax.utilities.utilities import round32
+
+logger = logging.getLogger(__name__)
+
+
+class CUDAUpdates:
+ """Defines update functions for GPU-based (CUDA) solver."""
+
+ def __init__(self, G):
+ """
+ Args:
+ G: CUDAGrid class describing a grid in a model.
+ """
+
+ self.grid = G
+
+ # Import PyCUDA modules
+ self.drv = import_module("pycuda.driver")
+ self.source_module = getattr(import_module("pycuda.compiler"), "SourceModule")
+ self.drv.init()
+
+ # Create device handle and context on specific GPU device (and make it current context)
+ self.dev = config.get_model_config().device["dev"]
+ self.ctx = self.dev.make_context()
+
+ # Set common substitutions for use in kernels
+ # Substitutions in function arguments
+ self.subs_name_args = {
+ "REAL": config.sim_config.dtypes["C_float_or_double"],
+ "COMPLEX": config.get_model_config().materials["dispersiveCdtype"],
+ }
+ # Substitutions in function bodies
+ self.subs_func = {
+ "REAL": config.sim_config.dtypes["C_float_or_double"],
+ "CUDA_IDX": "int i = blockIdx.x * blockDim.x + threadIdx.x;",
+ "NX_FIELDS": self.grid.nx + 1,
+ "NY_FIELDS": self.grid.ny + 1,
+ "NZ_FIELDS": self.grid.nz + 1,
+ "NX_ID": self.grid.ID.shape[1],
+ "NY_ID": self.grid.ID.shape[2],
+ "NZ_ID": self.grid.ID.shape[3],
+ }
+
+ # Enviroment for templating kernels
+ self.env = Environment(loader=PackageLoader("gprMax", "cuda_opencl"))
+
+ # Initialise arrays on GPU, prepare kernels, and get kernel functions
+ self._set_macros()
+ self._set_field_knls()
+ if self.grid.pmls["slabs"]:
+ self._set_pml_knls()
+ if self.grid.rxs:
+ self._set_rx_knl()
+ if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles:
+ self._set_src_knls()
+ if self.grid.snapshots:
+ self._set_snapshot_knl()
+
+ def _build_knl(self, knl_func, subs_name_args, subs_func):
+ """Builds a CUDA kernel from templates: 1) function name and args;
+ and 2) function (kernel) body.
+
+ Args:
+ knl_func: dict containing templates for function name and args,
+ and function body.
+ subs_name_args: dict containing substitutions to be used with
+ function name and args.
+ subs_func: dict containing substitutions to be used with function
+ (kernel) body.
+
+ Returns:
+ knl: string with complete kernel
+ """
+
+ name_plus_args = knl_func["args_cuda"].substitute(subs_name_args)
+ func_body = knl_func["func"].substitute(subs_func)
+ knl = self.knl_common + "\n" + name_plus_args + "{" + func_body + "}"
+
+ return knl
+
+ def _set_macros(self):
+ """Common macros to be used in kernels."""
+
+ # Set specific values for any dispersive materials
+ if config.get_model_config().materials["maxpoles"] > 0:
+ NY_MATDISPCOEFFS = self.grid.updatecoeffsdispersive.shape[1]
+ NX_T = self.grid.Tx.shape[1]
+ NY_T = self.grid.Tx.shape[2]
+ NZ_T = self.grid.Tx.shape[3]
+ else: # Set to one any substitutions for dispersive materials.
+ NY_MATDISPCOEFFS = 1
+ NX_T = 1
+ NY_T = 1
+ NZ_T = 1
+
+ self.knl_common = self.env.get_template("knl_common_cuda.tmpl").render(
+ REAL=config.sim_config.dtypes["C_float_or_double"],
+ N_updatecoeffsE=self.grid.updatecoeffsE.size,
+ N_updatecoeffsH=self.grid.updatecoeffsH.size,
+ NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
+ NY_MATDISPCOEFFS=NY_MATDISPCOEFFS,
+ NX_FIELDS=self.grid.nx + 1,
+ NY_FIELDS=self.grid.ny + 1,
+ NZ_FIELDS=self.grid.nz + 1,
+ NX_ID=self.grid.ID.shape[1],
+ NY_ID=self.grid.ID.shape[2],
+ NZ_ID=self.grid.ID.shape[3],
+ NX_T=NX_T,
+ NY_T=NY_T,
+ NZ_T=NZ_T,
+ NY_RXCOORDS=3,
+ NX_RXS=6,
+ NY_RXS=self.grid.iterations,
+ NZ_RXS=len(self.grid.rxs),
+ NY_SRCINFO=4,
+ NY_SRCWAVES=self.grid.iterations,
+ NX_SNAPS=Snapshot.nx_max,
+ NY_SNAPS=Snapshot.ny_max,
+ NZ_SNAPS=Snapshot.nz_max,
+ )
+
+ def _set_field_knls(self):
+ """Electric and magnetic field updates - prepares kernels, and
+ gets kernel functions.
+ """
+
+ bld = self._build_knl(knl_fields_updates.update_electric, self.subs_name_args, self.subs_func)
+ knlE = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
+ self.update_electric_dev = knlE.get_function("update_electric")
+
+ bld = self._build_knl(knl_fields_updates.update_magnetic, self.subs_name_args, self.subs_func)
+ knlH = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
+ self.update_magnetic_dev = knlH.get_function("update_magnetic")
+
+ self._copy_mat_coeffs(knlE, knlH)
+
+ # If there are any dispersive materials (updates are split into two
+ # parts as they require present and updated electric field values).
+ if config.get_model_config().materials["maxpoles"] > 0:
+ self.subs_func.update(
+ {
+ "REAL": config.sim_config.dtypes["C_float_or_double"],
+ "REALFUNC": config.get_model_config().materials["crealfunc"],
+ "NX_T": self.grid.Tx.shape[1],
+ "NY_T": self.grid.Tx.shape[2],
+ "NZ_T": self.grid.Tx.shape[3],
+ }
+ )
+
+ bld = self._build_knl(knl_fields_updates.update_electric_dispersive_A, self.subs_name_args, self.subs_func)
+ knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
+ self.dispersive_update_a = knl.get_function("update_electric_dispersive_A")
+ self._copy_mat_coeffs(knl, knl)
+
+ bld = self._build_knl(knl_fields_updates.update_electric_dispersive_B, self.subs_name_args, self.subs_func)
+ knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
+ self.dispersive_update_b = knl.get_function("update_electric_dispersive_B")
+ self._copy_mat_coeffs(knl, knl)
+
+ # Set blocks per grid and initialise field arrays on GPU
+ self.grid.set_blocks_per_grid()
+ self.grid.htod_geometry_arrays()
+ self.grid.htod_field_arrays()
+ if config.get_model_config().materials["maxpoles"] > 0:
+ self.grid.htod_dispersive_arrays()
+
+ def _set_pml_knls(self):
+ """PMLS - prepares kernels and gets kernel functions."""
+ knl_pml_updates_electric = import_module(
+ "gprMax.cuda_opencl.knl_pml_updates_electric_" + self.grid.pmls["formulation"]
+ )
+ knl_pml_updates_magnetic = import_module(
+ "gprMax.cuda_opencl.knl_pml_updates_magnetic_" + self.grid.pmls["formulation"]
+ )
+
+ # Initialise arrays on GPU, set block per grid, and get kernel functions
+ for pml in self.grid.pmls["slabs"]:
+ pml.htod_field_arrays()
+ pml.set_blocks_per_grid()
+ knl_name = f"order{len(pml.CFS)}_{pml.direction}"
+ self.subs_name_args["FUNC"] = knl_name
+
+ knl_electric = getattr(knl_pml_updates_electric, knl_name)
+ bld = self._build_knl(knl_electric, self.subs_name_args, self.subs_func)
+ knlE = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
+ pml.update_electric_dev = knlE.get_function(knl_name)
+
+ knl_magnetic = getattr(knl_pml_updates_magnetic, knl_name)
+ bld = self._build_knl(knl_magnetic, self.subs_name_args, self.subs_func)
+ knlH = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
+ pml.update_magnetic_dev = knlH.get_function(knl_name)
+
+ # Copy material coefficient arrays to constant memory of GPU - must
+ # be done for each kernel
+ self._copy_mat_coeffs(knlE, knlH)
+
+ def _set_rx_knl(self):
+ """Receivers - initialises arrays on GPU, prepares kernel and gets kernel
+ function.
+ """
+ self.rxcoords_dev, self.rxs_dev = htod_rx_arrays(self.grid)
+
+ self.subs_func.update(
+ {
+ "REAL": config.sim_config.dtypes["C_float_or_double"],
+ "NY_RXCOORDS": 3,
+ "NX_RXS": 6,
+ "NY_RXS": self.grid.iterations,
+ "NZ_RXS": len(self.grid.rxs),
+ }
+ )
+
+ bld = self._build_knl(knl_store_outputs.store_outputs, self.subs_name_args, self.subs_func)
+ knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
+ self.store_outputs_dev = knl.get_function("store_outputs")
+
+ def _set_src_knls(self):
+ """Sources - initialises arrays on GPU, prepares kernel and gets kernel
+ function.
+ """
+ self.subs_func.update({"NY_SRCINFO": 4, "NY_SRCWAVES": self.grid.iteration})
+
+ if self.grid.hertziandipoles:
+ self.srcinfo1_hertzian_dev, self.srcinfo2_hertzian_dev, self.srcwaves_hertzian_dev = htod_src_arrays(
+ self.grid.hertziandipoles, self.grid
+ )
+ bld = self._build_knl(knl_source_updates.update_hertzian_dipole, self.subs_name_args, self.subs_func)
+ knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
+ self.update_hertzian_dipole_dev = knl.get_function("update_hertzian_dipole")
+ if self.grid.magneticdipoles:
+ self.srcinfo1_magnetic_dev, self.srcinfo2_magnetic_dev, self.srcwaves_magnetic_dev = htod_src_arrays(
+ self.grid.magneticdipoles, self.grid
+ )
+ bld = self._build_knl(knl_source_updates.update_magnetic_dipole, self.subs_name_args, self.subs_func)
+ knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
+ self.update_magnetic_dipole_dev = knl.get_function("update_magnetic_dipole")
+ if self.grid.voltagesources:
+ self.srcinfo1_voltage_dev, self.srcinfo2_voltage_dev, self.srcwaves_voltage_dev = htod_src_arrays(
+ self.grid.voltagesources, self.grid
+ )
+ bld = self._build_knl(knl_source_updates.update_voltage_source, self.subs_name_args, self.subs_func)
+ knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
+ self.update_voltage_source_dev = knl.get_function("update_voltage_source")
+
+ self._copy_mat_coeffs(knl, knl)
+
+ def _set_snapshot_knl(self):
+ """Snapshots - initialises arrays on GPU, prepares kernel and gets kernel
+ function.
+ """
+ (
+ self.snapEx_dev,
+ self.snapEy_dev,
+ self.snapEz_dev,
+ self.snapHx_dev,
+ self.snapHy_dev,
+ self.snapHz_dev,
+ ) = htod_snapshot_array(self.grid)
+
+ self.subs_func.update(
+ {
+ "REAL": config.sim_config.dtypes["C_float_or_double"],
+ "NX_SNAPS": Snapshot.nx_max,
+ "NY_SNAPS": Snapshot.ny_max,
+ "NZ_SNAPS": Snapshot.nz_max,
+ }
+ )
+
+ bld = self._build_knl(knl_snapshots.store_snapshot, self.subs_name_args, self.subs_func)
+ knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
+ self.store_snapshot_dev = knl.get_function("store_snapshot")
+
+ def _copy_mat_coeffs(self, knlE, knlH):
+ """Copies material coefficient arrays to constant memory of GPU
+ (must be <64KB).
+
+ Args:
+ knlE: kernel for electric field.
+ knlH: kernel for magnetic field.
+ """
+
+ # Check if coefficient arrays will fit on constant memory of GPU
+ if (
+ self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes
+ > config.get_model_config().device["dev"].total_constant_memory
+ ):
+ device = config.get_model_config().device["dev"]
+ logger.exception(
+ f"Too many materials in the model to fit onto "
+ + f"constant memory of size {humanize.naturalsize(device.total_constant_memory)} "
+ + f"on {device.deviceID}: {' '.join(device.name().split())}"
+ )
+ raise ValueError
+
+ updatecoeffsE = knlE.get_global("updatecoeffsE")[0]
+ updatecoeffsH = knlH.get_global("updatecoeffsH")[0]
+ self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE)
+ self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH)
+
+ def store_outputs(self):
+ """Stores field component values for every receiver."""
+ if self.grid.rxs:
+ self.store_outputs_dev(
+ np.int32(len(self.grid.rxs)),
+ np.int32(self.grid.iteration),
+ self.rxcoords_dev.gpudata,
+ self.rxs_dev.gpudata,
+ self.grid.Ex_dev.gpudata,
+ self.grid.Ey_dev.gpudata,
+ self.grid.Ez_dev.gpudata,
+ self.grid.Hx_dev.gpudata,
+ self.grid.Hy_dev.gpudata,
+ self.grid.Hz_dev.gpudata,
+ block=(1, 1, 1),
+ grid=(round32(len(self.grid.rxs)), 1, 1),
+ )
+
+ def store_snapshots(self, iteration):
+ """Stores any snapshots.
+
+ Args:
+ iteration: int for iteration number.
+ """
+
+ for i, snap in enumerate(self.grid.snapshots):
+ if snap.time == iteration + 1:
+ snapno = 0 if config.get_model_config().device["snapsgpu2cpu"] else i
+ self.store_snapshot_dev(
+ np.int32(snapno),
+ np.int32(snap.xs),
+ np.int32(snap.xf),
+ np.int32(snap.ys),
+ np.int32(snap.yf),
+ np.int32(snap.zs),
+ np.int32(snap.zf),
+ np.int32(snap.dx),
+ np.int32(snap.dy),
+ np.int32(snap.dz),
+ self.grid.Ex_dev.gpudata,
+ self.grid.Ey_dev.gpudata,
+ self.grid.Ez_dev.gpudata,
+ self.grid.Hx_dev.gpudata,
+ self.grid.Hy_dev.gpudata,
+ self.grid.Hz_dev.gpudata,
+ self.snapEx_dev.gpudata,
+ self.snapEy_dev.gpudata,
+ self.snapEz_dev.gpudata,
+ self.snapHx_dev.gpudata,
+ self.snapHy_dev.gpudata,
+ self.snapHz_dev.gpudata,
+ block=Snapshot.tpb,
+ grid=Snapshot.bpg,
+ )
+ if config.get_model_config().device["snapsgpu2cpu"]:
+ dtoh_snapshot_array(
+ self.snapEx_dev.get(),
+ self.snapEy_dev.get(),
+ self.snapEz_dev.get(),
+ self.snapHx_dev.get(),
+ self.snapHy_dev.get(),
+ self.snapHz_dev.get(),
+ 0,
+ snap,
+ )
+
+ def update_magnetic(self):
+ """Updates magnetic field components."""
+ self.update_magnetic_dev(
+ np.int32(self.grid.nx),
+ np.int32(self.grid.ny),
+ np.int32(self.grid.nz),
+ self.grid.ID_dev.gpudata,
+ self.grid.Hx_dev.gpudata,
+ self.grid.Hy_dev.gpudata,
+ self.grid.Hz_dev.gpudata,
+ self.grid.Ex_dev.gpudata,
+ self.grid.Ey_dev.gpudata,
+ self.grid.Ez_dev.gpudata,
+ block=self.grid.tpb,
+ grid=self.grid.bpg,
+ )
+
+ def update_magnetic_pml(self):
+ """Updates magnetic field components with the PML correction."""
+ for pml in self.grid.pmls["slabs"]:
+ pml.update_magnetic()
+
+ def update_magnetic_sources(self):
+ """Updates magnetic field components from sources."""
+ if self.grid.magneticdipoles:
+ self.update_magnetic_dipole_dev(
+ np.int32(len(self.grid.magneticdipoles)),
+ np.int32(self.grid.iteration),
+ config.sim_config.dtypes["float_or_double"](self.grid.dx),
+ config.sim_config.dtypes["float_or_double"](self.grid.dy),
+ config.sim_config.dtypes["float_or_double"](self.grid.dz),
+ self.srcinfo1_magnetic_dev.gpudata,
+ self.srcinfo2_magnetic_dev.gpudata,
+ self.srcwaves_magnetic_dev.gpudata,
+ self.grid.ID_dev.gpudata,
+ self.grid.Hx_dev.gpudata,
+ self.grid.Hy_dev.gpudata,
+ self.grid.Hz_dev.gpudata,
+ block=(1, 1, 1),
+ grid=(round32(len(self.grid.magneticdipoles)), 1, 1),
+ )
+
+ def update_electric_a(self):
+ """Updates electric field components."""
+ # All materials are non-dispersive so do standard update.
+ if config.get_model_config().materials["maxpoles"] == 0:
+ self.update_electric_dev(
+ np.int32(self.grid.nx),
+ np.int32(self.grid.ny),
+ np.int32(self.grid.nz),
+ self.grid.ID_dev.gpudata,
+ self.grid.Ex_dev.gpudata,
+ self.grid.Ey_dev.gpudata,
+ self.grid.Ez_dev.gpudata,
+ self.grid.Hx_dev.gpudata,
+ self.grid.Hy_dev.gpudata,
+ self.grid.Hz_dev.gpudata,
+ block=self.grid.tpb,
+ grid=self.grid.bpg,
+ )
+
+ # If there are any dispersive materials do 1st part of dispersive update
+ # (it is split into two parts as it requires present and updated electric field values).
+ else:
+ self.dispersive_update_a(
+ np.int32(self.grid.nx),
+ np.int32(self.grid.ny),
+ np.int32(self.grid.nz),
+ np.int32(config.get_model_config().materials["maxpoles"]),
+ self.grid.updatecoeffsdispersive_dev.gpudata,
+ self.grid.Tx_dev.gpudata,
+ self.grid.Ty_dev.gpudata,
+ self.grid.Tz_dev.gpudata,
+ self.grid.ID_dev.gpudata,
+ self.grid.Ex_dev.gpudata,
+ self.grid.Ey_dev.gpudata,
+ self.grid.Ez_dev.gpudata,
+ self.grid.Hx_dev.gpudata,
+ self.grid.Hy_dev.gpudata,
+ self.grid.Hz_dev.gpudata,
+ block=self.grid.tpb,
+ grid=self.grid.bpg,
+ )
+
+ def update_electric_pml(self):
+ """Updates electric field components with the PML correction."""
+ for pml in self.grid.pmls["slabs"]:
+ pml.update_electric()
+
+ def update_electric_sources(self):
+ """Updates electric field components from sources -
+ update any Hertzian dipole sources last.
+ """
+ if self.grid.voltagesources:
+ self.update_voltage_source_dev(
+ np.int32(len(self.grid.voltagesources)),
+ np.int32(self.grid.iteration),
+ config.sim_config.dtypes["float_or_double"](self.grid.dx),
+ config.sim_config.dtypes["float_or_double"](self.grid.dy),
+ config.sim_config.dtypes["float_or_double"](self.grid.dz),
+ self.srcinfo1_voltage_dev.gpudata,
+ self.srcinfo2_voltage_dev.gpudata,
+ self.srcwaves_voltage_dev.gpudata,
+ self.grid.ID_dev.gpudata,
+ self.grid.Ex_dev.gpudata,
+ self.grid.Ey_dev.gpudata,
+ self.grid.Ez_dev.gpudata,
+ block=(1, 1, 1),
+ grid=(round32(len(self.grid.voltagesources)), 1, 1),
+ )
+
+ if self.grid.hertziandipoles:
+ self.update_hertzian_dipole_dev(
+ np.int32(len(self.grid.hertziandipoles)),
+ np.int32(self.grid.iteration),
+ config.sim_config.dtypes["float_or_double"](self.grid.dx),
+ config.sim_config.dtypes["float_or_double"](self.grid.dy),
+ config.sim_config.dtypes["float_or_double"](self.grid.dz),
+ self.srcinfo1_hertzian_dev.gpudata,
+ self.srcinfo2_hertzian_dev.gpudata,
+ self.srcwaves_hertzian_dev.gpudata,
+ self.grid.ID_dev.gpudata,
+ self.grid.Ex_dev.gpudata,
+ self.grid.Ey_dev.gpudata,
+ self.grid.Ez_dev.gpudata,
+ block=(1, 1, 1),
+ grid=(round32(len(self.grid.hertziandipoles)), 1, 1),
+ )
+
+ self.grid.iteration += 1
+
+ def update_electric_b(self):
+ """If there are any dispersive materials do 2nd part of dispersive
+ update - it is split into two parts as it requires present and
+ updated electric field values. Therefore it can only be completely
+ updated after the electric field has been updated by the PML and
+ source updates.
+ """
+ if config.get_model_config().materials["maxpoles"] > 0:
+ self.dispersive_update_b(
+ np.int32(self.grid.nx),
+ np.int32(self.grid.ny),
+ np.int32(self.grid.nz),
+ np.int32(config.get_model_config().materials["maxpoles"]),
+ self.grid.updatecoeffsdispersive_dev.gpudata,
+ self.grid.Tx_dev.gpudata,
+ self.grid.Ty_dev.gpudata,
+ self.grid.Tz_dev.gpudata,
+ self.grid.ID_dev.gpudata,
+ self.grid.Ex_dev.gpudata,
+ self.grid.Ey_dev.gpudata,
+ self.grid.Ez_dev.gpudata,
+ block=self.grid.tpb,
+ grid=self.grid.bpg,
+ )
+
+ def time_start(self):
+ """Starts event timers used to calculate solving time for model."""
+ self.iterstart = self.drv.Event()
+ self.iterend = self.drv.Event()
+ self.iterstart.record()
+ self.iterstart.synchronize()
+
+ def calculate_memory_used(self, iteration):
+ """Calculates memory used on last iteration.
+
+ Args:
+ iteration: int for iteration number.
+
+ Returns:
+ Memory (RAM) used on GPU.
+ """
+ if iteration == self.grid.iterations - 1:
+ # Total minus free memory in current context
+ return self.drv.mem_get_info()[1] - self.drv.mem_get_info()[0]
+
+ def calculate_solve_time(self):
+ """Calculates solving time for model."""
+ self.iterend.record()
+ self.iterend.synchronize()
+ return self.iterstart.time_till(self.iterend) * 1e-3
+
+ def finalise(self):
+ """Copies data from GPU back to CPU to save to file(s)."""
+ # Copy output from receivers array back to correct receiver objects
+ if self.grid.rxs:
+ dtoh_rx_array(self.rxs_dev.get(), self.rxcoords_dev.get(), self.grid)
+
+ # Copy data from any snapshots back to correct snapshot objects
+ if self.grid.snapshots and not config.get_model_config().device["snapsgpu2cpu"]:
+ for i, snap in enumerate(self.grid.snapshots):
+ dtoh_snapshot_array(
+ self.snapEx_dev.get(),
+ self.snapEy_dev.get(),
+ self.snapEz_dev.get(),
+ self.snapHx_dev.get(),
+ self.snapHy_dev.get(),
+ self.snapHz_dev.get(),
+ i,
+ snap,
+ )
+
+ def cleanup(self):
+ """Cleanup GPU context."""
+ # Remove context from top of stack and clear
+ self.ctx.pop()
+ self.ctx = None
diff --git a/gprMax/updates/opencl_updates.py b/gprMax/updates/opencl_updates.py
new file mode 100644
index 00000000..39b92861
--- /dev/null
+++ b/gprMax/updates/opencl_updates.py
@@ -0,0 +1,591 @@
+# Copyright (C) 2015-2024: The University of Edinburgh, United Kingdom
+# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
+#
+# This file is part of gprMax.
+#
+# gprMax is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# gprMax is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with gprMax. If not, see .
+
+import logging
+from importlib import import_module
+
+import numpy as np
+from jinja2 import Environment, PackageLoader
+
+from gprMax import config
+from gprMax.cuda_opencl import knl_fields_updates, knl_snapshots, knl_source_updates, knl_store_outputs
+from gprMax.receivers import dtoh_rx_array, htod_rx_arrays
+from gprMax.snapshots import Snapshot, dtoh_snapshot_array, htod_snapshot_array
+from gprMax.sources import htod_src_arrays
+
+logger = logging.getLogger(__name__)
+
+
+class OpenCLUpdates:
+ """Defines update functions for OpenCL-based solver."""
+
+ def __init__(self, G):
+ """
+ Args:
+ G: OpenCLGrid class describing a grid in a model.
+ """
+
+ self.grid = G
+
+ # Import pyopencl module
+ self.cl = import_module("pyopencl")
+ self.elwiseknl = getattr(import_module("pyopencl.elementwise"), "ElementwiseKernel")
+
+ # Select device, create context and command queue
+ self.dev = config.get_model_config().device["dev"]
+ self.ctx = self.cl.Context(devices=[self.dev])
+ self.queue = self.cl.CommandQueue(self.ctx, properties=self.cl.command_queue_properties.PROFILING_ENABLE)
+
+ # Enviroment for templating kernels
+ self.env = Environment(loader=PackageLoader("gprMax", "cuda_opencl"))
+
+ # Initialise arrays on device, prepare kernels, and get kernel functions
+ self._set_macros()
+ self._set_field_knls()
+ if self.grid.pmls["slabs"]:
+ self._set_pml_knls()
+ if self.grid.rxs:
+ self._set_rx_knl()
+ if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles:
+ self._set_src_knls()
+ if self.grid.snapshots:
+ self._set_snapshot_knl()
+
+ def _set_macros(self):
+ """Common macros to be used in kernels."""
+
+ # Set specific values for any dispersive materials
+ if config.get_model_config().materials["maxpoles"] > 0:
+ NY_MATDISPCOEFFS = self.grid.updatecoeffsdispersive.shape[1]
+ NX_T = self.grid.Tx.shape[1]
+ NY_T = self.grid.Tx.shape[2]
+ NZ_T = self.grid.Tx.shape[3]
+ else: # Set to one any substitutions for dispersive materials.
+ NY_MATDISPCOEFFS = 1
+ NX_T = 1
+ NY_T = 1
+ NZ_T = 1
+
+ self.knl_common = self.env.get_template("knl_common_opencl.tmpl").render(
+ updatecoeffsE=self.grid.updatecoeffsE.ravel(),
+ updatecoeffsH=self.grid.updatecoeffsH.ravel(),
+ REAL=config.sim_config.dtypes["C_float_or_double"],
+ N_updatecoeffsE=self.grid.updatecoeffsE.size,
+ N_updatecoeffsH=self.grid.updatecoeffsH.size,
+ NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
+ NY_MATDISPCOEFFS=NY_MATDISPCOEFFS,
+ NX_FIELDS=self.grid.nx + 1,
+ NY_FIELDS=self.grid.ny + 1,
+ NZ_FIELDS=self.grid.nz + 1,
+ NX_ID=self.grid.ID.shape[1],
+ NY_ID=self.grid.ID.shape[2],
+ NZ_ID=self.grid.ID.shape[3],
+ NX_T=NX_T,
+ NY_T=NY_T,
+ NZ_T=NZ_T,
+ NY_RXCOORDS=3,
+ NX_RXS=6,
+ NY_RXS=self.grid.iterations,
+ NZ_RXS=len(self.grid.rxs),
+ NY_SRCINFO=4,
+ NY_SRCWAVES=self.grid.iterations,
+ NX_SNAPS=Snapshot.nx_max,
+ NY_SNAPS=Snapshot.ny_max,
+ NZ_SNAPS=Snapshot.nz_max,
+ )
+
+ def _set_field_knls(self):
+ """Electric and magnetic field updates - prepares kernels, and
+ gets kernel functions.
+ """
+
+ subs = {
+ "CUDA_IDX": "",
+ "NX_FIELDS": self.grid.nx + 1,
+ "NY_FIELDS": self.grid.ny + 1,
+ "NZ_FIELDS": self.grid.nz + 1,
+ "NX_ID": self.grid.ID.shape[1],
+ "NY_ID": self.grid.ID.shape[2],
+ "NZ_ID": self.grid.ID.shape[3],
+ }
+
+ self.update_electric_dev = self.elwiseknl(
+ self.ctx,
+ knl_fields_updates.update_electric["args_opencl"].substitute(
+ {"REAL": config.sim_config.dtypes["C_float_or_double"]}
+ ),
+ knl_fields_updates.update_electric["func"].substitute(subs),
+ "update_electric",
+ preamble=self.knl_common,
+ options=config.sim_config.devices["compiler_opts"],
+ )
+
+ self.update_magnetic_dev = self.elwiseknl(
+ self.ctx,
+ knl_fields_updates.update_magnetic["args_opencl"].substitute(
+ {"REAL": config.sim_config.dtypes["C_float_or_double"]}
+ ),
+ knl_fields_updates.update_magnetic["func"].substitute(subs),
+ "update_magnetic",
+ preamble=self.knl_common,
+ options=config.sim_config.devices["compiler_opts"],
+ )
+
+ # If there are any dispersive materials (updates are split into two
+ # parts as they require present and updated electric field values).
+ if config.get_model_config().materials["maxpoles"] > 0:
+ subs = {
+ "CUDA_IDX": "",
+ "REAL": config.sim_config.dtypes["C_float_or_double"],
+ "REALFUNC": config.get_model_config().materials["crealfunc"],
+ "NX_FIELDS": self.grid.nx + 1,
+ "NY_FIELDS": self.grid.ny + 1,
+ "NZ_FIELDS": self.grid.nz + 1,
+ "NX_ID": self.grid.ID.shape[1],
+ "NY_ID": self.grid.ID.shape[2],
+ "NZ_ID": self.grid.ID.shape[3],
+ "NX_T": self.grid.Tx.shape[1],
+ "NY_T": self.grid.Tx.shape[2],
+ "NZ_T": self.grid.Tx.shape[3],
+ }
+
+ self.dispersive_update_a = self.elwiseknl(
+ self.ctx,
+ knl_fields_updates.update_electric_dispersive_A["args_opencl"].substitute(
+ {
+ "REAL": config.sim_config.dtypes["C_float_or_double"],
+ "COMPLEX": config.get_model_config().materials["dispersiveCdtype"],
+ }
+ ),
+ knl_fields_updates.update_electric_dispersive_A["func"].substitute(subs),
+ "update_electric_dispersive_A",
+ preamble=self.knl_common,
+ options=config.sim_config.devices["compiler_opts"],
+ )
+
+ self.dispersive_update_b = self.elwiseknl(
+ self.ctx,
+ knl_fields_updates.update_electric_dispersive_B["args_opencl"].substitute(
+ {
+ "REAL": config.sim_config.dtypes["C_float_or_double"],
+ "COMPLEX": config.get_model_config().materials["dispersiveCdtype"],
+ }
+ ),
+ knl_fields_updates.update_electric_dispersive_B["func"].substitute(subs),
+ "update_electric_dispersive_B",
+ preamble=self.knl_common,
+ options=config.sim_config.devices["compiler_opts"],
+ )
+
+ # Initialise field arrays on compute device
+ self.grid.htod_geometry_arrays(self.queue)
+ self.grid.htod_field_arrays(self.queue)
+ if config.get_model_config().materials["maxpoles"] > 0:
+ self.grid.htod_dispersive_arrays(self.queue)
+
+ def _set_pml_knls(self):
+ """PMLS - prepares kernels and gets kernel functions."""
+ knl_pml_updates_electric = import_module(
+ "gprMax.cuda_opencl.knl_pml_updates_electric_" + self.grid.pmls["formulation"]
+ )
+ knl_pml_updates_magnetic = import_module(
+ "gprMax.cuda_opencl.knl_pml_updates_magnetic_" + self.grid.pmls["formulation"]
+ )
+
+ subs = {
+ "CUDA_IDX": "",
+ "REAL": config.sim_config.dtypes["C_float_or_double"],
+ "NX_FIELDS": self.grid.nx + 1,
+ "NY_FIELDS": self.grid.ny + 1,
+ "NZ_FIELDS": self.grid.nz + 1,
+ "NX_ID": self.grid.ID.shape[1],
+ "NY_ID": self.grid.ID.shape[2],
+ "NZ_ID": self.grid.ID.shape[3],
+ }
+
+ # Set workgroup size, initialise arrays on compute device, and get
+ # kernel functions
+ for pml in self.grid.pmls["slabs"]:
+ pml.set_queue(self.queue)
+ pml.htod_field_arrays()
+ knl_name = f"order{len(pml.CFS)}_{pml.direction}"
+ knl_electric_name = getattr(knl_pml_updates_electric, knl_name)
+ knl_magnetic_name = getattr(knl_pml_updates_magnetic, knl_name)
+
+ pml.update_electric_dev = self.elwiseknl(
+ self.ctx,
+ knl_electric_name["args_opencl"].substitute({"REAL": config.sim_config.dtypes["C_float_or_double"]}),
+ knl_electric_name["func"].substitute(subs),
+ f"pml_updates_electric_{knl_name}",
+ preamble=self.knl_common,
+ options=config.sim_config.devices["compiler_opts"],
+ )
+
+ pml.update_magnetic_dev = self.elwiseknl(
+ self.ctx,
+ knl_magnetic_name["args_opencl"].substitute({"REAL": config.sim_config.dtypes["C_float_or_double"]}),
+ knl_magnetic_name["func"].substitute(subs),
+ f"pml_updates_magnetic_{knl_name}",
+ preamble=self.knl_common,
+ options=config.sim_config.devices["compiler_opts"],
+ )
+
+ def _set_rx_knl(self):
+ """Receivers - initialises arrays on compute device, prepares kernel and
+ gets kernel function.
+ """
+ self.rxcoords_dev, self.rxs_dev = htod_rx_arrays(self.grid, self.queue)
+ self.store_outputs_dev = self.elwiseknl(
+ self.ctx,
+ knl_store_outputs.store_outputs["args_opencl"].substitute(
+ {"REAL": config.sim_config.dtypes["C_float_or_double"]}
+ ),
+ knl_store_outputs.store_outputs["func"].substitute({"CUDA_IDX": ""}),
+ "store_outputs",
+ preamble=self.knl_common,
+ options=config.sim_config.devices["compiler_opts"],
+ )
+
+ def _set_src_knls(self):
+ """Sources - initialises arrays on compute device, prepares kernel and
+ gets kernel function.
+ """
+ if self.grid.hertziandipoles:
+ self.srcinfo1_hertzian_dev, self.srcinfo2_hertzian_dev, self.srcwaves_hertzian_dev = htod_src_arrays(
+ self.grid.hertziandipoles, self.grid, self.queue
+ )
+ self.update_hertzian_dipole_dev = self.elwiseknl(
+ self.ctx,
+ knl_source_updates.update_hertzian_dipole["args_opencl"].substitute(
+ {"REAL": config.sim_config.dtypes["C_float_or_double"]}
+ ),
+ knl_source_updates.update_hertzian_dipole["func"].substitute(
+ {"CUDA_IDX": "", "REAL": config.sim_config.dtypes["C_float_or_double"]}
+ ),
+ "update_hertzian_dipole",
+ preamble=self.knl_common,
+ options=config.sim_config.devices["compiler_opts"],
+ )
+ if self.grid.magneticdipoles:
+ self.srcinfo1_magnetic_dev, self.srcinfo2_magnetic_dev, self.srcwaves_magnetic_dev = htod_src_arrays(
+ self.grid.magneticdipoles, self.grid, self.queue
+ )
+ self.update_magnetic_dipole_dev = self.elwiseknl(
+ self.ctx,
+ knl_source_updates.update_magnetic_dipole["args_opencl"].substitute(
+ {"REAL": config.sim_config.dtypes["C_float_or_double"]}
+ ),
+ knl_source_updates.update_magnetic_dipole["func"].substitute(
+ {"CUDA_IDX": "", "REAL": config.sim_config.dtypes["C_float_or_double"]}
+ ),
+ "update_magnetic_dipole",
+ preamble=self.knl_common,
+ options=config.sim_config.devices["compiler_opts"],
+ )
+ if self.grid.voltagesources:
+ self.srcinfo1_voltage_dev, self.srcinfo2_voltage_dev, self.srcwaves_voltage_dev = htod_src_arrays(
+ self.grid.voltagesources, self.grid, self.queue
+ )
+ self.update_voltage_source_dev = self.elwiseknl(
+ self.ctx,
+ knl_source_updates.update_voltage_source["args_opencl"].substitute(
+ {"REAL": config.sim_config.dtypes["C_float_or_double"]}
+ ),
+ knl_source_updates.update_voltage_source["func"].substitute(
+ {"CUDA_IDX": "", "REAL": config.sim_config.dtypes["C_float_or_double"]}
+ ),
+ "update_voltage_source",
+ preamble=self.knl_common,
+ options=config.sim_config.devices["compiler_opts"],
+ )
+
+ def _set_snapshot_knl(self):
+ """Snapshots - initialises arrays on compute device, prepares kernel and
+ gets kernel function.
+ """
+ (
+ self.snapEx_dev,
+ self.snapEy_dev,
+ self.snapEz_dev,
+ self.snapHx_dev,
+ self.snapHy_dev,
+ self.snapHz_dev,
+ ) = htod_snapshot_array(self.grid, self.queue)
+ self.store_snapshot_dev = self.elwiseknl(
+ self.ctx,
+ knl_snapshots.store_snapshot["args_opencl"].substitute(
+ {"REAL": config.sim_config.dtypes["C_float_or_double"]}
+ ),
+ knl_snapshots.store_snapshot["func"].substitute(
+ {"CUDA_IDX": "", "NX_SNAPS": Snapshot.nx_max, "NY_SNAPS": Snapshot.ny_max, "NZ_SNAPS": Snapshot.nz_max}
+ ),
+ "store_snapshot",
+ preamble=self.knl_common,
+ options=config.sim_config.devices["compiler_opts"],
+ )
+
+ def store_outputs(self):
+ """Stores field component values for every receiver."""
+ if self.grid.rxs:
+ self.store_outputs_dev(
+ np.int32(len(self.grid.rxs)),
+ np.int32(self.grid.iteration),
+ self.rxcoords_dev,
+ self.rxs_dev,
+ self.grid.Ex_dev,
+ self.grid.Ey_dev,
+ self.grid.Ez_dev,
+ self.grid.Hx_dev,
+ self.grid.Hy_dev,
+ self.grid.Hz_dev,
+ )
+
+ def store_snapshots(self, iteration):
+ """Stores any snapshots.
+
+ Args:
+ iteration: int for iteration number.
+ """
+
+ for i, snap in enumerate(self.grid.snapshots):
+ if snap.time == iteration + 1:
+ snapno = 0 if config.get_model_config().device["snapsgpu2cpu"] else i
+ self.store_snapshot_dev(
+ np.int32(snapno),
+ np.int32(snap.xs),
+ np.int32(snap.xf),
+ np.int32(snap.ys),
+ np.int32(snap.yf),
+ np.int32(snap.zs),
+ np.int32(snap.zf),
+ np.int32(snap.dx),
+ np.int32(snap.dy),
+ np.int32(snap.dz),
+ self.grid.Ex_dev,
+ self.grid.Ey_dev,
+ self.grid.Ez_dev,
+ self.grid.Hx_dev,
+ self.grid.Hy_dev,
+ self.grid.Hz_dev,
+ self.snapEx_dev,
+ self.snapEy_dev,
+ self.snapEz_dev,
+ self.snapHx_dev,
+ self.snapHy_dev,
+ self.snapHz_dev,
+ )
+
+ if config.get_model_config().device["snapsgpu2cpu"]:
+ dtoh_snapshot_array(
+ self.snapEx_dev.get(),
+ self.snapEy_dev.get(),
+ self.snapEz_dev.get(),
+ self.snapHx_dev.get(),
+ self.snapHy_dev.get(),
+ self.snapHz_dev.get(),
+ 0,
+ snap,
+ )
+
+ def update_magnetic(self):
+ """Updates magnetic field components."""
+ self.update_magnetic_dev(
+ np.int32(self.grid.nx),
+ np.int32(self.grid.ny),
+ np.int32(self.grid.nz),
+ self.grid.ID_dev,
+ self.grid.Hx_dev,
+ self.grid.Hy_dev,
+ self.grid.Hz_dev,
+ self.grid.Ex_dev,
+ self.grid.Ey_dev,
+ self.grid.Ez_dev,
+ )
+
+ def update_magnetic_pml(self):
+ """Updates magnetic field components with the PML correction."""
+ for pml in self.grid.pmls["slabs"]:
+ pml.update_magnetic()
+
+ def update_magnetic_sources(self):
+ """Updates magnetic field components from sources."""
+ if self.grid.magneticdipoles:
+ self.update_magnetic_dipole_dev(
+ np.int32(len(self.grid.magneticdipoles)),
+ np.int32(self.grid.iteration),
+ config.sim_config.dtypes["float_or_double"](self.grid.dx),
+ config.sim_config.dtypes["float_or_double"](self.grid.dy),
+ config.sim_config.dtypes["float_or_double"](self.grid.dz),
+ self.srcinfo1_magnetic_dev,
+ self.srcinfo2_magnetic_dev,
+ self.srcwaves_magnetic_dev,
+ self.grid.ID_dev,
+ self.grid.Hx_dev,
+ self.grid.Hy_dev,
+ self.grid.Hz_dev,
+ )
+
+ def update_electric_a(self):
+ """Updates electric field components."""
+ # All materials are non-dispersive so do standard update.
+ if config.get_model_config().materials["maxpoles"] == 0:
+ self.update_electric_dev(
+ np.int32(self.grid.nx),
+ np.int32(self.grid.ny),
+ np.int32(self.grid.nz),
+ self.grid.ID_dev,
+ self.grid.Ex_dev,
+ self.grid.Ey_dev,
+ self.grid.Ez_dev,
+ self.grid.Hx_dev,
+ self.grid.Hy_dev,
+ self.grid.Hz_dev,
+ )
+
+ # If there are any dispersive materials do 1st part of dispersive update
+ # (it is split into two parts as it requires present and updated electric field values).
+ else:
+ self.dispersive_update_a(
+ np.int32(self.grid.nx),
+ np.int32(self.grid.ny),
+ np.int32(self.grid.nz),
+ np.int32(config.get_model_config().materials["maxpoles"]),
+ self.grid.ID_dev,
+ self.grid.Ex_dev,
+ self.grid.Ey_dev,
+ self.grid.Ez_dev,
+ self.grid.Hx_dev,
+ self.grid.Hy_dev,
+ self.grid.Hz_dev,
+ self.grid.updatecoeffsdispersive_dev,
+ self.grid.Tx_dev,
+ self.grid.Ty_dev,
+ self.grid.Tz_dev,
+ )
+
+ def update_electric_pml(self):
+ """Updates electric field components with the PML correction."""
+ for pml in self.grid.pmls["slabs"]:
+ pml.update_electric()
+
+ def update_electric_sources(self):
+ """Updates electric field components from sources -
+ update any Hertzian dipole sources last.
+ """
+ if self.grid.voltagesources:
+ self.update_voltage_source_dev(
+ np.int32(len(self.grid.voltagesources)),
+ np.int32(self.grid.iteration),
+ config.sim_config.dtypes["float_or_double"](self.grid.dx),
+ config.sim_config.dtypes["float_or_double"](self.grid.dy),
+ config.sim_config.dtypes["float_or_double"](self.grid.dz),
+ self.srcinfo1_voltage_dev,
+ self.srcinfo2_voltage_dev,
+ self.srcwaves_voltage_dev,
+ self.grid.ID_dev,
+ self.grid.Ex_dev,
+ self.grid.Ey_dev,
+ self.grid.Ez_dev,
+ )
+
+ if self.grid.hertziandipoles:
+ self.update_hertzian_dipole_dev(
+ np.int32(len(self.grid.hertziandipoles)),
+ np.int32(self.grid.iteration),
+ config.sim_config.dtypes["float_or_double"](self.grid.dx),
+ config.sim_config.dtypes["float_or_double"](self.grid.dy),
+ config.sim_config.dtypes["float_or_double"](self.grid.dz),
+ self.srcinfo1_hertzian_dev,
+ self.srcinfo2_hertzian_dev,
+ self.srcwaves_hertzian_dev,
+ self.grid.ID_dev,
+ self.grid.Ex_dev,
+ self.grid.Ey_dev,
+ self.grid.Ez_dev,
+ )
+
+ self.grid.iteration += 1
+
+ def update_electric_b(self):
+ """If there are any dispersive materials do 2nd part of dispersive
+ update - it is split into two parts as it requires present and
+ updated electric field values. Therefore it can only be completely
+ updated after the electric field has been updated by the PML and
+ source updates.
+ """
+ if config.get_model_config().materials["maxpoles"] > 0:
+ self.dispersive_update_b(
+ np.int32(self.grid.nx),
+ np.int32(self.grid.ny),
+ np.int32(self.grid.nz),
+ np.int32(config.get_model_config().materials["maxpoles"]),
+ self.grid.ID_dev,
+ self.grid.Ex_dev,
+ self.grid.Ey_dev,
+ self.grid.Ez_dev,
+ self.grid.updatecoeffsdispersive_dev,
+ self.grid.Tx_dev,
+ self.grid.Ty_dev,
+ self.grid.Tz_dev,
+ )
+
+ def time_start(self):
+ """Starts event timers used to calculate solving time for model."""
+ self.event_marker1 = self.cl.enqueue_marker(self.queue)
+ self.event_marker1.wait()
+
+ def calculate_memory_used(self, iteration):
+ """Calculates memory used on last iteration.
+
+ Args:
+ iteration: int for iteration number.
+
+ Returns:
+ Memory (RAM) used on compute device.
+ """
+ # No clear way to determine memory used from PyOpenCL unlike PyCUDA.
+ pass
+
+ def calculate_solve_time(self):
+ """Calculates solving time for model."""
+ event_marker2 = self.cl.enqueue_marker(self.queue)
+ event_marker2.wait()
+ return (event_marker2.profile.end - self.event_marker1.profile.start) * 1e-9
+
+ def finalise(self):
+ """Copies data from compute device back to CPU to save to file(s)."""
+ # Copy output from receivers array back to correct receiver objects
+ if self.grid.rxs:
+ dtoh_rx_array(self.rxs_dev.get(), self.rxcoords_dev.get(), self.grid)
+
+ # Copy data from any snapshots back to correct snapshot objects
+ if self.grid.snapshots and not config.get_model_config().device["snapsgpu2cpu"]:
+ for i, snap in enumerate(self.grid.snapshots):
+ dtoh_snapshot_array(
+ self.snapEx_dev.get(),
+ self.snapEy_dev.get(),
+ self.snapEz_dev.get(),
+ self.snapHx_dev.get(),
+ self.snapHy_dev.get(),
+ self.snapHz_dev.get(),
+ i,
+ snap,
+ )
+
+ def cleanup(self):
+ pass
diff --git a/gprMax/updates/updates.py b/gprMax/updates/updates.py
deleted file mode 100644
index e8fd6d54..00000000
--- a/gprMax/updates/updates.py
+++ /dev/null
@@ -1,1163 +0,0 @@
-# Copyright (C) 2015-2024: The University of Edinburgh, United Kingdom
-# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
-#
-# This file is part of gprMax.
-#
-# gprMax is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# gprMax is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with gprMax. If not, see .
-
-import logging
-from importlib import import_module
-
-import humanize
-import numpy as np
-from jinja2 import Environment, PackageLoader
-
-import gprMax.config as config
-
-from ..cuda_opencl import knl_fields_updates, knl_snapshots, knl_source_updates, knl_store_outputs
-from ..receivers import dtoh_rx_array, htod_rx_arrays
-from ..snapshots import Snapshot, dtoh_snapshot_array, htod_snapshot_array
-from ..sources import htod_src_arrays
-from ..utilities.utilities import round32
-
-logger = logging.getLogger(__name__)
-
-
-class CUDAUpdates:
- """Defines update functions for GPU-based (CUDA) solver."""
-
- def __init__(self, G):
- """
- Args:
- G: CUDAGrid class describing a grid in a model.
- """
-
- self.grid = G
-
- # Import PyCUDA modules
- self.drv = import_module("pycuda.driver")
- self.source_module = getattr(import_module("pycuda.compiler"), "SourceModule")
- self.drv.init()
-
- # Create device handle and context on specific GPU device (and make it current context)
- self.dev = config.get_model_config().device["dev"]
- self.ctx = self.dev.make_context()
-
- # Set common substitutions for use in kernels
- # Substitutions in function arguments
- self.subs_name_args = {
- "REAL": config.sim_config.dtypes["C_float_or_double"],
- "COMPLEX": config.get_model_config().materials["dispersiveCdtype"],
- }
- # Substitutions in function bodies
- self.subs_func = {
- "REAL": config.sim_config.dtypes["C_float_or_double"],
- "CUDA_IDX": "int i = blockIdx.x * blockDim.x + threadIdx.x;",
- "NX_FIELDS": self.grid.nx + 1,
- "NY_FIELDS": self.grid.ny + 1,
- "NZ_FIELDS": self.grid.nz + 1,
- "NX_ID": self.grid.ID.shape[1],
- "NY_ID": self.grid.ID.shape[2],
- "NZ_ID": self.grid.ID.shape[3],
- }
-
- # Enviroment for templating kernels
- self.env = Environment(loader=PackageLoader("gprMax", "cuda_opencl"))
-
- # Initialise arrays on GPU, prepare kernels, and get kernel functions
- self._set_macros()
- self._set_field_knls()
- if self.grid.pmls["slabs"]:
- self._set_pml_knls()
- if self.grid.rxs:
- self._set_rx_knl()
- if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles:
- self._set_src_knls()
- if self.grid.snapshots:
- self._set_snapshot_knl()
-
- def _build_knl(self, knl_func, subs_name_args, subs_func):
- """Builds a CUDA kernel from templates: 1) function name and args;
- and 2) function (kernel) body.
-
- Args:
- knl_func: dict containing templates for function name and args,
- and function body.
- subs_name_args: dict containing substitutions to be used with
- function name and args.
- subs_func: dict containing substitutions to be used with function
- (kernel) body.
-
- Returns:
- knl: string with complete kernel
- """
-
- name_plus_args = knl_func["args_cuda"].substitute(subs_name_args)
- func_body = knl_func["func"].substitute(subs_func)
- knl = self.knl_common + "\n" + name_plus_args + "{" + func_body + "}"
-
- return knl
-
- def _set_macros(self):
- """Common macros to be used in kernels."""
-
- # Set specific values for any dispersive materials
- if config.get_model_config().materials["maxpoles"] > 0:
- NY_MATDISPCOEFFS = self.grid.updatecoeffsdispersive.shape[1]
- NX_T = self.grid.Tx.shape[1]
- NY_T = self.grid.Tx.shape[2]
- NZ_T = self.grid.Tx.shape[3]
- else: # Set to one any substitutions for dispersive materials.
- NY_MATDISPCOEFFS = 1
- NX_T = 1
- NY_T = 1
- NZ_T = 1
-
- self.knl_common = self.env.get_template("knl_common_cuda.tmpl").render(
- REAL=config.sim_config.dtypes["C_float_or_double"],
- N_updatecoeffsE=self.grid.updatecoeffsE.size,
- N_updatecoeffsH=self.grid.updatecoeffsH.size,
- NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
- NY_MATDISPCOEFFS=NY_MATDISPCOEFFS,
- NX_FIELDS=self.grid.nx + 1,
- NY_FIELDS=self.grid.ny + 1,
- NZ_FIELDS=self.grid.nz + 1,
- NX_ID=self.grid.ID.shape[1],
- NY_ID=self.grid.ID.shape[2],
- NZ_ID=self.grid.ID.shape[3],
- NX_T=NX_T,
- NY_T=NY_T,
- NZ_T=NZ_T,
- NY_RXCOORDS=3,
- NX_RXS=6,
- NY_RXS=self.grid.iterations,
- NZ_RXS=len(self.grid.rxs),
- NY_SRCINFO=4,
- NY_SRCWAVES=self.grid.iterations,
- NX_SNAPS=Snapshot.nx_max,
- NY_SNAPS=Snapshot.ny_max,
- NZ_SNAPS=Snapshot.nz_max,
- )
-
- def _set_field_knls(self):
- """Electric and magnetic field updates - prepares kernels, and
- gets kernel functions.
- """
-
- bld = self._build_knl(knl_fields_updates.update_electric, self.subs_name_args, self.subs_func)
- knlE = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
- self.update_electric_dev = knlE.get_function("update_electric")
-
- bld = self._build_knl(knl_fields_updates.update_magnetic, self.subs_name_args, self.subs_func)
- knlH = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
- self.update_magnetic_dev = knlH.get_function("update_magnetic")
-
- self._copy_mat_coeffs(knlE, knlH)
-
- # If there are any dispersive materials (updates are split into two
- # parts as they require present and updated electric field values).
- if config.get_model_config().materials["maxpoles"] > 0:
- self.subs_func.update(
- {
- "REAL": config.sim_config.dtypes["C_float_or_double"],
- "REALFUNC": config.get_model_config().materials["crealfunc"],
- "NX_T": self.grid.Tx.shape[1],
- "NY_T": self.grid.Tx.shape[2],
- "NZ_T": self.grid.Tx.shape[3],
- }
- )
-
- bld = self._build_knl(knl_fields_updates.update_electric_dispersive_A, self.subs_name_args, self.subs_func)
- knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
- self.dispersive_update_a = knl.get_function("update_electric_dispersive_A")
- self._copy_mat_coeffs(knl, knl)
-
- bld = self._build_knl(knl_fields_updates.update_electric_dispersive_B, self.subs_name_args, self.subs_func)
- knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
- self.dispersive_update_b = knl.get_function("update_electric_dispersive_B")
- self._copy_mat_coeffs(knl, knl)
-
- # Set blocks per grid and initialise field arrays on GPU
- self.grid.set_blocks_per_grid()
- self.grid.htod_geometry_arrays()
- self.grid.htod_field_arrays()
- if config.get_model_config().materials["maxpoles"] > 0:
- self.grid.htod_dispersive_arrays()
-
- def _set_pml_knls(self):
- """PMLS - prepares kernels and gets kernel functions."""
- knl_pml_updates_electric = import_module(
- "gprMax.cuda_opencl.knl_pml_updates_electric_" + self.grid.pmls["formulation"]
- )
- knl_pml_updates_magnetic = import_module(
- "gprMax.cuda_opencl.knl_pml_updates_magnetic_" + self.grid.pmls["formulation"]
- )
-
- # Initialise arrays on GPU, set block per grid, and get kernel functions
- for pml in self.grid.pmls["slabs"]:
- pml.htod_field_arrays()
- pml.set_blocks_per_grid()
- knl_name = f"order{len(pml.CFS)}_{pml.direction}"
- self.subs_name_args["FUNC"] = knl_name
-
- knl_electric = getattr(knl_pml_updates_electric, knl_name)
- bld = self._build_knl(knl_electric, self.subs_name_args, self.subs_func)
- knlE = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
- pml.update_electric_dev = knlE.get_function(knl_name)
-
- knl_magnetic = getattr(knl_pml_updates_magnetic, knl_name)
- bld = self._build_knl(knl_magnetic, self.subs_name_args, self.subs_func)
- knlH = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
- pml.update_magnetic_dev = knlH.get_function(knl_name)
-
- # Copy material coefficient arrays to constant memory of GPU - must
- # be done for each kernel
- self._copy_mat_coeffs(knlE, knlH)
-
- def _set_rx_knl(self):
- """Receivers - initialises arrays on GPU, prepares kernel and gets kernel
- function.
- """
- self.rxcoords_dev, self.rxs_dev = htod_rx_arrays(self.grid)
-
- self.subs_func.update(
- {
- "REAL": config.sim_config.dtypes["C_float_or_double"],
- "NY_RXCOORDS": 3,
- "NX_RXS": 6,
- "NY_RXS": self.grid.iterations,
- "NZ_RXS": len(self.grid.rxs),
- }
- )
-
- bld = self._build_knl(knl_store_outputs.store_outputs, self.subs_name_args, self.subs_func)
- knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
- self.store_outputs_dev = knl.get_function("store_outputs")
-
- def _set_src_knls(self):
- """Sources - initialises arrays on GPU, prepares kernel and gets kernel
- function.
- """
- self.subs_func.update({"NY_SRCINFO": 4, "NY_SRCWAVES": self.grid.iteration})
-
- if self.grid.hertziandipoles:
- self.srcinfo1_hertzian_dev, self.srcinfo2_hertzian_dev, self.srcwaves_hertzian_dev = htod_src_arrays(
- self.grid.hertziandipoles, self.grid
- )
- bld = self._build_knl(knl_source_updates.update_hertzian_dipole, self.subs_name_args, self.subs_func)
- knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
- self.update_hertzian_dipole_dev = knl.get_function("update_hertzian_dipole")
- if self.grid.magneticdipoles:
- self.srcinfo1_magnetic_dev, self.srcinfo2_magnetic_dev, self.srcwaves_magnetic_dev = htod_src_arrays(
- self.grid.magneticdipoles, self.grid
- )
- bld = self._build_knl(knl_source_updates.update_magnetic_dipole, self.subs_name_args, self.subs_func)
- knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
- self.update_magnetic_dipole_dev = knl.get_function("update_magnetic_dipole")
- if self.grid.voltagesources:
- self.srcinfo1_voltage_dev, self.srcinfo2_voltage_dev, self.srcwaves_voltage_dev = htod_src_arrays(
- self.grid.voltagesources, self.grid
- )
- bld = self._build_knl(knl_source_updates.update_voltage_source, self.subs_name_args, self.subs_func)
- knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
- self.update_voltage_source_dev = knl.get_function("update_voltage_source")
-
- self._copy_mat_coeffs(knl, knl)
-
- def _set_snapshot_knl(self):
- """Snapshots - initialises arrays on GPU, prepares kernel and gets kernel
- function.
- """
- (
- self.snapEx_dev,
- self.snapEy_dev,
- self.snapEz_dev,
- self.snapHx_dev,
- self.snapHy_dev,
- self.snapHz_dev,
- ) = htod_snapshot_array(self.grid)
-
- self.subs_func.update(
- {
- "REAL": config.sim_config.dtypes["C_float_or_double"],
- "NX_SNAPS": Snapshot.nx_max,
- "NY_SNAPS": Snapshot.ny_max,
- "NZ_SNAPS": Snapshot.nz_max,
- }
- )
-
- bld = self._build_knl(knl_snapshots.store_snapshot, self.subs_name_args, self.subs_func)
- knl = self.source_module(bld, options=config.sim_config.devices["nvcc_opts"])
- self.store_snapshot_dev = knl.get_function("store_snapshot")
-
- def _copy_mat_coeffs(self, knlE, knlH):
- """Copies material coefficient arrays to constant memory of GPU
- (must be <64KB).
-
- Args:
- knlE: kernel for electric field.
- knlH: kernel for magnetic field.
- """
-
- # Check if coefficient arrays will fit on constant memory of GPU
- if (
- self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes
- > config.get_model_config().device["dev"].total_constant_memory
- ):
- device = config.get_model_config().device["dev"]
- logger.exception(
- f"Too many materials in the model to fit onto "
- + f"constant memory of size {humanize.naturalsize(device.total_constant_memory)} "
- + f"on {device.deviceID}: {' '.join(device.name().split())}"
- )
- raise ValueError
-
- updatecoeffsE = knlE.get_global("updatecoeffsE")[0]
- updatecoeffsH = knlH.get_global("updatecoeffsH")[0]
- self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE)
- self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH)
-
- def store_outputs(self):
- """Stores field component values for every receiver."""
- if self.grid.rxs:
- self.store_outputs_dev(
- np.int32(len(self.grid.rxs)),
- np.int32(self.grid.iteration),
- self.rxcoords_dev.gpudata,
- self.rxs_dev.gpudata,
- self.grid.Ex_dev.gpudata,
- self.grid.Ey_dev.gpudata,
- self.grid.Ez_dev.gpudata,
- self.grid.Hx_dev.gpudata,
- self.grid.Hy_dev.gpudata,
- self.grid.Hz_dev.gpudata,
- block=(1, 1, 1),
- grid=(round32(len(self.grid.rxs)), 1, 1),
- )
-
- def store_snapshots(self, iteration):
- """Stores any snapshots.
-
- Args:
- iteration: int for iteration number.
- """
-
- for i, snap in enumerate(self.grid.snapshots):
- if snap.time == iteration + 1:
- snapno = 0 if config.get_model_config().device["snapsgpu2cpu"] else i
- self.store_snapshot_dev(
- np.int32(snapno),
- np.int32(snap.xs),
- np.int32(snap.xf),
- np.int32(snap.ys),
- np.int32(snap.yf),
- np.int32(snap.zs),
- np.int32(snap.zf),
- np.int32(snap.dx),
- np.int32(snap.dy),
- np.int32(snap.dz),
- self.grid.Ex_dev.gpudata,
- self.grid.Ey_dev.gpudata,
- self.grid.Ez_dev.gpudata,
- self.grid.Hx_dev.gpudata,
- self.grid.Hy_dev.gpudata,
- self.grid.Hz_dev.gpudata,
- self.snapEx_dev.gpudata,
- self.snapEy_dev.gpudata,
- self.snapEz_dev.gpudata,
- self.snapHx_dev.gpudata,
- self.snapHy_dev.gpudata,
- self.snapHz_dev.gpudata,
- block=Snapshot.tpb,
- grid=Snapshot.bpg,
- )
- if config.get_model_config().device["snapsgpu2cpu"]:
- dtoh_snapshot_array(
- self.snapEx_dev.get(),
- self.snapEy_dev.get(),
- self.snapEz_dev.get(),
- self.snapHx_dev.get(),
- self.snapHy_dev.get(),
- self.snapHz_dev.get(),
- 0,
- snap,
- )
-
- def update_magnetic(self):
- """Updates magnetic field components."""
- self.update_magnetic_dev(
- np.int32(self.grid.nx),
- np.int32(self.grid.ny),
- np.int32(self.grid.nz),
- self.grid.ID_dev.gpudata,
- self.grid.Hx_dev.gpudata,
- self.grid.Hy_dev.gpudata,
- self.grid.Hz_dev.gpudata,
- self.grid.Ex_dev.gpudata,
- self.grid.Ey_dev.gpudata,
- self.grid.Ez_dev.gpudata,
- block=self.grid.tpb,
- grid=self.grid.bpg,
- )
-
- def update_magnetic_pml(self):
- """Updates magnetic field components with the PML correction."""
- for pml in self.grid.pmls["slabs"]:
- pml.update_magnetic()
-
- def update_magnetic_sources(self):
- """Updates magnetic field components from sources."""
- if self.grid.magneticdipoles:
- self.update_magnetic_dipole_dev(
- np.int32(len(self.grid.magneticdipoles)),
- np.int32(self.grid.iteration),
- config.sim_config.dtypes["float_or_double"](self.grid.dx),
- config.sim_config.dtypes["float_or_double"](self.grid.dy),
- config.sim_config.dtypes["float_or_double"](self.grid.dz),
- self.srcinfo1_magnetic_dev.gpudata,
- self.srcinfo2_magnetic_dev.gpudata,
- self.srcwaves_magnetic_dev.gpudata,
- self.grid.ID_dev.gpudata,
- self.grid.Hx_dev.gpudata,
- self.grid.Hy_dev.gpudata,
- self.grid.Hz_dev.gpudata,
- block=(1, 1, 1),
- grid=(round32(len(self.grid.magneticdipoles)), 1, 1),
- )
-
- def update_electric_a(self):
- """Updates electric field components."""
- # All materials are non-dispersive so do standard update.
- if config.get_model_config().materials["maxpoles"] == 0:
- self.update_electric_dev(
- np.int32(self.grid.nx),
- np.int32(self.grid.ny),
- np.int32(self.grid.nz),
- self.grid.ID_dev.gpudata,
- self.grid.Ex_dev.gpudata,
- self.grid.Ey_dev.gpudata,
- self.grid.Ez_dev.gpudata,
- self.grid.Hx_dev.gpudata,
- self.grid.Hy_dev.gpudata,
- self.grid.Hz_dev.gpudata,
- block=self.grid.tpb,
- grid=self.grid.bpg,
- )
-
- # If there are any dispersive materials do 1st part of dispersive update
- # (it is split into two parts as it requires present and updated electric field values).
- else:
- self.dispersive_update_a(
- np.int32(self.grid.nx),
- np.int32(self.grid.ny),
- np.int32(self.grid.nz),
- np.int32(config.get_model_config().materials["maxpoles"]),
- self.grid.updatecoeffsdispersive_dev.gpudata,
- self.grid.Tx_dev.gpudata,
- self.grid.Ty_dev.gpudata,
- self.grid.Tz_dev.gpudata,
- self.grid.ID_dev.gpudata,
- self.grid.Ex_dev.gpudata,
- self.grid.Ey_dev.gpudata,
- self.grid.Ez_dev.gpudata,
- self.grid.Hx_dev.gpudata,
- self.grid.Hy_dev.gpudata,
- self.grid.Hz_dev.gpudata,
- block=self.grid.tpb,
- grid=self.grid.bpg,
- )
-
- def update_electric_pml(self):
- """Updates electric field components with the PML correction."""
- for pml in self.grid.pmls["slabs"]:
- pml.update_electric()
-
- def update_electric_sources(self):
- """Updates electric field components from sources -
- update any Hertzian dipole sources last.
- """
- if self.grid.voltagesources:
- self.update_voltage_source_dev(
- np.int32(len(self.grid.voltagesources)),
- np.int32(self.grid.iteration),
- config.sim_config.dtypes["float_or_double"](self.grid.dx),
- config.sim_config.dtypes["float_or_double"](self.grid.dy),
- config.sim_config.dtypes["float_or_double"](self.grid.dz),
- self.srcinfo1_voltage_dev.gpudata,
- self.srcinfo2_voltage_dev.gpudata,
- self.srcwaves_voltage_dev.gpudata,
- self.grid.ID_dev.gpudata,
- self.grid.Ex_dev.gpudata,
- self.grid.Ey_dev.gpudata,
- self.grid.Ez_dev.gpudata,
- block=(1, 1, 1),
- grid=(round32(len(self.grid.voltagesources)), 1, 1),
- )
-
- if self.grid.hertziandipoles:
- self.update_hertzian_dipole_dev(
- np.int32(len(self.grid.hertziandipoles)),
- np.int32(self.grid.iteration),
- config.sim_config.dtypes["float_or_double"](self.grid.dx),
- config.sim_config.dtypes["float_or_double"](self.grid.dy),
- config.sim_config.dtypes["float_or_double"](self.grid.dz),
- self.srcinfo1_hertzian_dev.gpudata,
- self.srcinfo2_hertzian_dev.gpudata,
- self.srcwaves_hertzian_dev.gpudata,
- self.grid.ID_dev.gpudata,
- self.grid.Ex_dev.gpudata,
- self.grid.Ey_dev.gpudata,
- self.grid.Ez_dev.gpudata,
- block=(1, 1, 1),
- grid=(round32(len(self.grid.hertziandipoles)), 1, 1),
- )
-
- self.grid.iteration += 1
-
- def update_electric_b(self):
- """If there are any dispersive materials do 2nd part of dispersive
- update - it is split into two parts as it requires present and
- updated electric field values. Therefore it can only be completely
- updated after the electric field has been updated by the PML and
- source updates.
- """
- if config.get_model_config().materials["maxpoles"] > 0:
- self.dispersive_update_b(
- np.int32(self.grid.nx),
- np.int32(self.grid.ny),
- np.int32(self.grid.nz),
- np.int32(config.get_model_config().materials["maxpoles"]),
- self.grid.updatecoeffsdispersive_dev.gpudata,
- self.grid.Tx_dev.gpudata,
- self.grid.Ty_dev.gpudata,
- self.grid.Tz_dev.gpudata,
- self.grid.ID_dev.gpudata,
- self.grid.Ex_dev.gpudata,
- self.grid.Ey_dev.gpudata,
- self.grid.Ez_dev.gpudata,
- block=self.grid.tpb,
- grid=self.grid.bpg,
- )
-
- def time_start(self):
- """Starts event timers used to calculate solving time for model."""
- self.iterstart = self.drv.Event()
- self.iterend = self.drv.Event()
- self.iterstart.record()
- self.iterstart.synchronize()
-
- def calculate_memory_used(self, iteration):
- """Calculates memory used on last iteration.
-
- Args:
- iteration: int for iteration number.
-
- Returns:
- Memory (RAM) used on GPU.
- """
- if iteration == self.grid.iterations - 1:
- # Total minus free memory in current context
- return self.drv.mem_get_info()[1] - self.drv.mem_get_info()[0]
-
- def calculate_solve_time(self):
- """Calculates solving time for model."""
- self.iterend.record()
- self.iterend.synchronize()
- return self.iterstart.time_till(self.iterend) * 1e-3
-
- def finalise(self):
- """Copies data from GPU back to CPU to save to file(s)."""
- # Copy output from receivers array back to correct receiver objects
- if self.grid.rxs:
- dtoh_rx_array(self.rxs_dev.get(), self.rxcoords_dev.get(), self.grid)
-
- # Copy data from any snapshots back to correct snapshot objects
- if self.grid.snapshots and not config.get_model_config().device["snapsgpu2cpu"]:
- for i, snap in enumerate(self.grid.snapshots):
- dtoh_snapshot_array(
- self.snapEx_dev.get(),
- self.snapEy_dev.get(),
- self.snapEz_dev.get(),
- self.snapHx_dev.get(),
- self.snapHy_dev.get(),
- self.snapHz_dev.get(),
- i,
- snap,
- )
-
- def cleanup(self):
- """Cleanup GPU context."""
- # Remove context from top of stack and clear
- self.ctx.pop()
- self.ctx = None
-
-
-class OpenCLUpdates:
- """Defines update functions for OpenCL-based solver."""
-
- def __init__(self, G):
- """
- Args:
- G: OpenCLGrid class describing a grid in a model.
- """
-
- self.grid = G
-
- # Import pyopencl module
- self.cl = import_module("pyopencl")
- self.elwiseknl = getattr(import_module("pyopencl.elementwise"), "ElementwiseKernel")
-
- # Select device, create context and command queue
- self.dev = config.get_model_config().device["dev"]
- self.ctx = self.cl.Context(devices=[self.dev])
- self.queue = self.cl.CommandQueue(self.ctx, properties=self.cl.command_queue_properties.PROFILING_ENABLE)
-
- # Enviroment for templating kernels
- self.env = Environment(loader=PackageLoader("gprMax", "cuda_opencl"))
-
- # Initialise arrays on device, prepare kernels, and get kernel functions
- self._set_macros()
- self._set_field_knls()
- if self.grid.pmls["slabs"]:
- self._set_pml_knls()
- if self.grid.rxs:
- self._set_rx_knl()
- if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles:
- self._set_src_knls()
- if self.grid.snapshots:
- self._set_snapshot_knl()
-
- def _set_macros(self):
- """Common macros to be used in kernels."""
-
- # Set specific values for any dispersive materials
- if config.get_model_config().materials["maxpoles"] > 0:
- NY_MATDISPCOEFFS = self.grid.updatecoeffsdispersive.shape[1]
- NX_T = self.grid.Tx.shape[1]
- NY_T = self.grid.Tx.shape[2]
- NZ_T = self.grid.Tx.shape[3]
- else: # Set to one any substitutions for dispersive materials.
- NY_MATDISPCOEFFS = 1
- NX_T = 1
- NY_T = 1
- NZ_T = 1
-
- self.knl_common = self.env.get_template("knl_common_opencl.tmpl").render(
- updatecoeffsE=self.grid.updatecoeffsE.ravel(),
- updatecoeffsH=self.grid.updatecoeffsH.ravel(),
- REAL=config.sim_config.dtypes["C_float_or_double"],
- N_updatecoeffsE=self.grid.updatecoeffsE.size,
- N_updatecoeffsH=self.grid.updatecoeffsH.size,
- NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
- NY_MATDISPCOEFFS=NY_MATDISPCOEFFS,
- NX_FIELDS=self.grid.nx + 1,
- NY_FIELDS=self.grid.ny + 1,
- NZ_FIELDS=self.grid.nz + 1,
- NX_ID=self.grid.ID.shape[1],
- NY_ID=self.grid.ID.shape[2],
- NZ_ID=self.grid.ID.shape[3],
- NX_T=NX_T,
- NY_T=NY_T,
- NZ_T=NZ_T,
- NY_RXCOORDS=3,
- NX_RXS=6,
- NY_RXS=self.grid.iterations,
- NZ_RXS=len(self.grid.rxs),
- NY_SRCINFO=4,
- NY_SRCWAVES=self.grid.iterations,
- NX_SNAPS=Snapshot.nx_max,
- NY_SNAPS=Snapshot.ny_max,
- NZ_SNAPS=Snapshot.nz_max,
- )
-
- def _set_field_knls(self):
- """Electric and magnetic field updates - prepares kernels, and
- gets kernel functions.
- """
-
- subs = {
- "CUDA_IDX": "",
- "NX_FIELDS": self.grid.nx + 1,
- "NY_FIELDS": self.grid.ny + 1,
- "NZ_FIELDS": self.grid.nz + 1,
- "NX_ID": self.grid.ID.shape[1],
- "NY_ID": self.grid.ID.shape[2],
- "NZ_ID": self.grid.ID.shape[3],
- }
-
- self.update_electric_dev = self.elwiseknl(
- self.ctx,
- knl_fields_updates.update_electric["args_opencl"].substitute(
- {"REAL": config.sim_config.dtypes["C_float_or_double"]}
- ),
- knl_fields_updates.update_electric["func"].substitute(subs),
- "update_electric",
- preamble=self.knl_common,
- options=config.sim_config.devices["compiler_opts"],
- )
-
- self.update_magnetic_dev = self.elwiseknl(
- self.ctx,
- knl_fields_updates.update_magnetic["args_opencl"].substitute(
- {"REAL": config.sim_config.dtypes["C_float_or_double"]}
- ),
- knl_fields_updates.update_magnetic["func"].substitute(subs),
- "update_magnetic",
- preamble=self.knl_common,
- options=config.sim_config.devices["compiler_opts"],
- )
-
- # If there are any dispersive materials (updates are split into two
- # parts as they require present and updated electric field values).
- if config.get_model_config().materials["maxpoles"] > 0:
- subs = {
- "CUDA_IDX": "",
- "REAL": config.sim_config.dtypes["C_float_or_double"],
- "REALFUNC": config.get_model_config().materials["crealfunc"],
- "NX_FIELDS": self.grid.nx + 1,
- "NY_FIELDS": self.grid.ny + 1,
- "NZ_FIELDS": self.grid.nz + 1,
- "NX_ID": self.grid.ID.shape[1],
- "NY_ID": self.grid.ID.shape[2],
- "NZ_ID": self.grid.ID.shape[3],
- "NX_T": self.grid.Tx.shape[1],
- "NY_T": self.grid.Tx.shape[2],
- "NZ_T": self.grid.Tx.shape[3],
- }
-
- self.dispersive_update_a = self.elwiseknl(
- self.ctx,
- knl_fields_updates.update_electric_dispersive_A["args_opencl"].substitute(
- {
- "REAL": config.sim_config.dtypes["C_float_or_double"],
- "COMPLEX": config.get_model_config().materials["dispersiveCdtype"],
- }
- ),
- knl_fields_updates.update_electric_dispersive_A["func"].substitute(subs),
- "update_electric_dispersive_A",
- preamble=self.knl_common,
- options=config.sim_config.devices["compiler_opts"],
- )
-
- self.dispersive_update_b = self.elwiseknl(
- self.ctx,
- knl_fields_updates.update_electric_dispersive_B["args_opencl"].substitute(
- {
- "REAL": config.sim_config.dtypes["C_float_or_double"],
- "COMPLEX": config.get_model_config().materials["dispersiveCdtype"],
- }
- ),
- knl_fields_updates.update_electric_dispersive_B["func"].substitute(subs),
- "update_electric_dispersive_B",
- preamble=self.knl_common,
- options=config.sim_config.devices["compiler_opts"],
- )
-
- # Initialise field arrays on compute device
- self.grid.htod_geometry_arrays(self.queue)
- self.grid.htod_field_arrays(self.queue)
- if config.get_model_config().materials["maxpoles"] > 0:
- self.grid.htod_dispersive_arrays(self.queue)
-
- def _set_pml_knls(self):
- """PMLS - prepares kernels and gets kernel functions."""
- knl_pml_updates_electric = import_module(
- "gprMax.cuda_opencl.knl_pml_updates_electric_" + self.grid.pmls["formulation"]
- )
- knl_pml_updates_magnetic = import_module(
- "gprMax.cuda_opencl.knl_pml_updates_magnetic_" + self.grid.pmls["formulation"]
- )
-
- subs = {
- "CUDA_IDX": "",
- "REAL": config.sim_config.dtypes["C_float_or_double"],
- "NX_FIELDS": self.grid.nx + 1,
- "NY_FIELDS": self.grid.ny + 1,
- "NZ_FIELDS": self.grid.nz + 1,
- "NX_ID": self.grid.ID.shape[1],
- "NY_ID": self.grid.ID.shape[2],
- "NZ_ID": self.grid.ID.shape[3],
- }
-
- # Set workgroup size, initialise arrays on compute device, and get
- # kernel functions
- for pml in self.grid.pmls["slabs"]:
- pml.set_queue(self.queue)
- pml.htod_field_arrays()
- knl_name = f"order{len(pml.CFS)}_{pml.direction}"
- knl_electric_name = getattr(knl_pml_updates_electric, knl_name)
- knl_magnetic_name = getattr(knl_pml_updates_magnetic, knl_name)
-
- pml.update_electric_dev = self.elwiseknl(
- self.ctx,
- knl_electric_name["args_opencl"].substitute({"REAL": config.sim_config.dtypes["C_float_or_double"]}),
- knl_electric_name["func"].substitute(subs),
- f"pml_updates_electric_{knl_name}",
- preamble=self.knl_common,
- options=config.sim_config.devices["compiler_opts"],
- )
-
- pml.update_magnetic_dev = self.elwiseknl(
- self.ctx,
- knl_magnetic_name["args_opencl"].substitute({"REAL": config.sim_config.dtypes["C_float_or_double"]}),
- knl_magnetic_name["func"].substitute(subs),
- f"pml_updates_magnetic_{knl_name}",
- preamble=self.knl_common,
- options=config.sim_config.devices["compiler_opts"],
- )
-
- def _set_rx_knl(self):
- """Receivers - initialises arrays on compute device, prepares kernel and
- gets kernel function.
- """
- self.rxcoords_dev, self.rxs_dev = htod_rx_arrays(self.grid, self.queue)
- self.store_outputs_dev = self.elwiseknl(
- self.ctx,
- knl_store_outputs.store_outputs["args_opencl"].substitute(
- {"REAL": config.sim_config.dtypes["C_float_or_double"]}
- ),
- knl_store_outputs.store_outputs["func"].substitute({"CUDA_IDX": ""}),
- "store_outputs",
- preamble=self.knl_common,
- options=config.sim_config.devices["compiler_opts"],
- )
-
- def _set_src_knls(self):
- """Sources - initialises arrays on compute device, prepares kernel and
- gets kernel function.
- """
- if self.grid.hertziandipoles:
- self.srcinfo1_hertzian_dev, self.srcinfo2_hertzian_dev, self.srcwaves_hertzian_dev = htod_src_arrays(
- self.grid.hertziandipoles, self.grid, self.queue
- )
- self.update_hertzian_dipole_dev = self.elwiseknl(
- self.ctx,
- knl_source_updates.update_hertzian_dipole["args_opencl"].substitute(
- {"REAL": config.sim_config.dtypes["C_float_or_double"]}
- ),
- knl_source_updates.update_hertzian_dipole["func"].substitute(
- {"CUDA_IDX": "", "REAL": config.sim_config.dtypes["C_float_or_double"]}
- ),
- "update_hertzian_dipole",
- preamble=self.knl_common,
- options=config.sim_config.devices["compiler_opts"],
- )
- if self.grid.magneticdipoles:
- self.srcinfo1_magnetic_dev, self.srcinfo2_magnetic_dev, self.srcwaves_magnetic_dev = htod_src_arrays(
- self.grid.magneticdipoles, self.grid, self.queue
- )
- self.update_magnetic_dipole_dev = self.elwiseknl(
- self.ctx,
- knl_source_updates.update_magnetic_dipole["args_opencl"].substitute(
- {"REAL": config.sim_config.dtypes["C_float_or_double"]}
- ),
- knl_source_updates.update_magnetic_dipole["func"].substitute(
- {"CUDA_IDX": "", "REAL": config.sim_config.dtypes["C_float_or_double"]}
- ),
- "update_magnetic_dipole",
- preamble=self.knl_common,
- options=config.sim_config.devices["compiler_opts"],
- )
- if self.grid.voltagesources:
- self.srcinfo1_voltage_dev, self.srcinfo2_voltage_dev, self.srcwaves_voltage_dev = htod_src_arrays(
- self.grid.voltagesources, self.grid, self.queue
- )
- self.update_voltage_source_dev = self.elwiseknl(
- self.ctx,
- knl_source_updates.update_voltage_source["args_opencl"].substitute(
- {"REAL": config.sim_config.dtypes["C_float_or_double"]}
- ),
- knl_source_updates.update_voltage_source["func"].substitute(
- {"CUDA_IDX": "", "REAL": config.sim_config.dtypes["C_float_or_double"]}
- ),
- "update_voltage_source",
- preamble=self.knl_common,
- options=config.sim_config.devices["compiler_opts"],
- )
-
- def _set_snapshot_knl(self):
- """Snapshots - initialises arrays on compute device, prepares kernel and
- gets kernel function.
- """
- (
- self.snapEx_dev,
- self.snapEy_dev,
- self.snapEz_dev,
- self.snapHx_dev,
- self.snapHy_dev,
- self.snapHz_dev,
- ) = htod_snapshot_array(self.grid, self.queue)
- self.store_snapshot_dev = self.elwiseknl(
- self.ctx,
- knl_snapshots.store_snapshot["args_opencl"].substitute(
- {"REAL": config.sim_config.dtypes["C_float_or_double"]}
- ),
- knl_snapshots.store_snapshot["func"].substitute(
- {"CUDA_IDX": "", "NX_SNAPS": Snapshot.nx_max, "NY_SNAPS": Snapshot.ny_max, "NZ_SNAPS": Snapshot.nz_max}
- ),
- "store_snapshot",
- preamble=self.knl_common,
- options=config.sim_config.devices["compiler_opts"],
- )
-
- def store_outputs(self):
- """Stores field component values for every receiver."""
- if self.grid.rxs:
- self.store_outputs_dev(
- np.int32(len(self.grid.rxs)),
- np.int32(self.grid.iteration),
- self.rxcoords_dev,
- self.rxs_dev,
- self.grid.Ex_dev,
- self.grid.Ey_dev,
- self.grid.Ez_dev,
- self.grid.Hx_dev,
- self.grid.Hy_dev,
- self.grid.Hz_dev,
- )
-
- def store_snapshots(self, iteration):
- """Stores any snapshots.
-
- Args:
- iteration: int for iteration number.
- """
-
- for i, snap in enumerate(self.grid.snapshots):
- if snap.time == iteration + 1:
- snapno = 0 if config.get_model_config().device["snapsgpu2cpu"] else i
- self.store_snapshot_dev(
- np.int32(snapno),
- np.int32(snap.xs),
- np.int32(snap.xf),
- np.int32(snap.ys),
- np.int32(snap.yf),
- np.int32(snap.zs),
- np.int32(snap.zf),
- np.int32(snap.dx),
- np.int32(snap.dy),
- np.int32(snap.dz),
- self.grid.Ex_dev,
- self.grid.Ey_dev,
- self.grid.Ez_dev,
- self.grid.Hx_dev,
- self.grid.Hy_dev,
- self.grid.Hz_dev,
- self.snapEx_dev,
- self.snapEy_dev,
- self.snapEz_dev,
- self.snapHx_dev,
- self.snapHy_dev,
- self.snapHz_dev,
- )
-
- if config.get_model_config().device["snapsgpu2cpu"]:
- dtoh_snapshot_array(
- self.snapEx_dev.get(),
- self.snapEy_dev.get(),
- self.snapEz_dev.get(),
- self.snapHx_dev.get(),
- self.snapHy_dev.get(),
- self.snapHz_dev.get(),
- 0,
- snap,
- )
-
- def update_magnetic(self):
- """Updates magnetic field components."""
- self.update_magnetic_dev(
- np.int32(self.grid.nx),
- np.int32(self.grid.ny),
- np.int32(self.grid.nz),
- self.grid.ID_dev,
- self.grid.Hx_dev,
- self.grid.Hy_dev,
- self.grid.Hz_dev,
- self.grid.Ex_dev,
- self.grid.Ey_dev,
- self.grid.Ez_dev,
- )
-
- def update_magnetic_pml(self):
- """Updates magnetic field components with the PML correction."""
- for pml in self.grid.pmls["slabs"]:
- pml.update_magnetic()
-
- def update_magnetic_sources(self):
- """Updates magnetic field components from sources."""
- if self.grid.magneticdipoles:
- self.update_magnetic_dipole_dev(
- np.int32(len(self.grid.magneticdipoles)),
- np.int32(self.grid.iteration),
- config.sim_config.dtypes["float_or_double"](self.grid.dx),
- config.sim_config.dtypes["float_or_double"](self.grid.dy),
- config.sim_config.dtypes["float_or_double"](self.grid.dz),
- self.srcinfo1_magnetic_dev,
- self.srcinfo2_magnetic_dev,
- self.srcwaves_magnetic_dev,
- self.grid.ID_dev,
- self.grid.Hx_dev,
- self.grid.Hy_dev,
- self.grid.Hz_dev,
- )
-
- def update_electric_a(self):
- """Updates electric field components."""
- # All materials are non-dispersive so do standard update.
- if config.get_model_config().materials["maxpoles"] == 0:
- self.update_electric_dev(
- np.int32(self.grid.nx),
- np.int32(self.grid.ny),
- np.int32(self.grid.nz),
- self.grid.ID_dev,
- self.grid.Ex_dev,
- self.grid.Ey_dev,
- self.grid.Ez_dev,
- self.grid.Hx_dev,
- self.grid.Hy_dev,
- self.grid.Hz_dev,
- )
-
- # If there are any dispersive materials do 1st part of dispersive update
- # (it is split into two parts as it requires present and updated electric field values).
- else:
- self.dispersive_update_a(
- np.int32(self.grid.nx),
- np.int32(self.grid.ny),
- np.int32(self.grid.nz),
- np.int32(config.get_model_config().materials["maxpoles"]),
- self.grid.ID_dev,
- self.grid.Ex_dev,
- self.grid.Ey_dev,
- self.grid.Ez_dev,
- self.grid.Hx_dev,
- self.grid.Hy_dev,
- self.grid.Hz_dev,
- self.grid.updatecoeffsdispersive_dev,
- self.grid.Tx_dev,
- self.grid.Ty_dev,
- self.grid.Tz_dev,
- )
-
- def update_electric_pml(self):
- """Updates electric field components with the PML correction."""
- for pml in self.grid.pmls["slabs"]:
- pml.update_electric()
-
- def update_electric_sources(self):
- """Updates electric field components from sources -
- update any Hertzian dipole sources last.
- """
- if self.grid.voltagesources:
- self.update_voltage_source_dev(
- np.int32(len(self.grid.voltagesources)),
- np.int32(self.grid.iteration),
- config.sim_config.dtypes["float_or_double"](self.grid.dx),
- config.sim_config.dtypes["float_or_double"](self.grid.dy),
- config.sim_config.dtypes["float_or_double"](self.grid.dz),
- self.srcinfo1_voltage_dev,
- self.srcinfo2_voltage_dev,
- self.srcwaves_voltage_dev,
- self.grid.ID_dev,
- self.grid.Ex_dev,
- self.grid.Ey_dev,
- self.grid.Ez_dev,
- )
-
- if self.grid.hertziandipoles:
- self.update_hertzian_dipole_dev(
- np.int32(len(self.grid.hertziandipoles)),
- np.int32(self.grid.iteration),
- config.sim_config.dtypes["float_or_double"](self.grid.dx),
- config.sim_config.dtypes["float_or_double"](self.grid.dy),
- config.sim_config.dtypes["float_or_double"](self.grid.dz),
- self.srcinfo1_hertzian_dev,
- self.srcinfo2_hertzian_dev,
- self.srcwaves_hertzian_dev,
- self.grid.ID_dev,
- self.grid.Ex_dev,
- self.grid.Ey_dev,
- self.grid.Ez_dev,
- )
-
- self.grid.iteration += 1
-
- def update_electric_b(self):
- """If there are any dispersive materials do 2nd part of dispersive
- update - it is split into two parts as it requires present and
- updated electric field values. Therefore it can only be completely
- updated after the electric field has been updated by the PML and
- source updates.
- """
- if config.get_model_config().materials["maxpoles"] > 0:
- self.dispersive_update_b(
- np.int32(self.grid.nx),
- np.int32(self.grid.ny),
- np.int32(self.grid.nz),
- np.int32(config.get_model_config().materials["maxpoles"]),
- self.grid.ID_dev,
- self.grid.Ex_dev,
- self.grid.Ey_dev,
- self.grid.Ez_dev,
- self.grid.updatecoeffsdispersive_dev,
- self.grid.Tx_dev,
- self.grid.Ty_dev,
- self.grid.Tz_dev,
- )
-
- def time_start(self):
- """Starts event timers used to calculate solving time for model."""
- self.event_marker1 = self.cl.enqueue_marker(self.queue)
- self.event_marker1.wait()
-
- def calculate_memory_used(self, iteration):
- """Calculates memory used on last iteration.
-
- Args:
- iteration: int for iteration number.
-
- Returns:
- Memory (RAM) used on compute device.
- """
- # No clear way to determine memory used from PyOpenCL unlike PyCUDA.
- pass
-
- def calculate_solve_time(self):
- """Calculates solving time for model."""
- event_marker2 = self.cl.enqueue_marker(self.queue)
- event_marker2.wait()
- return (event_marker2.profile.end - self.event_marker1.profile.start) * 1e-9
-
- def finalise(self):
- """Copies data from compute device back to CPU to save to file(s)."""
- # Copy output from receivers array back to correct receiver objects
- if self.grid.rxs:
- dtoh_rx_array(self.rxs_dev.get(), self.rxcoords_dev.get(), self.grid)
-
- # Copy data from any snapshots back to correct snapshot objects
- if self.grid.snapshots and not config.get_model_config().device["snapsgpu2cpu"]:
- for i, snap in enumerate(self.grid.snapshots):
- dtoh_snapshot_array(
- self.snapEx_dev.get(),
- self.snapEy_dev.get(),
- self.snapEz_dev.get(),
- self.snapHx_dev.get(),
- self.snapHy_dev.get(),
- self.snapHz_dev.get(),
- i,
- snap,
- )
-
- def cleanup(self):
- pass