Basic functional GPU solver with cylinder_Ascan_2D example.

2025-08-08 07:24:19 +08:00 · 2019-10-21 17:17:36 +01:00
--- a/gprMax/cmds_multiple.py
+++ b/gprMax/cmds_multiple.py
@@ -510,14 +510,12 @@ class Rx(UserObjectMulti):
            r.ID = self.kwargs['id']
            outputs = self.kwargs['outputs']
            # Get allowable outputs
-            if grid.gpu is not None:
-                allowableoutputs = RxUser.gpu_allowableoutputs
-            else:
-                allowableoutputs = RxUser.allowableoutputs
+            allowableoutputs = RxUser.allowableoutputs_gpu if config.sim_config.general['cuda'] else RxUser.allowableoutputs
            # Check and add field output names
+            outputs.sort()
            for field in outputs:
                if field in allowableoutputs:
-                    r.outputs[field] = np.zeros(grid.iterations, dtype=config.dtypes['float_or_double'])
+                    r.outputs[field] = np.zeros(grid.iterations, dtype=config.sim_config.dtypes['float_or_double'])
                else:
                    raise CmdInputError(f"'{self.params_str()}' contains an output type that is not allowable. Allowable outputs in current context are {allowableoutputs}")
        # If no ID or outputs are specified, use default
--- a/gprMax/config.py
+++ b/gprMax/config.py
@@ -18,6 +18,7 @@

 import logging
 from pathlib import Path
+import sys

 from colorama import init
 from colorama import Fore
@@ -64,7 +65,8 @@ class ModelConfig:
        #     N.B. This will happen if the requested snapshots are too large to fit
        #     on the memory of the GPU. If True this will slow performance significantly
        if sim_config.general['cuda']:
-            self.cuda = {'gpu': sim_config.cuda['gpus'], 'snapsgpu2cpu': False}
+            self.cuda = {'gpu': sim_config.cuda['gpus'],
+                         'snapsgpu2cpu': False}

        # Total memory usage for all grids in the model. Starts with 50MB overhead.
        self.mem_use = 50e6
@@ -175,10 +177,12 @@ class SimulationConfig:
            self.general['cuda'] = True
            self.general['cpu'] = False
            self.general['opencl'] = False
-            #   gpus: list of GPU objects
-            #   gpus_str: list of strings describing GPU(s)
-            self.cuda = {'gpus': [],
-                         'gpus_str': []}
+            self.general['precision'] = 'single'
+            self.cuda = {'gpus': [], # gpus: list of GPU objects
+                         'gpus_str': [], # gpus_str: list of strings describing GPU(s)
+                         'nvcc_opts': None} # nvcc_opts: nvcc compiler options
+            # Suppress nvcc warnings on Microsoft Windows
+            if sys.platform == 'win32': self.cuda['nvcc_opts'] = '-w'
            self.get_gpus()
            self.set_gpus()

--- a/gprMax/cuda/fields_updates.py
+++ b/gprMax/cuda/fields_updates.py
@@ -128,7 +128,7 @@ __global__ void update_magnetic(int NX, int NY, int NZ, const unsigned int* __re
 // Electric field updates - dispersive materials //
 ///////////////////////////////////////////////////

-__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $REAL_OR_COMPLEX* __restrict__ updatecoeffsdispersive, $REAL_OR_COMPLEX *Tx, $REAL_OR_COMPLEX *Ty, $REAL_OR_COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
+__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {

    //  This function is part A of updates to electric field values when dispersive materials (with multiple poles) are present.
    //
@@ -189,7 +189,7 @@ __global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLE
    }
 }

-__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $REAL_OR_COMPLEX* __restrict__ updatecoeffsdispersive, $REAL_OR_COMPLEX *Tx, $REAL_OR_COMPLEX *Ty, $REAL_OR_COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
+__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {

    //  This function is part B which updates the dispersive field arrays when dispersive materials (with multiple poles) are present.
    //
--- a/gprMax/grid.py
+++ b/gprMax/grid.py
@@ -279,15 +279,22 @@ class FDTDGrid:
 class CUDAGrid(FDTDGrid):
    """Additional grid methods for solving on GPU using CUDA."""

+    def __init__(self, model_num):
+        super().__init__(model_num)
+
+        # Threads per block - used for main electric/magnetic field updates
+        self.tpb = (256, 1, 1)
+        # Blocks per grid - used for main electric/magnetic field updates
+        self.bpg = None
+
+
    def set_blocks_per_grid(self):
        """Set the blocks per grid size used for updating the electric and
            magnetic field arrays on a GPU.
        """

-        config.cuda['gpus'].bpg = (int(np.ceil(((self.nx + 1) *
-                                                (self.ny + 1) *
-                                                (self.nz + 1)) /
-                                                config.cuda['gpus'].tpb[0])), 1, 1)
+        self.bpg = (int(np.ceil(((self.nx + 1) * (self.ny + 1) *
+                   (self.nz + 1)) / self.tpb[0])), 1, 1)

    def initialise_arrays(self):
        """Initialise geometry and field arrays on GPU."""
--- a/gprMax/model_build_run.py
+++ b/gprMax/model_build_run.py
@@ -58,15 +58,15 @@ from .pml import CFS
 from .pml import PML
 from .pml import build_pml
 from .pml import pml_information
-from .receivers import gpu_initialise_rx_arrays
-from .receivers import gpu_get_rx_array
+from .receivers import initialise_rx_arrays_gpu
+from .receivers import get_rx_array_gpu
 from .receivers import Rx
 from .scene import Scene
 from .snapshots import Snapshot
-from .snapshots import gpu_initialise_snapshot_array
-from .snapshots import gpu_get_snapshot_array
+from .snapshots import initialise_snapshot_array_gpu
+from .snapshots import get_snapshot_array_gpu
 from .solvers import create_solver
-from .sources import gpu_initialise_src_arrays
+from .sources import initialise_src_arrays_gpu
 from .utilities import get_terminal_width
 from .utilities import human_size
 from .utilities import mem_check
@@ -286,7 +286,8 @@ class ModelBuildRun:

        mem_GPU = ''
        if config.sim_config.general['cuda']:
-            mem_GPU = f' host + ~{human_size(self.solver.get_memsolve())} GPU'
+            log.debug('Fix memory used calc for GPU')
+            # mem_GPU = f' host + ~{human_size(self.solver.get_memsolve())} GPU'

        log.info(f'\nMemory (RAM) used: ~{human_size(self.p.memory_full_info().uss)}{mem_GPU}')
        log.info(f'Solving time [HH:MM:SS]: {datetime.timedelta(seconds=tsolve)}')
--- a/gprMax/pml.py
+++ b/gprMax/pml.py
@@ -207,8 +207,7 @@ class PML:

        self.CFS = G.cfs

-        if not config.sim_config.general['cuda']:
-            self.initialise_field_arrays()
+        self.initialise_field_arrays()

    def initialise_field_arrays(self):
        """Initialise arrays to store fields in PML."""
@@ -340,7 +339,7 @@ class CUDAPML(PML):
        solving on GPU using CUDA.
    """

-    def initialise_arrays(self):
+    def initialise_field_arrays_gpu(self):
        """Initialise PML field and coefficient arrays on GPU."""

        import pycuda.gpuarray as gpuarray
@@ -355,20 +354,44 @@ class CUDAPML(PML):
        self.HRF_gpu = gpuarray.to_gpu(self.HRF)

        if self.direction[0] == 'x':
-            self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype))
-            self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype))
-            self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype))
-            self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype))
+            self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+                                             self.nx + 1, self.ny, self.nz + 1),
+                                             dtype=config.sim_config.dtypes['float_or_double']))
+            self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+                                             self.nx + 1, self.ny + 1, self.nz),
+                                             dtype=config.sim_config.dtypes['float_or_double']))
+            self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+                                             self.nx, self.ny + 1, self.nz),
+                                             dtype=config.sim_config.dtypes['float_or_double']))
+            self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+                                             self.nx, self.ny, self.nz + 1),
+                                             dtype=config.sim_config.dtypes['float_or_double']))
        elif self.direction[0] == 'y':
-            self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype))
-            self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype))
-            self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype))
-            self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype))
+            self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+                                             self.nx, self.ny + 1, self.nz + 1),
+                                             dtype=config.sim_config.dtypes['float_or_double']))
+            self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+                                             self.nx + 1, self.ny + 1, self.nz),
+                                             dtype=config.sim_config.dtypes['float_or_double']))
+            self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+                                             self.nx + 1, self.ny, self.nz),
+                                             dtype=config.sim_config.dtypes['float_or_double']))
+            self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+                                             self.nx, self.ny, self.nz + 1),
+                                             dtype=config.sim_config.dtypes['float_or_double']))
        elif self.direction[0] == 'z':
-            self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype))
-            self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype))
-            self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype))
-            self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype))
+            self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+                                             self.nx, self.ny + 1, self.nz + 1),
+                                             dtype=config.sim_config.dtypes['float_or_double']))
+            self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+                                             self.nx + 1, self.ny, self.nz + 1),
+                                             dtype=config.sim_config.dtypes['float_or_double']))
+            self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+                                             self.nx + 1, self.ny, self.nz),
+                                             dtype=config.sim_config.dtypes['float_or_double']))
+            self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+                                             self.nx, self.ny + 1, self.nz),
+                                             dtype=config.sim_config.dtypes['float_or_double']))

    def set_blocks_per_grid(self, G):
        """Set the blocks per grid size used for updating the PML field arrays on a GPU.
@@ -377,7 +400,9 @@ class CUDAPML(PML):
            G (FDTDGrid): Holds essential parameters describing the model.
        """

-        self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) * (self.EPhi1_gpu.shape[2] + 1) * (self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1)
+        self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) *
+                   (self.EPhi1_gpu.shape[2] + 1) *
+                   (self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1)

    def get_update_funcs(self, kernelselectric, kernelsmagnetic):
        """Get update functions from PML kernels.
@@ -400,7 +425,24 @@ class CUDAPML(PML):
            G (FDTDGrid): Holds essential parameters describing the model.
        """

-        self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.EPhi1_gpu.shape[1]), np.int32(self.EPhi1_gpu.shape[2]), np.int32(self.EPhi1_gpu.shape[3]), np.int32(self.EPhi2_gpu.shape[1]), np.int32(self.EPhi2_gpu.shape[2]), np.int32(self.EPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
+        self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf),
+                                 np.int32(self.ys), np.int32(self.yf),
+                                 np.int32(self.zs), np.int32(self.zf),
+                                 np.int32(self.EPhi1_gpu.shape[1]),
+                                 np.int32(self.EPhi1_gpu.shape[2]),
+                                 np.int32(self.EPhi1_gpu.shape[3]),
+                                 np.int32(self.EPhi2_gpu.shape[1]),
+                                 np.int32(self.EPhi2_gpu.shape[2]),
+                                 np.int32(self.EPhi2_gpu.shape[3]),
+                                 np.int32(self.thickness),
+                                 G.ID_gpu.gpudata,
+                                 G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                 G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+                                 self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata,
+                                 self.ERA_gpu.gpudata, self.ERB_gpu.gpudata,
+                                 self.ERE_gpu.gpudata, self.ERF_gpu.gpudata,
+                                 config.sim_config.dtypes['float_or_double'](self.d),
+                                 block=G.tpb, grid=self.bpg)

    def update_magnetic(self, G):
        """This functions updates magnetic field components with the PML
@@ -409,7 +451,24 @@ class CUDAPML(PML):
        Args:
            G (FDTDGrid): Holds essential parameters describing the model.
        """
-        self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[3]), np.int32(self.HPhi2_gpu.shape[1]), np.int32(self.HPhi2_gpu.shape[2]), np.int32(self.HPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
+        self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf),
+                                 np.int32(self.ys), np.int32(self.yf),
+                                 np.int32(self.zs), np.int32(self.zf),
+                                 np.int32(self.HPhi1_gpu.shape[1]),
+                                 np.int32(self.HPhi1_gpu.shape[2]),
+                                 np.int32(self.HPhi1_gpu.shape[3]),
+                                 np.int32(self.HPhi2_gpu.shape[1]),
+                                 np.int32(self.HPhi2_gpu.shape[2]),
+                                 np.int32(self.HPhi2_gpu.shape[3]),
+                                 np.int32(self.thickness),
+                                 G.ID_gpu.gpudata,
+                                 G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                 G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+                                 self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata,
+                                 self.HRA_gpu.gpudata, self.HRB_gpu.gpudata,
+                                 self.HRE_gpu.gpudata, self.HRF_gpu.gpudata,
+                                 config.sim_config.dtypes['float_or_double'](self.d),
+                                 block=G.tpb, grid=self.bpg)

 def pml_information(G):
    """Information about PMLs.
@@ -443,14 +502,16 @@ def build_pml(G, key, value):
        value (int): Thickness of PML slab in cells.
    """

+    pml_type = CUDAPML if config.sim_config.general['cuda'] else PML
+
    sumer = 0  # Sum of relative permittivities in PML slab
    summr = 0  # Sum of relative permeabilities in PML slab

    if key[0] == 'x':
        if key == 'x0':
-            pml = PML(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz)
+            pml = pml_type(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz)
        elif key == 'xmax':
-            pml = PML(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
+            pml = pml_type(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
        G.pmls.append(pml)
        for j in range(G.ny):
            for k in range(G.nz):
@@ -463,9 +524,9 @@ def build_pml(G, key, value):

    elif key[0] == 'y':
        if key == 'y0':
-            pml = PML(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz)
+            pml = pml_type(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz)
        elif key == 'ymax':
-            pml = PML(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
+            pml = pml_type(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
        G.pmls.append(pml)
        for i in range(G.nx):
            for k in range(G.nz):
@@ -478,9 +539,9 @@ def build_pml(G, key, value):

    elif key[0] == 'z':
        if key == 'z0':
-            pml = PML(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny)
+            pml = pml_type(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny)
        elif key == 'zmax':
-            pml = PML(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
+            pml = pml_type(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
        G.pmls.append(pml)
        for i in range(G.nx):
            for j in range(G.ny):
--- a/gprMax/receivers.py
+++ b/gprMax/receivers.py
@@ -16,8 +16,6 @@
 # You should have received a copy of the GNU General Public License
 # along with gprMax.  If not, see <http://www.gnu.org/licenses/>.

-from collections import OrderedDict
-
 import numpy as np

 import gprMax.config as config
@@ -27,14 +25,15 @@ class Rx:
    """Receiver output points."""

    allowableoutputs = ['Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz', 'Ix', 'Iy', 'Iz']
-    gpu_allowableoutputs = allowableoutputs[:-3]
    defaultoutputs = allowableoutputs[:-3]
-    maxnumoutputs = 0
+
+    allowableoutputs_gpu = allowableoutputs[:-3]
+    maxnumoutputs_gpu = 0

    def __init__(self):

        self.ID = None
-        self.outputs = OrderedDict()
+        self.outputs = {}
        self.xcoord = None
        self.ycoord = None
        self.zcoord = None
@@ -43,12 +42,17 @@ class Rx:
        self.zcoordorigin = None


-def gpu_initialise_rx_arrays(G):
+def initialise_rx_arrays_gpu(G):
    """Initialise arrays on GPU for receiver coordinates and to store field
        components for receivers.

    Args:
        G (FDTDGrid): Holds essential parameters describing the model.
+
+    Returns:
+        rxcoords_gpu (int): GPU array of receiver coordinates.
+        rxs_gpu (float): GPU array to store receiver data - rows are field
+                            components; columns are iterations; pages are receivers.
    """

    import pycuda.gpuarray as gpuarray
@@ -59,11 +63,14 @@ def gpu_initialise_rx_arrays(G):
        rxcoords[i, 0] = rx.xcoord
        rxcoords[i, 1] = rx.ycoord
        rxcoords[i, 2] = rx.zcoord
+        # Store maximum number of output components
+        if len(rx.outputs) > Rx.maxnumoutputs_gpu:
+            Rx.maxnumoutputs_gpu = len(rx.outputs)

    # Array to store field components for receivers on GPU - rows are field components;
    # columns are iterations; pages are receivers
-    rxs = np.zeros((Rx.maxnumoutputs, G.iterations, len(G.rxs)),
-                   dtype=config.dtypes['float_or_double'])
+    rxs = np.zeros((len(Rx.allowableoutputs_gpu), G.iterations, len(G.rxs)),
+                   dtype=config.sim_config.dtypes['float_or_double'])

    # Copy arrays to GPU
    rxcoords_gpu = gpuarray.to_gpu(rxcoords)
@@ -72,20 +79,20 @@ def gpu_initialise_rx_arrays(G):
    return rxcoords_gpu, rxs_gpu


-def gpu_get_rx_array(rxs_gpu, rxcoords_gpu, G):
+def get_rx_array_gpu(rxs_gpu, rxcoords_gpu, G):
    """Copy output from receivers array used on GPU back to receiver objects.

    Args:
        rxs_gpu (float): numpy array of receiver data from GPU - rows are field
                            components; columns are iterations; pages are receivers.
-        rxcoords_gpu (float): numpy array of receiver coordinates from GPU.
+        rxcoords_gpu (int): numpy array of receiver coordinates from GPU.
        G (FDTDGrid): Holds essential parameters describing the model.
    """

    for rx in G.rxs:
        for rxgpu in range(len(G.rxs)):
-            if rx.xcoord == rxcoords_gpu[rxgpu, 0] and \
-               rx.ycoord == rxcoords_gpu[rxgpu, 1] and \
-               rx.zcoord == rxcoords_gpu[rxgpu, 2]:
-                for k in rx.outputs.items():
-                    rx.outputs[k] = rxs_gpu[Rx.gpu_allowableoutputs.index(k), :, rxgpu]
+            if (rx.xcoord == rxcoords_gpu[rxgpu, 0] and
+                rx.ycoord == rxcoords_gpu[rxgpu, 1] and
+                rx.zcoord == rxcoords_gpu[rxgpu, 2]):
+                for output in rx.outputs.keys():
+                    rx.outputs[output] = rxs_gpu[Rx.allowableoutputs_gpu.index(output), :, rxgpu]
--- a/gprMax/snapshots.py
+++ b/gprMax/snapshots.py
@@ -184,7 +184,7 @@ class Snapshot:
        self.filehandle.close()


-def gpu_initialise_snapshot_array(G):
+def initialise_snapshot_array_gpu(G):
    """Initialise array on GPU for to store field data for snapshots.

    Args:
@@ -236,7 +236,7 @@ def gpu_initialise_snapshot_array(G):
    return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu


-def gpu_get_snapshot_array(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap):
+def get_snapshot_array_gpu(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap):
    """Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview.

    Args:
--- a/gprMax/solvers.py
+++ b/gprMax/solvers.py
@@ -15,7 +15,7 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with gprMax.  If not, see <http://www.gnu.org/licenses/>.
-
+import sys
 import gprMax.config as config
 from .grid import FDTDGrid
 from .grid import CUDAGrid
--- a/gprMax/sources.py
+++ b/gprMax/sources.py
@@ -236,7 +236,7 @@ class MagneticDipole(Source):
                                (1 / (G.dx * G.dy * G.dz)))


-def gpu_initialise_src_arrays(sources, G):
+def initialise_src_arrays_gpu(sources, G):
    """Initialise arrays on GPU for source coordinates/polarisation, other
        source information, and source waveform values.

@@ -255,8 +255,8 @@ def gpu_initialise_src_arrays(sources, G):
    import pycuda.gpuarray as gpuarray

    srcinfo1 = np.zeros((len(sources), 4), dtype=np.int32)
-    srcinfo2 = np.zeros((len(sources)), dtype=config.dtypes['float_or_double'])
-    srcwaves = np.zeros((len(sources), G.iterations), dtype=config.dtypes['float_or_double'])
+    srcinfo2 = np.zeros((len(sources)), dtype=config.sim_config.dtypes['float_or_double'])
+    srcwaves = np.zeros((len(sources), G.iterations), dtype=config.sim_config.dtypes['float_or_double'])
    for i, src in enumerate(sources):
        srcinfo1[i, 0] = src.xcoord
        srcinfo1[i, 1] = src.ycoord
--- a/gprMax/updates.py
+++ b/gprMax/updates.py
@@ -18,21 +18,24 @@

 from importlib import import_module
 import logging
-import sys
+
+import numpy as np

 import gprMax.config as config
 from .cuda.fields_updates import kernel_template_fields
 from .cuda.snapshots import kernel_template_store_snapshot
 from .cuda.source_updates import kernel_template_sources
-from .cython.fields_updates_normal import update_electric
-from .cython.fields_updates_normal import update_magnetic
-from .fields_outputs import store_outputs
-from .receivers import gpu_initialise_rx_arrays
-from .receivers import gpu_get_rx_array
+from .cython.fields_updates_normal import update_electric as update_electric_cpu
+from .cython.fields_updates_normal import update_magnetic as update_magnetic_cpu
+from .fields_outputs import store_outputs as store_outputs_cpu
+from .fields_outputs import kernel_template_store_outputs
+from .receivers import initialise_rx_arrays_gpu
+from .receivers import get_rx_array_gpu
 from .snapshots import Snapshot
-from .snapshots import gpu_initialise_snapshot_array
-from .snapshots import gpu_get_snapshot_array
-from .sources import gpu_initialise_src_arrays
+from .snapshots import initialise_snapshot_array_gpu
+from .snapshots import get_snapshot_array_gpu
+from .sources import initialise_src_arrays_gpu
+from .utilities import round32
 from .utilities import timer

 log = logging.getLogger(__name__)
@@ -52,7 +55,7 @@ class CPUUpdates:

    def store_outputs(self):
        """Store field component values for every receiver and transmission line."""
-        store_outputs(self.grid)
+        store_outputs_cpu(self.grid)

    def store_snapshots(self, iteration):
        """Store any snapshots.
@@ -66,7 +69,7 @@ class CPUUpdates:

    def update_magnetic(self):
        """Update magnetic field components."""
-        update_magnetic(self.grid.nx,
+        update_magnetic_cpu(self.grid.nx,
                        self.grid.ny,
                        self.grid.nz,
                        config.sim_config.hostinfo['ompthreads'],
@@ -99,18 +102,18 @@ class CPUUpdates:
        """Update electric field components."""
        # All materials are non-dispersive so do standard update.
        if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0:
-            update_electric(self.grid.nx,
-                            self.grid.ny,
-                            self.grid.nz,
-                            config.sim_config.hostinfo['ompthreads'],
-                            self.grid.updatecoeffsE,
-                            self.grid.ID,
-                            self.grid.Ex,
-                            self.grid.Ey,
-                            self.grid.Ez,
-                            self.grid.Hx,
-                            self.grid.Hy,
-                            self.grid.Hz)
+            update_electric_cpu(self.grid.nx,
+                                self.grid.ny,
+                                self.grid.nz,
+                                config.sim_config.hostinfo['ompthreads'],
+                                self.grid.updatecoeffsE,
+                                self.grid.ID,
+                                self.grid.Ex,
+                                self.grid.Ey,
+                                self.grid.Ez,
+                                self.grid.Hx,
+                                self.grid.Hy,
+                                self.grid.Hz)

        # If there are any dispersive materials do 1st part of dispersive update
        # (it is split into two parts as it requires present and updated electric field values).
@@ -247,20 +250,14 @@ class CUDAUpdates:
        self.dispersive_update_a = None
        self.dispersive_update_b = None

-        import pycuda.driver as drv
-        from pycuda.compiler import SourceModule
-        drv.init()
-
-        # Suppress nvcc warnings on Windows
-        log.debug('Move nvcc compiler options to simulation config')
-        if sys.platform == 'win32':
-            self.compiler_opts = ['-w']
-        else:
-            self.compiler_opts = None
+        # Import PyCUDA modules
+        self.drv = import_module('pycuda.driver')
+        self.source_module = getattr(import_module('pycuda.compiler'), 'SourceModule')
+        self.drv.init()

        # Create device handle and context on specifc GPU device (and make it current context)
-        self.dev = drv.Device(self.grid.gpu.deviceID)
-        self.ctx = dev.make_context()
+        self.dev = self.drv.Device(config.model_configs[self.grid.model_num].cuda['gpu'].deviceID)
+        self.ctx = self.dev.make_context()

        # Initialise arrays on GPU, prepare kernels, and get kernel functions
        self.set_field_kernels()
@@ -274,56 +271,56 @@ class CUDAUpdates:
            get kernel functions.
        """
        if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0:
-            kernels_fields = SourceModule(kernels_template_fields.substitute(
-                                          REAL=cudafloattype,
-                                          COMPLEX=cudacomplextype,
-                                          N_updatecoeffsE=self.grid.updatecoeffsE.size,
-                                          N_updatecoeffsH=self.grid.updatecoeffsH.size,
-                                          NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
-                                          NY_MATDISPCOEFFS=self.grid.updatecoeffsdispersive.shape[1],
-                                          NX_FIELDS=self.grid.nx + 1,
-                                          NY_FIELDS=self.grid.ny + 1,
-                                          NZ_FIELDS=self.grid.nz + 1,
-                                          NX_ID=self.grid.ID.shape[1],
-                                          NY_ID=self.grid.ID.shape[2],
-                                          NZ_ID=self.grid.ID.shape[3],
-                                          NX_T=self.grid.Tx.shape[1],
-                                          NY_T=self.grid.Tx.shape[2],
-                                          NZ_T=self.grid.Tx.shape[3]),
-                                          options=self.compiler_opts)
+            kernels_fields = self.source_module(kernel_template_fields.substitute(
+                                                REAL=config.sim_config.dtypes['C_float_or_double'],
+                                                COMPLEX=config.sim_config.dtypes['C_complex'],
+                                                N_updatecoeffsE=self.grid.updatecoeffsE.size,
+                                                N_updatecoeffsH=self.grid.updatecoeffsH.size,
+                                                NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
+                                                NY_MATDISPCOEFFS=self.grid.updatecoeffsdispersive.shape[1],
+                                                NX_FIELDS=self.grid.nx + 1,
+                                                NY_FIELDS=self.grid.ny + 1,
+                                                NZ_FIELDS=self.grid.nz + 1,
+                                                NX_ID=self.grid.ID.shape[1],
+                                                NY_ID=self.grid.ID.shape[2],
+                                                NZ_ID=self.grid.ID.shape[3],
+                                                NX_T=self.grid.Tx.shape[1],
+                                                NY_T=self.grid.Tx.shape[2],
+                                                NZ_T=self.grid.Tx.shape[3]),
+                                                options=config.sim_config.cuda['nvcc_opts'])
        else: # Set to one any substitutions for dispersive materials
-            kernels_fields = SourceModule(kernels_template_fields.substitute(
-                                          REAL=cudafloattype,
-                                          COMPLEX=cudacomplextype,
-                                          N_updatecoeffsE=self.grid.updatecoeffsE.size,
-                                          N_updatecoeffsH=self.grid.updatecoeffsH.size,
-                                          NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
-                                          NY_MATDISPCOEFFS=1,
-                                          NX_FIELDS=self.grid.nx + 1,
-                                          NY_FIELDS=self.grid.ny + 1,
-                                          NZ_FIELDS=self.grid.nz + 1,
-                                          NX_ID=self.grid.ID.shape[1],
-                                          NY_ID=self.grid.ID.shape[2],
-                                          NZ_ID=self.grid.ID.shape[3],
-                                          NX_T=1,
-                                          NY_T=1,
-                                          NZ_T=1),
-                                          options=self.compiler_opts)
-        self.update_electric = kernels_fields.get_function("update_electric")
-        self.update_magnetic = kernels_fields.get_function("update_magnetic")
-        if self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes > self.grid.gpu.constmem:
-            raise GeneralError(log.exception(f'Too many materials in the model to fit onto constant memory of size {human_size(self.grid.gpu.constmem)} on {self.grid.gpu.deviceID} - {self.grid.gpu.name} GPU'))
-        self.copy_mat_coeffs()
+            kernels_fields = self.source_module(kernel_template_fields.substitute(
+                                                REAL=config.sim_config.dtypes['C_float_or_double'],
+                                                COMPLEX=config.sim_config.dtypes['C_complex'],
+                                                N_updatecoeffsE=self.grid.updatecoeffsE.size,
+                                                N_updatecoeffsH=self.grid.updatecoeffsH.size,
+                                                NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
+                                                NY_MATDISPCOEFFS=1,
+                                                NX_FIELDS=self.grid.nx + 1,
+                                                NY_FIELDS=self.grid.ny + 1,
+                                                NZ_FIELDS=self.grid.nz + 1,
+                                                NX_ID=self.grid.ID.shape[1],
+                                                NY_ID=self.grid.ID.shape[2],
+                                                NZ_ID=self.grid.ID.shape[3],
+                                                NX_T=1,
+                                                NY_T=1,
+                                                NZ_T=1),
+                                                options=config.sim_config.cuda['nvcc_opts'])
+        self.update_electric_gpu = kernels_fields.get_function("update_electric")
+        self.update_magnetic_gpu = kernels_fields.get_function("update_magnetic")
+        if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes > config.model_configs[self.grid.model_num].cuda['gpu'].constmem):
+            raise GeneralError(log.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.model_configs[self.grid.model_num].cuda['gpu'].constmem)} on {config.model_configs[self.grid.model_num].cuda['gpu'].deviceID} - {config.model_configs[self.grid.model_num].cuda['gpu'].name} GPU"))
+        self.copy_mat_coeffs(kernels_fields, kernels_fields)

        # Electric and magnetic field updates - dispersive materials - get kernel functions and initialise array on GPU
        if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0:  # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values).
            self.dispersive_update_a = kernels_fields.get_function("update_electric_dispersive_A")
            self.dispersive_update_b = kernels_fields.get_function("update_electric_dispersive_B")
-            self.grid.gpu_initialise_dispersive_arrays()
+            self.grid.initialise_dispersive_arrays()

        # Electric and magnetic field updates - set blocks per grid and initialise field arrays on GPU
-        self.grid.gpu_set_blocks_per_grid()
-        self.grid.gpu_initialise_arrays()
+        self.grid.set_blocks_per_grid()
+        self.grid.initialise_arrays()

    def set_pml_kernels(self):
        """PMLS - prepare kernels and get kernel functions."""
@@ -336,81 +333,81 @@ class CUDAUpdates:
            kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic),
                                         'kernels_template_pml_magnetic_' +
                                         self.grid.pmlformulation)
-            kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(
-                                                REAL=cudafloattype,
-                                                N_updatecoeffsE=self.grid.updatecoeffsE.size,
-                                                NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
-                                                NX_FIELDS=self.grid.nx + 1,
-                                                NY_FIELDS=self.grid.ny + 1,
-                                                NZ_FIELDS=self.grid.nz + 1,
-                                                NX_ID=self.grid.ID.shape[1],
-                                                NY_ID=self.grid.ID.shape[2],
-                                                NZ_ID=self.grid.ID.shape[3]),
-                                                options=self.compiler_opts)
-            kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(
-                                                REAL=cudafloattype,
-                                                N_updatecoeffsH=self.grid.updatecoeffsH.size,
-                                                NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1],
-                                                NX_FIELDS=self.grid.nx + 1,
-                                                NY_FIELDS=self.grid.ny + 1,
-                                                NZ_FIELDS=self.grid.nz + 1,
-                                                NX_ID=self.gridG.ID.shape[1],
-                                                NY_ID=self.grid.ID.shape[2],
-                                                NZ_ID=self.grid.ID.shape[3]),
-                                                options=self.compiler_opts)
-            self.copy_mat_coeffs()
+            kernels_pml_electric = self.source_module(kernelelectricfunc.substitute(
+                                                      REAL=config.sim_config.dtypes['C_float_or_double'],
+                                                      N_updatecoeffsE=self.grid.updatecoeffsE.size,
+                                                      NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
+                                                      NX_FIELDS=self.grid.nx + 1,
+                                                      NY_FIELDS=self.grid.ny + 1,
+                                                      NZ_FIELDS=self.grid.nz + 1,
+                                                      NX_ID=self.grid.ID.shape[1],
+                                                      NY_ID=self.grid.ID.shape[2],
+                                                      NZ_ID=self.grid.ID.shape[3]),
+                                                      options=config.sim_config.cuda['nvcc_opts'])
+            kernels_pml_magnetic = self.source_module(kernelmagneticfunc.substitute(
+                                                      REAL=config.sim_config.dtypes['C_float_or_double'],
+                                                      N_updatecoeffsH=self.grid.updatecoeffsH.size,
+                                                      NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1],
+                                                      NX_FIELDS=self.grid.nx + 1,
+                                                      NY_FIELDS=self.grid.ny + 1,
+                                                      NZ_FIELDS=self.grid.nz + 1,
+                                                      NX_ID=self.grid.ID.shape[1],
+                                                      NY_ID=self.grid.ID.shape[2],
+                                                      NZ_ID=self.grid.ID.shape[3]),
+                                                      options=config.sim_config.cuda['nvcc_opts'])
+            self.copy_mat_coeffs(kernels_pml_electric, kernels_pml_magnetic)
            # Set block per grid, initialise arrays on GPU, and get kernel functions
            for pml in self.grid.pmls:
-                pml.gpu_initialise_arrays()
-                pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
-                pml.gpu_set_blocks_per_grid(self.grid)
+                pml.initialise_field_arrays_gpu()
+                pml.get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
+                pml.set_blocks_per_grid(self.grid)

    def set_rx_kernel(self):
        """Receivers - initialise arrays on GPU, prepare kernel and get kernel
                        function.
        """
        if self.grid.rxs:
-            rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(self.grid)
-            kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(
-                                                REAL=cudafloattype,
-                                                NY_RXCOORDS=3,
-                                                NX_RXS=6,
-                                                NY_RXS=self.grid.iterations,
-                                                NZ_RXS=len(self.grid.rxs),
-                                                NX_FIELDS=self.grid.nx + 1,
-                                                NY_FIELDS=self.grid.ny + 1,
-                                                NZ_FIELDS=self.grid.nz + 1),
-                                                options=self.compiler_opts)
-            self.store_outputs = kernel_store_outputs.get_function("store_outputs")
+            self.rxcoords_gpu, self.rxs_gpu = initialise_rx_arrays_gpu(self.grid)
+            kernel_store_outputs = self.source_module(kernel_template_store_outputs.substitute(
+                                                      REAL=config.sim_config.dtypes['C_float_or_double'],
+                                                      NY_RXCOORDS=3,
+                                                      NX_RXS=6,
+                                                      NY_RXS=self.grid.iterations,
+                                                      NZ_RXS=len(self.grid.rxs),
+                                                      NX_FIELDS=self.grid.nx + 1,
+                                                      NY_FIELDS=self.grid.ny + 1,
+                                                      NZ_FIELDS=self.grid.nz + 1),
+                                                      options=config.sim_config.cuda['nvcc_opts'])
+            self.store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")

    def set_src_kernels(self):
        """Sources - initialise arrays on GPU, prepare kernel and get kernel
                        function.
        """
        if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles:
-            kernels_sources = SourceModule(kernels_template_sources.substitute(
-                                           REAL=cudafloattype,
-                                           N_updatecoeffsE=self.grid.updatecoeffsE.size,
-                                           N_updatecoeffsH=self.grid.updatecoeffsH.size,
-                                           NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
-                                           NY_SRCINFO=4,
-                                           NY_SRCWAVES=self.grid.iterations,
-                                           NX_FIELDS=self.grid.nx + 1,
-                                           NY_FIELDS=self.grid.ny + 1,
-                                           NZ_FIELDS=self.grid.nz + 1,
-                                           NX_ID=self.grid.ID.shape[1],
-                                           NY_ID=self.grid.ID.shape[2],
-                                           NZ_ID=self.grid.ID.shape[3]),
-                                           options=self.compiler_opts)
-            self.copy_mat_coeffs()
+            kernels_sources = self.source_module(kernel_template_sources.substitute(
+                                                 REAL=config.sim_config.dtypes['C_float_or_double'],
+                                                 N_updatecoeffsE=self.grid.updatecoeffsE.size,
+                                                 N_updatecoeffsH=self.grid.updatecoeffsH.size,
+                                                 NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
+                                                 NY_SRCINFO=4,
+                                                 NY_SRCWAVES=self.grid.iterations,
+                                                 NX_FIELDS=self.grid.nx + 1,
+                                                 NY_FIELDS=self.grid.ny + 1,
+                                                 NZ_FIELDS=self.grid.nz + 1,
+                                                 NX_ID=self.grid.ID.shape[1],
+                                                 NY_ID=self.grid.ID.shape[2],
+                                                 NZ_ID=self.grid.ID.shape[3]),
+                                                 options=config.sim_config.cuda['nvcc_opts'])
+            self.copy_mat_coeffs(kernels_sources, kernels_sources)
            if self.grid.hertziandipoles:
-                self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = gpu_initialise_src_arrays(self.grid.hertziandipoles, self.grid)
+                self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = initialise_src_arrays_gpu(self.grid.hertziandipoles, self.grid)
                self.update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole")
            if self.grid.magneticdipoles:
-                self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = gpu_initialise_src_arrays(self.grid.magneticdipoles, self.grid)
+                self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = initialise_src_arrays_gpu(self.grid.magneticdipoles, self.grid)
                self.update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole")
            if self.grid.voltagesources:
-                self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = gpu_initialise_src_arrays(self.grid.voltagesources, self.grid)
+                self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = initialise_src_arrays_gpu(self.grid.voltagesources, self.grid)
                self.update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source")

    def set_snapshot_kernel(self):
@@ -418,42 +415,46 @@ class CUDAUpdates:
                        function.
        """
        if self.grid.snapshots:
-            self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = gpu_initialise_snapshot_array(self.grid)
-            kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(
-                                                 REAL=cudafloattype,
-                                                 NX_SNAPS=Snapshot.nx_max,
-                                                 NY_SNAPS=Snapshot.ny_max,
-                                                 NZ_SNAPS=Snapshot.nz_max,
-                                                 NX_FIELDS=self.grid.nx + 1,
-                                                 NY_FIELDS=self.grid.ny + 1,
-                                                 NZ_FIELDS=self.grid.nz + 1),
-                                                 options=self.compiler_opts)
+            self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = initialise_snapshot_array_gpu(self.grid)
+            kernel_store_snapshot = self.source_module(kernel_template_store_snapshot.substitute(
+                                                       REAL=config.sim_config.dtypes['C_float_or_double'],
+                                                       NX_SNAPS=Snapshot.nx_max,
+                                                       NY_SNAPS=Snapshot.ny_max,
+                                                       NZ_SNAPS=Snapshot.nz_max,
+                                                       NX_FIELDS=self.grid.nx + 1,
+                                                       NY_FIELDS=self.grid.ny + 1,
+                                                       NZ_FIELDS=self.grid.nz + 1),
+                                                       options=config.sim_config.cuda['nvcc_opts'])
            self.store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")

-    def copy_mat_coeffs(self):
+    def copy_mat_coeffs(self, kernelE, kernelH):
        """Copy material coefficient arrays to constant memory of GPU
            (must be <64KB).
+
+        Args:
+            kernelE (kernel): electric field kernel.
+            kernelH (kernel): magnetic field kernel.
        """
-        updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0]
-        updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0]
+        updatecoeffsE = kernelE.get_global('updatecoeffsE')[0]
+        updatecoeffsH = kernelH.get_global('updatecoeffsH')[0]
        self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE)
        self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH)

    def store_outputs(self):
-        """Store field component values for every receiver and transmission line."""
+        """Store field component values for every receiver."""
        if self.grid.rxs:
-            self.store_outputs(np.int32(len(self.grid.rxs)),
-                               np.int32(self.grid.iteration),
-                               self.rxcoords_gpu.gpudata,
-                               self.rxs_gpu.gpudata,
-                               self.grid.Ex_gpu.gpudata,
-                               self.grid.Ey_gpu.gpudata,
-                               self.grid.Ez_gpu.gpudata,
-                               self.grid.Hx_gpu.gpudata,
-                               self.grid.Hy_gpu.gpudata,
-                               self.grid.Hz_gpu.gpudata,
-                               block=(1, 1, 1),
-                               grid=(round32(len(self.grid.rxs)), 1, 1))
+            self.store_outputs_gpu(np.int32(len(self.grid.rxs)),
+                                   np.int32(self.grid.iteration),
+                                   self.rxcoords_gpu.gpudata,
+                                   self.rxs_gpu.gpudata,
+                                   self.grid.Ex_gpu.gpudata,
+                                   self.grid.Ey_gpu.gpudata,
+                                   self.grid.Ez_gpu.gpudata,
+                                   self.grid.Hx_gpu.gpudata,
+                                   self.grid.Hy_gpu.gpudata,
+                                   self.grid.Hz_gpu.gpudata,
+                                   block=(1, 1, 1),
+                                   grid=(round32(len(self.grid.rxs)), 1, 1))

    def store_snapshots(self, iteration):
        """Store any snapshots.
@@ -501,32 +502,32 @@ class CUDAUpdates:

    def update_magnetic(self):
        """Update magnetic field components."""
-        self.update_magnetic(np.int32(self.grid.nx),
-                        np.int32(self.grid.ny),
-                        np.int32(self.grid.nz),
-                        self.grid.ID_gpu,
-                        self.grid.Hx_gpu,
-                        self.grid.Hy_gpu,
-                        self.grid.Hz_gpu,
-                        self.grid.Ex_gpu,
-                        self.grid.Ey_gpu,
-                        self.grid.Ez_gpu,
-                        block=self.grid.tpb,
-                        grid=self.grid.bpg)
+        self.update_magnetic_gpu(np.int32(self.grid.nx),
+                                 np.int32(self.grid.ny),
+                                 np.int32(self.grid.nz),
+                                 self.grid.ID_gpu,
+                                 self.grid.Hx_gpu,
+                                 self.grid.Hy_gpu,
+                                 self.grid.Hz_gpu,
+                                 self.grid.Ex_gpu,
+                                 self.grid.Ey_gpu,
+                                 self.grid.Ez_gpu,
+                                 block=self.grid.tpb,
+                                 grid=self.grid.bpg)

    def update_magnetic_pml(self):
        """Update magnetic field components with the PML correction."""
        for pml in self.grid.pmls:
-            pml.gpu_update_magnetic(self.grid)
+            pml.update_magnetic(self.grid)

    def update_magnetic_sources(self):
        """Update magnetic field components from sources."""
        if self.grid.magneticdipoles:
            self.update_magnetic_dipole_gpu(np.int32(len(self.grid.magneticdipoles)),
                                              np.int32(self.grid.iteration),
-                                              config.dtypes['float_or_double'](self.grid.dx),
-                                              config.dtypes['float_or_double'](self.grid.dy),
-                                              config.dtypes['float_or_double'](self.grid.dz),
+                                              config.sim_config.dtypes['float_or_double'](self.grid.dx),
+                                              config.sim_config.dtypes['float_or_double'](self.grid.dy),
+                                              config.sim_config.dtypes['float_or_double'](self.grid.dz),
                                              self.srcinfo1_magnetic_gpu.gpudata,
                                              self.srcinfo2_magnetic_gpu.gpudata,
                                              self.srcwaves_magnetic_gpu.gpudata,
@@ -541,18 +542,18 @@ class CUDAUpdates:
        """Update electric field components."""
        # All materials are non-dispersive so do standard update.
        if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0:
-            self.update_electric(np.int32(self.grid.nx),
-                                 np.int32(self.grid.ny),
-                                 np.int32(self.grid.nz),
-                                 self.grid.ID_gpu,
-                                 self.grid.Ex_gpu,
-                                 self.grid.Ey_gpu,
-                                 self.grid.Ez_gpu,
-                                 self.grid.Hx_gpu,
-                                 self.grid.Hy_gpu,
-                                 self.grid.Hz_gpu,
-                                 block=self.grid.tpb,
-                                 grid=self.grid.bpg)
+            self.update_electric_gpu(np.int32(self.grid.nx),
+                                     np.int32(self.grid.ny),
+                                     np.int32(self.grid.nz),
+                                     self.grid.ID_gpu,
+                                     self.grid.Ex_gpu,
+                                     self.grid.Ey_gpu,
+                                     self.grid.Ez_gpu,
+                                     self.grid.Hx_gpu,
+                                     self.grid.Hy_gpu,
+                                     self.grid.Hz_gpu,
+                                     block=self.grid.tpb,
+                                     grid=self.grid.bpg)

        # If there are any dispersive materials do 1st part of dispersive update
        # (it is split into two parts as it requires present and updated electric field values).
@@ -578,7 +579,7 @@ class CUDAUpdates:
    def update_electric_pml(self):
        """Update electric field components with the PML correction."""
        for pml in self.grid.pmls:
-            pml.gpu_update_electric(self.grid)
+            pml.update_electric(self.grid)

    def update_electric_sources(self):
        """Update electric field components from sources -
@@ -587,9 +588,9 @@ class CUDAUpdates:
        if self.grid.voltagesources:
            self.update_voltage_source_gpu(np.int32(len(self.grid.voltagesources)),
                                           np.int32(self.grid.iteration),
-                                           config.dtypes['float_or_double'](self.grid.dx),
-                                           config.dtypes['float_or_double'](self.grid.dy),
-                                           config.dtypes['float_or_double'](self.grid.dz),
+                                           config.sim_config.dtypes['float_or_double'](self.grid.dx),
+                                           config.sim_config.dtypes['float_or_double'](self.grid.dy),
+                                           config.sim_config.dtypes['float_or_double'](self.grid.dz),
                                           self.srcinfo1_voltage_gpu.gpudata,
                                           self.srcinfo2_voltage_gpu.gpudata,
                                           self.srcwaves_voltage_gpu.gpudata,
@@ -603,9 +604,9 @@ class CUDAUpdates:
        if self.grid.hertziandipoles:
            self.update_hertzian_dipole_gpu(np.int32(len(self.grid.hertziandipoles)),
                                            np.int32(self.grid.iteration),
-                                            config.dtypes['float_or_double'](self.grid.dx),
-                                            config.dtypes['float_or_double'](self.grid.dy),
-                                            config.dtypes['float_or_double'](self.grid.dz),
+                                            config.sim_config.dtypes['float_or_double'](self.grid.dx),
+                                            config.sim_config.dtypes['float_or_double'](self.grid.dy),
+                                            config.sim_config.dtypes['float_or_double'](self.grid.dz),
                                            self.srcinfo1_hertzian_gpu.gpudata,
                                            self.srcinfo2_hertzian_gpu.gpudata,
                                            self.srcwaves_hertzian_gpu.gpudata,
@@ -660,21 +661,20 @@ class CUDAUpdates:
        """Copy data from GPU back to CPU to save to file(s)."""
        # Copy output from receivers array back to correct receiver objects
        if self.grid.rxs:
-            gpu_get_rx_array(self.rxs_gpu.get(),
+            get_rx_array_gpu(self.rxs_gpu.get(),
                             self.rxcoords_gpu.get(),
                             self.grid)

        # Copy data from any snapshots back to correct snapshot objects
        if self.grid.snapshots and not self.grid.snapsgpu2cpu:
            for i, snap in enumerate(self.grid.snapshots):
-                gpu_get_snapshot_array(self.snapEx_gpu.get(),
+                get_snapshot_array_gpu(self.snapEx_gpu.get(),
                                       self.snapEy_gpu.get(),
                                       self.snapEz_gpu.get(),
                                       self.snapHx_gpu.get(),
                                       self.snapHy_gpu.get(),
                                       self.snapHz_gpu.get(),
-                                       i,
-                                       snap)
+                                       i, snap)

    def cleanup(self):
        """Cleanup GPU context."""
--- a/gprMax/utilities.py
+++ b/gprMax/utilities.py
@@ -421,10 +421,6 @@ class GPU:
        self.pcibusID = None
        self.constmem = None
        self.totalmem = None
-        # Threads per block for main field updates
-        self.tpb = (256, 1, 1)
-        # Blocks per grid for main field updates (set in grid.py)
-        self.bpg = None

    def get_gpu_info(self, drv):
        """Set information about GPU.