move gpu update code

2025-08-08 07:24:19 +08:00 · 2019-08-01 12:05:41 +01:00
--- a/gprMax/model_build_run.py
+++ b/gprMax/model_build_run.py
@@ -274,13 +274,11 @@ class ModelBuildRun:
        if not os.path.isdir(config.outputfilepath):
            os.mkdir(config.outputfilepath)
            printer.print('\nCreated output directory: {}'.format(config.outputfilepath))
+
        outputfile = os.path.join(config.outputfilepath, os.path.splitext(os.path.split(config.inputfilepath)[1])[0] + appendmodelnumber + '.out')
        printer.print('\nOutput file: {}\n'.format(outputfile))
-        # Main FDTD solving functions for either CPU or GPU
-        if config.cuda['gpus'] is None:
-            tsolve = solve_cpu(currentmodelrun, modelend, G)
-        else:
-            tsolve, memsolve = solve_gpu(currentmodelrun, modelend, G)
+
+        tsolve, memsolve = solve(currentmodelrun, modelend, G)

        # Write an output file in HDF5 format
        write_hdf5_outputfile(outputfile, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G)
@@ -309,7 +307,7 @@ class ModelBuildRun:

        return tsolve

-def solve_cpu(solver, sim_config, model_config):
+def solve(solver, sim_config, model_config):
    """
    Solving using FDTD method on CPU. Parallelised using Cython (OpenMP) for
    electric and magnetic field updates, and PML updates.
@@ -327,242 +325,3 @@ def solve_cpu(solver, sim_config, model_config):
    solver.solve()

    return tsolve
-
-
-def solve_gpu(currentmodelrun, modelend, G):
-    """Solving using FDTD method on GPU. Implemented using Nvidia CUDA.
-
-    Args:
-        currentmodelrun (int): Current model run number.
-        modelend (int): Number of last model to run.
-        G (class): Grid class instance - holds essential parameters describing the model.
-
-    Returns:
-        tsolve (float): Time taken to execute solving (seconds)
-        memsolve (int): memory usage on final iteration (bytes)
-    """
-
-    import pycuda.driver as drv
-    from pycuda.compiler import SourceModule
-    drv.init()
-
-    # Suppress nvcc warnings on Windows
-    if sys.platform == 'win32':
-        compiler_opts = ['-w']
-    else:
-        compiler_opts = None
-
-    # Create device handle and context on specifc GPU device (and make it current context)
-    dev = drv.Device(config.cuda['gpus'].deviceID)
-    ctx = dev.make_context()
-
-    # Electric and magnetic field updates - prepare kernels, get kernel functions, and initialise arrays on GPU
-    if config.materials['maxpoles'] == 0:
-        kernel_fields = SourceModule(kernel_template_fields.substitute(REAL=config.dtypes['C_float_or_double'], REAL_OR_COMPLEX=config.dtypes['C_complex'], N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts)
-
-    else:   # # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values).
-        kernel_fields = SourceModule(kernel_template_fields.substitute(REAL=config.dtypes['C_float_or_double'], REAL_OR_COMPLEX=config.dtypes['C_complex'], N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts)
-        update_e_dispersive_A_gpu = kernel_fields.get_function("update_e_dispersive_A")
-        update_e_dispersive_B_gpu = kernel_fields.get_function("update_e_dispersive_B")
-        G.gpu_initialise_dispersive_arrays()
-    update_e_gpu = kernel_fields.get_function("update_e")
-    update_h_gpu = kernel_fields.get_function("update_h")
-
-    # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for fields kernels
-    updatecoeffsE = kernel_fields.get_global('updatecoeffsE')[0]
-    updatecoeffsH = kernel_fields.get_global('updatecoeffsH')[0]
-    if G.updatecoeffsE.nbytes + G.updatecoeffsH.nbytes > config.cuda['gpus'].constmem:
-        raise GeneralError('Too many materials in the model to fit onto constant memory of size {} on {} - {} GPU'.format(human_size(config.cuda['gpus'].constmem), config.cuda['gpus'].deviceID, config.cuda['gpus'].name))
-    else:
-        drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
-        drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)
-
-    # Electric and magnetic field updates - set blocks per grid and initialise field arrays on GPU
-    G.gpu_set_blocks_per_grid()
-    G.gpu_initialise_arrays()
-
-    # PML updates
-    if G.pmls:
-        # Prepare kernels
-        kernelelectricfunc = getattr(import_module('gprMax.cuda.pml_updates_electric_' + G.pmlformulation), 'kernels_template_pml_electric_' + G.pmlformulation)
-        kernelmagneticfunc = getattr(import_module('gprMax.cuda.pml_updates_magnetic_' + G.pmlformulation), 'kernels_template_pml_magnetic_' + G.pmlformulation)
-        kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=config.dtypes['C_float_or_double'], N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
-        kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=config.dtypes['C_float_or_double'], N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=G.Hx.shape[0], NY_FIELDS=G.Hx.shape[1], NZ_FIELDS=G.Hx.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
-        # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for PML kernels
-        updatecoeffsE = kernels_pml_electric.get_global('updatecoeffsE')[0]
-        updatecoeffsH = kernels_pml_magnetic.get_global('updatecoeffsH')[0]
-        drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
-        drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)
-        # Set block per grid, initialise arrays on GPU, and get kernel functions
-        for pml in G.pmls:
-            pml.gpu_set_blocks_per_grid(G)
-            pml.gpu_initialise_arrays()
-            pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
-
-    # Receivers
-    if G.rxs:
-        # Initialise arrays on GPU
-        rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G)
-        # Prepare kernel and get kernel function
-
-        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=config.dtypes['C_float_or_double'], NY_RXCOORDS=3, NX_RXS=len(Rx.gpu_allowableoutputs), NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2]), options=compiler_opts)
-        store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
-
-    # Sources - initialise arrays on GPU, prepare kernel and get kernel functions
-    if G.voltagesources + G.hertziandipoles + G.magneticdipoles:
-        kernel_sources = SourceModule(kernel_template_sources.substitute(REAL=config.dtypes['C_float_or_double'], N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
-        # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for source kernels
-        updatecoeffsE = kernel_sources.get_global('updatecoeffsE')[0]
-        updatecoeffsH = kernel_sources.get_global('updatecoeffsH')[0]
-        drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
-        drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)
-        if G.hertziandipoles:
-            srcinfo1_hertzian_gpu, srcinfo2_hertzian_gpu, srcwaves_hertzian_gpu = gpu_initialise_src_arrays(G.hertziandipoles, G)
-            update_hertzian_dipole_gpu = kernel_sources.get_function("update_hertzian_dipole")
-        if G.magneticdipoles:
-            srcinfo1_magnetic_gpu, srcinfo2_magnetic_gpu, srcwaves_magnetic_gpu = gpu_initialise_src_arrays(G.magneticdipoles, G)
-            update_magnetic_dipole_gpu = kernel_sources.get_function("update_magnetic_dipole")
-        if G.voltagesources:
-            srcinfo1_voltage_gpu, srcinfo2_voltage_gpu, srcwaves_voltage_gpu = gpu_initialise_src_arrays(G.voltagesources, G)
-            update_voltage_source_gpu = kernel_sources.get_function("update_voltage_source")
-
-    # Snapshots - initialise arrays on GPU, prepare kernel and get kernel functions
-    if G.snapshots:
-        # Initialise arrays on GPU
-        snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu = gpu_initialise_snapshot_array(G)
-        # Prepare kernel and get kernel function
-        kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=config.dtypes['C_float_or_double'], NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2]), options=compiler_opts)
-        store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
-
-    # Iteration loop timer
-    iterstart = drv.Event()
-    iterend = drv.Event()
-    iterstart.record()
-
-    for iteration in tqdm(range(G.iterations), desc='Running simulation, model ' + str(currentmodelrun) + '/' + str(modelend), ncols=get_terminal_width() - 1, file=sys.stdout, disable=not config.general['progressbars']):
-
-        # Get GPU memory usage on final iteration
-        if iteration == G.iterations - 1:
-            memsolve = drv.mem_get_info()[1] - drv.mem_get_info()[0]
-
-        # Store field component values for every receiver
-        if G.rxs:
-            store_outputs_gpu(np.int32(len(G.rxs)), np.int32(iteration),
-                              rxcoords_gpu.gpudata, rxs_gpu.gpudata,
-                              G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
-                              G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
-                              block=(1, 1, 1), grid=(round32(len(G.rxs)), 1, 1))
-
-        # Store any snapshots
-        for i, snap in enumerate(G.snapshots):
-            if snap.time == iteration + 1:
-                if not config.cuda['snapsgpu2cpu']:
-                    store_snapshot_gpu(np.int32(i), np.int32(snap.xs),
-                                       np.int32(snap.xf), np.int32(snap.ys),
-                                       np.int32(snap.yf), np.int32(snap.zs),
-                                       np.int32(snap.zf), np.int32(snap.dx),
-                                       np.int32(snap.dy), np.int32(snap.dz),
-                                       G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
-                                       G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
-                                       snapEx_gpu.gpudata, snapEy_gpu.gpudata, snapEz_gpu.gpudata,
-                                       snapHx_gpu.gpudata, snapHy_gpu.gpudata, snapHz_gpu.gpudata,
-                                       block=Snapshot.tpb, grid=Snapshot.bpg)
-                else:
-                    store_snapshot_gpu(np.int32(0), np.int32(snap.xs),
-                                       np.int32(snap.xf), np.int32(snap.ys),
-                                       np.int32(snap.yf), np.int32(snap.zs),
-                                       np.int32(snap.zf), np.int32(snap.dx),
-                                       np.int32(snap.dy), np.int32(snap.dz),
-                                       G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
-                                       G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
-                                       snapEx_gpu.gpudata, snapEy_gpu.gpudata, snapEz_gpu.gpudata,
-                                       snapHx_gpu.gpudata, snapHy_gpu.gpudata, snapHz_gpu.gpudata,
-                                       block=Snapshot.tpb, grid=Snapshot.bpg)
-                    gpu_get_snapshot_array(snapEx_gpu.get(), snapEy_gpu.get(), snapEz_gpu.get(),
-                                           snapHx_gpu.get(), snapHy_gpu.get(), snapHz_gpu.get(), 0, snap)
-
-        # Update magnetic field components
-        update_h_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
-                     G.ID_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata,
-                     G.Hz_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata,
-                     G.Ez_gpu.gpudata, block=config.cuda['gpus'].tpb, grid=config.cuda['gpus'].bpg)
-
-        # Update magnetic field components with the PML correction
-        for pml in G.pmls:
-            pml.gpu_update_magnetic(G)
-
-        # Update magnetic field components for magetic dipole sources
-        if G.magneticdipoles:
-            update_magnetic_dipole_gpu(np.int32(len(G.magneticdipoles)), np.int32(iteration),
-                                       config.dtypes['float_or_double'](G.dx), config.dtypes['float_or_double'](G.dy), config.dtypes['float_or_double'](G.dz),
-                                       srcinfo1_magnetic_gpu.gpudata, srcinfo2_magnetic_gpu.gpudata,
-                                       srcwaves_magnetic_gpu.gpudata, G.ID_gpu.gpudata,
-                                       G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
-                                       block=(1, 1, 1), grid=(round32(len(G.magneticdipoles)), 1, 1))
-
-        # Update electric field components
-        # If all materials are non-dispersive do standard update
-        if config.materials['maxpoles'] == 0:
-            update_e_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), G.ID_gpu.gpudata,
-                         G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
-                         G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
-                         block=config.cuda['gpus'].tpb, grid=config.cuda['gpus'].bpg)
-        # If there are any dispersive materials do 1st part of dispersive update
-        # (it is split into two parts as it requires present and updated electric field values).
-        else:
-            update_e_dispersive_A_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
-                                      np.int32(config.materials['maxpoles']), G.updatecoeffsdispersive_gpu.gpudata,
-                                      G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata,
-                                      G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
-                                      G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
-                                      block=config.cuda['gpus'].tpb, grid=config.cuda['gpus'].bpg)
-
-        # Update electric field components with the PML correction
-        for pml in G.pmls:
-            pml.gpu_update_electric(G)
-
-        # Update electric field components for voltage sources
-        if G.voltagesources:
-            update_voltage_source_gpu(np.int32(len(G.voltagesources)), np.int32(iteration),
-                                      config.dtypes['float_or_double'](G.dx), config.dtypes['float_or_double'](G.dy), config.dtypes['float_or_double'](G.dz),
-                                      srcinfo1_voltage_gpu.gpudata, srcinfo2_voltage_gpu.gpudata,
-                                      srcwaves_voltage_gpu.gpudata, G.ID_gpu.gpudata,
-                                      G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
-                                      block=(1, 1, 1), grid=(round32(len(G.voltagesources)), 1, 1))
-
-        # Update electric field components for Hertzian dipole sources (update any Hertzian dipole sources last)
-        if G.hertziandipoles:
-            update_hertzian_dipole_gpu(np.int32(len(G.hertziandipoles)), np.int32(iteration),
-                                       config.dtypes['float_or_double'](G.dx), config.dtypes['float_or_double'](G.dy), config.dtypes['float_or_double'](G.dz),
-                                       srcinfo1_hertzian_gpu.gpudata, srcinfo2_hertzian_gpu.gpudata,
-                                       srcwaves_hertzian_gpu.gpudata, G.ID_gpu.gpudata,
-                                       G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
-                                       block=(1, 1, 1), grid=(round32(len(G.hertziandipoles)), 1, 1))
-
-        # If there are any dispersive materials do 2nd part of dispersive update (it is split into two parts as it requires present and updated electric field values). Therefore it can only be completely updated after the electric field has been updated by the PML and source updates.
-        if config.materials['maxpoles'] > 0:
-            update_e_dispersive_B_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
-                                      np.int32(config.materials['maxpoles']), G.updatecoeffsdispersive_gpu.gpudata,
-                                      G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata,
-                                      G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
-                                      block=config.cuda['gpus'].tpb, grid=config.cuda['gpus'].bpg)
-
-    # Copy output from receivers array back to correct receiver objects
-    if G.rxs:
-        gpu_get_rx_array(rxs_gpu.get(), rxcoords_gpu.get(), G)
-
-    # Copy data from any snapshots back to correct snapshot objects
-    if G.snapshots and not config.cuda['snapsgpu2cpu']:
-        for i, snap in enumerate(G.snapshots):
-            gpu_get_snapshot_array(snapEx_gpu.get(), snapEy_gpu.get(), snapEz_gpu.get(),
-                                   snapHx_gpu.get(), snapHy_gpu.get(), snapHz_gpu.get(), i, snap)
-
-    iterend.record()
-    iterend.synchronize()
-    tsolve = iterstart.time_till(iterend) * 1e-3
-
-    # Remove context from top of stack and delete
-    ctx.pop()
-    del ctx
-
-    return tsolve, memsolve
--- a/gprMax/updates.py
+++ b/gprMax/updates.py
@@ -154,3 +154,243 @@ class SubgridUpdates:

 class GPUUpdates:
    pass
+
+"""
+def solve_gpu(currentmodelrun, modelend, G):
+    """Solving using FDTD method on GPU. Implemented using Nvidia CUDA.
+
+    Args:
+        currentmodelrun (int): Current model run number.
+        modelend (int): Number of last model to run.
+        G (class): Grid class instance - holds essential parameters describing the model.
+
+    Returns:
+        tsolve (float): Time taken to execute solving (seconds)
+        memsolve (int): memory usage on final iteration (bytes)
+    """
+
+    import pycuda.driver as drv
+    from pycuda.compiler import SourceModule
+    drv.init()
+
+    # Suppress nvcc warnings on Windows
+    if sys.platform == 'win32':
+        compiler_opts = ['-w']
+    else:
+        compiler_opts = None
+
+    # Create device handle and context on specifc GPU device (and make it current context)
+    dev = drv.Device(config.cuda['gpus'].deviceID)
+    ctx = dev.make_context()
+
+    # Electric and magnetic field updates - prepare kernels, get kernel functions, and initialise arrays on GPU
+    if config.materials['maxpoles'] == 0:
+        kernel_fields = SourceModule(kernel_template_fields.substitute(REAL=config.dtypes['C_float_or_double'], REAL_OR_COMPLEX=config.dtypes['C_complex'], N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts)
+
+    else:   # # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values).
+        kernel_fields = SourceModule(kernel_template_fields.substitute(REAL=config.dtypes['C_float_or_double'], REAL_OR_COMPLEX=config.dtypes['C_complex'], N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts)
+        update_e_dispersive_A_gpu = kernel_fields.get_function("update_e_dispersive_A")
+        update_e_dispersive_B_gpu = kernel_fields.get_function("update_e_dispersive_B")
+        G.gpu_initialise_dispersive_arrays()
+    update_e_gpu = kernel_fields.get_function("update_e")
+    update_h_gpu = kernel_fields.get_function("update_h")
+
+    # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for fields kernels
+    updatecoeffsE = kernel_fields.get_global('updatecoeffsE')[0]
+    updatecoeffsH = kernel_fields.get_global('updatecoeffsH')[0]
+    if G.updatecoeffsE.nbytes + G.updatecoeffsH.nbytes > config.cuda['gpus'].constmem:
+        raise GeneralError('Too many materials in the model to fit onto constant memory of size {} on {} - {} GPU'.format(human_size(config.cuda['gpus'].constmem), config.cuda['gpus'].deviceID, config.cuda['gpus'].name))
+    else:
+        drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
+        drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)
+
+    # Electric and magnetic field updates - set blocks per grid and initialise field arrays on GPU
+    G.gpu_set_blocks_per_grid()
+    G.gpu_initialise_arrays()
+
+    # PML updates
+    if G.pmls:
+        # Prepare kernels
+        kernelelectricfunc = getattr(import_module('gprMax.cuda.pml_updates_electric_' + G.pmlformulation), 'kernels_template_pml_electric_' + G.pmlformulation)
+        kernelmagneticfunc = getattr(import_module('gprMax.cuda.pml_updates_magnetic_' + G.pmlformulation), 'kernels_template_pml_magnetic_' + G.pmlformulation)
+        kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=config.dtypes['C_float_or_double'], N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
+        kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=config.dtypes['C_float_or_double'], N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=G.Hx.shape[0], NY_FIELDS=G.Hx.shape[1], NZ_FIELDS=G.Hx.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
+        # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for PML kernels
+        updatecoeffsE = kernels_pml_electric.get_global('updatecoeffsE')[0]
+        updatecoeffsH = kernels_pml_magnetic.get_global('updatecoeffsH')[0]
+        drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
+        drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)
+        # Set block per grid, initialise arrays on GPU, and get kernel functions
+        for pml in G.pmls:
+            pml.gpu_set_blocks_per_grid(G)
+            pml.gpu_initialise_arrays()
+            pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
+
+    # Receivers
+    if G.rxs:
+        # Initialise arrays on GPU
+        rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G)
+        # Prepare kernel and get kernel function
+
+        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=config.dtypes['C_float_or_double'], NY_RXCOORDS=3, NX_RXS=len(Rx.gpu_allowableoutputs), NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2]), options=compiler_opts)
+        store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
+
+    # Sources - initialise arrays on GPU, prepare kernel and get kernel functions
+    if G.voltagesources + G.hertziandipoles + G.magneticdipoles:
+        kernel_sources = SourceModule(kernel_template_sources.substitute(REAL=config.dtypes['C_float_or_double'], N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
+        # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for source kernels
+        updatecoeffsE = kernel_sources.get_global('updatecoeffsE')[0]
+        updatecoeffsH = kernel_sources.get_global('updatecoeffsH')[0]
+        drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
+        drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)
+        if G.hertziandipoles:
+            srcinfo1_hertzian_gpu, srcinfo2_hertzian_gpu, srcwaves_hertzian_gpu = gpu_initialise_src_arrays(G.hertziandipoles, G)
+            update_hertzian_dipole_gpu = kernel_sources.get_function("update_hertzian_dipole")
+        if G.magneticdipoles:
+            srcinfo1_magnetic_gpu, srcinfo2_magnetic_gpu, srcwaves_magnetic_gpu = gpu_initialise_src_arrays(G.magneticdipoles, G)
+            update_magnetic_dipole_gpu = kernel_sources.get_function("update_magnetic_dipole")
+        if G.voltagesources:
+            srcinfo1_voltage_gpu, srcinfo2_voltage_gpu, srcwaves_voltage_gpu = gpu_initialise_src_arrays(G.voltagesources, G)
+            update_voltage_source_gpu = kernel_sources.get_function("update_voltage_source")
+
+    # Snapshots - initialise arrays on GPU, prepare kernel and get kernel functions
+    if G.snapshots:
+        # Initialise arrays on GPU
+        snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu = gpu_initialise_snapshot_array(G)
+        # Prepare kernel and get kernel function
+        kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=config.dtypes['C_float_or_double'], NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2]), options=compiler_opts)
+        store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
+
+    # Iteration loop timer
+    iterstart = drv.Event()
+    iterend = drv.Event()
+    iterstart.record()
+
+    for iteration in tqdm(range(G.iterations), desc='Running simulation, model ' + str(currentmodelrun) + '/' + str(modelend), ncols=get_terminal_width() - 1, file=sys.stdout, disable=not config.general['progressbars']):
+
+        # Get GPU memory usage on final iteration
+        if iteration == G.iterations - 1:
+            memsolve = drv.mem_get_info()[1] - drv.mem_get_info()[0]
+
+        # Store field component values for every receiver
+        if G.rxs:
+            store_outputs_gpu(np.int32(len(G.rxs)), np.int32(iteration),
+                              rxcoords_gpu.gpudata, rxs_gpu.gpudata,
+                              G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                              G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+                              block=(1, 1, 1), grid=(round32(len(G.rxs)), 1, 1))
+
+        # Store any snapshots
+        for i, snap in enumerate(G.snapshots):
+            if snap.time == iteration + 1:
+                if not config.cuda['snapsgpu2cpu']:
+                    store_snapshot_gpu(np.int32(i), np.int32(snap.xs),
+                                       np.int32(snap.xf), np.int32(snap.ys),
+                                       np.int32(snap.yf), np.int32(snap.zs),
+                                       np.int32(snap.zf), np.int32(snap.dx),
+                                       np.int32(snap.dy), np.int32(snap.dz),
+                                       G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                       G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+                                       snapEx_gpu.gpudata, snapEy_gpu.gpudata, snapEz_gpu.gpudata,
+                                       snapHx_gpu.gpudata, snapHy_gpu.gpudata, snapHz_gpu.gpudata,
+                                       block=Snapshot.tpb, grid=Snapshot.bpg)
+                else:
+                    store_snapshot_gpu(np.int32(0), np.int32(snap.xs),
+                                       np.int32(snap.xf), np.int32(snap.ys),
+                                       np.int32(snap.yf), np.int32(snap.zs),
+                                       np.int32(snap.zf), np.int32(snap.dx),
+                                       np.int32(snap.dy), np.int32(snap.dz),
+                                       G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                       G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+                                       snapEx_gpu.gpudata, snapEy_gpu.gpudata, snapEz_gpu.gpudata,
+                                       snapHx_gpu.gpudata, snapHy_gpu.gpudata, snapHz_gpu.gpudata,
+                                       block=Snapshot.tpb, grid=Snapshot.bpg)
+                    gpu_get_snapshot_array(snapEx_gpu.get(), snapEy_gpu.get(), snapEz_gpu.get(),
+                                           snapHx_gpu.get(), snapHy_gpu.get(), snapHz_gpu.get(), 0, snap)
+
+        # Update magnetic field components
+        update_h_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
+                     G.ID_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata,
+                     G.Hz_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata,
+                     G.Ez_gpu.gpudata, block=config.cuda['gpus'].tpb, grid=config.cuda['gpus'].bpg)
+
+        # Update magnetic field components with the PML correction
+        for pml in G.pmls:
+            pml.gpu_update_magnetic(G)
+
+        # Update magnetic field components for magetic dipole sources
+        if G.magneticdipoles:
+            update_magnetic_dipole_gpu(np.int32(len(G.magneticdipoles)), np.int32(iteration),
+                                       config.dtypes['float_or_double'](G.dx), config.dtypes['float_or_double'](G.dy), config.dtypes['float_or_double'](G.dz),
+                                       srcinfo1_magnetic_gpu.gpudata, srcinfo2_magnetic_gpu.gpudata,
+                                       srcwaves_magnetic_gpu.gpudata, G.ID_gpu.gpudata,
+                                       G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+                                       block=(1, 1, 1), grid=(round32(len(G.magneticdipoles)), 1, 1))
+
+        # Update electric field components
+        # If all materials are non-dispersive do standard update
+        if config.materials['maxpoles'] == 0:
+            update_e_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), G.ID_gpu.gpudata,
+                         G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                         G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+                         block=config.cuda['gpus'].tpb, grid=config.cuda['gpus'].bpg)
+        # If there are any dispersive materials do 1st part of dispersive update
+        # (it is split into two parts as it requires present and updated electric field values).
+        else:
+            update_e_dispersive_A_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
+                                      np.int32(config.materials['maxpoles']), G.updatecoeffsdispersive_gpu.gpudata,
+                                      G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata,
+                                      G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                      G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+                                      block=config.cuda['gpus'].tpb, grid=config.cuda['gpus'].bpg)
+
+        # Update electric field components with the PML correction
+        for pml in G.pmls:
+            pml.gpu_update_electric(G)
+
+        # Update electric field components for voltage sources
+        if G.voltagesources:
+            update_voltage_source_gpu(np.int32(len(G.voltagesources)), np.int32(iteration),
+                                      config.dtypes['float_or_double'](G.dx), config.dtypes['float_or_double'](G.dy), config.dtypes['float_or_double'](G.dz),
+                                      srcinfo1_voltage_gpu.gpudata, srcinfo2_voltage_gpu.gpudata,
+                                      srcwaves_voltage_gpu.gpudata, G.ID_gpu.gpudata,
+                                      G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                      block=(1, 1, 1), grid=(round32(len(G.voltagesources)), 1, 1))
+
+        # Update electric field components for Hertzian dipole sources (update any Hertzian dipole sources last)
+        if G.hertziandipoles:
+            update_hertzian_dipole_gpu(np.int32(len(G.hertziandipoles)), np.int32(iteration),
+                                       config.dtypes['float_or_double'](G.dx), config.dtypes['float_or_double'](G.dy), config.dtypes['float_or_double'](G.dz),
+                                       srcinfo1_hertzian_gpu.gpudata, srcinfo2_hertzian_gpu.gpudata,
+                                       srcwaves_hertzian_gpu.gpudata, G.ID_gpu.gpudata,
+                                       G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                       block=(1, 1, 1), grid=(round32(len(G.hertziandipoles)), 1, 1))
+
+        # If there are any dispersive materials do 2nd part of dispersive update (it is split into two parts as it requires present and updated electric field values). Therefore it can only be completely updated after the electric field has been updated by the PML and source updates.
+        if config.materials['maxpoles'] > 0:
+            update_e_dispersive_B_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
+                                      np.int32(config.materials['maxpoles']), G.updatecoeffsdispersive_gpu.gpudata,
+                                      G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata,
+                                      G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                      block=config.cuda['gpus'].tpb, grid=config.cuda['gpus'].bpg)
+
+    # Copy output from receivers array back to correct receiver objects
+    if G.rxs:
+        gpu_get_rx_array(rxs_gpu.get(), rxcoords_gpu.get(), G)
+
+    # Copy data from any snapshots back to correct snapshot objects
+    if G.snapshots and not config.cuda['snapsgpu2cpu']:
+        for i, snap in enumerate(G.snapshots):
+            gpu_get_snapshot_array(snapEx_gpu.get(), snapEy_gpu.get(), snapEz_gpu.get(),
+                                   snapHx_gpu.get(), snapHy_gpu.get(), snapHz_gpu.get(), i, snap)
+
+    iterend.record()
+    iterend.synchronize()
+    tsolve = iterstart.time_till(iterend) * 1e-3
+
+    # Remove context from top of stack and delete
+    ctx.pop()
+    del ctx
+
+    return tsolve, memsolve
+"""