Added ability to write snapshots with GPU solver. Implmented speedup of snapshot writing for CPU solver.

2025-08-04 11:36:52 +08:00 · 2018-07-09 11:15:08 +01:00
--- a/gprMax/model_build_run.py
+++ b/gprMax/model_build_run.py
@@ -31,7 +31,10 @@ import numpy as np
 from terminaltables import AsciiTable
 from tqdm import tqdm

-from gprMax.constants import floattype, cudafloattype, cudacomplextype
+from gprMax.constants import floattype
+from gprMax.constants import complextype
+from gprMax.constants import cudafloattype
+from gprMax.constants import cudacomplextype
 from gprMax.exceptions import GeneralError

 from gprMax.fields_outputs import store_outputs
@@ -48,6 +51,7 @@ from gprMax.fields_updates_gpu import kernels_template_fields

 from gprMax.grid import FDTDGrid
 from gprMax.grid import dispersion_analysis
+
 from gprMax.input_cmds_geometry import process_geometrycmds
 from gprMax.input_cmds_file import process_python_include_code
 from gprMax.input_cmds_file import write_processed_file
@@ -60,12 +64,15 @@ from gprMax.pml import build_pmls
 from gprMax.pml_updates_gpu import kernels_template_pml
 from gprMax.receivers import gpu_initialise_rx_arrays
 from gprMax.receivers import gpu_get_rx_array
+from gprMax.snapshots import Snapshot
+from gprMax.snapshots import gpu_initialise_snapshot_array
+from gprMax.snapshots import gpu_get_snapshot_array
+from gprMax.snapshots_gpu import kernel_template_store_snapshot
 from gprMax.sources import gpu_initialise_src_arrays
 from gprMax.source_updates_gpu import kernels_template_sources
 from gprMax.utilities import get_host_info
 from gprMax.utilities import get_terminal_width
 from gprMax.utilities import human_size
-from gprMax.utilities import memory_usage
 from gprMax.utilities import open_path_file
 from gprMax.utilities import round32
 from gprMax.yee_cell_build_ext import build_electric_components
@@ -153,6 +160,12 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern
        print()
        process_multicmds(multicmds, G)

+        # Estimate and check memory (RAM) usage
+        G.memory_estimate_basic()
+        G.memory_check()
+        if G.messages:
+            print('\nTotal memory (RAM) required: ~{}\n'.format(human_size(G.memoryusage)))
+
        # Initialise an array for volumetric material IDs (solid), boolean
        # arrays for specifying materials not to be averaged (rigid),
        # an array for cell edge IDs (ID)
@@ -198,22 +211,22 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern
        # N.B. 2D modes are a single cell slice of 3D grid
        if '2D TMx' in G.mode:
            # Ey & Ez components
-            G.ID[1,0,:,:] = 0
-            G.ID[1,1,:,:] = 0
-            G.ID[2,0,:,:] = 0
-            G.ID[2,1,:,:] = 0
+            G.ID[1, 0, :, :] = 0
+            G.ID[1, 1, :, :] = 0
+            G.ID[2, 0, :, :] = 0
+            G.ID[2, 1, :, :] = 0
        elif '2D TMy' in G.mode:
            # Ex & Ez components
-            G.ID[0,:,0,:] = 0
-            G.ID[0,:,1,:] = 0
-            G.ID[2,:,0,:] = 0
-            G.ID[2,:,1,:] = 0
+            G.ID[0, :, 0, :] = 0
+            G.ID[0, :, 1, :] = 0
+            G.ID[2, :, 0, :] = 0
+            G.ID[2, :, 1, :] = 0
        elif '2D TMz' in G.mode:
            # Ex & Ey components
-            G.ID[0,:,:,0] = 0
-            G.ID[0,:,:,1] = 0
-            G.ID[1,:,:,0] = 0
-            G.ID[1,:,:,1] = 0
+            G.ID[0, :, :, 0] = 0
+            G.ID[0, :, :, 1] = 0
+            G.ID[1, :, :, 0] = 0
+            G.ID[1, :, :, 1] = 0

        # Process any voltage sources (that have resistance) to create a new
        # material at the source location
@@ -227,20 +240,24 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern
        # there are any dispersive materials
        if Material.maxpoles != 0:
            # Update estimated memory (RAM) usage
-            memestimate = memory_usage(G)
-            # Check if model can be built and/or run on host
-            if memestimate > G.hostinfo['ram']:
-                raise GeneralError('Estimated memory (RAM) required ~{} exceeds {} detected!\n'.format(human_size(memestimate), human_size(G.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)))
-
-            # Check if model can be run on specified GPU if required
-            if G.gpu is not None:
-                if memestimate > G.gpu.totalmem:
-                    raise GeneralError('Estimated memory (RAM) required ~{} exceeds {} detected on specified {} - {} GPU!\n'.format(human_size(memestimate), human_size(G.gpu.totalmem, a_kilobyte_is_1024_bytes=True), G.gpu.deviceID, G.gpu.name))
+            G.memoryusage += int(3 * Material.maxpoles * (G.nx + 1) * (G.ny + 1) * (G.nz + 1) * np.dtype(complextype).itemsize)
+            G.memory_check()
            if G.messages:
-                print('Estimated memory (RAM) required: ~{}'.format(human_size(memestimate)))
+                print('\nTotal memory (RAM) required, updated (dispersive): ~{}\n'.format(human_size(G.memoryusage)))

            G.initialise_dispersive_arrays()

+        # Check there is sufficient memory to store any snapshots
+        if G.snapshots:
+            snapsmemsize = 0
+            for snap in G.snapshots:
+                # 2 x required to account for electric and magnetic fields
+                snapsmemsize += (2 * snap.datasizefield)
+            G.memoryusage += int(snapsmemsize)
+            G.memory_check(snapsmemsize=int(snapsmemsize))
+            if G.messages:
+                print('\nTotal memory (RAM) required, updated (snapshots): ~{}\n'.format(human_size(G.memoryusage)))
+
        # Process complete list of materials - calculate update coefficients,
        # store in arrays, and build text list of materials/properties
        materialsdata = process_materials(G)
@@ -314,10 +331,6 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern

    # Run simulation
    else:
-        # Prepare any snapshot files
-        for snapshot in G.snapshots:
-            snapshot.prepare_vtk_imagedata(appendmodelnumber, G)
-
        # Output filename
        inputfileparts = os.path.splitext(os.path.join(G.inputdirectory, G.inputfilename))
        outputfile = inputfileparts[0] + appendmodelnumber + '.out'
@@ -332,8 +345,23 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern
        # Write an output file in HDF5 format
        write_hdf5_outputfile(outputfile, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G)

+        # Write any snapshots to file
+        if G.snapshots:
+            # Create directory and construct filename from user-supplied name and model run number
+            snapshotdir = os.path.join(G.inputdirectory, os.path.splitext(G.inputfilename)[0] + '_snaps' + appendmodelnumber)
+            if not os.path.exists(snapshotdir):
+                os.mkdir(snapshotdir)
+
+            print()
+            for i, snap in enumerate(G.snapshots):
+                snap.filename = os.path.abspath(os.path.join(snapshotdir, snap.basefilename + '.vti'))
+                pbar = tqdm(total=snap.vtkdatawritesize, leave=True, unit='byte', unit_scale=True, desc='Writing snapshot file {} of {}, {}'.format(i + 1, len(G.snapshots), os.path.split(snap.filename)[1]), ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable)
+                snap.write_vtk_imagedata(pbar, G)
+                pbar.close()
+            print()
+
        if G.messages:
-            print('Memory (RAM) used: ~{}'.format(human_size(p.memory_info().rss)))
+            print('Total memory (RAM) used: ~{}'.format(human_size(p.memory_info().rss)))
            print('Solving time [HH:MM:SS]: {}'.format(datetime.timedelta(seconds=tsolve)))

    # If geometry information to be reused between model runs then FDTDGrid
@@ -364,13 +392,10 @@ def solve_cpu(currentmodelrun, modelend, G):
        # Store field component values for every receiver and transmission line
        store_outputs(iteration, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G)

-        # Write any snapshots to file
-        for i, snap in enumerate(G.snapshots):
+        # Store any snapshots
+        for snap in G.snapshots:
            if snap.time == iteration + 1:
-                snapiters = 36 * (((snap.xf - snap.xs) / snap.dx) * ((snap.yf - snap.ys) / snap.dy) * ((snap.zf - snap.zs) / snap.dz))
-                pbar = tqdm(total=snapiters, leave=False, unit='byte', unit_scale=True, desc='  Writing snapshot file {} of {}, {}'.format(i + 1, len(G.snapshots), os.path.split(snap.filename)[1]), ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable)
-                snap.write_vtk_imagedata(G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G, pbar)
-                pbar.close()
+                snap.store(G)

        # Update magnetic field components
        update_magnetic(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsH, G.ID, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz)
@@ -453,7 +478,7 @@ def solve_gpu(currentmodelrun, modelend, G):
        drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
        drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)

-    # Electric and magnetic field updates - dispersive materials - get kernel functions
+    # Electric and magnetic field updates - dispersive materials - get kernel functions and initialise array on GPU
    if Material.maxpoles > 0:  # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values).
        update_e_dispersive_A_gpu = kernels_fields.get_function("update_e_dispersive_A")
        update_e_dispersive_B_gpu = kernels_fields.get_function("update_e_dispersive_B")
@@ -504,6 +529,14 @@ def solve_gpu(currentmodelrun, modelend, G):
            srcinfo1_voltage_gpu, srcinfo2_voltage_gpu, srcwaves_voltage_gpu = gpu_initialise_src_arrays(G.voltagesources, G)
            update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source")

+    # Snapshots - initialise arrays on GPU, prepare kernel and get kernel functions
+    if G.snapshots:
+        # Initialise arrays on GPU
+        snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu = gpu_initialise_snapshot_array(G)
+        # Prepare kernel and get kernel function
+        kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2]))
+        store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
+
    # Iteration loop timer
    iterstart = drv.Event()
    iterend = drv.Event()
@@ -513,10 +546,34 @@ def solve_gpu(currentmodelrun, modelend, G):

        # Store field component values for every receiver
        if G.rxs:
-            store_outputs_gpu(np.int32(len(G.rxs)), np.int32(iteration), rxcoords_gpu.gpudata, rxs_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(G.rxs)), 1, 1))
+            store_outputs_gpu(np.int32(len(G.rxs)), np.int32(iteration),
+                                rxcoords_gpu.gpudata, rxs_gpu.gpudata,
+                                G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+                                block=(1, 1, 1), grid=(round32(len(G.rxs)), 1, 1))
+
+        # Store any snapshots
+        for i, snap in enumerate(G.snapshots):
+            if snap.time == iteration + 1:
+                store_snapshot_gpu(np.int32(i), np.int32(snap.xs),
+                                    np.int32(snap.xf), np.int32(snap.ys),
+                                    np.int32(snap.yf), np.int32(snap.zs),
+                                    np.int32(snap.zf), np.int32(snap.dx),
+                                    np.int32(snap.dy), np.int32(snap.dz),
+                                    G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                    G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+                                    snapEx_gpu.gpudata, snapEy_gpu.gpudata, snapEz_gpu.gpudata,
+                                    snapHx_gpu.gpudata, snapHy_gpu.gpudata, snapHz_gpu.gpudata,
+                                    block=Snapshot.tpb, grid=Snapshot.bpg)
+                if G.snapsgpu2cpu:
+                    gpu_get_snapshot_array(snapEx_gpu.get(), snapEy_gpu.get(), snapEz_gpu.get(),
+                                            snapHx_gpu.get(), snapHy_gpu.get(), snapHz_gpu.get(), i, snap)

        # Update magnetic field components
-        update_h_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), G.ID_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=G.tpb, grid=G.bpg)
+        update_h_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
+                        G.ID_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata,
+                        G.Hz_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata,
+                        G.Ez_gpu.gpudata, block=G.tpb, grid=G.bpg)

        # Update magnetic field components with the PML correction
        for pml in G.pmls:
@@ -524,13 +581,29 @@ def solve_gpu(currentmodelrun, modelend, G):

        # Update magnetic field components for magetic dipole sources
        if G.magneticdipoles:
-            update_magnetic_dipole_gpu(np.int32(len(G.magneticdipoles)), np.int32(iteration), floattype(G.dx), floattype(G.dy), floattype(G.dz), srcinfo1_magnetic_gpu.gpudata, srcinfo2_magnetic_gpu.gpudata, srcwaves_magnetic_gpu.gpudata, G.ID_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(G.magneticdipoles)), 1, 1))
+            update_magnetic_dipole_gpu(np.int32(len(G.magneticdipoles)), np.int32(iteration),
+                                        floattype(G.dx), floattype(G.dy), floattype(G.dz),
+                                        srcinfo1_magnetic_gpu.gpudata, srcinfo2_magnetic_gpu.gpudata,
+                                        srcwaves_magnetic_gpu.gpudata, G.ID_gpu.gpudata,
+                                        G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+                                        block=(1, 1, 1), grid=(round32(len(G.magneticdipoles)), 1, 1))

        # Update electric field components
-        if Material.maxpoles == 0:  # If all materials are non-dispersive do standard update
-            update_e_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=G.tpb, grid=G.bpg)
-        else:  # If there are any dispersive materials do 1st part of dispersive update (it is split into two parts as it requires present and updated electric field values).
-            update_e_dispersive_A_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), np.int32(Material.maxpoles), G.updatecoeffsdispersive_gpu.gpudata, G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=G.tpb, grid=G.bpg)
+        # If all materials are non-dispersive do standard update
+        if Material.maxpoles == 0:
+            update_e_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), G.ID_gpu.gpudata,
+                            G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                            G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+                            block=G.tpb, grid=G.bpg)
+        # If there are any dispersive materials do 1st part of dispersive update
+        # (it is split into two parts as it requires present and updated electric field values).
+        else:
+            update_e_dispersive_A_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
+                                        np.int32(Material.maxpoles), G.updatecoeffsdispersive_gpu.gpudata,
+                                        G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata,
+                                        G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                        G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+                                        block=G.tpb, grid=G.bpg)

        # Update electric field components with the PML correction
        for pml in G.pmls:
@@ -538,20 +611,40 @@ def solve_gpu(currentmodelrun, modelend, G):

        # Update electric field components for voltage sources
        if G.voltagesources:
-            update_voltage_source_gpu(np.int32(len(G.voltagesources)), np.int32(iteration), floattype(G.dx), floattype(G.dy), floattype(G.dz), srcinfo1_voltage_gpu.gpudata, srcinfo2_voltage_gpu.gpudata, srcwaves_voltage_gpu.gpudata, G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(G.voltagesources)), 1, 1))
+            update_voltage_source_gpu(np.int32(len(G.voltagesources)), np.int32(iteration),
+                                        floattype(G.dx), floattype(G.dy), floattype(G.dz),
+                                        srcinfo1_voltage_gpu.gpudata, srcinfo2_voltage_gpu.gpudata,
+                                        srcwaves_voltage_gpu.gpudata, G.ID_gpu.gpudata,
+                                        G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                        block=(1, 1, 1), grid=(round32(len(G.voltagesources)), 1, 1))

        # Update electric field components for Hertzian dipole sources (update any Hertzian dipole sources last)
        if G.hertziandipoles:
-            update_hertzian_dipole_gpu(np.int32(len(G.hertziandipoles)), np.int32(iteration), floattype(G.dx), floattype(G.dy), floattype(G.dz), srcinfo1_hertzian_gpu.gpudata, srcinfo2_hertzian_gpu.gpudata, srcwaves_hertzian_gpu.gpudata, G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(G.hertziandipoles)), 1, 1))
+            update_hertzian_dipole_gpu(np.int32(len(G.hertziandipoles)), np.int32(iteration),
+                                        floattype(G.dx), floattype(G.dy), floattype(G.dz),
+                                        srcinfo1_hertzian_gpu.gpudata, srcinfo2_hertzian_gpu.gpudata,
+                                        srcwaves_hertzian_gpu.gpudata, G.ID_gpu.gpudata,
+                                        G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                        block=(1, 1, 1), grid=(round32(len(G.hertziandipoles)), 1, 1))

        # If there are any dispersive materials do 2nd part of dispersive update (it is split into two parts as it requires present and updated electric field values). Therefore it can only be completely updated after the electric field has been updated by the PML and source updates.
        if Material.maxpoles > 0:
-            update_e_dispersive_B_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), np.int32(Material.maxpoles), G.updatecoeffsdispersive_gpu.gpudata, G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=G.tpb, grid=G.bpg)
+            update_e_dispersive_B_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
+                                        np.int32(Material.maxpoles), G.updatecoeffsdispersive_gpu.gpudata,
+                                        G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata,
+                                        G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+                                        block=G.tpb, grid=G.bpg)

    # Copy output from receivers array back to correct receiver objects
    if G.rxs:
        gpu_get_rx_array(rxs_gpu.get(), rxcoords_gpu.get(), G)

+    # Copy data from any snapshots back to correct snapshot objects
+    if G.snapshots and not G.snapsgpu2cpu:
+        for i, snap in enumerate(G.snapshots):
+            gpu_get_snapshot_array(snapEx_gpu.get(), snapEy_gpu.get(), snapEz_gpu.get(),
+                                    snapHx_gpu.get(), snapHy_gpu.get(), snapHz_gpu.get(), i, snap)
+
    iterend.record()
    iterend.synchronize()
    tsolve = iterstart.time_till(iterend) * 1e-3
--- a/gprMax/snapshots.py
+++ b/gprMax/snapshots.py
@@ -23,15 +23,23 @@ from struct import pack
 import numpy as np

 from gprMax.constants import floattype
-from gprMax.grid import Ix
-from gprMax.grid import Iy
-from gprMax.grid import Iz
+from gprMax.snapshots_ext import calculate_snapshot_fields
 from gprMax.utilities import round_value


 class Snapshot(object):
    """Snapshots of the electric and magnetic field values."""

+    # Dimensions of largest requested snapshot
+    nx_max = 0
+    ny_max = 0
+    nz_max = 0
+
+    # GPU - threads per block
+    tpb = (1, 1, 1)
+    # GPU - blocks per grid - set according to largest requested snapshot
+    bpg = None
+
    # Set string for byte order
    if sys.byteorder == 'little':
        byteorder = 'LittleEndian'
@@ -49,8 +57,8 @@ class Snapshot(object):
    def __init__(self, xs=None, ys=None, zs=None, xf=None, yf=None, zf=None, dx=None, dy=None, dz=None, time=None, filename=None):
        """
        Args:
-            xs, xf, ys, yf, zs, zf (float): Extent of the volume.
-            dx, dy, dz (float): Spatial discretisation.
+            xs, xf, ys, yf, zs, zf (int): Extent of the volume in cells.
+            dx, dy, dz (int): Spatial discretisation in cells.
            time (int): Iteration number to take the snapshot on.
            filename (str): Filename to save to.
        """
@@ -64,92 +72,156 @@ class Snapshot(object):
        self.dx = dx
        self.dy = dy
        self.dz = dz
+        self.nx = round_value((self.xf - self.xs) / self.dx)
+        self.ny = round_value((self.yf - self.ys) / self.dy)
+        self.nz = round_value((self.zf - self.zs) / self.dz)
+        self.sx = slice(self.xs, self.xf + self.dx, self.dx)
+        self.sy = slice(self.ys, self.yf + self.dy, self.dy)
+        self.sz = slice(self.zs, self.zf + self.dz, self.dz)
+        self.ncells = self.nx * self.ny * self.nz
+        self.datasizefield = 3 * np.dtype(floattype).itemsize * self.ncells
+        self.vtkdatawritesize = 2 * self.datasizefield + 2 * np.dtype(np.uint32).itemsize
        self.time = time
        self.basefilename = filename

-    def prepare_vtk_imagedata(self, appendmodelnumber, G):
-        """Prepares a VTK ImageData (.vti) file for a snapshot.
+    def store(self, G):
+        """Store (in memory) electric and magnetic field values for snapshot.

        Args:
-            appendmodelnumber (str): Text to append to filename.
            G (class): Grid class instance - holds essential parameters describing the model.
        """

-        # No Python 3 support for VTK at time of writing (03/2015)
-        self.vtk_nx = self.xf - self.xs
-        self.vtk_ny = self.yf - self.ys
-        self.vtk_nz = self.zf - self.zs
+        # Memory views of field arrays to dimensions required for the snapshot
+        Exslice = np.ascontiguousarray(G.Ex[self.sx, self.sy, self.sz])
+        Eyslice = np.ascontiguousarray(G.Ey[self.sx, self.sy, self.sz])
+        Ezslice = np.ascontiguousarray(G.Ez[self.sx, self.sy, self.sz])
+        Hxslice = np.ascontiguousarray(G.Hx[self.sx, self.sy, self.sz])
+        Hyslice = np.ascontiguousarray(G.Hy[self.sx, self.sy, self.sz])
+        Hzslice = np.ascontiguousarray(G.Hz[self.sx, self.sy, self.sz])

-        # Create directory and construct filename from user-supplied name and model run number
-        snapshotdir = os.path.join(G.inputdirectory, os.path.splitext(G.inputfilename)[0] + '_snaps' + appendmodelnumber)
-        if not os.path.exists(snapshotdir):
-            os.mkdir(snapshotdir)
-        self.filename = os.path.abspath(os.path.join(snapshotdir, self.basefilename + '.vti'))
+        # Create arrays to hold the field data for snapshot
+        Exsnap = np.zeros((self.nx, self.ny, self.nz), dtype=floattype)
+        Eysnap = np.zeros((self.nx, self.ny, self.nz), dtype=floattype)
+        Ezsnap = np.zeros((self.nx, self.ny, self.nz), dtype=floattype)
+        Hxsnap = np.zeros((self.nx, self.ny, self.nz), dtype=floattype)
+        Hysnap = np.zeros((self.nx, self.ny, self.nz), dtype=floattype)
+        Hzsnap = np.zeros((self.nx, self.ny, self.nz), dtype=floattype)

-        # Calculate number of cells according to requested sampling
-        self.vtk_xscells = round_value(self.xs / self.dx)
-        self.vtk_xfcells = round_value(self.xf / self.dx)
-        self.vtk_yscells = round_value(self.ys / self.dy)
-        self.vtk_yfcells = round_value(self.yf / self.dy)
-        self.vtk_zscells = round_value(self.zs / self.dz)
-        self.vtk_zfcells = round_value(self.zf / self.dz)
-        vtk_hfield_offset = 3 * np.dtype(floattype).itemsize * (self.vtk_xfcells - self.vtk_xscells) * (self.vtk_yfcells - self.vtk_yscells) * (self.vtk_zfcells - self.vtk_zscells) + np.dtype(np.uint32).itemsize
-        vtk_current_offset = 2 * vtk_hfield_offset
+        # Calculate field values at points (comes from averaging field components in cells)
+        calculate_snapshot_fields(
+            self.nx,
+            self.ny,
+            self.nz,
+            Exslice,
+            Eyslice,
+            Ezslice,
+            Hxslice,
+            Hyslice,
+            Hzslice,
+            Exsnap,
+            Eysnap,
+            Ezsnap,
+            Hxsnap,
+            Hysnap,
+            Hzsnap)
+
+        # Convert to format for Paraview
+        self.electric = np.stack((Exsnap, Eysnap, Ezsnap)).reshape(-1, order='F')
+        self.magnetic = np.stack((Hxsnap, Hysnap, Hzsnap)).reshape(-1, order='F')
+
+    def write_vtk_imagedata(self, pbar, G):
+        """Write snapshot data to a VTK ImageData (.vti) file.
+
+            N.B. No Python 3 support for VTK at time of writing (03/2015)
+
+        Args:
+            pbar (class): Progress bar class instance.
+            G (class): Grid class instance - holds essential parameters describing the model.
+        """
+
+        hfield_offset = 3 * np.dtype(floattype).itemsize * self.ncells + np.dtype(np.uint32).itemsize

        self.filehandle = open(self.filename, 'wb')
        self.filehandle.write('<?xml version="1.0"?>\n'.encode('utf-8'))
        self.filehandle.write('<VTKFile type="ImageData" version="1.0" byte_order="{}">\n'.format(Snapshot.byteorder).encode('utf-8'))
-        self.filehandle.write('<ImageData WholeExtent="{} {} {} {} {} {}" Origin="0 0 0" Spacing="{:.3} {:.3} {:.3}">\n'.format(self.vtk_xscells, self.vtk_xfcells, self.vtk_yscells, self.vtk_yfcells, self.vtk_zscells, self.vtk_zfcells, self.dx * G.dx, self.dy * G.dy, self.dz * G.dz).encode('utf-8'))
-        self.filehandle.write('<Piece Extent="{} {} {} {} {} {}">\n'.format(self.vtk_xscells, self.vtk_xfcells, self.vtk_yscells, self.vtk_yfcells, self.vtk_zscells, self.vtk_zfcells).encode('utf-8'))
-        self.filehandle.write('<CellData Vectors="E-field H-field Current">\n'.encode('utf-8'))
+        self.filehandle.write('<ImageData WholeExtent="{} {} {} {} {} {}" Origin="0 0 0" Spacing="{:.3} {:.3} {:.3}">\n'.format(self.xs, round_value(self.xf / self.dx), self.ys, round_value(self.yf / self.dy), self.zs, round_value(self.zf / self.dz), self.dx * G.dx, self.dy * G.dy, self.dz * G.dz).encode('utf-8'))
+        self.filehandle.write('<Piece Extent="{} {} {} {} {} {}">\n'.format(self.xs, round_value(self.xf / self.dx), self.ys, round_value(self.yf / self.dy), self.zs, round_value(self.zf / self.dz)).encode('utf-8'))
+        self.filehandle.write('<CellData Vectors="E-field H-field">\n'.encode('utf-8'))
        self.filehandle.write('<DataArray type="{}" Name="E-field" NumberOfComponents="3" format="appended" offset="0" />\n'.format(Snapshot.floatname).encode('utf-8'))
-        self.filehandle.write('<DataArray type="{}" Name="H-field" NumberOfComponents="3" format="appended" offset="{}" />\n'.format(Snapshot.floatname, vtk_hfield_offset).encode('utf-8'))
-        self.filehandle.write('<DataArray type="{}" Name="Current" NumberOfComponents="3" format="appended" offset="{}" />\n'.format(Snapshot.floatname, vtk_current_offset).encode('utf-8'))
+        self.filehandle.write('<DataArray type="{}" Name="H-field" NumberOfComponents="3" format="appended" offset="{}" />\n'.format(Snapshot.floatname, hfield_offset).encode('utf-8'))
        self.filehandle.write('</CellData>\n</Piece>\n</ImageData>\n<AppendedData encoding="raw">\n_'.encode('utf-8'))
-        self.filehandle.close()

-    def write_vtk_imagedata(self, Ex, Ey, Ez, Hx, Hy, Hz, G, pbar):
-        """Writes electric and magnetic field values to VTK ImageData (.vti) file.
-
-        Args:
-            Ex, Ey, Ez, Hx, Hy, Hz (memory view): Electric and magnetic field values.
-            G (class): Grid class instance - holds essential parameters describing the model.
-            pbar (class): Progress bar class instance.
-        """
-
-        self.filehandle = open(self.filename, 'ab')
-
-        datasize = 3 * np.dtype(floattype).itemsize * (self.vtk_xfcells - self.vtk_xscells) * (self.vtk_yfcells - self.vtk_yscells) * (self.vtk_zfcells - self.vtk_zscells)
        # Write number of bytes of appended data as UInt32
-        self.filehandle.write(pack('I', datasize))
-        for k in range(self.zs, self.zf, self.dz):
-            for j in range(self.ys, self.yf, self.dy):
-                for i in range(self.xs, self.xf, self.dx):
-                    pbar.update(n=12)
-                    # The electric field component value at a point comes from average of the 4 electric field component values in that cell
-                    self.filehandle.write(pack(Snapshot.floatstring, (Ex[i, j, k] + Ex[i, j + 1, k] + Ex[i, j, k + 1] + Ex[i, j + 1, k + 1]) / 4))
-                    self.filehandle.write(pack(Snapshot.floatstring, (Ey[i, j, k] + Ey[i + 1, j, k] + Ey[i, j, k + 1] + Ey[i + 1, j, k + 1]) / 4))
-                    self.filehandle.write(pack(Snapshot.floatstring, (Ez[i, j, k] + Ez[i + 1, j, k] + Ez[i, j + 1, k] + Ez[i + 1, j + 1, k]) / 4))
+        self.filehandle.write(pack('I', self.datasizefield))
+        pbar.update(n=4)
+        self.electric.tofile(self.filehandle)
+        pbar.update(n=self.datasizefield)

-        self.filehandle.write(pack('I', datasize))
-        for k in range(self.zs, self.zf, self.dz):
-            for j in range(self.ys, self.yf, self.dy):
-                for i in range(self.xs, self.xf, self.dx):
-                    pbar.update(n=12)
-                    # The magnetic field component value at a point comes from average
-                    # of 2 magnetic field component values in that cell and the following cell
-                    self.filehandle.write(pack(Snapshot.floatstring, (Hx[i, j, k] + Hx[i + 1, j, k]) / 2))
-                    self.filehandle.write(pack(Snapshot.floatstring, (Hy[i, j, k] + Hy[i, j + 1, k]) / 2))
-                    self.filehandle.write(pack(Snapshot.floatstring, (Hz[i, j, k] + Hz[i, j, k + 1]) / 2))
-
-        self.filehandle.write(pack('I', datasize))
-        for k in range(self.zs, self.zf, self.dz):
-            for j in range(self.ys, self.yf, self.dy):
-                for i in range(self.xs, self.xf, self.dx):
-                    pbar.update(n=12)
-                    self.filehandle.write(pack(Snapshot.floatstring, Ix(i, j, k, Hx, Hy, Hz, G)))
-                    self.filehandle.write(pack(Snapshot.floatstring, Iy(i, j, k, Hx, Hy, Hz, G)))
-                    self.filehandle.write(pack(Snapshot.floatstring, Iz(i, j, k, Hx, Hy, Hz, G)))
+        # Write number of bytes of appended data as UInt32
+        self.filehandle.write(pack('I', self.datasizefield))
+        pbar.update(n=4)
+        self.magnetic.tofile(self.filehandle)
+        pbar.update(n=self.datasizefield)

        self.filehandle.write('\n</AppendedData>\n</VTKFile>'.encode('utf-8'))
        self.filehandle.close()
+
+
+def gpu_initialise_snapshot_array(G):
+    """Initialise array on GPU for to store field data for snapshots.
+
+    Args:
+        G (class): Grid class instance - holds essential parameters describing the model.
+
+    Returns:
+        snapE*_gpu, snapH*_gpu (float): numpy arrays of snapshot data on GPU.
+    """
+
+    import pycuda.gpuarray as gpuarray
+
+    # Get dimensions of largest requested snapshot
+    for snap in G.snapshots:
+        if snap.nx > Snapshot.nx_max:
+            Snapshot.nx_max = snap.nx
+        if snap.ny > Snapshot.ny_max:
+            Snapshot.ny_max = snap.ny
+        if snap.nz > Snapshot.nz_max:
+            Snapshot.nz_max = snap.nz
+
+    # GPU - blocks per grid - according to largest requested snapshot
+    Snapshot.bpg = (int(np.ceil(((Snapshot.nx_max) * (Snapshot.ny_max) * (Snapshot.nz_max)) / Snapshot.tpb[0])), 1, 1)
+
+    # 4D arrays to store snapshots on GPU, e.g. snapEx(time, x, y, z)
+    snapEx = np.zeros((len(G.snapshots), Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=floattype)
+    snapEy = np.zeros((len(G.snapshots), Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=floattype)
+    snapEz = np.zeros((len(G.snapshots), Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=floattype)
+    snapHx = np.zeros((len(G.snapshots), Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=floattype)
+    snapHy = np.zeros((len(G.snapshots), Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=floattype)
+    snapHz = np.zeros((len(G.snapshots), Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=floattype)
+
+    # Copy arrays to GPU
+    snapEx_gpu = gpuarray.to_gpu(snapEx)
+    snapEy_gpu = gpuarray.to_gpu(snapEy)
+    snapEz_gpu = gpuarray.to_gpu(snapEz)
+    snapHx_gpu = gpuarray.to_gpu(snapHx)
+    snapHy_gpu = gpuarray.to_gpu(snapHy)
+    snapHz_gpu = gpuarray.to_gpu(snapHz)
+
+    return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu
+
+
+def gpu_get_snapshot_array(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap):
+    """Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview.
+
+    Args:
+        snapE_gpu, snapH_gpu (float): numpy arrays of snapshot data from GPU.
+        i (int): index for snapshot data on GPU array.
+        snap (class): Snapshot class instance
+    """
+
+    snap.electric = np.stack((snapEx_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf],
+                                    snapEy_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf],
+                                    snapEz_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf])).reshape(-1, order='F')
+    snap.magnetic = np.stack((snapHx_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf],
+                                    snapHy_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf],
+                                    snapHz_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf])).reshape(-1, order='F')
--- a/gprMax/snapshots_ext.pyx
+++ b/gprMax/snapshots_ext.pyx
@@ -0,0 +1,71 @@
+# Copyright (C) 2015-2018: The University of Edinburgh
+#                 Authors: Craig Warren and Antonis Giannopoulos
+#
+# This file is part of gprMax.
+#
+# gprMax is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# gprMax is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with gprMax.  If not, see <http://www.gnu.org/licenses/>.
+
+cimport numpy as np
+
+from gprMax.constants cimport floattype_t
+
+
+cpdef void calculate_snapshot_fields(
+                    int nx,
+                    int ny,
+                    int nz,
+                    floattype_t[:, :, ::1] sliceEx,
+                    floattype_t[:, :, ::1] sliceEy,
+                    floattype_t[:, :, ::1] sliceEz,
+                    floattype_t[:, :, ::1] sliceHx,
+                    floattype_t[:, :, ::1] sliceHy,
+                    floattype_t[:, :, ::1] sliceHz,
+                    floattype_t[:, :, ::1] snapEx,
+                    floattype_t[:, :, ::1] snapEy,
+                    floattype_t[:, :, ::1] snapEz,
+                    floattype_t[:, :, ::1] snapHx,
+                    floattype_t[:, :, ::1] snapHy,
+                    floattype_t[:, :, ::1] snapHz
+            ):
+    """This function calculates electric and magnetic values at points from
+        averaging values in cells
+
+    Args:
+        nx, ny, nz (int): Size of snapshot array
+        sliceEx, sliceEy, sliceEz,
+            sliceHx, sliceHy, sliceHz (memoryview): Access to slices of field arrays
+        snapEx, snapEy, snapEz, snapHx,
+            snapHy, snapHz (memoryview): Access to snapshot arrays
+    """
+
+    cdef Py_ssize_t i, j, k
+
+    for i in range(nx):
+        for j in range(ny):
+            for k in range(nz):
+
+                # The electric field component value at a point comes from
+                # average of the 4 electric field component values in that cell
+                snapEx[i, j, k] = (sliceEx[i, j, k] + sliceEx[i, j + 1, k] +
+                                    sliceEx[i, j, k + 1] + sliceEx[i, j + 1, k + 1]) / 4
+                snapEy[i, j, k] = (sliceEy[i, j, k] + sliceEy[i + 1, j, k] +
+                                    sliceEy[i, j, k + 1] + sliceEy[i + 1, j, k + 1]) / 4
+                snapEz[i, j, k] = (sliceEz[i, j, k] + sliceEz[i + 1, j, k] +
+                                    sliceEz[i, j + 1, k] + sliceEz[i + 1, j + 1, k]) / 4
+
+                # The magnetic field component value at a point comes from average
+                # of 2 magnetic field component values in that cell and the following cell
+                snapHx[i, j, k] = (sliceHx[i, j, k] + sliceHx[i + 1, j, k]) / 2
+                snapHy[i, j, k] = (sliceHy[i, j, k] + sliceHy[i, j + 1, k]) / 2
+                snapHz[i, j, k] = (sliceHz[i, j, k] + sliceHz[i, j, k + 1]) / 2
--- a/gprMax/snapshots_gpu.py
+++ b/gprMax/snapshots_gpu.py
@@ -0,0 +1,79 @@
+# Copyright (C) 2015-2018: The University of Edinburgh
+#                 Authors: Craig Warren and Antonis Giannopoulos
+#
+# This file is part of gprMax.
+#
+# gprMax is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# gprMax is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with gprMax.  If not, see <http://www.gnu.org/licenses/>.
+
+from string import Template
+
+kernel_template_store_snapshot = Template("""
+
+// Macros for converting subscripts to linear index:
+#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k)
+#define INDEX4D_SNAPS(p, i, j, k) (p)*($NX_SNAPS)*($NY_SNAPS)*($NZ_SNAPS)+(i)*($NY_SNAPS)*($NZ_SNAPS)+(j)*($NZ_SNAPS)+(k)
+
+////////////////////
+// Store snapshot //
+////////////////////
+
+__global__ void store_snapshot(int p, int xs, int xf, int ys, int yf, int zs, int zf, int dx, int dy, int dz,
+                                const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey,
+                                const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx,
+                                const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz,
+                                $REAL *snapEx, $REAL *snapEy, $REAL *snapEz,
+                                $REAL *snapHx, $REAL *snapHy, $REAL *snapHz) {
+
+    //  This function stores field values for a snapshot.
+    //
+    //  Args:
+    //      p: Snapshot number
+    //      xs, xf, ys, yf, xs, xf: Start and finish cell coordinates for snapshot
+    //      dx, dy, dz: Sampling interval in cell coordinates for snapshot
+    //      E, H: Access to field component arrays
+    //      snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots
+
+    // Obtain the linear index corresponding to the current thread
+    int idx = blockIdx.x * blockDim.x + threadIdx.x;
+
+    // Convert the linear index to subscripts for 4D SNAPS array
+    int i = (idx % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) / ($NY_SNAPS * $NZ_SNAPS);
+    int j = ((idx % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) / $NZ_SNAPS;
+    int k = ((idx % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) % $NZ_SNAPS;
+
+    // Subscripts for field arrays
+    int ii, jj, kk;
+
+    if (i >= xs && i < xf && j >= ys && j < yf && k >= zs && k < zf) {
+
+        // Increment subscripts for field array to account for spatial sampling of snapshot
+        ii = (xs + i) * dx;
+        jj = (ys + j) * dy;
+        kk = (zs + k) * dz;
+
+        // The electric field component value at a point comes from an average of
+        // the 4 electric field component values in that cell
+        snapEx[INDEX4D_SNAPS(p,i,j,k)] = (Ex[INDEX3D_FIELDS(ii,jj,kk)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk)] + Ex[INDEX3D_FIELDS(ii,jj,kk+1)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk+1)]) / 4;
+        snapEy[INDEX4D_SNAPS(p,i,j,k)] = (Ey[INDEX3D_FIELDS(ii,jj,kk)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk)] + Ey[INDEX3D_FIELDS(ii,jj,kk+1)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk+1)]) / 4;
+        snapEz[INDEX4D_SNAPS(p,i,j,k)] = (Ez[INDEX3D_FIELDS(ii,jj,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj,kk)] + Ez[INDEX3D_FIELDS(ii,jj+1,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj+1,kk)]) / 4;
+
+        // The magnetic field component value at a point comes from average of
+        // 2 magnetic field component values in that cell and the following cell
+        snapHx[INDEX4D_SNAPS(p,i,j,k)] = (Hx[INDEX3D_FIELDS(ii,jj,kk)] + Hx[INDEX3D_FIELDS(ii+1,jj,kk)]) / 2;
+        snapHy[INDEX4D_SNAPS(p,i,j,k)] = (Hy[INDEX3D_FIELDS(ii,jj,kk)] + Hy[INDEX3D_FIELDS(ii,jj+1,kk)]) / 2;
+        snapHz[INDEX4D_SNAPS(p,i,j,k)] = (Hz[INDEX3D_FIELDS(ii,jj,kk)] + Hz[INDEX3D_FIELDS(ii,jj,kk+1)]) / 2;
+    }
+}
+
+""")