From c7eac5bee3130c79efc7ce543e941867f804875e Mon Sep 17 00:00:00 2001
From: Craig Warren <cwarren@C17776592.local>
Date: Mon, 3 Jun 2019 09:23:14 +0100
Subject: [PATCH 01/13] Updated notes on using rx with an ID and specifying
 field components on GPU.

---
 docs/source/input.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/docs/source/input.rst b/docs/source/input.rst
index b5e9623b..2c75ed22 100644
--- a/docs/source/input.rst
+++ b/docs/source/input.rst
@@ -834,7 +834,8 @@ Allows you to introduce output points into the model. These are locations where
 
 .. note::
 
-    * When the optional parameters ``str1`` and ``str2`` are not given all the electric and magnetic field components will be output with the receiver point.
+    * When the optional parameters ``str1`` and ``str2`` are not given the default electric and magnetic field components (``Ex``, ``Ey``, ``Ez``, ``Hx``, ``Hy``, ``Hz``) will be output with the receiver point.
+    * ``str2`` has no effect if solving using GPU(s), i.e. the default electric and magnetic components (``Ex``, ``Ey``, ``Ez``, ``Hx``, ``Hy``, ``Hz``) will allways be output irrespective of the value of ``str2``.
 
 #rx_array:
 ----------

From 226843b70086f5ea3dc5761998bd52eb2a91feed Mon Sep 17 00:00:00 2001
From: Craig Warren <cwarren@C17776592.local>
Date: Tue, 4 Jun 2019 16:35:11 +0100
Subject: [PATCH 02/13] Added specific conda environment for macOS to get
 around MKL threading bug - use OpenBLAS instead.

---
 conda_env_macOS.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conda_env_macOS.yml b/conda_env_macOS.yml
index 826dd5c0..0d17ce8f 100644
--- a/conda_env_macOS.yml
+++ b/conda_env_macOS.yml
@@ -1,7 +1,7 @@
 # This file may be used to create a conda environment using:
 # $ conda create --name gprMax --file <this file>
 
-name: gprMax
+name: gprMax_openblas
 dependencies:
 - python>3.6
 - blas=*=openblas

From 0048f6c249c56cdc3573cbe6e8e7fe31cb6ba045 Mon Sep 17 00:00:00 2001
From: Craig Warren <cwarren@C17776592.local>
Date: Tue, 4 Jun 2019 16:37:07 +0100
Subject: [PATCH 03/13] Revert conda environment name.

---
 conda_env_macOS.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/conda_env_macOS.yml b/conda_env_macOS.yml
index 0d17ce8f..826dd5c0 100644
--- a/conda_env_macOS.yml
+++ b/conda_env_macOS.yml
@@ -1,7 +1,7 @@
 # This file may be used to create a conda environment using:
 # $ conda create --name gprMax --file <this file>
 
-name: gprMax_openblas
+name: gprMax
 dependencies:
 - python>3.6
 - blas=*=openblas

From 84fbc065fd918cfc0981929f1e8cb4c131733b82 Mon Sep 17 00:00:00 2001
From: Craig Warren <cwarren@C17776592.local>
Date: Fri, 5 Jul 2019 11:45:42 +0100
Subject: [PATCH 04/13] Workaround for MKL threading bug on macOS.

---
 conda_env_macOS.yml | 22 ----------------------
 gprMax/gprMax.py    |  6 ++++++
 2 files changed, 6 insertions(+), 22 deletions(-)
 delete mode 100644 conda_env_macOS.yml

diff --git a/conda_env_macOS.yml b/conda_env_macOS.yml
deleted file mode 100644
index 826dd5c0..00000000
--- a/conda_env_macOS.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-# This file may be used to create a conda environment using:
-# $ conda create --name gprMax --file <this file>
-
-name: gprMax
-dependencies:
-- python>3.6
-- blas=*=openblas
-- colorama
-- cython
-- h5py
-- jupyter
-- matplotlib
-- numpy
-- pip
-- psutil
-- scipy
-
-- pip:
-    - terminaltables
-    - tqdm
-# - mpi4py
-# - pycuda
diff --git a/gprMax/gprMax.py b/gprMax/gprMax.py
index 8538d228..0ebf3f06 100644
--- a/gprMax/gprMax.py
+++ b/gprMax/gprMax.py
@@ -25,6 +25,12 @@ import sys
 
 from enum import Enum
 
+# There is a bug with threading and MKL on macOS
+# (https://github.com/gprMax/gprMax/issues/195) . Setting the MKL threading
+# layer to sequential solves it, but must be done before numpy is imported.
+if sys.platform == 'darwin':
+    os.environ["MKL_THREADING_LAYER"] = 'sequential'
+
 import h5py
 import numpy as np
 

From c9902b2909daaaa18bbd6d4b31a685e25cad4f13 Mon Sep 17 00:00:00 2001
From: Craig Warren <cwarren@C17776592.local>
Date: Wed, 31 Jul 2019 09:41:43 +0100
Subject: [PATCH 05/13] Only initialise field/PML arrays on host when using
 CPU.

---
 gprMax/grid.py            | 12 ++++++------
 gprMax/model_build_run.py | 14 ++++++++------
 gprMax/pml.py             | 25 ++++++++++++++++++-------
 3 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/gprMax/grid.py b/gprMax/grid.py
index 1467c154..8ab8fdb9 100644
--- a/gprMax/grid.py
+++ b/gprMax/grid.py
@@ -254,12 +254,12 @@ class FDTDGrid(Grid):
         import pycuda.gpuarray as gpuarray
 
         self.ID_gpu = gpuarray.to_gpu(self.ID)
-        self.Ex_gpu = gpuarray.to_gpu(self.Ex)
-        self.Ey_gpu = gpuarray.to_gpu(self.Ey)
-        self.Ez_gpu = gpuarray.to_gpu(self.Ez)
-        self.Hx_gpu = gpuarray.to_gpu(self.Hx)
-        self.Hy_gpu = gpuarray.to_gpu(self.Hy)
-        self.Hz_gpu = gpuarray.to_gpu(self.Hz)
+        self.Ex_gpu = gpuarray.to_gpu(np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=floattype))
+        self.Ey_gpu = gpuarray.to_gpu(np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=floattype))
+        self.Ez_gpu = gpuarray.to_gpu(np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=floattype))
+        self.Hx_gpu = gpuarray.to_gpu(np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=floattype))
+        self.Hy_gpu = gpuarray.to_gpu(np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=floattype))
+        self.Hz_gpu = gpuarray.to_gpu(np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=floattype))
 
     def gpu_initialise_dispersive_arrays(self):
         """Initialise dispersive material coefficient arrays on GPU."""
diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py
index a21653aa..ba9e7aff 100644
--- a/gprMax/model_build_run.py
+++ b/gprMax/model_build_run.py
@@ -180,7 +180,8 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern
         G.initialise_geometry_arrays()
 
         # Initialise arrays for the field components
-        G.initialise_field_arrays()
+        if G.gpu is None:
+            G.initialise_field_arrays()
 
         # Process geometry commands in the order they were given
         process_geometrycmds(geometry, G)
@@ -296,12 +297,13 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern
         if G.messages:
             print(Fore.GREEN + '{} {}\n'.format(inputfilestr, '-' * (get_terminal_width() - 1 - len(inputfilestr))) + Style.RESET_ALL)
 
-        # Clear arrays for field components
-        G.initialise_field_arrays()
+        if G.gpu is None:
+            # Clear arrays for field components
+            G.initialise_field_arrays()
 
-        # Clear arrays for fields in PML
-        for pml in G.pmls:
-            pml.initialise_field_arrays()
+            # Clear arrays for fields in PML
+            for pml in G.pmls:
+                pml.initialise_field_arrays()
 
     # Adjust position of simple sources and receivers if required
     if G.srcsteps[0] != 0 or G.srcsteps[1] != 0 or G.srcsteps[2] != 0:
diff --git a/gprMax/pml.py b/gprMax/pml.py
index 7b7a950e..f1124f84 100644
--- a/gprMax/pml.py
+++ b/gprMax/pml.py
@@ -196,7 +196,8 @@ class PML(object):
 
         self.CFS = G.cfs
 
-        self.initialise_field_arrays()
+        if G.gpu is None:
+            self.initialise_field_arrays()
 
     def initialise_field_arrays(self):
         """Initialise arrays to store fields in PML."""
@@ -308,19 +309,31 @@ class PML(object):
 
         import pycuda.gpuarray as gpuarray
 
-        self.EPhi1_gpu = gpuarray.to_gpu(self.EPhi1)
-        self.EPhi2_gpu = gpuarray.to_gpu(self.EPhi2)
         self.ERA_gpu = gpuarray.to_gpu(self.ERA)
         self.ERB_gpu = gpuarray.to_gpu(self.ERB)
         self.ERE_gpu = gpuarray.to_gpu(self.ERE)
         self.ERF_gpu = gpuarray.to_gpu(self.ERF)
-        self.HPhi1_gpu = gpuarray.to_gpu(self.HPhi1)
-        self.HPhi2_gpu = gpuarray.to_gpu(self.HPhi2)
         self.HRA_gpu = gpuarray.to_gpu(self.HRA)
         self.HRB_gpu = gpuarray.to_gpu(self.HRB)
         self.HRE_gpu = gpuarray.to_gpu(self.HRE)
         self.HRF_gpu = gpuarray.to_gpu(self.HRF)
 
+        if self.direction[0] == 'x':
+            self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype))
+            self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype))
+            self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype))
+            self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype))
+        elif self.direction[0] == 'y':
+            self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype))
+            self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype))
+            self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype))
+            self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype))
+        elif self.direction[0] == 'z':
+            self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype))
+            self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype))
+            self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype))
+            self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype))
+
     def gpu_get_update_funcs(self, kernelselectric, kernelsmagnetic):
         """Get update functions from PML kernels.
 
@@ -329,8 +342,6 @@ class PML(object):
             kernelsmagnetic: PyCuda SourceModule containing PML kernels for magnetic updates.
         """
 
-        from pycuda.compiler import SourceModule
-
         self.update_electric_gpu = kernelselectric.get_function('order' + str(len(self.CFS)) + '_' + self.direction)
         self.update_magnetic_gpu = kernelsmagnetic.get_function('order' + str(len(self.CFS)) + '_' + self.direction)
 

From 097bd1e28307b161b976efe7d8ff279ccc8c9310 Mon Sep 17 00:00:00 2001
From: Craig Warren <cwarren@C17776592.local>
Date: Wed, 31 Jul 2019 09:58:08 +0100
Subject: [PATCH 06/13] Updated checking of GPU field array shapes.

---
 gprMax/model_build_run.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py
index ba9e7aff..b0f5bc94 100644
--- a/gprMax/model_build_run.py
+++ b/gprMax/model_build_run.py
@@ -503,9 +503,9 @@ def solve_gpu(currentmodelrun, modelend, G):
 
     # Electric and magnetic field updates - prepare kernels, and get kernel functions
     if Material.maxpoles > 0:
-        kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts)
+        kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts)
     else:   # Set to one any substitutions for dispersive materials
-        kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts)
+        kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts)
     update_e_gpu = kernels_fields.get_function("update_e")
     update_h_gpu = kernels_fields.get_function("update_h")
 
@@ -535,8 +535,8 @@ def solve_gpu(currentmodelrun, modelend, G):
         kernelelectricfunc = getattr(import_module(pmlmodulelectric), 'kernels_template_pml_electric_' + G.pmlformulation)
         pmlmodulemagnetic = 'gprMax.pml_updates.pml_updates_magnetic_' + G.pmlformulation + '_gpu'
         kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic), 'kernels_template_pml_magnetic_' + G.pmlformulation)
-        kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
-        kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=G.Hx.shape[0], NY_FIELDS=G.Hx.shape[1], NZ_FIELDS=G.Hx.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
+        kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
+        kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
         # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for PML kernels
         updatecoeffsE = kernels_pml_electric.get_global('updatecoeffsE')[0]
         updatecoeffsH = kernels_pml_magnetic.get_global('updatecoeffsH')[0]
@@ -553,12 +553,12 @@ def solve_gpu(currentmodelrun, modelend, G):
         # Initialise arrays on GPU
         rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G)
         # Prepare kernel and get kernel function
-        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2]), options=compiler_opts)
+        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), (G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts)
         store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
 
     # Sources - initialise arrays on GPU, prepare kernel and get kernel functions
     if G.voltagesources + G.hertziandipoles + G.magneticdipoles:
-        kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
+        kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
         # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for source kernels
         updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0]
         updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0]
@@ -579,7 +579,7 @@ def solve_gpu(currentmodelrun, modelend, G):
         # Initialise arrays on GPU
         snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu = gpu_initialise_snapshot_array(G)
         # Prepare kernel and get kernel function
-        kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2]), options=compiler_opts)
+        kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts)
         store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
 
     # Iteration loop timer

From 59adcde08b91d92d9f2ce1d3799a3cc7d7c6fc33 Mon Sep 17 00:00:00 2001
From: Craig Warren <cwarren@C17776592.local>
Date: Wed, 31 Jul 2019 09:59:35 +0100
Subject: [PATCH 07/13] Updated checking of GPU field array shapes.

---
 gprMax/model_build_run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py
index b0f5bc94..70517b44 100644
--- a/gprMax/model_build_run.py
+++ b/gprMax/model_build_run.py
@@ -553,7 +553,7 @@ def solve_gpu(currentmodelrun, modelend, G):
         # Initialise arrays on GPU
         rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G)
         # Prepare kernel and get kernel function
-        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), (G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts)
+        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts)
         store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
 
     # Sources - initialise arrays on GPU, prepare kernel and get kernel functions

From bad65bd32d79f0f9322d0ba685363f2273f1802d Mon Sep 17 00:00:00 2001
From: Craig Warren <cwarren@C17776592.local>
Date: Wed, 31 Jul 2019 10:05:37 +0100
Subject: [PATCH 08/13] Updated checking of GPU field array shapes.

---
 gprMax/model_build_run.py | 16 ++++++++--------
 gprMax/pml.py             |  2 +-
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py
index 70517b44..10c1a8a6 100644
--- a/gprMax/model_build_run.py
+++ b/gprMax/model_build_run.py
@@ -503,9 +503,9 @@ def solve_gpu(currentmodelrun, modelend, G):
 
     # Electric and magnetic field updates - prepare kernels, and get kernel functions
     if Material.maxpoles > 0:
-        kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts)
+        kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts)
     else:   # Set to one any substitutions for dispersive materials
-        kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts)
+        kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts)
     update_e_gpu = kernels_fields.get_function("update_e")
     update_h_gpu = kernels_fields.get_function("update_h")
 
@@ -535,8 +535,8 @@ def solve_gpu(currentmodelrun, modelend, G):
         kernelelectricfunc = getattr(import_module(pmlmodulelectric), 'kernels_template_pml_electric_' + G.pmlformulation)
         pmlmodulemagnetic = 'gprMax.pml_updates.pml_updates_magnetic_' + G.pmlformulation + '_gpu'
         kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic), 'kernels_template_pml_magnetic_' + G.pmlformulation)
-        kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
-        kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
+        kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
+        kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
         # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for PML kernels
         updatecoeffsE = kernels_pml_electric.get_global('updatecoeffsE')[0]
         updatecoeffsH = kernels_pml_magnetic.get_global('updatecoeffsH')[0]
@@ -544,21 +544,21 @@ def solve_gpu(currentmodelrun, modelend, G):
         drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)
         # Set block per grid, initialise arrays on GPU, and get kernel functions
         for pml in G.pmls:
-            pml.gpu_set_blocks_per_grid(G)
             pml.gpu_initialise_arrays()
             pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
+            pml.gpu_set_blocks_per_grid(G)
 
     # Receivers
     if G.rxs:
         # Initialise arrays on GPU
         rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G)
         # Prepare kernel and get kernel function
-        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts)
+        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), options=compiler_opts)
         store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
 
     # Sources - initialise arrays on GPU, prepare kernel and get kernel functions
     if G.voltagesources + G.hertziandipoles + G.magneticdipoles:
-        kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
+        kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
         # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for source kernels
         updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0]
         updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0]
@@ -579,7 +579,7 @@ def solve_gpu(currentmodelrun, modelend, G):
         # Initialise arrays on GPU
         snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu = gpu_initialise_snapshot_array(G)
         # Prepare kernel and get kernel function
-        kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts)
+        kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1)), options=compiler_opts)
         store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
 
     # Iteration loop timer
diff --git a/gprMax/pml.py b/gprMax/pml.py
index f1124f84..4edc5968 100644
--- a/gprMax/pml.py
+++ b/gprMax/pml.py
@@ -302,7 +302,7 @@ class PML(object):
             G (class): Grid class instance - holds essential parameters describing the model.
         """
 
-        self.bpg = (int(np.ceil(((self.EPhi1.shape[1] + 1) * (self.EPhi1.shape[2] + 1) * (self.EPhi1.shape[3] + 1)) / G.tpb[0])), 1, 1)
+        self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) * (self.EPhi1_gpu.shape[2] + 1) * (self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1)
 
     def gpu_initialise_arrays(self):
         """Initialise PML field and coefficient arrays on GPU."""

From ed70d0634dd8b1c8b00f33b4e0a3224f4f659e56 Mon Sep 17 00:00:00 2001
From: Craig Warren <cwarren@C17776592.local>
Date: Wed, 31 Jul 2019 10:07:03 +0100
Subject: [PATCH 09/13] Removed extra bracket.

---
 gprMax/model_build_run.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py
index 10c1a8a6..71382aaa 100644
--- a/gprMax/model_build_run.py
+++ b/gprMax/model_build_run.py
@@ -503,9 +503,9 @@ def solve_gpu(currentmodelrun, modelend, G):
 
     # Electric and magnetic field updates - prepare kernels, and get kernel functions
     if Material.maxpoles > 0:
-        kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts)
+        kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts)
     else:   # Set to one any substitutions for dispersive materials
-        kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts)
+        kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts)
     update_e_gpu = kernels_fields.get_function("update_e")
     update_h_gpu = kernels_fields.get_function("update_h")
 
@@ -535,8 +535,8 @@ def solve_gpu(currentmodelrun, modelend, G):
         kernelelectricfunc = getattr(import_module(pmlmodulelectric), 'kernels_template_pml_electric_' + G.pmlformulation)
         pmlmodulemagnetic = 'gprMax.pml_updates.pml_updates_magnetic_' + G.pmlformulation + '_gpu'
         kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic), 'kernels_template_pml_magnetic_' + G.pmlformulation)
-        kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
-        kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
+        kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
+        kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
         # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for PML kernels
         updatecoeffsE = kernels_pml_electric.get_global('updatecoeffsE')[0]
         updatecoeffsH = kernels_pml_magnetic.get_global('updatecoeffsH')[0]
@@ -553,12 +553,12 @@ def solve_gpu(currentmodelrun, modelend, G):
         # Initialise arrays on GPU
         rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G)
         # Prepare kernel and get kernel function
-        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), options=compiler_opts)
+        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, options=compiler_opts)
         store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
 
     # Sources - initialise arrays on GPU, prepare kernel and get kernel functions
     if G.voltagesources + G.hertziandipoles + G.magneticdipoles:
-        kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
+        kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
         # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for source kernels
         updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0]
         updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0]
@@ -579,7 +579,7 @@ def solve_gpu(currentmodelrun, modelend, G):
         # Initialise arrays on GPU
         snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu = gpu_initialise_snapshot_array(G)
         # Prepare kernel and get kernel function
-        kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1)), options=compiler_opts)
+        kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), options=compiler_opts)
         store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
 
     # Iteration loop timer

From 6d7f36af799915bb40f3dc27640753c2eb4122e3 Mon Sep 17 00:00:00 2001
From: Craig Warren <cwarren@C17776592.local>
Date: Wed, 31 Jul 2019 10:07:46 +0100
Subject: [PATCH 10/13] Corrected extra bracket.

---
 gprMax/model_build_run.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py
index 71382aaa..dde372b1 100644
--- a/gprMax/model_build_run.py
+++ b/gprMax/model_build_run.py
@@ -553,7 +553,7 @@ def solve_gpu(currentmodelrun, modelend, G):
         # Initialise arrays on GPU
         rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G)
         # Prepare kernel and get kernel function
-        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, options=compiler_opts)
+        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), options=compiler_opts)
         store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
 
     # Sources - initialise arrays on GPU, prepare kernel and get kernel functions

From dc17a9a94b8505b607898555992bb9cd324ab118 Mon Sep 17 00:00:00 2001
From: Craig Warren <cwarren@C17776592.local>
Date: Wed, 31 Jul 2019 10:10:16 +0100
Subject: [PATCH 11/13] Corrected refs to GPU arrays.

---
 gprMax/pml.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gprMax/pml.py b/gprMax/pml.py
index 4edc5968..f0570c08 100644
--- a/gprMax/pml.py
+++ b/gprMax/pml.py
@@ -352,7 +352,7 @@ class PML(object):
             G (class): Grid class instance - holds essential parameters describing the model.
         """
 
-        self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.EPhi1.shape[1]), np.int32(self.EPhi1.shape[2]), np.int32(self.EPhi1.shape[3]), np.int32(self.EPhi2.shape[1]), np.int32(self.EPhi2.shape[2]), np.int32(self.EPhi2.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
+        self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.EPhi1_gpu.shape[1]), np.int32(self.EPhi1_gpu.shape[2]), np.int32(self.EPhi1_gpu.shape[3]), np.int32(self.EPhi2_gpu.shape[1]), np.int32(self.EPhi2_gpu.shape[2]), np.int32(self.EPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
 
     def gpu_update_magnetic(self, G):
         """This functions updates magnetic field components with the PML correction on the GPU.
@@ -361,7 +361,7 @@ class PML(object):
             G (class): Grid class instance - holds essential parameters describing the model.
         """
 
-        self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1.shape[1]), np.int32(self.HPhi1.shape[2]), np.int32(self.HPhi1.shape[3]), np.int32(self.HPhi2.shape[1]), np.int32(self.HPhi2.shape[2]), np.int32(self.HPhi2.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
+        self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[3]), np.int32(self.HPhi2_gpu.shape[1]), np.int32(self.HPhi2_gpu.shape[2]), np.int32(self.HPhHPhi2_gpui2.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
 
 
 def build_pmls(G, pbar):

From f070a878d6654d29f89debb0b1df068a3ac89783 Mon Sep 17 00:00:00 2001
From: Craig Warren <cwarren@C17776592.local>
Date: Wed, 31 Jul 2019 10:11:17 +0100
Subject: [PATCH 12/13] Corrected refs to GPU array

---
 gprMax/pml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gprMax/pml.py b/gprMax/pml.py
index f0570c08..1840ed96 100644
--- a/gprMax/pml.py
+++ b/gprMax/pml.py
@@ -361,7 +361,7 @@ class PML(object):
             G (class): Grid class instance - holds essential parameters describing the model.
         """
 
-        self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[3]), np.int32(self.HPhi2_gpu.shape[1]), np.int32(self.HPhi2_gpu.shape[2]), np.int32(self.HPhHPhi2_gpui2.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
+        self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[3]), np.int32(self.HPhi2_gpu.shape[1]), np.int32(self.HPhi2_gpu.shape[2]), np.int32(self.HPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
 
 
 def build_pmls(G, pbar):

From 1bc75fb811e2704129ccb4df92ff44b47e0f4984 Mon Sep 17 00:00:00 2001
From: Craig Warren <cwarren@C17776592.local>
Date: Wed, 31 Jul 2019 10:14:59 +0100
Subject: [PATCH 13/13] Remove un-needed arguments from store outputs function.

---
 gprMax/fields_outputs.py  | 3 +--
 gprMax/model_build_run.py | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/gprMax/fields_outputs.py b/gprMax/fields_outputs.py
index c70167ba..c60cf3b5 100644
--- a/gprMax/fields_outputs.py
+++ b/gprMax/fields_outputs.py
@@ -89,12 +89,11 @@ __global__ void store_outputs(int NRX, int iteration, const int* __restrict__ rx
 """)
 
 
-def write_hdf5_outputfile(outputfile, Ex, Ey, Ez, Hx, Hy, Hz, G):
+def write_hdf5_outputfile(outputfile, G):
     """Write an output file in HDF5 format.
 
     Args:
         outputfile (str): Name of the output file.
-        Ex, Ey, Ez, Hx, Hy, Hz (memory view): Current electric and magnetic field values.
         G (class): Grid class instance - holds essential parameters describing the model.
     """
 
diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py
index dde372b1..451239ee 100644
--- a/gprMax/model_build_run.py
+++ b/gprMax/model_build_run.py
@@ -373,7 +373,7 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern
             tsolve, memsolve = solve_gpu(currentmodelrun, modelend, G)
 
         # Write an output file in HDF5 format
-        write_hdf5_outputfile(outputfile, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G)
+        write_hdf5_outputfile(outputfile, G)
 
         # Write any snapshots to file
         if G.snapshots: