From c7eac5bee3130c79efc7ce543e941867f804875e Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Mon, 3 Jun 2019 09:23:14 +0100 Subject: [PATCH 01/13] Updated notes on using rx with an ID and specifying field components on GPU. --- docs/source/input.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/input.rst b/docs/source/input.rst index b5e9623b..2c75ed22 100644 --- a/docs/source/input.rst +++ b/docs/source/input.rst @@ -834,7 +834,8 @@ Allows you to introduce output points into the model. These are locations where .. note:: - * When the optional parameters ``str1`` and ``str2`` are not given all the electric and magnetic field components will be output with the receiver point. + * When the optional parameters ``str1`` and ``str2`` are not given the default electric and magnetic field components (``Ex``, ``Ey``, ``Ez``, ``Hx``, ``Hy``, ``Hz``) will be output with the receiver point. + * ``str2`` has no effect if solving using GPU(s), i.e. the default electric and magnetic components (``Ex``, ``Ey``, ``Ez``, ``Hx``, ``Hy``, ``Hz``) will allways be output irrespective of the value of ``str2``. #rx_array: ---------- From 226843b70086f5ea3dc5761998bd52eb2a91feed Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Tue, 4 Jun 2019 16:35:11 +0100 Subject: [PATCH 02/13] Added specific conda environment for macOS to get around MKL threading bug - use OpenBLAS instead. --- conda_env_macOS.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda_env_macOS.yml b/conda_env_macOS.yml index 826dd5c0..0d17ce8f 100644 --- a/conda_env_macOS.yml +++ b/conda_env_macOS.yml @@ -1,7 +1,7 @@ # This file may be used to create a conda environment using: # $ conda create --name gprMax --file -name: gprMax +name: gprMax_openblas dependencies: - python>3.6 - blas=*=openblas From 0048f6c249c56cdc3573cbe6e8e7fe31cb6ba045 Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Tue, 4 Jun 2019 16:37:07 +0100 Subject: [PATCH 03/13] Revert conda environment name. --- conda_env_macOS.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda_env_macOS.yml b/conda_env_macOS.yml index 0d17ce8f..826dd5c0 100644 --- a/conda_env_macOS.yml +++ b/conda_env_macOS.yml @@ -1,7 +1,7 @@ # This file may be used to create a conda environment using: # $ conda create --name gprMax --file -name: gprMax_openblas +name: gprMax dependencies: - python>3.6 - blas=*=openblas From 84fbc065fd918cfc0981929f1e8cb4c131733b82 Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Fri, 5 Jul 2019 11:45:42 +0100 Subject: [PATCH 04/13] Workaround for MKL threading bug on macOS. --- conda_env_macOS.yml | 22 ---------------------- gprMax/gprMax.py | 6 ++++++ 2 files changed, 6 insertions(+), 22 deletions(-) delete mode 100644 conda_env_macOS.yml diff --git a/conda_env_macOS.yml b/conda_env_macOS.yml deleted file mode 100644 index 826dd5c0..00000000 --- a/conda_env_macOS.yml +++ /dev/null @@ -1,22 +0,0 @@ -# This file may be used to create a conda environment using: -# $ conda create --name gprMax --file - -name: gprMax -dependencies: -- python>3.6 -- blas=*=openblas -- colorama -- cython -- h5py -- jupyter -- matplotlib -- numpy -- pip -- psutil -- scipy - -- pip: - - terminaltables - - tqdm -# - mpi4py -# - pycuda diff --git a/gprMax/gprMax.py b/gprMax/gprMax.py index 8538d228..0ebf3f06 100644 --- a/gprMax/gprMax.py +++ b/gprMax/gprMax.py @@ -25,6 +25,12 @@ import sys from enum import Enum +# There is a bug with threading and MKL on macOS +# (https://github.com/gprMax/gprMax/issues/195) . Setting the MKL threading +# layer to sequential solves it, but must be done before numpy is imported. +if sys.platform == 'darwin': + os.environ["MKL_THREADING_LAYER"] = 'sequential' + import h5py import numpy as np From c9902b2909daaaa18bbd6d4b31a685e25cad4f13 Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Wed, 31 Jul 2019 09:41:43 +0100 Subject: [PATCH 05/13] Only initialise field/PML arrays on host when using CPU. --- gprMax/grid.py | 12 ++++++------ gprMax/model_build_run.py | 14 ++++++++------ gprMax/pml.py | 25 ++++++++++++++++++------- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/gprMax/grid.py b/gprMax/grid.py index 1467c154..8ab8fdb9 100644 --- a/gprMax/grid.py +++ b/gprMax/grid.py @@ -254,12 +254,12 @@ class FDTDGrid(Grid): import pycuda.gpuarray as gpuarray self.ID_gpu = gpuarray.to_gpu(self.ID) - self.Ex_gpu = gpuarray.to_gpu(self.Ex) - self.Ey_gpu = gpuarray.to_gpu(self.Ey) - self.Ez_gpu = gpuarray.to_gpu(self.Ez) - self.Hx_gpu = gpuarray.to_gpu(self.Hx) - self.Hy_gpu = gpuarray.to_gpu(self.Hy) - self.Hz_gpu = gpuarray.to_gpu(self.Hz) + self.Ex_gpu = gpuarray.to_gpu(np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=floattype)) + self.Ey_gpu = gpuarray.to_gpu(np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=floattype)) + self.Ez_gpu = gpuarray.to_gpu(np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=floattype)) + self.Hx_gpu = gpuarray.to_gpu(np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=floattype)) + self.Hy_gpu = gpuarray.to_gpu(np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=floattype)) + self.Hz_gpu = gpuarray.to_gpu(np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=floattype)) def gpu_initialise_dispersive_arrays(self): """Initialise dispersive material coefficient arrays on GPU.""" diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py index a21653aa..ba9e7aff 100644 --- a/gprMax/model_build_run.py +++ b/gprMax/model_build_run.py @@ -180,7 +180,8 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern G.initialise_geometry_arrays() # Initialise arrays for the field components - G.initialise_field_arrays() + if G.gpu is None: + G.initialise_field_arrays() # Process geometry commands in the order they were given process_geometrycmds(geometry, G) @@ -296,12 +297,13 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern if G.messages: print(Fore.GREEN + '{} {}\n'.format(inputfilestr, '-' * (get_terminal_width() - 1 - len(inputfilestr))) + Style.RESET_ALL) - # Clear arrays for field components - G.initialise_field_arrays() + if G.gpu is None: + # Clear arrays for field components + G.initialise_field_arrays() - # Clear arrays for fields in PML - for pml in G.pmls: - pml.initialise_field_arrays() + # Clear arrays for fields in PML + for pml in G.pmls: + pml.initialise_field_arrays() # Adjust position of simple sources and receivers if required if G.srcsteps[0] != 0 or G.srcsteps[1] != 0 or G.srcsteps[2] != 0: diff --git a/gprMax/pml.py b/gprMax/pml.py index 7b7a950e..f1124f84 100644 --- a/gprMax/pml.py +++ b/gprMax/pml.py @@ -196,7 +196,8 @@ class PML(object): self.CFS = G.cfs - self.initialise_field_arrays() + if G.gpu is None: + self.initialise_field_arrays() def initialise_field_arrays(self): """Initialise arrays to store fields in PML.""" @@ -308,19 +309,31 @@ class PML(object): import pycuda.gpuarray as gpuarray - self.EPhi1_gpu = gpuarray.to_gpu(self.EPhi1) - self.EPhi2_gpu = gpuarray.to_gpu(self.EPhi2) self.ERA_gpu = gpuarray.to_gpu(self.ERA) self.ERB_gpu = gpuarray.to_gpu(self.ERB) self.ERE_gpu = gpuarray.to_gpu(self.ERE) self.ERF_gpu = gpuarray.to_gpu(self.ERF) - self.HPhi1_gpu = gpuarray.to_gpu(self.HPhi1) - self.HPhi2_gpu = gpuarray.to_gpu(self.HPhi2) self.HRA_gpu = gpuarray.to_gpu(self.HRA) self.HRB_gpu = gpuarray.to_gpu(self.HRB) self.HRE_gpu = gpuarray.to_gpu(self.HRE) self.HRF_gpu = gpuarray.to_gpu(self.HRF) + if self.direction[0] == 'x': + self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype)) + self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype)) + self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype)) + self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype)) + elif self.direction[0] == 'y': + self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype)) + self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype)) + self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype)) + self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype)) + elif self.direction[0] == 'z': + self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype)) + self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype)) + self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype)) + self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype)) + def gpu_get_update_funcs(self, kernelselectric, kernelsmagnetic): """Get update functions from PML kernels. @@ -329,8 +342,6 @@ class PML(object): kernelsmagnetic: PyCuda SourceModule containing PML kernels for magnetic updates. """ - from pycuda.compiler import SourceModule - self.update_electric_gpu = kernelselectric.get_function('order' + str(len(self.CFS)) + '_' + self.direction) self.update_magnetic_gpu = kernelsmagnetic.get_function('order' + str(len(self.CFS)) + '_' + self.direction) From 097bd1e28307b161b976efe7d8ff279ccc8c9310 Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Wed, 31 Jul 2019 09:58:08 +0100 Subject: [PATCH 06/13] Updated checking of GPU field array shapes. --- gprMax/model_build_run.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py index ba9e7aff..b0f5bc94 100644 --- a/gprMax/model_build_run.py +++ b/gprMax/model_build_run.py @@ -503,9 +503,9 @@ def solve_gpu(currentmodelrun, modelend, G): # Electric and magnetic field updates - prepare kernels, and get kernel functions if Material.maxpoles > 0: - kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts) + kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts) else: # Set to one any substitutions for dispersive materials - kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts) + kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts) update_e_gpu = kernels_fields.get_function("update_e") update_h_gpu = kernels_fields.get_function("update_h") @@ -535,8 +535,8 @@ def solve_gpu(currentmodelrun, modelend, G): kernelelectricfunc = getattr(import_module(pmlmodulelectric), 'kernels_template_pml_electric_' + G.pmlformulation) pmlmodulemagnetic = 'gprMax.pml_updates.pml_updates_magnetic_' + G.pmlformulation + '_gpu' kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic), 'kernels_template_pml_magnetic_' + G.pmlformulation) - kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) - kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=G.Hx.shape[0], NY_FIELDS=G.Hx.shape[1], NZ_FIELDS=G.Hx.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) + kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) + kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for PML kernels updatecoeffsE = kernels_pml_electric.get_global('updatecoeffsE')[0] updatecoeffsH = kernels_pml_magnetic.get_global('updatecoeffsH')[0] @@ -553,12 +553,12 @@ def solve_gpu(currentmodelrun, modelend, G): # Initialise arrays on GPU rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G) # Prepare kernel and get kernel function - kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2]), options=compiler_opts) + kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), (G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts) store_outputs_gpu = kernel_store_outputs.get_function("store_outputs") # Sources - initialise arrays on GPU, prepare kernel and get kernel functions if G.voltagesources + G.hertziandipoles + G.magneticdipoles: - kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) + kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for source kernels updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0] updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0] @@ -579,7 +579,7 @@ def solve_gpu(currentmodelrun, modelend, G): # Initialise arrays on GPU snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu = gpu_initialise_snapshot_array(G) # Prepare kernel and get kernel function - kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2]), options=compiler_opts) + kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts) store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot") # Iteration loop timer From 59adcde08b91d92d9f2ce1d3799a3cc7d7c6fc33 Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Wed, 31 Jul 2019 09:59:35 +0100 Subject: [PATCH 07/13] Updated checking of GPU field array shapes. --- gprMax/model_build_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py index b0f5bc94..70517b44 100644 --- a/gprMax/model_build_run.py +++ b/gprMax/model_build_run.py @@ -553,7 +553,7 @@ def solve_gpu(currentmodelrun, modelend, G): # Initialise arrays on GPU rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G) # Prepare kernel and get kernel function - kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), (G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts) + kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts) store_outputs_gpu = kernel_store_outputs.get_function("store_outputs") # Sources - initialise arrays on GPU, prepare kernel and get kernel functions From bad65bd32d79f0f9322d0ba685363f2273f1802d Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Wed, 31 Jul 2019 10:05:37 +0100 Subject: [PATCH 08/13] Updated checking of GPU field array shapes. --- gprMax/model_build_run.py | 16 ++++++++-------- gprMax/pml.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py index 70517b44..10c1a8a6 100644 --- a/gprMax/model_build_run.py +++ b/gprMax/model_build_run.py @@ -503,9 +503,9 @@ def solve_gpu(currentmodelrun, modelend, G): # Electric and magnetic field updates - prepare kernels, and get kernel functions if Material.maxpoles > 0: - kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts) + kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts) else: # Set to one any substitutions for dispersive materials - kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts) + kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts) update_e_gpu = kernels_fields.get_function("update_e") update_h_gpu = kernels_fields.get_function("update_h") @@ -535,8 +535,8 @@ def solve_gpu(currentmodelrun, modelend, G): kernelelectricfunc = getattr(import_module(pmlmodulelectric), 'kernels_template_pml_electric_' + G.pmlformulation) pmlmodulemagnetic = 'gprMax.pml_updates.pml_updates_magnetic_' + G.pmlformulation + '_gpu' kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic), 'kernels_template_pml_magnetic_' + G.pmlformulation) - kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) - kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) + kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) + kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for PML kernels updatecoeffsE = kernels_pml_electric.get_global('updatecoeffsE')[0] updatecoeffsH = kernels_pml_magnetic.get_global('updatecoeffsH')[0] @@ -544,21 +544,21 @@ def solve_gpu(currentmodelrun, modelend, G): drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH) # Set block per grid, initialise arrays on GPU, and get kernel functions for pml in G.pmls: - pml.gpu_set_blocks_per_grid(G) pml.gpu_initialise_arrays() pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic) + pml.gpu_set_blocks_per_grid(G) # Receivers if G.rxs: # Initialise arrays on GPU rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G) # Prepare kernel and get kernel function - kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts) + kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), options=compiler_opts) store_outputs_gpu = kernel_store_outputs.get_function("store_outputs") # Sources - initialise arrays on GPU, prepare kernel and get kernel functions if G.voltagesources + G.hertziandipoles + G.magneticdipoles: - kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) + kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for source kernels updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0] updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0] @@ -579,7 +579,7 @@ def solve_gpu(currentmodelrun, modelend, G): # Initialise arrays on GPU snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu = gpu_initialise_snapshot_array(G) # Prepare kernel and get kernel function - kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts) + kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1)), options=compiler_opts) store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot") # Iteration loop timer diff --git a/gprMax/pml.py b/gprMax/pml.py index f1124f84..4edc5968 100644 --- a/gprMax/pml.py +++ b/gprMax/pml.py @@ -302,7 +302,7 @@ class PML(object): G (class): Grid class instance - holds essential parameters describing the model. """ - self.bpg = (int(np.ceil(((self.EPhi1.shape[1] + 1) * (self.EPhi1.shape[2] + 1) * (self.EPhi1.shape[3] + 1)) / G.tpb[0])), 1, 1) + self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) * (self.EPhi1_gpu.shape[2] + 1) * (self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1) def gpu_initialise_arrays(self): """Initialise PML field and coefficient arrays on GPU.""" From ed70d0634dd8b1c8b00f33b4e0a3224f4f659e56 Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Wed, 31 Jul 2019 10:07:03 +0100 Subject: [PATCH 09/13] Removed extra bracket. --- gprMax/model_build_run.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py index 10c1a8a6..71382aaa 100644 --- a/gprMax/model_build_run.py +++ b/gprMax/model_build_run.py @@ -503,9 +503,9 @@ def solve_gpu(currentmodelrun, modelend, G): # Electric and magnetic field updates - prepare kernels, and get kernel functions if Material.maxpoles > 0: - kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts) + kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts) else: # Set to one any substitutions for dispersive materials - kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts) + kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts) update_e_gpu = kernels_fields.get_function("update_e") update_h_gpu = kernels_fields.get_function("update_h") @@ -535,8 +535,8 @@ def solve_gpu(currentmodelrun, modelend, G): kernelelectricfunc = getattr(import_module(pmlmodulelectric), 'kernels_template_pml_electric_' + G.pmlformulation) pmlmodulemagnetic = 'gprMax.pml_updates.pml_updates_magnetic_' + G.pmlformulation + '_gpu' kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic), 'kernels_template_pml_magnetic_' + G.pmlformulation) - kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) - kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) + kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) + kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for PML kernels updatecoeffsE = kernels_pml_electric.get_global('updatecoeffsE')[0] updatecoeffsH = kernels_pml_magnetic.get_global('updatecoeffsH')[0] @@ -553,12 +553,12 @@ def solve_gpu(currentmodelrun, modelend, G): # Initialise arrays on GPU rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G) # Prepare kernel and get kernel function - kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), options=compiler_opts) + kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, options=compiler_opts) store_outputs_gpu = kernel_store_outputs.get_function("store_outputs") # Sources - initialise arrays on GPU, prepare kernel and get kernel functions if G.voltagesources + G.hertziandipoles + G.magneticdipoles: - kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) + kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for source kernels updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0] updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0] @@ -579,7 +579,7 @@ def solve_gpu(currentmodelrun, modelend, G): # Initialise arrays on GPU snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu = gpu_initialise_snapshot_array(G) # Prepare kernel and get kernel function - kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1)), options=compiler_opts) + kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), options=compiler_opts) store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot") # Iteration loop timer From 6d7f36af799915bb40f3dc27640753c2eb4122e3 Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Wed, 31 Jul 2019 10:07:46 +0100 Subject: [PATCH 10/13] Corrected extra bracket. --- gprMax/model_build_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py index 71382aaa..dde372b1 100644 --- a/gprMax/model_build_run.py +++ b/gprMax/model_build_run.py @@ -553,7 +553,7 @@ def solve_gpu(currentmodelrun, modelend, G): # Initialise arrays on GPU rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G) # Prepare kernel and get kernel function - kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1, options=compiler_opts) + kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), options=compiler_opts) store_outputs_gpu = kernel_store_outputs.get_function("store_outputs") # Sources - initialise arrays on GPU, prepare kernel and get kernel functions From dc17a9a94b8505b607898555992bb9cd324ab118 Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Wed, 31 Jul 2019 10:10:16 +0100 Subject: [PATCH 11/13] Corrected refs to GPU arrays. --- gprMax/pml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gprMax/pml.py b/gprMax/pml.py index 4edc5968..f0570c08 100644 --- a/gprMax/pml.py +++ b/gprMax/pml.py @@ -352,7 +352,7 @@ class PML(object): G (class): Grid class instance - holds essential parameters describing the model. """ - self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.EPhi1.shape[1]), np.int32(self.EPhi1.shape[2]), np.int32(self.EPhi1.shape[3]), np.int32(self.EPhi2.shape[1]), np.int32(self.EPhi2.shape[2]), np.int32(self.EPhi2.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg) + self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.EPhi1_gpu.shape[1]), np.int32(self.EPhi1_gpu.shape[2]), np.int32(self.EPhi1_gpu.shape[3]), np.int32(self.EPhi2_gpu.shape[1]), np.int32(self.EPhi2_gpu.shape[2]), np.int32(self.EPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg) def gpu_update_magnetic(self, G): """This functions updates magnetic field components with the PML correction on the GPU. @@ -361,7 +361,7 @@ class PML(object): G (class): Grid class instance - holds essential parameters describing the model. """ - self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1.shape[1]), np.int32(self.HPhi1.shape[2]), np.int32(self.HPhi1.shape[3]), np.int32(self.HPhi2.shape[1]), np.int32(self.HPhi2.shape[2]), np.int32(self.HPhi2.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg) + self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[3]), np.int32(self.HPhi2_gpu.shape[1]), np.int32(self.HPhi2_gpu.shape[2]), np.int32(self.HPhHPhi2_gpui2.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg) def build_pmls(G, pbar): From f070a878d6654d29f89debb0b1df068a3ac89783 Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Wed, 31 Jul 2019 10:11:17 +0100 Subject: [PATCH 12/13] Corrected refs to GPU array --- gprMax/pml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gprMax/pml.py b/gprMax/pml.py index f0570c08..1840ed96 100644 --- a/gprMax/pml.py +++ b/gprMax/pml.py @@ -361,7 +361,7 @@ class PML(object): G (class): Grid class instance - holds essential parameters describing the model. """ - self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[3]), np.int32(self.HPhi2_gpu.shape[1]), np.int32(self.HPhi2_gpu.shape[2]), np.int32(self.HPhHPhi2_gpui2.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg) + self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[3]), np.int32(self.HPhi2_gpu.shape[1]), np.int32(self.HPhi2_gpu.shape[2]), np.int32(self.HPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg) def build_pmls(G, pbar): From 1bc75fb811e2704129ccb4df92ff44b47e0f4984 Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Wed, 31 Jul 2019 10:14:59 +0100 Subject: [PATCH 13/13] Remove un-needed arguments from store outputs function. --- gprMax/fields_outputs.py | 3 +-- gprMax/model_build_run.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/gprMax/fields_outputs.py b/gprMax/fields_outputs.py index c70167ba..c60cf3b5 100644 --- a/gprMax/fields_outputs.py +++ b/gprMax/fields_outputs.py @@ -89,12 +89,11 @@ __global__ void store_outputs(int NRX, int iteration, const int* __restrict__ rx """) -def write_hdf5_outputfile(outputfile, Ex, Ey, Ez, Hx, Hy, Hz, G): +def write_hdf5_outputfile(outputfile, G): """Write an output file in HDF5 format. Args: outputfile (str): Name of the output file. - Ex, Ey, Ez, Hx, Hy, Hz (memory view): Current electric and magnetic field values. G (class): Grid class instance - holds essential parameters describing the model. """ diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py index dde372b1..451239ee 100644 --- a/gprMax/model_build_run.py +++ b/gprMax/model_build_run.py @@ -373,7 +373,7 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern tsolve, memsolve = solve_gpu(currentmodelrun, modelend, G) # Write an output file in HDF5 format - write_hdf5_outputfile(outputfile, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G) + write_hdf5_outputfile(outputfile, G) # Write any snapshots to file if G.snapshots: