diff --git a/gprMax/cmds_multiple.py b/gprMax/cmds_multiple.py
index 819d5fc1..9fc47bdc 100644
--- a/gprMax/cmds_multiple.py
+++ b/gprMax/cmds_multiple.py
@@ -510,14 +510,12 @@ class Rx(UserObjectMulti):
r.ID = self.kwargs['id']
outputs = self.kwargs['outputs']
# Get allowable outputs
- if grid.gpu is not None:
- allowableoutputs = RxUser.gpu_allowableoutputs
- else:
- allowableoutputs = RxUser.allowableoutputs
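+            # The GPU solver cannot store current (Ix, Iy, Iz) outputs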
+            allowableoutputs = (RxUser.allowableoutputs_gpu if config.sim_config.general['cuda']
+                                else RxUser.allowableoutputs)
# Check and add field output names
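+            # Sort output names so they are always stored in a consistent (alphabetical) order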
+ outputs.sort()
for field in outputs:
if field in allowableoutputs:
- r.outputs[field] = np.zeros(grid.iterations, dtype=config.dtypes['float_or_double'])
+ r.outputs[field] = np.zeros(grid.iterations, dtype=config.sim_config.dtypes['float_or_double'])
else:
raise CmdInputError(f"'{self.params_str()}' contains an output type that is not allowable. Allowable outputs in current context are {allowableoutputs}")
# If no ID or outputs are specified, use default
diff --git a/gprMax/config.py b/gprMax/config.py
index bb584975..b89eb268 100644
--- a/gprMax/config.py
+++ b/gprMax/config.py
@@ -18,6 +18,7 @@
import logging
from pathlib import Path
+import sys
from colorama import init
from colorama import Fore
@@ -64,7 +65,8 @@ class ModelConfig:
# N.B. This will happen if the requested snapshots are too large to fit
# on the memory of the GPU. If True this will slow performance significantly
if sim_config.general['cuda']:
- self.cuda = {'gpu': sim_config.cuda['gpus'], 'snapsgpu2cpu': False}
+ self.cuda = {'gpu': sim_config.cuda['gpus'],
+ 'snapsgpu2cpu': False}
# Total memory usage for all grids in the model. Starts with 50MB overhead.
self.mem_use = 50e6
@@ -175,10 +177,12 @@ class SimulationConfig:
self.general['cuda'] = True
self.general['cpu'] = False
self.general['opencl'] = False
- # gpus: list of GPU objects
- # gpus_str: list of strings describing GPU(s)
- self.cuda = {'gpus': [],
- 'gpus_str': []}
+ self.general['precision'] = 'single'
+ self.cuda = {'gpus': [], # gpus: list of GPU objects
+ 'gpus_str': [], # gpus_str: list of strings describing GPU(s)
+ 'nvcc_opts': None} # nvcc_opts: nvcc compiler options
+ # Suppress nvcc warnings on Microsoft Windows
+        if sys.platform == 'win32':
+            self.cuda['nvcc_opts'] = ['-w']
self.get_gpus()
self.set_gpus()
diff --git a/gprMax/cuda/fields_updates.py b/gprMax/cuda/fields_updates.py
index 26cf94b6..6000c016 100644
--- a/gprMax/cuda/fields_updates.py
+++ b/gprMax/cuda/fields_updates.py
@@ -128,7 +128,7 @@ __global__ void update_magnetic(int NX, int NY, int NZ, const unsigned int* __re
// Electric field updates - dispersive materials //
///////////////////////////////////////////////////
-__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $REAL_OR_COMPLEX* __restrict__ updatecoeffsdispersive, $REAL_OR_COMPLEX *Tx, $REAL_OR_COMPLEX *Ty, $REAL_OR_COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
+__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
// This function is part A of updates to electric field values when dispersive materials (with multiple poles) are present.
//
@@ -189,7 +189,7 @@ __global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLE
}
}
-__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $REAL_OR_COMPLEX* __restrict__ updatecoeffsdispersive, $REAL_OR_COMPLEX *Tx, $REAL_OR_COMPLEX *Ty, $REAL_OR_COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
+__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
// This function is part B, which updates the dispersive field arrays when dispersive materials (with multiple poles) are present.
//
diff --git a/gprMax/grid.py b/gprMax/grid.py
index 5c529d5c..d8cc4b2d 100644
--- a/gprMax/grid.py
+++ b/gprMax/grid.py
@@ -279,15 +279,22 @@ class FDTDGrid:
class CUDAGrid(FDTDGrid):
"""Additional grid methods for solving on GPU using CUDA."""
+ def __init__(self, model_num):
+ super().__init__(model_num)
+
+ # Threads per block - used for main electric/magnetic field updates
+ self.tpb = (256, 1, 1)
+ # Blocks per grid - used for main electric/magnetic field updates
+ self.bpg = None
+
def set_blocks_per_grid(self):
"""Set the blocks per grid size used for updating the electric and
magnetic field arrays on a GPU.
"""
- config.cuda['gpus'].bpg = (int(np.ceil(((self.nx + 1) *
- (self.ny + 1) *
- (self.nz + 1)) /
- config.cuda['gpus'].tpb[0])), 1, 1)
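+        # 1D launch: one thread per cell of the (nx + 1) x (ny + 1) x (nz + 1) domain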
+ self.bpg = (int(np.ceil(((self.nx + 1) * (self.ny + 1) *
+ (self.nz + 1)) / self.tpb[0])), 1, 1)
def initialise_arrays(self):
"""Initialise geometry and field arrays on GPU."""
diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py
index 8e950975..5bd08364 100644
--- a/gprMax/model_build_run.py
+++ b/gprMax/model_build_run.py
@@ -58,15 +58,15 @@ from .pml import CFS
from .pml import PML
from .pml import build_pml
from .pml import pml_information
-from .receivers import gpu_initialise_rx_arrays
-from .receivers import gpu_get_rx_array
+from .receivers import initialise_rx_arrays_gpu
+from .receivers import get_rx_array_gpu
from .receivers import Rx
from .scene import Scene
from .snapshots import Snapshot
-from .snapshots import gpu_initialise_snapshot_array
-from .snapshots import gpu_get_snapshot_array
+from .snapshots import initialise_snapshot_array_gpu
+from .snapshots import get_snapshot_array_gpu
from .solvers import create_solver
-from .sources import gpu_initialise_src_arrays
+from .sources import initialise_src_arrays_gpu
from .utilities import get_terminal_width
from .utilities import human_size
from .utilities import mem_check
@@ -286,7 +286,8 @@ class ModelBuildRun:
mem_GPU = ''
if config.sim_config.general['cuda']:
- mem_GPU = f' host + ~{human_size(self.solver.get_memsolve())} GPU'
+ log.debug('Fix memory used calc for GPU')
+ # mem_GPU = f' host + ~{human_size(self.solver.get_memsolve())} GPU'
log.info(f'\nMemory (RAM) used: ~{human_size(self.p.memory_full_info().uss)}{mem_GPU}')
log.info(f'Solving time [HH:MM:SS]: {datetime.timedelta(seconds=tsolve)}')
diff --git a/gprMax/pml.py b/gprMax/pml.py
index 38ee74f9..f5ad1bf1 100644
--- a/gprMax/pml.py
+++ b/gprMax/pml.py
@@ -207,8 +207,7 @@ class PML:
self.CFS = G.cfs
- if not config.sim_config.general['cuda']:
- self.initialise_field_arrays()
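+        # Field arrays are initialised on the host for both CPU and GPU solvers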
+ self.initialise_field_arrays()
def initialise_field_arrays(self):
"""Initialise arrays to store fields in PML."""
@@ -340,7 +339,7 @@ class CUDAPML(PML):
solving on GPU using CUDA.
"""
- def initialise_arrays(self):
+ def initialise_field_arrays_gpu(self):
"""Initialise PML field and coefficient arrays on GPU."""
import pycuda.gpuarray as gpuarray
@@ -355,20 +354,44 @@ class CUDAPML(PML):
self.HRF_gpu = gpuarray.to_gpu(self.HRF)
if self.direction[0] == 'x':
- self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype))
- self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype))
- self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype))
- self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype))
+ self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+ self.nx + 1, self.ny, self.nz + 1),
+ dtype=config.sim_config.dtypes['float_or_double']))
+ self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+ self.nx + 1, self.ny + 1, self.nz),
+ dtype=config.sim_config.dtypes['float_or_double']))
+ self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+ self.nx, self.ny + 1, self.nz),
+ dtype=config.sim_config.dtypes['float_or_double']))
+ self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+ self.nx, self.ny, self.nz + 1),
+ dtype=config.sim_config.dtypes['float_or_double']))
elif self.direction[0] == 'y':
- self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype))
- self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype))
- self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype))
- self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype))
+ self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+ self.nx, self.ny + 1, self.nz + 1),
+ dtype=config.sim_config.dtypes['float_or_double']))
+ self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+ self.nx + 1, self.ny + 1, self.nz),
+ dtype=config.sim_config.dtypes['float_or_double']))
+ self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+ self.nx + 1, self.ny, self.nz),
+ dtype=config.sim_config.dtypes['float_or_double']))
+ self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+ self.nx, self.ny, self.nz + 1),
+ dtype=config.sim_config.dtypes['float_or_double']))
elif self.direction[0] == 'z':
- self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype))
- self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype))
- self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype))
- self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype))
+ self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+ self.nx, self.ny + 1, self.nz + 1),
+ dtype=config.sim_config.dtypes['float_or_double']))
+ self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+ self.nx + 1, self.ny, self.nz + 1),
+ dtype=config.sim_config.dtypes['float_or_double']))
+ self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+ self.nx + 1, self.ny, self.nz),
+ dtype=config.sim_config.dtypes['float_or_double']))
+ self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
+ self.nx, self.ny + 1, self.nz),
+ dtype=config.sim_config.dtypes['float_or_double']))
def set_blocks_per_grid(self, G):
"""Set the blocks per grid size used for updating the PML field arrays on a GPU.
@@ -377,7 +400,9 @@ class CUDAPML(PML):
G (FDTDGrid): Holds essential parameters describing the model.
"""
- self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) * (self.EPhi1_gpu.shape[2] + 1) * (self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1)
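+        # One thread per PML cell, rounded up to a whole number of blocks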
+ self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) *
+ (self.EPhi1_gpu.shape[2] + 1) *
+ (self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1)
def get_update_funcs(self, kernelselectric, kernelsmagnetic):
"""Get update functions from PML kernels.
@@ -400,7 +425,24 @@ class CUDAPML(PML):
G (FDTDGrid): Holds essential parameters describing the model.
"""
- self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.EPhi1_gpu.shape[1]), np.int32(self.EPhi1_gpu.shape[2]), np.int32(self.EPhi1_gpu.shape[3]), np.int32(self.EPhi2_gpu.shape[1]), np.int32(self.EPhi2_gpu.shape[2]), np.int32(self.EPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
+ self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf),
+ np.int32(self.ys), np.int32(self.yf),
+ np.int32(self.zs), np.int32(self.zf),
+ np.int32(self.EPhi1_gpu.shape[1]),
+ np.int32(self.EPhi1_gpu.shape[2]),
+ np.int32(self.EPhi1_gpu.shape[3]),
+ np.int32(self.EPhi2_gpu.shape[1]),
+ np.int32(self.EPhi2_gpu.shape[2]),
+ np.int32(self.EPhi2_gpu.shape[3]),
+ np.int32(self.thickness),
+ G.ID_gpu.gpudata,
+ G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+ G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+ self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata,
+ self.ERA_gpu.gpudata, self.ERB_gpu.gpudata,
+ self.ERE_gpu.gpudata, self.ERF_gpu.gpudata,
+ config.sim_config.dtypes['float_or_double'](self.d),
+ block=G.tpb, grid=self.bpg)
def update_magnetic(self, G):
"""This functions updates magnetic field components with the PML
@@ -409,7 +451,24 @@ class CUDAPML(PML):
Args:
G (FDTDGrid): Holds essential parameters describing the model.
"""
- self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[3]), np.int32(self.HPhi2_gpu.shape[1]), np.int32(self.HPhi2_gpu.shape[2]), np.int32(self.HPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
+ self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf),
+ np.int32(self.ys), np.int32(self.yf),
+ np.int32(self.zs), np.int32(self.zf),
+ np.int32(self.HPhi1_gpu.shape[1]),
+ np.int32(self.HPhi1_gpu.shape[2]),
+ np.int32(self.HPhi1_gpu.shape[3]),
+ np.int32(self.HPhi2_gpu.shape[1]),
+ np.int32(self.HPhi2_gpu.shape[2]),
+ np.int32(self.HPhi2_gpu.shape[3]),
+ np.int32(self.thickness),
+ G.ID_gpu.gpudata,
+ G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
+ G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
+ self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata,
+ self.HRA_gpu.gpudata, self.HRB_gpu.gpudata,
+ self.HRE_gpu.gpudata, self.HRF_gpu.gpudata,
+ config.sim_config.dtypes['float_or_double'](self.d),
+ block=G.tpb, grid=self.bpg)
def pml_information(G):
"""Information about PMLs.
@@ -443,14 +502,16 @@ def build_pml(G, key, value):
value (int): Thickness of PML slab in cells.
"""
+ pml_type = CUDAPML if config.sim_config.general['cuda'] else PML
+
sumer = 0 # Sum of relative permittivities in PML slab
summr = 0 # Sum of relative permeabilities in PML slab
if key[0] == 'x':
if key == 'x0':
- pml = PML(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz)
+ pml = pml_type(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz)
elif key == 'xmax':
- pml = PML(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
+ pml = pml_type(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml)
for j in range(G.ny):
for k in range(G.nz):
@@ -463,9 +524,9 @@ def build_pml(G, key, value):
elif key[0] == 'y':
if key == 'y0':
- pml = PML(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz)
+ pml = pml_type(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz)
elif key == 'ymax':
- pml = PML(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
+ pml = pml_type(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml)
for i in range(G.nx):
for k in range(G.nz):
@@ -478,9 +539,9 @@ def build_pml(G, key, value):
elif key[0] == 'z':
if key == 'z0':
- pml = PML(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny)
+ pml = pml_type(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny)
elif key == 'zmax':
- pml = PML(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
+ pml = pml_type(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml)
for i in range(G.nx):
for j in range(G.ny):
diff --git a/gprMax/receivers.py b/gprMax/receivers.py
index 4d65b95e..399a8959 100644
--- a/gprMax/receivers.py
+++ b/gprMax/receivers.py
@@ -16,8 +16,6 @@
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see .
-from collections import OrderedDict
-
import numpy as np
import gprMax.config as config
@@ -27,14 +25,15 @@ class Rx:
"""Receiver output points."""
allowableoutputs = ['Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz', 'Ix', 'Iy', 'Iz']
- gpu_allowableoutputs = allowableoutputs[:-3]
defaultoutputs = allowableoutputs[:-3]
- maxnumoutputs = 0
+
+ allowableoutputs_gpu = allowableoutputs[:-3]
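+    # Maximum number of outputs on any receiver - set when GPU arrays are initialised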
+ maxnumoutputs_gpu = 0
def __init__(self):
self.ID = None
- self.outputs = OrderedDict()
+ self.outputs = {}
self.xcoord = None
self.ycoord = None
self.zcoord = None
@@ -43,12 +42,17 @@ class Rx:
self.zcoordorigin = None
-def gpu_initialise_rx_arrays(G):
+def initialise_rx_arrays_gpu(G):
"""Initialise arrays on GPU for receiver coordinates and to store field
components for receivers.
Args:
G (FDTDGrid): Holds essential parameters describing the model.
+
+    Returns:
+        rxcoords_gpu (gpuarray): receiver coordinates stored on the GPU.
+        rxs_gpu (gpuarray): receiver field data stored on the GPU - rows are
+            field components; columns are iterations; pages are receivers.
"""
import pycuda.gpuarray as gpuarray
@@ -59,11 +63,14 @@ def gpu_initialise_rx_arrays(G):
rxcoords[i, 0] = rx.xcoord
rxcoords[i, 1] = rx.ycoord
rxcoords[i, 2] = rx.zcoord
+ # Store maximum number of output components
+ if len(rx.outputs) > Rx.maxnumoutputs_gpu:
+ Rx.maxnumoutputs_gpu = len(rx.outputs)
# Array to store field components for receivers on GPU - rows are field components;
# columns are iterations; pages are receivers
- rxs = np.zeros((Rx.maxnumoutputs, G.iterations, len(G.rxs)),
- dtype=config.dtypes['float_or_double'])
+ rxs = np.zeros((len(Rx.allowableoutputs_gpu), G.iterations, len(G.rxs)),
+ dtype=config.sim_config.dtypes['float_or_double'])
# Copy arrays to GPU
rxcoords_gpu = gpuarray.to_gpu(rxcoords)
@@ -72,20 +79,20 @@ def gpu_initialise_rx_arrays(G):
return rxcoords_gpu, rxs_gpu
-def gpu_get_rx_array(rxs_gpu, rxcoords_gpu, G):
+def get_rx_array_gpu(rxs_gpu, rxcoords_gpu, G):
"""Copy output from receivers array used on GPU back to receiver objects.
Args:
rxs_gpu (float): numpy array of receiver data from GPU - rows are field
components; columns are iterations; pages are receivers.
- rxcoords_gpu (float): numpy array of receiver coordinates from GPU.
+ rxcoords_gpu (int): numpy array of receiver coordinates from GPU.
G (FDTDGrid): Holds essential parameters describing the model.
"""
for rx in G.rxs:
for rxgpu in range(len(G.rxs)):
- if rx.xcoord == rxcoords_gpu[rxgpu, 0] and \
- rx.ycoord == rxcoords_gpu[rxgpu, 1] and \
- rx.zcoord == rxcoords_gpu[rxgpu, 2]:
- for k in rx.outputs.items():
- rx.outputs[k] = rxs_gpu[Rx.gpu_allowableoutputs.index(k), :, rxgpu]
+ if (rx.xcoord == rxcoords_gpu[rxgpu, 0] and
+ rx.ycoord == rxcoords_gpu[rxgpu, 1] and
+ rx.zcoord == rxcoords_gpu[rxgpu, 2]):
+ for output in rx.outputs.keys():
+ rx.outputs[output] = rxs_gpu[Rx.allowableoutputs_gpu.index(output), :, rxgpu]
diff --git a/gprMax/snapshots.py b/gprMax/snapshots.py
index 1b57fec6..b2a42d9a 100644
--- a/gprMax/snapshots.py
+++ b/gprMax/snapshots.py
@@ -184,7 +184,7 @@ class Snapshot:
self.filehandle.close()
-def gpu_initialise_snapshot_array(G):
+def initialise_snapshot_array_gpu(G):
"""Initialise array on GPU for to store field data for snapshots.
Args:
@@ -236,7 +236,7 @@ def gpu_initialise_snapshot_array(G):
return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu
-def gpu_get_snapshot_array(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap):
+def get_snapshot_array_gpu(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap):
"""Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview.
Args:
diff --git a/gprMax/solvers.py b/gprMax/solvers.py
index cfb31bb6..78e1cbde 100644
--- a/gprMax/solvers.py
+++ b/gprMax/solvers.py
@@ -15,7 +15,7 @@
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see .
-
+import sys
import gprMax.config as config
from .grid import FDTDGrid
from .grid import CUDAGrid
diff --git a/gprMax/sources.py b/gprMax/sources.py
index 22725b50..6f7be311 100644
--- a/gprMax/sources.py
+++ b/gprMax/sources.py
@@ -236,7 +236,7 @@ class MagneticDipole(Source):
(1 / (G.dx * G.dy * G.dz)))
-def gpu_initialise_src_arrays(sources, G):
+def initialise_src_arrays_gpu(sources, G):
"""Initialise arrays on GPU for source coordinates/polarisation, other
source information, and source waveform values.
@@ -255,8 +255,8 @@ def gpu_initialise_src_arrays(sources, G):
import pycuda.gpuarray as gpuarray
srcinfo1 = np.zeros((len(sources), 4), dtype=np.int32)
- srcinfo2 = np.zeros((len(sources)), dtype=config.dtypes['float_or_double'])
- srcwaves = np.zeros((len(sources), G.iterations), dtype=config.dtypes['float_or_double'])
+ srcinfo2 = np.zeros((len(sources)), dtype=config.sim_config.dtypes['float_or_double'])
+ srcwaves = np.zeros((len(sources), G.iterations), dtype=config.sim_config.dtypes['float_or_double'])
for i, src in enumerate(sources):
srcinfo1[i, 0] = src.xcoord
srcinfo1[i, 1] = src.ycoord
diff --git a/gprMax/updates.py b/gprMax/updates.py
index dc7ca6df..a3c2e529 100644
--- a/gprMax/updates.py
+++ b/gprMax/updates.py
@@ -18,21 +18,24 @@
from importlib import import_module
import logging
-import sys
+
+import numpy as np
import gprMax.config as config
from .cuda.fields_updates import kernel_template_fields
from .cuda.snapshots import kernel_template_store_snapshot
from .cuda.source_updates import kernel_template_sources
-from .cython.fields_updates_normal import update_electric
-from .cython.fields_updates_normal import update_magnetic
-from .fields_outputs import store_outputs
-from .receivers import gpu_initialise_rx_arrays
-from .receivers import gpu_get_rx_array
+from .cython.fields_updates_normal import update_electric as update_electric_cpu
+from .cython.fields_updates_normal import update_magnetic as update_magnetic_cpu
+from .fields_outputs import store_outputs as store_outputs_cpu
+from .fields_outputs import kernel_template_store_outputs
+from .receivers import initialise_rx_arrays_gpu
+from .receivers import get_rx_array_gpu
from .snapshots import Snapshot
-from .snapshots import gpu_initialise_snapshot_array
-from .snapshots import gpu_get_snapshot_array
-from .sources import gpu_initialise_src_arrays
+from .snapshots import initialise_snapshot_array_gpu
+from .snapshots import get_snapshot_array_gpu
+from .sources import initialise_src_arrays_gpu
+from .utilities import round32
from .utilities import timer
log = logging.getLogger(__name__)
@@ -52,7 +55,7 @@ class CPUUpdates:
def store_outputs(self):
"""Store field component values for every receiver and transmission line."""
- store_outputs(self.grid)
+ store_outputs_cpu(self.grid)
def store_snapshots(self, iteration):
"""Store any snapshots.
@@ -66,7 +69,7 @@ class CPUUpdates:
def update_magnetic(self):
"""Update magnetic field components."""
-        update_magnetic(self.grid.nx,
-                        self.grid.ny,
-                        self.grid.nz,
-                        config.sim_config.hostinfo['ompthreads'],
+        update_magnetic_cpu(self.grid.nx,
+                            self.grid.ny,
+                            self.grid.nz,
+                            config.sim_config.hostinfo['ompthreads'],
@@ -99,18 +102,18 @@ class CPUUpdates:
"""Update electric field components."""
# All materials are non-dispersive so do standard update.
if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0:
- update_electric(self.grid.nx,
- self.grid.ny,
- self.grid.nz,
- config.sim_config.hostinfo['ompthreads'],
- self.grid.updatecoeffsE,
- self.grid.ID,
- self.grid.Ex,
- self.grid.Ey,
- self.grid.Ez,
- self.grid.Hx,
- self.grid.Hy,
- self.grid.Hz)
+ update_electric_cpu(self.grid.nx,
+ self.grid.ny,
+ self.grid.nz,
+ config.sim_config.hostinfo['ompthreads'],
+ self.grid.updatecoeffsE,
+ self.grid.ID,
+ self.grid.Ex,
+ self.grid.Ey,
+ self.grid.Ez,
+ self.grid.Hx,
+ self.grid.Hy,
+ self.grid.Hz)
# If there are any dispersive materials do 1st part of dispersive update
# (it is split into two parts as it requires present and updated electric field values).
@@ -247,20 +250,14 @@ class CUDAUpdates:
self.dispersive_update_a = None
self.dispersive_update_b = None
- import pycuda.driver as drv
- from pycuda.compiler import SourceModule
- drv.init()
-
- # Suppress nvcc warnings on Windows
- log.debug('Move nvcc compiler options to simulation config')
- if sys.platform == 'win32':
- self.compiler_opts = ['-w']
- else:
- self.compiler_opts = None
+ # Import PyCUDA modules
+ self.drv = import_module('pycuda.driver')
+ self.source_module = getattr(import_module('pycuda.compiler'), 'SourceModule')
+ self.drv.init()
        # Create device handle and context on specific GPU device (and make it current context)
- self.dev = drv.Device(self.grid.gpu.deviceID)
- self.ctx = dev.make_context()
+ self.dev = self.drv.Device(config.model_configs[self.grid.model_num].cuda['gpu'].deviceID)
+ self.ctx = self.dev.make_context()
# Initialise arrays on GPU, prepare kernels, and get kernel functions
self.set_field_kernels()
@@ -274,56 +271,56 @@ class CUDAUpdates:
get kernel functions.
"""
if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0:
- kernels_fields = SourceModule(kernels_template_fields.substitute(
- REAL=cudafloattype,
- COMPLEX=cudacomplextype,
- N_updatecoeffsE=self.grid.updatecoeffsE.size,
- N_updatecoeffsH=self.grid.updatecoeffsH.size,
- NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
- NY_MATDISPCOEFFS=self.grid.updatecoeffsdispersive.shape[1],
- NX_FIELDS=self.grid.nx + 1,
- NY_FIELDS=self.grid.ny + 1,
- NZ_FIELDS=self.grid.nz + 1,
- NX_ID=self.grid.ID.shape[1],
- NY_ID=self.grid.ID.shape[2],
- NZ_ID=self.grid.ID.shape[3],
- NX_T=self.grid.Tx.shape[1],
- NY_T=self.grid.Tx.shape[2],
- NZ_T=self.grid.Tx.shape[3]),
- options=self.compiler_opts)
+            kernels_fields = self.source_module(kernel_template_fields.substitute(
+ REAL=config.sim_config.dtypes['C_float_or_double'],
+ COMPLEX=config.sim_config.dtypes['C_complex'],
+ N_updatecoeffsE=self.grid.updatecoeffsE.size,
+ N_updatecoeffsH=self.grid.updatecoeffsH.size,
+ NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
+ NY_MATDISPCOEFFS=self.grid.updatecoeffsdispersive.shape[1],
+ NX_FIELDS=self.grid.nx + 1,
+ NY_FIELDS=self.grid.ny + 1,
+ NZ_FIELDS=self.grid.nz + 1,
+ NX_ID=self.grid.ID.shape[1],
+ NY_ID=self.grid.ID.shape[2],
+ NZ_ID=self.grid.ID.shape[3],
+ NX_T=self.grid.Tx.shape[1],
+ NY_T=self.grid.Tx.shape[2],
+ NZ_T=self.grid.Tx.shape[3]),
+ options=config.sim_config.cuda['nvcc_opts'])
else: # Set to one any substitutions for dispersive materials
- kernels_fields = SourceModule(kernels_template_fields.substitute(
- REAL=cudafloattype,
- COMPLEX=cudacomplextype,
- N_updatecoeffsE=self.grid.updatecoeffsE.size,
- N_updatecoeffsH=self.grid.updatecoeffsH.size,
- NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
- NY_MATDISPCOEFFS=1,
- NX_FIELDS=self.grid.nx + 1,
- NY_FIELDS=self.grid.ny + 1,
- NZ_FIELDS=self.grid.nz + 1,
- NX_ID=self.grid.ID.shape[1],
- NY_ID=self.grid.ID.shape[2],
- NZ_ID=self.grid.ID.shape[3],
- NX_T=1,
- NY_T=1,
- NZ_T=1),
- options=self.compiler_opts)
- self.update_electric = kernels_fields.get_function("update_electric")
- self.update_magnetic = kernels_fields.get_function("update_magnetic")
- if self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes > self.grid.gpu.constmem:
- raise GeneralError(log.exception(f'Too many materials in the model to fit onto constant memory of size {human_size(self.grid.gpu.constmem)} on {self.grid.gpu.deviceID} - {self.grid.gpu.name} GPU'))
- self.copy_mat_coeffs()
+ kernels_fields = self.source_module(kernel_template_fields.substitute(
+ REAL=config.sim_config.dtypes['C_float_or_double'],
+ COMPLEX=config.sim_config.dtypes['C_complex'],
+ N_updatecoeffsE=self.grid.updatecoeffsE.size,
+ N_updatecoeffsH=self.grid.updatecoeffsH.size,
+ NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
+ NY_MATDISPCOEFFS=1,
+ NX_FIELDS=self.grid.nx + 1,
+ NY_FIELDS=self.grid.ny + 1,
+ NZ_FIELDS=self.grid.nz + 1,
+ NX_ID=self.grid.ID.shape[1],
+ NY_ID=self.grid.ID.shape[2],
+ NZ_ID=self.grid.ID.shape[3],
+ NX_T=1,
+ NY_T=1,
+ NZ_T=1),
+ options=config.sim_config.cuda['nvcc_opts'])
+ self.update_electric_gpu = kernels_fields.get_function("update_electric")
+ self.update_magnetic_gpu = kernels_fields.get_function("update_magnetic")
+ if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes > config.model_configs[self.grid.model_num].cuda['gpu'].constmem):
+ raise GeneralError(log.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.model_configs[self.grid.model_num].cuda['gpu'].constmem)} on {config.model_configs[self.grid.model_num].cuda['gpu'].deviceID} - {config.model_configs[self.grid.model_num].cuda['gpu'].name} GPU"))
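+        # The fields kernel module contains both the E and H coefficient globals,
+        # so it is passed as both the electric and magnetic kernel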
+ self.copy_mat_coeffs(kernels_fields, kernels_fields)
# Electric and magnetic field updates - dispersive materials - get kernel functions and initialise array on GPU
if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0: # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values).
self.dispersive_update_a = kernels_fields.get_function("update_electric_dispersive_A")
self.dispersive_update_b = kernels_fields.get_function("update_electric_dispersive_B")
- self.grid.gpu_initialise_dispersive_arrays()
+ self.grid.initialise_dispersive_arrays()
# Electric and magnetic field updates - set blocks per grid and initialise field arrays on GPU
- self.grid.gpu_set_blocks_per_grid()
- self.grid.gpu_initialise_arrays()
+ self.grid.set_blocks_per_grid()
+ self.grid.initialise_arrays()
def set_pml_kernels(self):
"""PMLS - prepare kernels and get kernel functions."""
@@ -336,81 +333,81 @@ class CUDAUpdates:
kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic),
'kernels_template_pml_magnetic_' +
self.grid.pmlformulation)
- kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(
- REAL=cudafloattype,
- N_updatecoeffsE=self.grid.updatecoeffsE.size,
- NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
- NX_FIELDS=self.grid.nx + 1,
- NY_FIELDS=self.grid.ny + 1,
- NZ_FIELDS=self.grid.nz + 1,
- NX_ID=self.grid.ID.shape[1],
- NY_ID=self.grid.ID.shape[2],
- NZ_ID=self.grid.ID.shape[3]),
- options=self.compiler_opts)
- kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(
- REAL=cudafloattype,
- N_updatecoeffsH=self.grid.updatecoeffsH.size,
- NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1],
- NX_FIELDS=self.grid.nx + 1,
- NY_FIELDS=self.grid.ny + 1,
- NZ_FIELDS=self.grid.nz + 1,
- NX_ID=self.gridG.ID.shape[1],
- NY_ID=self.grid.ID.shape[2],
- NZ_ID=self.grid.ID.shape[3]),
- options=self.compiler_opts)
- self.copy_mat_coeffs()
+ kernels_pml_electric = self.source_module(kernelelectricfunc.substitute(
+ REAL=config.sim_config.dtypes['C_float_or_double'],
+ N_updatecoeffsE=self.grid.updatecoeffsE.size,
+ NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
+ NX_FIELDS=self.grid.nx + 1,
+ NY_FIELDS=self.grid.ny + 1,
+ NZ_FIELDS=self.grid.nz + 1,
+ NX_ID=self.grid.ID.shape[1],
+ NY_ID=self.grid.ID.shape[2],
+ NZ_ID=self.grid.ID.shape[3]),
+ options=config.sim_config.cuda['nvcc_opts'])
+ kernels_pml_magnetic = self.source_module(kernelmagneticfunc.substitute(
+ REAL=config.sim_config.dtypes['C_float_or_double'],
+ N_updatecoeffsH=self.grid.updatecoeffsH.size,
+ NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1],
+ NX_FIELDS=self.grid.nx + 1,
+ NY_FIELDS=self.grid.ny + 1,
+ NZ_FIELDS=self.grid.nz + 1,
+ NX_ID=self.grid.ID.shape[1],
+ NY_ID=self.grid.ID.shape[2],
+ NZ_ID=self.grid.ID.shape[3]),
+ options=config.sim_config.cuda['nvcc_opts'])
+ self.copy_mat_coeffs(kernels_pml_electric, kernels_pml_magnetic)
# Set block per grid, initialise arrays on GPU, and get kernel functions
for pml in self.grid.pmls:
- pml.gpu_initialise_arrays()
- pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
- pml.gpu_set_blocks_per_grid(self.grid)
+ pml.initialise_field_arrays_gpu()
+ pml.get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
+ pml.set_blocks_per_grid(self.grid)
def set_rx_kernel(self):
"""Receivers - initialise arrays on GPU, prepare kernel and get kernel
function.
"""
if self.grid.rxs:
- rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(self.grid)
- kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(
- REAL=cudafloattype,
- NY_RXCOORDS=3,
- NX_RXS=6,
- NY_RXS=self.grid.iterations,
- NZ_RXS=len(self.grid.rxs),
- NX_FIELDS=self.grid.nx + 1,
- NY_FIELDS=self.grid.ny + 1,
- NZ_FIELDS=self.grid.nz + 1),
- options=self.compiler_opts)
- self.store_outputs = kernel_store_outputs.get_function("store_outputs")
+ self.rxcoords_gpu, self.rxs_gpu = initialise_rx_arrays_gpu(self.grid)
+ kernel_store_outputs = self.source_module(kernel_template_store_outputs.substitute(
+ REAL=config.sim_config.dtypes['C_float_or_double'],
+ NY_RXCOORDS=3,
+ NX_RXS=6,
+ NY_RXS=self.grid.iterations,
+ NZ_RXS=len(self.grid.rxs),
+ NX_FIELDS=self.grid.nx + 1,
+ NY_FIELDS=self.grid.ny + 1,
+ NZ_FIELDS=self.grid.nz + 1),
+ options=config.sim_config.cuda['nvcc_opts'])
+ self.store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
def set_src_kernels(self):
"""Sources - initialise arrays on GPU, prepare kernel and get kernel
function.
"""
if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles:
- kernels_sources = SourceModule(kernels_template_sources.substitute(
- REAL=cudafloattype,
- N_updatecoeffsE=self.grid.updatecoeffsE.size,
- N_updatecoeffsH=self.grid.updatecoeffsH.size,
- NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
- NY_SRCINFO=4,
- NY_SRCWAVES=self.grid.iterations,
- NX_FIELDS=self.grid.nx + 1,
- NY_FIELDS=self.grid.ny + 1,
- NZ_FIELDS=self.grid.nz + 1,
- NX_ID=self.grid.ID.shape[1],
- NY_ID=self.grid.ID.shape[2],
- NZ_ID=self.grid.ID.shape[3]),
- options=self.compiler_opts)
- self.copy_mat_coeffs()
+ kernels_sources = self.source_module(kernel_template_sources.substitute(
+ REAL=config.sim_config.dtypes['C_float_or_double'],
+ N_updatecoeffsE=self.grid.updatecoeffsE.size,
+ N_updatecoeffsH=self.grid.updatecoeffsH.size,
+ NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
+ NY_SRCINFO=4,
+ NY_SRCWAVES=self.grid.iterations,
+ NX_FIELDS=self.grid.nx + 1,
+ NY_FIELDS=self.grid.ny + 1,
+ NZ_FIELDS=self.grid.nz + 1,
+ NX_ID=self.grid.ID.shape[1],
+ NY_ID=self.grid.ID.shape[2],
+ NZ_ID=self.grid.ID.shape[3]),
+ options=config.sim_config.cuda['nvcc_opts'])
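+            # Source kernels also contain both the E and H coefficient globals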
+ self.copy_mat_coeffs(kernels_sources, kernels_sources)
if self.grid.hertziandipoles:
- self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = gpu_initialise_src_arrays(self.grid.hertziandipoles, self.grid)
+ self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = initialise_src_arrays_gpu(self.grid.hertziandipoles, self.grid)
self.update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole")
if self.grid.magneticdipoles:
- self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = gpu_initialise_src_arrays(self.grid.magneticdipoles, self.grid)
+ self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = initialise_src_arrays_gpu(self.grid.magneticdipoles, self.grid)
self.update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole")
if self.grid.voltagesources:
- self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = gpu_initialise_src_arrays(self.grid.voltagesources, self.grid)
+ self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = initialise_src_arrays_gpu(self.grid.voltagesources, self.grid)
self.update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source")
def set_snapshot_kernel(self):
@@ -418,42 +415,46 @@ class CUDAUpdates:
function.
"""
if self.grid.snapshots:
- self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = gpu_initialise_snapshot_array(self.grid)
- kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(
- REAL=cudafloattype,
- NX_SNAPS=Snapshot.nx_max,
- NY_SNAPS=Snapshot.ny_max,
- NZ_SNAPS=Snapshot.nz_max,
- NX_FIELDS=self.grid.nx + 1,
- NY_FIELDS=self.grid.ny + 1,
- NZ_FIELDS=self.grid.nz + 1),
- options=self.compiler_opts)
+ self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = initialise_snapshot_array_gpu(self.grid)
+ kernel_store_snapshot = self.source_module(kernel_template_store_snapshot.substitute(
+ REAL=config.sim_config.dtypes['C_float_or_double'],
+ NX_SNAPS=Snapshot.nx_max,
+ NY_SNAPS=Snapshot.ny_max,
+ NZ_SNAPS=Snapshot.nz_max,
+ NX_FIELDS=self.grid.nx + 1,
+ NY_FIELDS=self.grid.ny + 1,
+ NZ_FIELDS=self.grid.nz + 1),
+ options=config.sim_config.cuda['nvcc_opts'])
self.store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
- def copy_mat_coeffs(self):
+ def copy_mat_coeffs(self, kernelE, kernelH):
"""Copy material coefficient arrays to constant memory of GPU
(must be <64KB).
+
+ Args:
+ kernelE (kernel): electric field kernel.
+ kernelH (kernel): magnetic field kernel.
"""
- updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0]
- updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0]
+ updatecoeffsE = kernelE.get_global('updatecoeffsE')[0]
+ updatecoeffsH = kernelH.get_global('updatecoeffsH')[0]
self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE)
self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH)
def store_outputs(self):
- """Store field component values for every receiver and transmission line."""
+ """Store field component values for every receiver."""
if self.grid.rxs:
- self.store_outputs(np.int32(len(self.grid.rxs)),
- np.int32(self.grid.iteration),
- self.rxcoords_gpu.gpudata,
- self.rxs_gpu.gpudata,
- self.grid.Ex_gpu.gpudata,
- self.grid.Ey_gpu.gpudata,
- self.grid.Ez_gpu.gpudata,
- self.grid.Hx_gpu.gpudata,
- self.grid.Hy_gpu.gpudata,
- self.grid.Hz_gpu.gpudata,
- block=(1, 1, 1),
- grid=(round32(len(self.grid.rxs)), 1, 1))
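+        # One thread per receiver, with the launch padded to a whole number of warps (round32)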
+ self.store_outputs_gpu(np.int32(len(self.grid.rxs)),
+ np.int32(self.grid.iteration),
+ self.rxcoords_gpu.gpudata,
+ self.rxs_gpu.gpudata,
+ self.grid.Ex_gpu.gpudata,
+ self.grid.Ey_gpu.gpudata,
+ self.grid.Ez_gpu.gpudata,
+ self.grid.Hx_gpu.gpudata,
+ self.grid.Hy_gpu.gpudata,
+ self.grid.Hz_gpu.gpudata,
+ block=(1, 1, 1),
+ grid=(round32(len(self.grid.rxs)), 1, 1))
def store_snapshots(self, iteration):
"""Store any snapshots.
@@ -501,32 +502,32 @@ class CUDAUpdates:
def update_magnetic(self):
"""Update magnetic field components."""
- self.update_magnetic(np.int32(self.grid.nx),
- np.int32(self.grid.ny),
- np.int32(self.grid.nz),
- self.grid.ID_gpu,
- self.grid.Hx_gpu,
- self.grid.Hy_gpu,
- self.grid.Hz_gpu,
- self.grid.Ex_gpu,
- self.grid.Ey_gpu,
- self.grid.Ez_gpu,
- block=self.grid.tpb,
- grid=self.grid.bpg)
+ self.update_magnetic_gpu(np.int32(self.grid.nx),
+ np.int32(self.grid.ny),
+ np.int32(self.grid.nz),
+ self.grid.ID_gpu,
+ self.grid.Hx_gpu,
+ self.grid.Hy_gpu,
+ self.grid.Hz_gpu,
+ self.grid.Ex_gpu,
+ self.grid.Ey_gpu,
+ self.grid.Ez_gpu,
+ block=self.grid.tpb,
+ grid=self.grid.bpg)
def update_magnetic_pml(self):
"""Update magnetic field components with the PML correction."""
for pml in self.grid.pmls:
- pml.gpu_update_magnetic(self.grid)
+ pml.update_magnetic(self.grid)
def update_magnetic_sources(self):
"""Update magnetic field components from sources."""
if self.grid.magneticdipoles:
self.update_magnetic_dipole_gpu(np.int32(len(self.grid.magneticdipoles)),
np.int32(self.grid.iteration),
- config.dtypes['float_or_double'](self.grid.dx),
- config.dtypes['float_or_double'](self.grid.dy),
- config.dtypes['float_or_double'](self.grid.dz),
+ config.sim_config.dtypes['float_or_double'](self.grid.dx),
+ config.sim_config.dtypes['float_or_double'](self.grid.dy),
+ config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_magnetic_gpu.gpudata,
self.srcinfo2_magnetic_gpu.gpudata,
self.srcwaves_magnetic_gpu.gpudata,
@@ -541,18 +542,18 @@ class CUDAUpdates:
"""Update electric field components."""
# All materials are non-dispersive so do standard update.
if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0:
- self.update_electric(np.int32(self.grid.nx),
- np.int32(self.grid.ny),
- np.int32(self.grid.nz),
- self.grid.ID_gpu,
- self.grid.Ex_gpu,
- self.grid.Ey_gpu,
- self.grid.Ez_gpu,
- self.grid.Hx_gpu,
- self.grid.Hy_gpu,
- self.grid.Hz_gpu,
- block=self.grid.tpb,
- grid=self.grid.bpg)
+ self.update_electric_gpu(np.int32(self.grid.nx),
+ np.int32(self.grid.ny),
+ np.int32(self.grid.nz),
+ self.grid.ID_gpu,
+ self.grid.Ex_gpu,
+ self.grid.Ey_gpu,
+ self.grid.Ez_gpu,
+ self.grid.Hx_gpu,
+ self.grid.Hy_gpu,
+ self.grid.Hz_gpu,
+ block=self.grid.tpb,
+ grid=self.grid.bpg)
# If there are any dispersive materials do 1st part of dispersive update
# (it is split into two parts as it requires present and updated electric field values).
@@ -578,7 +579,7 @@ class CUDAUpdates:
def update_electric_pml(self):
"""Update electric field components with the PML correction."""
for pml in self.grid.pmls:
- pml.gpu_update_electric(self.grid)
+ pml.update_electric(self.grid)
def update_electric_sources(self):
"""Update electric field components from sources -
@@ -587,9 +588,9 @@ class CUDAUpdates:
if self.grid.voltagesources:
self.update_voltage_source_gpu(np.int32(len(self.grid.voltagesources)),
np.int32(self.grid.iteration),
- config.dtypes['float_or_double'](self.grid.dx),
- config.dtypes['float_or_double'](self.grid.dy),
- config.dtypes['float_or_double'](self.grid.dz),
+ config.sim_config.dtypes['float_or_double'](self.grid.dx),
+ config.sim_config.dtypes['float_or_double'](self.grid.dy),
+ config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_voltage_gpu.gpudata,
self.srcinfo2_voltage_gpu.gpudata,
self.srcwaves_voltage_gpu.gpudata,
@@ -603,9 +604,9 @@ class CUDAUpdates:
if self.grid.hertziandipoles:
self.update_hertzian_dipole_gpu(np.int32(len(self.grid.hertziandipoles)),
np.int32(self.grid.iteration),
- config.dtypes['float_or_double'](self.grid.dx),
- config.dtypes['float_or_double'](self.grid.dy),
- config.dtypes['float_or_double'](self.grid.dz),
+ config.sim_config.dtypes['float_or_double'](self.grid.dx),
+ config.sim_config.dtypes['float_or_double'](self.grid.dy),
+ config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_hertzian_gpu.gpudata,
self.srcinfo2_hertzian_gpu.gpudata,
self.srcwaves_hertzian_gpu.gpudata,
@@ -660,21 +661,20 @@ class CUDAUpdates:
"""Copy data from GPU back to CPU to save to file(s)."""
# Copy output from receivers array back to correct receiver objects
if self.grid.rxs:
- gpu_get_rx_array(self.rxs_gpu.get(),
+ get_rx_array_gpu(self.rxs_gpu.get(),
self.rxcoords_gpu.get(),
self.grid)
# Copy data from any snapshots back to correct snapshot objects
if self.grid.snapshots and not self.grid.snapsgpu2cpu:
for i, snap in enumerate(self.grid.snapshots):
- gpu_get_snapshot_array(self.snapEx_gpu.get(),
+ get_snapshot_array_gpu(self.snapEx_gpu.get(),
self.snapEy_gpu.get(),
self.snapEz_gpu.get(),
self.snapHx_gpu.get(),
self.snapHy_gpu.get(),
self.snapHz_gpu.get(),
- i,
- snap)
+ i, snap)
def cleanup(self):
"""Cleanup GPU context."""
diff --git a/gprMax/utilities.py b/gprMax/utilities.py
index a5973d48..dbf043e6 100644
--- a/gprMax/utilities.py
+++ b/gprMax/utilities.py
@@ -421,10 +421,6 @@ class GPU:
self.pcibusID = None
self.constmem = None
self.totalmem = None
- # Threads per block for main field updates
- self.tpb = (256, 1, 1)
- # Blocks per grid for main field updates (set in grid.py)
- self.bpg = None
def get_gpu_info(self, drv):
"""Set information about GPU.