diff --git a/gprMax/cmds_multiple.py b/gprMax/cmds_multiple.py index 819d5fc1..9fc47bdc 100644 --- a/gprMax/cmds_multiple.py +++ b/gprMax/cmds_multiple.py @@ -510,14 +510,12 @@ class Rx(UserObjectMulti): r.ID = self.kwargs['id'] outputs = self.kwargs['outputs'] # Get allowable outputs - if grid.gpu is not None: - allowableoutputs = RxUser.gpu_allowableoutputs - else: - allowableoutputs = RxUser.allowableoutputs + allowableoutputs = RxUser.allowableoutputs_gpu if config.sim_config.general['cuda'] else RxUser.allowableoutputs # Check and add field output names + outputs.sort() for field in outputs: if field in allowableoutputs: - r.outputs[field] = np.zeros(grid.iterations, dtype=config.dtypes['float_or_double']) + r.outputs[field] = np.zeros(grid.iterations, dtype=config.sim_config.dtypes['float_or_double']) else: raise CmdInputError(f"'{self.params_str()}' contains an output type that is not allowable. Allowable outputs in current context are {allowableoutputs}") # If no ID or outputs are specified, use default diff --git a/gprMax/config.py b/gprMax/config.py index bb584975..b89eb268 100644 --- a/gprMax/config.py +++ b/gprMax/config.py @@ -18,6 +18,7 @@ import logging from pathlib import Path +import sys from colorama import init from colorama import Fore @@ -64,7 +65,8 @@ class ModelConfig: # N.B. This will happen if the requested snapshots are too large to fit # on the memory of the GPU. If True this will slow performance significantly if sim_config.general['cuda']: - self.cuda = {'gpu': sim_config.cuda['gpus'], 'snapsgpu2cpu': False} + self.cuda = {'gpu': sim_config.cuda['gpus'], + 'snapsgpu2cpu': False} # Total memory usage for all grids in the model. Starts with 50MB overhead. self.mem_use = 50e6 @@ -175,10 +177,12 @@ class SimulationConfig: self.general['cuda'] = True self.general['cpu'] = False self.general['opencl'] = False - # gpus: list of GPU objects - # gpus_str: list of strings describing GPU(s) - self.cuda = {'gpus': [], - 'gpus_str': []} + self.general['precision'] = 'single' + self.cuda = {'gpus': [], # gpus: list of GPU objects + 'gpus_str': [], # gpus_str: list of strings describing GPU(s) + 'nvcc_opts': None} # nvcc_opts: list of nvcc compiler options + # Suppress nvcc warnings on Microsoft Windows + if sys.platform == 'win32': self.cuda['nvcc_opts'] = ['-w'] self.get_gpus() self.set_gpus() diff --git a/gprMax/cuda/fields_updates.py b/gprMax/cuda/fields_updates.py index 26cf94b6..6000c016 100644 --- a/gprMax/cuda/fields_updates.py +++ b/gprMax/cuda/fields_updates.py @@ -128,7 +128,7 @@ __global__ void update_magnetic(int NX, int NY, int NZ, const unsigned int* __re // Electric field updates - dispersive materials // /////////////////////////////////////////////////// -__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $REAL_OR_COMPLEX* __restrict__ updatecoeffsdispersive, $REAL_OR_COMPLEX *Tx, $REAL_OR_COMPLEX *Ty, $REAL_OR_COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) { +__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) { // This function is part A of updates to electric field values when dispersive materials (with multiple poles) are
present. // @@ -189,7 +189,7 @@ __global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLE } } -__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $REAL_OR_COMPLEX* __restrict__ updatecoeffsdispersive, $REAL_OR_COMPLEX *Tx, $REAL_OR_COMPLEX *Ty, $REAL_OR_COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) { +__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) { // This function is part B which updates the dispersive field arrays when dispersive materials (with multiple poles) are present. // diff --git a/gprMax/grid.py b/gprMax/grid.py index 5c529d5c..d8cc4b2d 100644 --- a/gprMax/grid.py +++ b/gprMax/grid.py @@ -279,15 +279,22 @@ class FDTDGrid: class CUDAGrid(FDTDGrid): """Additional grid methods for solving on GPU using CUDA.""" + def __init__(self, model_num): + super().__init__(model_num) + + # Threads per block - used for main electric/magnetic field updates + self.tpb = (256, 1, 1) + # Blocks per grid - used for main electric/magnetic field updates + self.bpg = None + + def set_blocks_per_grid(self): """Set the blocks per grid size used for updating the electric and magnetic field arrays on a GPU. """ - config.cuda['gpus'].bpg = (int(np.ceil(((self.nx + 1) * - (self.ny + 1) * - (self.nz + 1)) / - config.cuda['gpus'].tpb[0])), 1, 1) + self.bpg = (int(np.ceil(((self.nx + 1) * (self.ny + 1) * + (self.nz + 1)) / self.tpb[0])), 1, 1) def initialise_arrays(self): """Initialise geometry and field arrays on GPU.""" diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py index 8e950975..5bd08364 100644 --- a/gprMax/model_build_run.py +++ b/gprMax/model_build_run.py @@ -58,15 +58,15 @@ from .pml import CFS from .pml import PML from .pml import build_pml from .pml import pml_information -from .receivers import gpu_initialise_rx_arrays -from .receivers import gpu_get_rx_array +from .receivers import initialise_rx_arrays_gpu +from .receivers import get_rx_array_gpu from .receivers import Rx from .scene import Scene from .snapshots import Snapshot -from .snapshots import gpu_initialise_snapshot_array -from .snapshots import gpu_get_snapshot_array +from .snapshots import initialise_snapshot_array_gpu +from .snapshots import get_snapshot_array_gpu from .solvers import create_solver -from .sources import gpu_initialise_src_arrays +from .sources import initialise_src_arrays_gpu from .utilities import get_terminal_width from .utilities import human_size from .utilities import mem_check @@ -286,7 +286,8 @@ class ModelBuildRun: mem_GPU = '' if config.sim_config.general['cuda']: - mem_GPU = f' host + ~{human_size(self.solver.get_memsolve())} GPU' + log.debug('Fix memory used calc for GPU') + # mem_GPU = f' host + ~{human_size(self.solver.get_memsolve())} GPU' log.info(f'\nMemory (RAM) used: ~{human_size(self.p.memory_full_info().uss)}{mem_GPU}') log.info(f'Solving time [HH:MM:SS]: {datetime.timedelta(seconds=tsolve)}') diff --git a/gprMax/pml.py b/gprMax/pml.py index 38ee74f9..f5ad1bf1 100644 --- a/gprMax/pml.py +++ b/gprMax/pml.py @@ -207,8 +207,7 @@ class PML: self.CFS = G.cfs - if not config.sim_config.general['cuda']: - self.initialise_field_arrays() + self.initialise_field_arrays() def 
initialise_field_arrays(self): """Initialise arrays to store fields in PML.""" @@ -340,7 +339,7 @@ class CUDAPML(PML): solving on GPU using CUDA. """ - def initialise_arrays(self): + def initialise_field_arrays_gpu(self): """Initialise PML field and coefficient arrays on GPU.""" import pycuda.gpuarray as gpuarray @@ -355,20 +354,44 @@ class CUDAPML(PML): self.HRF_gpu = gpuarray.to_gpu(self.HRF) if self.direction[0] == 'x': - self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype)) - self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype)) - self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype)) - self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype)) + self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), + self.nx + 1, self.ny, self.nz + 1), + dtype=config.sim_config.dtypes['float_or_double'])) + self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), + self.nx + 1, self.ny + 1, self.nz), + dtype=config.sim_config.dtypes['float_or_double'])) + self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), + self.nx, self.ny + 1, self.nz), + dtype=config.sim_config.dtypes['float_or_double'])) + self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), + self.nx, self.ny, self.nz + 1), + dtype=config.sim_config.dtypes['float_or_double'])) elif self.direction[0] == 'y': - self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype)) - self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype)) - self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype)) - self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype)) + self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), + self.nx, self.ny + 1, self.nz + 1), + dtype=config.sim_config.dtypes['float_or_double'])) + self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), + self.nx + 1, self.ny + 1, self.nz), + dtype=config.sim_config.dtypes['float_or_double'])) + self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), + self.nx + 1, self.ny, self.nz), + dtype=config.sim_config.dtypes['float_or_double'])) + self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), + self.nx, self.ny, self.nz + 1), + dtype=config.sim_config.dtypes['float_or_double'])) elif self.direction[0] == 'z': - self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype)) - self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype)) - self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype)) - self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype)) + self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), + self.nx, self.ny + 1, self.nz + 1), + dtype=config.sim_config.dtypes['float_or_double'])) + self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), + self.nx + 1, self.ny, self.nz + 1), + dtype=config.sim_config.dtypes['float_or_double'])) + self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), + self.nx + 1, self.ny, self.nz), + dtype=config.sim_config.dtypes['float_or_double'])) + self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), + 
self.nx, self.ny + 1, self.nz), + dtype=config.sim_config.dtypes['float_or_double'])) def set_blocks_per_grid(self, G): """Set the blocks per grid size used for updating the PML field arrays on a GPU. @@ -377,7 +400,9 @@ G (FDTDGrid): Holds essential parameters describing the model. """ - self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) * (self.EPhi1_gpu.shape[2] + 1) * (self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1) + self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) * + (self.EPhi1_gpu.shape[2] + 1) * + (self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1) def get_update_funcs(self, kernelselectric, kernelsmagnetic): """Get update functions from PML kernels. @@ -400,7 +425,24 @@ G (FDTDGrid): Holds essential parameters describing the model. """ - self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.EPhi1_gpu.shape[1]), np.int32(self.EPhi1_gpu.shape[2]), np.int32(self.EPhi1_gpu.shape[3]), np.int32(self.EPhi2_gpu.shape[1]), np.int32(self.EPhi2_gpu.shape[2]), np.int32(self.EPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg) + self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), + np.int32(self.ys), np.int32(self.yf), + np.int32(self.zs), np.int32(self.zf), + np.int32(self.EPhi1_gpu.shape[1]), + np.int32(self.EPhi1_gpu.shape[2]), + np.int32(self.EPhi1_gpu.shape[3]), + np.int32(self.EPhi2_gpu.shape[1]), + np.int32(self.EPhi2_gpu.shape[2]), + np.int32(self.EPhi2_gpu.shape[3]), + np.int32(self.thickness), + G.ID_gpu.gpudata, + G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, + G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, + self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, + self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, + self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, + config.sim_config.dtypes['float_or_double'](self.d), + block=G.tpb, grid=self.bpg) def update_magnetic(self, G): """This function updates magnetic field components with the PML @@ -409,7 +451,24 @@ Args: G (FDTDGrid): Holds essential parameters describing the model.
""" - self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[3]), np.int32(self.HPhi2_gpu.shape[1]), np.int32(self.HPhi2_gpu.shape[2]), np.int32(self.HPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg) + self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), + np.int32(self.ys), np.int32(self.yf), + np.int32(self.zs), np.int32(self.zf), + np.int32(self.HPhi1_gpu.shape[1]), + np.int32(self.HPhi1_gpu.shape[2]), + np.int32(self.HPhi1_gpu.shape[3]), + np.int32(self.HPhi2_gpu.shape[1]), + np.int32(self.HPhi2_gpu.shape[2]), + np.int32(self.HPhi2_gpu.shape[3]), + np.int32(self.thickness), + G.ID_gpu.gpudata, + G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, + G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, + self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, + self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, + self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, + config.sim_config.dtypes['float_or_double'](self.d), + block=G.tpb, grid=self.bpg) def pml_information(G): """Information about PMLs. @@ -443,14 +502,16 @@ def build_pml(G, key, value): value (int): Thickness of PML slab in cells. """ + pml_type = CUDAPML if config.sim_config.general['cuda'] else PML + sumer = 0 # Sum of relative permittivities in PML slab summr = 0 # Sum of relative permeabilities in PML slab if key[0] == 'x': if key == 'x0': - pml = PML(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz) + pml = pml_type(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz) elif key == 'xmax': - pml = PML(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz) + pml = pml_type(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz) G.pmls.append(pml) for j in range(G.ny): for k in range(G.nz): @@ -463,9 +524,9 @@ def build_pml(G, key, value): elif key[0] == 'y': if key == 'y0': - pml = PML(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz) + pml = pml_type(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz) elif key == 'ymax': - pml = PML(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz) + pml = pml_type(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz) G.pmls.append(pml) for i in range(G.nx): for k in range(G.nz): @@ -478,9 +539,9 @@ def build_pml(G, key, value): elif key[0] == 'z': if key == 'z0': - pml = PML(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny) + pml = pml_type(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny) elif key == 'zmax': - pml = PML(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz) + pml = pml_type(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz) G.pmls.append(pml) for i in range(G.nx): for j in range(G.ny): diff --git a/gprMax/receivers.py b/gprMax/receivers.py index 4d65b95e..399a8959 100644 --- a/gprMax/receivers.py +++ b/gprMax/receivers.py @@ -16,8 +16,6 @@ # You should have received a copy of the GNU General Public License # along with gprMax. If not, see . 
-from collections import OrderedDict - import numpy as np import gprMax.config as config @@ -27,14 +25,15 @@ class Rx: """Receiver output points.""" allowableoutputs = ['Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz', 'Ix', 'Iy', 'Iz'] - gpu_allowableoutputs = allowableoutputs[:-3] defaultoutputs = allowableoutputs[:-3] - maxnumoutputs = 0 + + allowableoutputs_gpu = allowableoutputs[:-3] + maxnumoutputs_gpu = 0 def __init__(self): self.ID = None - self.outputs = OrderedDict() + self.outputs = {} self.xcoord = None self.ycoord = None self.zcoord = None @@ -43,12 +42,17 @@ class Rx: self.zcoordorigin = None -def gpu_initialise_rx_arrays(G): +def initialise_rx_arrays_gpu(G): """Initialise arrays on GPU for receiver coordinates and to store field components for receivers. Args: G (FDTDGrid): Holds essential parameters describing the model. + + Returns: + rxcoords_gpu (int): numpy array of receiver coordinates from GPU. + rxs_gpu (float): numpy array of receiver data from GPU - rows are field + components; columns are iterations; pages are receivers. """ import pycuda.gpuarray as gpuarray @@ -59,11 +63,14 @@ def gpu_initialise_rx_arrays(G): rxcoords[i, 0] = rx.xcoord rxcoords[i, 1] = rx.ycoord rxcoords[i, 2] = rx.zcoord + # Store maximum number of output components + if len(rx.outputs) > Rx.maxnumoutputs_gpu: + Rx.maxnumoutputs_gpu = len(rx.outputs) # Array to store field components for receivers on GPU - rows are field components; # columns are iterations; pages are receivers - rxs = np.zeros((Rx.maxnumoutputs, G.iterations, len(G.rxs)), - dtype=config.dtypes['float_or_double']) + rxs = np.zeros((len(Rx.allowableoutputs_gpu), G.iterations, len(G.rxs)), + dtype=config.sim_config.dtypes['float_or_double']) # Copy arrays to GPU rxcoords_gpu = gpuarray.to_gpu(rxcoords) @@ -72,20 +79,20 @@ -def gpu_get_rx_array(rxs_gpu, rxcoords_gpu, G): +def get_rx_array_gpu(rxs_gpu, rxcoords_gpu, G): """Copy output from receivers array used on GPU back to receiver objects. Args: rxs_gpu (float): numpy array of receiver data from GPU - rows are field components; columns are iterations; pages are receivers. - rxcoords_gpu (float): numpy array of receiver coordinates from GPU. + rxcoords_gpu (int): numpy array of receiver coordinates from GPU. G (FDTDGrid): Holds essential parameters describing the model. """ for rx in G.rxs: for rxgpu in range(len(G.rxs)): - if rx.xcoord == rxcoords_gpu[rxgpu, 0] and \ rx.ycoord == rxcoords_gpu[rxgpu, 1] and \ rx.zcoord == rxcoords_gpu[rxgpu, 2]: - for k in rx.outputs.items(): - rx.outputs[k] = rxs_gpu[Rx.gpu_allowableoutputs.index(k), :, rxgpu] + if (rx.xcoord == rxcoords_gpu[rxgpu, 0] and + rx.ycoord == rxcoords_gpu[rxgpu, 1] and + rx.zcoord == rxcoords_gpu[rxgpu, 2]): + for output in rx.outputs.keys(): + rx.outputs[output] = rxs_gpu[Rx.allowableoutputs_gpu.index(output), :, rxgpu] diff --git a/gprMax/snapshots.py b/gprMax/snapshots.py index 1b57fec6..b2a42d9a 100644 --- a/gprMax/snapshots.py +++ b/gprMax/snapshots.py @@ -184,7 +184,7 @@ class Snapshot: self.filehandle.close() -def gpu_initialise_snapshot_array(G): +def initialise_snapshot_array_gpu(G): """Initialise array on GPU to store field data for snapshots.
Args: @@ -236,7 +236,7 @@ def gpu_initialise_snapshot_array(G): return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu -def gpu_get_snapshot_array(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap): +def get_snapshot_array_gpu(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap): """Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview. Args: diff --git a/gprMax/solvers.py b/gprMax/solvers.py index cfb31bb6..78e1cbde 100644 --- a/gprMax/solvers.py +++ b/gprMax/solvers.py @@ -15,7 +15,7 @@ # # You should have received a copy of the GNU General Public License # along with gprMax. If not, see <http://www.gnu.org/licenses/>. - +import sys import gprMax.config as config from .grid import FDTDGrid from .grid import CUDAGrid diff --git a/gprMax/sources.py b/gprMax/sources.py index 22725b50..6f7be311 100644 --- a/gprMax/sources.py +++ b/gprMax/sources.py @@ -236,7 +236,7 @@ class MagneticDipole(Source): (1 / (G.dx * G.dy * G.dz))) -def gpu_initialise_src_arrays(sources, G): +def initialise_src_arrays_gpu(sources, G): """Initialise arrays on GPU for source coordinates/polarisation, other source information, and source waveform values. @@ -255,8 +255,8 @@ import pycuda.gpuarray as gpuarray srcinfo1 = np.zeros((len(sources), 4), dtype=np.int32) - srcinfo2 = np.zeros((len(sources)), dtype=config.dtypes['float_or_double']) - srcwaves = np.zeros((len(sources), G.iterations), dtype=config.dtypes['float_or_double']) + srcinfo2 = np.zeros((len(sources)), dtype=config.sim_config.dtypes['float_or_double']) + srcwaves = np.zeros((len(sources), G.iterations), dtype=config.sim_config.dtypes['float_or_double']) for i, src in enumerate(sources): srcinfo1[i, 0] = src.xcoord srcinfo1[i, 1] = src.ycoord diff --git a/gprMax/updates.py b/gprMax/updates.py index dc7ca6df..a3c2e529 100644 --- a/gprMax/updates.py +++ b/gprMax/updates.py @@ -18,21 +18,24 @@ from importlib import import_module import logging -import sys + +import numpy as np import gprMax.config as config from .cuda.fields_updates import kernel_template_fields from .cuda.snapshots import kernel_template_store_snapshot from .cuda.source_updates import kernel_template_sources -from .cython.fields_updates_normal import update_electric -from .cython.fields_updates_normal import update_magnetic -from .fields_outputs import store_outputs -from .receivers import gpu_initialise_rx_arrays -from .receivers import gpu_get_rx_array +from .cython.fields_updates_normal import update_electric as update_electric_cpu +from .cython.fields_updates_normal import update_magnetic as update_magnetic_cpu +from .fields_outputs import store_outputs as store_outputs_cpu +from .fields_outputs import kernel_template_store_outputs +from .receivers import initialise_rx_arrays_gpu +from .receivers import get_rx_array_gpu from .snapshots import Snapshot -from .snapshots import gpu_initialise_snapshot_array -from .snapshots import gpu_get_snapshot_array +from .snapshots import initialise_snapshot_array_gpu +from .snapshots import get_snapshot_array_gpu +from .sources import initialise_src_arrays_gpu +from .utilities import round32 from .utilities import timer log = logging.getLogger(__name__) @@ -52,7 +55,7 @@ class CPUUpdates: def store_outputs(self): """Store field component values for every receiver and transmission line.""" - store_outputs(self.grid) + store_outputs_cpu(self.grid) def store_snapshots(self,
iteration): """Store any snapshots. @@ -66,7 +69,7 @@ class CPUUpdates: def update_magnetic(self): """Update magnetic field components.""" - update_magnetic(self.grid.nx, + update_magnetic_cpu(self.grid.nx, self.grid.ny, self.grid.nz, config.sim_config.hostinfo['ompthreads'], @@ -99,18 +102,18 @@ class CPUUpdates: """Update electric field components.""" # All materials are non-dispersive so do standard update. if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0: - update_electric(self.grid.nx, - self.grid.ny, - self.grid.nz, - config.sim_config.hostinfo['ompthreads'], - self.grid.updatecoeffsE, - self.grid.ID, - self.grid.Ex, - self.grid.Ey, - self.grid.Ez, - self.grid.Hx, - self.grid.Hy, - self.grid.Hz) + update_electric_cpu(self.grid.nx, + self.grid.ny, + self.grid.nz, + config.sim_config.hostinfo['ompthreads'], + self.grid.updatecoeffsE, + self.grid.ID, + self.grid.Ex, + self.grid.Ey, + self.grid.Ez, + self.grid.Hx, + self.grid.Hy, + self.grid.Hz) # If there are any dispersive materials do 1st part of dispersive update # (it is split into two parts as it requires present and updated electric field values). @@ -247,20 +250,14 @@ class CUDAUpdates: self.dispersive_update_a = None self.dispersive_update_b = None - import pycuda.driver as drv - from pycuda.compiler import SourceModule - drv.init() - - # Suppress nvcc warnings on Windows - log.debug('Move nvcc compiler options to simulation config') - if sys.platform == 'win32': - self.compiler_opts = ['-w'] - else: - self.compiler_opts = None + # Import PyCUDA modules + self.drv = import_module('pycuda.driver') + self.source_module = getattr(import_module('pycuda.compiler'), 'SourceModule') + self.drv.init() # Create device handle and context on specifc GPU device (and make it current context) - self.dev = drv.Device(self.grid.gpu.deviceID) - self.ctx = dev.make_context() + self.dev = self.drv.Device(config.model_configs[self.grid.model_num].cuda['gpu'].deviceID) + self.ctx = self.dev.make_context() # Initialise arrays on GPU, prepare kernels, and get kernel functions self.set_field_kernels() @@ -274,56 +271,56 @@ class CUDAUpdates: get kernel functions. 
""" if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0: - kernels_fields = SourceModule(kernels_template_fields.substitute( - REAL=cudafloattype, - COMPLEX=cudacomplextype, - N_updatecoeffsE=self.grid.updatecoeffsE.size, - N_updatecoeffsH=self.grid.updatecoeffsH.size, - NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], - NY_MATDISPCOEFFS=self.grid.updatecoeffsdispersive.shape[1], - NX_FIELDS=self.grid.nx + 1, - NY_FIELDS=self.grid.ny + 1, - NZ_FIELDS=self.grid.nz + 1, - NX_ID=self.grid.ID.shape[1], - NY_ID=self.grid.ID.shape[2], - NZ_ID=self.grid.ID.shape[3], - NX_T=self.grid.Tx.shape[1], - NY_T=self.grid.Tx.shape[2], - NZ_T=self.grid.Tx.shape[3]), - options=self.compiler_opts) + kernels_fields = self.source_module(kernels_template_fields.substitute( + REAL=config.sim_config.dtypes['C_float_or_double'], + COMPLEX=config.sim_config.dtypes['C_complex'], + N_updatecoeffsE=self.grid.updatecoeffsE.size, + N_updatecoeffsH=self.grid.updatecoeffsH.size, + NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], + NY_MATDISPCOEFFS=self.grid.updatecoeffsdispersive.shape[1], + NX_FIELDS=self.grid.nx + 1, + NY_FIELDS=self.grid.ny + 1, + NZ_FIELDS=self.grid.nz + 1, + NX_ID=self.grid.ID.shape[1], + NY_ID=self.grid.ID.shape[2], + NZ_ID=self.grid.ID.shape[3], + NX_T=self.grid.Tx.shape[1], + NY_T=self.grid.Tx.shape[2], + NZ_T=self.grid.Tx.shape[3]), + options=config.sim_config.cuda['nvcc_opts']) else: # Set to one any substitutions for dispersive materials - kernels_fields = SourceModule(kernels_template_fields.substitute( - REAL=cudafloattype, - COMPLEX=cudacomplextype, - N_updatecoeffsE=self.grid.updatecoeffsE.size, - N_updatecoeffsH=self.grid.updatecoeffsH.size, - NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], - NY_MATDISPCOEFFS=1, - NX_FIELDS=self.grid.nx + 1, - NY_FIELDS=self.grid.ny + 1, - NZ_FIELDS=self.grid.nz + 1, - NX_ID=self.grid.ID.shape[1], - NY_ID=self.grid.ID.shape[2], - NZ_ID=self.grid.ID.shape[3], - NX_T=1, - NY_T=1, - NZ_T=1), - options=self.compiler_opts) - self.update_electric = kernels_fields.get_function("update_electric") - self.update_magnetic = kernels_fields.get_function("update_magnetic") - if self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes > self.grid.gpu.constmem: - raise GeneralError(log.exception(f'Too many materials in the model to fit onto constant memory of size {human_size(self.grid.gpu.constmem)} on {self.grid.gpu.deviceID} - {self.grid.gpu.name} GPU')) - self.copy_mat_coeffs() + kernels_fields = self.source_module(kernel_template_fields.substitute( + REAL=config.sim_config.dtypes['C_float_or_double'], + COMPLEX=config.sim_config.dtypes['C_complex'], + N_updatecoeffsE=self.grid.updatecoeffsE.size, + N_updatecoeffsH=self.grid.updatecoeffsH.size, + NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], + NY_MATDISPCOEFFS=1, + NX_FIELDS=self.grid.nx + 1, + NY_FIELDS=self.grid.ny + 1, + NZ_FIELDS=self.grid.nz + 1, + NX_ID=self.grid.ID.shape[1], + NY_ID=self.grid.ID.shape[2], + NZ_ID=self.grid.ID.shape[3], + NX_T=1, + NY_T=1, + NZ_T=1), + options=config.sim_config.cuda['nvcc_opts']) + self.update_electric_gpu = kernels_fields.get_function("update_electric") + self.update_magnetic_gpu = kernels_fields.get_function("update_magnetic") + if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes > config.model_configs[self.grid.model_num].cuda['gpu'].constmem): + raise GeneralError(log.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.model_configs[self.grid.model_num].cuda['gpu'].constmem)} on 
{config.model_configs[self.grid.model_num].cuda['gpu'].deviceID} - {config.model_configs[self.grid.model_num].cuda['gpu'].name} GPU")) + self.copy_mat_coeffs(kernels_fields, kernels_fields) # Electric and magnetic field updates - dispersive materials - get kernel functions and initialise array on GPU if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0: # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values). self.dispersive_update_a = kernels_fields.get_function("update_electric_dispersive_A") self.dispersive_update_b = kernels_fields.get_function("update_electric_dispersive_B") - self.grid.gpu_initialise_dispersive_arrays() + self.grid.initialise_dispersive_arrays() # Electric and magnetic field updates - set blocks per grid and initialise field arrays on GPU - self.grid.gpu_set_blocks_per_grid() - self.grid.gpu_initialise_arrays() + self.grid.set_blocks_per_grid() + self.grid.initialise_arrays() def set_pml_kernels(self): """PMLS - prepare kernels and get kernel functions.""" @@ -336,81 +333,81 @@ class CUDAUpdates: kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic), 'kernels_template_pml_magnetic_' + self.grid.pmlformulation) - kernels_pml_electric = SourceModule(kernelelectricfunc.substitute( - REAL=cudafloattype, - N_updatecoeffsE=self.grid.updatecoeffsE.size, - NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], - NX_FIELDS=self.grid.nx + 1, - NY_FIELDS=self.grid.ny + 1, - NZ_FIELDS=self.grid.nz + 1, - NX_ID=self.grid.ID.shape[1], - NY_ID=self.grid.ID.shape[2], - NZ_ID=self.grid.ID.shape[3]), - options=self.compiler_opts) - kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute( - REAL=cudafloattype, - N_updatecoeffsH=self.grid.updatecoeffsH.size, - NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1], - NX_FIELDS=self.grid.nx + 1, - NY_FIELDS=self.grid.ny + 1, - NZ_FIELDS=self.grid.nz + 1, - NX_ID=self.gridG.ID.shape[1], - NY_ID=self.grid.ID.shape[2], - NZ_ID=self.grid.ID.shape[3]), - options=self.compiler_opts) - self.copy_mat_coeffs() + kernels_pml_electric = self.source_module(kernelelectricfunc.substitute( + REAL=config.sim_config.dtypes['C_float_or_double'], + N_updatecoeffsE=self.grid.updatecoeffsE.size, + NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], + NX_FIELDS=self.grid.nx + 1, + NY_FIELDS=self.grid.ny + 1, + NZ_FIELDS=self.grid.nz + 1, + NX_ID=self.grid.ID.shape[1], + NY_ID=self.grid.ID.shape[2], + NZ_ID=self.grid.ID.shape[3]), + options=config.sim_config.cuda['nvcc_opts']) + kernels_pml_magnetic = self.source_module(kernelmagneticfunc.substitute( + REAL=config.sim_config.dtypes['C_float_or_double'], + N_updatecoeffsH=self.grid.updatecoeffsH.size, + NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1], + NX_FIELDS=self.grid.nx + 1, + NY_FIELDS=self.grid.ny + 1, + NZ_FIELDS=self.grid.nz + 1, + NX_ID=self.grid.ID.shape[1], + NY_ID=self.grid.ID.shape[2], + NZ_ID=self.grid.ID.shape[3]), + options=config.sim_config.cuda['nvcc_opts']) + self.copy_mat_coeffs(kernels_pml_electric, kernels_pml_magnetic) # Set block per grid, initialise arrays on GPU, and get kernel functions for pml in self.grid.pmls: - pml.gpu_initialise_arrays() - pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic) - pml.gpu_set_blocks_per_grid(self.grid) + pml.initialise_field_arrays_gpu() + pml.get_update_funcs(kernels_pml_electric, kernels_pml_magnetic) + pml.set_blocks_per_grid(self.grid) def set_rx_kernel(self): """Receivers - initialise arrays on GPU, prepare kernel and get kernel function. 
""" if self.grid.rxs: - rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(self.grid) - kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute( - REAL=cudafloattype, - NY_RXCOORDS=3, - NX_RXS=6, - NY_RXS=self.grid.iterations, - NZ_RXS=len(self.grid.rxs), - NX_FIELDS=self.grid.nx + 1, - NY_FIELDS=self.grid.ny + 1, - NZ_FIELDS=self.grid.nz + 1), - options=self.compiler_opts) - self.store_outputs = kernel_store_outputs.get_function("store_outputs") + self.rxcoords_gpu, self.rxs_gpu = initialise_rx_arrays_gpu(self.grid) + kernel_store_outputs = self.source_module(kernel_template_store_outputs.substitute( + REAL=config.sim_config.dtypes['C_float_or_double'], + NY_RXCOORDS=3, + NX_RXS=6, + NY_RXS=self.grid.iterations, + NZ_RXS=len(self.grid.rxs), + NX_FIELDS=self.grid.nx + 1, + NY_FIELDS=self.grid.ny + 1, + NZ_FIELDS=self.grid.nz + 1), + options=config.sim_config.cuda['nvcc_opts']) + self.store_outputs_gpu = kernel_store_outputs.get_function("store_outputs") def set_src_kernels(self): """Sources - initialise arrays on GPU, prepare kernel and get kernel function. """ if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles: - kernels_sources = SourceModule(kernels_template_sources.substitute( - REAL=cudafloattype, - N_updatecoeffsE=self.grid.updatecoeffsE.size, - N_updatecoeffsH=self.grid.updatecoeffsH.size, - NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], - NY_SRCINFO=4, - NY_SRCWAVES=self.grid.iterations, - NX_FIELDS=self.grid.nx + 1, - NY_FIELDS=self.grid.ny + 1, - NZ_FIELDS=self.grid.nz + 1, - NX_ID=self.grid.ID.shape[1], - NY_ID=self.grid.ID.shape[2], - NZ_ID=self.grid.ID.shape[3]), - options=self.compiler_opts) - self.copy_mat_coeffs() + kernels_sources = self.source_module(kernel_template_sources.substitute( + REAL=config.sim_config.dtypes['C_float_or_double'], + N_updatecoeffsE=self.grid.updatecoeffsE.size, + N_updatecoeffsH=self.grid.updatecoeffsH.size, + NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], + NY_SRCINFO=4, + NY_SRCWAVES=self.grid.iterations, + NX_FIELDS=self.grid.nx + 1, + NY_FIELDS=self.grid.ny + 1, + NZ_FIELDS=self.grid.nz + 1, + NX_ID=self.grid.ID.shape[1], + NY_ID=self.grid.ID.shape[2], + NZ_ID=self.grid.ID.shape[3]), + options=config.sim_config.cuda['nvcc_opts']) + self.copy_mat_coeffs(kernels_sources, kernels_sources) if self.grid.hertziandipoles: - self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = gpu_initialise_src_arrays(self.grid.hertziandipoles, self.grid) + self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = initialise_src_arrays_gpu(self.grid.hertziandipoles, self.grid) self.update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole") if self.grid.magneticdipoles: - self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = gpu_initialise_src_arrays(self.grid.magneticdipoles, self.grid) + self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = initialise_src_arrays_gpu(self.grid.magneticdipoles, self.grid) self.update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole") if self.grid.voltagesources: - self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = gpu_initialise_src_arrays(self.grid.voltagesources, self.grid) + self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = initialise_src_arrays_gpu(self.grid.voltagesources, self.grid) self.update_voltage_source_gpu = 
kernels_sources.get_function("update_voltage_source") def set_snapshot_kernel(self): @@ -418,42 +415,46 @@ class CUDAUpdates: function. """ if self.grid.snapshots: - self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = gpu_initialise_snapshot_array(self.grid) - kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute( - REAL=cudafloattype, - NX_SNAPS=Snapshot.nx_max, - NY_SNAPS=Snapshot.ny_max, - NZ_SNAPS=Snapshot.nz_max, - NX_FIELDS=self.grid.nx + 1, - NY_FIELDS=self.grid.ny + 1, - NZ_FIELDS=self.grid.nz + 1), - options=self.compiler_opts) + self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = initialise_snapshot_array_gpu(self.grid) + kernel_store_snapshot = self.source_module(kernel_template_store_snapshot.substitute( + REAL=config.sim_config.dtypes['C_float_or_double'], + NX_SNAPS=Snapshot.nx_max, + NY_SNAPS=Snapshot.ny_max, + NZ_SNAPS=Snapshot.nz_max, + NX_FIELDS=self.grid.nx + 1, + NY_FIELDS=self.grid.ny + 1, + NZ_FIELDS=self.grid.nz + 1), + options=config.sim_config.cuda['nvcc_opts']) self.store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot") - def copy_mat_coeffs(self): + def copy_mat_coeffs(self, kernelE, kernelH): """Copy material coefficient arrays to constant memory of GPU (must be <64KB). + + Args: + kernelE (kernel): electric field kernel. + kernelH (kernel): magnetic field kernel. """ - updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0] - updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0] + updatecoeffsE = kernelE.get_global('updatecoeffsE')[0] + updatecoeffsH = kernelH.get_global('updatecoeffsH')[0] self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE) self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH) def store_outputs(self): - """Store field component values for every receiver and transmission line.""" + """Store field component values for every receiver.""" if self.grid.rxs: - self.store_outputs(np.int32(len(self.grid.rxs)), - np.int32(self.grid.iteration), - self.rxcoords_gpu.gpudata, - self.rxs_gpu.gpudata, - self.grid.Ex_gpu.gpudata, - self.grid.Ey_gpu.gpudata, - self.grid.Ez_gpu.gpudata, - self.grid.Hx_gpu.gpudata, - self.grid.Hy_gpu.gpudata, - self.grid.Hz_gpu.gpudata, - block=(1, 1, 1), - grid=(round32(len(self.grid.rxs)), 1, 1)) + self.store_outputs_gpu(np.int32(len(self.grid.rxs)), + np.int32(self.grid.iteration), + self.rxcoords_gpu.gpudata, + self.rxs_gpu.gpudata, + self.grid.Ex_gpu.gpudata, + self.grid.Ey_gpu.gpudata, + self.grid.Ez_gpu.gpudata, + self.grid.Hx_gpu.gpudata, + self.grid.Hy_gpu.gpudata, + self.grid.Hz_gpu.gpudata, + block=(1, 1, 1), + grid=(round32(len(self.grid.rxs)), 1, 1)) def store_snapshots(self, iteration): """Store any snapshots. 
@@ -501,32 +502,32 @@ class CUDAUpdates: def update_magnetic(self): """Update magnetic field components.""" - self.update_magnetic(np.int32(self.grid.nx), - np.int32(self.grid.ny), - np.int32(self.grid.nz), - self.grid.ID_gpu, - self.grid.Hx_gpu, - self.grid.Hy_gpu, - self.grid.Hz_gpu, - self.grid.Ex_gpu, - self.grid.Ey_gpu, - self.grid.Ez_gpu, - block=self.grid.tpb, - grid=self.grid.bpg) + self.update_magnetic_gpu(np.int32(self.grid.nx), + np.int32(self.grid.ny), + np.int32(self.grid.nz), + self.grid.ID_gpu, + self.grid.Hx_gpu, + self.grid.Hy_gpu, + self.grid.Hz_gpu, + self.grid.Ex_gpu, + self.grid.Ey_gpu, + self.grid.Ez_gpu, + block=self.grid.tpb, + grid=self.grid.bpg) def update_magnetic_pml(self): """Update magnetic field components with the PML correction.""" for pml in self.grid.pmls: - pml.gpu_update_magnetic(self.grid) + pml.update_magnetic(self.grid) def update_magnetic_sources(self): """Update magnetic field components from sources.""" if self.grid.magneticdipoles: self.update_magnetic_dipole_gpu(np.int32(len(self.grid.magneticdipoles)), np.int32(self.grid.iteration), - config.dtypes['float_or_double'](self.grid.dx), - config.dtypes['float_or_double'](self.grid.dy), - config.dtypes['float_or_double'](self.grid.dz), + config.sim_config.dtypes['float_or_double'](self.grid.dx), + config.sim_config.dtypes['float_or_double'](self.grid.dy), + config.sim_config.dtypes['float_or_double'](self.grid.dz), self.srcinfo1_magnetic_gpu.gpudata, self.srcinfo2_magnetic_gpu.gpudata, self.srcwaves_magnetic_gpu.gpudata, @@ -541,18 +542,18 @@ class CUDAUpdates: """Update electric field components.""" # All materials are non-dispersive so do standard update. if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0: - self.update_electric(np.int32(self.grid.nx), - np.int32(self.grid.ny), - np.int32(self.grid.nz), - self.grid.ID_gpu, - self.grid.Ex_gpu, - self.grid.Ey_gpu, - self.grid.Ez_gpu, - self.grid.Hx_gpu, - self.grid.Hy_gpu, - self.grid.Hz_gpu, - block=self.grid.tpb, - grid=self.grid.bpg) + self.update_electric_gpu(np.int32(self.grid.nx), + np.int32(self.grid.ny), + np.int32(self.grid.nz), + self.grid.ID_gpu, + self.grid.Ex_gpu, + self.grid.Ey_gpu, + self.grid.Ez_gpu, + self.grid.Hx_gpu, + self.grid.Hy_gpu, + self.grid.Hz_gpu, + block=self.grid.tpb, + grid=self.grid.bpg) # If there are any dispersive materials do 1st part of dispersive update # (it is split into two parts as it requires present and updated electric field values). 
@@ -578,7 +579,7 @@ class CUDAUpdates: def update_electric_pml(self): """Update electric field components with the PML correction.""" for pml in self.grid.pmls: - pml.gpu_update_electric(self.grid) + pml.update_electric(self.grid) def update_electric_sources(self): """Update electric field components from sources - @@ -587,9 +588,9 @@ class CUDAUpdates: if self.grid.voltagesources: self.update_voltage_source_gpu(np.int32(len(self.grid.voltagesources)), np.int32(self.grid.iteration), - config.dtypes['float_or_double'](self.grid.dx), - config.dtypes['float_or_double'](self.grid.dy), - config.dtypes['float_or_double'](self.grid.dz), + config.sim_config.dtypes['float_or_double'](self.grid.dx), + config.sim_config.dtypes['float_or_double'](self.grid.dy), + config.sim_config.dtypes['float_or_double'](self.grid.dz), self.srcinfo1_voltage_gpu.gpudata, self.srcinfo2_voltage_gpu.gpudata, self.srcwaves_voltage_gpu.gpudata, @@ -603,9 +604,9 @@ class CUDAUpdates: if self.grid.hertziandipoles: self.update_hertzian_dipole_gpu(np.int32(len(self.grid.hertziandipoles)), np.int32(self.grid.iteration), - config.dtypes['float_or_double'](self.grid.dx), - config.dtypes['float_or_double'](self.grid.dy), - config.dtypes['float_or_double'](self.grid.dz), + config.sim_config.dtypes['float_or_double'](self.grid.dx), + config.sim_config.dtypes['float_or_double'](self.grid.dy), + config.sim_config.dtypes['float_or_double'](self.grid.dz), self.srcinfo1_hertzian_gpu.gpudata, self.srcinfo2_hertzian_gpu.gpudata, self.srcwaves_hertzian_gpu.gpudata, @@ -660,21 +661,20 @@ class CUDAUpdates: """Copy data from GPU back to CPU to save to file(s).""" # Copy output from receivers array back to correct receiver objects if self.grid.rxs: - gpu_get_rx_array(self.rxs_gpu.get(), + get_rx_array_gpu(self.rxs_gpu.get(), self.rxcoords_gpu.get(), self.grid) # Copy data from any snapshots back to correct snapshot objects if self.grid.snapshots and not self.grid.snapsgpu2cpu: for i, snap in enumerate(self.grid.snapshots): - gpu_get_snapshot_array(self.snapEx_gpu.get(), + get_snapshot_array_gpu(self.snapEx_gpu.get(), self.snapEy_gpu.get(), self.snapEz_gpu.get(), self.snapHx_gpu.get(), self.snapHy_gpu.get(), self.snapHz_gpu.get(), - i, - snap) + i, snap) def cleanup(self): """Cleanup GPU context.""" diff --git a/gprMax/utilities.py b/gprMax/utilities.py index a5973d48..dbf043e6 100644 --- a/gprMax/utilities.py +++ b/gprMax/utilities.py @@ -421,10 +421,6 @@ class GPU: self.pcibusID = None self.constmem = None self.totalmem = None - # Threads per block for main field updates - self.tpb = (256, 1, 1) - # Blocks per grid for main field updates (set in grid.py) - self.bpg = None def get_gpu_info(self, drv): """Set information about GPU.