From cf4fa037ec2b8037dd7383e8d5dff7c233441132 Mon Sep 17 00:00:00 2001 From: Craig Warren Date: Mon, 31 Oct 2022 19:38:52 +0000 Subject: [PATCH] Updates to get CUDA solver basic functionality again. --- gprMax/cuda_opencl_el/knl_fields_updates.py | 8 +- gprMax/model_build_run.py | 8 +- gprMax/pml.py | 114 ++++----- gprMax/updates.py | 266 ++++++++++---------- 4 files changed, 200 insertions(+), 196 deletions(-) diff --git a/gprMax/cuda_opencl_el/knl_fields_updates.py b/gprMax/cuda_opencl_el/knl_fields_updates.py index 5acda02f..36d87fe4 100644 --- a/gprMax/cuda_opencl_el/knl_fields_updates.py +++ b/gprMax/cuda_opencl_el/knl_fields_updates.py @@ -50,6 +50,8 @@ update_electric = { // NX, NY, NZ: Number of cells of the model domain. // ID, E, H: Access to ID and field component arrays. + $CUDA_IDX + // Convert the linear index to subscripts for 3D field arrays int x = i / ($NY_FIELDS * $NZ_FIELDS); int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS; @@ -60,8 +62,6 @@ update_electric = { int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID; int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID; - $CUDA_IDX - // Ex component if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) { int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)]; @@ -119,6 +119,8 @@ update_magnetic = { // NX, NY, NZ: Number of cells of the model domain. // ID, E, H: Access to ID and field component arrays. + $CUDA_IDX + // Convert the linear index to subscripts for 3D field arrays int x = i / ($NY_FIELDS * $NZ_FIELDS); int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS; @@ -129,8 +131,6 @@ update_magnetic = { int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID; int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID; - $CUDA_IDX - // Hx component if (NX != 1 && x > 0 && x < NX && y >= 0 && y < NY && z >= 0 && z < NZ) { int materialHx = ID[IDX4D_ID(3,x_ID,y_ID,z_ID)]; diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py index a5984d5f..086b3711 100644 --- a/gprMax/model_build_run.py +++ b/gprMax/model_build_run.py @@ -290,15 +290,15 @@ class ModelBuildRun: f"than available physical CPU cores ({config.sim_config.hostinfo['physicalcores']}). " f"This may lead to degraded performance.") # Print information about any compute device, e.g. GPU, in use - elif config.sim_config.general['solver'] == 'cuda' or config.sim_config.general['solver'] == 'opencl': - solvername = config.sim_config.general['solver'].upper() - hostname = config.sim_config.hostinfo['hostname'] + elif config.sim_config.general['solver'] == 'cuda' or config.sim_config.general['solver'] == 'opencl': if config.sim_config.general['solver'] == 'opencl': solvername = 'OpenCL' platformname = ' on ' + ' '.join(config.get_model_config().device['dev'].platform.name.split()) + devicename = ' '.join(config.get_model_config().device['dev'].name.split()) else: + solvername = 'CUDA' platformname = '' - devicename = ' '.join(config.get_model_config().device['dev'].name.split()) + devicename = ' '.join(config.get_model_config().device['dev'].name().split()) logger.basic(f"\nModel {config.model_num + 1}/{config.sim_config.model_end} " f"on {config.sim_config.hostinfo['hostname']} " f"with {solvername} using {devicename}{platformname}") diff --git a/gprMax/pml.py b/gprMax/pml.py index 05a66ea3..cc0cb03c 100644 --- a/gprMax/pml.py +++ b/gprMax/pml.py @@ -363,25 +363,25 @@ class CUDAPML(PML): import pycuda.gpuarray as gpuarray - self.ERA_gpu = gpuarray.to_gpu(self.ERA) - self.ERB_gpu = gpuarray.to_gpu(self.ERB) - self.ERE_gpu = gpuarray.to_gpu(self.ERE) - self.ERF_gpu = gpuarray.to_gpu(self.ERF) - self.HRA_gpu = gpuarray.to_gpu(self.HRA) - self.HRB_gpu = gpuarray.to_gpu(self.HRB) - self.HRE_gpu = gpuarray.to_gpu(self.HRE) - self.HRF_gpu = gpuarray.to_gpu(self.HRF) - self.EPhi1_gpu = gpuarray.to_gpu(self.EPhi1) - self.EPhi2_gpu = gpuarray.to_gpu(self.EPhi2) - self.HPhi1_gpu = gpuarray.to_gpu(self.HPhi1) - self.HPhi2_gpu = gpuarray.to_gpu(self.HPhi2) + self.ERA_dev = gpuarray.to_gpu(self.ERA) + self.ERB_dev = gpuarray.to_gpu(self.ERB) + self.ERE_dev = gpuarray.to_gpu(self.ERE) + self.ERF_dev = gpuarray.to_gpu(self.ERF) + self.HRA_dev = gpuarray.to_gpu(self.HRA) + self.HRB_dev = gpuarray.to_gpu(self.HRB) + self.HRE_dev = gpuarray.to_gpu(self.HRE) + self.HRF_dev = gpuarray.to_gpu(self.HRF) + self.EPhi1_dev = gpuarray.to_gpu(self.EPhi1) + self.EPhi2_dev = gpuarray.to_gpu(self.EPhi2) + self.HPhi1_dev = gpuarray.to_gpu(self.HPhi1) + self.HPhi2_dev = gpuarray.to_gpu(self.HPhi2) def set_blocks_per_grid(self): """Set the blocks per grid size used for updating the PML field arrays on a GPU.""" - self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) * - (self.EPhi1_gpu.shape[2] + 1) * - (self.EPhi1_gpu.shape[3] + 1)) / self.G.tpb[0])), 1, 1) + self.bpg = (int(np.ceil(((self.EPhi1_dev.shape[1] + 1) * + (self.EPhi1_dev.shape[2] + 1) * + (self.EPhi1_dev.shape[3] + 1)) / self.G.tpb[0])), 1, 1) def get_update_funcs(self, kernelselectric, kernelsmagnetic): """Get update functions from PML kernels. @@ -393,39 +393,39 @@ class CUDAPML(PML): magnetic updates. """ - self.update_electric_gpu = kernelselectric.get_function('order' + str(len(self.CFS)) + '_' + self.direction) - self.update_magnetic_gpu = kernelsmagnetic.get_function('order' + str(len(self.CFS)) + '_' + self.direction) + self.update_electric_dev = kernelselectric.get_function('order' + str(len(self.CFS)) + '_' + self.direction) + self.update_magnetic_dev = kernelsmagnetic.get_function('order' + str(len(self.CFS)) + '_' + self.direction) def update_electric(self): """This functions updates electric field components with the PML correction on the GPU. """ - self.update_electric_gpu(np.int32(self.xs), + self.update_electric_dev(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), - np.int32(self.EPhi1_gpu.shape[1]), - np.int32(self.EPhi1_gpu.shape[2]), - np.int32(self.EPhi1_gpu.shape[3]), - np.int32(self.EPhi2_gpu.shape[1]), - np.int32(self.EPhi2_gpu.shape[2]), - np.int32(self.EPhi2_gpu.shape[3]), + np.int32(self.EPhi1_dev.shape[1]), + np.int32(self.EPhi1_dev.shape[2]), + np.int32(self.EPhi1_dev.shape[3]), + np.int32(self.EPhi2_dev.shape[1]), + np.int32(self.EPhi2_dev.shape[2]), + np.int32(self.EPhi2_dev.shape[3]), np.int32(self.thickness), - self.G.ID_gpu.gpudata, - self.G.Ex_gpu.gpudata, - self.G.Ey_gpu.gpudata, - self.G.Ez_gpu.gpudata, - self.G.Hx_gpu.gpudata, - self.G.Hy_gpu.gpudata, - self.G.Hz_gpu.gpudata, - self.EPhi1_gpu.gpudata, - self.EPhi2_gpu.gpudata, - self.ERA_gpu.gpudata, - self.ERB_gpu.gpudata, - self.ERE_gpu.gpudata, - self.ERF_gpu.gpudata, + self.G.ID_dev.gpudata, + self.G.Ex_dev.gpudata, + self.G.Ey_dev.gpudata, + self.G.Ez_dev.gpudata, + self.G.Hx_dev.gpudata, + self.G.Hy_dev.gpudata, + self.G.Hz_dev.gpudata, + self.EPhi1_dev.gpudata, + self.EPhi2_dev.gpudata, + self.ERA_dev.gpudata, + self.ERB_dev.gpudata, + self.ERE_dev.gpudata, + self.ERF_dev.gpudata, config.sim_config.dtypes['float_or_double'](self.d), block=self.G.tpb, grid=self.bpg) @@ -433,32 +433,32 @@ class CUDAPML(PML): """This functions updates magnetic field components with the PML correction on the GPU. """ - self.update_magnetic_gpu(np.int32(self.xs), + self.update_magnetic_dev(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), - np.int32(self.HPhi1_gpu.shape[1]), - np.int32(self.HPhi1_gpu.shape[2]), - np.int32(self.HPhi1_gpu.shape[3]), - np.int32(self.HPhi2_gpu.shape[1]), - np.int32(self.HPhi2_gpu.shape[2]), - np.int32(self.HPhi2_gpu.shape[3]), + np.int32(self.HPhi1_dev.shape[1]), + np.int32(self.HPhi1_dev.shape[2]), + np.int32(self.HPhi1_dev.shape[3]), + np.int32(self.HPhi2_dev.shape[1]), + np.int32(self.HPhi2_dev.shape[2]), + np.int32(self.HPhi2_dev.shape[3]), np.int32(self.thickness), - self.G.ID_gpu.gpudata, - self.G.Ex_gpu.gpudata, - self.G.Ey_gpu.gpudata, - self.G.Ez_gpu.gpudata, - self.G.Hx_gpu.gpudata, - self.G.Hy_gpu.gpudata, - self.G.Hz_gpu.gpudata, - self.HPhi1_gpu.gpudata, - self.HPhi2_gpu.gpudata, - self.HRA_gpu.gpudata, - self.HRB_gpu.gpudata, - self.HRE_gpu.gpudata, - self.HRF_gpu.gpudata, + self.G.ID_dev.gpudata, + self.G.Ex_dev.gpudata, + self.G.Ey_dev.gpudata, + self.G.Ez_dev.gpudata, + self.G.Hx_dev.gpudata, + self.G.Hy_dev.gpudata, + self.G.Hz_dev.gpudata, + self.HPhi1_dev.gpudata, + self.HPhi2_dev.gpudata, + self.HRA_dev.gpudata, + self.HRB_dev.gpudata, + self.HRE_dev.gpudata, + self.HRF_dev.gpudata, config.sim_config.dtypes['float_or_double'](self.d), block=self.G.tpb, grid=self.bpg) diff --git a/gprMax/updates.py b/gprMax/updates.py index 9914374f..dc269d87 100644 --- a/gprMax/updates.py +++ b/gprMax/updates.py @@ -265,7 +265,8 @@ class CUDAUpdates: # Set common substitutions for use in kernels self.subs_name_args = {'REAL': config.sim_config.dtypes['C_float_or_double'], 'COMPLEX': config.get_model_config().materials['dispersiveCdtype']} - self.subs_func = {'CUDA_IDX': 'int i = blockIdx.x * blockDim.x + threadIdx.x;', + self.subs_func = {'REAL': config.sim_config.dtypes['C_float_or_double'], + 'CUDA_IDX': 'int i = blockIdx.x * blockDim.x + threadIdx.x;', 'NX_FIELDS': self.grid.nx + 1, 'NY_FIELDS': self.grid.ny + 1, 'NZ_FIELDS': self.grid.nz + 1, @@ -273,6 +274,9 @@ class CUDAUpdates: 'NY_ID': self.grid.ID.shape[2], 'NZ_ID': self.grid.ID.shape[3]} + # Enviroment for templating kernels + self.env = Environment(loader=PackageLoader('gprMax', 'cuda_opencl_el')) + # Initialise arrays on GPU, prepare kernels, and get kernel functions self._set_macros() self._set_field_knls() @@ -351,17 +355,17 @@ class CUDAUpdates: get kernel functions. """ - bld = self._build_knl(knl_fields_updates.update_electric['func'], + bld = self._build_knl(knl_fields_updates.update_electric, self.subs_name_args, self.subs_func) knlE = self.source_module(bld, - options=config.sim_config.cuda['nvcc_opts']) - self.update_electric_gpu = knlE.get_function("update_electric") + options=config.sim_config.devices['nvcc_opts']) + self.update_electric_dev = knlE.get_function("update_electric") - bld = self._build_knl(knl_fields_updates.update_magnetic['func'], + bld = self._build_knl(knl_fields_updates.update_magnetic, self.subs_name_args, self.subs_func) knlH = self.source_module(bld, - options=config.sim_config.cuda['nvcc_opts']) - self.update_magnetic_gpu = knlH.get_function("update_magnetic") + options=config.sim_config.devices['nvcc_opts']) + self.update_magnetic_dev = knlH.get_function("update_magnetic") self._copy_mat_coeffs(knlE, knlH) @@ -379,13 +383,13 @@ class CUDAUpdates: bld = self._build_knl(knl_fields_updates.update_electric_dispersive_A['func'], self.subs_name_args, self.subs_func) knl = self.source_module(bld, - options=config.sim_config.cuda['nvcc_opts']) + options=config.sim_config.devices['nvcc_opts']) self.dispersive_update_a = knl.get_function("update_electric_dispersive_A") bld = self._build_knl(knl_fields_updates.update_electric_dispersive_B['func'], self.subs_name_args, self.subs_func) knl = self.source_module(bld, - options=config.sim_config.cuda['nvcc_opts']) + options=config.sim_config.devices['nvcc_opts']) self.dispersive_update_b = knl.get_function("update_electric_dispersive_B") # Electric and magnetic field updates - set blocks per grid and @@ -403,21 +407,21 @@ class CUDAUpdates: # Set block per grid, initialise arrays on GPU, and get kernel functions for pml in self.grid.pmls: - pml.set_blocks_per_grid() pml.htod_field_arrays() + pml.set_blocks_per_grid() knl_name = 'order' + str(len(pml.CFS)) + '_' + pml.direction self.subs_name_args['FUNC'] = knl_name knl_electric = getattr(knl_pml_updates_electric, knl_name) - bld = self._build_knl(knl_electric['func'], + bld = self._build_knl(knl_electric, self.subs_name_args, self.subs_func) - knlE = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts']) + knlE = self.source_module(bld, options=config.sim_config.devices['nvcc_opts']) pml.update_electric_dev = knlE.get_function(knl_name) knl_magnetic = getattr(knl_pml_updates_magnetic, knl_name) - bld = self._build_knl(knl_magnetic['func'], + bld = self._build_knl(knl_magnetic, self.subs_name_args, self.subs_func) - knlH = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts']) + knlH = self.source_module(bld, options=config.sim_config.devices['nvcc_opts']) pml.update_magnetic_dev = knlH.get_function(knl_name) self._copy_mat_coeffs(knlE, knlH) @@ -426,7 +430,7 @@ class CUDAUpdates: """Receivers - initialise arrays on GPU, prepare kernel and get kernel function. """ - self.rxcoords_gpu, self.rxs_gpu = htod_rx_arrays(self.grid) + self.rxcoords_dev, self.rxs_dev = htod_rx_arrays(self.grid) self.subs_func.update({'REAL': config.sim_config.dtypes['C_float_or_double'], 'NY_RXCOORDS': 3, @@ -434,10 +438,10 @@ class CUDAUpdates: 'NY_RXS': self.grid.iterations, 'NZ_RXS': len(self.grid.rxs)}) - bld = self._build_knl(knl_store_outputs.store_outputs['func'], + bld = self._build_knl(knl_store_outputs.store_outputs, self.subs_name_args, self.subs_func) - knl = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts']) - self.store_outputs_gpu = knl.get_function("store_outputs") + knl = self.source_module(bld, options=config.sim_config.devices['nvcc_opts']) + self.store_outputs_dev = knl.get_function("store_outputs") def _set_src_knls(self): """Sources - initialise arrays on GPU, prepare kernel and get kernel @@ -447,23 +451,23 @@ class CUDAUpdates: 'NY_SRCWAVES': self.grid.iteration}) if self.grid.hertziandipoles: - self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = htod_src_arrays(self.grid.hertziandipoles, self.grid) - bld = self._build_knl(knl_source_updates.update_hertzian_dipole['func'], + self.srcinfo1_hertzian_dev, self.srcinfo2_hertzian_dev, self.srcwaves_hertzian_dev = htod_src_arrays(self.grid.hertziandipoles, self.grid) + bld = self._build_knl(knl_source_updates.update_hertzian_dipole, self.subs_name_args, self.subs_func) - knl = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts']) - self.update_hertzian_dipole_gpu = knl.get_function("update_hertzian_dipole") + knl = self.source_module(bld, options=config.sim_config.devices['nvcc_opts']) + self.update_hertzian_dipole_dev = knl.get_function("update_hertzian_dipole") if self.grid.magneticdipoles: - self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = htod_src_arrays(self.grid.magneticdipoles, self.grid) - bld = self._build_knl(knl_source_updates.update_magnetic_dipole['func'], + self.srcinfo1_magnetic_dev, self.srcinfo2_magnetic_dev, self.srcwaves_magnetic_dev = htod_src_arrays(self.grid.magneticdipoles, self.grid) + bld = self._build_knl(knl_source_updates.update_magnetic_dipole, self.subs_name_args, self.subs_func) - knl = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts']) - self.update_magnetic_dipole_gpu = knl.get_function("update_magnetic_dipole") + knl = self.source_module(bld, options=config.sim_config.devices['nvcc_opts']) + self.update_magnetic_dipole_dev = knl.get_function("update_magnetic_dipole") if self.grid.voltagesources: - self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = htod_src_arrays(self.grid.voltagesources, self.grid) - bld = self._build_knl(knl_source_updates.update_voltage_source['func'], + self.srcinfo1_voltage_dev, self.srcinfo2_voltage_dev, self.srcwaves_voltage_dev = htod_src_arrays(self.grid.voltagesources, self.grid) + bld = self._build_knl(knl_source_updates.update_voltage_source, self.subs_name_args, self.subs_func) - knl = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts']) - self.update_voltage_source_gpu = knl.get_function("update_voltage_source") + knl = self.source_module(bld, options=config.sim_config.devices['nvcc_opts']) + self.update_voltage_source_dev = knl.get_function("update_voltage_source") if (self.grid.hertziandipoles + self.grid.magneticdipoles + @@ -474,17 +478,17 @@ class CUDAUpdates: """Snapshots - initialise arrays on GPU, prepare kernel and get kernel function. """ - self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = htod_snapshot_array(self.grid) + self.snapEx_dev, self.snapEy_dev, self.snapEz_dev, self.snapHx_dev, self.snapHy_dev, self.snapHz_dev = htod_snapshot_array(self.grid) self.subs_func.update({'REAL': config.sim_config.dtypes['C_float_or_double'], 'NX_SNAPS': Snapshot.nx_max, 'NY_SNAPS': Snapshot.ny_max, 'NZ_SNAPS': Snapshot.nz_max}) - bld = self._build_knl(knl_snapshots.store_snapshot['func'], + bld = self._build_knl(knl_snapshots.store_snapshot, self.subs_name_args, self.subs_func) - knl = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts']) - self.store_snapshot_gpu = knl.get_function("store_snapshot") + knl = self.source_module(bld, options=config.sim_config.devices['nvcc_opts']) + self.store_snapshot_dev = knl.get_function("store_snapshot") def _copy_mat_coeffs(self, knlE, knlH): """Copy material coefficient arrays to constant memory of GPU @@ -497,8 +501,8 @@ class CUDAUpdates: # Check if coefficient arrays will fit on constant memory of GPU if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes - > config.get_model_config().cuda['gpu'].total_constant_memory): - logger.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.get_model_config().cuda['gpu'].total_constant_memory)} on {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU") + > config.get_model_config().device['dev'].total_constant_memory): + logger.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.get_model_config().device['dev'].total_constant_memory)} on {config.get_model_config().device['dev'].deviceID} - {config.get_model_config().device['dev'].name} GPU") raise ValueError updatecoeffsE = knlE.get_global('updatecoeffsE')[0] @@ -509,16 +513,16 @@ class CUDAUpdates: def store_outputs(self): """Store field component values for every receiver.""" if self.grid.rxs: - self.store_outputs_gpu(np.int32(len(self.grid.rxs)), + self.store_outputs_dev(np.int32(len(self.grid.rxs)), np.int32(self.grid.iteration), - self.rxcoords_gpu.gpudata, - self.rxs_gpu.gpudata, - self.grid.Ex_gpu.gpudata, - self.grid.Ey_gpu.gpudata, - self.grid.Ez_gpu.gpudata, - self.grid.Hx_gpu.gpudata, - self.grid.Hy_gpu.gpudata, - self.grid.Hz_gpu.gpudata, + self.rxcoords_dev.gpudata, + self.rxs_dev.gpudata, + self.grid.Ex_dev.gpudata, + self.grid.Ey_dev.gpudata, + self.grid.Ez_dev.gpudata, + self.grid.Hx_dev.gpudata, + self.grid.Hy_dev.gpudata, + self.grid.Hz_dev.gpudata, block=(1, 1, 1), grid=(round32(len(self.grid.rxs)), 1, 1)) @@ -532,7 +536,7 @@ class CUDAUpdates: for i, snap in enumerate(self.grid.snapshots): if snap.time == iteration + 1: snapno = 0 if config.get_model_config().cuda['snapsgpu2cpu'] else i - self.store_snapshot_gpu(np.int32(snapno), + self.store_snapshot_dev(np.int32(snapno), np.int32(snap.xs), np.int32(snap.xf), np.int32(snap.ys), @@ -542,41 +546,41 @@ class CUDAUpdates: np.int32(snap.dx), np.int32(snap.dy), np.int32(snap.dz), - self.grid.Ex_gpu.gpudata, - self.grid.Ey_gpu.gpudata, - self.grid.Ez_gpu.gpudata, - self.grid.Hx_gpu.gpudata, - self.grid.Hy_gpu.gpudata, - self.grid.Hz_gpu.gpudata, - self.snapEx_gpu.gpudata, - self.snapEy_gpu.gpudata, - self.snapEz_gpu.gpudata, - self.snapHx_gpu.gpudata, - self.snapHy_gpu.gpudata, - self.snapHz_gpu.gpudata, + self.grid.Ex_dev.gpudata, + self.grid.Ey_dev.gpudata, + self.grid.Ez_dev.gpudata, + self.grid.Hx_dev.gpudata, + self.grid.Hy_dev.gpudata, + self.grid.Hz_dev.gpudata, + self.snapEx_dev.gpudata, + self.snapEy_dev.gpudata, + self.snapEz_dev.gpudata, + self.snapHx_dev.gpudata, + self.snapHy_dev.gpudata, + self.snapHz_dev.gpudata, block=Snapshot.tpb, grid=Snapshot.bpg) if config.get_model_config().cuda['snapsgpu2cpu']: - dtoh_snapshot_array(self.snapEx_gpu.get(), - self.snapEy_gpu.get(), - self.snapEz_gpu.get(), - self.snapHx_gpu.get(), - self.snapHy_gpu.get(), - self.snapHz_gpu.get(), + dtoh_snapshot_array(self.snapEx_dev.get(), + self.snapEy_dev.get(), + self.snapEz_dev.get(), + self.snapHx_dev.get(), + self.snapHy_dev.get(), + self.snapHz_dev.get(), 0, snap) def update_magnetic(self): """Update magnetic field components.""" - self.update_magnetic_gpu(np.int32(self.grid.nx), + self.update_magnetic_dev(np.int32(self.grid.nx), np.int32(self.grid.ny), np.int32(self.grid.nz), - self.grid.ID_gpu.gpudata, - self.grid.Hx_gpu.gpudata, - self.grid.Hy_gpu.gpudata, - self.grid.Hz_gpu.gpudata, - self.grid.Ex_gpu.gpudata, - self.grid.Ey_gpu.gpudata, - self.grid.Ez_gpu.gpudata, + self.grid.ID_dev.gpudata, + self.grid.Hx_dev.gpudata, + self.grid.Hy_dev.gpudata, + self.grid.Hz_dev.gpudata, + self.grid.Ex_dev.gpudata, + self.grid.Ey_dev.gpudata, + self.grid.Ez_dev.gpudata, block=self.grid.tpb, grid=self.grid.bpg) @@ -588,18 +592,18 @@ class CUDAUpdates: def update_magnetic_sources(self): """Update magnetic field components from sources.""" if self.grid.magneticdipoles: - self.update_magnetic_dipole_gpu(np.int32(len(self.grid.magneticdipoles)), + self.update_magnetic_dipole_dev(np.int32(len(self.grid.magneticdipoles)), np.int32(self.grid.iteration), config.sim_config.dtypes['float_or_double'](self.grid.dx), config.sim_config.dtypes['float_or_double'](self.grid.dy), config.sim_config.dtypes['float_or_double'](self.grid.dz), - self.srcinfo1_magnetic_gpu.gpudata, - self.srcinfo2_magnetic_gpu.gpudata, - self.srcwaves_magnetic_gpu.gpudata, - self.grid.ID_gpu.gpudata, - self.grid.Hx_gpu.gpudata, - self.grid.Hy_gpu.gpudata, - self.grid.Hz_gpu.gpudata, + self.srcinfo1_magnetic_dev.gpudata, + self.srcinfo2_magnetic_dev.gpudata, + self.srcwaves_magnetic_dev.gpudata, + self.grid.ID_dev.gpudata, + self.grid.Hx_dev.gpudata, + self.grid.Hy_dev.gpudata, + self.grid.Hz_dev.gpudata, block=(1, 1, 1), grid=(round32(len(self.grid.magneticdipoles)), 1, 1)) @@ -607,16 +611,16 @@ class CUDAUpdates: """Update electric field components.""" # All materials are non-dispersive so do standard update. if config.get_model_config().materials['maxpoles'] == 0: - self.update_electric_gpu(np.int32(self.grid.nx), + self.update_electric_dev(np.int32(self.grid.nx), np.int32(self.grid.ny), np.int32(self.grid.nz), - self.grid.ID_gpu.gpudata, - self.grid.Ex_gpu.gpudata, - self.grid.Ey_gpu.gpudata, - self.grid.Ez_gpu.gpudata, - self.grid.Hx_gpu.gpudata, - self.grid.Hy_gpu.gpudata, - self.grid.Hz_gpu.gpudata, + self.grid.ID_dev.gpudata, + self.grid.Ex_dev.gpudata, + self.grid.Ey_dev.gpudata, + self.grid.Ez_dev.gpudata, + self.grid.Hx_dev.gpudata, + self.grid.Hy_dev.gpudata, + self.grid.Hz_dev.gpudata, block=self.grid.tpb, grid=self.grid.bpg) @@ -627,17 +631,17 @@ class CUDAUpdates: np.int32(self.grid.ny), np.int32(self.grid.nz), np.int32(config.get_model_config().materials['maxpoles']), - self.grid.updatecoeffsdispersive_gpu.gpudata, - self.grid.Tx_gpu.gpudata, - self.grid.Ty_gpu.gpudata, - self.grid.Tz_gpu.gpudata, - self.grid.ID_gpu.gpudata, - self.grid.Ex_gpu.gpudata, - self.grid.Ey_gpu.gpudata, - self.grid.Ez_gpu.gpudata, - self.grid.Hx_gpu.gpudata, - self.grid.Hy_gpu.gpudata, - self.grid.Hz_gpu.gpudata, + self.grid.updatecoeffsdispersive_dev.gpudata, + self.grid.Tx_dev.gpudata, + self.grid.Ty_dev.gpudata, + self.grid.Tz_dev.gpudata, + self.grid.ID_dev.gpudata, + self.grid.Ex_dev.gpudata, + self.grid.Ey_dev.gpudata, + self.grid.Ez_dev.gpudata, + self.grid.Hx_dev.gpudata, + self.grid.Hy_dev.gpudata, + self.grid.Hz_dev.gpudata, block=self.grid.tpb, grid=self.grid.bpg) @@ -651,34 +655,34 @@ class CUDAUpdates: update any Hertzian dipole sources last. """ if self.grid.voltagesources: - self.update_voltage_source_gpu(np.int32(len(self.grid.voltagesources)), + self.update_voltage_source_dev(np.int32(len(self.grid.voltagesources)), np.int32(self.grid.iteration), config.sim_config.dtypes['float_or_double'](self.grid.dx), config.sim_config.dtypes['float_or_double'](self.grid.dy), config.sim_config.dtypes['float_or_double'](self.grid.dz), - self.srcinfo1_voltage_gpu.gpudata, - self.srcinfo2_voltage_gpu.gpudata, - self.srcwaves_voltage_gpu.gpudata, - self.grid.ID_gpu.gpudata, - self.grid.Ex_gpu.gpudata, - self.grid.Ey_gpu.gpudata, - self.grid.Ez_gpu.gpudata, + self.srcinfo1_voltage_dev.gpudata, + self.srcinfo2_voltage_dev.gpudata, + self.srcwaves_voltage_dev.gpudata, + self.grid.ID_dev.gpudata, + self.grid.Ex_dev.gpudata, + self.grid.Ey_dev.gpudata, + self.grid.Ez_dev.gpudata, block=(1, 1, 1), grid=(round32(len(self.grid.voltagesources)), 1, 1)) if self.grid.hertziandipoles: - self.update_hertzian_dipole_gpu(np.int32(len(self.grid.hertziandipoles)), + self.update_hertzian_dipole_dev(np.int32(len(self.grid.hertziandipoles)), np.int32(self.grid.iteration), config.sim_config.dtypes['float_or_double'](self.grid.dx), config.sim_config.dtypes['float_or_double'](self.grid.dy), config.sim_config.dtypes['float_or_double'](self.grid.dz), - self.srcinfo1_hertzian_gpu.gpudata, - self.srcinfo2_hertzian_gpu.gpudata, - self.srcwaves_hertzian_gpu.gpudata, - self.grid.ID_gpu.gpudata, - self.grid.Ex_gpu.gpudata, - self.grid.Ey_gpu.gpudata, - self.grid.Ez_gpu.gpudata, + self.srcinfo1_hertzian_dev.gpudata, + self.srcinfo2_hertzian_dev.gpudata, + self.srcwaves_hertzian_dev.gpudata, + self.grid.ID_dev.gpudata, + self.grid.Ex_dev.gpudata, + self.grid.Ey_dev.gpudata, + self.grid.Ez_dev.gpudata, block=(1, 1, 1), grid=(round32(len(self.grid.hertziandipoles)), 1, 1)) @@ -696,14 +700,14 @@ class CUDAUpdates: np.int32(self.grid.ny), np.int32(self.grid.nz), np.int32(config.get_model_config().materials['maxpoles']), - self.grid.updatecoeffsdispersive_gpu.gpudata, - self.grid.Tx_gpu.gpudata, - self.grid.Ty_gpu.gpudata, - self.grid.Tz_gpu.gpudata, - self.grid.ID_gpu.gpudata, - self.grid.Ex_gpu.gpudata, - self.grid.Ey_gpu.gpudata, - self.grid.Ez_gpu.gpudata, + self.grid.updatecoeffsdispersive_dev.gpudata, + self.grid.Tx_dev.gpudata, + self.grid.Ty_dev.gpudata, + self.grid.Tz_dev.gpudata, + self.grid.ID_dev.gpudata, + self.grid.Ex_dev.gpudata, + self.grid.Ey_dev.gpudata, + self.grid.Ez_dev.gpudata, block=self.grid.tpb, grid=self.grid.bpg) @@ -738,19 +742,19 @@ class CUDAUpdates: """Copy data from GPU back to CPU to save to file(s).""" # Copy output from receivers array back to correct receiver objects if self.grid.rxs: - dtoh_rx_array(self.rxs_gpu.get(), - self.rxcoords_gpu.get(), + dtoh_rx_array(self.rxs_dev.get(), + self.rxcoords_dev.get(), self.grid) # Copy data from any snapshots back to correct snapshot objects if self.grid.snapshots and not config.get_model_config().cuda['snapsgpu2cpu']: for i, snap in enumerate(self.grid.snapshots): - dtoh_snapshot_array(self.snapEx_gpu.get(), - self.snapEy_gpu.get(), - self.snapEz_gpu.get(), - self.snapHx_gpu.get(), - self.snapHy_gpu.get(), - self.snapHz_gpu.get(), + dtoh_snapshot_array(self.snapEx_dev.get(), + self.snapEy_dev.get(), + self.snapEz_dev.get(), + self.snapHx_dev.get(), + self.snapHy_dev.get(), + self.snapHz_dev.get(), i, snap) def cleanup(self):