Mirrored from https://gitee.com/sunhf/gprMax.git
Synced 2025-08-07 15:10:13 +08:00
Updates to get CUDA solver basic functionality again.
@@ -50,6 +50,8 @@ update_electric = {
// NX, NY, NZ: Number of cells of the model domain.
// ID, E, H: Access to ID and field component arrays.

$CUDA_IDX

// Convert the linear index to subscripts for 3D field arrays
int x = i / ($NY_FIELDS * $NZ_FIELDS);
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
@@ -60,8 +62,6 @@ update_electric = {
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;

$CUDA_IDX

// Ex component
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
@@ -119,6 +119,8 @@ update_magnetic = {
// NX, NY, NZ: Number of cells of the model domain.
// ID, E, H: Access to ID and field component arrays.

$CUDA_IDX

// Convert the linear index to subscripts for 3D field arrays
int x = i / ($NY_FIELDS * $NZ_FIELDS);
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
@@ -129,8 +131,6 @@ update_magnetic = {
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;

$CUDA_IDX

// Hx component
if (NX != 1 && x > 0 && x < NX && y >= 0 && y < NY && z >= 0 && z < NZ) {
int materialHx = ID[IDX4D_ID(3,x_ID,y_ID,z_ID)];
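Editorial note (not part of the commit): the hunks above relocate the $CUDA_IDX substitution, which the substitution dictionary later in this commit expands to "int i = blockIdx.x * blockDim.x + threadIdx.x;", so that the linear index i is defined before it is first used. A minimal NumPy sketch, with hypothetical sizes, of the index arithmetic the kernel comments describe:

    # Decompose a linear index i into (x, y, z) subscripts of a C-ordered
    # array of shape (NX_FIELDS, NY_FIELDS, NZ_FIELDS), as in the kernels above.
    import numpy as np

    NX_FIELDS, NY_FIELDS, NZ_FIELDS = 4, 5, 6  # hypothetical sizes

    def subscripts(i):
        x = i // (NY_FIELDS * NZ_FIELDS)
        y = (i % (NY_FIELDS * NZ_FIELDS)) // NZ_FIELDS
        z = (i % (NY_FIELDS * NZ_FIELDS)) % NZ_FIELDS
        return x, y, z

    a = np.arange(NX_FIELDS * NY_FIELDS * NZ_FIELDS).reshape(NX_FIELDS, NY_FIELDS, NZ_FIELDS)
    # Every linear index maps back to the subscripts of the C-ordered array.
    assert all(a[subscripts(i)] == i for i in range(a.size))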
@@ -290,15 +290,15 @@ class ModelBuildRun:
f"than available physical CPU cores ({config.sim_config.hostinfo['physicalcores']}). "
f"This may lead to degraded performance.")
# Print information about any compute device, e.g. GPU, in use
elif config.sim_config.general['solver'] == 'cuda' or config.sim_config.general['solver'] == 'opencl':
solvername = config.sim_config.general['solver'].upper()
hostname = config.sim_config.hostinfo['hostname']
elif config.sim_config.general['solver'] == 'cuda' or config.sim_config.general['solver'] == 'opencl':
if config.sim_config.general['solver'] == 'opencl':
solvername = 'OpenCL'
platformname = ' on ' + ' '.join(config.get_model_config().device['dev'].platform.name.split())
devicename = ' '.join(config.get_model_config().device['dev'].name.split())
else:
solvername = 'CUDA'
platformname = ''
devicename = ' '.join(config.get_model_config().device['dev'].name.split())
devicename = ' '.join(config.get_model_config().device['dev'].name().split())
logger.basic(f"\nModel {config.model_num + 1}/{config.sim_config.model_end} "
f"on {config.sim_config.hostinfo['hostname']} "
f"with {solvername} using {devicename}{platformname}")
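Editorial note (not part of the commit): the paired devicename lines above differ only in whether name is an attribute or a method call. A short sketch of that API difference, assuming PyCUDA and PyOpenCL are installed and a device is present: PyCUDA returns the device name from a method, while PyOpenCL exposes device and platform names as plain properties.

    import pycuda.driver as drv
    import pyopencl as cl

    drv.init()
    print(drv.Device(0).name())                 # PyCUDA: name is a method

    ocl_dev = cl.get_platforms()[0].get_devices()[0]
    print(ocl_dev.platform.name, ocl_dev.name)  # PyOpenCL: plain attributes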
gprMax/pml.py (114 changed lines)
@@ -363,25 +363,25 @@ class CUDAPML(PML):

import pycuda.gpuarray as gpuarray

self.ERA_gpu = gpuarray.to_gpu(self.ERA)
self.ERB_gpu = gpuarray.to_gpu(self.ERB)
self.ERE_gpu = gpuarray.to_gpu(self.ERE)
self.ERF_gpu = gpuarray.to_gpu(self.ERF)
self.HRA_gpu = gpuarray.to_gpu(self.HRA)
self.HRB_gpu = gpuarray.to_gpu(self.HRB)
self.HRE_gpu = gpuarray.to_gpu(self.HRE)
self.HRF_gpu = gpuarray.to_gpu(self.HRF)
self.EPhi1_gpu = gpuarray.to_gpu(self.EPhi1)
self.EPhi2_gpu = gpuarray.to_gpu(self.EPhi2)
self.HPhi1_gpu = gpuarray.to_gpu(self.HPhi1)
self.HPhi2_gpu = gpuarray.to_gpu(self.HPhi2)
self.ERA_dev = gpuarray.to_gpu(self.ERA)
self.ERB_dev = gpuarray.to_gpu(self.ERB)
self.ERE_dev = gpuarray.to_gpu(self.ERE)
self.ERF_dev = gpuarray.to_gpu(self.ERF)
self.HRA_dev = gpuarray.to_gpu(self.HRA)
self.HRB_dev = gpuarray.to_gpu(self.HRB)
self.HRE_dev = gpuarray.to_gpu(self.HRE)
self.HRF_dev = gpuarray.to_gpu(self.HRF)
self.EPhi1_dev = gpuarray.to_gpu(self.EPhi1)
self.EPhi2_dev = gpuarray.to_gpu(self.EPhi2)
self.HPhi1_dev = gpuarray.to_gpu(self.HPhi1)
self.HPhi2_dev = gpuarray.to_gpu(self.HPhi2)

def set_blocks_per_grid(self):
"""Set the blocks per grid size used for updating the PML field arrays
on a GPU."""
self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) *
(self.EPhi1_gpu.shape[2] + 1) *
(self.EPhi1_gpu.shape[3] + 1)) / self.G.tpb[0])), 1, 1)
self.bpg = (int(np.ceil(((self.EPhi1_dev.shape[1] + 1) *
(self.EPhi1_dev.shape[2] + 1) *
(self.EPhi1_dev.shape[3] + 1)) / self.G.tpb[0])), 1, 1)

def get_update_funcs(self, kernelselectric, kernelsmagnetic):
"""Get update functions from PML kernels.
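Editorial note (not part of the commit): the assignments above copy the PML coefficient and accumulator arrays from host NumPy arrays to the GPU with pycuda.gpuarray.to_gpu. A minimal, self-contained sketch of that pattern; pycuda.autoinit is used here only to create a context, and the array name is illustrative:

    import numpy as np
    import pycuda.autoinit  # noqa: F401 - creates a CUDA context for this sketch
    import pycuda.gpuarray as gpuarray

    ERA = np.zeros((1, 10), dtype=np.float32)   # hypothetical PML coefficient array
    ERA_dev = gpuarray.to_gpu(ERA)              # host-to-device copy, same shape/dtype
    assert ERA_dev.shape == ERA.shape
    assert np.array_equal(ERA_dev.get(), ERA)   # .get() copies back to the host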
@@ -393,39 +393,39 @@ class CUDAPML(PML):
magnetic updates.
"""

self.update_electric_gpu = kernelselectric.get_function('order' + str(len(self.CFS)) + '_' + self.direction)
self.update_magnetic_gpu = kernelsmagnetic.get_function('order' + str(len(self.CFS)) + '_' + self.direction)
self.update_electric_dev = kernelselectric.get_function('order' + str(len(self.CFS)) + '_' + self.direction)
self.update_magnetic_dev = kernelsmagnetic.get_function('order' + str(len(self.CFS)) + '_' + self.direction)

def update_electric(self):
"""This functions updates electric field components with the PML
correction on the GPU.
"""
self.update_electric_gpu(np.int32(self.xs),
self.update_electric_dev(np.int32(self.xs),
np.int32(self.xf),
np.int32(self.ys),
np.int32(self.yf),
np.int32(self.zs),
np.int32(self.zf),
np.int32(self.EPhi1_gpu.shape[1]),
np.int32(self.EPhi1_gpu.shape[2]),
np.int32(self.EPhi1_gpu.shape[3]),
np.int32(self.EPhi2_gpu.shape[1]),
np.int32(self.EPhi2_gpu.shape[2]),
np.int32(self.EPhi2_gpu.shape[3]),
np.int32(self.EPhi1_dev.shape[1]),
np.int32(self.EPhi1_dev.shape[2]),
np.int32(self.EPhi1_dev.shape[3]),
np.int32(self.EPhi2_dev.shape[1]),
np.int32(self.EPhi2_dev.shape[2]),
np.int32(self.EPhi2_dev.shape[3]),
np.int32(self.thickness),
self.G.ID_gpu.gpudata,
self.G.Ex_gpu.gpudata,
self.G.Ey_gpu.gpudata,
self.G.Ez_gpu.gpudata,
self.G.Hx_gpu.gpudata,
self.G.Hy_gpu.gpudata,
self.G.Hz_gpu.gpudata,
self.EPhi1_gpu.gpudata,
self.EPhi2_gpu.gpudata,
self.ERA_gpu.gpudata,
self.ERB_gpu.gpudata,
self.ERE_gpu.gpudata,
self.ERF_gpu.gpudata,
self.G.ID_dev.gpudata,
self.G.Ex_dev.gpudata,
self.G.Ey_dev.gpudata,
self.G.Ez_dev.gpudata,
self.G.Hx_dev.gpudata,
self.G.Hy_dev.gpudata,
self.G.Hz_dev.gpudata,
self.EPhi1_dev.gpudata,
self.EPhi2_dev.gpudata,
self.ERA_dev.gpudata,
self.ERB_dev.gpudata,
self.ERE_dev.gpudata,
self.ERF_dev.gpudata,
config.sim_config.dtypes['float_or_double'](self.d),
block=self.G.tpb, grid=self.bpg)
@@ -433,32 +433,32 @@ class CUDAPML(PML):
"""This functions updates magnetic field components with the PML
correction on the GPU.
"""
self.update_magnetic_gpu(np.int32(self.xs),
self.update_magnetic_dev(np.int32(self.xs),
np.int32(self.xf),
np.int32(self.ys),
np.int32(self.yf),
np.int32(self.zs),
np.int32(self.zf),
np.int32(self.HPhi1_gpu.shape[1]),
np.int32(self.HPhi1_gpu.shape[2]),
np.int32(self.HPhi1_gpu.shape[3]),
np.int32(self.HPhi2_gpu.shape[1]),
np.int32(self.HPhi2_gpu.shape[2]),
np.int32(self.HPhi2_gpu.shape[3]),
np.int32(self.HPhi1_dev.shape[1]),
np.int32(self.HPhi1_dev.shape[2]),
np.int32(self.HPhi1_dev.shape[3]),
np.int32(self.HPhi2_dev.shape[1]),
np.int32(self.HPhi2_dev.shape[2]),
np.int32(self.HPhi2_dev.shape[3]),
np.int32(self.thickness),
self.G.ID_gpu.gpudata,
self.G.Ex_gpu.gpudata,
self.G.Ey_gpu.gpudata,
self.G.Ez_gpu.gpudata,
self.G.Hx_gpu.gpudata,
self.G.Hy_gpu.gpudata,
self.G.Hz_gpu.gpudata,
self.HPhi1_gpu.gpudata,
self.HPhi2_gpu.gpudata,
self.HRA_gpu.gpudata,
self.HRB_gpu.gpudata,
self.HRE_gpu.gpudata,
self.HRF_gpu.gpudata,
self.G.ID_dev.gpudata,
self.G.Ex_dev.gpudata,
self.G.Ey_dev.gpudata,
self.G.Ez_dev.gpudata,
self.G.Hx_dev.gpudata,
self.G.Hy_dev.gpudata,
self.G.Hz_dev.gpudata,
self.HPhi1_dev.gpudata,
self.HPhi2_dev.gpudata,
self.HRA_dev.gpudata,
self.HRB_dev.gpudata,
self.HRE_dev.gpudata,
self.HRF_dev.gpudata,
config.sim_config.dtypes['float_or_double'](self.d),
block=self.G.tpb, grid=self.bpg)
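Editorial note (not part of the commit): both PML update calls above follow the standard PyCUDA launch pattern: a kernel obtained with get_function is called with scalars wrapped in np.int32 (or the configured float type), device buffers passed via their .gpudata attribute, and block/grid tuples. A compact, self-contained sketch of that pattern, with an illustrative kernel and sizes:

    import numpy as np
    import pycuda.autoinit  # noqa: F401 - creates a CUDA context for this sketch
    import pycuda.gpuarray as gpuarray
    from pycuda.compiler import SourceModule

    mod = SourceModule("""
    __global__ void scale(int n, float *a, float s)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n) a[i] *= s;
    }
    """)
    scale = mod.get_function("scale")

    a_dev = gpuarray.to_gpu(np.arange(8, dtype=np.float32))
    scale(np.int32(a_dev.size), a_dev.gpudata, np.float32(2.0),
          block=(32, 1, 1), grid=(1, 1, 1))
    print(a_dev.get())  # each element doubled on the device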
@@ -265,7 +265,8 @@ class CUDAUpdates:
# Set common substitutions for use in kernels
self.subs_name_args = {'REAL': config.sim_config.dtypes['C_float_or_double'],
'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}
self.subs_func = {'CUDA_IDX': 'int i = blockIdx.x * blockDim.x + threadIdx.x;',
self.subs_func = {'REAL': config.sim_config.dtypes['C_float_or_double'],
'CUDA_IDX': 'int i = blockIdx.x * blockDim.x + threadIdx.x;',
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
@@ -273,6 +274,9 @@ class CUDAUpdates:
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}

# Enviroment for templating kernels
self.env = Environment(loader=PackageLoader('gprMax', 'cuda_opencl_el'))

# Initialise arrays on GPU, prepare kernels, and get kernel functions
self._set_macros()
self._set_field_knls()
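Editorial note (not part of the commit): the subs_func dictionary above supplies $-style placeholders ($CUDA_IDX, $NX_FIELDS, ...) that are substituted into the kernel sources before compilation; the commit also sets up a Jinja2 Environment for the kernel templates, and the exact build step is internal to _build_knl. A sketch, assuming plain string.Template-style substitution, of how such a dictionary turns a templated kernel into CUDA C:

    from string import Template

    knl_src = Template("""
    __global__ void update_electric(int NX, int NY, int NZ, const unsigned int *ID)
    {
        $CUDA_IDX
        int x = i / ($NY_FIELDS * $NZ_FIELDS);
        int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
        int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
    }
    """)

    subs_func = {'CUDA_IDX': 'int i = blockIdx.x * blockDim.x + threadIdx.x;',
                 'NY_FIELDS': 101, 'NZ_FIELDS': 101}  # illustrative values
    print(knl_src.substitute(subs_func))  # ready to hand to the CUDA compiler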
@@ -351,17 +355,17 @@ class CUDAUpdates:
get kernel functions.
"""

bld = self._build_knl(knl_fields_updates.update_electric['func'],
bld = self._build_knl(knl_fields_updates.update_electric,
self.subs_name_args, self.subs_func)
knlE = self.source_module(bld,
options=config.sim_config.cuda['nvcc_opts'])
self.update_electric_gpu = knlE.get_function("update_electric")
options=config.sim_config.devices['nvcc_opts'])
self.update_electric_dev = knlE.get_function("update_electric")

bld = self._build_knl(knl_fields_updates.update_magnetic['func'],
bld = self._build_knl(knl_fields_updates.update_magnetic,
self.subs_name_args, self.subs_func)
knlH = self.source_module(bld,
options=config.sim_config.cuda['nvcc_opts'])
self.update_magnetic_gpu = knlH.get_function("update_magnetic")
options=config.sim_config.devices['nvcc_opts'])
self.update_magnetic_dev = knlH.get_function("update_magnetic")

self._copy_mat_coeffs(knlE, knlH)
@@ -379,13 +383,13 @@ class CUDAUpdates:
bld = self._build_knl(knl_fields_updates.update_electric_dispersive_A['func'],
self.subs_name_args, self.subs_func)
knl = self.source_module(bld,
options=config.sim_config.cuda['nvcc_opts'])
options=config.sim_config.devices['nvcc_opts'])
self.dispersive_update_a = knl.get_function("update_electric_dispersive_A")

bld = self._build_knl(knl_fields_updates.update_electric_dispersive_B['func'],
self.subs_name_args, self.subs_func)
knl = self.source_module(bld,
options=config.sim_config.cuda['nvcc_opts'])
options=config.sim_config.devices['nvcc_opts'])
self.dispersive_update_b = knl.get_function("update_electric_dispersive_B")

# Electric and magnetic field updates - set blocks per grid and
@@ -403,21 +407,21 @@ class CUDAUpdates:

# Set block per grid, initialise arrays on GPU, and get kernel functions
for pml in self.grid.pmls:
pml.set_blocks_per_grid()
pml.htod_field_arrays()
pml.set_blocks_per_grid()
knl_name = 'order' + str(len(pml.CFS)) + '_' + pml.direction
self.subs_name_args['FUNC'] = knl_name

knl_electric = getattr(knl_pml_updates_electric, knl_name)
bld = self._build_knl(knl_electric['func'],
bld = self._build_knl(knl_electric,
self.subs_name_args, self.subs_func)
knlE = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts'])
knlE = self.source_module(bld, options=config.sim_config.devices['nvcc_opts'])
pml.update_electric_dev = knlE.get_function(knl_name)

knl_magnetic = getattr(knl_pml_updates_magnetic, knl_name)
bld = self._build_knl(knl_magnetic['func'],
bld = self._build_knl(knl_magnetic,
self.subs_name_args, self.subs_func)
knlH = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts'])
knlH = self.source_module(bld, options=config.sim_config.devices['nvcc_opts'])
pml.update_magnetic_dev = knlH.get_function(knl_name)

self._copy_mat_coeffs(knlE, knlH)
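Editorial note (not part of the commit): each PML slab picks its kernel template by name, 'order' plus the number of CFS terms plus the slab direction, looked up with getattr on the kernel module. A small sketch of that dispatch; the module, attribute, and direction names here are illustrative, not necessarily gprMax's exact naming:

    from types import SimpleNamespace

    # Stand-in for a module such as knl_pml_updates_electric
    knl_pml_updates_electric = SimpleNamespace(order1_xminus="...templated kernel source...")

    CFS = [object()]                 # one CFS term
    direction = 'xminus'             # illustrative direction label
    knl_name = 'order' + str(len(CFS)) + '_' + direction
    knl_electric = getattr(knl_pml_updates_electric, knl_name)
    print(knl_name, knl_electric)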
@@ -426,7 +430,7 @@ class CUDAUpdates:
"""Receivers - initialise arrays on GPU, prepare kernel and get kernel
function.
"""
self.rxcoords_gpu, self.rxs_gpu = htod_rx_arrays(self.grid)
self.rxcoords_dev, self.rxs_dev = htod_rx_arrays(self.grid)

self.subs_func.update({'REAL': config.sim_config.dtypes['C_float_or_double'],
'NY_RXCOORDS': 3,
@@ -434,10 +438,10 @@ class CUDAUpdates:
'NY_RXS': self.grid.iterations,
'NZ_RXS': len(self.grid.rxs)})

bld = self._build_knl(knl_store_outputs.store_outputs['func'],
bld = self._build_knl(knl_store_outputs.store_outputs,
self.subs_name_args, self.subs_func)
knl = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts'])
self.store_outputs_gpu = knl.get_function("store_outputs")
knl = self.source_module(bld, options=config.sim_config.devices['nvcc_opts'])
self.store_outputs_dev = knl.get_function("store_outputs")

def _set_src_knls(self):
"""Sources - initialise arrays on GPU, prepare kernel and get kernel
@@ -447,23 +451,23 @@ class CUDAUpdates:
'NY_SRCWAVES': self.grid.iteration})

if self.grid.hertziandipoles:
self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = htod_src_arrays(self.grid.hertziandipoles, self.grid)
bld = self._build_knl(knl_source_updates.update_hertzian_dipole['func'],
self.srcinfo1_hertzian_dev, self.srcinfo2_hertzian_dev, self.srcwaves_hertzian_dev = htod_src_arrays(self.grid.hertziandipoles, self.grid)
bld = self._build_knl(knl_source_updates.update_hertzian_dipole,
self.subs_name_args, self.subs_func)
knl = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts'])
self.update_hertzian_dipole_gpu = knl.get_function("update_hertzian_dipole")
knl = self.source_module(bld, options=config.sim_config.devices['nvcc_opts'])
self.update_hertzian_dipole_dev = knl.get_function("update_hertzian_dipole")
if self.grid.magneticdipoles:
self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = htod_src_arrays(self.grid.magneticdipoles, self.grid)
bld = self._build_knl(knl_source_updates.update_magnetic_dipole['func'],
self.srcinfo1_magnetic_dev, self.srcinfo2_magnetic_dev, self.srcwaves_magnetic_dev = htod_src_arrays(self.grid.magneticdipoles, self.grid)
bld = self._build_knl(knl_source_updates.update_magnetic_dipole,
self.subs_name_args, self.subs_func)
knl = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts'])
self.update_magnetic_dipole_gpu = knl.get_function("update_magnetic_dipole")
knl = self.source_module(bld, options=config.sim_config.devices['nvcc_opts'])
self.update_magnetic_dipole_dev = knl.get_function("update_magnetic_dipole")
if self.grid.voltagesources:
self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = htod_src_arrays(self.grid.voltagesources, self.grid)
bld = self._build_knl(knl_source_updates.update_voltage_source['func'],
self.srcinfo1_voltage_dev, self.srcinfo2_voltage_dev, self.srcwaves_voltage_dev = htod_src_arrays(self.grid.voltagesources, self.grid)
bld = self._build_knl(knl_source_updates.update_voltage_source,
self.subs_name_args, self.subs_func)
knl = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts'])
self.update_voltage_source_gpu = knl.get_function("update_voltage_source")
knl = self.source_module(bld, options=config.sim_config.devices['nvcc_opts'])
self.update_voltage_source_dev = knl.get_function("update_voltage_source")

if (self.grid.hertziandipoles +
self.grid.magneticdipoles +
@@ -474,17 +478,17 @@ class CUDAUpdates:
"""Snapshots - initialise arrays on GPU, prepare kernel and get kernel
function.
"""
self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = htod_snapshot_array(self.grid)
self.snapEx_dev, self.snapEy_dev, self.snapEz_dev, self.snapHx_dev, self.snapHy_dev, self.snapHz_dev = htod_snapshot_array(self.grid)

self.subs_func.update({'REAL': config.sim_config.dtypes['C_float_or_double'],
'NX_SNAPS': Snapshot.nx_max,
'NY_SNAPS': Snapshot.ny_max,
'NZ_SNAPS': Snapshot.nz_max})

bld = self._build_knl(knl_snapshots.store_snapshot['func'],
bld = self._build_knl(knl_snapshots.store_snapshot,
self.subs_name_args, self.subs_func)
knl = self.source_module(bld, options=config.sim_config.cuda['nvcc_opts'])
self.store_snapshot_gpu = knl.get_function("store_snapshot")
knl = self.source_module(bld, options=config.sim_config.devices['nvcc_opts'])
self.store_snapshot_dev = knl.get_function("store_snapshot")

def _copy_mat_coeffs(self, knlE, knlH):
"""Copy material coefficient arrays to constant memory of GPU
@@ -497,8 +501,8 @@ class CUDAUpdates:

# Check if coefficient arrays will fit on constant memory of GPU
if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes
> config.get_model_config().cuda['gpu'].total_constant_memory):
logger.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.get_model_config().cuda['gpu'].total_constant_memory)} on {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU")
> config.get_model_config().device['dev'].total_constant_memory):
logger.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.get_model_config().device['dev'].total_constant_memory)} on {config.get_model_config().device['dev'].deviceID} - {config.get_model_config().device['dev'].name} GPU")
raise ValueError

updatecoeffsE = knlE.get_global('updatecoeffsE')[0]
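Editorial note (not part of the commit): the size check above guards the copy of the material update coefficients into the GPU's __constant__ memory; the symbol address comes from get_global on the compiled module. A self-contained sketch of that pattern, with an illustrative symbol name and array size:

    import numpy as np
    import pycuda.autoinit  # noqa: F401 - creates a CUDA context for this sketch
    import pycuda.driver as drv
    from pycuda.compiler import SourceModule

    print(drv.Device(0).total_constant_memory)   # bytes of __constant__ memory available

    mod = SourceModule("""
    __constant__ float updatecoeffsE[16];
    __global__ void dummy(float *out) { out[0] = updatecoeffsE[0]; }
    """)
    coeffs = np.arange(16, dtype=np.float32)
    symbol, nbytes = mod.get_global("updatecoeffsE")  # (device pointer, size in bytes)
    assert coeffs.nbytes <= nbytes
    drv.memcpy_htod(symbol, coeffs)                   # host -> constant memory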
@@ -509,16 +513,16 @@ class CUDAUpdates:
def store_outputs(self):
"""Store field component values for every receiver."""
if self.grid.rxs:
self.store_outputs_gpu(np.int32(len(self.grid.rxs)),
self.store_outputs_dev(np.int32(len(self.grid.rxs)),
np.int32(self.grid.iteration),
self.rxcoords_gpu.gpudata,
self.rxs_gpu.gpudata,
self.grid.Ex_gpu.gpudata,
self.grid.Ey_gpu.gpudata,
self.grid.Ez_gpu.gpudata,
self.grid.Hx_gpu.gpudata,
self.grid.Hy_gpu.gpudata,
self.grid.Hz_gpu.gpudata,
self.rxcoords_dev.gpudata,
self.rxs_dev.gpudata,
self.grid.Ex_dev.gpudata,
self.grid.Ey_dev.gpudata,
self.grid.Ez_dev.gpudata,
self.grid.Hx_dev.gpudata,
self.grid.Hy_dev.gpudata,
self.grid.Hz_dev.gpudata,
block=(1, 1, 1),
grid=(round32(len(self.grid.rxs)), 1, 1))
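Editorial note (not part of the commit): the grid x-dimension of the store_outputs launch above comes from round32, a gprMax utility. The sketch below assumes its behaviour is to round up to the nearest multiple of 32 (the warp size), which is consistent with how it is used here:

    import math

    def round32(value):
        """Assumed behaviour: round value up to the nearest multiple of 32."""
        return int(32 * math.ceil(value / 32))

    assert round32(1) == 32
    assert round32(32) == 32
    assert round32(33) == 64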
@@ -532,7 +536,7 @@ class CUDAUpdates:
for i, snap in enumerate(self.grid.snapshots):
if snap.time == iteration + 1:
snapno = 0 if config.get_model_config().cuda['snapsgpu2cpu'] else i
self.store_snapshot_gpu(np.int32(snapno),
self.store_snapshot_dev(np.int32(snapno),
np.int32(snap.xs),
np.int32(snap.xf),
np.int32(snap.ys),
@@ -542,41 +546,41 @@ class CUDAUpdates:
np.int32(snap.dx),
np.int32(snap.dy),
np.int32(snap.dz),
self.grid.Ex_gpu.gpudata,
self.grid.Ey_gpu.gpudata,
self.grid.Ez_gpu.gpudata,
self.grid.Hx_gpu.gpudata,
self.grid.Hy_gpu.gpudata,
self.grid.Hz_gpu.gpudata,
self.snapEx_gpu.gpudata,
self.snapEy_gpu.gpudata,
self.snapEz_gpu.gpudata,
self.snapHx_gpu.gpudata,
self.snapHy_gpu.gpudata,
self.snapHz_gpu.gpudata,
self.grid.Ex_dev.gpudata,
self.grid.Ey_dev.gpudata,
self.grid.Ez_dev.gpudata,
self.grid.Hx_dev.gpudata,
self.grid.Hy_dev.gpudata,
self.grid.Hz_dev.gpudata,
self.snapEx_dev.gpudata,
self.snapEy_dev.gpudata,
self.snapEz_dev.gpudata,
self.snapHx_dev.gpudata,
self.snapHy_dev.gpudata,
self.snapHz_dev.gpudata,
block=Snapshot.tpb,
grid=Snapshot.bpg)
if config.get_model_config().cuda['snapsgpu2cpu']:
dtoh_snapshot_array(self.snapEx_gpu.get(),
self.snapEy_gpu.get(),
self.snapEz_gpu.get(),
self.snapHx_gpu.get(),
self.snapHy_gpu.get(),
self.snapHz_gpu.get(),
dtoh_snapshot_array(self.snapEx_dev.get(),
self.snapEy_dev.get(),
self.snapEz_dev.get(),
self.snapHx_dev.get(),
self.snapHy_dev.get(),
self.snapHz_dev.get(),
0, snap)

def update_magnetic(self):
"""Update magnetic field components."""
self.update_magnetic_gpu(np.int32(self.grid.nx),
self.update_magnetic_dev(np.int32(self.grid.nx),
np.int32(self.grid.ny),
np.int32(self.grid.nz),
self.grid.ID_gpu.gpudata,
self.grid.Hx_gpu.gpudata,
self.grid.Hy_gpu.gpudata,
self.grid.Hz_gpu.gpudata,
self.grid.Ex_gpu.gpudata,
self.grid.Ey_gpu.gpudata,
self.grid.Ez_gpu.gpudata,
self.grid.ID_dev.gpudata,
self.grid.Hx_dev.gpudata,
self.grid.Hy_dev.gpudata,
self.grid.Hz_dev.gpudata,
self.grid.Ex_dev.gpudata,
self.grid.Ey_dev.gpudata,
self.grid.Ez_dev.gpudata,
block=self.grid.tpb,
grid=self.grid.bpg)
@@ -588,18 +592,18 @@ class CUDAUpdates:
def update_magnetic_sources(self):
"""Update magnetic field components from sources."""
if self.grid.magneticdipoles:
self.update_magnetic_dipole_gpu(np.int32(len(self.grid.magneticdipoles)),
self.update_magnetic_dipole_dev(np.int32(len(self.grid.magneticdipoles)),
np.int32(self.grid.iteration),
config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_magnetic_gpu.gpudata,
self.srcinfo2_magnetic_gpu.gpudata,
self.srcwaves_magnetic_gpu.gpudata,
self.grid.ID_gpu.gpudata,
self.grid.Hx_gpu.gpudata,
self.grid.Hy_gpu.gpudata,
self.grid.Hz_gpu.gpudata,
self.srcinfo1_magnetic_dev.gpudata,
self.srcinfo2_magnetic_dev.gpudata,
self.srcwaves_magnetic_dev.gpudata,
self.grid.ID_dev.gpudata,
self.grid.Hx_dev.gpudata,
self.grid.Hy_dev.gpudata,
self.grid.Hz_dev.gpudata,
block=(1, 1, 1),
grid=(round32(len(self.grid.magneticdipoles)), 1, 1))
@@ -607,16 +611,16 @@ class CUDAUpdates:
"""Update electric field components."""
# All materials are non-dispersive so do standard update.
if config.get_model_config().materials['maxpoles'] == 0:
self.update_electric_gpu(np.int32(self.grid.nx),
self.update_electric_dev(np.int32(self.grid.nx),
np.int32(self.grid.ny),
np.int32(self.grid.nz),
self.grid.ID_gpu.gpudata,
self.grid.Ex_gpu.gpudata,
self.grid.Ey_gpu.gpudata,
self.grid.Ez_gpu.gpudata,
self.grid.Hx_gpu.gpudata,
self.grid.Hy_gpu.gpudata,
self.grid.Hz_gpu.gpudata,
self.grid.ID_dev.gpudata,
self.grid.Ex_dev.gpudata,
self.grid.Ey_dev.gpudata,
self.grid.Ez_dev.gpudata,
self.grid.Hx_dev.gpudata,
self.grid.Hy_dev.gpudata,
self.grid.Hz_dev.gpudata,
block=self.grid.tpb,
grid=self.grid.bpg)
@@ -627,17 +631,17 @@ class CUDAUpdates:
np.int32(self.grid.ny),
np.int32(self.grid.nz),
np.int32(config.get_model_config().materials['maxpoles']),
self.grid.updatecoeffsdispersive_gpu.gpudata,
self.grid.Tx_gpu.gpudata,
self.grid.Ty_gpu.gpudata,
self.grid.Tz_gpu.gpudata,
self.grid.ID_gpu.gpudata,
self.grid.Ex_gpu.gpudata,
self.grid.Ey_gpu.gpudata,
self.grid.Ez_gpu.gpudata,
self.grid.Hx_gpu.gpudata,
self.grid.Hy_gpu.gpudata,
self.grid.Hz_gpu.gpudata,
self.grid.updatecoeffsdispersive_dev.gpudata,
self.grid.Tx_dev.gpudata,
self.grid.Ty_dev.gpudata,
self.grid.Tz_dev.gpudata,
self.grid.ID_dev.gpudata,
self.grid.Ex_dev.gpudata,
self.grid.Ey_dev.gpudata,
self.grid.Ez_dev.gpudata,
self.grid.Hx_dev.gpudata,
self.grid.Hy_dev.gpudata,
self.grid.Hz_dev.gpudata,
block=self.grid.tpb,
grid=self.grid.bpg)
@@ -651,34 +655,34 @@ class CUDAUpdates:
update any Hertzian dipole sources last.
"""
if self.grid.voltagesources:
self.update_voltage_source_gpu(np.int32(len(self.grid.voltagesources)),
self.update_voltage_source_dev(np.int32(len(self.grid.voltagesources)),
np.int32(self.grid.iteration),
config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_voltage_gpu.gpudata,
self.srcinfo2_voltage_gpu.gpudata,
self.srcwaves_voltage_gpu.gpudata,
self.grid.ID_gpu.gpudata,
self.grid.Ex_gpu.gpudata,
self.grid.Ey_gpu.gpudata,
self.grid.Ez_gpu.gpudata,
self.srcinfo1_voltage_dev.gpudata,
self.srcinfo2_voltage_dev.gpudata,
self.srcwaves_voltage_dev.gpudata,
self.grid.ID_dev.gpudata,
self.grid.Ex_dev.gpudata,
self.grid.Ey_dev.gpudata,
self.grid.Ez_dev.gpudata,
block=(1, 1, 1),
grid=(round32(len(self.grid.voltagesources)), 1, 1))

if self.grid.hertziandipoles:
self.update_hertzian_dipole_gpu(np.int32(len(self.grid.hertziandipoles)),
self.update_hertzian_dipole_dev(np.int32(len(self.grid.hertziandipoles)),
np.int32(self.grid.iteration),
config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_hertzian_gpu.gpudata,
self.srcinfo2_hertzian_gpu.gpudata,
self.srcwaves_hertzian_gpu.gpudata,
self.grid.ID_gpu.gpudata,
self.grid.Ex_gpu.gpudata,
self.grid.Ey_gpu.gpudata,
self.grid.Ez_gpu.gpudata,
self.srcinfo1_hertzian_dev.gpudata,
self.srcinfo2_hertzian_dev.gpudata,
self.srcwaves_hertzian_dev.gpudata,
self.grid.ID_dev.gpudata,
self.grid.Ex_dev.gpudata,
self.grid.Ey_dev.gpudata,
self.grid.Ez_dev.gpudata,
block=(1, 1, 1),
grid=(round32(len(self.grid.hertziandipoles)), 1, 1))
@@ -696,14 +700,14 @@ class CUDAUpdates:
np.int32(self.grid.ny),
np.int32(self.grid.nz),
np.int32(config.get_model_config().materials['maxpoles']),
self.grid.updatecoeffsdispersive_gpu.gpudata,
self.grid.Tx_gpu.gpudata,
self.grid.Ty_gpu.gpudata,
self.grid.Tz_gpu.gpudata,
self.grid.ID_gpu.gpudata,
self.grid.Ex_gpu.gpudata,
self.grid.Ey_gpu.gpudata,
self.grid.Ez_gpu.gpudata,
self.grid.updatecoeffsdispersive_dev.gpudata,
self.grid.Tx_dev.gpudata,
self.grid.Ty_dev.gpudata,
self.grid.Tz_dev.gpudata,
self.grid.ID_dev.gpudata,
self.grid.Ex_dev.gpudata,
self.grid.Ey_dev.gpudata,
self.grid.Ez_dev.gpudata,
block=self.grid.tpb,
grid=self.grid.bpg)
@@ -738,19 +742,19 @@ class CUDAUpdates:
"""Copy data from GPU back to CPU to save to file(s)."""
# Copy output from receivers array back to correct receiver objects
if self.grid.rxs:
dtoh_rx_array(self.rxs_gpu.get(),
self.rxcoords_gpu.get(),
dtoh_rx_array(self.rxs_dev.get(),
self.rxcoords_dev.get(),
self.grid)

# Copy data from any snapshots back to correct snapshot objects
if self.grid.snapshots and not config.get_model_config().cuda['snapsgpu2cpu']:
for i, snap in enumerate(self.grid.snapshots):
dtoh_snapshot_array(self.snapEx_gpu.get(),
self.snapEy_gpu.get(),
self.snapEz_gpu.get(),
self.snapHx_gpu.get(),
self.snapHy_gpu.get(),
self.snapHz_gpu.get(),
dtoh_snapshot_array(self.snapEx_dev.get(),
self.snapEy_dev.get(),
self.snapEz_dev.get(),
self.snapHx_dev.get(),
self.snapHy_dev.get(),
self.snapHz_dev.get(),
i, snap)

def cleanup(self):