remove gpu function

This commit is contained in:
John Hartley
2019-08-01 16:28:49 +01:00
Parent f514c0c916
Commit 90e30d46a4

@@ -154,243 +154,3 @@ class SubgridUpdates:
class GPUUpdates:
    pass
"""
def solve_gpu(currentmodelrun, modelend, G):
    """Solving using FDTD method on GPU. Implemented using Nvidia CUDA.

    Args:
        currentmodelrun (int): Current model run number.
        modelend (int): Number of last model to run.
        G (class): Grid class instance - holds essential parameters describing the model.

    Returns:
        tsolve (float): Time taken to execute solving (seconds).
        memsolve (int): Memory usage on final iteration (bytes).
    """
    import pycuda.driver as drv
    from pycuda.compiler import SourceModule
    drv.init()

    # Suppress nvcc warnings on Windows
    if sys.platform == 'win32':
        compiler_opts = ['-w']
    else:
        compiler_opts = None
    # Create device handle and context on specific GPU device (and make it current context)
    dev = drv.Device(config.cuda['gpus'].deviceID)
    ctx = dev.make_context()
    # Electric and magnetic field updates - prepare kernels, get kernel functions, and initialise arrays on GPU
    if config.materials['maxpoles'] == 0:
        kernel_fields = SourceModule(kernel_template_fields.substitute(REAL=config.dtypes['C_float_or_double'], REAL_OR_COMPLEX=config.dtypes['C_complex'], N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts)
    else:  # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values).
        kernel_fields = SourceModule(kernel_template_fields.substitute(REAL=config.dtypes['C_float_or_double'], REAL_OR_COMPLEX=config.dtypes['C_complex'], N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts)
        update_e_dispersive_A_gpu = kernel_fields.get_function("update_e_dispersive_A")
        update_e_dispersive_B_gpu = kernel_fields.get_function("update_e_dispersive_B")
        G.gpu_initialise_dispersive_arrays()
    update_e_gpu = kernel_fields.get_function("update_e")
    update_h_gpu = kernel_fields.get_function("update_h")
    # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for fields kernels
    updatecoeffsE = kernel_fields.get_global('updatecoeffsE')[0]
    updatecoeffsH = kernel_fields.get_global('updatecoeffsH')[0]
    if G.updatecoeffsE.nbytes + G.updatecoeffsH.nbytes > config.cuda['gpus'].constmem:
        raise GeneralError('Too many materials in the model to fit onto constant memory of size {} on {} - {} GPU'.format(human_size(config.cuda['gpus'].constmem), config.cuda['gpus'].deviceID, config.cuda['gpus'].name))
    else:
        drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
        drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)

    # Electric and magnetic field updates - set blocks per grid and initialise field arrays on GPU
    G.gpu_set_blocks_per_grid()
    G.gpu_initialise_arrays()

    # PML updates
    if G.pmls:
        # Prepare kernels
        kernelelectricfunc = getattr(import_module('gprMax.cuda.pml_updates_electric_' + G.pmlformulation), 'kernels_template_pml_electric_' + G.pmlformulation)
        kernelmagneticfunc = getattr(import_module('gprMax.cuda.pml_updates_magnetic_' + G.pmlformulation), 'kernels_template_pml_magnetic_' + G.pmlformulation)
        kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=config.dtypes['C_float_or_double'], N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
        kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=config.dtypes['C_float_or_double'], N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=G.Hx.shape[0], NY_FIELDS=G.Hx.shape[1], NZ_FIELDS=G.Hx.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
        # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for PML kernels
        updatecoeffsE = kernels_pml_electric.get_global('updatecoeffsE')[0]
        updatecoeffsH = kernels_pml_magnetic.get_global('updatecoeffsH')[0]
        drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
        drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)
        # Set block per grid, initialise arrays on GPU, and get kernel functions
        for pml in G.pmls:
            pml.gpu_set_blocks_per_grid(G)
            pml.gpu_initialise_arrays()
            pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)

    # Receivers
    if G.rxs:
        # Initialise arrays on GPU
        rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G)
        # Prepare kernel and get kernel function
        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=config.dtypes['C_float_or_double'], NY_RXCOORDS=3, NX_RXS=len(Rx.gpu_allowableoutputs), NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2]), options=compiler_opts)
        store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")

    # Sources - initialise arrays on GPU, prepare kernel and get kernel functions
    if G.voltagesources + G.hertziandipoles + G.magneticdipoles:
        kernel_sources = SourceModule(kernel_template_sources.substitute(REAL=config.dtypes['C_float_or_double'], N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
        # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for source kernels
        updatecoeffsE = kernel_sources.get_global('updatecoeffsE')[0]
        updatecoeffsH = kernel_sources.get_global('updatecoeffsH')[0]
        drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
        drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)
        if G.hertziandipoles:
            srcinfo1_hertzian_gpu, srcinfo2_hertzian_gpu, srcwaves_hertzian_gpu = gpu_initialise_src_arrays(G.hertziandipoles, G)
            update_hertzian_dipole_gpu = kernel_sources.get_function("update_hertzian_dipole")
        if G.magneticdipoles:
            srcinfo1_magnetic_gpu, srcinfo2_magnetic_gpu, srcwaves_magnetic_gpu = gpu_initialise_src_arrays(G.magneticdipoles, G)
            update_magnetic_dipole_gpu = kernel_sources.get_function("update_magnetic_dipole")
        if G.voltagesources:
            srcinfo1_voltage_gpu, srcinfo2_voltage_gpu, srcwaves_voltage_gpu = gpu_initialise_src_arrays(G.voltagesources, G)
            update_voltage_source_gpu = kernel_sources.get_function("update_voltage_source")

    # Snapshots - initialise arrays on GPU, prepare kernel and get kernel functions
    if G.snapshots:
        # Initialise arrays on GPU
        snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu = gpu_initialise_snapshot_array(G)
        # Prepare kernel and get kernel function
        kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=config.dtypes['C_float_or_double'], NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2]), options=compiler_opts)
        store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")

    # Iteration loop timer
    iterstart = drv.Event()
    iterend = drv.Event()
    iterstart.record()

    for iteration in tqdm(range(G.iterations), desc='Running simulation, model ' + str(currentmodelrun) + '/' + str(modelend), ncols=get_terminal_width() - 1, file=sys.stdout, disable=not config.general['progressbars']):
        # Get GPU memory usage on final iteration
        if iteration == G.iterations - 1:
            memsolve = drv.mem_get_info()[1] - drv.mem_get_info()[0]

        # Store field component values for every receiver
        if G.rxs:
            store_outputs_gpu(np.int32(len(G.rxs)), np.int32(iteration),
                              rxcoords_gpu.gpudata, rxs_gpu.gpudata,
                              G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                              G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
                              block=(1, 1, 1), grid=(round32(len(G.rxs)), 1, 1))

        # Store any snapshots
        for i, snap in enumerate(G.snapshots):
            if snap.time == iteration + 1:
                if not config.cuda['snapsgpu2cpu']:
                    store_snapshot_gpu(np.int32(i), np.int32(snap.xs),
                                       np.int32(snap.xf), np.int32(snap.ys),
                                       np.int32(snap.yf), np.int32(snap.zs),
                                       np.int32(snap.zf), np.int32(snap.dx),
                                       np.int32(snap.dy), np.int32(snap.dz),
                                       G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                                       G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
                                       snapEx_gpu.gpudata, snapEy_gpu.gpudata, snapEz_gpu.gpudata,
                                       snapHx_gpu.gpudata, snapHy_gpu.gpudata, snapHz_gpu.gpudata,
                                       block=Snapshot.tpb, grid=Snapshot.bpg)
                else:
                    store_snapshot_gpu(np.int32(0), np.int32(snap.xs),
                                       np.int32(snap.xf), np.int32(snap.ys),
                                       np.int32(snap.yf), np.int32(snap.zs),
                                       np.int32(snap.zf), np.int32(snap.dx),
                                       np.int32(snap.dy), np.int32(snap.dz),
                                       G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                                       G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
                                       snapEx_gpu.gpudata, snapEy_gpu.gpudata, snapEz_gpu.gpudata,
                                       snapHx_gpu.gpudata, snapHy_gpu.gpudata, snapHz_gpu.gpudata,
                                       block=Snapshot.tpb, grid=Snapshot.bpg)
                    gpu_get_snapshot_array(snapEx_gpu.get(), snapEy_gpu.get(), snapEz_gpu.get(),
                                           snapHx_gpu.get(), snapHy_gpu.get(), snapHz_gpu.get(), 0, snap)

        # Update magnetic field components
        update_h_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
                     G.ID_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata,
                     G.Hz_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata,
                     G.Ez_gpu.gpudata, block=config.cuda['gpus'].tpb, grid=config.cuda['gpus'].bpg)

        # Update magnetic field components with the PML correction
        for pml in G.pmls:
            pml.gpu_update_magnetic(G)
        # Update magnetic field components for magnetic dipole sources
        if G.magneticdipoles:
            update_magnetic_dipole_gpu(np.int32(len(G.magneticdipoles)), np.int32(iteration),
                                       config.dtypes['float_or_double'](G.dx), config.dtypes['float_or_double'](G.dy), config.dtypes['float_or_double'](G.dz),
                                       srcinfo1_magnetic_gpu.gpudata, srcinfo2_magnetic_gpu.gpudata,
                                       srcwaves_magnetic_gpu.gpudata, G.ID_gpu.gpudata,
                                       G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
                                       block=(1, 1, 1), grid=(round32(len(G.magneticdipoles)), 1, 1))
        # Update electric field components
        # If all materials are non-dispersive do standard update
        if config.materials['maxpoles'] == 0:
            update_e_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), G.ID_gpu.gpudata,
                         G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                         G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
                         block=config.cuda['gpus'].tpb, grid=config.cuda['gpus'].bpg)
        # If there are any dispersive materials do 1st part of dispersive update
        # (it is split into two parts as it requires present and updated electric field values).
        else:
            update_e_dispersive_A_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
                                      np.int32(config.materials['maxpoles']), G.updatecoeffsdispersive_gpu.gpudata,
                                      G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata,
                                      G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                                      G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
                                      block=config.cuda['gpus'].tpb, grid=config.cuda['gpus'].bpg)

        # Update electric field components with the PML correction
        for pml in G.pmls:
            pml.gpu_update_electric(G)

        # Update electric field components for voltage sources
        if G.voltagesources:
            update_voltage_source_gpu(np.int32(len(G.voltagesources)), np.int32(iteration),
                                      config.dtypes['float_or_double'](G.dx), config.dtypes['float_or_double'](G.dy), config.dtypes['float_or_double'](G.dz),
                                      srcinfo1_voltage_gpu.gpudata, srcinfo2_voltage_gpu.gpudata,
                                      srcwaves_voltage_gpu.gpudata, G.ID_gpu.gpudata,
                                      G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                                      block=(1, 1, 1), grid=(round32(len(G.voltagesources)), 1, 1))

        # Update electric field components for Hertzian dipole sources (update any Hertzian dipole sources last)
        if G.hertziandipoles:
            update_hertzian_dipole_gpu(np.int32(len(G.hertziandipoles)), np.int32(iteration),
                                       config.dtypes['float_or_double'](G.dx), config.dtypes['float_or_double'](G.dy), config.dtypes['float_or_double'](G.dz),
                                       srcinfo1_hertzian_gpu.gpudata, srcinfo2_hertzian_gpu.gpudata,
                                       srcwaves_hertzian_gpu.gpudata, G.ID_gpu.gpudata,
                                       G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                                       block=(1, 1, 1), grid=(round32(len(G.hertziandipoles)), 1, 1))

        # If there are any dispersive materials do 2nd part of dispersive update (it is split into two
        # parts as it requires present and updated electric field values). Therefore it can only be
        # completely updated after the electric field has been updated by the PML and source updates.
        if config.materials['maxpoles'] > 0:
            update_e_dispersive_B_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
                                      np.int32(config.materials['maxpoles']), G.updatecoeffsdispersive_gpu.gpudata,
                                      G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata,
                                      G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                                      block=config.cuda['gpus'].tpb, grid=config.cuda['gpus'].bpg)

    # Copy output from receivers array back to correct receiver objects
    if G.rxs:
        gpu_get_rx_array(rxs_gpu.get(), rxcoords_gpu.get(), G)

    # Copy data from any snapshots back to correct snapshot objects
    if G.snapshots and not config.cuda['snapsgpu2cpu']:
        for i, snap in enumerate(G.snapshots):
            gpu_get_snapshot_array(snapEx_gpu.get(), snapEy_gpu.get(), snapEz_gpu.get(),
                                   snapHx_gpu.get(), snapHy_gpu.get(), snapHz_gpu.get(), i, snap)

    iterend.record()
    iterend.synchronize()
    tsolve = iterstart.time_till(iterend) * 1e-3

    # Remove context from top of stack and delete
    ctx.pop()
    del ctx

    return tsolve, memsolve
"""