你已经派生过 gprMax
镜像自地址
https://gitee.com/sunhf/gprMax.git
已同步 2025-08-06 04:26:52 +08:00
Updated checking of GPU field array shapes.
这个提交包含在:
@@ -503,9 +503,9 @@ def solve_gpu(currentmodelrun, modelend, G):
|
||||
|
||||
# Electric and magnetic field updates - prepare kernels, and get kernel functions
|
||||
if Material.maxpoles > 0:
|
||||
kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts)
|
||||
kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts)
|
||||
else: # Set to one any substitutions for dispersive materials
|
||||
kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts)
|
||||
kernels_fields = SourceModule(kernels_template_fields.substitute(REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts)
|
||||
update_e_gpu = kernels_fields.get_function("update_e")
|
||||
update_h_gpu = kernels_fields.get_function("update_h")
|
||||
|
||||
@@ -535,8 +535,8 @@ def solve_gpu(currentmodelrun, modelend, G):
|
||||
kernelelectricfunc = getattr(import_module(pmlmodulelectric), 'kernels_template_pml_electric_' + G.pmlformulation)
|
||||
pmlmodulemagnetic = 'gprMax.pml_updates.pml_updates_magnetic_' + G.pmlformulation + '_gpu'
|
||||
kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic), 'kernels_template_pml_magnetic_' + G.pmlformulation)
|
||||
kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
|
||||
kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
|
||||
kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
|
||||
kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1], NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
|
||||
# Copy material coefficient arrays to constant memory of GPU (must be <64KB) for PML kernels
|
||||
updatecoeffsE = kernels_pml_electric.get_global('updatecoeffsE')[0]
|
||||
updatecoeffsH = kernels_pml_magnetic.get_global('updatecoeffsH')[0]
|
||||
@@ -544,21 +544,21 @@ def solve_gpu(currentmodelrun, modelend, G):
|
||||
drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)
|
||||
# Set block per grid, initialise arrays on GPU, and get kernel functions
|
||||
for pml in G.pmls:
|
||||
pml.gpu_set_blocks_per_grid(G)
|
||||
pml.gpu_initialise_arrays()
|
||||
pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
|
||||
pml.gpu_set_blocks_per_grid(G)
|
||||
|
||||
# Receivers
|
||||
if G.rxs:
|
||||
# Initialise arrays on GPU
|
||||
rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G)
|
||||
# Prepare kernel and get kernel function
|
||||
kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts)
|
||||
kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), options=compiler_opts)
|
||||
store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
|
||||
|
||||
# Sources - initialise arrays on GPU, prepare kernel and get kernel functions
|
||||
if G.voltagesources + G.hertziandipoles + G.magneticdipoles:
|
||||
kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
|
||||
kernels_sources = SourceModule(kernels_template_sources.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
|
||||
# Copy material coefficient arrays to constant memory of GPU (must be <64KB) for source kernels
|
||||
updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0]
|
||||
updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0]
|
||||
@@ -579,7 +579,7 @@ def solve_gpu(currentmodelrun, modelend, G):
|
||||
# Initialise arrays on GPU
|
||||
snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu = gpu_initialise_snapshot_array(G)
|
||||
# Prepare kernel and get kernel function
|
||||
kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NY_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1), NZ_FIELDS=(G.nx + 1, G.ny + 1, G.nz + 1)), options=compiler_opts)
|
||||
kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1)), options=compiler_opts)
|
||||
store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
|
||||
|
||||
# Iteration loop timer
|
||||
|
@@ -302,7 +302,7 @@ class PML(object):
|
||||
G (class): Grid class instance - holds essential parameters describing the model.
|
||||
"""
|
||||
|
||||
self.bpg = (int(np.ceil(((self.EPhi1.shape[1] + 1) * (self.EPhi1.shape[2] + 1) * (self.EPhi1.shape[3] + 1)) / G.tpb[0])), 1, 1)
|
||||
self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) * (self.EPhi1_gpu.shape[2] + 1) * (self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1)
|
||||
|
||||
def gpu_initialise_arrays(self):
|
||||
"""Initialise PML field and coefficient arrays on GPU."""
|
||||
|
在新工单中引用
屏蔽一个用户