你已经派生过 gprMax
镜像自地址
https://gitee.com/sunhf/gprMax.git
已同步 2025-08-08 07:24:19 +08:00
Basic functional GPU solver with cylinder_Ascan_2D example.
这个提交包含在:
@@ -510,14 +510,12 @@ class Rx(UserObjectMulti):
|
||||
r.ID = self.kwargs['id']
|
||||
outputs = self.kwargs['outputs']
|
||||
# Get allowable outputs
|
||||
if grid.gpu is not None:
|
||||
allowableoutputs = RxUser.gpu_allowableoutputs
|
||||
else:
|
||||
allowableoutputs = RxUser.allowableoutputs
|
||||
allowableoutputs = RxUser.allowableoutputs_gpu if config.sim_config.general['cuda'] else RxUser.allowableoutputs
|
||||
# Check and add field output names
|
||||
outputs.sort()
|
||||
for field in outputs:
|
||||
if field in allowableoutputs:
|
||||
r.outputs[field] = np.zeros(grid.iterations, dtype=config.dtypes['float_or_double'])
|
||||
r.outputs[field] = np.zeros(grid.iterations, dtype=config.sim_config.dtypes['float_or_double'])
|
||||
else:
|
||||
raise CmdInputError(f"'{self.params_str()}' contains an output type that is not allowable. Allowable outputs in current context are {allowableoutputs}")
|
||||
# If no ID or outputs are specified, use default
|
||||
|
@@ -18,6 +18,7 @@
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
from colorama import init
|
||||
from colorama import Fore
|
||||
@@ -64,7 +65,8 @@ class ModelConfig:
|
||||
# N.B. This will happen if the requested snapshots are too large to fit
|
||||
# in the memory of the GPU. If True, this will slow performance significantly
|
||||
if sim_config.general['cuda']:
|
||||
self.cuda = {'gpu': sim_config.cuda['gpus'], 'snapsgpu2cpu': False}
|
||||
self.cuda = {'gpu': sim_config.cuda['gpus'],
|
||||
'snapsgpu2cpu': False}
|
||||
|
||||
# Total memory usage for all grids in the model. Starts with 50MB overhead.
|
||||
self.mem_use = 50e6
|
||||
@@ -175,10 +177,12 @@ class SimulationConfig:
|
||||
self.general['cuda'] = True
|
||||
self.general['cpu'] = False
|
||||
self.general['opencl'] = False
|
||||
# gpus: list of GPU objects
|
||||
# gpus_str: list of strings describing GPU(s)
|
||||
self.cuda = {'gpus': [],
|
||||
'gpus_str': []}
|
||||
self.general['precision'] = 'single'
|
||||
self.cuda = {'gpus': [], # gpus: list of GPU objects
|
||||
'gpus_str': [], # gpus_str: list of strings describing GPU(s)
|
||||
'nvcc_opts': None} # nvcc_opts: nvcc compiler options
|
||||
# Suppress nvcc warnings on Microsoft Windows
|
||||
if sys.platform == 'win32': self.cuda['nvcc_opts'] = '-w'
|
||||
self.get_gpus()
|
||||
self.set_gpus()
|
||||
|
||||
|
@@ -128,7 +128,7 @@ __global__ void update_magnetic(int NX, int NY, int NZ, const unsigned int* __re
|
||||
// Electric field updates - dispersive materials //
|
||||
///////////////////////////////////////////////////
|
||||
|
||||
__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $REAL_OR_COMPLEX* __restrict__ updatecoeffsdispersive, $REAL_OR_COMPLEX *Tx, $REAL_OR_COMPLEX *Ty, $REAL_OR_COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
|
||||
__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
|
||||
|
||||
// This function is part A of updates to electric field values when dispersive materials (with multiple poles) are present.
|
||||
//
|
||||
@@ -189,7 +189,7 @@ __global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLE
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $REAL_OR_COMPLEX* __restrict__ updatecoeffsdispersive, $REAL_OR_COMPLEX *Tx, $REAL_OR_COMPLEX *Ty, $REAL_OR_COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
|
||||
__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
|
||||
|
||||
// This function is part B which updates the dispersive field arrays when dispersive materials (with multiple poles) are present.
|
||||
//
|
||||
|
@@ -279,15 +279,22 @@ class FDTDGrid:
|
||||
class CUDAGrid(FDTDGrid):
|
||||
"""Additional grid methods for solving on GPU using CUDA."""
|
||||
|
||||
def __init__(self, model_num):
|
||||
super().__init__(model_num)
|
||||
|
||||
# Threads per block - used for main electric/magnetic field updates
|
||||
self.tpb = (256, 1, 1)
|
||||
# Blocks per grid - used for main electric/magnetic field updates
|
||||
self.bpg = None
|
||||
|
||||
|
||||
def set_blocks_per_grid(self):
|
||||
"""Set the blocks per grid size used for updating the electric and
|
||||
magnetic field arrays on a GPU.
|
||||
"""
|
||||
|
||||
config.cuda['gpus'].bpg = (int(np.ceil(((self.nx + 1) *
|
||||
(self.ny + 1) *
|
||||
(self.nz + 1)) /
|
||||
config.cuda['gpus'].tpb[0])), 1, 1)
|
||||
self.bpg = (int(np.ceil(((self.nx + 1) * (self.ny + 1) *
|
||||
(self.nz + 1)) / self.tpb[0])), 1, 1)
|
||||
|
||||
def initialise_arrays(self):
|
||||
"""Initialise geometry and field arrays on GPU."""
|
||||
|
@@ -58,15 +58,15 @@ from .pml import CFS
|
||||
from .pml import PML
|
||||
from .pml import build_pml
|
||||
from .pml import pml_information
|
||||
from .receivers import gpu_initialise_rx_arrays
|
||||
from .receivers import gpu_get_rx_array
|
||||
from .receivers import initialise_rx_arrays_gpu
|
||||
from .receivers import get_rx_array_gpu
|
||||
from .receivers import Rx
|
||||
from .scene import Scene
|
||||
from .snapshots import Snapshot
|
||||
from .snapshots import gpu_initialise_snapshot_array
|
||||
from .snapshots import gpu_get_snapshot_array
|
||||
from .snapshots import initialise_snapshot_array_gpu
|
||||
from .snapshots import get_snapshot_array_gpu
|
||||
from .solvers import create_solver
|
||||
from .sources import gpu_initialise_src_arrays
|
||||
from .sources import initialise_src_arrays_gpu
|
||||
from .utilities import get_terminal_width
|
||||
from .utilities import human_size
|
||||
from .utilities import mem_check
|
||||
@@ -286,7 +286,8 @@ class ModelBuildRun:
|
||||
|
||||
mem_GPU = ''
|
||||
if config.sim_config.general['cuda']:
|
||||
mem_GPU = f' host + ~{human_size(self.solver.get_memsolve())} GPU'
|
||||
log.debug('Fix memory used calc for GPU')
|
||||
# mem_GPU = f' host + ~{human_size(self.solver.get_memsolve())} GPU'
|
||||
|
||||
log.info(f'\nMemory (RAM) used: ~{human_size(self.p.memory_full_info().uss)}{mem_GPU}')
|
||||
log.info(f'Solving time [HH:MM:SS]: {datetime.timedelta(seconds=tsolve)}')
|
||||
|
109
gprMax/pml.py
109
gprMax/pml.py
@@ -207,8 +207,7 @@ class PML:
|
||||
|
||||
self.CFS = G.cfs
|
||||
|
||||
if not config.sim_config.general['cuda']:
|
||||
self.initialise_field_arrays()
|
||||
self.initialise_field_arrays()
|
||||
|
||||
def initialise_field_arrays(self):
|
||||
"""Initialise arrays to store fields in PML."""
|
||||
@@ -340,7 +339,7 @@ class CUDAPML(PML):
|
||||
solving on GPU using CUDA.
|
||||
"""
|
||||
|
||||
def initialise_arrays(self):
|
||||
def initialise_field_arrays_gpu(self):
|
||||
"""Initialise PML field and coefficient arrays on GPU."""
|
||||
|
||||
import pycuda.gpuarray as gpuarray
|
||||
@@ -355,20 +354,44 @@ class CUDAPML(PML):
|
||||
self.HRF_gpu = gpuarray.to_gpu(self.HRF)
|
||||
|
||||
if self.direction[0] == 'x':
|
||||
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype))
|
||||
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype))
|
||||
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype))
|
||||
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype))
|
||||
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
|
||||
self.nx + 1, self.ny, self.nz + 1),
|
||||
dtype=config.sim_config.dtypes['float_or_double']))
|
||||
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
|
||||
self.nx + 1, self.ny + 1, self.nz),
|
||||
dtype=config.sim_config.dtypes['float_or_double']))
|
||||
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
|
||||
self.nx, self.ny + 1, self.nz),
|
||||
dtype=config.sim_config.dtypes['float_or_double']))
|
||||
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
|
||||
self.nx, self.ny, self.nz + 1),
|
||||
dtype=config.sim_config.dtypes['float_or_double']))
|
||||
elif self.direction[0] == 'y':
|
||||
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype))
|
||||
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype))
|
||||
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype))
|
||||
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype))
|
||||
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
|
||||
self.nx, self.ny + 1, self.nz + 1),
|
||||
dtype=config.sim_config.dtypes['float_or_double']))
|
||||
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
|
||||
self.nx + 1, self.ny + 1, self.nz),
|
||||
dtype=config.sim_config.dtypes['float_or_double']))
|
||||
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
|
||||
self.nx + 1, self.ny, self.nz),
|
||||
dtype=config.sim_config.dtypes['float_or_double']))
|
||||
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
|
||||
self.nx, self.ny, self.nz + 1),
|
||||
dtype=config.sim_config.dtypes['float_or_double']))
|
||||
elif self.direction[0] == 'z':
|
||||
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype))
|
||||
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype))
|
||||
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype))
|
||||
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype))
|
||||
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
|
||||
self.nx, self.ny + 1, self.nz + 1),
|
||||
dtype=config.sim_config.dtypes['float_or_double']))
|
||||
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
|
||||
self.nx + 1, self.ny, self.nz + 1),
|
||||
dtype=config.sim_config.dtypes['float_or_double']))
|
||||
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
|
||||
self.nx + 1, self.ny, self.nz),
|
||||
dtype=config.sim_config.dtypes['float_or_double']))
|
||||
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
|
||||
self.nx, self.ny + 1, self.nz),
|
||||
dtype=config.sim_config.dtypes['float_or_double']))
|
||||
|
||||
def set_blocks_per_grid(self, G):
|
||||
"""Set the blocks per grid size used for updating the PML field arrays on a GPU.
|
||||
@@ -377,7 +400,9 @@ class CUDAPML(PML):
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
"""
|
||||
|
||||
self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) * (self.EPhi1_gpu.shape[2] + 1) * (self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1)
|
||||
self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) *
|
||||
(self.EPhi1_gpu.shape[2] + 1) *
|
||||
(self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1)
|
||||
|
||||
def get_update_funcs(self, kernelselectric, kernelsmagnetic):
|
||||
"""Get update functions from PML kernels.
|
||||
@@ -400,7 +425,24 @@ class CUDAPML(PML):
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
"""
|
||||
|
||||
self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.EPhi1_gpu.shape[1]), np.int32(self.EPhi1_gpu.shape[2]), np.int32(self.EPhi1_gpu.shape[3]), np.int32(self.EPhi2_gpu.shape[1]), np.int32(self.EPhi2_gpu.shape[2]), np.int32(self.EPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
|
||||
self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf),
|
||||
np.int32(self.ys), np.int32(self.yf),
|
||||
np.int32(self.zs), np.int32(self.zf),
|
||||
np.int32(self.EPhi1_gpu.shape[1]),
|
||||
np.int32(self.EPhi1_gpu.shape[2]),
|
||||
np.int32(self.EPhi1_gpu.shape[3]),
|
||||
np.int32(self.EPhi2_gpu.shape[1]),
|
||||
np.int32(self.EPhi2_gpu.shape[2]),
|
||||
np.int32(self.EPhi2_gpu.shape[3]),
|
||||
np.int32(self.thickness),
|
||||
G.ID_gpu.gpudata,
|
||||
G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
|
||||
G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
|
||||
self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata,
|
||||
self.ERA_gpu.gpudata, self.ERB_gpu.gpudata,
|
||||
self.ERE_gpu.gpudata, self.ERF_gpu.gpudata,
|
||||
config.sim_config.dtypes['float_or_double'](self.d),
|
||||
block=G.tpb, grid=self.bpg)
|
||||
|
||||
def update_magnetic(self, G):
|
||||
"""This function updates magnetic field components with the PML
|
||||
@@ -409,7 +451,24 @@ class CUDAPML(PML):
|
||||
Args:
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
"""
|
||||
self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[3]), np.int32(self.HPhi2_gpu.shape[1]), np.int32(self.HPhi2_gpu.shape[2]), np.int32(self.HPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
|
||||
self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf),
|
||||
np.int32(self.ys), np.int32(self.yf),
|
||||
np.int32(self.zs), np.int32(self.zf),
|
||||
np.int32(self.HPhi1_gpu.shape[1]),
|
||||
np.int32(self.HPhi1_gpu.shape[2]),
|
||||
np.int32(self.HPhi1_gpu.shape[3]),
|
||||
np.int32(self.HPhi2_gpu.shape[1]),
|
||||
np.int32(self.HPhi2_gpu.shape[2]),
|
||||
np.int32(self.HPhi2_gpu.shape[3]),
|
||||
np.int32(self.thickness),
|
||||
G.ID_gpu.gpudata,
|
||||
G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
|
||||
G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
|
||||
self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata,
|
||||
self.HRA_gpu.gpudata, self.HRB_gpu.gpudata,
|
||||
self.HRE_gpu.gpudata, self.HRF_gpu.gpudata,
|
||||
config.sim_config.dtypes['float_or_double'](self.d),
|
||||
block=G.tpb, grid=self.bpg)
|
||||
|
||||
def pml_information(G):
|
||||
"""Information about PMLs.
|
||||
@@ -443,14 +502,16 @@ def build_pml(G, key, value):
|
||||
value (int): Thickness of PML slab in cells.
|
||||
"""
|
||||
|
||||
pml_type = CUDAPML if config.sim_config.general['cuda'] else PML
|
||||
|
||||
sumer = 0 # Sum of relative permittivities in PML slab
|
||||
summr = 0 # Sum of relative permeabilities in PML slab
|
||||
|
||||
if key[0] == 'x':
|
||||
if key == 'x0':
|
||||
pml = PML(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz)
|
||||
pml = pml_type(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz)
|
||||
elif key == 'xmax':
|
||||
pml = PML(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
|
||||
pml = pml_type(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
|
||||
G.pmls.append(pml)
|
||||
for j in range(G.ny):
|
||||
for k in range(G.nz):
|
||||
@@ -463,9 +524,9 @@ def build_pml(G, key, value):
|
||||
|
||||
elif key[0] == 'y':
|
||||
if key == 'y0':
|
||||
pml = PML(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz)
|
||||
pml = pml_type(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz)
|
||||
elif key == 'ymax':
|
||||
pml = PML(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
|
||||
pml = pml_type(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
|
||||
G.pmls.append(pml)
|
||||
for i in range(G.nx):
|
||||
for k in range(G.nz):
|
||||
@@ -478,9 +539,9 @@ def build_pml(G, key, value):
|
||||
|
||||
elif key[0] == 'z':
|
||||
if key == 'z0':
|
||||
pml = PML(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny)
|
||||
pml = pml_type(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny)
|
||||
elif key == 'zmax':
|
||||
pml = PML(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
|
||||
pml = pml_type(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
|
||||
G.pmls.append(pml)
|
||||
for i in range(G.nx):
|
||||
for j in range(G.ny):
|
||||
|
@@ -16,8 +16,6 @@
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
|
||||
import gprMax.config as config
|
||||
@@ -27,14 +25,15 @@ class Rx:
|
||||
"""Receiver output points."""
|
||||
|
||||
allowableoutputs = ['Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz', 'Ix', 'Iy', 'Iz']
|
||||
gpu_allowableoutputs = allowableoutputs[:-3]
|
||||
defaultoutputs = allowableoutputs[:-3]
|
||||
maxnumoutputs = 0
|
||||
|
||||
allowableoutputs_gpu = allowableoutputs[:-3]
|
||||
maxnumoutputs_gpu = 0
|
||||
|
||||
def __init__(self):
|
||||
|
||||
self.ID = None
|
||||
self.outputs = OrderedDict()
|
||||
self.outputs = {}
|
||||
self.xcoord = None
|
||||
self.ycoord = None
|
||||
self.zcoord = None
|
||||
@@ -43,12 +42,17 @@ class Rx:
|
||||
self.zcoordorigin = None
|
||||
|
||||
|
||||
def gpu_initialise_rx_arrays(G):
|
||||
def initialise_rx_arrays_gpu(G):
|
||||
"""Initialise arrays on GPU for receiver coordinates and to store field
|
||||
components for receivers.
|
||||
|
||||
Args:
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
|
||||
Returns:
|
||||
rxcoords_gpu (int): numpy array of receiver coordinates from GPU.
|
||||
rxs_gpu (float): numpy array of receiver data from GPU - rows are field
|
||||
components; columns are iterations; pages are receivers.
|
||||
"""
|
||||
|
||||
import pycuda.gpuarray as gpuarray
|
||||
@@ -59,11 +63,14 @@ def gpu_initialise_rx_arrays(G):
|
||||
rxcoords[i, 0] = rx.xcoord
|
||||
rxcoords[i, 1] = rx.ycoord
|
||||
rxcoords[i, 2] = rx.zcoord
|
||||
# Store maximum number of output components
|
||||
if len(rx.outputs) > Rx.maxnumoutputs_gpu:
|
||||
Rx.maxnumoutputs_gpu = len(rx.outputs)
|
||||
|
||||
# Array to store field components for receivers on GPU - rows are field components;
|
||||
# columns are iterations; pages are receivers
|
||||
rxs = np.zeros((Rx.maxnumoutputs, G.iterations, len(G.rxs)),
|
||||
dtype=config.dtypes['float_or_double'])
|
||||
rxs = np.zeros((len(Rx.allowableoutputs_gpu), G.iterations, len(G.rxs)),
|
||||
dtype=config.sim_config.dtypes['float_or_double'])
|
||||
|
||||
# Copy arrays to GPU
|
||||
rxcoords_gpu = gpuarray.to_gpu(rxcoords)
|
||||
@@ -72,20 +79,20 @@ def gpu_initialise_rx_arrays(G):
|
||||
return rxcoords_gpu, rxs_gpu
|
||||
|
||||
|
||||
def gpu_get_rx_array(rxs_gpu, rxcoords_gpu, G):
|
||||
def get_rx_array_gpu(rxs_gpu, rxcoords_gpu, G):
|
||||
"""Copy output from receivers array used on GPU back to receiver objects.
|
||||
|
||||
Args:
|
||||
rxs_gpu (float): numpy array of receiver data from GPU - rows are field
|
||||
components; columns are iterations; pages are receivers.
|
||||
rxcoords_gpu (float): numpy array of receiver coordinates from GPU.
|
||||
rxcoords_gpu (int): numpy array of receiver coordinates from GPU.
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
"""
|
||||
|
||||
for rx in G.rxs:
|
||||
for rxgpu in range(len(G.rxs)):
|
||||
if rx.xcoord == rxcoords_gpu[rxgpu, 0] and \
|
||||
rx.ycoord == rxcoords_gpu[rxgpu, 1] and \
|
||||
rx.zcoord == rxcoords_gpu[rxgpu, 2]:
|
||||
for k in rx.outputs.items():
|
||||
rx.outputs[k] = rxs_gpu[Rx.gpu_allowableoutputs.index(k), :, rxgpu]
|
||||
if (rx.xcoord == rxcoords_gpu[rxgpu, 0] and
|
||||
rx.ycoord == rxcoords_gpu[rxgpu, 1] and
|
||||
rx.zcoord == rxcoords_gpu[rxgpu, 2]):
|
||||
for output in rx.outputs.keys():
|
||||
rx.outputs[output] = rxs_gpu[Rx.allowableoutputs_gpu.index(output), :, rxgpu]
|
||||
|
@@ -184,7 +184,7 @@ class Snapshot:
|
||||
self.filehandle.close()
|
||||
|
||||
|
||||
def gpu_initialise_snapshot_array(G):
|
||||
def initialise_snapshot_array_gpu(G):
|
||||
"""Initialise array on GPU for to store field data for snapshots.
|
||||
|
||||
Args:
|
||||
@@ -236,7 +236,7 @@ def gpu_initialise_snapshot_array(G):
|
||||
return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu
|
||||
|
||||
|
||||
def gpu_get_snapshot_array(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap):
|
||||
def get_snapshot_array_gpu(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap):
|
||||
"""Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview.
|
||||
|
||||
Args:
|
||||
|
@@ -15,7 +15,7 @@
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import sys
|
||||
import gprMax.config as config
|
||||
from .grid import FDTDGrid
|
||||
from .grid import CUDAGrid
|
||||
|
@@ -236,7 +236,7 @@ class MagneticDipole(Source):
|
||||
(1 / (G.dx * G.dy * G.dz)))
|
||||
|
||||
|
||||
def gpu_initialise_src_arrays(sources, G):
|
||||
def initialise_src_arrays_gpu(sources, G):
|
||||
"""Initialise arrays on GPU for source coordinates/polarisation, other
|
||||
source information, and source waveform values.
|
||||
|
||||
@@ -255,8 +255,8 @@ def gpu_initialise_src_arrays(sources, G):
|
||||
import pycuda.gpuarray as gpuarray
|
||||
|
||||
srcinfo1 = np.zeros((len(sources), 4), dtype=np.int32)
|
||||
srcinfo2 = np.zeros((len(sources)), dtype=config.dtypes['float_or_double'])
|
||||
srcwaves = np.zeros((len(sources), G.iterations), dtype=config.dtypes['float_or_double'])
|
||||
srcinfo2 = np.zeros((len(sources)), dtype=config.sim_config.dtypes['float_or_double'])
|
||||
srcwaves = np.zeros((len(sources), G.iterations), dtype=config.sim_config.dtypes['float_or_double'])
|
||||
for i, src in enumerate(sources):
|
||||
srcinfo1[i, 0] = src.xcoord
|
||||
srcinfo1[i, 1] = src.ycoord
|
||||
|
@@ -18,21 +18,24 @@
|
||||
|
||||
from importlib import import_module
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
|
||||
import gprMax.config as config
|
||||
from .cuda.fields_updates import kernel_template_fields
|
||||
from .cuda.snapshots import kernel_template_store_snapshot
|
||||
from .cuda.source_updates import kernel_template_sources
|
||||
from .cython.fields_updates_normal import update_electric
|
||||
from .cython.fields_updates_normal import update_magnetic
|
||||
from .fields_outputs import store_outputs
|
||||
from .receivers import gpu_initialise_rx_arrays
|
||||
from .receivers import gpu_get_rx_array
|
||||
from .cython.fields_updates_normal import update_electric as update_electric_cpu
|
||||
from .cython.fields_updates_normal import update_magnetic as update_magnetic_cpu
|
||||
from .fields_outputs import store_outputs as store_outputs_cpu
|
||||
from .fields_outputs import kernel_template_store_outputs
|
||||
from .receivers import initialise_rx_arrays_gpu
|
||||
from .receivers import get_rx_array_gpu
|
||||
from .snapshots import Snapshot
|
||||
from .snapshots import gpu_initialise_snapshot_array
|
||||
from .snapshots import gpu_get_snapshot_array
|
||||
from .sources import gpu_initialise_src_arrays
|
||||
from .snapshots import initialise_snapshot_array_gpu
|
||||
from .snapshots import get_snapshot_array_gpu
|
||||
from .sources import initialise_src_arrays_gpu
|
||||
from .utilities import round32
|
||||
from .utilities import timer
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -52,7 +55,7 @@ class CPUUpdates:
|
||||
|
||||
def store_outputs(self):
|
||||
"""Store field component values for every receiver and transmission line."""
|
||||
store_outputs(self.grid)
|
||||
store_outputs_cpu(self.grid)
|
||||
|
||||
def store_snapshots(self, iteration):
|
||||
"""Store any snapshots.
|
||||
@@ -66,7 +69,7 @@ class CPUUpdates:
|
||||
|
||||
def update_magnetic(self):
|
||||
"""Update magnetic field components."""
|
||||
update_magnetic(self.grid.nx,
|
||||
update_magnetic_cpu(self.grid.nx,
|
||||
self.grid.ny,
|
||||
self.grid.nz,
|
||||
config.sim_config.hostinfo['ompthreads'],
|
||||
@@ -99,18 +102,18 @@ class CPUUpdates:
|
||||
"""Update electric field components."""
|
||||
# All materials are non-dispersive so do standard update.
|
||||
if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0:
|
||||
update_electric(self.grid.nx,
|
||||
self.grid.ny,
|
||||
self.grid.nz,
|
||||
config.sim_config.hostinfo['ompthreads'],
|
||||
self.grid.updatecoeffsE,
|
||||
self.grid.ID,
|
||||
self.grid.Ex,
|
||||
self.grid.Ey,
|
||||
self.grid.Ez,
|
||||
self.grid.Hx,
|
||||
self.grid.Hy,
|
||||
self.grid.Hz)
|
||||
update_electric_cpu(self.grid.nx,
|
||||
self.grid.ny,
|
||||
self.grid.nz,
|
||||
config.sim_config.hostinfo['ompthreads'],
|
||||
self.grid.updatecoeffsE,
|
||||
self.grid.ID,
|
||||
self.grid.Ex,
|
||||
self.grid.Ey,
|
||||
self.grid.Ez,
|
||||
self.grid.Hx,
|
||||
self.grid.Hy,
|
||||
self.grid.Hz)
|
||||
|
||||
# If there are any dispersive materials do 1st part of dispersive update
|
||||
# (it is split into two parts as it requires present and updated electric field values).
|
||||
@@ -247,20 +250,14 @@ class CUDAUpdates:
|
||||
self.dispersive_update_a = None
|
||||
self.dispersive_update_b = None
|
||||
|
||||
import pycuda.driver as drv
|
||||
from pycuda.compiler import SourceModule
|
||||
drv.init()
|
||||
|
||||
# Suppress nvcc warnings on Windows
|
||||
log.debug('Move nvcc compiler options to simulation config')
|
||||
if sys.platform == 'win32':
|
||||
self.compiler_opts = ['-w']
|
||||
else:
|
||||
self.compiler_opts = None
|
||||
# Import PyCUDA modules
|
||||
self.drv = import_module('pycuda.driver')
|
||||
self.source_module = getattr(import_module('pycuda.compiler'), 'SourceModule')
|
||||
self.drv.init()
|
||||
|
||||
# Create device handle and context on specific GPU device (and make it current context)
|
||||
self.dev = drv.Device(self.grid.gpu.deviceID)
|
||||
self.ctx = dev.make_context()
|
||||
self.dev = self.drv.Device(config.model_configs[self.grid.model_num].cuda['gpu'].deviceID)
|
||||
self.ctx = self.dev.make_context()
|
||||
|
||||
# Initialise arrays on GPU, prepare kernels, and get kernel functions
|
||||
self.set_field_kernels()
|
||||
@@ -274,56 +271,56 @@ class CUDAUpdates:
|
||||
get kernel functions.
|
||||
"""
|
||||
if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0:
|
||||
kernels_fields = SourceModule(kernels_template_fields.substitute(
|
||||
REAL=cudafloattype,
|
||||
COMPLEX=cudacomplextype,
|
||||
N_updatecoeffsE=self.grid.updatecoeffsE.size,
|
||||
N_updatecoeffsH=self.grid.updatecoeffsH.size,
|
||||
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
|
||||
NY_MATDISPCOEFFS=self.grid.updatecoeffsdispersive.shape[1],
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1,
|
||||
NX_ID=self.grid.ID.shape[1],
|
||||
NY_ID=self.grid.ID.shape[2],
|
||||
NZ_ID=self.grid.ID.shape[3],
|
||||
NX_T=self.grid.Tx.shape[1],
|
||||
NY_T=self.grid.Tx.shape[2],
|
||||
NZ_T=self.grid.Tx.shape[3]),
|
||||
options=self.compiler_opts)
|
||||
kernels_fields = self.source_module(kernels_template_fields.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
COMPLEX=config.sim_config.dtypes['C_complex'],
|
||||
N_updatecoeffsE=self.grid.updatecoeffsE.size,
|
||||
N_updatecoeffsH=self.grid.updatecoeffsH.size,
|
||||
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
|
||||
NY_MATDISPCOEFFS=self.grid.updatecoeffsdispersive.shape[1],
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1,
|
||||
NX_ID=self.grid.ID.shape[1],
|
||||
NY_ID=self.grid.ID.shape[2],
|
||||
NZ_ID=self.grid.ID.shape[3],
|
||||
NX_T=self.grid.Tx.shape[1],
|
||||
NY_T=self.grid.Tx.shape[2],
|
||||
NZ_T=self.grid.Tx.shape[3]),
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
else: # Set to one any substitutions for dispersive materials
|
||||
kernels_fields = SourceModule(kernels_template_fields.substitute(
|
||||
REAL=cudafloattype,
|
||||
COMPLEX=cudacomplextype,
|
||||
N_updatecoeffsE=self.grid.updatecoeffsE.size,
|
||||
N_updatecoeffsH=self.grid.updatecoeffsH.size,
|
||||
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
|
||||
NY_MATDISPCOEFFS=1,
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1,
|
||||
NX_ID=self.grid.ID.shape[1],
|
||||
NY_ID=self.grid.ID.shape[2],
|
||||
NZ_ID=self.grid.ID.shape[3],
|
||||
NX_T=1,
|
||||
NY_T=1,
|
||||
NZ_T=1),
|
||||
options=self.compiler_opts)
|
||||
self.update_electric = kernels_fields.get_function("update_electric")
|
||||
self.update_magnetic = kernels_fields.get_function("update_magnetic")
|
||||
if self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes > self.grid.gpu.constmem:
|
||||
raise GeneralError(log.exception(f'Too many materials in the model to fit onto constant memory of size {human_size(self.grid.gpu.constmem)} on {self.grid.gpu.deviceID} - {self.grid.gpu.name} GPU'))
|
||||
self.copy_mat_coeffs()
|
||||
kernels_fields = self.source_module(kernel_template_fields.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
COMPLEX=config.sim_config.dtypes['C_complex'],
|
||||
N_updatecoeffsE=self.grid.updatecoeffsE.size,
|
||||
N_updatecoeffsH=self.grid.updatecoeffsH.size,
|
||||
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
|
||||
NY_MATDISPCOEFFS=1,
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1,
|
||||
NX_ID=self.grid.ID.shape[1],
|
||||
NY_ID=self.grid.ID.shape[2],
|
||||
NZ_ID=self.grid.ID.shape[3],
|
||||
NX_T=1,
|
||||
NY_T=1,
|
||||
NZ_T=1),
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
self.update_electric_gpu = kernels_fields.get_function("update_electric")
|
||||
self.update_magnetic_gpu = kernels_fields.get_function("update_magnetic")
|
||||
if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes > config.model_configs[self.grid.model_num].cuda['gpu'].constmem):
|
||||
raise GeneralError(log.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.model_configs[self.grid.model_num].cuda['gpu'].constmem)} on {config.model_configs[self.grid.model_num].cuda['gpu'].deviceID} - {config.model_configs[self.grid.model_num].cuda['gpu'].name} GPU"))
|
||||
self.copy_mat_coeffs(kernels_fields, kernels_fields)
|
||||
|
||||
# Electric and magnetic field updates - dispersive materials - get kernel functions and initialise array on GPU
|
||||
if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0: # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values).
|
||||
self.dispersive_update_a = kernels_fields.get_function("update_electric_dispersive_A")
|
||||
self.dispersive_update_b = kernels_fields.get_function("update_electric_dispersive_B")
|
||||
self.grid.gpu_initialise_dispersive_arrays()
|
||||
self.grid.initialise_dispersive_arrays()
|
||||
|
||||
# Electric and magnetic field updates - set blocks per grid and initialise field arrays on GPU
|
||||
self.grid.gpu_set_blocks_per_grid()
|
||||
self.grid.gpu_initialise_arrays()
|
||||
self.grid.set_blocks_per_grid()
|
||||
self.grid.initialise_arrays()
|
||||
|
||||
def set_pml_kernels(self):
|
||||
"""PMLS - prepare kernels and get kernel functions."""
|
||||
@@ -336,81 +333,81 @@ class CUDAUpdates:
|
||||
kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic),
|
||||
'kernels_template_pml_magnetic_' +
|
||||
self.grid.pmlformulation)
|
||||
kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(
|
||||
REAL=cudafloattype,
|
||||
N_updatecoeffsE=self.grid.updatecoeffsE.size,
|
||||
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1,
|
||||
NX_ID=self.grid.ID.shape[1],
|
||||
NY_ID=self.grid.ID.shape[2],
|
||||
NZ_ID=self.grid.ID.shape[3]),
|
||||
options=self.compiler_opts)
|
||||
kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(
|
||||
REAL=cudafloattype,
|
||||
N_updatecoeffsH=self.grid.updatecoeffsH.size,
|
||||
NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1],
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1,
|
||||
NX_ID=self.gridG.ID.shape[1],
|
||||
NY_ID=self.grid.ID.shape[2],
|
||||
NZ_ID=self.grid.ID.shape[3]),
|
||||
options=self.compiler_opts)
|
||||
self.copy_mat_coeffs()
|
||||
kernels_pml_electric = self.source_module(kernelelectricfunc.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
N_updatecoeffsE=self.grid.updatecoeffsE.size,
|
||||
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1,
|
||||
NX_ID=self.grid.ID.shape[1],
|
||||
NY_ID=self.grid.ID.shape[2],
|
||||
NZ_ID=self.grid.ID.shape[3]),
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
kernels_pml_magnetic = self.source_module(kernelmagneticfunc.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
N_updatecoeffsH=self.grid.updatecoeffsH.size,
|
||||
NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1],
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1,
|
||||
NX_ID=self.grid.ID.shape[1],
|
||||
NY_ID=self.grid.ID.shape[2],
|
||||
NZ_ID=self.grid.ID.shape[3]),
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
self.copy_mat_coeffs(kernels_pml_electric, kernels_pml_magnetic)
|
||||
# Set block per grid, initialise arrays on GPU, and get kernel functions
|
||||
for pml in self.grid.pmls:
|
||||
pml.gpu_initialise_arrays()
|
||||
pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
|
||||
pml.gpu_set_blocks_per_grid(self.grid)
|
||||
pml.initialise_field_arrays_gpu()
|
||||
pml.get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
|
||||
pml.set_blocks_per_grid(self.grid)
|
||||
|
||||
def set_rx_kernel(self):
|
||||
"""Receivers - initialise arrays on GPU, prepare kernel and get kernel
|
||||
function.
|
||||
"""
|
||||
if self.grid.rxs:
|
||||
rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(self.grid)
|
||||
kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(
|
||||
REAL=cudafloattype,
|
||||
NY_RXCOORDS=3,
|
||||
NX_RXS=6,
|
||||
NY_RXS=self.grid.iterations,
|
||||
NZ_RXS=len(self.grid.rxs),
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1),
|
||||
options=self.compiler_opts)
|
||||
self.store_outputs = kernel_store_outputs.get_function("store_outputs")
|
||||
self.rxcoords_gpu, self.rxs_gpu = initialise_rx_arrays_gpu(self.grid)
|
||||
kernel_store_outputs = self.source_module(kernel_template_store_outputs.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
NY_RXCOORDS=3,
|
||||
NX_RXS=6,
|
||||
NY_RXS=self.grid.iterations,
|
||||
NZ_RXS=len(self.grid.rxs),
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1),
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
self.store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
|
||||
|
||||
def set_src_kernels(self):
|
||||
"""Sources - initialise arrays on GPU, prepare kernel and get kernel
|
||||
function.
|
||||
"""
|
||||
if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles:
|
||||
kernels_sources = SourceModule(kernels_template_sources.substitute(
|
||||
REAL=cudafloattype,
|
||||
N_updatecoeffsE=self.grid.updatecoeffsE.size,
|
||||
N_updatecoeffsH=self.grid.updatecoeffsH.size,
|
||||
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
|
||||
NY_SRCINFO=4,
|
||||
NY_SRCWAVES=self.grid.iterations,
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1,
|
||||
NX_ID=self.grid.ID.shape[1],
|
||||
NY_ID=self.grid.ID.shape[2],
|
||||
NZ_ID=self.grid.ID.shape[3]),
|
||||
options=self.compiler_opts)
|
||||
self.copy_mat_coeffs()
|
||||
kernels_sources = self.source_module(kernel_template_sources.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
N_updatecoeffsE=self.grid.updatecoeffsE.size,
|
||||
N_updatecoeffsH=self.grid.updatecoeffsH.size,
|
||||
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
|
||||
NY_SRCINFO=4,
|
||||
NY_SRCWAVES=self.grid.iterations,
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1,
|
||||
NX_ID=self.grid.ID.shape[1],
|
||||
NY_ID=self.grid.ID.shape[2],
|
||||
NZ_ID=self.grid.ID.shape[3]),
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
self.copy_mat_coeffs(kernels_sources, kernels_sources)
|
||||
if self.grid.hertziandipoles:
|
||||
self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = gpu_initialise_src_arrays(self.grid.hertziandipoles, self.grid)
|
||||
self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = initialise_src_arrays_gpu(self.grid.hertziandipoles, self.grid)
|
||||
self.update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole")
|
||||
if self.grid.magneticdipoles:
|
||||
self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = gpu_initialise_src_arrays(self.grid.magneticdipoles, self.grid)
|
||||
self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = initialise_src_arrays_gpu(self.grid.magneticdipoles, self.grid)
|
||||
self.update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole")
|
||||
if self.grid.voltagesources:
|
||||
self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = gpu_initialise_src_arrays(self.grid.voltagesources, self.grid)
|
||||
self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = initialise_src_arrays_gpu(self.grid.voltagesources, self.grid)
|
||||
self.update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source")
|
||||
|
||||
def set_snapshot_kernel(self):
|
||||
@@ -418,42 +415,46 @@ class CUDAUpdates:
|
||||
function.
|
||||
"""
|
||||
if self.grid.snapshots:
|
||||
self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = gpu_initialise_snapshot_array(self.grid)
|
||||
kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(
|
||||
REAL=cudafloattype,
|
||||
NX_SNAPS=Snapshot.nx_max,
|
||||
NY_SNAPS=Snapshot.ny_max,
|
||||
NZ_SNAPS=Snapshot.nz_max,
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1),
|
||||
options=self.compiler_opts)
|
||||
self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = initialise_snapshot_array_gpu(self.grid)
|
||||
kernel_store_snapshot = self.source_module(kernel_template_store_snapshot.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
NX_SNAPS=Snapshot.nx_max,
|
||||
NY_SNAPS=Snapshot.ny_max,
|
||||
NZ_SNAPS=Snapshot.nz_max,
|
||||
NX_FIELDS=self.grid.nx + 1,
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1),
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
self.store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
|
||||
|
||||
def copy_mat_coeffs(self):
|
||||
def copy_mat_coeffs(self, kernelE, kernelH):
|
||||
"""Copy material coefficient arrays to constant memory of GPU
|
||||
(must be <64KB).
|
||||
|
||||
Args:
|
||||
kernelE (kernel): electric field kernel.
|
||||
kernelH (kernel): magnetic field kernel.
|
||||
"""
|
||||
updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0]
|
||||
updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0]
|
||||
updatecoeffsE = kernelE.get_global('updatecoeffsE')[0]
|
||||
updatecoeffsH = kernelH.get_global('updatecoeffsH')[0]
|
||||
self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE)
|
||||
self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH)
|
||||
|
||||
def store_outputs(self):
|
||||
"""Store field component values for every receiver and transmission line."""
|
||||
"""Store field component values for every receiver."""
|
||||
if self.grid.rxs:
|
||||
self.store_outputs(np.int32(len(self.grid.rxs)),
|
||||
np.int32(self.grid.iteration),
|
||||
self.rxcoords_gpu.gpudata,
|
||||
self.rxs_gpu.gpudata,
|
||||
self.grid.Ex_gpu.gpudata,
|
||||
self.grid.Ey_gpu.gpudata,
|
||||
self.grid.Ez_gpu.gpudata,
|
||||
self.grid.Hx_gpu.gpudata,
|
||||
self.grid.Hy_gpu.gpudata,
|
||||
self.grid.Hz_gpu.gpudata,
|
||||
block=(1, 1, 1),
|
||||
grid=(round32(len(self.grid.rxs)), 1, 1))
|
||||
self.store_outputs_gpu(np.int32(len(self.grid.rxs)),
|
||||
np.int32(self.grid.iteration),
|
||||
self.rxcoords_gpu.gpudata,
|
||||
self.rxs_gpu.gpudata,
|
||||
self.grid.Ex_gpu.gpudata,
|
||||
self.grid.Ey_gpu.gpudata,
|
||||
self.grid.Ez_gpu.gpudata,
|
||||
self.grid.Hx_gpu.gpudata,
|
||||
self.grid.Hy_gpu.gpudata,
|
||||
self.grid.Hz_gpu.gpudata,
|
||||
block=(1, 1, 1),
|
||||
grid=(round32(len(self.grid.rxs)), 1, 1))
|
||||
|
||||
def store_snapshots(self, iteration):
|
||||
"""Store any snapshots.
|
||||
@@ -501,32 +502,32 @@ class CUDAUpdates:
|
||||
|
||||
def update_magnetic(self):
|
||||
"""Update magnetic field components."""
|
||||
self.update_magnetic(np.int32(self.grid.nx),
|
||||
np.int32(self.grid.ny),
|
||||
np.int32(self.grid.nz),
|
||||
self.grid.ID_gpu,
|
||||
self.grid.Hx_gpu,
|
||||
self.grid.Hy_gpu,
|
||||
self.grid.Hz_gpu,
|
||||
self.grid.Ex_gpu,
|
||||
self.grid.Ey_gpu,
|
||||
self.grid.Ez_gpu,
|
||||
block=self.grid.tpb,
|
||||
grid=self.grid.bpg)
|
||||
self.update_magnetic_gpu(np.int32(self.grid.nx),
|
||||
np.int32(self.grid.ny),
|
||||
np.int32(self.grid.nz),
|
||||
self.grid.ID_gpu,
|
||||
self.grid.Hx_gpu,
|
||||
self.grid.Hy_gpu,
|
||||
self.grid.Hz_gpu,
|
||||
self.grid.Ex_gpu,
|
||||
self.grid.Ey_gpu,
|
||||
self.grid.Ez_gpu,
|
||||
block=self.grid.tpb,
|
||||
grid=self.grid.bpg)
|
||||
|
||||
def update_magnetic_pml(self):
|
||||
"""Update magnetic field components with the PML correction."""
|
||||
for pml in self.grid.pmls:
|
||||
pml.gpu_update_magnetic(self.grid)
|
||||
pml.update_magnetic(self.grid)
|
||||
|
||||
def update_magnetic_sources(self):
|
||||
"""Update magnetic field components from sources."""
|
||||
if self.grid.magneticdipoles:
|
||||
self.update_magnetic_dipole_gpu(np.int32(len(self.grid.magneticdipoles)),
|
||||
np.int32(self.grid.iteration),
|
||||
config.dtypes['float_or_double'](self.grid.dx),
|
||||
config.dtypes['float_or_double'](self.grid.dy),
|
||||
config.dtypes['float_or_double'](self.grid.dz),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dx),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dy),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dz),
|
||||
self.srcinfo1_magnetic_gpu.gpudata,
|
||||
self.srcinfo2_magnetic_gpu.gpudata,
|
||||
self.srcwaves_magnetic_gpu.gpudata,
|
||||
@@ -541,18 +542,18 @@ class CUDAUpdates:
|
||||
"""Update electric field components."""
|
||||
# All materials are non-dispersive so do standard update.
|
||||
if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0:
|
||||
self.update_electric(np.int32(self.grid.nx),
|
||||
np.int32(self.grid.ny),
|
||||
np.int32(self.grid.nz),
|
||||
self.grid.ID_gpu,
|
||||
self.grid.Ex_gpu,
|
||||
self.grid.Ey_gpu,
|
||||
self.grid.Ez_gpu,
|
||||
self.grid.Hx_gpu,
|
||||
self.grid.Hy_gpu,
|
||||
self.grid.Hz_gpu,
|
||||
block=self.grid.tpb,
|
||||
grid=self.grid.bpg)
|
||||
self.update_electric_gpu(np.int32(self.grid.nx),
|
||||
np.int32(self.grid.ny),
|
||||
np.int32(self.grid.nz),
|
||||
self.grid.ID_gpu,
|
||||
self.grid.Ex_gpu,
|
||||
self.grid.Ey_gpu,
|
||||
self.grid.Ez_gpu,
|
||||
self.grid.Hx_gpu,
|
||||
self.grid.Hy_gpu,
|
||||
self.grid.Hz_gpu,
|
||||
block=self.grid.tpb,
|
||||
grid=self.grid.bpg)
|
||||
|
||||
# If there are any dispersive materials do 1st part of dispersive update
|
||||
# (it is split into two parts as it requires present and updated electric field values).
|
||||
@@ -578,7 +579,7 @@ class CUDAUpdates:
|
||||
def update_electric_pml(self):
|
||||
"""Update electric field components with the PML correction."""
|
||||
for pml in self.grid.pmls:
|
||||
pml.gpu_update_electric(self.grid)
|
||||
pml.update_electric(self.grid)
|
||||
|
||||
def update_electric_sources(self):
|
||||
"""Update electric field components from sources -
|
||||
@@ -587,9 +588,9 @@ class CUDAUpdates:
|
||||
if self.grid.voltagesources:
|
||||
self.update_voltage_source_gpu(np.int32(len(self.grid.voltagesources)),
|
||||
np.int32(self.grid.iteration),
|
||||
config.dtypes['float_or_double'](self.grid.dx),
|
||||
config.dtypes['float_or_double'](self.grid.dy),
|
||||
config.dtypes['float_or_double'](self.grid.dz),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dx),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dy),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dz),
|
||||
self.srcinfo1_voltage_gpu.gpudata,
|
||||
self.srcinfo2_voltage_gpu.gpudata,
|
||||
self.srcwaves_voltage_gpu.gpudata,
|
||||
@@ -603,9 +604,9 @@ class CUDAUpdates:
|
||||
if self.grid.hertziandipoles:
|
||||
self.update_hertzian_dipole_gpu(np.int32(len(self.grid.hertziandipoles)),
|
||||
np.int32(self.grid.iteration),
|
||||
config.dtypes['float_or_double'](self.grid.dx),
|
||||
config.dtypes['float_or_double'](self.grid.dy),
|
||||
config.dtypes['float_or_double'](self.grid.dz),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dx),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dy),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dz),
|
||||
self.srcinfo1_hertzian_gpu.gpudata,
|
||||
self.srcinfo2_hertzian_gpu.gpudata,
|
||||
self.srcwaves_hertzian_gpu.gpudata,
|
||||
@@ -660,21 +661,20 @@ class CUDAUpdates:
|
||||
"""Copy data from GPU back to CPU to save to file(s)."""
|
||||
# Copy output from receivers array back to correct receiver objects
|
||||
if self.grid.rxs:
|
||||
gpu_get_rx_array(self.rxs_gpu.get(),
|
||||
get_rx_array_gpu(self.rxs_gpu.get(),
|
||||
self.rxcoords_gpu.get(),
|
||||
self.grid)
|
||||
|
||||
# Copy data from any snapshots back to correct snapshot objects
|
||||
if self.grid.snapshots and not self.grid.snapsgpu2cpu:
|
||||
for i, snap in enumerate(self.grid.snapshots):
|
||||
gpu_get_snapshot_array(self.snapEx_gpu.get(),
|
||||
get_snapshot_array_gpu(self.snapEx_gpu.get(),
|
||||
self.snapEy_gpu.get(),
|
||||
self.snapEz_gpu.get(),
|
||||
self.snapHx_gpu.get(),
|
||||
self.snapHy_gpu.get(),
|
||||
self.snapHz_gpu.get(),
|
||||
i,
|
||||
snap)
|
||||
i, snap)
|
||||
|
||||
def cleanup(self):
|
||||
"""Cleanup GPU context."""
|
||||
|
@@ -421,10 +421,6 @@ class GPU:
|
||||
self.pcibusID = None
|
||||
self.constmem = None
|
||||
self.totalmem = None
|
||||
# Threads per block for main field updates
|
||||
self.tpb = (256, 1, 1)
|
||||
# Blocks per grid for main field updates (set in grid.py)
|
||||
self.bpg = None
|
||||
|
||||
def get_gpu_info(self, drv):
|
||||
"""Set information about GPU.
|
||||
|
在新工单中引用
屏蔽一个用户