Basic functional GPU solver with cylinder_Ascan_2D example.

这个提交包含在:
craig-warren
2019-10-21 17:17:36 +01:00
父节点 c31eff41d2
当前提交 40ff78a8a9
共有 12 个文件被更改,包括 343 次插入和 269 次删除

查看文件

@@ -510,14 +510,12 @@ class Rx(UserObjectMulti):
r.ID = self.kwargs['id']
outputs = self.kwargs['outputs']
# Get allowable outputs
if grid.gpu is not None:
allowableoutputs = RxUser.gpu_allowableoutputs
else:
allowableoutputs = RxUser.allowableoutputs
allowableoutputs = RxUser.allowableoutputs_gpu if config.sim_config.general['cuda'] else RxUser.allowableoutputs
# Check and add field output names
outputs.sort()
for field in outputs:
if field in allowableoutputs:
r.outputs[field] = np.zeros(grid.iterations, dtype=config.dtypes['float_or_double'])
r.outputs[field] = np.zeros(grid.iterations, dtype=config.sim_config.dtypes['float_or_double'])
else:
raise CmdInputError(f"'{self.params_str()}' contains an output type that is not allowable. Allowable outputs in current context are {allowableoutputs}")
# If no ID or outputs are specified, use default

查看文件

@@ -18,6 +18,7 @@
import logging
from pathlib import Path
import sys
from colorama import init
from colorama import Fore
@@ -64,7 +65,8 @@ class ModelConfig:
# N.B. This will happen if the requested snapshots are too large to fit
# on the memory of the GPU. If True this will slow performance significantly
if sim_config.general['cuda']:
self.cuda = {'gpu': sim_config.cuda['gpus'], 'snapsgpu2cpu': False}
self.cuda = {'gpu': sim_config.cuda['gpus'],
'snapsgpu2cpu': False}
# Total memory usage for all grids in the model. Starts with 50MB overhead.
self.mem_use = 50e6
@@ -175,10 +177,12 @@ class SimulationConfig:
self.general['cuda'] = True
self.general['cpu'] = False
self.general['opencl'] = False
# gpus: list of GPU objects
# gpus_str: list of strings describing GPU(s)
self.cuda = {'gpus': [],
'gpus_str': []}
self.general['precision'] = 'single'
self.cuda = {'gpus': [], # gpus: list of GPU objects
'gpus_str': [], # gpus_str: list of strings describing GPU(s)
'nvcc_opts': None} # nvcc_opts: nvcc compiler options
# Suppress nvcc warnings on Microsoft Windows
if sys.platform == 'win32': self.cuda['nvcc_opts'] = '-w'
self.get_gpus()
self.set_gpus()

查看文件

@@ -128,7 +128,7 @@ __global__ void update_magnetic(int NX, int NY, int NZ, const unsigned int* __re
// Electric field updates - dispersive materials //
///////////////////////////////////////////////////
__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $REAL_OR_COMPLEX* __restrict__ updatecoeffsdispersive, $REAL_OR_COMPLEX *Tx, $REAL_OR_COMPLEX *Ty, $REAL_OR_COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
// This function is part A of updates to electric field values when dispersive materials (with multiple poles) are present.
//
@@ -189,7 +189,7 @@ __global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLE
}
}
__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $REAL_OR_COMPLEX* __restrict__ updatecoeffsdispersive, $REAL_OR_COMPLEX *Tx, $REAL_OR_COMPLEX *Ty, $REAL_OR_COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
// This function is part B which updates the dispersive field arrays when dispersive materials (with multiple poles) are present.
//

查看文件

@@ -279,15 +279,22 @@ class FDTDGrid:
class CUDAGrid(FDTDGrid):
"""Additional grid methods for solving on GPU using CUDA."""
def __init__(self, model_num):
super().__init__(model_num)
# Threads per block - used for main electric/magnetic field updates
self.tpb = (256, 1, 1)
# Blocks per grid - used for main electric/magnetic field updates
self.bpg = None
def set_blocks_per_grid(self):
"""Set the blocks per grid size used for updating the electric and
magnetic field arrays on a GPU.
"""
config.cuda['gpus'].bpg = (int(np.ceil(((self.nx + 1) *
(self.ny + 1) *
(self.nz + 1)) /
config.cuda['gpus'].tpb[0])), 1, 1)
self.bpg = (int(np.ceil(((self.nx + 1) * (self.ny + 1) *
(self.nz + 1)) / self.tpb[0])), 1, 1)
def initialise_arrays(self):
"""Initialise geometry and field arrays on GPU."""

查看文件

@@ -58,15 +58,15 @@ from .pml import CFS
from .pml import PML
from .pml import build_pml
from .pml import pml_information
from .receivers import gpu_initialise_rx_arrays
from .receivers import gpu_get_rx_array
from .receivers import initialise_rx_arrays_gpu
from .receivers import get_rx_array_gpu
from .receivers import Rx
from .scene import Scene
from .snapshots import Snapshot
from .snapshots import gpu_initialise_snapshot_array
from .snapshots import gpu_get_snapshot_array
from .snapshots import initialise_snapshot_array_gpu
from .snapshots import get_snapshot_array_gpu
from .solvers import create_solver
from .sources import gpu_initialise_src_arrays
from .sources import initialise_src_arrays_gpu
from .utilities import get_terminal_width
from .utilities import human_size
from .utilities import mem_check
@@ -286,7 +286,8 @@ class ModelBuildRun:
mem_GPU = ''
if config.sim_config.general['cuda']:
mem_GPU = f' host + ~{human_size(self.solver.get_memsolve())} GPU'
log.debug('Fix memory used calc for GPU')
# mem_GPU = f' host + ~{human_size(self.solver.get_memsolve())} GPU'
log.info(f'\nMemory (RAM) used: ~{human_size(self.p.memory_full_info().uss)}{mem_GPU}')
log.info(f'Solving time [HH:MM:SS]: {datetime.timedelta(seconds=tsolve)}')

查看文件

@@ -207,8 +207,7 @@ class PML:
self.CFS = G.cfs
if not config.sim_config.general['cuda']:
self.initialise_field_arrays()
self.initialise_field_arrays()
def initialise_field_arrays(self):
"""Initialise arrays to store fields in PML."""
@@ -340,7 +339,7 @@ class CUDAPML(PML):
solving on GPU using CUDA.
"""
def initialise_arrays(self):
def initialise_field_arrays_gpu(self):
"""Initialise PML field and coefficient arrays on GPU."""
import pycuda.gpuarray as gpuarray
@@ -355,20 +354,44 @@ class CUDAPML(PML):
self.HRF_gpu = gpuarray.to_gpu(self.HRF)
if self.direction[0] == 'x':
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype))
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype))
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype))
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype))
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx + 1, self.ny, self.nz + 1),
dtype=config.sim_config.dtypes['float_or_double']))
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx + 1, self.ny + 1, self.nz),
dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx, self.ny + 1, self.nz),
dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx, self.ny, self.nz + 1),
dtype=config.sim_config.dtypes['float_or_double']))
elif self.direction[0] == 'y':
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype))
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype))
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype))
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype))
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx, self.ny + 1, self.nz + 1),
dtype=config.sim_config.dtypes['float_or_double']))
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx + 1, self.ny + 1, self.nz),
dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx + 1, self.ny, self.nz),
dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx, self.ny, self.nz + 1),
dtype=config.sim_config.dtypes['float_or_double']))
elif self.direction[0] == 'z':
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype))
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype))
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype))
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype))
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx, self.ny + 1, self.nz + 1),
dtype=config.sim_config.dtypes['float_or_double']))
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx + 1, self.ny, self.nz + 1),
dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx + 1, self.ny, self.nz),
dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx, self.ny + 1, self.nz),
dtype=config.sim_config.dtypes['float_or_double']))
def set_blocks_per_grid(self, G):
"""Set the blocks per grid size used for updating the PML field arrays on a GPU.
@@ -377,7 +400,9 @@ class CUDAPML(PML):
G (FDTDGrid): Holds essential parameters describing the model.
"""
self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) * (self.EPhi1_gpu.shape[2] + 1) * (self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1)
self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) *
(self.EPhi1_gpu.shape[2] + 1) *
(self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1)
def get_update_funcs(self, kernelselectric, kernelsmagnetic):
"""Get update functions from PML kernels.
@@ -400,7 +425,24 @@ class CUDAPML(PML):
G (FDTDGrid): Holds essential parameters describing the model.
"""
self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.EPhi1_gpu.shape[1]), np.int32(self.EPhi1_gpu.shape[2]), np.int32(self.EPhi1_gpu.shape[3]), np.int32(self.EPhi2_gpu.shape[1]), np.int32(self.EPhi2_gpu.shape[2]), np.int32(self.EPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf),
np.int32(self.ys), np.int32(self.yf),
np.int32(self.zs), np.int32(self.zf),
np.int32(self.EPhi1_gpu.shape[1]),
np.int32(self.EPhi1_gpu.shape[2]),
np.int32(self.EPhi1_gpu.shape[3]),
np.int32(self.EPhi2_gpu.shape[1]),
np.int32(self.EPhi2_gpu.shape[2]),
np.int32(self.EPhi2_gpu.shape[3]),
np.int32(self.thickness),
G.ID_gpu.gpudata,
G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata,
self.ERA_gpu.gpudata, self.ERB_gpu.gpudata,
self.ERE_gpu.gpudata, self.ERF_gpu.gpudata,
config.sim_config.dtypes['float_or_double'](self.d),
block=G.tpb, grid=self.bpg)
def update_magnetic(self, G):
"""This function updates magnetic field components with the PML
@@ -409,7 +451,24 @@ class CUDAPML(PML):
Args:
G (FDTDGrid): Holds essential parameters describing the model.
"""
self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[3]), np.int32(self.HPhi2_gpu.shape[1]), np.int32(self.HPhi2_gpu.shape[2]), np.int32(self.HPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg)
self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf),
np.int32(self.ys), np.int32(self.yf),
np.int32(self.zs), np.int32(self.zf),
np.int32(self.HPhi1_gpu.shape[1]),
np.int32(self.HPhi1_gpu.shape[2]),
np.int32(self.HPhi1_gpu.shape[3]),
np.int32(self.HPhi2_gpu.shape[1]),
np.int32(self.HPhi2_gpu.shape[2]),
np.int32(self.HPhi2_gpu.shape[3]),
np.int32(self.thickness),
G.ID_gpu.gpudata,
G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata,
self.HRA_gpu.gpudata, self.HRB_gpu.gpudata,
self.HRE_gpu.gpudata, self.HRF_gpu.gpudata,
config.sim_config.dtypes['float_or_double'](self.d),
block=G.tpb, grid=self.bpg)
def pml_information(G):
"""Information about PMLs.
@@ -443,14 +502,16 @@ def build_pml(G, key, value):
value (int): Thickness of PML slab in cells.
"""
pml_type = CUDAPML if config.sim_config.general['cuda'] else PML
sumer = 0 # Sum of relative permittivities in PML slab
summr = 0 # Sum of relative permeabilities in PML slab
if key[0] == 'x':
if key == 'x0':
pml = PML(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz)
pml = pml_type(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz)
elif key == 'xmax':
pml = PML(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
pml = pml_type(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml)
for j in range(G.ny):
for k in range(G.nz):
@@ -463,9 +524,9 @@ def build_pml(G, key, value):
elif key[0] == 'y':
if key == 'y0':
pml = PML(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz)
pml = pml_type(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz)
elif key == 'ymax':
pml = PML(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
pml = pml_type(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml)
for i in range(G.nx):
for k in range(G.nz):
@@ -478,9 +539,9 @@ def build_pml(G, key, value):
elif key[0] == 'z':
if key == 'z0':
pml = PML(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny)
pml = pml_type(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny)
elif key == 'zmax':
pml = PML(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
pml = pml_type(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml)
for i in range(G.nx):
for j in range(G.ny):

查看文件

@@ -16,8 +16,6 @@
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from collections import OrderedDict
import numpy as np
import gprMax.config as config
@@ -27,14 +25,15 @@ class Rx:
"""Receiver output points."""
allowableoutputs = ['Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz', 'Ix', 'Iy', 'Iz']
gpu_allowableoutputs = allowableoutputs[:-3]
defaultoutputs = allowableoutputs[:-3]
maxnumoutputs = 0
allowableoutputs_gpu = allowableoutputs[:-3]
maxnumoutputs_gpu = 0
def __init__(self):
self.ID = None
self.outputs = OrderedDict()
self.outputs = {}
self.xcoord = None
self.ycoord = None
self.zcoord = None
@@ -43,12 +42,17 @@ class Rx:
self.zcoordorigin = None
def gpu_initialise_rx_arrays(G):
def initialise_rx_arrays_gpu(G):
"""Initialise arrays on GPU for receiver coordinates and to store field
components for receivers.
Args:
G (FDTDGrid): Holds essential parameters describing the model.
Returns:
rxcoords_gpu (int): numpy array of receiver coordinates from GPU.
rxs_gpu (float): numpy array of receiver data from GPU - rows are field
components; columns are iterations; pages are receivers.
"""
import pycuda.gpuarray as gpuarray
@@ -59,11 +63,14 @@ def gpu_initialise_rx_arrays(G):
rxcoords[i, 0] = rx.xcoord
rxcoords[i, 1] = rx.ycoord
rxcoords[i, 2] = rx.zcoord
# Store maximum number of output components
if len(rx.outputs) > Rx.maxnumoutputs_gpu:
Rx.maxnumoutputs_gpu = len(rx.outputs)
# Array to store field components for receivers on GPU - rows are field components;
# columns are iterations; pages are receivers
rxs = np.zeros((Rx.maxnumoutputs, G.iterations, len(G.rxs)),
dtype=config.dtypes['float_or_double'])
rxs = np.zeros((len(Rx.allowableoutputs_gpu), G.iterations, len(G.rxs)),
dtype=config.sim_config.dtypes['float_or_double'])
# Copy arrays to GPU
rxcoords_gpu = gpuarray.to_gpu(rxcoords)
@@ -72,20 +79,20 @@ def gpu_initialise_rx_arrays(G):
return rxcoords_gpu, rxs_gpu
def gpu_get_rx_array(rxs_gpu, rxcoords_gpu, G):
def get_rx_array_gpu(rxs_gpu, rxcoords_gpu, G):
"""Copy output from receivers array used on GPU back to receiver objects.
Args:
rxs_gpu (float): numpy array of receiver data from GPU - rows are field
components; columns are iterations; pages are receivers.
rxcoords_gpu (float): numpy array of receiver coordinates from GPU.
rxcoords_gpu (int): numpy array of receiver coordinates from GPU.
G (FDTDGrid): Holds essential parameters describing the model.
"""
for rx in G.rxs:
for rxgpu in range(len(G.rxs)):
if rx.xcoord == rxcoords_gpu[rxgpu, 0] and \
rx.ycoord == rxcoords_gpu[rxgpu, 1] and \
rx.zcoord == rxcoords_gpu[rxgpu, 2]:
for k in rx.outputs.items():
rx.outputs[k] = rxs_gpu[Rx.gpu_allowableoutputs.index(k), :, rxgpu]
if (rx.xcoord == rxcoords_gpu[rxgpu, 0] and
rx.ycoord == rxcoords_gpu[rxgpu, 1] and
rx.zcoord == rxcoords_gpu[rxgpu, 2]):
for output in rx.outputs.keys():
rx.outputs[output] = rxs_gpu[Rx.allowableoutputs_gpu.index(output), :, rxgpu]

查看文件

@@ -184,7 +184,7 @@ class Snapshot:
self.filehandle.close()
def gpu_initialise_snapshot_array(G):
def initialise_snapshot_array_gpu(G):
"""Initialise array on GPU to store field data for snapshots.
Args:
@@ -236,7 +236,7 @@ def gpu_initialise_snapshot_array(G):
return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu
def gpu_get_snapshot_array(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap):
def get_snapshot_array_gpu(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap):
"""Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview.
Args:

查看文件

@@ -15,7 +15,7 @@
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
import sys
import gprMax.config as config
from .grid import FDTDGrid
from .grid import CUDAGrid

查看文件

@@ -236,7 +236,7 @@ class MagneticDipole(Source):
(1 / (G.dx * G.dy * G.dz)))
def gpu_initialise_src_arrays(sources, G):
def initialise_src_arrays_gpu(sources, G):
"""Initialise arrays on GPU for source coordinates/polarisation, other
source information, and source waveform values.
@@ -255,8 +255,8 @@ def gpu_initialise_src_arrays(sources, G):
import pycuda.gpuarray as gpuarray
srcinfo1 = np.zeros((len(sources), 4), dtype=np.int32)
srcinfo2 = np.zeros((len(sources)), dtype=config.dtypes['float_or_double'])
srcwaves = np.zeros((len(sources), G.iterations), dtype=config.dtypes['float_or_double'])
srcinfo2 = np.zeros((len(sources)), dtype=config.sim_config.dtypes['float_or_double'])
srcwaves = np.zeros((len(sources), G.iterations), dtype=config.sim_config.dtypes['float_or_double'])
for i, src in enumerate(sources):
srcinfo1[i, 0] = src.xcoord
srcinfo1[i, 1] = src.ycoord

查看文件

@@ -18,21 +18,24 @@
from importlib import import_module
import logging
import sys
import numpy as np
import gprMax.config as config
from .cuda.fields_updates import kernel_template_fields
from .cuda.snapshots import kernel_template_store_snapshot
from .cuda.source_updates import kernel_template_sources
from .cython.fields_updates_normal import update_electric
from .cython.fields_updates_normal import update_magnetic
from .fields_outputs import store_outputs
from .receivers import gpu_initialise_rx_arrays
from .receivers import gpu_get_rx_array
from .cython.fields_updates_normal import update_electric as update_electric_cpu
from .cython.fields_updates_normal import update_magnetic as update_magnetic_cpu
from .fields_outputs import store_outputs as store_outputs_cpu
from .fields_outputs import kernel_template_store_outputs
from .receivers import initialise_rx_arrays_gpu
from .receivers import get_rx_array_gpu
from .snapshots import Snapshot
from .snapshots import gpu_initialise_snapshot_array
from .snapshots import gpu_get_snapshot_array
from .sources import gpu_initialise_src_arrays
from .snapshots import initialise_snapshot_array_gpu
from .snapshots import get_snapshot_array_gpu
from .sources import initialise_src_arrays_gpu
from .utilities import round32
from .utilities import timer
log = logging.getLogger(__name__)
@@ -52,7 +55,7 @@ class CPUUpdates:
def store_outputs(self):
"""Store field component values for every receiver and transmission line."""
store_outputs(self.grid)
store_outputs_cpu(self.grid)
def store_snapshots(self, iteration):
"""Store any snapshots.
@@ -66,7 +69,7 @@ class CPUUpdates:
def update_magnetic(self):
"""Update magnetic field components."""
update_magnetic(self.grid.nx,
update_magnetic_cpu(self.grid.nx,
self.grid.ny,
self.grid.nz,
config.sim_config.hostinfo['ompthreads'],
@@ -99,18 +102,18 @@ class CPUUpdates:
"""Update electric field components."""
# All materials are non-dispersive so do standard update.
if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0:
update_electric(self.grid.nx,
self.grid.ny,
self.grid.nz,
config.sim_config.hostinfo['ompthreads'],
self.grid.updatecoeffsE,
self.grid.ID,
self.grid.Ex,
self.grid.Ey,
self.grid.Ez,
self.grid.Hx,
self.grid.Hy,
self.grid.Hz)
update_electric_cpu(self.grid.nx,
self.grid.ny,
self.grid.nz,
config.sim_config.hostinfo['ompthreads'],
self.grid.updatecoeffsE,
self.grid.ID,
self.grid.Ex,
self.grid.Ey,
self.grid.Ez,
self.grid.Hx,
self.grid.Hy,
self.grid.Hz)
# If there are any dispersive materials do 1st part of dispersive update
# (it is split into two parts as it requires present and updated electric field values).
@@ -247,20 +250,14 @@ class CUDAUpdates:
self.dispersive_update_a = None
self.dispersive_update_b = None
import pycuda.driver as drv
from pycuda.compiler import SourceModule
drv.init()
# Suppress nvcc warnings on Windows
log.debug('Move nvcc compiler options to simulation config')
if sys.platform == 'win32':
self.compiler_opts = ['-w']
else:
self.compiler_opts = None
# Import PyCUDA modules
self.drv = import_module('pycuda.driver')
self.source_module = getattr(import_module('pycuda.compiler'), 'SourceModule')
self.drv.init()
# Create device handle and context on specific GPU device (and make it current context)
self.dev = drv.Device(self.grid.gpu.deviceID)
self.ctx = dev.make_context()
self.dev = self.drv.Device(config.model_configs[self.grid.model_num].cuda['gpu'].deviceID)
self.ctx = self.dev.make_context()
# Initialise arrays on GPU, prepare kernels, and get kernel functions
self.set_field_kernels()
@@ -274,56 +271,56 @@ class CUDAUpdates:
get kernel functions.
"""
if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0:
kernels_fields = SourceModule(kernels_template_fields.substitute(
REAL=cudafloattype,
COMPLEX=cudacomplextype,
N_updatecoeffsE=self.grid.updatecoeffsE.size,
N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
NY_MATDISPCOEFFS=self.grid.updatecoeffsdispersive.shape[1],
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3],
NX_T=self.grid.Tx.shape[1],
NY_T=self.grid.Tx.shape[2],
NZ_T=self.grid.Tx.shape[3]),
options=self.compiler_opts)
kernels_fields = self.source_module(kernels_template_fields.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'],
COMPLEX=config.sim_config.dtypes['C_complex'],
N_updatecoeffsE=self.grid.updatecoeffsE.size,
N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
NY_MATDISPCOEFFS=self.grid.updatecoeffsdispersive.shape[1],
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3],
NX_T=self.grid.Tx.shape[1],
NY_T=self.grid.Tx.shape[2],
NZ_T=self.grid.Tx.shape[3]),
options=config.sim_config.cuda['nvcc_opts'])
else: # Set to one any substitutions for dispersive materials
kernels_fields = SourceModule(kernels_template_fields.substitute(
REAL=cudafloattype,
COMPLEX=cudacomplextype,
N_updatecoeffsE=self.grid.updatecoeffsE.size,
N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
NY_MATDISPCOEFFS=1,
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3],
NX_T=1,
NY_T=1,
NZ_T=1),
options=self.compiler_opts)
self.update_electric = kernels_fields.get_function("update_electric")
self.update_magnetic = kernels_fields.get_function("update_magnetic")
if self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes > self.grid.gpu.constmem:
raise GeneralError(log.exception(f'Too many materials in the model to fit onto constant memory of size {human_size(self.grid.gpu.constmem)} on {self.grid.gpu.deviceID} - {self.grid.gpu.name} GPU'))
self.copy_mat_coeffs()
kernels_fields = self.source_module(kernel_template_fields.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'],
COMPLEX=config.sim_config.dtypes['C_complex'],
N_updatecoeffsE=self.grid.updatecoeffsE.size,
N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
NY_MATDISPCOEFFS=1,
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3],
NX_T=1,
NY_T=1,
NZ_T=1),
options=config.sim_config.cuda['nvcc_opts'])
self.update_electric_gpu = kernels_fields.get_function("update_electric")
self.update_magnetic_gpu = kernels_fields.get_function("update_magnetic")
if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes > config.model_configs[self.grid.model_num].cuda['gpu'].constmem):
raise GeneralError(log.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.model_configs[self.grid.model_num].cuda['gpu'].constmem)} on {config.model_configs[self.grid.model_num].cuda['gpu'].deviceID} - {config.model_configs[self.grid.model_num].cuda['gpu'].name} GPU"))
self.copy_mat_coeffs(kernels_fields, kernels_fields)
# Electric and magnetic field updates - dispersive materials - get kernel functions and initialise array on GPU
if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0: # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values).
self.dispersive_update_a = kernels_fields.get_function("update_electric_dispersive_A")
self.dispersive_update_b = kernels_fields.get_function("update_electric_dispersive_B")
self.grid.gpu_initialise_dispersive_arrays()
self.grid.initialise_dispersive_arrays()
# Electric and magnetic field updates - set blocks per grid and initialise field arrays on GPU
self.grid.gpu_set_blocks_per_grid()
self.grid.gpu_initialise_arrays()
self.grid.set_blocks_per_grid()
self.grid.initialise_arrays()
def set_pml_kernels(self):
"""PMLS - prepare kernels and get kernel functions."""
@@ -336,81 +333,81 @@ class CUDAUpdates:
kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic),
'kernels_template_pml_magnetic_' +
self.grid.pmlformulation)
kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(
REAL=cudafloattype,
N_updatecoeffsE=self.grid.updatecoeffsE.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3]),
options=self.compiler_opts)
kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(
REAL=cudafloattype,
N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1],
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.gridG.ID.shape[1],
NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3]),
options=self.compiler_opts)
self.copy_mat_coeffs()
kernels_pml_electric = self.source_module(kernelelectricfunc.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'],
N_updatecoeffsE=self.grid.updatecoeffsE.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3]),
options=config.sim_config.cuda['nvcc_opts'])
kernels_pml_magnetic = self.source_module(kernelmagneticfunc.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'],
N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1],
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3]),
options=config.sim_config.cuda['nvcc_opts'])
self.copy_mat_coeffs(kernels_pml_electric, kernels_pml_magnetic)
# Set block per grid, initialise arrays on GPU, and get kernel functions
for pml in self.grid.pmls:
pml.gpu_initialise_arrays()
pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
pml.gpu_set_blocks_per_grid(self.grid)
pml.initialise_field_arrays_gpu()
pml.get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
pml.set_blocks_per_grid(self.grid)
def set_rx_kernel(self):
"""Receivers - initialise arrays on GPU, prepare kernel and get kernel
function.
"""
if self.grid.rxs:
rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(self.grid)
kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(
REAL=cudafloattype,
NY_RXCOORDS=3,
NX_RXS=6,
NY_RXS=self.grid.iterations,
NZ_RXS=len(self.grid.rxs),
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1),
options=self.compiler_opts)
self.store_outputs = kernel_store_outputs.get_function("store_outputs")
self.rxcoords_gpu, self.rxs_gpu = initialise_rx_arrays_gpu(self.grid)
kernel_store_outputs = self.source_module(kernel_template_store_outputs.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'],
NY_RXCOORDS=3,
NX_RXS=6,
NY_RXS=self.grid.iterations,
NZ_RXS=len(self.grid.rxs),
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1),
options=config.sim_config.cuda['nvcc_opts'])
self.store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
def set_src_kernels(self):
    """Sources - initialise arrays on GPU, prepare kernel and get kernel
    functions.

    Does nothing when the grid has no voltage sources, Hertzian dipoles or
    magnetic dipoles. Otherwise compiles the sources kernel module once and,
    for each source type present, sets the matching ``srcinfo1_*_gpu``,
    ``srcinfo2_*_gpu``, ``srcwaves_*_gpu`` arrays and ``update_*_gpu``
    kernel function on ``self``.
    """
    if not (self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles):
        return

    # Fill the kernel template with coefficient/field/ID array dimensions
    kernel_source = kernel_template_sources.substitute(
        REAL=config.sim_config.dtypes['C_float_or_double'],
        N_updatecoeffsE=self.grid.updatecoeffsE.size,
        N_updatecoeffsH=self.grid.updatecoeffsH.size,
        NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
        NY_SRCINFO=4,
        NY_SRCWAVES=self.grid.iterations,
        NX_FIELDS=self.grid.nx + 1,
        NY_FIELDS=self.grid.ny + 1,
        NZ_FIELDS=self.grid.nz + 1,
        NX_ID=self.grid.ID.shape[1],
        NY_ID=self.grid.ID.shape[2],
        NZ_ID=self.grid.ID.shape[3])
    kernels_sources = self.source_module(
        kernel_source,
        options=config.sim_config.cuda['nvcc_opts'])

    # The same module is passed for both the electric and magnetic
    # coefficient arguments.
    self.copy_mat_coeffs(kernels_sources, kernels_sources)

    if self.grid.hertziandipoles:
        (self.srcinfo1_hertzian_gpu,
         self.srcinfo2_hertzian_gpu,
         self.srcwaves_hertzian_gpu) = initialise_src_arrays_gpu(self.grid.hertziandipoles, self.grid)
        self.update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole")

    if self.grid.magneticdipoles:
        (self.srcinfo1_magnetic_gpu,
         self.srcinfo2_magnetic_gpu,
         self.srcwaves_magnetic_gpu) = initialise_src_arrays_gpu(self.grid.magneticdipoles, self.grid)
        self.update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole")

    if self.grid.voltagesources:
        (self.srcinfo1_voltage_gpu,
         self.srcinfo2_voltage_gpu,
         self.srcwaves_voltage_gpu) = initialise_src_arrays_gpu(self.grid.voltagesources, self.grid)
        self.update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source")
def set_snapshot_kernel(self):
@@ -418,42 +415,46 @@ class CUDAUpdates:
function.
"""
if self.grid.snapshots:
self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = gpu_initialise_snapshot_array(self.grid)
kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(
REAL=cudafloattype,
NX_SNAPS=Snapshot.nx_max,
NY_SNAPS=Snapshot.ny_max,
NZ_SNAPS=Snapshot.nz_max,
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1),
options=self.compiler_opts)
self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = initialise_snapshot_array_gpu(self.grid)
kernel_store_snapshot = self.source_module(kernel_template_store_snapshot.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'],
NX_SNAPS=Snapshot.nx_max,
NY_SNAPS=Snapshot.ny_max,
NZ_SNAPS=Snapshot.nz_max,
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1),
options=config.sim_config.cuda['nvcc_opts'])
self.store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
def copy_mat_coeffs(self, kernelE, kernelH):
    """Copy material coefficient arrays to constant memory of GPU
    (must be <64KB).

    Args:
        kernelE (kernel): electric field kernel; must expose a global
            named 'updatecoeffsE'.
        kernelH (kernel): magnetic field kernel; must expose a global
            named 'updatecoeffsH'.
    """
    # get_global() returns a sequence; its first element is used as the
    # destination of the host-to-device copy.
    updatecoeffsE = kernelE.get_global('updatecoeffsE')[0]
    updatecoeffsH = kernelH.get_global('updatecoeffsH')[0]

    self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE)
    self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH)
def store_outputs(self):
    """Store field component values for every receiver.

    Does nothing when the grid has no receivers.
    """
    if not self.grid.rxs:
        return

    num_rxs = len(self.grid.rxs)
    # Call the compiled kernel function (store_outputs_gpu), not this method.
    # Launched with a (1, 1, 1) block and a grid sized by round32 of the
    # receiver count.
    self.store_outputs_gpu(np.int32(num_rxs),
                           np.int32(self.grid.iteration),
                           self.rxcoords_gpu.gpudata,
                           self.rxs_gpu.gpudata,
                           self.grid.Ex_gpu.gpudata,
                           self.grid.Ey_gpu.gpudata,
                           self.grid.Ez_gpu.gpudata,
                           self.grid.Hx_gpu.gpudata,
                           self.grid.Hy_gpu.gpudata,
                           self.grid.Hz_gpu.gpudata,
                           block=(1, 1, 1),
                           grid=(round32(num_rxs), 1, 1))
def store_snapshots(self, iteration):
"""Store any snapshots.
@@ -501,32 +502,32 @@ class CUDAUpdates:
def update_magnetic(self):
    """Update magnetic field components.

    Launches the kernel function held in ``self.update_magnetic_gpu`` with
    the grid dimensions (as 32-bit integers), the ID array and the six field
    arrays, using the grid's ``tpb``/``bpg`` values as the launch
    configuration.
    """
    # Call the kernel function (update_magnetic_gpu), not this method.
    self.update_magnetic_gpu(np.int32(self.grid.nx),
                             np.int32(self.grid.ny),
                             np.int32(self.grid.nz),
                             self.grid.ID_gpu,
                             self.grid.Hx_gpu,
                             self.grid.Hy_gpu,
                             self.grid.Hz_gpu,
                             self.grid.Ex_gpu,
                             self.grid.Ey_gpu,
                             self.grid.Ez_gpu,
                             block=self.grid.tpb,
                             grid=self.grid.bpg)
def update_magnetic_pml(self):
    """Update magnetic field components with the PML correction.

    Calls ``update_magnetic`` once on every PML of the grid, in list order,
    passing the grid.
    """
    for pml in self.grid.pmls:
        pml.update_magnetic(self.grid)
def update_magnetic_sources(self):
"""Update magnetic field components from sources."""
if self.grid.magneticdipoles:
self.update_magnetic_dipole_gpu(np.int32(len(self.grid.magneticdipoles)),
np.int32(self.grid.iteration),
config.dtypes['float_or_double'](self.grid.dx),
config.dtypes['float_or_double'](self.grid.dy),
config.dtypes['float_or_double'](self.grid.dz),
config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_magnetic_gpu.gpudata,
self.srcinfo2_magnetic_gpu.gpudata,
self.srcwaves_magnetic_gpu.gpudata,
@@ -541,18 +542,18 @@ class CUDAUpdates:
"""Update electric field components."""
# All materials are non-dispersive so do standard update.
if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0:
self.update_electric(np.int32(self.grid.nx),
np.int32(self.grid.ny),
np.int32(self.grid.nz),
self.grid.ID_gpu,
self.grid.Ex_gpu,
self.grid.Ey_gpu,
self.grid.Ez_gpu,
self.grid.Hx_gpu,
self.grid.Hy_gpu,
self.grid.Hz_gpu,
block=self.grid.tpb,
grid=self.grid.bpg)
self.update_electric_gpu(np.int32(self.grid.nx),
np.int32(self.grid.ny),
np.int32(self.grid.nz),
self.grid.ID_gpu,
self.grid.Ex_gpu,
self.grid.Ey_gpu,
self.grid.Ez_gpu,
self.grid.Hx_gpu,
self.grid.Hy_gpu,
self.grid.Hz_gpu,
block=self.grid.tpb,
grid=self.grid.bpg)
# If there are any dispersive materials do 1st part of dispersive update
# (it is split into two parts as it requires present and updated electric field values).
@@ -578,7 +579,7 @@ class CUDAUpdates:
def update_electric_pml(self):
    """Update electric field components with the PML correction.

    Calls ``update_electric`` once on every PML of the grid, in list order,
    passing the grid.
    """
    for pml in self.grid.pmls:
        pml.update_electric(self.grid)
def update_electric_sources(self):
"""Update electric field components from sources -
@@ -587,9 +588,9 @@ class CUDAUpdates:
if self.grid.voltagesources:
self.update_voltage_source_gpu(np.int32(len(self.grid.voltagesources)),
np.int32(self.grid.iteration),
config.dtypes['float_or_double'](self.grid.dx),
config.dtypes['float_or_double'](self.grid.dy),
config.dtypes['float_or_double'](self.grid.dz),
config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_voltage_gpu.gpudata,
self.srcinfo2_voltage_gpu.gpudata,
self.srcwaves_voltage_gpu.gpudata,
@@ -603,9 +604,9 @@ class CUDAUpdates:
if self.grid.hertziandipoles:
self.update_hertzian_dipole_gpu(np.int32(len(self.grid.hertziandipoles)),
np.int32(self.grid.iteration),
config.dtypes['float_or_double'](self.grid.dx),
config.dtypes['float_or_double'](self.grid.dy),
config.dtypes['float_or_double'](self.grid.dz),
config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_hertzian_gpu.gpudata,
self.srcinfo2_hertzian_gpu.gpudata,
self.srcwaves_hertzian_gpu.gpudata,
@@ -660,21 +661,20 @@ class CUDAUpdates:
"""Copy data from GPU back to CPU to save to file(s)."""
# Copy output from receivers array back to correct receiver objects
if self.grid.rxs:
gpu_get_rx_array(self.rxs_gpu.get(),
get_rx_array_gpu(self.rxs_gpu.get(),
self.rxcoords_gpu.get(),
self.grid)
# Copy data from any snapshots back to correct snapshot objects
if self.grid.snapshots and not self.grid.snapsgpu2cpu:
for i, snap in enumerate(self.grid.snapshots):
gpu_get_snapshot_array(self.snapEx_gpu.get(),
get_snapshot_array_gpu(self.snapEx_gpu.get(),
self.snapEy_gpu.get(),
self.snapEz_gpu.get(),
self.snapHx_gpu.get(),
self.snapHy_gpu.get(),
self.snapHz_gpu.get(),
i,
snap)
i, snap)
def cleanup(self):
"""Cleanup GPU context."""

查看文件

@@ -421,10 +421,6 @@ class GPU:
self.pcibusID = None
self.constmem = None
self.totalmem = None
# Threads per block for main field updates
self.tpb = (256, 1, 1)
# Blocks per grid for main field updates (set in grid.py)
self.bpg = None
def get_gpu_info(self, drv):
"""Set information about GPU.