Basic functional GPU solver with cylinder_Ascan_2D example.

这个提交包含在:
craig-warren
2019-10-21 17:17:36 +01:00
父节点 c31eff41d2
当前提交 40ff78a8a9
共有 12 个文件被更改,包括 343 次插入和 269 次删除

查看文件

@@ -510,14 +510,12 @@ class Rx(UserObjectMulti):
r.ID = self.kwargs['id'] r.ID = self.kwargs['id']
outputs = self.kwargs['outputs'] outputs = self.kwargs['outputs']
# Get allowable outputs # Get allowable outputs
if grid.gpu is not None: allowableoutputs = RxUser.allowableoutputs_gpu if config.sim_config.general['cuda'] else RxUser.allowableoutputs
allowableoutputs = RxUser.gpu_allowableoutputs
else:
allowableoutputs = RxUser.allowableoutputs
# Check and add field output names # Check and add field output names
outputs.sort()
for field in outputs: for field in outputs:
if field in allowableoutputs: if field in allowableoutputs:
r.outputs[field] = np.zeros(grid.iterations, dtype=config.dtypes['float_or_double']) r.outputs[field] = np.zeros(grid.iterations, dtype=config.sim_config.dtypes['float_or_double'])
else: else:
raise CmdInputError(f"'{self.params_str()}' contains an output type that is not allowable. Allowable outputs in current context are {allowableoutputs}") raise CmdInputError(f"'{self.params_str()}' contains an output type that is not allowable. Allowable outputs in current context are {allowableoutputs}")
# If no ID or outputs are specified, use default # If no ID or outputs are specified, use default

查看文件

@@ -18,6 +18,7 @@
import logging import logging
from pathlib import Path from pathlib import Path
import sys
from colorama import init from colorama import init
from colorama import Fore from colorama import Fore
@@ -64,7 +65,8 @@ class ModelConfig:
# N.B. This will happen if the requested snapshots are too large to fit # N.B. This will happen if the requested snapshots are too large to fit
# on the memory of the GPU. If True this will slow performance significantly # on the memory of the GPU. If True this will slow performance significantly
if sim_config.general['cuda']: if sim_config.general['cuda']:
self.cuda = {'gpu': sim_config.cuda['gpus'], 'snapsgpu2cpu': False} self.cuda = {'gpu': sim_config.cuda['gpus'],
'snapsgpu2cpu': False}
# Total memory usage for all grids in the model. Starts with 50MB overhead. # Total memory usage for all grids in the model. Starts with 50MB overhead.
self.mem_use = 50e6 self.mem_use = 50e6
@@ -175,10 +177,12 @@ class SimulationConfig:
self.general['cuda'] = True self.general['cuda'] = True
self.general['cpu'] = False self.general['cpu'] = False
self.general['opencl'] = False self.general['opencl'] = False
# gpus: list of GPU objects self.general['precision'] = 'single'
# gpus_str: list of strings describing GPU(s) self.cuda = {'gpus': [], # gpus: list of GPU objects
self.cuda = {'gpus': [], 'gpus_str': [], # gpus_str: list of strings describing GPU(s)
'gpus_str': []} 'nvcc_opts': None} # nvcc_opts: nvcc compiler options
# Suppress nvcc warnings on Microsoft Windows
if sys.platform == 'win32': self.cuda['nvcc_opts'] = '-w'
self.get_gpus() self.get_gpus()
self.set_gpus() self.set_gpus()

查看文件

@@ -128,7 +128,7 @@ __global__ void update_magnetic(int NX, int NY, int NZ, const unsigned int* __re
// Electric field updates - dispersive materials // // Electric field updates - dispersive materials //
/////////////////////////////////////////////////// ///////////////////////////////////////////////////
__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $REAL_OR_COMPLEX* __restrict__ updatecoeffsdispersive, $REAL_OR_COMPLEX *Tx, $REAL_OR_COMPLEX *Ty, $REAL_OR_COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) { __global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
// This function is part A of updates to electric field values when dispersive materials (with multiple poles) are present. // This function is part A of updates to electric field values when dispersive materials (with multiple poles) are present.
// //
@@ -189,7 +189,7 @@ __global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLE
} }
} }
__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $REAL_OR_COMPLEX* __restrict__ updatecoeffsdispersive, $REAL_OR_COMPLEX *Tx, $REAL_OR_COMPLEX *Ty, $REAL_OR_COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) { __global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
// This function is part B which updates the dispersive field arrays when dispersive materials (with multiple poles) are present. // This function is part B which updates the dispersive field arrays when dispersive materials (with multiple poles) are present.
// //

查看文件

@@ -279,15 +279,22 @@ class FDTDGrid:
class CUDAGrid(FDTDGrid): class CUDAGrid(FDTDGrid):
"""Additional grid methods for solving on GPU using CUDA.""" """Additional grid methods for solving on GPU using CUDA."""
def __init__(self, model_num):
super().__init__(model_num)
# Threads per block - used for main electric/magnetic field updates
self.tpb = (256, 1, 1)
# Blocks per grid - used for main electric/magnetic field updates
self.bpg = None
def set_blocks_per_grid(self): def set_blocks_per_grid(self):
"""Set the blocks per grid size used for updating the electric and """Set the blocks per grid size used for updating the electric and
magnetic field arrays on a GPU. magnetic field arrays on a GPU.
""" """
config.cuda['gpus'].bpg = (int(np.ceil(((self.nx + 1) * self.bpg = (int(np.ceil(((self.nx + 1) * (self.ny + 1) *
(self.ny + 1) * (self.nz + 1)) / self.tpb[0])), 1, 1)
(self.nz + 1)) /
config.cuda['gpus'].tpb[0])), 1, 1)
def initialise_arrays(self): def initialise_arrays(self):
"""Initialise geometry and field arrays on GPU.""" """Initialise geometry and field arrays on GPU."""

查看文件

@@ -58,15 +58,15 @@ from .pml import CFS
from .pml import PML from .pml import PML
from .pml import build_pml from .pml import build_pml
from .pml import pml_information from .pml import pml_information
from .receivers import gpu_initialise_rx_arrays from .receivers import initialise_rx_arrays_gpu
from .receivers import gpu_get_rx_array from .receivers import get_rx_array_gpu
from .receivers import Rx from .receivers import Rx
from .scene import Scene from .scene import Scene
from .snapshots import Snapshot from .snapshots import Snapshot
from .snapshots import gpu_initialise_snapshot_array from .snapshots import initialise_snapshot_array_gpu
from .snapshots import gpu_get_snapshot_array from .snapshots import get_snapshot_array_gpu
from .solvers import create_solver from .solvers import create_solver
from .sources import gpu_initialise_src_arrays from .sources import initialise_src_arrays_gpu
from .utilities import get_terminal_width from .utilities import get_terminal_width
from .utilities import human_size from .utilities import human_size
from .utilities import mem_check from .utilities import mem_check
@@ -286,7 +286,8 @@ class ModelBuildRun:
mem_GPU = '' mem_GPU = ''
if config.sim_config.general['cuda']: if config.sim_config.general['cuda']:
mem_GPU = f' host + ~{human_size(self.solver.get_memsolve())} GPU' log.debug('Fix memory used calc for GPU')
# mem_GPU = f' host + ~{human_size(self.solver.get_memsolve())} GPU'
log.info(f'\nMemory (RAM) used: ~{human_size(self.p.memory_full_info().uss)}{mem_GPU}') log.info(f'\nMemory (RAM) used: ~{human_size(self.p.memory_full_info().uss)}{mem_GPU}')
log.info(f'Solving time [HH:MM:SS]: {datetime.timedelta(seconds=tsolve)}') log.info(f'Solving time [HH:MM:SS]: {datetime.timedelta(seconds=tsolve)}')

查看文件

@@ -207,8 +207,7 @@ class PML:
self.CFS = G.cfs self.CFS = G.cfs
if not config.sim_config.general['cuda']: self.initialise_field_arrays()
self.initialise_field_arrays()
def initialise_field_arrays(self): def initialise_field_arrays(self):
"""Initialise arrays to store fields in PML.""" """Initialise arrays to store fields in PML."""
@@ -340,7 +339,7 @@ class CUDAPML(PML):
solving on GPU using CUDA. solving on GPU using CUDA.
""" """
def initialise_arrays(self): def initialise_field_arrays_gpu(self):
"""Initialise PML field and coefficient arrays on GPU.""" """Initialise PML field and coefficient arrays on GPU."""
import pycuda.gpuarray as gpuarray import pycuda.gpuarray as gpuarray
@@ -355,20 +354,44 @@ class CUDAPML(PML):
self.HRF_gpu = gpuarray.to_gpu(self.HRF) self.HRF_gpu = gpuarray.to_gpu(self.HRF)
if self.direction[0] == 'x': if self.direction[0] == 'x':
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype)) self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype)) self.nx + 1, self.ny, self.nz + 1),
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype)) dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype)) self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx + 1, self.ny + 1, self.nz),
dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx, self.ny + 1, self.nz),
dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx, self.ny, self.nz + 1),
dtype=config.sim_config.dtypes['float_or_double']))
elif self.direction[0] == 'y': elif self.direction[0] == 'y':
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype)) self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny + 1, self.nz), dtype=floattype)) self.nx, self.ny + 1, self.nz + 1),
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype)) dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny, self.nz + 1), dtype=floattype)) self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx + 1, self.ny + 1, self.nz),
dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx + 1, self.ny, self.nz),
dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx, self.ny, self.nz + 1),
dtype=config.sim_config.dtypes['float_or_double']))
elif self.direction[0] == 'z': elif self.direction[0] == 'z':
self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz + 1), dtype=floattype)) self.EPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz + 1), dtype=floattype)) self.nx, self.ny + 1, self.nz + 1),
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx + 1, self.ny, self.nz), dtype=floattype)) dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=floattype)) self.EPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx + 1, self.ny, self.nz + 1),
dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi1_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx + 1, self.ny, self.nz),
dtype=config.sim_config.dtypes['float_or_double']))
self.HPhi2_gpu = gpuarray.to_gpu(np.zeros((len(self.CFS),
self.nx, self.ny + 1, self.nz),
dtype=config.sim_config.dtypes['float_or_double']))
def set_blocks_per_grid(self, G): def set_blocks_per_grid(self, G):
"""Set the blocks per grid size used for updating the PML field arrays on a GPU. """Set the blocks per grid size used for updating the PML field arrays on a GPU.
@@ -377,7 +400,9 @@ class CUDAPML(PML):
G (FDTDGrid): Holds essential parameters describing the model. G (FDTDGrid): Holds essential parameters describing the model.
""" """
self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) * (self.EPhi1_gpu.shape[2] + 1) * (self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1) self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) *
(self.EPhi1_gpu.shape[2] + 1) *
(self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1)
def get_update_funcs(self, kernelselectric, kernelsmagnetic): def get_update_funcs(self, kernelselectric, kernelsmagnetic):
"""Get update functions from PML kernels. """Get update functions from PML kernels.
@@ -400,7 +425,24 @@ class CUDAPML(PML):
G (FDTDGrid): Holds essential parameters describing the model. G (FDTDGrid): Holds essential parameters describing the model.
""" """
self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.EPhi1_gpu.shape[1]), np.int32(self.EPhi1_gpu.shape[2]), np.int32(self.EPhi1_gpu.shape[3]), np.int32(self.EPhi2_gpu.shape[1]), np.int32(self.EPhi2_gpu.shape[2]), np.int32(self.EPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg) self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf),
np.int32(self.ys), np.int32(self.yf),
np.int32(self.zs), np.int32(self.zf),
np.int32(self.EPhi1_gpu.shape[1]),
np.int32(self.EPhi1_gpu.shape[2]),
np.int32(self.EPhi1_gpu.shape[3]),
np.int32(self.EPhi2_gpu.shape[1]),
np.int32(self.EPhi2_gpu.shape[2]),
np.int32(self.EPhi2_gpu.shape[3]),
np.int32(self.thickness),
G.ID_gpu.gpudata,
G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata,
self.ERA_gpu.gpudata, self.ERB_gpu.gpudata,
self.ERE_gpu.gpudata, self.ERF_gpu.gpudata,
config.sim_config.dtypes['float_or_double'](self.d),
block=G.tpb, grid=self.bpg)
def update_magnetic(self, G): def update_magnetic(self, G):
"""This functions updates magnetic field components with the PML """This functions updates magnetic field components with the PML
@@ -409,7 +451,24 @@ class CUDAPML(PML):
Args: Args:
G (FDTDGrid): Holds essential parameters describing the model. G (FDTDGrid): Holds essential parameters describing the model.
""" """
self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[3]), np.int32(self.HPhi2_gpu.shape[1]), np.int32(self.HPhi2_gpu.shape[2]), np.int32(self.HPhi2_gpu.shape[3]), np.int32(self.thickness), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, floattype(self.d), block=G.tpb, grid=self.bpg) self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf),
np.int32(self.ys), np.int32(self.yf),
np.int32(self.zs), np.int32(self.zf),
np.int32(self.HPhi1_gpu.shape[1]),
np.int32(self.HPhi1_gpu.shape[2]),
np.int32(self.HPhi1_gpu.shape[3]),
np.int32(self.HPhi2_gpu.shape[1]),
np.int32(self.HPhi2_gpu.shape[2]),
np.int32(self.HPhi2_gpu.shape[3]),
np.int32(self.thickness),
G.ID_gpu.gpudata,
G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata,
self.HRA_gpu.gpudata, self.HRB_gpu.gpudata,
self.HRE_gpu.gpudata, self.HRF_gpu.gpudata,
config.sim_config.dtypes['float_or_double'](self.d),
block=G.tpb, grid=self.bpg)
def pml_information(G): def pml_information(G):
"""Information about PMLs. """Information about PMLs.
@@ -443,14 +502,16 @@ def build_pml(G, key, value):
value (int): Thickness of PML slab in cells. value (int): Thickness of PML slab in cells.
""" """
pml_type = CUDAPML if config.sim_config.general['cuda'] else PML
sumer = 0 # Sum of relative permittivities in PML slab sumer = 0 # Sum of relative permittivities in PML slab
summr = 0 # Sum of relative permeabilities in PML slab summr = 0 # Sum of relative permeabilities in PML slab
if key[0] == 'x': if key[0] == 'x':
if key == 'x0': if key == 'x0':
pml = PML(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz) pml = pml_type(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz)
elif key == 'xmax': elif key == 'xmax':
pml = PML(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz) pml = pml_type(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml) G.pmls.append(pml)
for j in range(G.ny): for j in range(G.ny):
for k in range(G.nz): for k in range(G.nz):
@@ -463,9 +524,9 @@ def build_pml(G, key, value):
elif key[0] == 'y': elif key[0] == 'y':
if key == 'y0': if key == 'y0':
pml = PML(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz) pml = pml_type(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz)
elif key == 'ymax': elif key == 'ymax':
pml = PML(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz) pml = pml_type(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml) G.pmls.append(pml)
for i in range(G.nx): for i in range(G.nx):
for k in range(G.nz): for k in range(G.nz):
@@ -478,9 +539,9 @@ def build_pml(G, key, value):
elif key[0] == 'z': elif key[0] == 'z':
if key == 'z0': if key == 'z0':
pml = PML(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny) pml = pml_type(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny)
elif key == 'zmax': elif key == 'zmax':
pml = PML(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz) pml = pml_type(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml) G.pmls.append(pml)
for i in range(G.nx): for i in range(G.nx):
for j in range(G.ny): for j in range(G.ny):

查看文件

@@ -16,8 +16,6 @@
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>. # along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from collections import OrderedDict
import numpy as np import numpy as np
import gprMax.config as config import gprMax.config as config
@@ -27,14 +25,15 @@ class Rx:
"""Receiver output points.""" """Receiver output points."""
allowableoutputs = ['Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz', 'Ix', 'Iy', 'Iz'] allowableoutputs = ['Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz', 'Ix', 'Iy', 'Iz']
gpu_allowableoutputs = allowableoutputs[:-3]
defaultoutputs = allowableoutputs[:-3] defaultoutputs = allowableoutputs[:-3]
maxnumoutputs = 0
allowableoutputs_gpu = allowableoutputs[:-3]
maxnumoutputs_gpu = 0
def __init__(self): def __init__(self):
self.ID = None self.ID = None
self.outputs = OrderedDict() self.outputs = {}
self.xcoord = None self.xcoord = None
self.ycoord = None self.ycoord = None
self.zcoord = None self.zcoord = None
@@ -43,12 +42,17 @@ class Rx:
self.zcoordorigin = None self.zcoordorigin = None
def gpu_initialise_rx_arrays(G): def initialise_rx_arrays_gpu(G):
"""Initialise arrays on GPU for receiver coordinates and to store field """Initialise arrays on GPU for receiver coordinates and to store field
components for receivers. components for receivers.
Args: Args:
G (FDTDGrid): Holds essential parameters describing the model. G (FDTDGrid): Holds essential parameters describing the model.
Returns:
rxcoords_gpu (int): numpy array of receiver coordinates from GPU.
rxs_gpu (float): numpy array of receiver data from GPU - rows are field
components; columns are iterations; pages are receivers.
""" """
import pycuda.gpuarray as gpuarray import pycuda.gpuarray as gpuarray
@@ -59,11 +63,14 @@ def gpu_initialise_rx_arrays(G):
rxcoords[i, 0] = rx.xcoord rxcoords[i, 0] = rx.xcoord
rxcoords[i, 1] = rx.ycoord rxcoords[i, 1] = rx.ycoord
rxcoords[i, 2] = rx.zcoord rxcoords[i, 2] = rx.zcoord
# Store maximum number of output components
if len(rx.outputs) > Rx.maxnumoutputs_gpu:
Rx.maxnumoutputs_gpu = len(rx.outputs)
# Array to store field components for receivers on GPU - rows are field components; # Array to store field components for receivers on GPU - rows are field components;
# columns are iterations; pages are receivers # columns are iterations; pages are receivers
rxs = np.zeros((Rx.maxnumoutputs, G.iterations, len(G.rxs)), rxs = np.zeros((len(Rx.allowableoutputs_gpu), G.iterations, len(G.rxs)),
dtype=config.dtypes['float_or_double']) dtype=config.sim_config.dtypes['float_or_double'])
# Copy arrays to GPU # Copy arrays to GPU
rxcoords_gpu = gpuarray.to_gpu(rxcoords) rxcoords_gpu = gpuarray.to_gpu(rxcoords)
@@ -72,20 +79,20 @@ def gpu_initialise_rx_arrays(G):
return rxcoords_gpu, rxs_gpu return rxcoords_gpu, rxs_gpu
def gpu_get_rx_array(rxs_gpu, rxcoords_gpu, G): def get_rx_array_gpu(rxs_gpu, rxcoords_gpu, G):
"""Copy output from receivers array used on GPU back to receiver objects. """Copy output from receivers array used on GPU back to receiver objects.
Args: Args:
rxs_gpu (float): numpy array of receiver data from GPU - rows are field rxs_gpu (float): numpy array of receiver data from GPU - rows are field
components; columns are iterations; pages are receivers. components; columns are iterations; pages are receivers.
rxcoords_gpu (float): numpy array of receiver coordinates from GPU. rxcoords_gpu (int): numpy array of receiver coordinates from GPU.
G (FDTDGrid): Holds essential parameters describing the model. G (FDTDGrid): Holds essential parameters describing the model.
""" """
for rx in G.rxs: for rx in G.rxs:
for rxgpu in range(len(G.rxs)): for rxgpu in range(len(G.rxs)):
if rx.xcoord == rxcoords_gpu[rxgpu, 0] and \ if (rx.xcoord == rxcoords_gpu[rxgpu, 0] and
rx.ycoord == rxcoords_gpu[rxgpu, 1] and \ rx.ycoord == rxcoords_gpu[rxgpu, 1] and
rx.zcoord == rxcoords_gpu[rxgpu, 2]: rx.zcoord == rxcoords_gpu[rxgpu, 2]):
for k in rx.outputs.items(): for output in rx.outputs.keys():
rx.outputs[k] = rxs_gpu[Rx.gpu_allowableoutputs.index(k), :, rxgpu] rx.outputs[output] = rxs_gpu[Rx.allowableoutputs_gpu.index(output), :, rxgpu]

查看文件

@@ -184,7 +184,7 @@ class Snapshot:
self.filehandle.close() self.filehandle.close()
def gpu_initialise_snapshot_array(G): def initialise_snapshot_array_gpu(G):
"""Initialise array on GPU for to store field data for snapshots. """Initialise array on GPU for to store field data for snapshots.
Args: Args:
@@ -236,7 +236,7 @@ def gpu_initialise_snapshot_array(G):
return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu
def gpu_get_snapshot_array(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap): def get_snapshot_array_gpu(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap):
"""Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview. """Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview.
Args: Args:

查看文件

@@ -15,7 +15,7 @@
# #
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>. # along with gprMax. If not, see <http://www.gnu.org/licenses/>.
import sys
import gprMax.config as config import gprMax.config as config
from .grid import FDTDGrid from .grid import FDTDGrid
from .grid import CUDAGrid from .grid import CUDAGrid

查看文件

@@ -236,7 +236,7 @@ class MagneticDipole(Source):
(1 / (G.dx * G.dy * G.dz))) (1 / (G.dx * G.dy * G.dz)))
def gpu_initialise_src_arrays(sources, G): def initialise_src_arrays_gpu(sources, G):
"""Initialise arrays on GPU for source coordinates/polarisation, other """Initialise arrays on GPU for source coordinates/polarisation, other
source information, and source waveform values. source information, and source waveform values.
@@ -255,8 +255,8 @@ def gpu_initialise_src_arrays(sources, G):
import pycuda.gpuarray as gpuarray import pycuda.gpuarray as gpuarray
srcinfo1 = np.zeros((len(sources), 4), dtype=np.int32) srcinfo1 = np.zeros((len(sources), 4), dtype=np.int32)
srcinfo2 = np.zeros((len(sources)), dtype=config.dtypes['float_or_double']) srcinfo2 = np.zeros((len(sources)), dtype=config.sim_config.dtypes['float_or_double'])
srcwaves = np.zeros((len(sources), G.iterations), dtype=config.dtypes['float_or_double']) srcwaves = np.zeros((len(sources), G.iterations), dtype=config.sim_config.dtypes['float_or_double'])
for i, src in enumerate(sources): for i, src in enumerate(sources):
srcinfo1[i, 0] = src.xcoord srcinfo1[i, 0] = src.xcoord
srcinfo1[i, 1] = src.ycoord srcinfo1[i, 1] = src.ycoord

查看文件

@@ -18,21 +18,24 @@
from importlib import import_module from importlib import import_module
import logging import logging
import sys
import numpy as np
import gprMax.config as config import gprMax.config as config
from .cuda.fields_updates import kernel_template_fields from .cuda.fields_updates import kernel_template_fields
from .cuda.snapshots import kernel_template_store_snapshot from .cuda.snapshots import kernel_template_store_snapshot
from .cuda.source_updates import kernel_template_sources from .cuda.source_updates import kernel_template_sources
from .cython.fields_updates_normal import update_electric from .cython.fields_updates_normal import update_electric as update_electric_cpu
from .cython.fields_updates_normal import update_magnetic from .cython.fields_updates_normal import update_magnetic as update_magnetic_cpu
from .fields_outputs import store_outputs from .fields_outputs import store_outputs as store_outputs_cpu
from .receivers import gpu_initialise_rx_arrays from .fields_outputs import kernel_template_store_outputs
from .receivers import gpu_get_rx_array from .receivers import initialise_rx_arrays_gpu
from .receivers import get_rx_array_gpu
from .snapshots import Snapshot from .snapshots import Snapshot
from .snapshots import gpu_initialise_snapshot_array from .snapshots import initialise_snapshot_array_gpu
from .snapshots import gpu_get_snapshot_array from .snapshots import get_snapshot_array_gpu
from .sources import gpu_initialise_src_arrays from .sources import initialise_src_arrays_gpu
from .utilities import round32
from .utilities import timer from .utilities import timer
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -52,7 +55,7 @@ class CPUUpdates:
def store_outputs(self): def store_outputs(self):
"""Store field component values for every receiver and transmission line.""" """Store field component values for every receiver and transmission line."""
store_outputs(self.grid) store_outputs_cpu(self.grid)
def store_snapshots(self, iteration): def store_snapshots(self, iteration):
"""Store any snapshots. """Store any snapshots.
@@ -66,7 +69,7 @@ class CPUUpdates:
def update_magnetic(self): def update_magnetic(self):
"""Update magnetic field components.""" """Update magnetic field components."""
update_magnetic(self.grid.nx, update_magnetic_cpu(self.grid.nx,
self.grid.ny, self.grid.ny,
self.grid.nz, self.grid.nz,
config.sim_config.hostinfo['ompthreads'], config.sim_config.hostinfo['ompthreads'],
@@ -99,18 +102,18 @@ class CPUUpdates:
"""Update electric field components.""" """Update electric field components."""
# All materials are non-dispersive so do standard update. # All materials are non-dispersive so do standard update.
if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0: if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0:
update_electric(self.grid.nx, update_electric_cpu(self.grid.nx,
self.grid.ny, self.grid.ny,
self.grid.nz, self.grid.nz,
config.sim_config.hostinfo['ompthreads'], config.sim_config.hostinfo['ompthreads'],
self.grid.updatecoeffsE, self.grid.updatecoeffsE,
self.grid.ID, self.grid.ID,
self.grid.Ex, self.grid.Ex,
self.grid.Ey, self.grid.Ey,
self.grid.Ez, self.grid.Ez,
self.grid.Hx, self.grid.Hx,
self.grid.Hy, self.grid.Hy,
self.grid.Hz) self.grid.Hz)
# If there are any dispersive materials do 1st part of dispersive update # If there are any dispersive materials do 1st part of dispersive update
# (it is split into two parts as it requires present and updated electric field values). # (it is split into two parts as it requires present and updated electric field values).
@@ -247,20 +250,14 @@ class CUDAUpdates:
self.dispersive_update_a = None self.dispersive_update_a = None
self.dispersive_update_b = None self.dispersive_update_b = None
import pycuda.driver as drv # Import PyCUDA modules
from pycuda.compiler import SourceModule self.drv = import_module('pycuda.driver')
drv.init() self.source_module = getattr(import_module('pycuda.compiler'), 'SourceModule')
self.drv.init()
# Suppress nvcc warnings on Windows
log.debug('Move nvcc compiler options to simulation config')
if sys.platform == 'win32':
self.compiler_opts = ['-w']
else:
self.compiler_opts = None
# Create device handle and context on specifc GPU device (and make it current context) # Create device handle and context on specifc GPU device (and make it current context)
self.dev = drv.Device(self.grid.gpu.deviceID) self.dev = self.drv.Device(config.model_configs[self.grid.model_num].cuda['gpu'].deviceID)
self.ctx = dev.make_context() self.ctx = self.dev.make_context()
# Initialise arrays on GPU, prepare kernels, and get kernel functions # Initialise arrays on GPU, prepare kernels, and get kernel functions
self.set_field_kernels() self.set_field_kernels()
@@ -274,56 +271,56 @@ class CUDAUpdates:
get kernel functions. get kernel functions.
""" """
if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0: if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0:
kernels_fields = SourceModule(kernels_template_fields.substitute( kernels_fields = self.source_module(kernels_template_fields.substitute(
REAL=cudafloattype, REAL=config.sim_config.dtypes['C_float_or_double'],
COMPLEX=cudacomplextype, COMPLEX=config.sim_config.dtypes['C_complex'],
N_updatecoeffsE=self.grid.updatecoeffsE.size, N_updatecoeffsE=self.grid.updatecoeffsE.size,
N_updatecoeffsH=self.grid.updatecoeffsH.size, N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
NY_MATDISPCOEFFS=self.grid.updatecoeffsdispersive.shape[1], NY_MATDISPCOEFFS=self.grid.updatecoeffsdispersive.shape[1],
NX_FIELDS=self.grid.nx + 1, NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1, NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1, NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.grid.ID.shape[1], NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2], NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3], NZ_ID=self.grid.ID.shape[3],
NX_T=self.grid.Tx.shape[1], NX_T=self.grid.Tx.shape[1],
NY_T=self.grid.Tx.shape[2], NY_T=self.grid.Tx.shape[2],
NZ_T=self.grid.Tx.shape[3]), NZ_T=self.grid.Tx.shape[3]),
options=self.compiler_opts) options=config.sim_config.cuda['nvcc_opts'])
else: # Set to one any substitutions for dispersive materials else: # Set to one any substitutions for dispersive materials
kernels_fields = SourceModule(kernels_template_fields.substitute( kernels_fields = self.source_module(kernel_template_fields.substitute(
REAL=cudafloattype, REAL=config.sim_config.dtypes['C_float_or_double'],
COMPLEX=cudacomplextype, COMPLEX=config.sim_config.dtypes['C_complex'],
N_updatecoeffsE=self.grid.updatecoeffsE.size, N_updatecoeffsE=self.grid.updatecoeffsE.size,
N_updatecoeffsH=self.grid.updatecoeffsH.size, N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
NY_MATDISPCOEFFS=1, NY_MATDISPCOEFFS=1,
NX_FIELDS=self.grid.nx + 1, NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1, NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1, NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.grid.ID.shape[1], NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2], NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3], NZ_ID=self.grid.ID.shape[3],
NX_T=1, NX_T=1,
NY_T=1, NY_T=1,
NZ_T=1), NZ_T=1),
options=self.compiler_opts) options=config.sim_config.cuda['nvcc_opts'])
self.update_electric = kernels_fields.get_function("update_electric") self.update_electric_gpu = kernels_fields.get_function("update_electric")
self.update_magnetic = kernels_fields.get_function("update_magnetic") self.update_magnetic_gpu = kernels_fields.get_function("update_magnetic")
if self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes > self.grid.gpu.constmem: if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes > config.model_configs[self.grid.model_num].cuda['gpu'].constmem):
raise GeneralError(log.exception(f'Too many materials in the model to fit onto constant memory of size {human_size(self.grid.gpu.constmem)} on {self.grid.gpu.deviceID} - {self.grid.gpu.name} GPU')) raise GeneralError(log.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.model_configs[self.grid.model_num].cuda['gpu'].constmem)} on {config.model_configs[self.grid.model_num].cuda['gpu'].deviceID} - {config.model_configs[self.grid.model_num].cuda['gpu'].name} GPU"))
self.copy_mat_coeffs() self.copy_mat_coeffs(kernels_fields, kernels_fields)
# Electric and magnetic field updates - dispersive materials - get kernel functions and initialise array on GPU # Electric and magnetic field updates - dispersive materials - get kernel functions and initialise array on GPU
if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0: # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values). if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0: # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values).
self.dispersive_update_a = kernels_fields.get_function("update_electric_dispersive_A") self.dispersive_update_a = kernels_fields.get_function("update_electric_dispersive_A")
self.dispersive_update_b = kernels_fields.get_function("update_electric_dispersive_B") self.dispersive_update_b = kernels_fields.get_function("update_electric_dispersive_B")
self.grid.gpu_initialise_dispersive_arrays() self.grid.initialise_dispersive_arrays()
# Electric and magnetic field updates - set blocks per grid and initialise field arrays on GPU # Electric and magnetic field updates - set blocks per grid and initialise field arrays on GPU
self.grid.gpu_set_blocks_per_grid() self.grid.set_blocks_per_grid()
self.grid.gpu_initialise_arrays() self.grid.initialise_arrays()
def set_pml_kernels(self): def set_pml_kernels(self):
"""PMLS - prepare kernels and get kernel functions.""" """PMLS - prepare kernels and get kernel functions."""
@@ -336,81 +333,81 @@ class CUDAUpdates:
kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic), kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic),
'kernels_template_pml_magnetic_' + 'kernels_template_pml_magnetic_' +
self.grid.pmlformulation) self.grid.pmlformulation)
kernels_pml_electric = SourceModule(kernelelectricfunc.substitute( kernels_pml_electric = self.source_module(kernelelectricfunc.substitute(
REAL=cudafloattype, REAL=config.sim_config.dtypes['C_float_or_double'],
N_updatecoeffsE=self.grid.updatecoeffsE.size, N_updatecoeffsE=self.grid.updatecoeffsE.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
NX_FIELDS=self.grid.nx + 1, NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1, NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1, NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.grid.ID.shape[1], NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2], NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3]), NZ_ID=self.grid.ID.shape[3]),
options=self.compiler_opts) options=config.sim_config.cuda['nvcc_opts'])
kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute( kernels_pml_magnetic = self.source_module(kernelmagneticfunc.substitute(
REAL=cudafloattype, REAL=config.sim_config.dtypes['C_float_or_double'],
N_updatecoeffsH=self.grid.updatecoeffsH.size, N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1], NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1],
NX_FIELDS=self.grid.nx + 1, NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1, NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1, NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.gridG.ID.shape[1], NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2], NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3]), NZ_ID=self.grid.ID.shape[3]),
options=self.compiler_opts) options=config.sim_config.cuda['nvcc_opts'])
self.copy_mat_coeffs() self.copy_mat_coeffs(kernels_pml_electric, kernels_pml_magnetic)
# Set block per grid, initialise arrays on GPU, and get kernel functions # Set block per grid, initialise arrays on GPU, and get kernel functions
for pml in self.grid.pmls: for pml in self.grid.pmls:
pml.gpu_initialise_arrays() pml.initialise_field_arrays_gpu()
pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic) pml.get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
pml.gpu_set_blocks_per_grid(self.grid) pml.set_blocks_per_grid(self.grid)
def set_rx_kernel(self): def set_rx_kernel(self):
"""Receivers - initialise arrays on GPU, prepare kernel and get kernel """Receivers - initialise arrays on GPU, prepare kernel and get kernel
function. function.
""" """
if self.grid.rxs: if self.grid.rxs:
rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(self.grid) self.rxcoords_gpu, self.rxs_gpu = initialise_rx_arrays_gpu(self.grid)
kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute( kernel_store_outputs = self.source_module(kernel_template_store_outputs.substitute(
REAL=cudafloattype, REAL=config.sim_config.dtypes['C_float_or_double'],
NY_RXCOORDS=3, NY_RXCOORDS=3,
NX_RXS=6, NX_RXS=6,
NY_RXS=self.grid.iterations, NY_RXS=self.grid.iterations,
NZ_RXS=len(self.grid.rxs), NZ_RXS=len(self.grid.rxs),
NX_FIELDS=self.grid.nx + 1, NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1, NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1), NZ_FIELDS=self.grid.nz + 1),
options=self.compiler_opts) options=config.sim_config.cuda['nvcc_opts'])
self.store_outputs = kernel_store_outputs.get_function("store_outputs") self.store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
def set_src_kernels(self): def set_src_kernels(self):
"""Sources - initialise arrays on GPU, prepare kernel and get kernel """Sources - initialise arrays on GPU, prepare kernel and get kernel
function. function.
""" """
if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles: if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles:
kernels_sources = SourceModule(kernels_template_sources.substitute( kernels_sources = self.source_module(kernel_template_sources.substitute(
REAL=cudafloattype, REAL=config.sim_config.dtypes['C_float_or_double'],
N_updatecoeffsE=self.grid.updatecoeffsE.size, N_updatecoeffsE=self.grid.updatecoeffsE.size,
N_updatecoeffsH=self.grid.updatecoeffsH.size, N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
NY_SRCINFO=4, NY_SRCINFO=4,
NY_SRCWAVES=self.grid.iterations, NY_SRCWAVES=self.grid.iterations,
NX_FIELDS=self.grid.nx + 1, NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1, NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1, NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.grid.ID.shape[1], NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2], NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3]), NZ_ID=self.grid.ID.shape[3]),
options=self.compiler_opts) options=config.sim_config.cuda['nvcc_opts'])
self.copy_mat_coeffs() self.copy_mat_coeffs(kernels_sources, kernels_sources)
if self.grid.hertziandipoles: if self.grid.hertziandipoles:
self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = gpu_initialise_src_arrays(self.grid.hertziandipoles, self.grid) self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = initialise_src_arrays_gpu(self.grid.hertziandipoles, self.grid)
self.update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole") self.update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole")
if self.grid.magneticdipoles: if self.grid.magneticdipoles:
self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = gpu_initialise_src_arrays(self.grid.magneticdipoles, self.grid) self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = initialise_src_arrays_gpu(self.grid.magneticdipoles, self.grid)
self.update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole") self.update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole")
if self.grid.voltagesources: if self.grid.voltagesources:
self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = gpu_initialise_src_arrays(self.grid.voltagesources, self.grid) self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = initialise_src_arrays_gpu(self.grid.voltagesources, self.grid)
self.update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source") self.update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source")
def set_snapshot_kernel(self): def set_snapshot_kernel(self):
@@ -418,42 +415,46 @@ class CUDAUpdates:
function. function.
""" """
if self.grid.snapshots: if self.grid.snapshots:
self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = gpu_initialise_snapshot_array(self.grid) self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = initialise_snapshot_array_gpu(self.grid)
kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute( kernel_store_snapshot = self.source_module(kernel_template_store_snapshot.substitute(
REAL=cudafloattype, REAL=config.sim_config.dtypes['C_float_or_double'],
NX_SNAPS=Snapshot.nx_max, NX_SNAPS=Snapshot.nx_max,
NY_SNAPS=Snapshot.ny_max, NY_SNAPS=Snapshot.ny_max,
NZ_SNAPS=Snapshot.nz_max, NZ_SNAPS=Snapshot.nz_max,
NX_FIELDS=self.grid.nx + 1, NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1, NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1), NZ_FIELDS=self.grid.nz + 1),
options=self.compiler_opts) options=config.sim_config.cuda['nvcc_opts'])
self.store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot") self.store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
def copy_mat_coeffs(self): def copy_mat_coeffs(self, kernelE, kernelH):
"""Copy material coefficient arrays to constant memory of GPU """Copy material coefficient arrays to constant memory of GPU
(must be <64KB). (must be <64KB).
Args:
kernelE (kernel): electric field kernel.
kernelH (kernel): magnetic field kernel.
""" """
updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0] updatecoeffsE = kernelE.get_global('updatecoeffsE')[0]
updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0] updatecoeffsH = kernelH.get_global('updatecoeffsH')[0]
self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE) self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE)
self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH) self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH)
def store_outputs(self): def store_outputs(self):
"""Store field component values for every receiver and transmission line.""" """Store field component values for every receiver."""
if self.grid.rxs: if self.grid.rxs:
self.store_outputs(np.int32(len(self.grid.rxs)), self.store_outputs_gpu(np.int32(len(self.grid.rxs)),
np.int32(self.grid.iteration), np.int32(self.grid.iteration),
self.rxcoords_gpu.gpudata, self.rxcoords_gpu.gpudata,
self.rxs_gpu.gpudata, self.rxs_gpu.gpudata,
self.grid.Ex_gpu.gpudata, self.grid.Ex_gpu.gpudata,
self.grid.Ey_gpu.gpudata, self.grid.Ey_gpu.gpudata,
self.grid.Ez_gpu.gpudata, self.grid.Ez_gpu.gpudata,
self.grid.Hx_gpu.gpudata, self.grid.Hx_gpu.gpudata,
self.grid.Hy_gpu.gpudata, self.grid.Hy_gpu.gpudata,
self.grid.Hz_gpu.gpudata, self.grid.Hz_gpu.gpudata,
block=(1, 1, 1), block=(1, 1, 1),
grid=(round32(len(self.grid.rxs)), 1, 1)) grid=(round32(len(self.grid.rxs)), 1, 1))
def store_snapshots(self, iteration): def store_snapshots(self, iteration):
"""Store any snapshots. """Store any snapshots.
@@ -501,32 +502,32 @@ class CUDAUpdates:
def update_magnetic(self): def update_magnetic(self):
"""Update magnetic field components.""" """Update magnetic field components."""
self.update_magnetic(np.int32(self.grid.nx), self.update_magnetic_gpu(np.int32(self.grid.nx),
np.int32(self.grid.ny), np.int32(self.grid.ny),
np.int32(self.grid.nz), np.int32(self.grid.nz),
self.grid.ID_gpu, self.grid.ID_gpu,
self.grid.Hx_gpu, self.grid.Hx_gpu,
self.grid.Hy_gpu, self.grid.Hy_gpu,
self.grid.Hz_gpu, self.grid.Hz_gpu,
self.grid.Ex_gpu, self.grid.Ex_gpu,
self.grid.Ey_gpu, self.grid.Ey_gpu,
self.grid.Ez_gpu, self.grid.Ez_gpu,
block=self.grid.tpb, block=self.grid.tpb,
grid=self.grid.bpg) grid=self.grid.bpg)
def update_magnetic_pml(self): def update_magnetic_pml(self):
"""Update magnetic field components with the PML correction.""" """Update magnetic field components with the PML correction."""
for pml in self.grid.pmls: for pml in self.grid.pmls:
pml.gpu_update_magnetic(self.grid) pml.update_magnetic(self.grid)
def update_magnetic_sources(self): def update_magnetic_sources(self):
"""Update magnetic field components from sources.""" """Update magnetic field components from sources."""
if self.grid.magneticdipoles: if self.grid.magneticdipoles:
self.update_magnetic_dipole_gpu(np.int32(len(self.grid.magneticdipoles)), self.update_magnetic_dipole_gpu(np.int32(len(self.grid.magneticdipoles)),
np.int32(self.grid.iteration), np.int32(self.grid.iteration),
config.dtypes['float_or_double'](self.grid.dx), config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.dtypes['float_or_double'](self.grid.dy), config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.dtypes['float_or_double'](self.grid.dz), config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_magnetic_gpu.gpudata, self.srcinfo1_magnetic_gpu.gpudata,
self.srcinfo2_magnetic_gpu.gpudata, self.srcinfo2_magnetic_gpu.gpudata,
self.srcwaves_magnetic_gpu.gpudata, self.srcwaves_magnetic_gpu.gpudata,
@@ -541,18 +542,18 @@ class CUDAUpdates:
"""Update electric field components.""" """Update electric field components."""
# All materials are non-dispersive so do standard update. # All materials are non-dispersive so do standard update.
if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0: if config.model_configs[self.grid.model_num].materials['maxpoles'] == 0:
self.update_electric(np.int32(self.grid.nx), self.update_electric_gpu(np.int32(self.grid.nx),
np.int32(self.grid.ny), np.int32(self.grid.ny),
np.int32(self.grid.nz), np.int32(self.grid.nz),
self.grid.ID_gpu, self.grid.ID_gpu,
self.grid.Ex_gpu, self.grid.Ex_gpu,
self.grid.Ey_gpu, self.grid.Ey_gpu,
self.grid.Ez_gpu, self.grid.Ez_gpu,
self.grid.Hx_gpu, self.grid.Hx_gpu,
self.grid.Hy_gpu, self.grid.Hy_gpu,
self.grid.Hz_gpu, self.grid.Hz_gpu,
block=self.grid.tpb, block=self.grid.tpb,
grid=self.grid.bpg) grid=self.grid.bpg)
# If there are any dispersive materials do 1st part of dispersive update # If there are any dispersive materials do 1st part of dispersive update
# (it is split into two parts as it requires present and updated electric field values). # (it is split into two parts as it requires present and updated electric field values).
@@ -578,7 +579,7 @@ class CUDAUpdates:
def update_electric_pml(self): def update_electric_pml(self):
"""Update electric field components with the PML correction.""" """Update electric field components with the PML correction."""
for pml in self.grid.pmls: for pml in self.grid.pmls:
pml.gpu_update_electric(self.grid) pml.update_electric(self.grid)
def update_electric_sources(self): def update_electric_sources(self):
"""Update electric field components from sources - """Update electric field components from sources -
@@ -587,9 +588,9 @@ class CUDAUpdates:
if self.grid.voltagesources: if self.grid.voltagesources:
self.update_voltage_source_gpu(np.int32(len(self.grid.voltagesources)), self.update_voltage_source_gpu(np.int32(len(self.grid.voltagesources)),
np.int32(self.grid.iteration), np.int32(self.grid.iteration),
config.dtypes['float_or_double'](self.grid.dx), config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.dtypes['float_or_double'](self.grid.dy), config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.dtypes['float_or_double'](self.grid.dz), config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_voltage_gpu.gpudata, self.srcinfo1_voltage_gpu.gpudata,
self.srcinfo2_voltage_gpu.gpudata, self.srcinfo2_voltage_gpu.gpudata,
self.srcwaves_voltage_gpu.gpudata, self.srcwaves_voltage_gpu.gpudata,
@@ -603,9 +604,9 @@ class CUDAUpdates:
if self.grid.hertziandipoles: if self.grid.hertziandipoles:
self.update_hertzian_dipole_gpu(np.int32(len(self.grid.hertziandipoles)), self.update_hertzian_dipole_gpu(np.int32(len(self.grid.hertziandipoles)),
np.int32(self.grid.iteration), np.int32(self.grid.iteration),
config.dtypes['float_or_double'](self.grid.dx), config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.dtypes['float_or_double'](self.grid.dy), config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.dtypes['float_or_double'](self.grid.dz), config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_hertzian_gpu.gpudata, self.srcinfo1_hertzian_gpu.gpudata,
self.srcinfo2_hertzian_gpu.gpudata, self.srcinfo2_hertzian_gpu.gpudata,
self.srcwaves_hertzian_gpu.gpudata, self.srcwaves_hertzian_gpu.gpudata,
@@ -660,21 +661,20 @@ class CUDAUpdates:
"""Copy data from GPU back to CPU to save to file(s).""" """Copy data from GPU back to CPU to save to file(s)."""
# Copy output from receivers array back to correct receiver objects # Copy output from receivers array back to correct receiver objects
if self.grid.rxs: if self.grid.rxs:
gpu_get_rx_array(self.rxs_gpu.get(), get_rx_array_gpu(self.rxs_gpu.get(),
self.rxcoords_gpu.get(), self.rxcoords_gpu.get(),
self.grid) self.grid)
# Copy data from any snapshots back to correct snapshot objects # Copy data from any snapshots back to correct snapshot objects
if self.grid.snapshots and not self.grid.snapsgpu2cpu: if self.grid.snapshots and not self.grid.snapsgpu2cpu:
for i, snap in enumerate(self.grid.snapshots): for i, snap in enumerate(self.grid.snapshots):
gpu_get_snapshot_array(self.snapEx_gpu.get(), get_snapshot_array_gpu(self.snapEx_gpu.get(),
self.snapEy_gpu.get(), self.snapEy_gpu.get(),
self.snapEz_gpu.get(), self.snapEz_gpu.get(),
self.snapHx_gpu.get(), self.snapHx_gpu.get(),
self.snapHy_gpu.get(), self.snapHy_gpu.get(),
self.snapHz_gpu.get(), self.snapHz_gpu.get(),
i, i, snap)
snap)
def cleanup(self): def cleanup(self):
"""Cleanup GPU context.""" """Cleanup GPU context."""

查看文件

@@ -421,10 +421,6 @@ class GPU:
self.pcibusID = None self.pcibusID = None
self.constmem = None self.constmem = None
self.totalmem = None self.totalmem = None
# Threads per block for main field updates
self.tpb = (256, 1, 1)
# Blocks per grid for main field updates (set in grid.py)
self.bpg = None
def get_gpu_info(self, drv): def get_gpu_info(self, drv):
"""Set information about GPU. """Set information about GPU.