Work to implement OpenCL solver - temporarily breaks CUDA

This commit is contained in:
Craig Warren
2022-02-19 16:52:52 +00:00
Parent 0203d03cae
Commit cc0e30900b
52 changed files with 10369 additions and 739 deletions

View File

@@ -28,7 +28,7 @@ from scipy.constants import c
from scipy.constants import epsilon_0 as e0
from scipy.constants import mu_0 as m0
from .utilities.host_info import detect_cuda_gpus, get_host_info
from .utilities.host_info import detect_cuda_gpus, detect_opencl, get_host_info
from .utilities.utilities import get_terminal_width
logger = logging.getLogger(__name__)
@@ -61,15 +61,21 @@ class ModelConfig:
self.grids = []
self.ompthreads = None
# Store information for CUDA solver
# gpu: GPU object
# snapsgpu2cpu: copy snapshot data from GPU to CPU during simulation
# N.B. This will happen if the requested snapshots are too large to fit
# on the memory of the GPU. If True this will slow performance significantly
if sim_config.general['cuda']:
# If a list of lists of GPU deviceIDs is found, flatten it
if any(isinstance(element, list) for element in sim_config.args.gpu):
deviceID = [val for sublist in sim_config.args.gpu for val in sublist]
# Store information for CUDA or OpenCL solver
# dev: compute device object.
# snapsgpu2cpu: copy snapshot data from GPU to CPU during simulation.
# N.B. This will happen if the requested snapshots are too large to
# fit on the memory of the GPU. If True this will slow
# performance significantly.
if sim_config.general['solver'] == 'cuda' or sim_config.general['solver'] == 'opencl':
if sim_config.general['solver'] == 'cuda':
devs = sim_config.args.gpu
elif sim_config.general['solver'] == 'opencl':
devs = sim_config.args.opencl
# If a list of lists of deviceIDs is found, flatten it
if any(isinstance(element, list) for element in devs):
deviceID = [val for sublist in devs for val in sublist]
# If no deviceID is given default to using deviceID 0. Else if either
# a single deviceID or list of deviceIDs is given use first one.
@@ -78,8 +84,8 @@ class ModelConfig:
except:
deviceID = 0
self.cuda = {'gpu': sim_config.set_model_gpu(deviceID),
'snapsgpu2cpu': False}
self.device = {'dev': sim_config.set_model_device(deviceID),
'snapsgpu2cpu': False}
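To illustrate the device selection above, a minimal standalone sketch of the flattening and first-device behaviour (the example devs value is hypothetical):

    # e.g. deviceIDs collected from repeated --gpu/--opencl arguments
    devs = [[0, 1], [2]]
    # If a list of lists of deviceIDs is found, flatten it
    if any(isinstance(element, list) for element in devs):
        devs = [val for sublist in devs for val in sublist]  # -> [0, 1, 2]
    try:
        deviceID = devs[0]   # use the first requested device
    except (TypeError, IndexError):
        deviceID = 0         # default to deviceID 0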
# Total memory usage for all grids in the model. Starts with 50MB overhead.
self.mem_overhead = 50e6
@@ -88,29 +94,34 @@ class ModelConfig:
self.reuse_geometry = False
# String to print at start of each model run
s = f'\n--- Model {model_num + 1}/{sim_config.model_end}, input file: {sim_config.input_file_path}'
self.inputfilestr = Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" + Style.RESET_ALL
s = (f'\n--- Model {model_num + 1}/{sim_config.model_end}, '
f'input file: {sim_config.input_file_path}')
self.inputfilestr = (Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" +
Style.RESET_ALL)
# Output file path and name for specific model
self.appendmodelnumber = '' if sim_config.single_model else str(model_num + 1) # Indexed from 1
self.set_output_file_path()
# Numerical dispersion analysis parameters
# highestfreqthres: threshold (dB) down from maximum power (0dB) of main frequency used
# to calculate highest frequency for numerical dispersion analysis
# maxnumericaldisp: maximum allowable percentage physical phase-velocity phase error
# mingridsampling: minimum grid sampling of smallest wavelength for physical wave propagation
# highestfreqthres: threshold (dB) down from maximum power (0dB) of
# main frequency used to calculate highest
# frequency for numerical dispersion analysis.
# maxnumericaldisp: maximum allowable percentage physical
# phase-velocity phase error.
# mingridsampling: minimum grid sampling of smallest wavelength for
# physical wave propagation.
self.numdispersion = {'highestfreqthres': 40,
'maxnumericaldisp': 2,
'mingridsampling': 3}
# General information to configure materials
# maxpoles: Maximum number of dispersive material poles in a model
# dispersivedtype: Data type for dispersive materials
# dispersiveCdtype: Data type for dispersive materials in Cython
# drudelorentz: True/False model contains Drude or Lorentz materials
# maxpoles: Maximum number of dispersive material poles in a model.
# dispersivedtype: Data type for dispersive materials.
# dispersiveCdtype: Data type for dispersive materials in Cython.
# drudelorentz: True/False model contains Drude or Lorentz materials.
# cudarealfunc: String to substitute into CUDA kernels for fields
# dependent on dispersive material type
# dependent on dispersive material type.
self.materials = {'maxpoles': 0,
'dispersivedtype': None,
'dispersiveCdtype': None,
@@ -123,32 +134,32 @@ class ModelConfig:
else: return None
def get_usernamespace(self):
return {'c': c, # Speed of light in free space (m/s)
'e0': e0, # Permittivity of free space (F/m)
'm0': m0, # Permeability of free space (H/m)
'z0': np.sqrt(m0 / e0), # Impedance of free space (Ohms)
'number_model_runs': sim_config.model_end,
'current_model_run': model_num + 1,
'inputfile': sim_config.input_file_path.resolve()}
tmp = {'number_model_runs': sim_config.model_end,
'current_model_run': model_num + 1,
'inputfile': sim_config.input_file_path.resolve()}
return dict(**sim_config.em_consts, **tmp)
def set_dispersive_material_types(self):
"""Set data type for disperive materials. Complex if Drude or Lorentz
materials are present. Real if Debye materials.
"""
if self.materials['drudelorentz']:
self.materials['cudarealfunc'] = '.real()'
self.materials['crealfunc'] = '.real()'
self.materials['dispersivedtype'] = sim_config.dtypes['complex']
self.materials['dispersiveCdtype'] = sim_config.dtypes['C_complex']
else:
self.materials['crealfunc'] = ''
self.materials['dispersivedtype'] = sim_config.dtypes['float_or_double']
self.materials['dispersiveCdtype'] = sim_config.dtypes['C_float_or_double']
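As a quick illustration of the mapping this method applies (a sketch assuming single precision, not the gprMax API):

    import numpy as np

    drudelorentz = True   # hypothetical: model contains Drude or Lorentz materials
    if drudelorentz:
        dispersivedtype, crealfunc = np.complex64, '.real()'
    else:                 # Debye materials only
        dispersivedtype, crealfunc = np.float32, ''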
def set_output_file_path(self, outputdir=None):
"""Output file path can be provided by the user via the API or an input file
command. If they haven't provided one use the input file path instead.
"""Output file path can be provided by the user via the API or an input
file command. If they haven't provided one use the input file path
instead.
Args:
outputdir (str): Output file directory given from input file command.
outputdir: string of output file directory given by input file command.
"""
if not outputdir:
@@ -171,7 +182,7 @@ class ModelConfig:
"""Set directory to store any snapshots.
Returns:
snapshot_dir (Path): directory to store snapshot files in.
snapshot_dir: Path to directory to store snapshot files in.
"""
parts = self.output_file_path.with_suffix('').parts
snapshot_dir = Path(*parts[:-1], parts[-1] + '_snaps')
@@ -187,7 +198,7 @@ class SimulationConfig:
def __init__(self, args):
"""
Args:
args (Namespace): Arguments from either API or CLI.
args: Namespace with arguments from either API or CLI.
"""
self.args = args
@@ -196,17 +207,19 @@ class SimulationConfig:
logger.exception('The geometry fixed option cannot be used with MPI.')
raise ValueError
# General settings for the simulation
# inputfilepath: path to inputfile location
# outputfilepath: path to outputfile location
# progressbars: whether to show progress bars on stdout or not
# cpu, cuda, opencl: solver type
# subgrid: whether the simulation uses sub-grids
# precision: data type for electromagnetic field output (single/double)
if args.gpu and args.opencl:
logger.exception('You cannot use both CUDA and OpenCL simultaneously.')
raise ValueError
self.general = {'cpu': True,
'cuda': False,
'opencl': False,
# General settings for the simulation
# inputfilepath: path to inputfile location.
# outputfilepath: path to outputfile location.
# progressbars: whether to show progress bars on stdout or not.
# solver: cpu, cuda, opencl.
# subgrid: whether the simulation uses sub-grids.
# precision: data type for electromagnetic field output (single/double).
self.general = {'solver': 'cpu',
'subgrid': False,
'precision': 'single'}
@@ -222,29 +235,37 @@ class SimulationConfig:
# Store information about host machine
self.hostinfo = get_host_info()
# Information about any Nvidia GPUs
# CUDA
if self.args.gpu is not None:
self.general['cuda'] = True
self.general['cpu'] = False
self.general['opencl'] = False
self.general['solver'] = 'cuda'
# Both single and double precision are possible on GPUs, but single
# provides best performance.
self.general['precision'] = 'single'
self.cuda = {'gpus': [], # gpus: list of GPU objects
'nvcc_opts': None} # nvcc_opts: nvcc compiler options
self.devices = {'devs': [], # devs: list of pycuda device objects
'nvcc_opts': None} # nvcc_opts: nvcc compiler options
# Suppress nvcc warnings on Microsoft Windows
if sys.platform == 'win32': self.cuda['nvcc_opts'] = ['-w']
# List of GPU objects of available GPUs
self.cuda['gpus'] = detect_cuda_gpus()
# Add pycuda available GPU(s)
self.devices['devs'] = detect_cuda_gpus()
# OpenCL
if self.args.opencl is not None:
self.general['solver'] = 'opencl'
self.general['precision'] = 'single'
# List of pyopencl available device(s)
self.devices = {'devs': []}
self.devices['devs'] = detect_opencl()
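detect_opencl() itself is not shown in this diff; a minimal sketch of what such a helper could look like with pyopencl (the sequential deviceID numbering and the {deviceID: device} return format are assumptions based on how set_model_device iterates devs.items()):

    import pyopencl as cl

    def detect_opencl_sketch():
        """Return a dict of {deviceID: pyopencl.Device} across all platforms."""
        devs = {}
        deviceID = 0
        for platform in cl.get_platforms():
            for device in platform.get_devices():
                devs[deviceID] = device
                deviceID += 1
        return devs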
# Subgrid parameter may not exist if user enters via CLI
try:
self.general['subgrid'] = self.args.subgrid
# Double precision should be used with subgrid for best accuracy
self.general['precision'] = 'double'
if self.general['subgrid'] and self.general['cuda']:
logger.exception('The CUDA-based solver cannot currently be used with models that contain sub-grids.')
if (self.general['subgrid'] and
    self.general['solver'] in ('cuda', 'opencl')):
logger.exception('You cannot currently use CUDA or OpenCL-based '
'solvers with models that contain sub-grids.')
raise ValueError
except AttributeError:
self.general['subgrid'] = False
@@ -262,34 +283,35 @@ class SimulationConfig:
self._set_model_start_end()
self._set_single_model()
def set_model_gpu(self, deviceID):
"""Specify GPU object for model.
def set_model_device(self, deviceID):
"""Specify pycuda/pyopencl object for model.
Args:
deviceID (int): Requested deviceID of GPU
deviceID: int of requested deviceID of compute device.
Returns:
gpu (GPU object): Requested GPU object.
dev: requested pycuda/pyopencl device object.
"""
found = False
for gpu in self.cuda['gpus']:
if gpu.deviceID == deviceID:
for ID, dev in self.devices['devs'].items():
if ID == deviceID:
found = True
return gpu
return dev
if not found:
logger.exception(f'GPU with device ID {deviceID} does not exist')
logger.exception(f'Compute device with device ID {deviceID} does '
'not exist.')
raise ValueError
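Since devs is now a dict keyed by deviceID, the loop above could also be written as a direct lookup; a sketch of that alternative:

    dev = self.devices['devs'].get(deviceID)
    if dev is None:
        logger.exception(f'Compute device with device ID {deviceID} does not exist.')
        raise ValueError
    return dev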
def _set_precision(self):
"""Data type (precision) for electromagnetic field output.
Solid and ID arrays use 32-bit integers (0 to 4294967295)
Rigid arrays use 8-bit integers (the smallest available type to store true/false)
Fractal arrays use complex numbers
Dispersive coefficient arrays use either float or complex numbers
Main field arrays use floats
Solid and ID arrays use 32-bit integers (0 to 4294967295).
Rigid arrays use 8-bit integers (the smallest available type to store true/false).
Fractal arrays use complex numbers.
Dispersive coefficient arrays use either float or complex numbers.
Main field arrays use floats.
"""
if self.general['precision'] == 'single':
@@ -298,16 +320,25 @@ class SimulationConfig:
'cython_float_or_double': cython.float,
'cython_complex': cython.floatcomplex,
'C_float_or_double': 'float',
'C_complex': 'pycuda::complex<float>',
'C_complex': None,
'vtk_float': 'Float32'}
if self.general['solver'] == 'cuda':
self.dtypes['C_complex'] = 'pycuda::complex<float>'
elif self.general['solver'] == 'opencl':
self.dtypes['C_complex'] = 'cfloat'
elif self.general['precision'] == 'double':
self.dtypes = {'float_or_double': np.float64,
'complex': np.complex128,
'cython_float_or_double': cython.double,
'cython_complex': cython.doublecomplex,
'C_float_or_double': 'double',
'C_complex': 'pycuda::complex<double>',
'C_complex': None,
'vtk_float': 'Float64'}
if self.general['solver'] == 'cuda':
self.dtypes['C_complex'] = 'pycuda::complex<double>'
elif self.general['solver'] == 'opencl':
self.dtypes['C_complex'] = 'cdouble'
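For reference, a summary sketch of the complex C type selected above for each precision/solver combination:

    C_COMPLEX = {('single', 'cuda'):   'pycuda::complex<float>',
                 ('single', 'opencl'): 'cfloat',
                 ('double', 'cuda'):   'pycuda::complex<double>',
                 ('double', 'opencl'): 'cdouble'}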
def _get_byteorder(self):
"""Check the byte order of system to use for VTK files, i.e. geometry

View File

@@ -25,18 +25,23 @@ import gprMax.config as config
from ._version import __version__, codename
from .model_build_run import ModelBuildRun
from .solvers import create_G, create_solver
from .utilities.utilities import get_terminal_width, human_size, logo, timer
from .utilities.host_info import (detect_cuda_gpus, detect_opencl,
print_cuda_info, print_host_info,
print_opencl_info)
from .utilities.utilities import get_terminal_width, logo, timer
logger = logging.getLogger(__name__)
class Context:
"""Standard context - models are run one after another and each model
can exploit parallelisation using either OpenMP (CPU) or CUDA (GPU).
can exploit parallelisation using either OpenMP (CPU), CUDA (GPU), or
OpenCL (CPU/GPU).
"""
def __init__(self):
self.model_range = range(config.sim_config.model_start, config.sim_config.model_end)
self.model_range = range(config.sim_config.model_start,
config.sim_config.model_end)
self.tsimend = None
self.tsimstart = None
@@ -44,10 +49,12 @@ class Context:
"""Run the simulation in the correct context."""
self.tsimstart = timer()
self.print_logo_copyright()
self.print_host_info()
if config.sim_config.general['cuda']:
self.print_gpu_info()
print_host_info(config.sim_config.hostinfo)
if config.sim_config.general['solver'] == 'cuda':
print_cuda_info(config.sim_config.devices['devs'])
elif config.sim_config.general['solver'] == 'opencl':
print_opencl_info(config.sim_config.devices['devs'])
# Clear list of model configs. It can be retained when gprMax is
# called in a loop, and want to avoid this.
config.model_configs = []
@@ -79,33 +86,23 @@ class Context:
logo_copyright = logo(__version__ + ' (' + codename + ')')
logger.basic(logo_copyright)
def print_host_info(self):
"""Print information about the host machine."""
hyperthreadingstr = f", {config.sim_config.hostinfo['logicalcores']} cores with Hyper-Threading" if config.sim_config.hostinfo['hyperthreading'] else ''
logger.basic(f"\nHost: {config.sim_config.hostinfo['hostname']} | {config.sim_config.hostinfo['machineID']} | {config.sim_config.hostinfo['sockets']} x {config.sim_config.hostinfo['cpuID']} ({config.sim_config.hostinfo['physicalcores']} cores{hyperthreadingstr}) | {human_size(config.sim_config.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} RAM | {config.sim_config.hostinfo['osversion']}")
def print_gpu_info(self):
"""Print information about any NVIDIA CUDA GPUs detected."""
gpus_info = []
for gpu in config.sim_config.cuda['gpus']:
gpus_info.append(f'{gpu.deviceID} - {gpu.name}, {human_size(gpu.totalmem, a_kilobyte_is_1024_bytes=True)}')
logger.basic(f"GPU resources: {' | '.join(gpus_info)}")
def print_time_report(self):
"""Print the total simulation time based on context."""
s = f"\n=== Simulation completed in [HH:MM:SS]: {datetime.timedelta(seconds=self.tsimend - self.tsimstart)}"
s = ("\n=== Simulation completed in [HH:MM:SS]: "
f"{datetime.timedelta(seconds=self.tsimend - self.tsimstart)}")
logger.basic(f"{s} {'=' * (get_terminal_width() - 1 - len(s))}\n")
class MPIContext(Context):
"""Mixed mode MPI/OpenMP/CUDA context - MPI task farm is used to distribute
models, and each model parallelised using either OpenMP (CPU)
or CUDA (GPU).
models, and each model parallelised using either OpenMP (CPU),
CUDA (GPU), or OpenCL (CPU/GPU).
"""
def __init__(self):
super().__init__()
from mpi4py import MPI
from gprMax.mpi import MPIExecutor
self.comm = MPI.COMM_WORLD
@@ -149,7 +146,9 @@ class MPIContext(Context):
if executor.is_master():
if config.sim_config.general['cuda']:
if executor.size - 1 > len(config.sim_config.cuda['gpus']):
logger.exception('Not enough GPU resources for number of MPI tasks requested. Number of MPI tasks should be equal to number of GPUs + 1.')
logger.exception('Not enough GPU resources for number of '
'MPI tasks requested. Number of MPI tasks '
'should be equal to number of GPUs + 1.')
raise ValueError
# Create job list
@@ -175,7 +174,8 @@ class SPOTPYContext(Context):
(https://github.com/thouska/spotpy). SPOTPY coupling can utilise 2 levels
of MPI parallelism - where the top level is where SPOTPY optimisation
algorithms can be parallelised, and the lower level is where gprMax
models can be parallelised using either OpenMP (CPU) or CUDA (GPU).
models can be parallelised using either OpenMP (CPU), CUDA (GPU), or
OpenCL (CPU/GPU).
"""
def __init__(self):

View File

@@ -1,244 +0,0 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
kernel_template_fields = Template("""
#include <pycuda-complex.hpp>
// Macros for converting subscripts to linear index:
#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n)
#define INDEX2D_MATDISP(m, n) (m)*($NY_MATDISPCOEFFS)+(n)
#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k)
#define INDEX4D_ID(p, i, j, k) (p)*($NX_ID)*($NY_ID)*($NZ_ID)+(i)*($NY_ID)*($NZ_ID)+(j)*($NZ_ID)+(k)
#define INDEX4D_T(p, i, j, k) (p)*($NX_T)*($NY_T)*($NZ_T)+(i)*($NY_T)*($NZ_T)+(j)*($NZ_T)+(k)
// Material coefficients (read-only) in constant memory (64KB)
__device__ __constant__ $REAL updatecoeffsE[$N_updatecoeffsE];
__device__ __constant__ $REAL updatecoeffsH[$N_updatecoeffsH];
///////////////////////////////////////////////
// Electric field updates - normal materials //
///////////////////////////////////////////////
__global__ void update_electric(int NX, int NY, int NZ, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
// This function updates electric field values.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// ID, E, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ($NY_FIELDS * $NZ_FIELDS);
int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]);
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]);
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]);
}
}
////////////////////////////
// Magnetic field updates //
////////////////////////////
__global__ void update_magnetic(int NX, int NY, int NZ, const unsigned int* __restrict__ ID, $REAL *Hx, $REAL *Hy, $REAL *Hz, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
// This function updates magnetic field values.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// ID, E, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ($NY_FIELDS * $NZ_FIELDS);
int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Hx component
if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) {
int materialHx = ID[INDEX4D_ID(3,i_ID,j_ID,k_ID)];
Hx[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHx,0)] * Hx[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHx,2)] * (Ez[INDEX3D_FIELDS(i,j+1,k)] - Ez[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHx,3)] * (Ey[INDEX3D_FIELDS(i,j,k+1)] - Ey[INDEX3D_FIELDS(i,j,k)]);
}
// Hy component
if (NY != 1 && i >= 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialHy = ID[INDEX4D_ID(4,i_ID,j_ID,k_ID)];
Hy[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHy,0)] * Hy[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHy,3)] * (Ex[INDEX3D_FIELDS(i,j,k+1)] - Ex[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHy,1)] * (Ez[INDEX3D_FIELDS(i+1,j,k)] - Ez[INDEX3D_FIELDS(i,j,k)]);
}
// Hz component
if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialHz = ID[INDEX4D_ID(5,i_ID,j_ID,k_ID)];
Hz[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHz,0)] * Hz[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHz,1)] * (Ey[INDEX3D_FIELDS(i+1,j,k)] - Ey[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHz,2)] * (Ex[INDEX3D_FIELDS(i,j+1,k)] - Ex[INDEX3D_FIELDS(i,j,k)]);
}
}
///////////////////////////////////////////////////
// Electric field updates - dispersive materials //
///////////////////////////////////////////////////
__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
// This function is part A of updates to electric field values when dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ($NY_FIELDS * $NZ_FIELDS);
int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T);
int j_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
int k_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
$REAL phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,pole*3)]$REALFUNC * Tx[INDEX4D_T(pole,i_T,j_T,k_T)]$REALFUNC;
Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,1+(pole*3))] * Tx[INDEX4D_T(pole,i_T,j_T,k_T)] + updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))] * Ex[INDEX3D_FIELDS(i,j,k)];
}
Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * phi;
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
$REAL phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,pole*3)]$REALFUNC * Ty[INDEX4D_T(pole,i_T,j_T,k_T)]$REALFUNC;
Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,1+(pole*3))] * Ty[INDEX4D_T(pole,i_T,j_T,k_T)] + updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))] * Ey[INDEX3D_FIELDS(i,j,k)];
}
Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * phi;
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
$REAL phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,pole*3)]$REALFUNC * Tz[INDEX4D_T(pole,i_T,j_T,k_T)]$REALFUNC;
Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,1+(pole*3))] * Tz[INDEX4D_T(pole,i_T,j_T,k_T)] + updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))] * Ez[INDEX3D_FIELDS(i,j,k)];
}
Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * phi;
}
}
__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
// This function is part B which updates the dispersive field arrays when dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ($NY_FIELDS * $NZ_FIELDS);
int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T);
int j_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
int k_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = Tx[INDEX4D_T(pole,i_T,j_T,k_T)] - updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))] * Ex[INDEX3D_FIELDS(i,j,k)];
}
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = Ty[INDEX4D_T(pole,i_T,j_T,k_T)] - updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))] * Ey[INDEX3D_FIELDS(i,j,k)];
}
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = Tz[INDEX4D_T(pole,i_T,j_T,k_T)] - updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))] * Ez[INDEX3D_FIELDS(i,j,k)];
}
}
}
""")

View File

@@ -18,7 +18,7 @@
from string import Template
kernel_template_store_snapshot = Template("""
knl_template_store_snapshot = Template("""
// Macros for converting subscripts to linear index:
#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k)

View File

@@ -18,7 +18,7 @@
from string import Template
kernel_template_sources = Template("""
knl_template_sources = Template("""
// Macros for converting subscripts to linear index:
#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n)

View File

@@ -0,0 +1,276 @@
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
{% block complex_header %}{% endblock complex_header %}
// Macros for converting subscripts to linear index:
#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define IDX2D_MATDISP(m, n) (m)*({{NY_MATDISPCOEFFS}})+(n)
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
#define IDX4D_T(p, i, j, k) (p)*({{NX_T}})*({{NY_T}})*({{NZ_T}})+(i)*({{NY_T}})*({{NZ_T}})+(j)*({{NZ_T}})+(k)
// Material coefficients (read-only) stored in constant memory of compute device
{% block constmem %}{% endblock constmem %}
///////////////////////////////////////////////
// Electric field updates - normal materials //
///////////////////////////////////////////////
{{KERNEL}} void update_electric(int NX,
int NY,
int NZ,{% filter indent(width=30) %}{% block electric_args %}{% endblock electric_args %}{% endfilter %}{
// This function updates electric field values.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// ID, E, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread
{% block threadidx %}{% endblock threadidx %}
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)];
Ex[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i,j,k-1)]);
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)];
Ey[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j,k-1)]) -
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i-1,j,k)]);
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)];
Ez[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i-1,j,k)]) -
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j-1,k)]);
}
}
////////////////////////////
// Magnetic field updates //
////////////////////////////
{{KERNEL}} void update_magnetic(int NX,
int NY,
int NZ,{% filter indent(width=30) %}{% block magnetic_args %}{% endblock magnetic_args %}{% endfilter %}{
// This function updates magnetic field values.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// ID, E, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread
{{self.threadidx()}}
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Hx component
if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) {
int materialHx = ID[IDX4D_ID(3,i_ID,j_ID,k_ID)];
Hx[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHx,0)] * Hx[IDX3D_FIELDS(i,j,k)] -
updatecoeffsH[IDX2D_MAT(materialHx,2)] * (Ez[IDX3D_FIELDS(i,j+1,k)] - Ez[IDX3D_FIELDS(i,j,k)]) +
updatecoeffsH[IDX2D_MAT(materialHx,3)] * (Ey[IDX3D_FIELDS(i,j,k+1)] - Ey[IDX3D_FIELDS(i,j,k)]);
}
// Hy component
if (NY != 1 && i >= 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialHy = ID[IDX4D_ID(4,i_ID,j_ID,k_ID)];
Hy[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHy,0)] * Hy[IDX3D_FIELDS(i,j,k)] -
updatecoeffsH[IDX2D_MAT(materialHy,3)] * (Ex[IDX3D_FIELDS(i,j,k+1)] - Ex[IDX3D_FIELDS(i,j,k)]) +
updatecoeffsH[IDX2D_MAT(materialHy,1)] * (Ez[IDX3D_FIELDS(i+1,j,k)] - Ez[IDX3D_FIELDS(i,j,k)]);
}
// Hz component
if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialHz = ID[IDX4D_ID(5,i_ID,j_ID,k_ID)];
Hz[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHz,0)] * Hz[IDX3D_FIELDS(i,j,k)] -
updatecoeffsH[IDX2D_MAT(materialHz,1)] * (Ey[IDX3D_FIELDS(i+1,j,k)] - Ey[IDX3D_FIELDS(i,j,k)]) +
updatecoeffsH[IDX2D_MAT(materialHz,2)] * (Ex[IDX3D_FIELDS(i,j+1,k)] - Ex[IDX3D_FIELDS(i,j,k)]);
}
}
///////////////////////////////////////////////////
// Electric field updates - dispersive materials //
///////////////////////////////////////////////////
{{KERNEL}} void update_electric_dispersive_A(int NX,
int NY,
int NZ,
int MAXPOLES,{% filter indent(width=43) %}{% block electric_dispersive_A_args %}{% endblock electric_dispersive_A_args %}{% endfilter %}{
// This function is part A of updates to electric field values when
// dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
{{self.threadidx()}}
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEx,pole*3)]{{REALFUNC}} * Tx[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}};
Tx[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEx,1+(pole*3))] * Tx[IDX4D_T(pole,i_T,j_T,k_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(i,j,k)];
}
Ex[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i,j,k-1)]) -
updatecoeffsE[IDX2D_MAT(materialEx,4)] * phi;
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEy,pole*3)]{{REALFUNC}} * Ty[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}};
Ty[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEy,1+(pole*3))] * Ty[IDX4D_T(pole,i_T,j_T,k_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(i,j,k)];
}
Ey[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j,k-1)]) -
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i-1,j,k)]) -
updatecoeffsE[IDX2D_MAT(materialEy,4)] * phi;
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEz,pole*3)]{{REALFUNC}} * Tz[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}};
Tz[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEz,1+(pole*3))] * Tz[IDX4D_T(pole,i_T,j_T,k_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(i,j,k)];
}
Ez[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i-1,j,k)]) -
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[IDX2D_MAT(materialEz,4)] * phi;
}
}
{{KERNEL}} void update_electric_dispersive_B(int NX,
int NY,
int NZ,
int MAXPOLES,{% filter indent(width=43) %}{% block electric_dispersive_B_args %}{% endblock electric_dispersive_B_args %}{% endfilter %}{
// This function is part B which updates the dispersive field arrays when
// dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
{{self.threadidx()}}
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tx[IDX4D_T(pole,i_T,j_T,k_T)] = Tx[IDX4D_T(pole,i_T,j_T,k_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(i,j,k)];
}
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Ty[IDX4D_T(pole,i_T,j_T,k_T)] = Ty[IDX4D_T(pole,i_T,j_T,k_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(i,j,k)];
}
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tz[IDX4D_T(pole,i_T,j_T,k_T)] = Tz[IDX4D_T(pole,i_T,j_T,k_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(i,j,k)];
}
}
}
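The old string.Template kernels are replaced by this Jinja2 base template plus solver-specific child templates (shown below). A minimal sketch of how such templates might be rendered, assuming jinja2 and illustrative template/parameter names (the loader path, template file name, and sizes here are assumptions, not the actual gprMax call):

    from jinja2 import Environment, PackageLoader

    env = Environment(loader=PackageLoader('gprMax', 'cuda_opencl'))   # path is an assumption
    tmpl = env.get_template('fields_updates_cuda.tmpl')                # name is an assumption
    kernels = tmpl.render(REAL='float',
                          COMPLEX='pycuda::complex<float>',
                          REALFUNC='.real()',
                          KERNEL='__global__',
                          N_updatecoeffsE=30, N_updatecoeffsH=30,      # hypothetical sizes
                          NY_MATCOEFFS=5, NY_MATDISPCOEFFS=3,
                          NY_FIELDS=101, NZ_FIELDS=101,
                          NX_ID=101, NY_ID=101, NZ_ID=101,
                          NX_T=101, NY_T=101, NZ_T=101)

The OpenCL child template additionally expects the updatecoeffsE/updatecoeffsH coefficient values themselves (as sequences) so they can be baked into its __constant arrays at build time.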

View File

@@ -0,0 +1,65 @@
{% extends "fields_updates_base.tmpl" %}
{% block complex_header %}
#include <pycuda-complex.hpp>
{% endblock complex_header %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
{% endblock constmem %}
{% block threadidx %}
int idx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block electric_args %}
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
{{REAL}} *Ey,
{{REAL}} *Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz)
{% endblock electric_args %}
{% block magnetic_args %}
const unsigned int* __restrict__ ID,
{{REAL}} *Hx,
{{REAL}} *Hy,
{{REAL}} *Hz,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez)
{% endblock magnetic_args %}
{% block electric_dispersive_A_args %}
const {{COMPLEX}}* __restrict__ updatecoeffsdispersive,
{{COMPLEX}} *Tx,
{{COMPLEX}} *Ty,
{{COMPLEX}} *Tz,
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
{{REAL}} *Ey,
{{REAL}} *Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz)
{% endblock electric_dispersive_A_args %}
{% block electric_dispersive_B_args %}
const {{COMPLEX}}* __restrict__ updatecoeffsdispersive,
{{COMPLEX}} *Tx,
{{COMPLEX}} *Ty,
{{COMPLEX}} *Tz,
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez)
{% endblock electric_dispersive_B_args %}

View File

@@ -0,0 +1,77 @@
{% extends "fields_updates_base.tmpl" %}
{% block complex_header %}
#include <pyopencl-complex.h>
{% endblock complex_header %}
{% block constmem %}
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updatecoeffsE %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updatecoeffsH %}
{{i}},
{% endfor %}
};
{% endblock constmem %}
{% block threadidx %}
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block electric_args %}
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}} * restrict Hx,
__global const {{REAL}} * restrict Hy,
__global const {{REAL}} * restrict Hz)
{% endblock electric_args %}
{% block magnetic_args %}
__global const unsigned int* restrict ID,
__global {{REAL}} *Hx,
__global {{REAL}} *Hy,
__global {{REAL}} *Hz,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez)
{% endblock magnetic_args %}
{% block electric_dispersive_A_args %}
__global const {{COMPLEX}}* restrict updatecoeffsdispersive,
__global {{COMPLEX}} *Tx,
__global {{COMPLEX}} *Ty,
__global {{COMPLEX}} *Tz,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz)
{% endblock electric_dispersive_A_args %}
{% block electric_dispersive_B_args %}
__global const {{COMPLEX}}* restrict updatecoeffsdispersive,
__global {{COMPLEX}} *Tx,
__global {{COMPLEX}} *Ty,
__global {{COMPLEX}} *Tz,
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez)
{% endblock electric_dispersive_B_args %}
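The OpenCL threadidx block computes a single linear index from the 3D global work-item ID; a quick Python check that this covers every work-item exactly once (the global work sizes are hypothetical):

    gx, gy, gz = 8, 4, 2   # hypothetical get_global_size(0), (1), (2)

    def linear_id(g0, g1, g2):
        return g2 * gx * gy + g1 * gx + g0

    ids = {linear_id(x, y, z) for z in range(gz) for y in range(gy) for x in range(gx)}
    assert ids == set(range(gx * gy * gz))   # unique, contiguous indices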

File diff suppressed because it is too large to display.

View File

@@ -0,0 +1,62 @@
{% extends "pml_updates_electric_HORIPML_base.tmpl" %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
{% endblock constmem %}
{% block threadidx %}
int idx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block x_args %}
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
{{REAL}} *Ey,
{{REAL}} *Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock x_args %}
{% block y_args %}
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
const {{REAL}}* __restrict__ Ey,
{{REAL}} *Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock y_args %}
{% block z_args %}
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
{{REAL}} *Ey,
const {{REAL}}* __restrict__ Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock z_args %}

View File

@@ -0,0 +1,68 @@
{% extends "pml_updates_electric_HORIPML_base.tmpl" %}
{% block constmem %}
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updatecoeffsE %}
{{i}},
{% endfor %}
};
{% endblock constmem %}
{% block threadidx %}
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block x_args %}
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock x_args %}
{% block y_args %}
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global const {{REAL}}* restrict Ey,
__global {{REAL}} *Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock y_args %}
{% block z_args %}
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock z_args %}

File diff suppressed because it is too large to display.

View File

@@ -0,0 +1,62 @@
{% extends "pml_updates_magnetic_HORIPML_base.tmpl" %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
{% endblock constmem %}
{% block threadidx %}
int idx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block x_args %}
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
const {{REAL}}* __restrict__ Hx,
{{REAL}} *Hy,
{{REAL}} *Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock x_args %}
{% block y_args %}
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
{{REAL}} *Hx,
const {{REAL}}* __restrict__ Hy,
{{REAL}} *Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock y_args %}
{% block z_args %}
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
{{REAL}} *Hx,
{{REAL}} *Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock z_args %}

View File

@@ -0,0 +1,68 @@
{% extends "pml_updates_magnetic_HORIPML_base.tmpl" %}
{% block constmem %}
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updatecoeffsH %}
{{i}},
{% endfor %}
};
{% endblock constmem %}
{% block threadidx %}
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block x_args %}
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global {{REAL}} *Hy,
__global {{REAL}} *Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock x_args %}
{% block y_args %}
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global {{REAL}} *Hx,
__global const {{REAL}}* restrict Hy,
__global {{REAL}} *Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock y_args %}
{% block z_args %}
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global {{REAL}} *Hx,
__global {{REAL}} *Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock z_args %}

View File

@@ -0,0 +1,90 @@
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
// Macros for converting subscripts to linear index:
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define IDX4D_SNAPS(p, i, j, k) (p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k)
////////////////////
// Store snapshot //
////////////////////
{{KERNEL}} void store_snapshot(int p,
int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int dx,
int dy,
int dz,{% filter indent(width=29) %}{% block snap_args %}{% endblock snap_args %}{% endfilter %}{
// This function stores field values for a snapshot.
//
// Args:
// p: Snapshot number
// xs, xf, ys, yf, zs, zf: Start and finish cell coordinates for snapshot
// dx, dy, dz: Sampling interval in cell coordinates for snapshot
// E, H: Access to field component arrays
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots
// Obtain the linear index corresponding to the current thread
{% block threadidx %}{% endblock threadidx %}
// Convert the linear index to subscripts for 4D SNAPS array
int i = (idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) / ({{NY_SNAPS}} * {{NZ_SNAPS}});
int j = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) / {{NZ_SNAPS}};
int k = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) % {{NZ_SNAPS}};
// Subscripts for field arrays
int ii, jj, kk;
if (i >= xs && i < xf && j >= ys && j < yf && k >= zs && k < zf) {
// Increment subscripts for field array to account for spatial sampling of snapshot
ii = (xs + i) * dx;
jj = (ys + j) * dy;
kk = (zs + k) * dz;
// The electric field component value at a point comes from an average of
// the 4 electric field component values in that cell
snapEx[IDX4D_SNAPS(p,i,j,k)] = (Ex[IDX3D_FIELDS(ii,jj,kk)] +
Ex[IDX3D_FIELDS(ii,jj+1,kk)] +
Ex[IDX3D_FIELDS(ii,jj,kk+1)] +
Ex[IDX3D_FIELDS(ii,jj+1,kk+1)]) / 4;
snapEy[IDX4D_SNAPS(p,i,j,k)] = (Ey[IDX3D_FIELDS(ii,jj,kk)] +
Ey[IDX3D_FIELDS(ii+1,jj,kk)] +
Ey[IDX3D_FIELDS(ii,jj,kk+1)] +
Ey[IDX3D_FIELDS(ii+1,jj,kk+1)]) / 4;
snapEz[IDX4D_SNAPS(p,i,j,k)] = (Ez[IDX3D_FIELDS(ii,jj,kk)] +
Ez[IDX3D_FIELDS(ii+1,jj,kk)] +
Ez[IDX3D_FIELDS(ii,jj+1,kk)] +
Ez[IDX3D_FIELDS(ii+1,jj+1,kk)]) / 4;
// The magnetic field component value at a point comes from an average of
// 2 magnetic field component values in that cell and the following cell
snapHx[IDX4D_SNAPS(p,i,j,k)] = (Hx[IDX3D_FIELDS(ii,jj,kk)] +
Hx[IDX3D_FIELDS(ii+1,jj,kk)]) / 2;
snapHy[IDX4D_SNAPS(p,i,j,k)] = (Hy[IDX3D_FIELDS(ii,jj,kk)] +
Hy[IDX3D_FIELDS(ii,jj+1,kk)]) / 2;
snapHz[IDX4D_SNAPS(p,i,j,k)] = (Hz[IDX3D_FIELDS(ii,jj,kk)] +
Hz[IDX3D_FIELDS(ii,jj,kk+1)]) / 2;
}
}
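
A minimal Python sketch (not part of this commit) of the index arithmetic the snapshot kernel performs: the flat thread/work-item index is reduced to (i, j, k) subscripts within one snapshot volume before the field values are sampled and averaged. The volume dimensions below are illustrative.

# Hedged sketch: reproduce the kernel's linear-index -> subscript conversion in Python.
def snaps_subscripts(idx, nx_snaps, ny_snaps, nz_snaps):
    """Map a flat index to (i, j, k) within one snapshot volume."""
    idx %= nx_snaps * ny_snaps * nz_snaps
    i = idx // (ny_snaps * nz_snaps)
    j = (idx % (ny_snaps * nz_snaps)) // nz_snaps
    k = (idx % (ny_snaps * nz_snaps)) % nz_snaps
    return i, j, k

# Example: for a 4 x 3 x 2 snapshot volume, flat index 17 maps to (2, 2, 1).
assert snaps_subscripts(17, 4, 3, 2) == (2, 2, 1)
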

View file

@@ -0,0 +1,22 @@
{% extends "snapshots_base.tmpl" %}
{% block threadidx %}
int idx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block snap_args %}
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *snapEx,
{{REAL}} *snapEy,
{{REAL}} *snapEz,
{{REAL}} *snapHx,
{{REAL}} *snapHy,
{{REAL}} *snapHz)
{% endblock snap_args %}

View file

@@ -0,0 +1,23 @@
{% extends "snapshots_base.tmpl" %}
{% block threadidx %}
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block snap_args %}
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *snapEx,
__global {{REAL}} *snapEy,
__global {{REAL}} *snapEz,
__global {{REAL}} *snapHx,
__global {{REAL}} *snapHy,
__global {{REAL}} *snapHz)
{% endblock snap_args %}
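
The CUDA and OpenCL children of snapshots_base.tmpl differ only in the argument qualifiers and in how the flat index is formed: blockIdx/blockDim/threadIdx versus get_global_id/get_global_size. A hedged Python check (sizes are illustrative) that the OpenCL expression enumerates every work-item of a 3D global range exactly once:

# Hedged sketch: the OpenCL flat-index expression covers a 3D global range once.
gsize = (4, 3, 2)  # stands in for get_global_size(0), (1), (2)
seen = set()
for g2 in range(gsize[2]):
    for g1 in range(gsize[1]):
        for g0 in range(gsize[0]):
            seen.add(g2 * gsize[0] * gsize[1] + g1 * gsize[0] + g0)
assert seen == set(range(gsize[0] * gsize[1] * gsize[2]))
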

View file

@@ -0,0 +1,217 @@
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
// Macros for converting subscripts to linear index:
#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define IDX2D_SRCINFO(m, n) (m)*{{NY_SRCINFO}}+(n)
#define IDX2D_SRCWAVES(m, n) (m)*({{NY_SRCWAVES}})+(n)
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
// Material coefficients (read-only) stored in constant memory of compute device
{% block constmem %}{% endblock constmem %}
///////////////////////////////////////////
// Hertzian dipole electric field update //
///////////////////////////////////////////
{{KERNEL}} void update_hertzian_dipole(int NHERTZDIPOLE,
int iteration,
{{REAL}} dx,
{{REAL}} dy,
{{REAL}} dz,{% filter indent(width=37) %}{% block electric_source_args %}{% endblock electric_source_args %}{% endfilter %}{
// This function updates electric field values for Hertzian dipole sources.
//
// Args:
// NHERTZDIPOLE: Total number of Hertzian dipoles in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, E: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each source
{% block threadidx %}{% endblock threadidx %}
if (src < NHERTZDIPOLE) {
{{REAL}} dl;
int i, j, k, polarisation;
i = srcinfo1[IDX2D_SRCINFO(src,0)];
j = srcinfo1[IDX2D_SRCINFO(src,1)];
k = srcinfo1[IDX2D_SRCINFO(src,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(src,3)];
dl = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
int materialEx = ID[IDX4D_ID(0,i,j,k)];
Ex[IDX3D_FIELDS(i,j,k)] = Ex[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialEy = ID[IDX4D_ID(1,i,j,k)];
Ey[IDX3D_FIELDS(i,j,k)] = Ey[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialEz = ID[IDX4D_ID(2,i,j,k)];
Ez[IDX3D_FIELDS(i,j,k)] = Ez[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
}
}
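
For a single cell, the Hertzian dipole update above reduces to E -= C_src * w(iteration) * dl / (dx * dy * dz), where C_src is column 4 of updatecoeffsE for the material at the source location. A hedged single-cell sketch with illustrative numbers (not gprMax API):

# Hedged single-cell sketch of the Hertzian-dipole injection performed by the kernel.
dx = dy = dz = 1e-3      # spatial discretisations (illustrative)
dl = 1e-3                # dipole length (srcinfo2)
coeff_src = 2.0e-2       # stands in for updatecoeffsE[material, 4]
waveform_value = 0.5     # stands in for srcwaveforms[src, iteration]

Ex = 0.0
Ex = Ex - coeff_src * waveform_value * dl * (1 / (dx * dy * dz))
print(Ex)  # -10000.0 with these illustrative values
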
///////////////////////////////////////////
// Magnetic dipole magnetic field update //
///////////////////////////////////////////
{{KERNEL}} void update_magnetic_dipole(int NMAGDIPOLE,
int iteration,
{{REAL}} dx,
{{REAL}} dy,
{{REAL}} dz,{% filter indent(width=37) %}{% block magnetic_source_args %}{% endblock magnetic_source_args %}{% endfilter %}{
// This function updates magnetic field values for magnetic dipole sources.
//
// Args:
// NMAGDIPOLE: Total number of magnetic dipoles in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each source
{{self.threadidx()}}
if (src < NMAGDIPOLE) {
int i, j, k, polarisation;
i = srcinfo1[IDX2D_SRCINFO(src,0)];
j = srcinfo1[IDX2D_SRCINFO(src,1)];
k = srcinfo1[IDX2D_SRCINFO(src,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(src,3)];
// 'x' polarised source
if (polarisation == 0) {
int materialHx = ID[IDX4D_ID(3,i,j,k)];
Hx[IDX3D_FIELDS(i,j,k)] = Hx[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialHy = ID[IDX4D_ID(4,i,j,k)];
Hy[IDX3D_FIELDS(i,j,k)] = Hy[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialHz = ID[IDX4D_ID(5,i,j,k)];
Hz[IDX3D_FIELDS(i,j,k)] = Hz[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
}
}
//////////////////////////////////////////
// Voltage source electric field update //
//////////////////////////////////////////
{{KERNEL}} void update_voltage_source(int NVOLTSRC,
int iteration,
{{REAL}} dx,
{{REAL}} dy,
{{REAL}} dz,{% filter indent(width=36) %}{{self.electric_source_args()}}{% endfilter %}{
// This function updates electric field values for voltage sources.
//
// Args:
// NVOLTSRC: Total number of voltage sources in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, E: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each source
{{self.threadidx()}}
if (src < NVOLTSRC) {
{{REAL}} resistance;
int i, j, k, polarisation;
i = srcinfo1[IDX2D_SRCINFO(src,0)];
j = srcinfo1[IDX2D_SRCINFO(src,1)];
k = srcinfo1[IDX2D_SRCINFO(src,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(src,3)];
resistance = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
if (resistance != 0) {
int materialEx = ID[IDX4D_ID(0,i,j,k)];
Ex[IDX3D_FIELDS(i,j,k)] = Ex[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dy * dz));
}
else {
Ex[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dx;
}
}
// 'y' polarised source
else if (polarisation == 1) {
if (resistance != 0) {
int materialEy = ID[IDX4D_ID(1,i,j,k)];
Ey[IDX3D_FIELDS(i,j,k)] = Ey[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dz));
}
else {
Ey[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dy;
}
}
// 'z' polarised source
else if (polarisation == 2) {
if (resistance != 0) {
int materialEz = ID[IDX4D_ID(2,i,j,k)];
Ez[IDX3D_FIELDS(i,j,k)] = Ez[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dy));
}
else {
Ez[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dz;
}
}
}
}
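
The voltage source has two branches: a resistive source superimposes a term scaled by 1/(R * cell face area), while a zero-resistance (hard) source overwrites the field with -V/dl. A hedged Python sketch of that branch logic for a z-polarised source (names and values are illustrative):

# Hedged sketch of the voltage-source branches for a z-polarised source.
def apply_voltage_source_z(Ez, resistance, coeff_src, waveform_value, dx, dy, dz):
    if resistance != 0:
        # Soft source: add to the existing field value.
        return Ez - coeff_src * waveform_value * (1 / (resistance * dx * dy))
    # Hard source: impose the field value directly.
    return -1 * waveform_value / dz

print(apply_voltage_source_z(0.0, 50.0, 2.0e-2, 0.5, 1e-3, 1e-3, 1e-3))  # resistive
print(apply_voltage_source_z(0.0, 0.0, 2.0e-2, 0.5, 1e-3, 1e-3, 1e-3))   # hard source
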

View file

@@ -0,0 +1,34 @@
{% extends "source_updates_base.tmpl" %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
{% endblock constmem %}
{% block threadidx %}
int src = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block electric_source_args %}
const int* __restrict__ srcinfo1,
const {{REAL}}* __restrict__ srcinfo2,
const {{REAL}}* __restrict__ srcwaveforms,
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
{{REAL}} *Ey,
{{REAL}} *Ez)
{% endblock electric_source_args %}
{% block magnetic_source_args %}
const int* __restrict__ srcinfo1,
const {{REAL}}* __restrict__ srcinfo2,
const {{REAL}}* __restrict__ srcwaveforms,
const unsigned int* __restrict__ ID,
{{REAL}} *Hx,
{{REAL}} *Hy,
{{REAL}} *Hz)
{% endblock magnetic_source_args %}

View file

@@ -0,0 +1,46 @@
{% extends "source_updates_base.tmpl" %}
{% block constmem %}
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updatecoeffsE %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updatecoeffsH %}
{{i}},
{% endfor %}
};
{% endblock constmem %}
{% block threadidx %}
int src = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block electric_source_args %}
__global const int* restrict srcinfo1,
__global const {{REAL}}* restrict srcinfo2,
__global const {{REAL}}* restrict srcwaveforms,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez)
{% endblock electric_source_args %}
{% block magnetic_source_args %}
__global const int* restrict srcinfo1,
__global const {{REAL}}* restrict srcinfo2,
__global const {{REAL}}* restrict srcwaveforms,
__global const unsigned int* restrict ID,
__global {{REAL}} *Hx,
__global {{REAL}} *Hy,
__global {{REAL}} *Hz)
{% endblock magnetic_source_args %}

View file

@@ -0,0 +1,50 @@
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
#define IDX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n)
#define IDX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k)
#define IDX3D_FIELDS(i,j,k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
{{KERNEL}} void store_outputs(int NRX,
int iteration,{% filter indent(width=28) %}{% block rx_args %}{% endblock rx_args %}{% endfilter %}{
// This function stores field component values for every receiver in the model.
//
// Args:
// NRX: total number of receivers in the model.
// rxs: array to store field components for receivers - rows
//     are field components; columns are iterations; pages are receivers
// Obtain linear index corresponding to the current work item
{% block threadidx %}{% endblock threadidx %}
int i,j,k;
if (rx < NRX) {
i = rxcoords[IDX2D_RXCOORDS(rx,0)];
j = rxcoords[IDX2D_RXCOORDS(rx,1)];
k = rxcoords[IDX2D_RXCOORDS(rx,2)];
rxs[IDX3D_RXS(0,iteration,rx)] = Ex[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(1,iteration,rx)] = Ey[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(2,iteration,rx)] = Ez[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(3,iteration,rx)] = Hx[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(4,iteration,rx)] = Hy[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(5,iteration,rx)] = Hz[IDX3D_FIELDS(i,j,k)];
}
}
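
rxs is a flattened 3D array whose first axis is the field component (Ex, Ey, Ez, Hx, Hy, Hz), second axis the iteration and third axis the receiver. A hedged NumPy view of that layout (sizes are illustrative):

import numpy as np

# Hedged sketch of the rxs layout filled by store_outputs.
iterations, nrx = 100, 3
rxs = np.zeros((6, iterations, nrx), dtype=np.float32)

# Equivalent of rxs[IDX3D_RXS(0, iteration, rx)] = Ex[...] for iteration 10, receiver 2:
rxs[0, 10, 2] = 1.5
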

View file

@@ -0,0 +1,18 @@
{% extends "store_outputs_base.tmpl" %}
{% block threadidx %}
int rx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block rx_args %}
const int* __restrict__ rxcoords,
{{REAL}} *rxs,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz)
{% endblock rx_args %}

View file

@@ -0,0 +1,19 @@
{% extends "store_outputs_base.tmpl" %}
{% block threadidx %}
int rx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block rx_args %}
__global const int* restrict rxcoords,
__global {{REAL}} *rxs,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz)
{% endblock rx_args %}

View file

@@ -0,0 +1,22 @@
{% block complex_header %}{% endblock complex_header %}
// Macros for converting subscripts to linear index
#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define IDX2D_MATDISP(m, n) (m)*({{NY_MATDISPCOEFFS}})+(n)
#define IDX2D_R(m, n) (m)*(NY_R)+(n)
#define IDX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n)
#define IDX2D_SRCINFO(m, n) (m)*{{NY_SRCINFO}}+(n)
#define IDX2D_SRCWAVES(m, n) (m)*({{NY_SRCWAVES}})+(n)
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define IDX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k)
#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
#define IDX4D_SNAPS(p, i, j, k) (p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k)
#define IDX4D_T(p, i, j, k) (p)*({{NX_T}})*({{NY_T}})*({{NZ_T}})+(i)*({{NY_T}})*({{NZ_T}})+(j)*({{NZ_T}})+(k)
#define IDX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k)
#define IDX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k)
// Material coefficients (read-only) stored in constant memory of compute device
{% block constmem %}{% endblock constmem %}
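
These common headers are Jinja2 templates: the base defines the index macros and an empty constmem block, and the CUDA/OpenCL children override the blocks. A hedged, self-contained sketch of that inheritance pattern (the real templates are loaded from gprMax's template directory; names and values here are illustrative):

from jinja2 import DictLoader, Environment

# Hedged sketch of the base/child template pattern used for the kernel headers.
templates = {
    'base.tmpl': '#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)\n'
                 '{% block constmem %}{% endblock constmem %}',
    'opencl.tmpl': '{% extends "base.tmpl" %}'
                   '{% block constmem %}__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];{% endblock %}',
}
env = Environment(loader=DictLoader(templates))
print(env.get_template('opencl.tmpl').render(NY_MATCOEFFS=5, REAL='float', N_updatecoeffsE=20))
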

View file

@@ -0,0 +1,11 @@
{% extends "knl_common_base.tmpl" %}
{% block complex_header %}
#include <pycuda-complex.hpp>
{% endblock complex_header %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
{% endblock constmem %}

View file

@@ -0,0 +1,22 @@
{% extends "knl_common_base.tmpl" %}
{% block complex_header %}
#include <pyopencl-complex.h>
{% endblock complex_header %}
{% block constmem %}
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updatecoeffsE %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updatecoeffsH %}
{{i}},
{% endfor %}
};
{% endblock constmem %}

View file

@@ -0,0 +1,233 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
update_electric = Template("""
// Electric field updates - normal materials.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain.
// ID, E, H: Access to ID and field component arrays.
// Convert the linear index to subscripts for 3D field arrays
int x = i / ($NY_FIELDS * $NZ_FIELDS);
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Ex component
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
Ex[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(x,y,z)] +
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x,y-1,z)]) -
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x,y,z-1)]);
}
// Ey component
if ((NX != 1 || NZ != 1) && x > 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) {
int materialEy = ID[IDX4D_ID(1,x_ID,y_ID,z_ID)];
Ey[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(x,y,z)] +
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y,z-1)]) -
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x-1,y,z)]);
}
// Ez component
if ((NX != 1 || NY != 1) && x > 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) {
int materialEz = ID[IDX4D_ID(2,x_ID,y_ID,z_ID)];
Ez[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(x,y,z)] +
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x-1,y,z)]) -
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y-1,z)]);
}
""")
update_magnetic = Template("""
// Magnetic field updates - normal materials.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain.
// ID, E, H: Access to ID and field component arrays.
// Convert the linear index to subscripts for 3D field arrays
int x = i / ($NY_FIELDS * $NZ_FIELDS);
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Hx component
if (NX != 1 && x > 0 && x < NX && y >= 0 && y < NY && z >= 0 && z < NZ) {
int materialHx = ID[IDX4D_ID(3,x_ID,y_ID,z_ID)];
Hx[IDX3D_FIELDS(x,y,z)] = updatecoeffsH[IDX2D_MAT(materialHx,0)] * Hx[IDX3D_FIELDS(x,y,z)] -
updatecoeffsH[IDX2D_MAT(materialHx,2)] * (Ez[IDX3D_FIELDS(x,y+1,z)] - Ez[IDX3D_FIELDS(x,y,z)]) +
updatecoeffsH[IDX2D_MAT(materialHx,3)] * (Ey[IDX3D_FIELDS(x,y,z+1)] - Ey[IDX3D_FIELDS(x,y,z)]);
}
// Hy component
if (NY != 1 && x >= 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) {
int materialHy = ID[IDX4D_ID(4,x_ID,y_ID,z_ID)];
Hy[IDX3D_FIELDS(x,y,z)] = updatecoeffsH[IDX2D_MAT(materialHy,0)] * Hy[IDX3D_FIELDS(x,y,z)] -
updatecoeffsH[IDX2D_MAT(materialHy,3)] * (Ex[IDX3D_FIELDS(x,y,z+1)] - Ex[IDX3D_FIELDS(x,y,z)]) +
updatecoeffsH[IDX2D_MAT(materialHy,1)] * (Ez[IDX3D_FIELDS(x+1,y,z)] - Ez[IDX3D_FIELDS(x,y,z)]);
}
// Hz component
if (NZ != 1 && x >= 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) {
int materialHz = ID[IDX4D_ID(5,x_ID,y_ID,z_ID)];
Hz[IDX3D_FIELDS(x,y,z)] = updatecoeffsH[IDX2D_MAT(materialHz,0)] * Hz[IDX3D_FIELDS(x,y,z)] -
updatecoeffsH[IDX2D_MAT(materialHz,1)] * (Ey[IDX3D_FIELDS(x+1,y,z)] - Ey[IDX3D_FIELDS(x,y,z)]) +
updatecoeffsH[IDX2D_MAT(materialHz,2)] * (Ex[IDX3D_FIELDS(x,y+1,z)] - Ex[IDX3D_FIELDS(x,y,z)]);
}
""")
update_electric_dispersive_A = Template("""
// Electric field updates - dispersive materials - part A of updates to electric
// field values when dispersive materials
// (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain.
// MAXPOLES: Maximum number of dispersive material poles present in model.
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients,
// dispersive, ID and field
// component arrays.
// Convert the linear index to subscripts for 3D field arrays
int x = i / ($NY_FIELDS * $NZ_FIELDS);
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Convert the linear index to subscripts for 4D dispersive array
int x_T = (i % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T);
int y_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
int z_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
// Ex component
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
$REAL phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEx,pole*3)]$REALFUNC * Tx[IDX4D_T(pole,x_T,y_T,z_T)]$REALFUNC;
Tx[IDX4D_T(pole,x_T,y_T,z_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEx,1+(pole*3))] * Tx[IDX4D_T(pole,x_T,y_T,z_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(x,y,z)];
}
Ex[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(x,y,z)] +
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x,y-1,z)]) -
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x,y,z-1)]) -
updatecoeffsE[IDX2D_MAT(materialEx,4)] * phi;
}
// Ey component
if ((NX != 1 || NZ != 1) && x > 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) {
int materialEy = ID[IDX4D_ID(1,x_ID,y_ID,z_ID)];
$REAL phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEy,pole*3)]$REALFUNC * Ty[IDX4D_T(pole,x_T,y_T,z_T)]$REALFUNC;
Ty[IDX4D_T(pole,x_T,y_T,z_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEy,1+(pole*3))] * Ty[IDX4D_T(pole,x_T,y_T,z_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(x,y,z)];
}
Ey[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(x,y,z)] +
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y,z-1)]) -
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x-1,y,z)]) -
updatecoeffsE[IDX2D_MAT(materialEy,4)] * phi;
}
// Ez component
if ((NX != 1 || NY != 1) && x > 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) {
int materialEz = ID[IDX4D_ID(2,x_ID,y_ID,z_ID)];
$REAL phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEz,pole*3)]$REALFUNC * Tz[IDX4D_T(pole,x_T,y_T,z_T)]$REALFUNC;
Tz[IDX4D_T(pole,x_T,y_T,z_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEz,1+(pole*3))] * Tz[IDX4D_T(pole,x_T,y_T,z_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(x,y,z)];
}
Ez[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(x,y,z)] +
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x-1,y,z)]) -
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y-1,z)]) -
updatecoeffsE[IDX2D_MAT(materialEz,4)] * phi;
}
""")
update_electric_dispersive_B = Template("""
// Electric field updates - dispersive materials - part B of updates to electric
// field values when dispersive materials
// (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain.
// MAXPOLES: Maximum number of dispersive material poles present in model.
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients,
// dispersive, ID and field
// component arrays.
// Convert the linear index to subscripts for 3D field arrays
int x = i / ($NY_FIELDS * $NZ_FIELDS);
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Convert the linear index to subscripts for 4D dispersive array
int x_T = (i % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T);
int y_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
int z_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
// Ex component
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tx[IDX4D_T(pole,x_T,y_T,z_T)] = Tx[IDX4D_T(pole,x_T,y_T,z_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(x,y,z)];
}
}
// Ey component
if ((NX != 1 || NZ != 1) && x > 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) {
int materialEy = ID[IDX4D_ID(1,x_ID,y_ID,z_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Ty[IDX4D_T(pole,x_T,y_T,z_T)] = Ty[IDX4D_T(pole,x_T,y_T,z_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(x,y,z)];
}
}
// Ez component
if ((NX != 1 || NY != 1) && x > 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) {
int materialEz = ID[IDX4D_ID(2,x_ID,y_ID,z_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tz[IDX4D_T(pole,x_T,y_T,z_T)] = Tz[IDX4D_T(pole,x_T,y_T,z_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(x,y,z)];
}
}
""")

File diff is too large to display. Load diff

File diff is too large to display. Load diff

View file

@@ -0,0 +1,72 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
store_snapshot = Template("""
// Stores field values for a snapshot.
//
// Args:
// p: Snapshot number.
//     xs, xf, ys, yf, zs, zf: Start and finish cell coordinates for snapshot.
// dx, dy, dz: Sampling interval in cell coordinates for snapshot.
// E, H: Access to field component arrays.
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots.
// Convert the linear index to subscripts for 4D SNAPS array
int x = (i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) / ($NY_SNAPS * $NZ_SNAPS);
int y = ((i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) / $NZ_SNAPS;
int z = ((i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) % $NZ_SNAPS;
// Subscripts for field arrays
int xx, yy, zz;
if (x >= xs && x < xf && y >= ys && y < yf && z >= zs && z < zf) {
// Increment subscripts for field array to account for spatial sampling of snapshot
xx = (xs + x) * dx;
yy = (ys + y) * dy;
zz = (zs + z) * dz;
// The electric field component value at a point comes from an average of
// the 4 electric field component values in that cell
snapEx[IDX4D_SNAPS(p,x,y,z)] = (Ex[IDX3D_FIELDS(xx,yy,zz)] +
Ex[IDX3D_FIELDS(xx,yy+1,zz)] +
Ex[IDX3D_FIELDS(xx,yy,zz+1)] +
Ex[IDX3D_FIELDS(xx,yy+1,zz+1)]) / 4;
snapEy[IDX4D_SNAPS(p,x,y,z)] = (Ey[IDX3D_FIELDS(xx,yy,zz)] +
Ey[IDX3D_FIELDS(xx+1,yy,zz)] +
Ey[IDX3D_FIELDS(xx,yy,zz+1)] +
Ey[IDX3D_FIELDS(xx+1,yy,zz+1)]) / 4;
snapEz[IDX4D_SNAPS(p,x,y,z)] = (Ez[IDX3D_FIELDS(xx,yy,zz)] +
Ez[IDX3D_FIELDS(xx+1,yy,zz)] +
Ez[IDX3D_FIELDS(xx,yy+1,zz)] +
Ez[IDX3D_FIELDS(xx+1,yy+1,zz)]) / 4;
// The magnetic field component value at a point comes from an average of
// 2 magnetic field component values in that cell and the following cell
snapHx[IDX4D_SNAPS(p,x,y,z)] = (Hx[IDX3D_FIELDS(xx,yy,zz)] +
Hx[IDX3D_FIELDS(xx+1,yy,zz)]) / 2;
snapHy[IDX4D_SNAPS(p,x,y,z)] = (Hy[IDX3D_FIELDS(xx,yy,zz)] +
Hy[IDX3D_FIELDS(xx,yy+1,zz)]) / 2;
snapHz[IDX4D_SNAPS(p,x,y,z)] = (Hz[IDX3D_FIELDS(xx,yy,zz)] +
Hz[IDX3D_FIELDS(xx,yy,zz+1)]) / 2;
}
""")

View file

@@ -0,0 +1,173 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
update_hertzian_dipole = Template("""
// Updates electric field values for Hertzian dipole sources.
//
// Args:
// NHERTZDIPOLE: Total number of Hertzian dipoles in the model.
// iteration: Iteration number of simulation.
// dx, dy, dz: Spatial discretisations.
// srcinfo1: Source cell coordinates and polarisation information.
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values.
// ID, E: Access to ID and field component arrays.
if (i < NHERTZDIPOLE) {
$REAL dl;
int x, y, z, polarisation;
x = srcinfo1[IDX2D_SRCINFO(i,0)];
y = srcinfo1[IDX2D_SRCINFO(i,1)];
z = srcinfo1[IDX2D_SRCINFO(i,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(i,3)];
dl = srcinfo2[i];
// 'x' polarised source
if (polarisation == 0) {
int materialEx = ID[IDX4D_ID(0,x,y,z)];
Ex[IDX3D_FIELDS(x,y,z)] = Ex[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialEy = ID[IDX4D_ID(1,x,y,z)];
Ey[IDX3D_FIELDS(x,y,z)] = Ey[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialEz = ID[IDX4D_ID(2,x,y,z)];
Ez[IDX3D_FIELDS(x,y,z)] = Ez[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * dl * (1 / (dx * dy * dz));
}
}
""")
update_magnetic_dipole = Template("""
// Updates magnetic field values for magnetic dipole sources.
//
// Args:
// NMAGDIPOLE: Total number of magnetic dipoles in the model.
// iteration: Iteration number of simulation.
// dx, dy, dz: Spatial discretisations.
// srcinfo1: Source cell coordinates and polarisation information.
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values.
// ID, H: Access to ID and field component arrays.
if (i < NMAGDIPOLE) {
int x, y, z, polarisation;
x = srcinfo1[IDX2D_SRCINFO(i,0)];
y = srcinfo1[IDX2D_SRCINFO(i,1)];
z = srcinfo1[IDX2D_SRCINFO(i,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(i,3)];
// 'x' polarised source
if (polarisation == 0) {
int materialHx = ID[IDX4D_ID(3,x,y,z)];
Hx[IDX3D_FIELDS(x,y,z)] = Hx[IDX3D_FIELDS(x,y,z)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialHy = ID[IDX4D_ID(4,x,y,z)];
Hy[IDX3D_FIELDS(x,y,z)] = Hy[IDX3D_FIELDS(x,y,z)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialHz = ID[IDX4D_ID(5,x,y,z)];
Hz[IDX3D_FIELDS(x,y,z)] = Hz[IDX3D_FIELDS(x,y,z)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (dx * dy * dz));
}
}
""")
update_voltage_source = Template("""
// Updates electric field values for voltage sources.
//
// Args:
// NVOLTSRC: Total number of voltage sources in the model.
// iteration: Iteration number of simulation.
// dx, dy, dz: Spatial discretisations.
// srcinfo1: Source cell coordinates and polarisation information.
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values.
// ID, E: Access to ID and field component arrays.
if (i < NVOLTSRC) {
$REAL resistance;
int x, y, z, polarisation;
x = srcinfo1[IDX2D_SRCINFO(i,0)];
y = srcinfo1[IDX2D_SRCINFO(i,1)];
z = srcinfo1[IDX2D_SRCINFO(i,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(i,3)];
resistance = srcinfo2[i];
// 'x' polarised source
if (polarisation == 0) {
if (resistance != 0) {
int materialEx = ID[IDX4D_ID(0,x,y,z)];
Ex[IDX3D_FIELDS(x,y,z)] = Ex[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (resistance * dy * dz));
}
else {
Ex[IDX3D_FIELDS(x,y,z)] = -1 * srcwaveforms[IDX2D_SRCWAVES(i,iteration)] / dx;
}
}
// 'y' polarised source
else if (polarisation == 1) {
if (resistance != 0) {
int materialEy = ID[IDX4D_ID(1,x,y,z)];
Ey[IDX3D_FIELDS(x,y,z)] = Ey[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (resistance * dx * dz));
}
else {
Ey[IDX3D_FIELDS(x,y,z)] = -1 * srcwaveforms[IDX2D_SRCWAVES(i,iteration)] / dy;
}
}
// 'z' polarised source
else if (polarisation == 2) {
if (resistance != 0) {
int materialEz = ID[IDX4D_ID(2,x,y,z)];
Ez[IDX3D_FIELDS(x,y,z)] = Ez[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (resistance * dx * dy));
}
else {
Ez[IDX3D_FIELDS(x,y,z)] = -1 * srcwaveforms[IDX2D_SRCWAVES(i,iteration)] / dz;
}
}
}
""")

View file

@@ -0,0 +1,42 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
store_outputs = Template("""
// Stores field component values for every receiver in the model.
//
// Args:
// NRX: total number of receivers in the model.
// rxs: array to store field components for receivers - rows
//    are field components; columns are iterations; pages are receivers.
if (i < NRX) {
int x, y, z;
x = rxcoords[IDX2D_RXCOORDS(i,0)];
y = rxcoords[IDX2D_RXCOORDS(i,1)];
z = rxcoords[IDX2D_RXCOORDS(i,2)];
rxs[IDX3D_RXS(0,iteration,i)] = Ex[IDX3D_FIELDS(x,y,z)];
rxs[IDX3D_RXS(1,iteration,i)] = Ey[IDX3D_FIELDS(x,y,z)];
rxs[IDX3D_RXS(2,iteration,i)] = Ez[IDX3D_FIELDS(x,y,z)];
rxs[IDX3D_RXS(3,iteration,i)] = Hx[IDX3D_FIELDS(x,y,z)];
rxs[IDX3D_RXS(4,iteration,i)] = Hy[IDX3D_FIELDS(x,y,z)];
rxs[IDX3D_RXS(5,iteration,i)] = Hz[IDX3D_FIELDS(x,y,z)];
}
""")

View file

@@ -56,7 +56,7 @@ def store_outputs(G):
tl.Itotal[iteration] = tl.current[tl.antpos]
kernel_template_store_outputs = Template("""
knl_template_store_outputs = Template("""
// Macros for converting subscripts to linear index:
#define INDEX2D_RXCOORDS(m, n) (m)*($NY_RXCOORDS)+(n)

View file

@@ -32,6 +32,7 @@ args_defaults = {'scenes': None,
'restart': None,
'mpi': False,
'gpu': None,
'opencl': None,
'subgrid': False,
'autotranslate': False,
'geometry_only': False,
@@ -67,6 +68,8 @@ help_msg = {'scenes': '(list, opt): List of the scenes to run the model. '
'performance section of the User Guide.',
'gpu': '(list/bool, opt): Flag to use NVIDIA GPU or list of NVIDIA '
'GPU device ID(s) for specific GPU card(s).',
'opencl': '(list/bool, opt): Flag to use OpenCL or list of OpenCL '
'device ID(s) for specific compute device(s).',
'subgrid': '(bool, opt): Flag to use sub-gridding.',
'autotranslate': '(bool, opt): For sub-gridding - auto translate '
'objects with main grid coordinates to their '
@@ -92,6 +95,7 @@ def run(scenes=args_defaults['scenes'],
restart=args_defaults['restart'],
mpi=args_defaults['mpi'],
gpu=args_defaults['gpu'],
opencl=args_defaults['opencl'],
subgrid=args_defaults['subgrid'],
autotranslate=args_defaults['autotranslate'],
geometry_only=args_defaults['geometry_only'],
@@ -112,6 +116,7 @@ def run(scenes=args_defaults['scenes'],
'restart': restart,
'mpi': mpi,
'gpu': gpu,
'opencl': opencl,
'subgrid': subgrid,
'autotranslate': autotranslate,
'geometry_only': geometry_only,
@@ -139,6 +144,8 @@ def cli():
help=help_msg['mpi'])
parser.add_argument('-gpu', type=int, action='append', nargs='*',
help=help_msg['gpu'])
parser.add_argument('-opencl', type=int, action='append', nargs='*',
help=help_msg['opencl'])
parser.add_argument('--geometry-only', action='store_true',
default=args_defaults['geometry_only'],
help=help_msg['geometry_only'])
@@ -176,11 +183,11 @@ def run_main(args):
if args.spotpy:
context = SPOTPYContext()
context.run(args.i)
# MPI running with (OpenMP/CUDA)
# MPI running with (OpenMP/CUDA/OpenCL)
elif config.sim_config.args.mpi:
context = MPIContext()
context.run()
# Standard running (OpenMP/CUDA)
# Standard running (OpenMP/CUDA/OpenCL)
else:
context = Context()
context.run()
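
With the new 'opencl' argument wired into args_defaults, run() and the CLI parser, device selection mirrors the existing '-gpu' flag. A hedged sketch of how that argparse definition behaves (device IDs are illustrative):

import argparse

# Hedged sketch: parsing behaviour of the new '-opencl' flag (mirrors '-gpu').
parser = argparse.ArgumentParser()
parser.add_argument('-opencl', type=int, action='append', nargs='*',
                    help='Flag to use OpenCL or list of OpenCL device ID(s).')
print(parser.parse_args(['-opencl', '0', '1']).opencl)  # -> [[0, 1]]
print(parser.parse_args([]).opencl)                     # -> None
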

View file

@@ -306,33 +306,74 @@ class CUDAGrid(FDTDGrid):
self.bpg = (int(np.ceil(((self.nx + 1) * (self.ny + 1) *
(self.nz + 1)) / self.tpb[0])), 1, 1)
def htod_geometry_arrays(self):
"""Initialise an array for cell edge IDs (ID) on GPU."""
import pycuda.gpuarray as gpuarray
def htod_geometry_arrays(self, queue=None):
"""Initialise an array for cell edge IDs (ID) on compute device.
Args:
queue: pyopencl queue.
"""
self.ID_gpu = gpuarray.to_gpu(self.ID)
if config.sim_config.general['solver'] == 'cuda':
import pycuda.gpuarray as gpuarray
self.ID_dev = gpuarray.to_gpu(self.ID)
def htod_field_arrays(self):
"""Initialise geometry and field arrays on GPU."""
elif config.sim_config.general['solver'] == 'opencl':
import pyopencl.array as clarray
self.ID_dev = clarray.to_device(queue, self.ID)
import pycuda.gpuarray as gpuarray
def htod_field_arrays(self, queue=None):
"""Initialise field arrays on compute device.
Args:
queue: pyopencl queue.
"""
self.Ex_gpu = gpuarray.to_gpu(self.Ex)
self.Ey_gpu = gpuarray.to_gpu(self.Ey)
self.Ez_gpu = gpuarray.to_gpu(self.Ez)
self.Hx_gpu = gpuarray.to_gpu(self.Hx)
self.Hy_gpu = gpuarray.to_gpu(self.Hy)
self.Hz_gpu = gpuarray.to_gpu(self.Hz)
if config.sim_config.general['solver'] == 'cuda':
import pycuda.gpuarray as gpuarray
self.Ex_dev = gpuarray.to_gpu(self.Ex)
self.Ey_dev = gpuarray.to_gpu(self.Ey)
self.Ez_dev = gpuarray.to_gpu(self.Ez)
self.Hx_dev = gpuarray.to_gpu(self.Hx)
self.Hy_dev = gpuarray.to_gpu(self.Hy)
self.Hz_dev = gpuarray.to_gpu(self.Hz)
elif config.sim_config.general['solver'] == 'opencl':
import pyopencl.array as clarray
self.Ex_dev = clarray.to_device(queue, self.Ex)
self.Ey_dev = clarray.to_device(queue, self.Ey)
self.Ez_dev = clarray.to_device(queue, self.Ez)
self.Hx_dev = clarray.to_device(queue, self.Hx)
self.Hy_dev = clarray.to_device(queue, self.Hy)
self.Hz_dev = clarray.to_device(queue, self.Hz)
def htod_dispersive_arrays(self):
"""Initialise dispersive material coefficient arrays on GPU."""
def htod_dispersive_arrays(self, queue=None):
"""Initialise dispersive material coefficient arrays on compute device.
Args:
queue: pyopencl queue.
"""
import pycuda.gpuarray as gpuarray
if config.sim_config.general['solver'] == 'cuda':
import pycuda.gpuarray as gpuarray
self.Tx_dev = gpuarray.to_gpu(self.Tx)
self.Ty_dev = gpuarray.to_gpu(self.Ty)
self.Tz_dev = gpuarray.to_gpu(self.Tz)
self.updatecoeffsdispersive_dev = gpuarray.to_gpu(self.updatecoeffsdispersive)
elif config.sim_config.general['solver'] == 'opencl':
import pyopencl.array as clarray
self.Tx_dev = clarray.to_device(queue, self.Tx)
self.Ty_dev = clarray.to_device(queue, self.Ty)
self.Tz_dev = clarray.to_device(queue, self.Tz)
self.updatecoeffsdispersive_dev = clarray.to_device(queue, self.updatecoeffsdispersive)
self.Tx_gpu = gpuarray.to_gpu(self.Tx)
self.Ty_gpu = gpuarray.to_gpu(self.Ty)
self.Tz_gpu = gpuarray.to_gpu(self.Tz)
self.updatecoeffsdispersive_gpu = gpuarray.to_gpu(self.updatecoeffsdispersive)
class OpenCLGrid(CUDAGrid):
"""Additional grid methods for solving on compute device using OpenCL."""
def __init__(self):
super().__init__()
def set_blocks_per_grid(self):
pass
def dispersion_analysis(G):
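
The OpenCL branches above transfer host arrays with pyopencl.array.to_device and a command queue, where the CUDA branches use pycuda.gpuarray.to_gpu. A hedged, standalone sketch of that transfer pattern (requires pyopencl and at least one OpenCL device; the array shape is illustrative):

import numpy as np
import pyopencl as cl
import pyopencl.array as clarray

# Hedged sketch of the host-to-device transfer used in the OpenCL branches.
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

Ex = np.zeros((10, 10, 10), dtype=np.float32)
Ex_dev = clarray.to_device(queue, Ex)    # analogous to self.Ex_dev above
assert np.array_equal(Ex_dev.get(), Ex)  # copy back to host and check
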

View file

@@ -149,7 +149,9 @@ class ModelBuildRun:
# Check memory requirements
total_mem, mem_strs = mem_check_all(grids)
logger.info(f'\nMemory required: {" + ".join(mem_strs)} + ~{human_size(config.get_model_config().mem_overhead)} overhead = {human_size(total_mem)}')
logger.info(f'\nMemory required: {" + ".join(mem_strs)} + '
f'~{human_size(config.get_model_config().mem_overhead)} '
f'overhead = {human_size(total_mem)}')
# Build grids
gridbuilders = [GridBuilder(grid) for grid in grids]
@@ -170,21 +172,41 @@ class ModelBuildRun:
# Check to see if numerical dispersion might be a problem
results = dispersion_analysis(gb.grid)
if results['error']:
logger.warning(f"\nNumerical dispersion analysis [{gb.grid.name}] not carried out as {results['error']}")
logger.warning(f"\nNumerical dispersion analysis [{gb.grid.name}] "
f"not carried out as {results['error']}")
elif results['N'] < config.get_model_config().numdispersion['mingridsampling']:
logger.exception(f"\nNon-physical wave propagation in [{gb.grid.name}] detected. Material '{results['material'].ID}' has wavelength sampled by {results['N']} cells, less than required minimum for physical wave propagation. Maximum significant frequency estimated as {results['maxfreq']:g}Hz")
logger.exception(f"\nNon-physical wave propagation in [{gb.grid.name}] "
f"detected. Material '{results['material'].ID}' "
f"has wavelength sampled by {results['N']} cells, "
f"less than required minimum for physical wave "
f"propagation. Maximum significant frequency "
f"estimated as {results['maxfreq']:g}Hz")
raise ValueError
elif (results['deltavp'] and np.abs(results['deltavp']) >
config.get_model_config().numdispersion['maxnumericaldisp']):
logger.warning(f"\n[{gb.grid.name}] has potentially significant numerical dispersion. Estimated largest physical phase-velocity error is {results['deltavp']:.2f}% in material '{results['material'].ID}' whose wavelength sampled by {results['N']} cells. Maximum significant frequency estimated as {results['maxfreq']:g}Hz")
logger.warning(f"\n[{gb.grid.name}] has potentially significant "
f"numerical dispersion. Estimated largest physical "
f"phase-velocity error is {results['deltavp']:.2f}% "
f"in material '{results['material'].ID}' whose "
f"wavelength sampled by {results['N']} cells. "
f"Maximum significant frequency estimated as "
f"{results['maxfreq']:g}Hz")
elif results['deltavp']:
logger.info(f"\nNumerical dispersion analysis [{gb.grid.name}]: estimated largest physical phase-velocity error is {results['deltavp']:.2f}% in material '{results['material'].ID}' whose wavelength sampled by {results['N']} cells. Maximum significant frequency estimated as {results['maxfreq']:g}Hz")
logger.info(f"\nNumerical dispersion analysis [{gb.grid.name}]: "
f"estimated largest physical phase-velocity error is "
f"{results['deltavp']:.2f}% in material '{results['material'].ID}' "
f"whose wavelength sampled by {results['N']} cells. "
f"Maximum significant frequency estimated as "
f"{results['maxfreq']:g}Hz")
def reuse_geometry(self):
# Reset iteration number
self.G.iteration = 0
s = f'\n--- Model {config.get_model_config().appendmodelnumber}/{config.sim_config.model_end}, input file (not re-processed, i.e. geometry fixed): {config.sim_config.input_file_path}'
config.get_model_config().inputfilestr = Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" + Style.RESET_ALL
s = (f'\n--- Model {config.get_model_config().appendmodelnumber}/{config.sim_config.model_end}, '
f'input file (not re-processed, i.e. geometry fixed): '
f'{config.sim_config.input_file_path}')
config.get_model_config().inputfilestr = (Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" +
Style.RESET_ALL)
logger.basic(config.get_model_config().inputfilestr)
for grid in [self.G] + self.G.subgrids:
grid.reset_fields()
@@ -224,7 +246,9 @@ class ModelBuildRun:
fn = snapshotdir / Path(snap.filename)
snap.filename = fn.with_suffix(snap.fileext)
pbar = tqdm(total=snap.vtkdatawritesize, leave=True, unit='byte',
unit_scale=True, desc=f'Writing snapshot file {i + 1} of {len(self.G.snapshots)}, {snap.filename.name}',
unit_scale=True, desc=f'Writing snapshot file {i + 1} '
f'of {len(self.G.snapshots)}, '
f'{snap.filename.name}',
ncols=get_terminal_width() - 1, file=sys.stdout,
disable=not config.sim_config.general['progressbars'])
snap.write_file(pbar, self.G)
@@ -235,12 +259,12 @@ class ModelBuildRun:
"""Print resource information on runtime and memory usage.
Args:
tsolve (float): Time taken to execute solving (seconds).
memsolve (float): Memory (RAM) used on GPU.
tsolve: float of time taken to execute solving (seconds).
memsolve: float of memory (RAM) used.
"""
mem_str = ''
if config.sim_config.general['cuda']:
if config.sim_config.general['solver'] == 'cuda':
mem_str = f' host + ~{human_size(memsolve)} GPU'
logger.info(f'\nMemory used: ~{human_size(self.p.memory_full_info().uss)}{mem_str}')
@@ -250,24 +274,37 @@ class ModelBuildRun:
"""Solve using FDTD method.
Args:
solver (Solver): solver object.
solver: solver object.
Returns:
tsolve (float): time taken to execute solving (seconds).
tsolve: float of time taken to execute solving (seconds).
"""
# Check number of OpenMP threads
if config.sim_config.general['cpu']:
logger.basic(f"CPU solver using: {config.get_model_config().ompthreads} OpenMP thread(s) on {config.sim_config.hostinfo['hostname']}\n")
# Print information about and check OpenMP threads
if config.sim_config.general['solver'] == 'cpu':
logger.basic(f"OPENMP solver with {config.get_model_config().ompthreads} "
f"thread(s) on {config.sim_config.hostinfo['hostname']}\n")
if config.get_model_config().ompthreads > config.sim_config.hostinfo['physicalcores']:
logger.warning(f"You have specified more threads ({config.get_model_config().ompthreads}) than available physical CPU cores ({config.sim_config.hostinfo['physicalcores']}). This may lead to degraded performance.")
# Print information about any GPU in use
elif config.sim_config.general['cuda']:
logger.basic(f"GPU solver using: {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} on {config.sim_config.hostinfo['hostname']}\n")
logger.warning(f"You have specified more threads ({config.get_model_config().ompthreads}) "
f"than available physical CPU cores ({config.sim_config.hostinfo['physicalcores']}). "
f"This may lead to degraded performance.")
# Print information about any compute device, e.g. GPU, in use
elif config.sim_config.general['solver'] == 'cuda' or config.sim_config.general['solver'] == 'opencl':
solvername = config.sim_config.general['solver'].upper()
hostname = config.sim_config.hostinfo['hostname']
if config.sim_config.general['solver'] == 'opencl':
platformname = ' on ' + ' '.join(config.get_model_config().device['dev'].platform.name.split()) + ' platform'
else:
platformname = ''
devicename = ' '.join(config.get_model_config().device['dev'].name.split())
logger.basic(f"{solvername} solver using {devicename}{platformname} "
f"on {hostname}\n")
# Prepare iterator
if config.sim_config.general['progressbars']:
iterator = tqdm(range(self.G.iterations), desc=f'Running model {config.model_num + 1}/{config.sim_config.model_end}', ncols=get_terminal_width() - 1, file=sys.stdout, disable=not config.sim_config.general['progressbars'])
iterator = tqdm(range(self.G.iterations), desc=f'Running model {config.model_num + 1}/{config.sim_config.model_end}',
ncols=get_terminal_width() - 1, file=sys.stdout,
disable=not config.sim_config.general['progressbars'])
else:
iterator = range(self.G.iterations)

View file

@@ -0,0 +1,311 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
#include <pyopencl-complex.h>
#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}}) + (n)
#define INDEX2D_MATDISP(m, n) (m)*({{NY_MATDISPCOEFFS}}) + (n)
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}}) + (i)*({{NY_ID}})*({{NZ_ID}}) + (j)*({{NZ_ID}}) + (k)
#define INDEX4D_T(p, i, j, k) (p)*({{NX_T}})*({{NY_T}})*({{NZ_T}}) + (i)*({{NY_T}})*({{NZ_T}}) + (j)*({{NZ_T}}) + (k)
// Material coefficients (read-only) in constant memory
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updateEVal %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updateHVal %}
{{i}},
{% endfor %}
};
///////////////////////////////////////////////
// Electric field updates - normal materials //
///////////////////////////////////////////////
__kernel void update_electric(int NX, int NY, int NZ,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}} * restrict Hx,
__global const {{REAL}} * restrict Hy,
__global const {{REAL}} * restrict Hz) {
// This function updates electric field values.
//
// Args:
//  NX, NY, NZ: Number of cells of the model domain.
// ID, E, H: Access to ID and field component arrays.
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}}*{{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}}*{{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID arrays
int i_ID = (idx%({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx%({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx%({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]);
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]);
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]);
}
}
////////////////////////////
// Magnetic field updates //
////////////////////////////
__kernel void update_magnetic(int NX, int NY, int NZ,
__global const unsigned int* restrict ID,
__global {{REAL}} *Hx,
__global {{REAL}} *Hy,
__global {{REAL}} *Hz,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez) {
// This function updates magnetic field values.
//
// Args:
// NX, NY, NZ: number of cells of the model domain.
// ID, E, H: access to ID and field component arrays.
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(0);
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx%({{NY_FIELDS}}*{{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx%({{NY_FIELDS}}*{{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID arrays
int i_ID = ( idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = (( idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = (( idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Hx component
if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) {
int materialHx = ID[INDEX4D_ID(3,i_ID,j_ID,k_ID)];
Hx[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHx,0)] * Hx[INDEX3D_FIELDS(i,j,k)] -
updatecoeffsH[INDEX2D_MAT(materialHx,2)] * (Ez[INDEX3D_FIELDS(i,j+1,k)] - Ez[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHx,3)] * (Ey[INDEX3D_FIELDS(i,j,k+1)] - Ey[INDEX3D_FIELDS(i,j,k)]);
}
// Hy component
if (NY != 1 && i >= 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialHy = ID[INDEX4D_ID(4,i_ID,j_ID,k_ID)];
Hy[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHy,0)] * Hy[INDEX3D_FIELDS(i,j,k)] -
updatecoeffsH[INDEX2D_MAT(materialHy,3)] * (Ex[INDEX3D_FIELDS(i,j,k+1)] - Ex[INDEX3D_FIELDS(i,j,k)]) +
updatecoeffsH[INDEX2D_MAT(materialHy,1)] * (Ez[INDEX3D_FIELDS(i+1,j,k)] - Ez[INDEX3D_FIELDS(i,j,k)]);
}
// Hz component
if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialHz = ID[INDEX4D_ID(5,i_ID,j_ID,k_ID)];
Hz[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHz,0)] * Hz[INDEX3D_FIELDS(i,j,k)] -
updatecoeffsH[INDEX2D_MAT(materialHz,1)] * (Ey[INDEX3D_FIELDS(i+1,j,k)] - Ey[INDEX3D_FIELDS(i,j,k)]) +
updatecoeffsH[INDEX2D_MAT(materialHz,2)] * (Ex[INDEX3D_FIELDS(i,j+1,k)] - Ex[INDEX3D_FIELDS(i,j,k)]);
}
}
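A rendered kernel such as update_magnetic would be compiled and enqueued from the Python host side. The following is only a minimal pyopencl sketch, with an assumed stand-in kernel body and example array sizes rather than the solver's actual host code:

# Minimal pyopencl sketch (assumed names, sizes, and kernel body; illustrative only).
import numpy as np
import pyopencl as cl

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# 'rendered_src' stands in for the Jinja2-rendered OpenCL source shown above.
rendered_src = """
__kernel void update_magnetic(__global float *Hx) {
    int idx = get_global_id(0);
    Hx[idx] += 1.0f;   // stand-in body for illustration
}
"""
prg = cl.Program(ctx, rendered_src).build()

# Example field array (single precision), copied to the device.
nx = ny = nz = 100
hx = np.zeros((nx + 1, ny + 1, nz + 1), dtype=np.float32)
mf = cl.mem_flags
hx_dev = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=hx)

# One work-item per field-array element; a 1D launch matches the
# get_global_id(0) indexing used by update_magnetic above.
prg.update_magnetic(queue, (hx.size,), None, hx_dev)
cl.enqueue_copy(queue, hx, hx_dev)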
///////////////////////////////////////////////////
// Electric field updates - dispersive materials //
///////////////////////////////////////////////////
__kernel void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES,
__global const {{COMPLEX-}}_t* restrict updatecoeffsdispersive,
__global {{COMPLEX-}}_t *Tx,
__global {{COMPLEX-}}_t *Ty,
__global {{COMPLEX-}}_t *Tz,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz) {
// This function is part A of the update to electric field values when dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatecoeffsdispersive, T, ID, E, H: Access to dispersive update coefficient, dispersive (T), ID, and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,pole*3)].real * Tx[INDEX4D_T(pole,i_T,j_T,k_T)].real;
Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_add(cfloat_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,1+(pole*3))],
Tx[INDEX4D_T(pole,i_T,j_T,k_T)]),
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))],
Ex[INDEX3D_FIELDS(i,j,k)]));
}
Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]) -
updatecoeffsE[INDEX2D_MAT(materialEx,4)] * phi;
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,pole*3)].real * Ty[INDEX4D_T(pole,i_T,j_T,k_T)].real;
Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_add(cfloat_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,1+(pole*3))],
Ty[INDEX4D_T(pole,i_T,j_T,k_T)]),
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))],
Ey[INDEX3D_FIELDS(i,j,k)]));
}
Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) -
updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]) -
updatecoeffsE[INDEX2D_MAT(materialEy,4)] * phi;
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,pole*3)].real * Tz[INDEX4D_T(pole,i_T,j_T,k_T)].real;
Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_add(cfloat_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,1+(pole*3))],
Tz[INDEX4D_T(pole,i_T,j_T,k_T)]),
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))],
Ez[INDEX3D_FIELDS(i,j,k)]));
}
Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) -
updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[INDEX2D_MAT(materialEz,4)] * phi;
}
}
__kernel void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES,
__global const {{COMPLEX-}}_t* restrict updatecoeffsdispersive,
__global {{COMPLEX-}}_t *Tx,
__global {{COMPLEX-}}_t *Ty,
__global {{COMPLEX-}}_t *Tz,
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez) {
// This function is part B of the update, which advances the dispersive (T) field arrays when dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatecoeffsdispersive, T, ID, E: Access to dispersive update coefficient, dispersive (T), ID, and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_sub(Tx[INDEX4D_T(pole,i_T,j_T,k_T)],
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))],
Ex[INDEX3D_FIELDS(i,j,k)]));
}
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_sub(Ty[INDEX4D_T(pole,i_T,j_T,k_T)],
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))],
Ey[INDEX3D_FIELDS(i,j,k)]));
}
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_sub(Tz[INDEX4D_T(pole,i_T,j_T,k_T)],
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))],
Ez[INDEX3D_FIELDS(i,j,k)]));
}
}
}
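Taken together, part A accumulates phi from the recursive accumulators (T), advances them with the old E values, and applies the corrected field update; part B then removes the term proportional to the now-updated E from each accumulator. A scalar, single-pole Python mirror of that sequence (with made-up real coefficients standing in for the complex updatecoeffsdispersive values, and the curl collapsed to one term) is:

# Scalar mirror of one pole of the dispersive A/B passes above (illustrative;
# real numbers stand in for the complex-valued dispersive coefficients).
def dispersive_A(E_old, T, c0, c1, c2, curlH, ce0, ce_curl, ce_phi):
    phi = c0 * T                      # part A: accumulate phi from the current T
    T = c1 * T + c2 * E_old           # advance the recursive accumulator with the old E
    E_new = ce0 * E_old + ce_curl * curlH - ce_phi * phi
    return E_new, T

def dispersive_B(E_new, T, c2):
    return T - c2 * E_new             # part B: remove the c2*E term using the updated E

E, T = 1.0, 0.0
E, T = dispersive_A(E, T, c0=0.5, c1=0.9, c2=0.1, curlH=0.0, ce0=1.0, ce_curl=0.5, ce_phi=0.2)
T = dispersive_B(E, T, c2=0.1)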

View file

@@ -0,0 +1,955 @@
// Macros for converting subscripts to linear index:
#define INDEX2D_R(m, n) (m)*(NY_R)+(n)
#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k)
#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k)
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updateEVal %}
{{i}},
{% endfor %}
};
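The {{...}} placeholders and the {% for %} loop above are filled in at build time on the host. A small, self-contained Jinja2 sketch of that rendering step, with assumed example values in place of the real coefficient list, is:

# Sketch of rendering the constant-array block above (assumed example values;
# the real build step lives in the solver's Python host code).
from jinja2 import Template

tmpl = Template(
    "__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =\n"
    "{\n"
    "{% for i in updateEVal %}"
    "{{i}},\n"
    "{% endfor %}"
    "};\n"
)
src = tmpl.render(REAL="float", N_updatecoeffsE=3, updateEVal=[1.0, 0.5, 0.25])
print(src)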
__kernel void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d){
// This function updates the Ey and Ez field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHy, dHz;
{{REAL}} dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - i1;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - i2;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
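Each first-order PML kernel follows the same pattern: form the spatial derivative of the relevant H components, correct the standard field update with RA01 * dH + RB0 * PHI, then advance PHI recursively. A scalar Python sketch of that recursion, with illustrative coefficient values, is:

# Scalar sketch of the first-order recursive PML correction applied above
# (illustrative values; RA01 = RA0 - 1, as in the kernel).
def pml_order1_step(E, phi, dH, RA0, RB0, RE0, RF0, ce4):
    RA01 = RA0 - 1.0
    E = E - ce4 * (RA01 * dH + RB0 * phi)   # corrected field update
    phi = RE0 * phi - RF0 * dH              # advance the recursive accumulator
    return E, phi

E, phi = 0.0, 0.0
for step in range(3):
    E, phi = pml_order1_step(E, phi, dH=1.0, RA0=1.1, RB0=0.2, RE0=0.8, RF0=0.1, ce4=0.5)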
__kernel void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d){
// This function updates the Ey and Ez field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz;
{{REAL}} dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - i1;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - i2;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
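The second-order kernels cascade two first-order accumulators: RA01 becomes RA0 * RA1 - 1, and PHI1[1] is driven by the output of the PHI1[0] stage before PHI1[0] itself is advanced. A scalar Python mirror of that ordering (illustrative only) is:

# Scalar sketch of the cascaded second-order recursion used above
# (illustrative; mirrors the PHI1[1] / PHI1[0] update ordering of the kernel).
def pml_order2_step(E, phi0, phi1, dH, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, ce4):
    RA01 = RA0 * RA1 - 1.0
    E = E - ce4 * (RA01 * dH + RA1 * RB0 * phi0 + RB1 * phi1)
    phi1 = RE1 * phi1 - RF1 * (RA0 * dH + RB0 * phi0)  # second stage fed by the first
    phi0 = RE0 * phi0 - RF0 * dH                       # first stage advanced last
    return E, phi0, phi1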
__kernel void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d){
// This function updates the Ey and Ez field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHy, dHz;
{{REAL}} dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__kernel void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ey and Ez field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz;
{{REAL}} dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__kernel void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ez field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHx, dHz;
{{REAL}} dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - j1;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - j2;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ez field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz;
{{REAL}} dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - j1;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - j2;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ez field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHx, dHz;
{{REAL}} dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ez field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz;
{{REAL}} dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ey field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHx, dHy;
{{REAL}} dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - k1;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - k2;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ey field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy;
{{REAL}} dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - k1;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - k2;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ey field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHx, dHy;
{{REAL}} dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ey field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy;
{{REAL}} dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}

View file

@@ -0,0 +1,962 @@
// Macros for converting subscripts to linear index:
#define INDEX2D_R(m, n) (m)*(NY_R)+(n)
#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k)
#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k)
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updateHVal %}
{{i}},
{% endfor %}
};
__kernel void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hy and Hz field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEy, dEz;
{{REAL}} dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - (i1 + 1);
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - (i2 + 1);
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
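Every PML kernel in this file recovers 4D array subscripts from the flat work-item index with the same division/modulo pattern. The following standalone Python sketch (not part of gprMax; the PHI1 dimensions are assumed example values) mirrors that arithmetic and checks that it round-trips against the INDEX4D_PHI1 macro defined above.
# Sketch of the linear-index -> (p, i, j, k) decomposition used by the PML
# kernels. NX_PHI1, NY_PHI1 and NZ_PHI1 are assumed example dimensions.
NX_PHI1, NY_PHI1, NZ_PHI1 = 2, 10, 12

def decompose(idx):
    # Mirrors the integer arithmetic at the top of each kernel
    p = idx // (NX_PHI1 * NY_PHI1 * NZ_PHI1)
    rem = idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)
    i = rem // (NY_PHI1 * NZ_PHI1)
    j = (rem % (NY_PHI1 * NZ_PHI1)) // NZ_PHI1
    k = (rem % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1
    return p, i, j, k

# Round-trip check against INDEX4D_PHI1(p, i, j, k)
for idx in range(2 * NX_PHI1 * NY_PHI1 * NZ_PHI1):
    p, i, j, k = decompose(idx)
    assert idx == (p * NX_PHI1 * NY_PHI1 * NZ_PHI1
                   + i * NY_PHI1 * NZ_PHI1 + j * NZ_PHI1 + k)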
__kernel void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hy and Hz field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz;
{{REAL}} dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - (i1 + 1);
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - (i2 + 1);
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
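The order2_* kernels chain two recursive-convolution memory variables per cell. A scalar Python sketch of the Hy branch above (coefficient and field values are assumed, purely illustrative) makes the update order explicit: PHI1[1] is updated before PHI1[0], because its update reads the old PHI1[0].
# Scalar sketch of the order-2 PML recursion for Hy at one cell (assumed
# example values, not gprMax code). phi0/phi1 stand in for PHI1[0]/PHI1[1].
RA0, RB0, RE0, RF0 = 1.10, 0.20, 0.90, 0.10    # assumed first-pole coefficients
RA1, RB1, RE1, RF1 = 1.05, 0.15, 0.95, 0.05    # assumed second-pole coefficients
RA01 = RA0 * RA1 - 1
coeff = 1.0                                    # assumed updatecoeffsH entry
dEz = 2.0                                      # assumed dEz/dx at this cell

hy, phi0, phi1 = 0.0, 0.0, 0.0
hy += coeff * (RA01 * dEz + RA1 * RB0 * phi0 + RB1 * phi1)
phi1 = RE1 * phi1 - RF1 * (RA0 * dEz + RB0 * phi0)   # uses the old phi0
phi0 = RE0 * phi0 - RF0 * dEz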
__kernel void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hy and Hz field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEy, dEz;
{{REAL}} dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__kernel void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hy and Hz field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz;
{{REAL}} dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__kernel void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hz field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEx, dEz;
{{REAL}} dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - (j1 + 1);
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - (j2 + 1);
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hz field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz;
{{REAL}} dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - (j1 + 1);
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - (j2 + 1);
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hz field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEx, dEz;
{{REAL}} dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hz field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz;
{{REAL}} dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hy field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEx, dEy;
{{REAL}} dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - (k1 + 1);
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - (k2 + 1);
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hy field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy;
{{REAL}} dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - (k1 + 1);
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - (k2 + 1);
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hy field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEx, dEy;
{{REAL}} dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hy field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy;
{{REAL}} dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}

gprMax/opencl/snapshots.cl

@@ -0,0 +1,55 @@
// Macros for converting subscripts to linear index:
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define INDEX4D_SNAPS(p, i, j, k) (p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k)
////////////////////
// Store snapshot //
////////////////////
__kernel void store_snapshot(int p, int xs, int xf, int ys, int yf, int zs, int zf, int dx, int dy, int dz,
__global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz,
__global {{REAL}} *snapEx, __global {{REAL}} *snapEy, __global {{REAL}} *snapEz,
__global {{REAL}} *snapHx, __global {{REAL}} *snapHy, __global {{REAL}} *snapHz) {
// This function stores field values for a snapshot.
//
// Args:
// p: Snapshot number
// xs, xf, ys, yf, zs, zf: Start and finish cell coordinates for snapshot
// dx, dy, dz: Sampling interval in cell coordinates for snapshot
// E, H: Access to field component arrays
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for 4D SNAPS array
int i = (idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) / ({{NY_SNAPS}} * {{NZ_SNAPS}});
int j = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) / {{NZ_SNAPS}};
int k = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) % {{NZ_SNAPS}};
// Subscripts for field arrays
int ii, jj, kk;
if (i >= xs && i < xf && j >= ys && j < yf && k >= zs && k < zf) {
// Increment subscripts for field array to account for spatial sampling of snapshot
ii = (xs + i) * dx;
jj = (ys + j) * dy;
kk = (zs + k) * dz;
// The electric field component value at a point comes from an average of
// the 4 electric field component values in that cell
snapEx[INDEX4D_SNAPS(p,i,j,k)] = (Ex[INDEX3D_FIELDS(ii,jj,kk)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk)] + Ex[INDEX3D_FIELDS(ii,jj,kk+1)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk+1)]) / 4;
snapEy[INDEX4D_SNAPS(p,i,j,k)] = (Ey[INDEX3D_FIELDS(ii,jj,kk)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk)] + Ey[INDEX3D_FIELDS(ii,jj,kk+1)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk+1)]) / 4;
snapEz[INDEX4D_SNAPS(p,i,j,k)] = (Ez[INDEX3D_FIELDS(ii,jj,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj,kk)] + Ez[INDEX3D_FIELDS(ii,jj+1,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj+1,kk)]) / 4;
// The magnetic field component value at a point comes from an average of
// the 2 magnetic field component values in that cell and the following cell
snapHx[INDEX4D_SNAPS(p,i,j,k)] = (Hx[INDEX3D_FIELDS(ii,jj,kk)] + Hx[INDEX3D_FIELDS(ii+1,jj,kk)]) / 2;
snapHy[INDEX4D_SNAPS(p,i,j,k)] = (Hy[INDEX3D_FIELDS(ii,jj,kk)] + Hy[INDEX3D_FIELDS(ii,jj+1,kk)]) / 2;
snapHz[INDEX4D_SNAPS(p,i,j,k)] = (Hz[INDEX3D_FIELDS(ii,jj,kk)] + Hz[INDEX3D_FIELDS(ii,jj,kk+1)]) / 2;
}
}
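The averaging performed by store_snapshot can be summarised with a small NumPy sketch (array shapes and the sampled cell are assumed for illustration; this is not gprMax code): each snapshot electric field value is the mean of the four staggered component values in the cell, and each magnetic value is the mean of two.
# NumPy sketch of the snapshot averaging for one cell (assumed shapes/indices)
import numpy as np

Ex = np.random.rand(20, 20, 20).astype(np.float32)  # assumed Ex field array
Hx = np.random.rand(20, 20, 20).astype(np.float32)  # assumed Hx field array
ii, jj, kk = 5, 6, 7                                 # assumed sampled cell

snap_ex = (Ex[ii, jj, kk] + Ex[ii, jj + 1, kk] +
           Ex[ii, jj, kk + 1] + Ex[ii, jj + 1, kk + 1]) / 4
snap_hx = (Hx[ii, jj, kk] + Hx[ii + 1, jj, kk]) / 2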


@@ -0,0 +1,206 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
// Macros for converting subscripts to linear index:
#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}}) + (n)
#define INDEX2D_SRCINFO(m, n) (m)*({{NY_SRCINFO}}) + (n)
#define INDEX2D_SRCWAVES(m, n) (m)*({{NY_SRCWAVES}}) + (n)
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}}) + (i)*({{NY_ID}})*({{NZ_ID}}) + (j)*({{NZ_ID}}) + (k)
// Material coefficients (read-only) in constant memory
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updateEVal %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updateHVal %}
{{i}},
{% endfor %}
};
///////////////////////////////////////////
// Hertzian dipole electric field update //
///////////////////////////////////////////
__kernel void update_hertzian_dipole(int NHERTZDIPOLE, int iteration,
{{REAL}} dx, {{REAL}} dy, {{REAL}} dz,
__global const int* restrict srcinfo1,
__global const {{REAL}}* restrict srcinfo2,
__global const {{REAL}}* restrict srcwaveforms,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez) {
// This function updates electric field values for Hertzian dipole sources.
//
// Args:
// NHERTZDIPOLE: Total number of Hertzian dipoles in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, E: Access to ID and field component arrays
// get linear index
int src = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
if (src < NHERTZDIPOLE) {
{{REAL}} dl;
int i, j, k, polarisation;
i = srcinfo1[INDEX2D_SRCINFO(src,0)];
j = srcinfo1[INDEX2D_SRCINFO(src,1)];
k = srcinfo1[INDEX2D_SRCINFO(src,2)];
polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
dl = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
int materialEx = ID[INDEX4D_ID(0,i,j,k)];
Ex[INDEX3D_FIELDS(i,j,k)] = Ex[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialEy = ID[INDEX4D_ID(1,i,j,k)];
Ey[INDEX3D_FIELDS(i,j,k)] = Ey[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialEz = ID[INDEX4D_ID(2,i,j,k)];
Ez[INDEX3D_FIELDS(i,j,k)] = Ez[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
}
}
__kernel void update_magnetic_dipole(int NMAGDIPOLE, int iteration, {{REAL}} dx, {{REAL}} dy, {{REAL}} dz, __global const int* restrict srcinfo1, __global const {{REAL}}* restrict srcinfo2, __global const {{REAL}}* restrict srcwaveforms, __global const unsigned int* restrict ID, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz){
// This function updates magnetic field values for magnetic dipole sources.
//
// Args:
// NMAGDIPOLE: Total number of magnetic dipoles in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each source
int src = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
if (src < NMAGDIPOLE) {
int i, j, k, polarisation;
i = srcinfo1[INDEX2D_SRCINFO(src,0)];
j = srcinfo1[INDEX2D_SRCINFO(src,1)];
k = srcinfo1[INDEX2D_SRCINFO(src,2)];
polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
// 'x' polarised source
if (polarisation == 0) {
int materialHx = ID[INDEX4D_ID(3,i,j,k)];
Hx[INDEX3D_FIELDS(i,j,k)] = Hx[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialHy = ID[INDEX4D_ID(4,i,j,k)];
Hy[INDEX3D_FIELDS(i,j,k)] = Hy[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialHz = ID[INDEX4D_ID(5,i,j,k)];
Hz[INDEX3D_FIELDS(i,j,k)] = Hz[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
}
}
__kernel void update_voltage_source(int NVOLTSRC, int iteration, {{REAL}} dx, {{REAL}} dy, {{REAL}} dz, __global const int* restrict srcinfo1, __global const {{REAL}}* restrict srcinfo2, __global const {{REAL}}* restrict srcwaveforms, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez){
// This function updates electric field values for voltage sources.
//
// Args:
// NVOLTSRC: Total number of voltage sources in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, E: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each source
int src = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
if (src < NVOLTSRC) {
{{REAL}} resistance;
int i, j, k, polarisation;
i = srcinfo1[INDEX2D_SRCINFO(src,0)];
j = srcinfo1[INDEX2D_SRCINFO(src,1)];
k = srcinfo1[INDEX2D_SRCINFO(src,2)];
polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
resistance = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
if (resistance != 0) {
int materialEx = ID[INDEX4D_ID(0,i,j,k)];
Ex[INDEX3D_FIELDS(i,j,k)] = Ex[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dy * dz));
}
else {
Ex[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dx;
}
}
// 'y' polarised source
else if (polarisation == 1) {
if (resistance != 0) {
int materialEy = ID[INDEX4D_ID(1,i,j,k)];
Ey[INDEX3D_FIELDS(i,j,k)] = Ey[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dz));
}
else {
Ey[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dy;
}
}
// 'z' polarised source
else if (polarisation == 2) {
if (resistance != 0) {
int materialEz = ID[INDEX4D_ID(2,i,j,k)];
Ez[INDEX3D_FIELDS(i,j,k)] = Ez[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dy));
}
else {
Ez[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dz;
}
}
}
}
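The two branches in update_voltage_source correspond to a resistive (soft) source and a zero-resistance (hard) source. A scalar Python sketch of the 'z'-polarised case (all values assumed, not gprMax code):
# Sketch of the 'z'-polarised voltage source branches (assumed example values)
dx = dy = dz = 0.001        # assumed spatial discretisation (m)
coeff = 1.0                 # assumed updatecoeffsE[material, 4]
V = 1.0                     # assumed source waveform value at this iteration

def update_ez(ez, resistance):
    if resistance != 0:
        # Resistive source: subtract a source term from the existing Ez value
        return ez - coeff * V * (1 / (resistance * dx * dy))
    # Zero resistance: hard-set Ez directly from the waveform
    return -V / dz

print(update_ez(0.0, 50.0))   # resistive (soft) source
print(update_ez(0.0, 0.0))    # hard source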


@@ -0,0 +1,59 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
#define INDEX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n)
#define INDEX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k)
#define INDEX3D_FIELDS(i,j,k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
__kernel void store_outputs(int NRX, int iteration,
__global const int* restrict rxcoords,
__global {{REAL}} *rxs,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz) {
// This function stores field component values for every receiver in the model.
//
// Args:
// NRX: total number of receivers in the model
// rxcoords: array of receiver cell coordinates
// rxs: array to store field components for receivers - rows
// are field components; columns are iterations; pages are receivers
// Obtain linear index corresponding to the current work item
int rx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
int i,j,k;
if (rx < NRX) {
i = rxcoords[INDEX2D_RXCOORDS(rx,0)];
j = rxcoords[INDEX2D_RXCOORDS(rx,1)];
k = rxcoords[INDEX2D_RXCOORDS(rx,2)];
rxs[INDEX3D_RXS(0,iteration,rx)] = Ex[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(1,iteration,rx)] = Ey[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(2,iteration,rx)] = Ez[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(3,iteration,rx)] = Hx[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(4,iteration,rx)] = Hy[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(5,iteration,rx)] = Hz[INDEX3D_FIELDS(i,j,k)];
}
}
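On the host side the flat rxs buffer written by store_outputs can be unpacked by treating INDEX3D_RXS as a (component, iteration, receiver) layout. A short NumPy sketch (sizes and names assumed for illustration; not the gprMax API):
# Sketch of unpacking the flat rxs buffer on the host (assumed sizes/names)
import numpy as np

NY_RXS = 1000               # assumed: number of iterations
NZ_RXS = 3                  # assumed: number of receivers (NRX)

rxs_flat = np.zeros(6 * NY_RXS * NZ_RXS, dtype=np.float32)  # copied from device
rxs = rxs_flat.reshape(6, NY_RXS, NZ_RXS)   # (component, iteration, receiver)

ex_trace_rx0 = rxs[0, :, 0]  # Ex time history at the first receiver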


@@ -21,25 +21,31 @@ from importlib import import_module
import gprMax.config as config
import numpy as np
from .utilities.utilities import timer
class CFSParameter:
"""Individual CFS parameter (e.g. alpha, kappa, or sigma)."""
# Allowable scaling profiles and directions
scalingprofiles = {'constant': 0, 'linear': 1, 'quadratic': 2, 'cubic': 3,
'quartic': 4, 'quintic': 5, 'sextic': 6, 'septic': 7, 'octic': 8}
'quartic': 4, 'quintic': 5, 'sextic': 6, 'septic': 7,
'octic': 8}
scalingdirections = ['forward', 'reverse']
def __init__(self, ID=None, scaling='polynomial', scalingprofile=None,
scalingdirection='forward', min=0, max=0):
"""
Args:
ID (str): Identifier for CFS parameter, can be: 'alpha', 'kappa' or 'sigma'.
scaling (str): Type of scaling, can be: 'polynomial'.
scalingprofile (str): Type of scaling profile from scalingprofiles.
scalingdirection (str): Direction of scaling profile from scalingdirections.
min (float): Minimum value for parameter.
max (float): Maximum value for parameter.
ID: string identifier for CFS parameter, can be: 'alpha', 'kappa' or
'sigma'.
scaling: string for type of scaling, can be: 'polynomial'.
scalingprofile: string for type of scaling profile from
scalingprofiles.
scalingdirection: string for direction of scaling profile from
scalingdirections.
min: float for minimum value for parameter.
max: float for maximum value for parameter.
"""
self.ID = ID
@@ -56,9 +62,9 @@ class CFS:
def __init__(self):
"""
Args:
alpha (CFSParameter): alpha parameter for CFS.
kappa (CFSParameter): kappa parameter for CFS.
sigma (CFSParameter): sigma parameter for CFS.
alpha: CFSParameter alpha parameter for CFS.
kappa: CFSParameter kappa parameter for CFS.
sigma: CFSParameter sigma parameter for CFS.
"""
self.alpha = CFSParameter(ID='alpha', scalingprofile='constant')
@@ -70,11 +76,11 @@ class CFS:
material properties.
Args:
d (float): dx, dy, or dz in direction of PML.
er (float): Average permittivity of underlying material.
mr (float): Average permeability of underlying material.
G (class): Grid class instance - holds essential parameters
describing the model.
d: float for dx, dy, or dz in direction of PML.
er: float for average permittivity of underlying material.
mr: float for average permeability of underlying material.
G: FDTDGrid object that holds essential parameters describing the
model.
"""
# Calculation of the maximum value of sigma from http://dx.doi.org/10.1109/8.546249
@@ -86,17 +92,17 @@ class CFS:
electric and magnetic PML updates.
Args:
order (int): Order of polynomial for scaling profile.
Evalues (float): numpy array holding scaling profile values for
electric PML update.
Hvalues (float): numpy array holding scaling profile values for
magnetic PML update.
order: int of order of polynomial for scaling profile.
Evalues: float array holding scaling profile values for
electric PML update.
Hvalues: float array holding scaling profile values for
magnetic PML update.
Returns:
Evalues (float): numpy array holding scaling profile values for
electric PML update.
Hvalues (float): numpy array holding scaling profile values for
magnetic PML update.
Evalues: float array holding scaling profile values for
electric PML update.
Hvalues: float array holding scaling profile values for
magnetic PML update.
"""
tmp = (np.linspace(0, (len(Evalues) - 1) + 0.5, num=2 * len(Evalues))
@@ -111,17 +117,18 @@ class CFS:
profile type and minimum and maximum values.
Args:
thickness (int): Thickness of PML in cells.
parameter (CFSParameter): Instance of CFSParameter
thickness: int of thickness of PML in cells.
parameter: instance of CFSParameter
Returns:
Evalues (float): numpy array holding profile value for electric
PML update.
Hvalues (float): numpy array holding profile value for magnetic
PML update.
Evalues: float array holding profile value for electric
PML update.
Hvalues: float array holding profile value for magnetic
PML update.
"""
# Extra cell of thickness added to allow correct scaling of electric and magnetic values
# Extra cell of thickness added to allow correct scaling of electric and
# magnetic values
Evalues = np.zeros(thickness + 1, dtype=config.sim_config.dtypes['float_or_double'])
Hvalues = np.zeros(thickness + 1, dtype=config.sim_config.dtypes['float_or_double'])
@@ -146,10 +153,12 @@ class CFS:
if parameter.scalingdirection == 'reverse':
Evalues = Evalues[::-1]
Hvalues = Hvalues[::-1]
# Magnetic values must be shifted one element to the left after reversal
# Magnetic values must be shifted one element to the left after
# reversal
Hvalues = np.roll(Hvalues, -1)
# Extra cell of thickness not required and therefore removed after scaling
# Extra cell of thickness not required and therefore removed after
# scaling
Evalues = Evalues[:-1]
Hvalues = Hvalues[:-1]
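The scaling routines above grade each CFS parameter from its minimum to its maximum value across the PML thickness, then stagger and trim the electric and magnetic samples. A minimal sketch of the underlying polynomial grading, assuming the usual form value = min + (max - min) * (x / thickness)**order (the staggering of electric and magnetic points is handled by scaling_polynomial above):

import numpy as np

def polynomial_profile(order, minval, maxval, thickness):
    # Hypothetical helper, not part of this commit: graded CFS profile
    # sampled at integer cell positions across the PML thickness.
    x = np.arange(thickness + 1) / thickness
    return minval + (maxval - minval) * x**order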
@@ -168,17 +177,20 @@ class PML:
boundaryIDs = ['x0', 'y0', 'z0', 'xmax', 'ymax', 'zmax']
# Indicates direction of increasing absorption
# xminus, yminus, zminus - absorption increases in negative direction of x-axis, y-axis, or z-axis
# xplus, yplus, zplus - absorption increases in positive direction of x-axis, y-axis, or z-axis
# xminus, yminus, zminus - absorption increases in negative direction of
# x-axis, y-axis, or z-axis
# xplus, yplus, zplus - absorption increases in positive direction of
# x-axis, y-axis, or z-axis
directions = ['xminus', 'yminus', 'zminus', 'xplus', 'yplus', 'zplus']
def __init__(self, G, ID=None, direction=None, xs=0, xf=0, ys=0, yf=0, zs=0, zf=0):
"""
Args:
G (FDTDGrid): Holds essential parameters describing the model.
ID (str): Identifier for PML slab.
direction (str): Direction of increasing absorption.
xs, xf, ys, yf, zs, zf (float): Extent of the PML slab.
G: FDTDGrid object that holds essential parameters describing the
model.
ID: string identifier for PML slab.
direction: string for direction of increasing absorption.
xs, xf, ys, yf, zs, zf: floats of extent of the PML slab.
"""
self.G = G
@@ -244,8 +256,8 @@ class PML:
"""Calculates electric and magnetic update coefficients for the PML.
Args:
er (float): Average permittivity of underlying material
mr (float): Average permeability of underlying material
er: float of average permittivity of underlying material
mr: float of average permeability of underlying material
"""
self.ERA = np.zeros((len(self.CFS), self.thickness),
@@ -275,19 +287,25 @@ class PML:
# Define different parameters depending on PML formulation
if self.G.pmlformulation == 'HORIPML':
# HORIPML electric update coefficients
tmp = (2 * config.sim_config.em_consts['e0'] * Ekappa) + self.G.dt * (Ealpha * Ekappa + Esigma)
self.ERA[x, :] = (2 * config.sim_config.em_consts['e0'] + self.G.dt * Ealpha) / tmp
self.ERB[x, :] = (2 * config.sim_config.em_consts['e0'] * Ekappa) / tmp
self.ERE[x, :] = ((2 * config.sim_config.em_consts['e0'] * Ekappa) - self.G.dt
* (Ealpha * Ekappa + Esigma)) / tmp
tmp = ((2 * config.sim_config.em_consts['e0'] * Ekappa) +
self.G.dt * (Ealpha * Ekappa + Esigma))
self.ERA[x, :] = ((2 * config.sim_config.em_consts['e0'] +
self.G.dt * Ealpha) / tmp)
self.ERB[x, :] = ((2 * config.sim_config.em_consts['e0'] * Ekappa)
/ tmp)
self.ERE[x, :] = (((2 * config.sim_config.em_consts['e0'] * Ekappa) -
self.G.dt * (Ealpha * Ekappa + Esigma)) / tmp)
self.ERF[x, :] = (2 * Esigma * self.G.dt) / (Ekappa * tmp)
# HORIPML magnetic update coefficients
tmp = (2 * config.sim_config.em_consts['e0'] * Hkappa) + self.G.dt * (Halpha * Hkappa + Hsigma)
self.HRA[x, :] = (2 * config.sim_config.em_consts['e0'] + self.G.dt * Halpha) / tmp
self.HRB[x, :] = (2 * config.sim_config.em_consts['e0'] * Hkappa) / tmp
self.HRE[x, :] = ((2 * config.sim_config.em_consts['e0'] * Hkappa) - self.G.dt
* (Halpha * Hkappa + Hsigma)) / tmp
tmp = ((2 * config.sim_config.em_consts['e0'] * Hkappa) +
self.G.dt * (Halpha * Hkappa + Hsigma))
self.HRA[x, :] = ((2 * config.sim_config.em_consts['e0'] +
self.G.dt * Halpha) / tmp)
self.HRB[x, :] = ((2 * config.sim_config.em_consts['e0'] * Hkappa)
/ tmp)
self.HRE[x, :] = (((2 * config.sim_config.em_consts['e0'] * Hkappa) -
self.G.dt * (Halpha * Hkappa + Hsigma)) / tmp)
self.HRF[x, :] = (2 * Hsigma * self.G.dt) / (Hkappa * tmp)
elif self.G.pmlformulation == 'MRIPML':
@@ -295,31 +313,39 @@ class PML:
tmp = 2 * config.sim_config.em_consts['e0'] + self.G.dt * Ealpha
self.ERA[x, :] = Ekappa + (self.G.dt * Esigma) / tmp
self.ERB[x, :] = (2 * config.sim_config.em_consts['e0']) / tmp
self.ERE[x, :] = ((2 * config.sim_config.em_consts['e0']) - self.G.dt * Ealpha) / tmp
self.ERE[x, :] = (((2 * config.sim_config.em_consts['e0'])
- self.G.dt * Ealpha) / tmp)
self.ERF[x, :] = (2 * Esigma * self.G.dt) / tmp
# MRIPML magnetic update coefficients
tmp = 2 * config.sim_config.em_consts['e0'] + self.G.dt * Halpha
self.HRA[x, :] = Hkappa + (self.G.dt * Hsigma) / tmp
self.HRB[x, :] = (2 * config.sim_config.em_consts['e0']) / tmp
self.HRE[x, :] = ((2 * config.sim_config.sim_config.em_consts['e0']) - self.G.dt * Halpha) / tmp
self.HRE[x, :] = (((2 * config.sim_config.em_consts['e0'])
- self.G.dt * Halpha) / tmp)
self.HRF[x, :] = (2 * Hsigma * self.G.dt) / tmp
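Both formulations reduce to four update coefficients per CFS, computed from the graded kappa, alpha, and sigma profiles. As a standalone illustration, a NumPy sketch of the HORIPML electric coefficients exactly as reformatted above (e0 is the free-space permittivity and dt the time step):

import numpy as np
from scipy.constants import epsilon_0 as e0

def horipml_electric_coeffs(Ekappa, Ealpha, Esigma, dt):
    # Mirrors the ERA/ERB/ERE/ERF expressions in PML.calculate_update_coeffs().
    tmp = 2 * e0 * Ekappa + dt * (Ealpha * Ekappa + Esigma)
    ERA = (2 * e0 + dt * Ealpha) / tmp
    ERB = (2 * e0 * Ekappa) / tmp
    ERE = (2 * e0 * Ekappa - dt * (Ealpha * Ekappa + Esigma)) / tmp
    ERF = (2 * Esigma * dt) / (Ekappa * tmp)
    return ERA, ERB, ERE, ERF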
def update_electric(self):
"""This functions updates electric field components with the PML correction."""
"""This functions updates electric field components with the PML
correction.
"""
pmlmodule = 'gprMax.cython.pml_updates_electric_' + self.G.pmlformulation
func = getattr(import_module(pmlmodule), 'order' + str(len(self.CFS)) + '_' + self.direction)
func = getattr(import_module(pmlmodule),
'order' + str(len(self.CFS)) + '_' + self.direction)
func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf,
config.get_model_config().ompthreads, self.G.updatecoeffsE, self.G.ID,
self.G.Ex, self.G.Ey, self.G.Ez, self.G.Hx, self.G.Hy, self.G.Hz,
self.EPhi1, self.EPhi2, self.ERA, self.ERB, self.ERE, self.ERF, self.d)
def update_magnetic(self):
"""This functions updates magnetic field components with the PML correction."""
"""This functions updates magnetic field components with the PML
correction.
"""
pmlmodule = 'gprMax.cython.pml_updates_magnetic_' + self.G.pmlformulation
func = getattr(import_module(pmlmodule), 'order' + str(len(self.CFS)) + '_' + self.direction)
func = getattr(import_module(pmlmodule),
'order' + str(len(self.CFS)) + '_' + self.direction)
func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf,
config.get_model_config().ompthreads, self.G.updatecoeffsH, self.G.ID,
self.G.Ex, self.G.Ey, self.G.Ez, self.G.Hx, self.G.Hy, self.G.Hz,
@@ -331,6 +357,9 @@ class CUDAPML(PML):
solving on GPU using CUDA.
"""
def __init__(self, *args, **kwargs):
super(CUDAPML, self).__init__(*args, **kwargs)
def htod_field_arrays(self):
"""Initialise PML field and coefficient arrays on GPU."""
@@ -360,9 +389,9 @@ class CUDAPML(PML):
"""Get update functions from PML kernels.
Args:
kernelselectric: PyCuda SourceModule containing PML kernels for
kernelselectric: pycuda SourceModule containing PML kernels for
electric updates.
kernelsmagnetic: PyCuda SourceModule containing PML kernels for
kernelsmagnetic: pycuda SourceModule containing PML kernels for
magnetic updates.
"""
@@ -373,9 +402,12 @@ class CUDAPML(PML):
"""This functions updates electric field components with the PML
correction on the GPU.
"""
self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf),
np.int32(self.ys), np.int32(self.yf),
np.int32(self.zs), np.int32(self.zf),
self.update_electric_gpu(np.int32(self.xs),
np.int32(self.xf),
np.int32(self.ys),
np.int32(self.yf),
np.int32(self.zs),
np.int32(self.zf),
np.int32(self.EPhi1_gpu.shape[1]),
np.int32(self.EPhi1_gpu.shape[2]),
np.int32(self.EPhi1_gpu.shape[3]),
@@ -384,11 +416,18 @@ class CUDAPML(PML):
np.int32(self.EPhi2_gpu.shape[3]),
np.int32(self.thickness),
self.G.ID_gpu.gpudata,
self.G.Ex_gpu.gpudata, self.G.Ey_gpu.gpudata, self.G.Ez_gpu.gpudata,
self.G.Hx_gpu.gpudata, self.G.Hy_gpu.gpudata, self.G.Hz_gpu.gpudata,
self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata,
self.ERA_gpu.gpudata, self.ERB_gpu.gpudata,
self.ERE_gpu.gpudata, self.ERF_gpu.gpudata,
self.G.Ex_gpu.gpudata,
self.G.Ey_gpu.gpudata,
self.G.Ez_gpu.gpudata,
self.G.Hx_gpu.gpudata,
self.G.Hy_gpu.gpudata,
self.G.Hz_gpu.gpudata,
self.EPhi1_gpu.gpudata,
self.EPhi2_gpu.gpudata,
self.ERA_gpu.gpudata,
self.ERB_gpu.gpudata,
self.ERE_gpu.gpudata,
self.ERF_gpu.gpudata,
config.sim_config.dtypes['float_or_double'](self.d),
block=self.G.tpb, grid=self.bpg)
@@ -396,9 +435,12 @@ class CUDAPML(PML):
"""This functions updates magnetic field components with the PML
correction on the GPU.
"""
self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf),
np.int32(self.ys), np.int32(self.yf),
np.int32(self.zs), np.int32(self.zf),
self.update_magnetic_gpu(np.int32(self.xs),
np.int32(self.xf),
np.int32(self.ys),
np.int32(self.yf),
np.int32(self.zs),
np.int32(self.zf),
np.int32(self.HPhi1_gpu.shape[1]),
np.int32(self.HPhi1_gpu.shape[2]),
np.int32(self.HPhi1_gpu.shape[3]),
@@ -407,19 +449,147 @@ class CUDAPML(PML):
np.int32(self.HPhi2_gpu.shape[3]),
np.int32(self.thickness),
self.G.ID_gpu.gpudata,
self.G.Ex_gpu.gpudata, self.G.Ey_gpu.gpudata, self.G.Ez_gpu.gpudata,
self.G.Hx_gpu.gpudata, self.G.Hy_gpu.gpudata, self.G.Hz_gpu.gpudata,
self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata,
self.HRA_gpu.gpudata, self.HRB_gpu.gpudata,
self.HRE_gpu.gpudata, self.HRF_gpu.gpudata,
self.G.Ex_gpu.gpudata,
self.G.Ey_gpu.gpudata,
self.G.Ez_gpu.gpudata,
self.G.Hx_gpu.gpudata,
self.G.Hy_gpu.gpudata,
self.G.Hz_gpu.gpudata,
self.HPhi1_gpu.gpudata,
self.HPhi2_gpu.gpudata,
self.HRA_gpu.gpudata,
self.HRB_gpu.gpudata,
self.HRE_gpu.gpudata,
self.HRF_gpu.gpudata,
config.sim_config.dtypes['float_or_double'](self.d),
block=self.G.tpb, grid=self.bpg)
class OpenCLPML(PML):
"""Perfectly Matched Layer (PML) Absorbing Boundary Conditions (ABC) for
solving on compute device using OpenCL.
"""
def __init__(self, *args, **kwargs):
super(OpenCLPML, self).__init__(*args, **kwargs)
self.compute_time = 0
def set_queue(self, queue):
"""Passes in pyopencl queue.
Args:
queue: pyopencl queue.
"""
self.queue = queue
def htod_field_arrays(self):
"""Initialise PML field and coefficient arrays on compute device."""
import pyopencl.array as clarray
self.ERA_dev = clarray.to_device(self.queue, self.ERA)
self.ERB_dev = clarray.to_device(self.queue, self.ERB)
self.ERE_dev = clarray.to_device(self.queue, self.ERE)
self.ERF_dev = clarray.to_device(self.queue, self.ERF)
self.HRA_dev = clarray.to_device(self.queue, self.HRA)
self.HRB_dev = clarray.to_device(self.queue, self.HRB)
self.HRE_dev = clarray.to_device(self.queue, self.HRE)
self.HRF_dev = clarray.to_device(self.queue, self.HRF)
self.EPhi1_dev = clarray.to_device(self.queue, self.EPhi1)
self.EPhi2_dev = clarray.to_device(self.queue, self.EPhi2)
self.HPhi1_dev = clarray.to_device(self.queue, self.HPhi1)
self.HPhi2_dev = clarray.to_device(self.queue, self.HPhi2)
def set_blocks_per_grid(self):
# Stub: blocks-per-grid is CUDA-specific; the OpenCL path uses set_wgs() instead.
pass
def set_wgs(self):
"""Set the workgroup size used for updating the PML field arrays
on a compute device.
"""
self.wgs = (((int(np.ceil(((self.EPhi1_dev.shape[1] + 1) *
(self.EPhi1_dev.shape[2] + 1) *
(self.EPhi1_dev.shape[3] + 1)) / self.G.tpb[0]))) * 256), 1, 1)
def get_update_funcs(self):
# Stub: OpenCL update functions are attached directly by OpenCLUpdates._set_pml_knls().
pass
def update_electric(self):
"""This functions updates electric field components with the PML
correction on the compute device.
"""
start_time = timer()
event = self.update_electric_dev(np.int32(self.xs),
np.int32(self.xf),
np.int32(self.ys),
np.int32(self.yf),
np.int32(self.zs),
np.int32(self.zf),
np.int32(self.EPhi1_dev.shape[1]),
np.int32(self.EPhi1_dev.shape[2]),
np.int32(self.EPhi1_dev.shape[3]),
np.int32(self.EPhi2_dev.shape[1]),
np.int32(self.EPhi2_dev.shape[2]),
np.int32(self.EPhi2_dev.shape[3]),
np.int32(self.thickness),
self.G.ID_dev,
self.G.Ex_dev,
self.G.Ey_dev,
self.G.Ez_dev,
self.G.Hx_dev,
self.G.Hy_dev,
self.G.Hz_dev,
self.EPhi1_dev,
self.EPhi2_dev,
self.ERA_dev,
self.ERB_dev,
self.ERE_dev,
self.ERF_dev,
config.sim_config.dtypes['float_or_double'](self.d))
event.wait()
self.compute_time += (timer() - start_time)
def update_magnetic(self):
"""This functions updates magnetic field components with the PML
correction on the compute device.
"""
start_time = timer()
event = self.update_magnetic_dev(np.int32(self.xs),
np.int32(self.xf),
np.int32(self.ys),
np.int32(self.yf),
np.int32(self.zs),
np.int32(self.zf),
np.int32(self.HPhi1_dev.shape[1]),
np.int32(self.HPhi1_dev.shape[2]),
np.int32(self.HPhi1_dev.shape[3]),
np.int32(self.HPhi2_dev.shape[1]),
np.int32(self.HPhi2_dev.shape[2]),
np.int32(self.HPhi2_dev.shape[3]),
np.int32(self.thickness),
self.G.ID_dev,
self.G.Ex_dev,
self.G.Ey_dev,
self.G.Ez_dev,
self.G.Hx_dev,
self.G.Hy_dev,
self.G.Hz_dev,
self.HPhi1_dev,
self.HPhi2_dev,
self.HRA_dev,
self.HRB_dev,
self.HRE_dev,
self.HRF_dev,
config.sim_config.dtypes['float_or_double'](self.d))
event.wait()
self.compute_time += (timer() - start_time)
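These update methods time each kernel on the host with timer() after event.wait(). Since the command queue passed in via set_queue is created with PROFILING_ENABLE (see OpenCLUpdates later in this commit), the device-side duration could also be read from the pyopencl event itself; a minimal sketch (OpenCL reports profiling times in nanoseconds):

def device_time_s(event):
    # Return kernel execution time in seconds from a pyopencl event,
    # assuming its queue was created with PROFILING_ENABLE.
    event.wait()
    return (event.profile.end - event.profile.start) * 1e-9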
def print_pml_info(G):
"""Information about PMLs.
Args:
G (FDTDGrid): Parameters describing a grid in a model.
G: FDTDGrid object that holds parameters describing a grid in a model.
"""
# No PML
if all(value == 0 for value in G.pmlthickness.values()):
@@ -433,7 +603,8 @@ def print_pml_info(G):
pmlinfo += f'{key}: {value}, '
pmlinfo = pmlinfo[:-2]
return f'\nPML boundaries [{G.name}]: {{formulation: {G.pmlformulation}, order: {len(G.cfs)}, thickness (cells): {pmlinfo}}}'
return (f'\nPML boundaries [{G.name}]: {{formulation: {G.pmlformulation}, '
f'order: {len(G.cfs)}, thickness (cells): {pmlinfo}}}')
def build_pml(G, key, value):
@@ -442,21 +613,28 @@ def build_pml(G, key, value):
(based on underlying material er and mr from solid array).
Args:
G (FDTDGrid): Parameters describing a grid in a model.
key (str): Identifier of PML slab.
value (int): Thickness of PML slab in cells.
G: FDTDGrid object that holds parameters describing a grid in a model.
key: string identifier of PML slab.
value: int with thickness of PML slab in cells.
"""
pml_type = CUDAPML if config.sim_config.general['cuda'] else PML
if config.sim_config.general['solver'] == 'cpu':
pml_type = PML
elif config.sim_config.general['solver'] == 'cuda':
pml_type = CUDAPML
elif config.sim_config.general['solver'] == 'opencl':
pml_type = OpenCLPML
sumer = 0 # Sum of relative permittivities in PML slab
summr = 0 # Sum of relative permeabilities in PML slab
if key[0] == 'x':
if key == 'x0':
pml = pml_type(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz)
pml = pml_type(G, ID=key, direction='xminus',
xf=value, yf=G.ny, zf=G.nz)
elif key == 'xmax':
pml = pml_type(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
pml = pml_type(G, ID=key, direction='xplus',
xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml)
for j in range(G.ny):
for k in range(G.nz):
@@ -469,9 +647,11 @@ def build_pml(G, key, value):
elif key[0] == 'y':
if key == 'y0':
pml = pml_type(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz)
pml = pml_type(G, ID=key, direction='yminus',
yf=value, xf=G.nx, zf=G.nz)
elif key == 'ymax':
pml = pml_type(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
pml = pml_type(G, ID=key, direction='yplus',
ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml)
for i in range(G.nx):
for k in range(G.nz):
@@ -484,9 +664,11 @@ def build_pml(G, key, value):
elif key[0] == 'z':
if key == 'z0':
pml = pml_type(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny)
pml = pml_type(G, ID=key, direction='zminus',
zf=value, xf=G.nx, yf=G.ny)
elif key == 'zmax':
pml = pml_type(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
pml = pml_type(G, ID=key, direction='zplus',
zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml)
for i in range(G.nx):
for j in range(G.ny):
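build_pml is called once per PML slab; which class it instantiates now follows config.sim_config.general['solver'] rather than the old boolean cuda flag. A minimal usage sketch, assuming the requested slab thicknesses live in the G.pmlthickness dict used by print_pml_info above:

# Build one PML object per requested slab (a thickness of 0 means no PML on that face).
for key, value in G.pmlthickness.items():
    if value > 0:
        build_pml(G, key, value)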

View file

@@ -26,8 +26,8 @@ class Rx:
allowableoutputs = ['Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz', 'Ix', 'Iy', 'Iz']
defaultoutputs = allowableoutputs[:-3]
allowableoutputs_gpu = allowableoutputs[:-3]
maxnumoutputs_gpu = 0
allowableoutputs_dev = allowableoutputs[:-3]
maxnumoutputs_dev = 0
def __init__(self):
@@ -41,57 +41,65 @@ class Rx:
self.zcoordorigin = None
def htod_rx_arrays(G):
"""Initialise arrays on GPU for receiver coordinates and to store field
def htod_rx_arrays(G, queue=None):
"""Initialise arrays on compute device for receiver coordinates and to store field
components for receivers.
Args:
G (FDTDGrid): Holds essential parameters describing the model.
G: FDTDGrid object that holds essential parameters describing the model.
queue: pyopencl queue.
Returns:
rxcoords_gpu (int): numpy array of receiver coordinates from GPU.
rxs_gpu (float): numpy array of receiver data from GPU - rows are field
components; columns are iterations; pages are receivers.
rxcoords_dev: int array of receiver coordinates on compute device.
rxs_dev: float array of receiver data on compute device - rows are field
components; columns are iterations; pages are receivers.
"""
import pycuda.gpuarray as gpuarray
# Array to store receiver coordinates on GPU
# Array to store receiver coordinates on compute device
rxcoords = np.zeros((len(G.rxs), 3), dtype=np.int32)
for i, rx in enumerate(G.rxs):
rxcoords[i, 0] = rx.xcoord
rxcoords[i, 1] = rx.ycoord
rxcoords[i, 2] = rx.zcoord
# Store maximum number of output components
if len(rx.outputs) > Rx.maxnumoutputs_gpu:
Rx.maxnumoutputs_gpu = len(rx.outputs)
if len(rx.outputs) > Rx.maxnumoutputs_dev:
Rx.maxnumoutputs_dev = len(rx.outputs)
# Array to store field components for receivers on GPU - rows are field components;
# columns are iterations; pages are receivers
rxs = np.zeros((len(Rx.allowableoutputs_gpu), G.iterations, len(G.rxs)),
# Array to store field components for receivers on compute device -
# rows are field components; columns are iterations; pages are receivers
rxs = np.zeros((len(Rx.allowableoutputs_dev), G.iterations, len(G.rxs)),
dtype=config.sim_config.dtypes['float_or_double'])
# Copy arrays to GPU
rxcoords_gpu = gpuarray.to_gpu(rxcoords)
rxs_gpu = gpuarray.to_gpu(rxs)
# Copy arrays to compute device
if config.sim_config.general['solver'] == 'cuda':
import pycuda.gpuarray as gpuarray
rxcoords_dev = gpuarray.to_gpu(rxcoords)
rxs_dev = gpuarray.to_gpu(rxs)
return rxcoords_gpu, rxs_gpu
elif config.sim_config.general['solver'] == 'opencl':
import pyopencl.array as clarray
rxcoords_dev = clarray.to_device(queue, rxcoords)
rxs_dev = clarray.to_device(queue, rxs)
return rxcoords_dev, rxs_dev
def dtoh_rx_array(rxs_gpu, rxcoords_gpu, G):
"""Copy output from receivers array used on GPU back to receiver objects.
def dtoh_rx_array(rxs_dev, rxcoords_dev, G):
"""Copy output from receivers array used on compute device back to receiver
objects.
Args:
rxs_gpu (float): numpy array of receiver data from GPU - rows are field
components; columns are iterations; pages are receivers.
rxcoords_gpu (int): numpy array of receiver coordinates from GPU.
G (FDTDGrid): Holds essential parameters describing the model.
rxcoords_dev: int array of receiver coordinates on compute device.
rxs_dev: float array of receiver data on compute device - rows are field
components; columns are iterations; pages are receivers.
G: FDTDGrid object that holds essential parameters describing the model.
"""
for rx in G.rxs:
for rxgpu in range(len(G.rxs)):
if (rx.xcoord == rxcoords_gpu[rxgpu, 0] and
rx.ycoord == rxcoords_gpu[rxgpu, 1] and
rx.zcoord == rxcoords_gpu[rxgpu, 2]):
for rxd in range(len(G.rxs)):
if (rx.xcoord == rxcoords_dev[rxd, 0] and
rx.ycoord == rxcoords_dev[rxd, 1] and
rx.zcoord == rxcoords_dev[rxd, 2]):
for output in rx.outputs.keys():
rx.outputs[output] = rxs_gpu[Rx.allowableoutputs_gpu.index(output), :, rxgpu]
rx.outputs[output] = rxs_dev[Rx.allowableoutputs_dev.index(output), :, rxd]
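htod_rx_arrays here, together with htod_src_arrays and htod_snapshot_array in the other modules of this commit, repeats the same branch: pycuda.gpuarray.to_gpu for CUDA and pyopencl.array.to_device (with a queue) for OpenCL. A sketch of how that transfer could be factored into a single hypothetical helper (to_compute_device is not part of this commit):

import gprMax.config as config

def to_compute_device(arr, queue=None):
    # Hypothetical helper: copy a host numpy array to the active compute device.
    if config.sim_config.general['solver'] == 'cuda':
        import pycuda.gpuarray as gpuarray
        return gpuarray.to_gpu(arr)
    elif config.sim_config.general['solver'] == 'opencl':
        import pyopencl.array as clarray
        return clarray.to_device(queue, arr)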

View file

@@ -223,18 +223,17 @@ class Snapshot:
f.close()
def htod_snapshot_array(G):
"""Initialise array on GPU for to store field data for snapshots.
def htod_snapshot_array(G, queue=None):
"""Initialise array on compute device for to store field data for snapshots.
Args:
G (FDTDGrid): Parameters describing a grid in a model.
G: FDTDGrid object with parameters describing a grid in a model.
queue: pyopencl queue.
Returns:
snapE_gpu, snapH_gpu (float): numpy arrays of snapshot data on GPU.
snapE_dev, snapH_dev: float arrays of snapshot data on compute device.
"""
import pycuda.gpuarray as gpuarray
# Get dimensions of largest requested snapshot
for snap in G.snapshots:
if snap.nx > Snapshot.nx_max:
@@ -244,15 +243,21 @@ def htod_snapshot_array(G):
if snap.nz > Snapshot.nz_max:
Snapshot.nz_max = snap.nz
# GPU - blocks per grid - according to largest requested snapshot
Snapshot.bpg = (int(np.ceil(((Snapshot.nx_max) *
(Snapshot.ny_max) *
(Snapshot.nz_max)) / Snapshot.tpb[0])), 1, 1)
if config.sim_config.general['solver'] == 'cuda':
# Blocks per grid - according to largest requested snapshot
Snapshot.bpg = (int(np.ceil(((Snapshot.nx_max) *
(Snapshot.ny_max) *
(Snapshot.nz_max)) / Snapshot.tpb[0])), 1, 1)
elif config.sim_config.general['solver'] == 'opencl':
# Workgroup size - according to largest requested snapshot
Snapshot.wgs = (int(np.ceil(((Snapshot.nx_max) *
(Snapshot.ny_max) *
(Snapshot.nz_max)))), 1, 1)
# 4D arrays to store snapshots on GPU, e.g. snapEx(time, x, y, z);
# if snapshots are not being stored on the GPU during the simulation then
# they are copied back to the host after each iteration, hence numsnaps = 1
numsnaps = 1 if config.get_model_config().cuda['snapsgpu2cpu'] else len(G.snapshots)
numsnaps = 1 if config.get_model_config().device['snapsgpu2cpu'] else len(G.snapshots)
snapEx = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max),
dtype=config.sim_config.dtypes['float_or_double'])
snapEy = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max),
@@ -266,29 +271,41 @@ def htod_snapshot_array(G):
snapHz = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max),
dtype=config.sim_config.dtypes['float_or_double'])
# Copy arrays to GPU
snapEx_gpu = gpuarray.to_gpu(snapEx)
snapEy_gpu = gpuarray.to_gpu(snapEy)
snapEz_gpu = gpuarray.to_gpu(snapEz)
snapHx_gpu = gpuarray.to_gpu(snapHx)
snapHy_gpu = gpuarray.to_gpu(snapHy)
snapHz_gpu = gpuarray.to_gpu(snapHz)
# Copy arrays to compute device
if config.sim_config.general['solver'] == 'cuda':
import pycuda.gpuarray as gpuarray
snapEx_dev = gpuarray.to_gpu(snapEx)
snapEy_dev = gpuarray.to_gpu(snapEy)
snapEz_dev = gpuarray.to_gpu(snapEz)
snapHx_dev = gpuarray.to_gpu(snapHx)
snapHy_dev = gpuarray.to_gpu(snapHy)
snapHz_dev = gpuarray.to_gpu(snapHz)
return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu
elif config.sim_config.general['solver'] == 'opencl':
import pyopencl.array as clarray
snapEx_dev = clarray.to_device(queue, snapEx)
snapEy_dev = clarray.to_device(queue, snapEy)
snapEz_dev = clarray.to_device(queue, snapEz)
snapHx_dev = clarray.to_device(queue, snapHx)
snapHy_dev = clarray.to_device(queue, snapHy)
snapHz_dev = clarray.to_device(queue, snapHz)
return snapEx_dev, snapEy_dev, snapEz_dev, snapHx_dev, snapHy_dev, snapHz_dev
def dtoh_snapshot_array(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap):
"""Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview.
def dtoh_snapshot_array(snapEx_dev, snapEy_dev, snapEz_dev, snapHx_dev, snapHy_dev, snapHz_dev, i, snap):
"""Copy snapshot array used on compute device back to snapshot objects and
store in format for Paraview.
Args:
snapE_gpu, snapH_gpu (float): numpy arrays of snapshot data from GPU.
i (int): index for snapshot data on GPU array.
snap (class): Snapshot class instance
snapE_dev, snapH_dev: float arrays of snapshot data from compute device.
i: int for index of snapshot data on compute device array.
snap: Snapshot class instance
"""
snap.Exsnap = snapEx_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Eysnap = snapEy_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Ezsnap = snapEz_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Hxsnap = snapHx_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Hysnap = snapHy_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Hzsnap = snapHz_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Exsnap = snapEx_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Eysnap = snapEy_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Ezsnap = snapEz_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Hxsnap = snapHx_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Hysnap = snapHy_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Hzsnap = snapHz_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
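The snapshot launch configuration above differs only in how the largest requested snapshot volume is divided: CUDA uses blocks per grid derived from Snapshot.tpb, while OpenCL uses a flat workgroup size. A short arithmetic sketch of the CUDA branch with illustrative (assumed) values:

import numpy as np

nx_max, ny_max, nz_max = 100, 100, 100   # largest requested snapshot (example values)
tpb = (128, 1, 1)                        # threads per block (example value)
bpg = (int(np.ceil(nx_max * ny_max * nz_max / tpb[0])), 1, 1)
# bpg == (7813, 1, 1): enough 128-thread blocks to cover 1,000,000 cells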

View file

@@ -18,22 +18,24 @@
import gprMax.config as config
from .grid import CUDAGrid, FDTDGrid
from .grid import CUDAGrid, FDTDGrid, OpenCLGrid
from .subgrids.updates import create_updates as create_subgrid_updates
from .updates import CPUUpdates, CUDAUpdates
from .updates import CPUUpdates, CUDAUpdates, OpenCLUpdates
def create_G():
"""Create grid object according to solver.
Returns:
G (FDTDGrid): Holds essential parameters describing the model.
G: FDTDGrid that holds essential parameters describing the model.
"""
if config.sim_config.general['cpu']:
if config.sim_config.general['solver'] == 'cpu':
G = FDTDGrid()
elif config.sim_config.general['cuda']:
elif config.sim_config.general['solver'] == 'cuda':
G = CUDAGrid()
elif config.sim_config.general['solver'] == 'opencl':
G = OpenCLGrid()
return G
@@ -42,10 +44,10 @@ def create_solver(G):
"""Create configured solver object.
Args:
G (FDTDGrid): Holds essential parameters describing the model.
G: FDTDGrid that holds essential parameters describing the model.
Returns:
solver (Solver): solver object.
solver: Solver object.
"""
if config.sim_config.general['subgrid']:
@@ -56,14 +58,17 @@ def create_solver(G):
# the required numerical precision and dispersive material type.
props = updates.adapt_dispersive_config()
updates.set_dispersive_updates(props)
elif config.sim_config.general['cpu']:
elif config.sim_config.general['solver'] == 'cpu':
updates = CPUUpdates(G)
solver = Solver(updates)
props = updates.adapt_dispersive_config()
updates.set_dispersive_updates(props)
elif config.sim_config.general['cuda']:
elif config.sim_config.general['solver'] == 'cuda':
updates = CUDAUpdates(G)
solver = Solver(updates)
elif config.sim_config.general['solver'] == 'opencl':
updates = OpenCLUpdates(G)
solver = Solver(updates)
return solver
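With the solver selection keyed on config.sim_config.general['solver'], the calling pattern is the same for CPU, CUDA, and OpenCL. A minimal usage sketch (the iterator passed to solve() can be range() or tqdm(), as noted in its docstring):

G = create_G()                     # FDTDGrid, CUDAGrid or OpenCLGrid
# ... build geometry, sources and receivers on G ...
solver = create_solver(G)
tsolve, memsolve = solver.solve(range(G.iterations))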
@@ -74,8 +79,8 @@ class Solver:
def __init__(self, updates, hsg=False):
"""
Args:
updates (Updates): Updates contains methods to run FDTD algorithm.
hsg (bool): Use sub-gridding.
updates: Updates contains methods to run FDTD algorithm.
hsg: bool to use sub-gridding.
"""
self.updates = updates
@@ -85,13 +90,14 @@ class Solver:
"""Time step the FDTD model.
Args:
iterator (iterator): can be range() or tqdm()
iterator: can be range() or tqdm()
Returns:
tsolve (float): Time taken to execute solving (seconds).
memsolve (float): Memory (RAM) used.
tsolve: float for time taken to execute solving (seconds).
memsolve: float for memory (RAM) used.
"""
memsolve = 0
self.updates.time_start()
for iteration in iterator:
@@ -108,7 +114,8 @@ class Solver:
if self.hsg:
self.updates.hsg_1()
self.updates.update_electric_b()
memsolve = self.updates.calculate_memsolve(iteration) if config.sim_config.general['cuda'] else None
if config.sim_config.general['solver'] == 'cuda':
memsolve = self.updates.calculate_memsolve(iteration)
self.updates.finalise()
tsolve = self.updates.calculate_tsolve()

View file

@@ -45,7 +45,8 @@ class Source:
"""Calculates all waveform values for source for duration of simulation.
Args:
G (FDTDGrid): Holds essential parameters describing the model.
G: FDTDGrid object that holds essential parameters describing the
model.
"""
# Waveform values for electric sources - calculated half a timestep later
self.waveformvaluesJ = np.zeros((G.iterations),
@@ -82,13 +83,14 @@ class VoltageSource(Source):
"""Updates electric field values for a voltage source.
Args:
iteration (int): Current iteration (timestep).
updatecoeffsE (memory view): numpy array of electric field update
coefficients.
ID (memory view): numpy array of numeric IDs corresponding to
materials in the model.
Ex, Ey, Ez (memory view): numpy array of electric field values.
G (FDTDGrid): Holds essential parameters describing the model.
iteration: int of current iteration (timestep).
updatecoeffsE: memory view of array of electric field update
coefficients.
ID: memory view of array of numeric IDs corresponding to materials
in the model.
Ex, Ey, Ez: memory view of array of electric field values.
G: FDTDGrid object that holds essential parameters describing the
model.
"""
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
@@ -126,7 +128,8 @@ class VoltageSource(Source):
voltage source conductivity to the underlying parameters.
Args:
G (FDTDGrid): Holds essential parameters describing the model.
G: FDTDGrid object that holds essential parameters describing the
model.
"""
if self.resistance != 0:
@@ -166,13 +169,14 @@ class HertzianDipole(Source):
"""Updates electric field values for a Hertzian dipole.
Args:
iteration (int): Current iteration (timestep).
updatecoeffsE (memory view): numpy array of electric field update
coefficients.
ID (memory view): numpy array of numeric IDs corresponding to
materials in the model.
Ex, Ey, Ez (memory view): numpy array of electric field values.
G (FDTDGrid): Holds essential parameters describing the model.
iteration: int of current iteration (timestep).
updatecoeffsE: memory view of array of electric field update
coefficients.
ID: memory view of array of numeric IDs corresponding to materials
in the model.
Ex, Ey, Ez: memory view of array of electric field values.
G: FDTDGrid object that holds essential parameters describing the
model.
"""
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
@@ -203,13 +207,14 @@ class MagneticDipole(Source):
"""Updates magnetic field values for a magnetic dipole.
Args:
iteration (int): Current iteration (timestep).
updatecoeffsH (memory view): numpy array of magnetic field update
coefficients.
ID (memory view): numpy array of numeric IDs corresponding to
materials in the model.
Hx, Hy, Hz (memory view): numpy array of magnetic field values.
G (FDTDGrid): Holds essential parameters describing the model.
iteration: int of current iteration (timestep).
updatecoeffsH: memory view of array of magnetic field update
coefficients.
ID: memory view of array of numeric IDs corresponding to materials
in the model.
Hx, Hy, Hz: memory view of array of magnetic field values.
G: FDTDGrid object that holds essential parameters describing the
model.
"""
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
@@ -234,24 +239,23 @@ class MagneticDipole(Source):
(1 / (G.dx * G.dy * G.dz)))
def htod_src_arrays(sources, G):
"""Initialise arrays on GPU for source coordinates/polarisation, other
source information, and source waveform values.
def htod_src_arrays(sources, G, queue=None):
"""Initialise arrays on compute device for source coordinates/polarisation,
other source information, and source waveform values.
Args:
sources (list): List of sources of one type, e.g. HertzianDipole
G (FDTDGrid): Holds essential parameters describing the model.
sources: list of sources of one type, e.g. HertzianDipole
G: FDTDGrid object that holds essential parameters describing the model.
queue: pyopencl queue.
Returns:
srcinfo1_gpu (int): numpy array of source cell coordinates and
polarisation information.
srcinfo2_gpu (float): numpy array of other source information,
e.g. length, resistance etc...
srcwaves_gpu (float): numpy array of source waveform values.
srcinfo1_dev: int array of source cell coordinates and polarisation
information.
srcinfo2_dev: float array of other source information, e.g. length,
resistance etc...
srcwaves_dev: float array of source waveform values.
"""
import pycuda.gpuarray as gpuarray
srcinfo1 = np.zeros((len(sources), 4), dtype=np.int32)
srcinfo2 = np.zeros((len(sources)), dtype=config.sim_config.dtypes['float_or_double'])
srcwaves = np.zeros((len(sources), G.iterations), dtype=config.sim_config.dtypes['float_or_double'])
@@ -276,11 +280,19 @@ def htod_src_arrays(sources, G):
elif src.__class__.__name__ == 'MagneticDipole':
srcwaves[i, :] = src.waveformvaluesM
srcinfo1_gpu = gpuarray.to_gpu(srcinfo1)
srcinfo2_gpu = gpuarray.to_gpu(srcinfo2)
srcwaves_gpu = gpuarray.to_gpu(srcwaves)
# Copy arrays to compute device
if config.sim_config.general['solver'] == 'cuda':
import pycuda.gpuarray as gpuarray
srcinfo1_dev = gpuarray.to_gpu(srcinfo1)
srcinfo2_dev = gpuarray.to_gpu(srcinfo2)
srcwaves_dev = gpuarray.to_gpu(srcwaves)
elif config.sim_config.general['solver'] == 'opencl':
import pyopencl.array as clarray
srcinfo1_dev = clarray.to_device(queue, srcinfo1)
srcinfo2_dev = clarray.to_device(queue, srcinfo2)
srcwaves_dev = clarray.to_device(queue, srcwaves)
return srcinfo1_gpu, srcinfo2_gpu, srcwaves_gpu
return srcinfo1_dev, srcinfo2_dev, srcwaves_dev
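OpenCLUpdates later in this commit calls this function with its command queue, while the CUDA path omits it. A condensed usage sketch based on _set_src_knls:

# OpenCL: pass the pyopencl queue so arrays are copied with clarray.to_device
srcinfo1_dev, srcinfo2_dev, srcwaves_dev = htod_src_arrays(G.hertziandipoles, G, queue)

# CUDA: no queue needed; arrays are copied with gpuarray.to_gpu
srcinfo1_dev, srcinfo2_dev, srcwaves_dev = htod_src_arrays(G.hertziandipoles, G)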
class TransmissionLine(Source):
@@ -291,7 +303,8 @@ class TransmissionLine(Source):
def __init__(self, G):
"""
Args:
G (FDTDGrid): Holds essential parameters describing the model.
G: FDTDGrid object that holds essential parameters describing the
model.
"""
super().__init__()
@@ -328,7 +341,8 @@ class TransmissionLine(Source):
from: http://dx.doi.org/10.1002/mop.10415
Args:
G (FDTDGrid): Holds essential parameters describing the model.
G: FDTDGrid object that holds essential parameters describing the
model.
"""
for iteration in range(G.iterations):
@@ -344,7 +358,8 @@ class TransmissionLine(Source):
"""Updates absorbing boundary condition at end of the transmission line.
Args:
G (FDTDGrid): Holds essential parameters describing the model.
G: FDTDGrid object that holds essential parameters describing the
model.
"""
h = (config.c * G.dt - self.dl) / (config.c * G.dt + self.dl)
@@ -357,8 +372,9 @@ class TransmissionLine(Source):
"""Updates voltage values along the transmission line.
Args:
iteration (int): Current iteration (timestep).
G (FDTDGrid): Holds essential parameters describing the model.
iteration: int of current iteration (timestep).
G: FDTDGrid object that holds essential parameters describing the
model.
"""
# Update all the voltage values along the line
@@ -375,8 +391,9 @@ class TransmissionLine(Source):
"""Updates current values along the transmission line.
Args:
iteration (int): Current iteration (timestep).
G (FDTDGrid): Holds essential parameters describing the model.
iteration: int of current iteration (timestep).
G: FDTDGrid object that holds essential parameters describing the
model.
"""
# Update all the current values along the line
@@ -393,13 +410,14 @@ class TransmissionLine(Source):
the transmission line.
Args:
iteration (int): Current iteration (timestep).
updatecoeffsE (memory view): numpy array of electric field update
coefficients.
ID (memory view): numpy array of numeric IDs corresponding to
materials in the model.
Ex, Ey, Ez (memory view): numpy array of electric field values.
G (FDTDGrid): Holds essential parameters describing the model.
iteration: int of current iteration (timestep).
updatecoeffsE: memory view of array of electric field update
coefficients.
ID: memory view of array of numeric IDs corresponding to materials
in the model.
Ex, Ey, Ez: memory view of array of electric field values.
G: FDTDGrid object that holds essential parameters describing the
model.
"""
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
@@ -423,13 +441,14 @@ class TransmissionLine(Source):
in the main grid.
Args:
iteration (int): Current iteration (timestep).
updatecoeffsH (memory view): numpy array of magnetic field update
coefficients.
ID (memory view): numpy array of numeric IDs corresponding to
materials in the model.
Hx, Hy, Hz (memory view): numpy array of magnetic field values.
G (FDTDGrid): Holds essential parameters describing the model.
iteration: int of current iteration (timestep).
updatecoeffsH: memory view of array of magnetic field update
coefficients.
ID: memory view of array of numeric IDs corresponding to materials
in the model.
Hx, Hy, Hz: memory view of array of magnetic field values.
G: FDTDGrid object that holds essential parameters describing the
model.
"""
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:

View file

@@ -18,19 +18,21 @@
import logging
from importlib import import_module
from string import Template
import numpy as np
from jinja2 import Environment, PackageLoader
import gprMax.config as config
from .cuda.fields_updates import kernel_template_fields
from .cuda.snapshots import kernel_template_store_snapshot
from .cuda.source_updates import kernel_template_sources
from .cuda.snapshots import knl_template_store_snapshot
from .cuda_opencl_el import (knl_fields_updates, knl_snapshots,
knl_source_updates, knl_store_outputs)
from .cython.fields_updates_normal import \
update_electric as update_electric_cpu
from .cython.fields_updates_normal import \
update_magnetic as update_magnetic_cpu
from .fields_outputs import kernel_template_store_outputs
from .fields_outputs import knl_template_store_outputs
from .fields_outputs import store_outputs as store_outputs_cpu
from .receivers import dtoh_rx_array, htod_rx_arrays
from .snapshots import Snapshot, dtoh_snapshot_array, htod_snapshot_array
@@ -256,27 +258,27 @@ class CUDAUpdates:
self.source_module = getattr(import_module('pycuda.compiler'), 'SourceModule')
self.drv.init()
# Create device handle and context on specifc GPU device (and make it current context)
# Create device handle and context on specific GPU device (and make it current context)
self.dev = self.drv.Device(config.get_model_config().cuda['gpu'].deviceID)
self.ctx = self.dev.make_context()
# Initialise arrays on GPU, prepare kernels, and get kernel functions
self._set_field_kernels()
self._set_field_knls()
if self.grid.pmls:
self._set_pml_kernels()
self._set_pml_knls()
if self.grid.rxs:
self._set_rx_kernel()
self._set_rx_knl()
if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles:
self._set_src_kernels()
self._set_src_knls()
if self.grid.snapshots:
self._set_snapshot_kernel()
self._set_snapshot_knl()
def _set_field_kernels(self):
def _set_field_knls(self):
"""Electric and magnetic field updates - prepare kernels, and
get kernel functions.
"""
if config.get_model_config().materials['maxpoles'] > 0:
kernels_fields = self.source_module(kernel_template_fields.substitute(
knls_fields = self.source_module(knl_template_fields.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'],
REALFUNC=config.get_model_config().materials['cudarealfunc'],
COMPLEX=config.get_model_config().materials['dispersiveCdtype'],
@@ -296,7 +298,7 @@ class CUDAUpdates:
options=config.sim_config.cuda['nvcc_opts'])
else: # Set to one any substitutions for dispersive materials.
# Value of COMPLEX is not relevant.
kernels_fields = self.source_module(kernel_template_fields.substitute(
knls_fields = self.source_module(knl_template_fields.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'],
REALFUNC=config.get_model_config().materials['cudarealfunc'],
COMPLEX=config.sim_config.dtypes['C_float_or_double'],
@@ -314,17 +316,17 @@ class CUDAUpdates:
NY_T=1,
NZ_T=1),
options=config.sim_config.cuda['nvcc_opts'])
self.update_electric_gpu = kernels_fields.get_function("update_electric")
self.update_magnetic_gpu = kernels_fields.get_function("update_magnetic")
self._copy_mat_coeffs(kernels_fields, kernels_fields)
self.update_electric_gpu = knls_fields.get_function("update_electric")
self.update_magnetic_gpu = knls_fields.get_function("update_magnetic")
self._copy_mat_coeffs(knls_fields, knls_fields)
# Electric and magnetic field updates - dispersive materials
# - get kernel functions and initialise array on GPU
# If there are any dispersive materials (updates are split into two
# parts as they require present and updated electric field values).
if config.get_model_config().materials['maxpoles'] > 0:
self.dispersive_update_a = kernels_fields.get_function("update_electric_dispersive_A")
self.dispersive_update_b = kernels_fields.get_function("update_electric_dispersive_B")
self.dispersive_update_a = knls_fields.get_function("update_electric_dispersive_A")
self.dispersive_update_b = knls_fields.get_function("update_electric_dispersive_B")
# Electric and magnetic field updates - set blocks per grid and
# initialise field arrays on GPU
@@ -334,17 +336,17 @@ class CUDAUpdates:
if config.get_model_config().materials['maxpoles'] > 0:
self.grid.htod_dispersive_arrays()
def _set_pml_kernels(self):
def _set_pml_knls(self):
"""PMLS - prepare kernels and get kernel functions."""
pmlmodulelectric = 'gprMax.cuda.pml_updates_electric_' + self.grid.pmlformulation
kernelelectricfunc = getattr(import_module(pmlmodulelectric),
'kernels_template_pml_electric_' +
knlelectricfunc = getattr(import_module(pmlmodulelectric),
'knls_template_pml_electric_' +
self.grid.pmlformulation)
pmlmodulemagnetic = 'gprMax.cuda.pml_updates_magnetic_' + self.grid.pmlformulation
kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic),
'kernels_template_pml_magnetic_' +
knlmagneticfunc = getattr(import_module(pmlmodulemagnetic),
'knls_template_pml_magnetic_' +
self.grid.pmlformulation)
kernels_pml_electric = self.source_module(kernelelectricfunc.substitute(
knls_pml_electric = self.source_module(knlelectricfunc.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'],
N_updatecoeffsE=self.grid.updatecoeffsE.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
@@ -355,7 +357,7 @@ class CUDAUpdates:
NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3]),
options=config.sim_config.cuda['nvcc_opts'])
kernels_pml_magnetic = self.source_module(kernelmagneticfunc.substitute(
knls_pml_magnetic = self.source_module(knlmagneticfunc.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'],
N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1],
@@ -366,19 +368,19 @@ class CUDAUpdates:
NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3]),
options=config.sim_config.cuda['nvcc_opts'])
self._copy_mat_coeffs(kernels_pml_electric, kernels_pml_magnetic)
self._copy_mat_coeffs(knls_pml_electric, knls_pml_magnetic)
# Set block per grid, initialise arrays on GPU, and get kernel functions
for pml in self.grid.pmls:
pml.htod_field_arrays()
pml.set_blocks_per_grid()
pml.get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
pml.get_update_funcs(knls_pml_electric, knls_pml_magnetic)
def _set_rx_kernel(self):
def _set_rx_knl(self):
"""Receivers - initialise arrays on GPU, prepare kernel and get kernel
function.
"""
self.rxcoords_gpu, self.rxs_gpu = htod_rx_arrays(self.grid)
kernel_store_outputs = self.source_module(kernel_template_store_outputs.substitute(
knl_store_outputs = self.source_module(knl_template_store_outputs.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'],
NY_RXCOORDS=3,
NX_RXS=6,
@@ -388,13 +390,13 @@ class CUDAUpdates:
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1),
options=config.sim_config.cuda['nvcc_opts'])
self.store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
self.store_outputs_gpu = knl_store_outputs.get_function("store_outputs")
def _set_src_kernels(self):
def _set_src_knls(self):
"""Sources - initialise arrays on GPU, prepare kernel and get kernel
function.
"""
kernels_sources = self.source_module(kernel_template_sources.substitute(
knls_sources = self.source_module(knl_template_sources.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'],
N_updatecoeffsE=self.grid.updatecoeffsE.size,
N_updatecoeffsH=self.grid.updatecoeffsH.size,
@@ -408,23 +410,23 @@ class CUDAUpdates:
NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3]),
options=config.sim_config.cuda['nvcc_opts'])
self._copy_mat_coeffs(kernels_sources, kernels_sources)
self._copy_mat_coeffs(knls_sources, knls_sources)
if self.grid.hertziandipoles:
self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = htod_src_arrays(self.grid.hertziandipoles, self.grid)
self.update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole")
self.update_hertzian_dipole_gpu = knls_sources.get_function("update_hertzian_dipole")
if self.grid.magneticdipoles:
self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = htod_src_arrays(self.grid.magneticdipoles, self.grid)
self.update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole")
self.update_magnetic_dipole_gpu = knls_sources.get_function("update_magnetic_dipole")
if self.grid.voltagesources:
self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = htod_src_arrays(self.grid.voltagesources, self.grid)
self.update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source")
self.update_voltage_source_gpu = knls_sources.get_function("update_voltage_source")
def _set_snapshot_kernel(self):
def _set_snapshot_knl(self):
"""Snapshots - initialise arrays on GPU, prepare kernel and get kernel
function.
"""
self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = htod_snapshot_array(self.grid)
kernel_store_snapshot = self.source_module(kernel_template_store_snapshot.substitute(
knl_store_snapshot = self.source_module(knl_template_store_snapshot.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'],
NX_SNAPS=Snapshot.nx_max,
NY_SNAPS=Snapshot.ny_max,
@@ -433,25 +435,25 @@ class CUDAUpdates:
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1),
options=config.sim_config.cuda['nvcc_opts'])
self.store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
self.store_snapshot_gpu = knl_store_snapshot.get_function("store_snapshot")
def _copy_mat_coeffs(self, kernelE, kernelH):
def _copy_mat_coeffs(self, knlE, knlH):
"""Copy material coefficient arrays to constant memory of GPU
(must be <64KB).
Args:
kernelE (kernel): electric field kernel.
kernelH (kernel): magnetic field kernel.
knlE (kernel): electric field kernel.
knlH (kernel): magnetic field kernel.
"""
# Check if coefficient arrays will fit on constant memory of GPU
if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes
> config.get_model_config().cuda['gpu'].constmem):
logger.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.get_model_config().cuda['gpu'].constmem)} on {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU")
> config.get_model_config().cuda['gpu'].total_constant_memory):
logger.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.get_model_config().cuda['gpu'].total_constant_memory)} on {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU")
raise ValueError
updatecoeffsE = kernelE.get_global('updatecoeffsE')[0]
updatecoeffsH = kernelH.get_global('updatecoeffsH')[0]
updatecoeffsE = knlE.get_global('updatecoeffsE')[0]
updatecoeffsH = knlH.get_global('updatecoeffsH')[0]
self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE)
self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH)
@@ -707,3 +709,595 @@ class CUDAUpdates:
# Remove context from top of stack and delete
self.ctx.pop()
del self.ctx
class OpenCLUpdates:
"""Defines update functions for OpenCL-based solver."""
def __init__(self, G):
"""
Args:
G: FDTDGrid object of parameters describing a grid in a model.
"""
self.grid = G
self.dispersive_update_a = None
self.dispersive_update_b = None
self.compute_time = 0
# Import pyopencl module
self.cl = import_module('pyopencl')
self.elwise = getattr(import_module('pyopencl.elementwise'), 'ElementwiseKernel')
# Select device, create context and command queue
self.dev = config.get_model_config().device['dev']
self.ctx = self.cl.Context(devices=[self.dev])
self.queue = self.cl.CommandQueue(self.ctx,
properties=self.cl.command_queue_properties.PROFILING_ENABLE)
# Environment for templating kernels
self.env = Environment(loader=PackageLoader('gprMax', 'cuda_opencl_el'))
# Initialise arrays on device, prepare kernels, and get kernel functions
self._set_field_knls()
if self.grid.pmls:
self._set_pml_knls()
if self.grid.rxs:
self._set_rx_knl()
if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles:
self._set_src_knls()
if self.grid.snapshots:
self._set_snapshot_knl()
def _set_field_knls(self):
"""Electric and magnetic field updates - prepare kernels, and
get kernel functions.
"""
if config.get_model_config().materials['maxpoles'] > 0:
NY_MATDISPCOEFFS = self.grid.updatecoeffsdispersive.shape[1]
NX_T = self.grid.Tx.shape[1]
NY_T = self.grid.Tx.shape[2]
NZ_T = self.grid.Tx.shape[3]
else: # Set to one any substitutions for dispersive materials.
NY_MATDISPCOEFFS = 1
NX_T = 1
NY_T = 1
NZ_T = 1
self.knl_common = self.env.get_template('knl_common_opencl.tmpl').render(
updatecoeffsE = self.grid.updatecoeffsE.ravel(),
updatecoeffsH = self.grid.updatecoeffsH.ravel(),
REAL=config.sim_config.dtypes['C_float_or_double'],
N_updatecoeffsE=self.grid.updatecoeffsE.size,
N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
NY_MATDISPCOEFFS=NY_MATDISPCOEFFS,
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3],
NX_T=NX_T,
NY_T=NY_T,
NZ_T=NZ_T,
NY_RXCOORDS=3,
NX_RXS=6,
NY_RXS=self.grid.iterations,
NZ_RXS=len(self.grid.rxs),
NY_SRCINFO=4,
NY_SRCWAVES=self.grid.iterations,
NX_SNAPS=Snapshot.nx_max,
NY_SNAPS=Snapshot.ny_max,
NZ_SNAPS=Snapshot.nz_max)
self.update_electric_dev = self.elwise(self.ctx,
Template("int NX, "
"int NY, "
"int NZ, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez, "
"__global const $REAL * restrict Hx, "
"__global const $REAL * restrict Hy, "
"__global const $REAL * restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_fields_updates.update_electric.substitute({
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}),
'update_electric', preamble=self.knl_common)
self.update_magnetic_dev = self.elwise(self.ctx,
Template("int NX, "
"int NY, "
"int NZ, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Hx, "
"__global $REAL *Hy, "
"__global $REAL *Hz, "
"__global const $REAL * restrict Ex, "
"__global const $REAL * restrict Ey, "
"__global const $REAL * restrict Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_fields_updates.update_magnetic.substitute({
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}),
'update_magnetic', preamble=self.knl_common)
# Electric and magnetic field updates - dispersive materials -
# get kernel functions
# If there are any dispersive materials (updates are split into two
# parts as they require present and updated electric field values).
if config.get_model_config().materials['maxpoles'] > 0:
self.dispersive_update_a = self.elwise(self.ctx,
Template("int NX, "
"int NY, "
"int NZ, "
"int MAXPOLES, "
"__global const $COMPLEX* restrict updatecoeffsdispersive, "
"__global $COMPLEX *Tx, "
"__global $COMPLEX *Ty, "
"__global $COMPLEX *Tz, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double'], 'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}),
knl_fields_updates.update_electric_dispersive_A.substitute({
'REAL': config.sim_config.dtypes['C_float_or_double'],
'REALFUNC': config.get_model_config().materials['crealfunc'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3],
'NX_T': NX_T,
'NY_T': NY_T,
'NZ_T': NZ_T}),
'update_electric_dispersive_A', preamble=self.knl_common)
self.dispersive_update_b = self.elwise(self.ctx,
Template("int NX, "
"int NY, "
"int NZ, "
"int MAXPOLES, "
"__global const $COMPLEX* restrict updatecoeffsdispersive, "
"__global $COMPLEX *Tx, "
"__global $COMPLEX *Ty, "
"__global $COMPLEX *Tz, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double'] ,'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}),
knl_fields_updates.update_electric_dispersive_B.substitute({
'REAL': config.sim_config.dtypes['C_float_or_double'],
'REALFUNC': config.get_model_config().materials['crealfunc'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3],
'NX_T': NX_T,
'NY_T': NY_T,
'NZ_T': NZ_T}),
'update_electric_dispersive_B', preamble=self.knl_common)
# Electric and magnetic field updates - initialise field arrays on
# compute device
self.grid.htod_geometry_arrays(self.queue)
self.grid.htod_field_arrays(self.queue)
if config.get_model_config().materials['maxpoles'] > 0:
self.grid.htod_dispersive_arrays(self.queue)
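Every OpenCL kernel in this class is built with pyopencl.elementwise.ElementwiseKernel: a context, an argument signature string, a kernel body operating on the implicit element index i, a kernel name, and the templated knl_common preamble. A minimal, self-contained sketch of that construction pattern (unrelated to the field-update kernels themselves):

import numpy as np
import pyopencl as cl
import pyopencl.array as clarray
from pyopencl.elementwise import ElementwiseKernel

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# Same pattern as update_electric_dev: context, argument string, body using
# the implicit element index i, kernel name (preamble omitted here).
axpb = ElementwiseKernel(ctx,
                         "float k, float *a, float *b, float *out",
                         "out[i] = k * a[i] + b[i]",
                         "axpb")

a = clarray.to_device(queue, np.arange(8, dtype=np.float32))
b = clarray.to_device(queue, np.ones(8, dtype=np.float32))
out = clarray.empty_like(a)
event = axpb(np.float32(2.0), a, b, out)   # returns an event, like the PML updates above
event.wait()
print(out.get())   # [ 1.  3.  5.  7.  9. 11. 13. 15.]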
def _set_pml_knls(self):
"""PMLS - prepare kernels and get kernel functions."""
knl_pml_updates_electric = import_module('gprMax.cuda_opencl_el.knl_pml_updates_electric_' + self.grid.pmlformulation)
knl_pml_updates_magnetic = import_module('gprMax.cuda_opencl_el.knl_pml_updates_magnetic_' + self.grid.pmlformulation)
# Set workgroup size, initialise arrays on compute device, and get
# kernel functions
for pml in self.grid.pmls:
pml.set_queue(self.queue)
pml.htod_field_arrays()
pml.set_wgs()
knl_name = 'order' + str(len(pml.CFS)) + '_' + pml.direction
knl_electric_name = getattr(knl_pml_updates_electric, knl_name)
knl_magnetic_name = getattr(knl_pml_updates_magnetic, knl_name)
pml.update_electric_dev = self.elwise(self.ctx,
knl_electric_name['args'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_electric_name['func'].substitute({
'REAL': config.sim_config.dtypes['C_float_or_double'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}),
'pml_updates_electric_' + knl_name,
preamble=self.knl_common)
pml.update_magnetic_dev = self.elwise(self.ctx,
knl_magnetic_name['args'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_magnetic_name['func'].substitute({
'REAL': config.sim_config.dtypes['C_float_or_double'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}),
'pml_updates_magnetic_' + knl_name,
preamble=self.knl_common)
def _set_rx_knl(self):
"""Receivers - initialise arrays on compute device, prepare kernel and
get kernel function.
"""
self.rxcoords_dev, self.rxs_dev = htod_rx_arrays(self.grid, self.queue)
self.store_outputs_dev = self.elwise(self.ctx,
Template("int NRX, "
"int iteration, "
"__global const int* restrict rxcoords, "
"__global $REAL *rxs, "
"__global const $REAL* restrict Ex, "
"__global const $REAL* restrict Ey, "
"__global const $REAL* restrict Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_store_outputs.store_outputs.substitute(),
'store_outputs', preamble=self.knl_common)
def _set_src_knls(self):
"""Sources - initialise arrays on compute device, prepare kernel and
get kernel function.
"""
if self.grid.hertziandipoles:
self.srcinfo1_hertzian_dev, self.srcinfo2_hertzian_dev, self.srcwaves_hertzian_dev = htod_src_arrays(self.grid.hertziandipoles, self.grid, self.queue)
self.update_hertzian_dipole_dev = self.elwise(self.ctx,
Template("int NHERTZDIPOLE, "
"int iteration, "
"$REAL dx, "
"$REAL dy, "
"$REAL dz, "
"__global const int* restrict srcinfo1, "
"__global const $REAL* restrict srcinfo2, "
"__global const $REAL* restrict srcwaveforms, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_hertzian_dipole.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
'update_hertzian_dipole', preamble=self.knl_common)
if self.grid.magneticdipoles:
self.srcinfo1_magnetic_dev, self.srcinfo2_magnetic_dev, self.srcwaves_magnetic_dev = htod_src_arrays(self.grid.magneticdipoles, self.grid, self.queue)
self.update_magnetic_dipole_dev = self.elwise(self.ctx,
Template("int NMAGDIPOLE, "
"int iteration, "
"$REAL dx, "
"$REAL dy, "
"$REAL dz, "
"__global const int* restrict srcinfo1, "
"__global const $REAL* restrict srcinfo2, "
"__global const $REAL* restrict srcwaveforms, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Hx, "
"__global $REAL *Hy, "
"__global $REAL *Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_magnetic_dipole.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
'update_magnetic_dipole', preamble=self.knl_common)
if self.grid.voltagesources:
self.srcinfo1_voltage_dev, self.srcinfo2_voltage_dev, self.srcwaves_voltage_dev = htod_src_arrays(self.grid.voltagesources, self.grid, self.queue)
self.update_voltage_source_dev = self.elwise(self.ctx,
Template("int NVOLTSRC, "
"int iteration, "
"$REAL dx, "
"$REAL dy, "
"$REAL dz, "
"__global const int* restrict srcinfo1, "
"__global const $REAL* restrict srcinfo2, "
"__global const $REAL* restrict srcwaveforms, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_voltage_source.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
'update_voltage_source', preamble=self.knl_common)
def _set_snapshot_knl(self):
"""Snapshots - initialise arrays on compute device, prepare kernel and
get kernel function.
"""
self.snapEx_dev, self.snapEy_dev, self.snapEz_dev, self.snapHx_dev, self.snapHy_dev, self.snapHz_dev = htod_snapshot_array(self.grid, self.queue)
self.store_snapshot_dev = self.elwise(self.ctx,
Template("int p, "
"int xs, "
"int xf, "
"int ys, "
"int yf, "
"int zs, "
"int zf, "
"int dx, "
"int dy, "
"int dz, "
"__global const $REAL* restrict Ex, "
"__global const $REAL* restrict Ey, "
"__global const $REAL* restrict Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz, "
"__global $REAL *snapEx, "
"__global $REAL *snapEy, "
"__global $REAL *snapEz, "
"__global $REAL *snapHx, "
"__global $REAL *snapHy, "
"__global $REAL *snapHz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_snapshots.store_snapshot.substitute({'NX_SNAPS': Snapshot.nx_max,
'NY_SNAPS': Snapshot.ny_max,
'NZ_SNAPS': Snapshot.nz_max}),
'store_snapshot', preamble=self.knl_common)
def store_outputs(self):
"""Store field component values for every receiver."""
if self.grid.rxs:
event = self.store_outputs_dev(np.int32(len(self.grid.rxs)),
np.int32(self.grid.iteration),
self.rxcoords_dev,
self.rxs_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
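# Note (sketch, assumption): the event.profile.start/end timestamps used above
# are only available when the command queue has profiling enabled, i.e. it was
# created along the lines of:
import pyopencl as cl

def _make_profiling_queue(ctx):
    # Timestamps are reported in nanoseconds, hence the 1e-9 factor applied
    # when accumulating compute_time in seconds.
    return cl.CommandQueue(ctx,
                           properties=cl.command_queue_properties.PROFILING_ENABLE)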
def store_snapshots(self, iteration):
"""Store any snapshots.
Args:
iteration: int for iteration number.
"""
for i, snap in enumerate(self.grid.snapshots):
if snap.time == iteration + 1:
snapno = 0 if config.get_model_config().device['snapsgpu2cpu'] else i
event = self.store_snapshot_dev(np.int32(snapno),
np.int32(snap.xs),
np.int32(snap.xf),
np.int32(snap.ys),
np.int32(snap.yf),
np.int32(snap.zs),
np.int32(snap.zf),
np.int32(snap.dx),
np.int32(snap.dy),
np.int32(snap.dz),
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev,
self.snapEx_dev,
self.snapEy_dev,
self.snapEz_dev,
self.snapHx_dev,
self.snapHy_dev,
self.snapHz_dev)
event.wait()
if config.get_model_config().device['snapsgpu2cpu']:
dtoh_snapshot_array(self.snapEx_dev.get(),
self.snapEy_dev.get(),
self.snapEz_dev.get(),
self.snapHx_dev.get(),
self.snapHy_dev.get(),
self.snapHz_dev.get(),
0,
snap)
self.compute_time += (event.profile.end - event.profile.start)*1e-9
def update_magnetic(self):
"""Update magnetic field components."""
event = self.update_magnetic_dev(np.int32(self.grid.nx),
np.int32(self.grid.ny),
np.int32(self.grid.nz),
self.grid.ID_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
def update_magnetic_pml(self):
"""Update magnetic field components with the PML correction."""
for pml in self.grid.pmls:
pml.update_magnetic()
self.compute_time += pml.compute_time
def update_magnetic_sources(self):
"""Update magnetic field components from sources."""
if self.grid.magneticdipoles:
event = self.update_magnetic_dipole_dev(np.int32(len(self.grid.magneticdipoles)),
np.int32(self.grid.iteration),
config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_magnetic_dev,
self.srcinfo2_magnetic_dev,
self.srcwaves_magnetic_dev,
self.grid.ID_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
def update_electric_a(self):
"""Update electric field components."""
# All materials are non-dispersive so do standard update.
if config.get_model_config().materials['maxpoles'] == 0:
event = self.update_electric_dev(np.int32(self.grid.nx),
np.int32(self.grid.ny),
np.int32(self.grid.nz),
self.grid.ID_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
# If there are any dispersive materials do 1st part of dispersive update
# (it is split into two parts as it requires present and updated electric field values).
else:
event = self.dispersive_update_a(np.int32(self.grid.nx),
np.int32(self.grid.ny),
np.int32(self.grid.nz),
np.int32(config.get_model_config().materials['maxpoles']),
self.grid.updatecoeffsdispersive_dev,
self.grid.Tx_dev,
self.grid.Ty_dev,
self.grid.Tz_dev,
self.grid.ID_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
def update_electric_pml(self):
"""Update electric field components with the PML correction."""
for pml in self.grid.pmls:
pml.update_electric()
self.compute_time += pml.compute_time
def update_electric_sources(self):
"""Update electric field components from sources -
update any Hertzian dipole sources last.
"""
if self.grid.voltagesources:
event = self.update_voltage_source_dev(np.int32(len(self.grid.voltagesources)),
np.int32(self.grid.iteration),
config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_voltage_dev,
self.srcinfo2_voltage_dev,
self.srcwaves_voltage_dev,
self.grid.ID_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
if self.grid.hertziandipoles:
event = self.update_hertzian_dipole_dev(np.int32(len(self.grid.hertziandipoles)),
np.int32(self.grid.iteration),
config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_hertzian_dev,
self.srcinfo2_hertzian_dev,
self.srcwaves_hertzian_dev,
self.grid.ID_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
self.grid.iteration += 1
def update_electric_b(self):
"""If there are any dispersive materials do 2nd part of dispersive
update - it is split into two parts as it requires present and
updated electric field values. Therefore it can only be completely
updated after the electric field has been updated by the PML and
source updates.
"""
if config.get_model_config().materials['maxpoles'] > 0:
event = self.dispersive_update_b(np.int32(self.grid.nx),
np.int32(self.grid.ny),
np.int32(self.grid.nz),
np.int32(config.get_model_config().materials['maxpoles']),
self.grid.updatecoeffsdispersive_dev,
self.grid.Tx_dev,
self.grid.Ty_dev,
self.grid.Tz_dev,
self.grid.ID_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
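# Sketch of the per-iteration call order implied by the split A/B dispersive
# update described above (the real driving loop lives in the solver and is not
# part of this diff; the `updates` parameter name is an assumption):
def _iteration_order_sketch(updates):
    updates.store_outputs()
    updates.update_magnetic()
    updates.update_magnetic_pml()
    updates.update_magnetic_sources()
    updates.update_electric_a()        # standard update, or part A for dispersive media
    updates.update_electric_pml()
    updates.update_electric_sources()  # also advances grid.iteration
    updates.update_electric_b()        # part B: needs post-PML/post-source E-field values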
def time_start(self):
pass
def calculate_memsolve(self, iteration):
"""Calculate memory used on last iteration.
Args:
iteration: int of iteration number.
Returns:
Memory (RAM) used on compute device.
"""
# if iteration == self.grid.iterations - 1:
# return self.drv.mem_get_info()[1] - self.drv.mem_get_info()[0]
logger.debug('Look at memory estimate for pyopencl')
pass
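# Sketch of what a pyopencl-based estimate could report here. Unlike pycuda's
# drv.mem_get_info(), which returns (free, total) bytes for the active CUDA
# context, standard OpenCL only exposes a device's total global memory size.
def _device_total_memory_sketch():
    # CL_DEVICE_GLOBAL_MEM_SIZE of the device selected for this model
    return config.get_model_config().device['dev'].global_mem_size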
def calculate_tsolve(self):
"""Calculate solving time for model."""
return self.compute_time
def finalise(self):
"""Copy data from compute device back to CPU to save to file(s)."""
# Copy output from receivers array back to correct receiver objects
if self.grid.rxs:
dtoh_rx_array(self.rxs_dev.get(), self.rxcoords_dev.get(), self.grid)
# Copy data from any snapshots back to correct snapshot objects
if self.grid.snapshots and not config.get_model_config().device['snapsgpu2cpu']:
for i, snap in enumerate(self.grid.snapshots):
dtoh_snapshot_array(self.snapEx_dev.get(),
self.snapEy_dev.get(),
self.snapEz_dev.get(),
self.snapHx_dev.get(),
self.snapHy_dev.get(),
self.snapHz_dev.get(),
i, snap)
def cleanup(self):
"""Cleanup compute device context."""
logger.debug('Check if pyopencl needs explicit cleanup.')
# Remove context from top of stack and delete
# self.ctx.pop()
# del self.ctx

View file

@@ -26,7 +26,7 @@ import sys
import gprMax.config as config
import psutil
from .utilities import human_size
from .utilities import get_terminal_width, human_size
logger = logging.getLogger(__name__)
@@ -35,9 +35,9 @@ def get_host_info():
"""Get information about the machine, CPU, RAM, and OS.
Returns:
hostinfo (dict): Manufacturer and model of machine; description of CPU
type, speed, cores; RAM; name and
version of operating system.
hostinfo: dict containing manufacturer and model of machine;
description of CPU type, speed, cores; RAM; name and
version of operating system.
"""
# Default to 'unknown' if any of the detection fails
@@ -47,13 +47,17 @@ def get_host_info():
if sys.platform == 'win32':
# Manufacturer/model
try:
manufacturer = subprocess.check_output("wmic csproduct get vendor", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
manufacturer = subprocess.check_output("wmic csproduct get vendor",
shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
manufacturer = manufacturer.split('\n')
if len(manufacturer) > 1:
manufacturer = manufacturer[1]
else:
manufacturer = manufacturer[0]
model = subprocess.check_output("wmic computersystem get model", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
model = subprocess.check_output("wmic computersystem get model",
shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
model = model.split('\n')
if len(model) > 1:
model = model[1]
@@ -61,16 +65,19 @@ def get_host_info():
model = model[0]
except subprocess.CalledProcessError:
pass
machineID = manufacturer + ' ' + model
machineID = ' '.join(manufacturer.split()) + ' ' + ' '.join(model.split())
# CPU information
try:
allcpuinfo = subprocess.check_output("wmic cpu get Name", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
allcpuinfo = subprocess.check_output("wmic cpu get Name",
shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
allcpuinfo = allcpuinfo.split('\n')
sockets = 0
for line in allcpuinfo:
if 'CPU' in line:
cpuID = line.strip()
cpuID = ' '.join(cpuID.split())
sockets += 1
except subprocess.CalledProcessError:
pass
@@ -93,16 +100,21 @@ def get_host_info():
# Manufacturer/model
manufacturer = 'Apple'
try:
model = subprocess.check_output("sysctl -n hw.model", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
model = subprocess.check_output("sysctl -n hw.model", shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
except subprocess.CalledProcessError:
pass
machineID = manufacturer + ' ' + model
machineID = ' '.join(manufacturer.split()) + ' ' + ' '.join(model.split())
# CPU information
try:
sockets = subprocess.check_output("sysctl -n hw.packages", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
sockets = subprocess.check_output("sysctl -n hw.packages",
shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
sockets = int(sockets)
cpuID = subprocess.check_output("sysctl -n machdep.cpu.brand_string", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
cpuID = subprocess.check_output("sysctl -n machdep.cpu.brand_string",
shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
cpuID = ' '.join(cpuID.split())
except subprocess.CalledProcessError:
pass
@@ -123,21 +135,30 @@ def get_host_info():
elif sys.platform == 'linux':
# Manufacturer/model
try:
manufacturer = subprocess.check_output("cat /sys/class/dmi/id/sys_vendor", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
model = subprocess.check_output("cat /sys/class/dmi/id/product_name", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
manufacturer = subprocess.check_output("cat /sys/class/dmi/id/sys_vendor",
shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
model = subprocess.check_output("cat /sys/class/dmi/id/product_name",
shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
except subprocess.CalledProcessError:
pass
machineID = manufacturer + ' ' + model
machineID = ' '.join(manufacturer.split()) + ' ' + ' '.join(model.split())
# CPU information
try:
# Locale to ensure English
myenv = {**os.environ, 'LANG': 'en_US.utf8'}
cpuIDinfo = subprocess.check_output("cat /proc/cpuinfo", shell=True, stderr=subprocess.STDOUT, env=myenv).decode('utf-8').strip()
cpuIDinfo = subprocess.check_output("cat /proc/cpuinfo", shell=True,
stderr=subprocess.STDOUT,
env=myenv).decode('utf-8').strip()
for line in cpuIDinfo.split('\n'):
if re.search('model name', line):
cpuID = re.sub('.*model name.*:', '', line, 1).strip()
allcpuinfo = subprocess.check_output("lscpu", shell=True, stderr=subprocess.STDOUT, env=myenv).decode('utf-8').strip()
cpuID = ' '.join(cpuID.split())
allcpuinfo = subprocess.check_output("lscpu", shell=True,
stderr=subprocess.STDOUT,
env=myenv).decode('utf-8').strip()
for line in allcpuinfo.split('\n'):
if 'Socket(s)' in line:
sockets = int(line.strip()[-1])
@@ -177,11 +198,31 @@ def get_host_info():
return hostinfo
def print_host_info(hostinfo):
"""Print information about the machine, CPU, RAM, and OS.
Args:
hostinfo: dict containing manufacturer and model of machine;
description of CPU type, speed, cores; RAM; name and
version of operating system.
"""
hyperthreadingstr = (f", {config.sim_config.hostinfo['logicalcores']} "
f"cores with Hyper-Threading" if config.sim_config.hostinfo['hyperthreading'] else '')
logger.basic(f"\n{config.sim_config.hostinfo['hostname']} | "
f"{config.sim_config.hostinfo['machineID']} "
f"{hostinfo['sockets']} x {hostinfo['cpuID']} "
f"({hostinfo['physicalcores']} cores{hyperthreadingstr}) | "
f"{human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} | "
f"{hostinfo['osversion']}")
logger.basic(f"|--->OpenMP: {hostinfo['physicalcores']} threads")
def set_omp_threads(nthreads=None):
"""Sets the number of OpenMP CPU threads for parallelised parts of code.
Returns:
nthreads (int): Number of OpenMP threads.
nthreads: int for number of OpenMP threads.
"""
if sys.platform == 'darwin':
@@ -228,29 +269,39 @@ def mem_check_host(mem):
"""Check if the required amount of memory (RAM) is available on host.
Args:
mem (int): Memory required (bytes).
mem: int for memory required (bytes).
"""
if mem > config.sim_config.hostinfo['ram']:
logger.exception(f"Memory (RAM) required ~{human_size(mem)} exceeds {human_size(config.sim_config.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} detected!\n")
logger.exception(f"Memory (RAM) required ~{human_size(mem)} exceeds "
f"{human_size(config.sim_config.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} "
"detected!\n")
raise ValueError
def mem_check_gpu_snaps(total_mem, snaps_mem):
def mem_check_device_snaps(total_mem, snaps_mem):
"""Check if the required amount of memory (RAM) for all snapshots can fit
on specified GPU.
on specified device.
Args:
total_mem (int): Total memory required for model (bytes).
snaps_mem (int): Memory required for all snapshots (bytes).
total_mem: int for total memory required for model (bytes).
snaps_mem: int for memory required for all snapshots (bytes).
"""
if total_mem - snaps_mem > config.get_model_config().cuda['gpu'].totalmem:
logger.exception(f"Memory (RAM) required ~{human_size(total_mem)} exceeds {human_size(config.get_model_config().cuda['gpu'].totalmem, a_kilobyte_is_1024_bytes=True)} detected on specified {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU!\n")
if config.sim_config.general['solver'] == 'cuda':
device_mem = config.get_model_config().device['dev'].total_memory()
elif config.sim_config.general['solver'] == 'opencl':
device_mem = config.get_model_config().device['dev'].global_mem_size
if total_mem - snaps_mem > device_mem:
logger.exception(f"Memory (RAM) required ~{human_size(total_mem)} exceeds "
f"{human_size(device_mem, a_kilobyte_is_1024_bytes=True)} "
f"detected on specified {' '.join(config.get_model_config().device['dev'].name.split())} device!\n")
raise ValueError
# If the required memory without the snapshots will fit on the GPU then
# transfer and store snapshots on host
if snaps_mem != 0 and total_mem - snaps_mem < config.get_model_config().cuda['gpu'].totalmem:
config.get_model_config().cuda['snapsgpu2cpu'] = True
if snaps_mem != 0 and total_mem - snaps_mem < device_mem:
config.get_model_config().device['snapsgpu2cpu'] = True
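# Worked example (illustrative numbers only): for a device reporting 8 GB of
# global memory, a model needing total_mem = 9 GB of which snaps_mem = 2 GB is
# snapshot storage gives total_mem - snaps_mem = 7 GB, which fits, so no
# exception is raised; and because snaps_mem != 0 and 7 GB < 8 GB the
# snapshots are streamed back to the host as they are written
# ('snapsgpu2cpu' = True), trading speed for capacity.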
def mem_check_all(grids):
@@ -259,12 +310,12 @@ def mem_check_all(grids):
memory.
Args:
grids (list): FDTDGrid objects.
grids: list of FDTDGrid objects.
Returns:
total_mem (int): Total memory required for all grids.
mem_strs (list): Strings containing text of memory requirements for
each grid.
total_mem: int for total memory required for all grids.
mem_str: list of strings containing text of memory requirements for
each grid.
"""
total_snaps_mem = 0
@@ -297,59 +348,56 @@ def mem_check_all(grids):
mem_check_host(total_mem)
# Check if there is sufficient memory for any snapshots on GPU
if total_snaps_mem > 0 and config.sim_config.general['cuda']:
mem_check_gpu_snaps(total_mem, total_snaps_mem)
if (total_snaps_mem > 0 and (config.sim_config.general['solver'] == 'cuda' or
                             config.sim_config.general['solver'] == 'opencl')):
mem_check_device_snaps(total_mem, total_snaps_mem)
return total_mem, mem_strs
class GPU:
"""GPU information."""
def has_pycuda():
"""Check if pycuda module is installed."""
pycuda = True
try:
import pycuda
except ImportError:
pycuda = False
return pycuda
def __init__(self):
self.deviceID = None
self.name = None
self.pcibusID = None
self.constmem = None
self.totalmem = None
def get_cuda_gpu_info(self, drv, deviceID):
"""Set information about GPU.
Args:
drv (object): pycuda driver.
deviceID (int): Device ID for GPU.
"""
self.deviceID = deviceID
self.name = drv.Device(self.deviceID).name()
self.pcibusID = drv.Device(self.deviceID).pci_bus_id()
self.constmem = drv.Device(self.deviceID).total_constant_memory
self.totalmem = drv.Device(self.deviceID).total_memory()
def has_pyopencl():
"""Check if pyopencl module is installed."""
pyopencl = True
try:
import pyopencl
except ImportError:
pyopencl = False
return pyopencl
def detect_cuda_gpus():
"""Get information about Nvidia GPU(s).
"""Get information about CUDA-capable GPU(s).
Returns:
gpus (list): Detected GPU(s) object(s).
gpus: dict of detected pycuda device object(s) where device ID(s)
are keys.
"""
try:
gpus = {}
cuda_reqs = ('To use gprMax with CUDA you must:'
'\n 1) install pycuda'
'\n 2) install NVIDIA CUDA Toolkit (https://developer.nvidia.com/cuda-toolkit)'
'\n 3) have an NVIDIA CUDA-Enabled GPU (https://developer.nvidia.com/cuda-gpus)')
if has_pycuda():
import pycuda.driver as drv
has_pycuda = True
except ImportError:
logger.warning('pycuda not detected - to use gprMax in GPU mode the pycuda package must be installed, and you must have a NVIDIA CUDA-Enabled GPU (https://developer.nvidia.com/cuda-gpus).')
has_pycuda = False
if has_pycuda:
drv.init()
# Check and list any CUDA-Enabled GPUs
deviceIDsavail = []
if drv.Device.count() == 0:
logger.exception('No NVIDIA CUDA-Enabled GPUs detected (https://developer.nvidia.com/cuda-gpus)')
raise ValueError
logger.warning('No NVIDIA CUDA-Enabled GPUs detected!\n' + cuda_reqs)
elif 'CUDA_VISIBLE_DEVICES' in os.environ:
deviceIDsavail = os.environ.get('CUDA_VISIBLE_DEVICES')
deviceIDsavail = [int(s) for s in deviceIDsavail.split(',')]
@@ -357,33 +405,88 @@ def detect_cuda_gpus():
deviceIDsavail = range(drv.Device.count())
# Gather information about detected GPUs
gpus = []
for ID in deviceIDsavail:
gpu = GPU()
gpu.get_cuda_gpu_info(drv, ID)
gpus.append(gpu)
gpus[ID] = drv.Device(ID)
else:
gpus = None
logger.warning('pycuda not detected!\n' + cuda_reqs)
return gpus
def print_cuda_info(devs):
""""Print info about detected CUDA-capable GPU(s).
Args:
devs: dict of detected pycuda device object(s) where where device ID(s)
are keys.
"""""
import pycuda
logger.basic('|--->CUDA:')
logger.debug(f'PyCUDA: {pycuda.VERSION_TEXT}')
for ID, gpu in devs.items():
logger.basic(f" |--->Device {ID}: {' '.join(gpu.name.split())} | "
f"{human_size(gpu.total_memory(), a_kilobyte_is_1024_bytes=True)}")
def detect_opencl():
"""Get information about OpenCL platforms and devices.
Returns:
gpus (list): Detected GPU(s) object(s).
devs: dict of detected pyopencl device object(s) where device ID(s)
are keys.
"""
try:
import pyopencl as cl
has_pyopencl = True
except ImportError:
logger.warning('pyopencl not detected - to use gprMax with OpenCL, the pyopencl package must be installed, and you must have at least one OpenCL capable platform.')
has_pyopencl = False
devs = {}
if has_pyopencl:
platforms = cl.get_platforms()
platform_names = [p.name for p in platforms]
logger.info(platform_names)
ocl_reqs = ('To use gprMax with OpenCL you must:'
'\n 1) install pyopencl'
'\n 2) install appropriate OpenCL device driver(s)'
'\n 3) have at least one OpenCL-capable platform.')
if has_pyopencl():
import pyopencl as cl
try:
i = 0
for platform in cl.get_platforms():
for device in platform.get_devices():
devs[i] = device
i += 1
except:
logger.warning('No OpenCL-capable platforms detected!\n' + ocl_reqs)
else:
logger.warning('pyopencl not detected!\n' + ocl_reqs)
return devs
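# Example usage (sketch): device IDs form a flat, zero-based index across all
# platforms, e.g. a host with a CPU OpenCL runtime and a single GPU would
# typically yield {0: <CPU device>, 1: <GPU device>}.
def _list_opencl_devices():
    devs = detect_opencl()
    for ID, dev in devs.items():
        print(ID, dev.platform.name, ' '.join(dev.name.split()), dev.global_mem_size)
    return devs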
def print_opencl_info(devs):
""""Print info about detected OpenCL-capable device(s).
Args:
devs: dict of detected pyopencl device object(s) where where device ID(s)
are keys.
"""""
import pyopencl as cl
logger.basic('|--->OpenCL:')
logger.debug(f'PyOpenCL: {cl.VERSION_TEXT}')
platform = None
for ID, dev in devs.items():
    if dev.platform.name != platform:
        platform = dev.platform.name
        logger.basic(f'   |--->Platform: {platform}')
    devtype = cl.device_type.to_string(dev.type)
    if 'CPU' in devtype:
        devtype = 'CPU'
    elif 'GPU' in devtype:
        devtype = 'GPU'
    logger.basic(f" |--->Device {ID}: {devtype} | {' '.join(dev.name.split())} | "
                 f"{human_size(dev.global_mem_size, a_kilobyte_is_1024_bytes=True)}")

View file

@@ -50,7 +50,7 @@ def build_dispersive_material_templates():
env = Environment(loader = FileSystemLoader(os.path.join('gprMax', 'cython')), )
template = env.get_template('fields_updates_dispersive_template')
template = env.get_template('fields_updates_dispersive_template.jinja')
# Render dispersive template for different types
r = template.render(
@@ -159,7 +159,7 @@ if 'cleanall' in sys.argv:
shutil.rmtree(p, ignore_errors=True)
print(f'Removed: {p}')
# Remove 'gprMax/cython/fields_updates_dispersive.pyx' if its there
# Remove 'gprMax/cython/fields_updates_dispersive.jinja' if it's there
if os.path.isfile(cython_disp_file):
os.remove(cython_disp_file)

50
tools/get_host_spec.py Normal file
View file

@@ -0,0 +1,50 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from gprMax.utilities.host_info import (detect_cuda_gpus, detect_opencl,
get_host_info, print_cuda_info,
print_opencl_info)
from gprMax.utilities.utilities import get_terminal_width, human_size
# Host machine info.
hostinfo = get_host_info()
hyperthreadingstr = f", {hostinfo['logicalcores']} cores with Hyper-Threading" if hostinfo['hyperthreading'] else ''
hostname = (f"\n=== {hostinfo['hostname']}")
print(f"{hostname} {'=' * (get_terminal_width() - len(hostname) - 1)}")
print(f"\n{'Mfr/model:':<12} {hostinfo['machineID']}")
print(f"{'CPU:':<12} {hostinfo['sockets']} x {hostinfo['cpuID']} ({hostinfo['physicalcores']} cores{hyperthreadingstr})")
print(f"{'RAM:':<12} {human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True)}")
print(f"{'OS/Version:':<12} {hostinfo['osversion']}")
# OpenMP
print("\n\n=== OpenMP capabilities (gprMax will not use Hyper-Threading with OpenMP as there is no performance advantage)\n")
print(f"{'OpenMP threads: '} {hostinfo['physicalcores']}")
# CUDA
print("\n\n=== CUDA capabilities\n")
gpus = detect_cuda_gpus()
if gpus:
print_cuda_info(gpus)
# OpenCL
print("\n\n=== OpenCL capabilities\n")
devs = detect_opencl()
if devs:
print_opencl_info(devs)
print(f"\n{'=' * (get_terminal_width() - 1)}\n")