Work to implement OpenCL solver - temporarily breaks CUDA

This commit is contained in:
Craig Warren
2022-02-19 16:52:52 +00:00
Parent 0203d03cae
Commit cc0e30900b
52 changed files with 10369 insertions and 739 deletions

View file

@@ -28,7 +28,7 @@ from scipy.constants import c
 from scipy.constants import epsilon_0 as e0
 from scipy.constants import mu_0 as m0

-from .utilities.host_info import detect_cuda_gpus, get_host_info
+from .utilities.host_info import detect_cuda_gpus, detect_opencl, get_host_info
 from .utilities.utilities import get_terminal_width

 logger = logging.getLogger(__name__)
@@ -61,15 +61,21 @@ class ModelConfig:
         self.grids = []
         self.ompthreads = None

-        # Store information for CUDA solver
-        # gpu: GPU object
-        # snapsgpu2cpu: copy snapshot data from GPU to CPU during simulation
-        # N.B. This will happen if the requested snapshots are too large to fit
-        # on the memory of the GPU. If True this will slow performance significantly
-        if sim_config.general['cuda']:
-            # If a list of lists of GPU deviceIDs is found, flatten it
-            if any(isinstance(element, list) for element in sim_config.args.gpu):
-                deviceID = [val for sublist in sim_config.args.gpu for val in sublist]
+        # Store information for CUDA or OpenCL solver
+        # dev: compute device object.
+        # snapsgpu2cpu: copy snapshot data from GPU to CPU during simulation.
+        # N.B. This will happen if the requested snapshots are too large to
+        #      fit on the memory of the GPU. If True this will slow
+        #      performance significantly.
+        if sim_config.general['solver'] == 'cuda' or sim_config.general['solver'] == 'opencl':
+            if sim_config.general['solver'] == 'cuda':
+                devs = sim_config.args.gpu
+            elif sim_config.general['solver'] == 'opencl':
+                devs = sim_config.args.opencl
+            # If a list of lists of deviceIDs is found, flatten it
+            if any(isinstance(element, list) for element in devs):
+                deviceID = [val for sublist in devs for val in sublist]

             # If no deviceID is given default to using deviceID 0. Else if either
             # a single deviceID or list of deviceIDs is given use first one.
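The flattening above accepts deviceIDs given as a single value, a list, or a list of lists (e.g. one list per MPI worker). A minimal standalone sketch of that flattening and the deviceID-0 default, using a hypothetical helper name:

def select_device_id(devs):
    # Hypothetical helper mirroring the flattening/default logic in ModelConfig above.
    # devs is a list of deviceIDs or a list of lists of deviceIDs (possibly empty).
    if any(isinstance(element, list) for element in devs):
        devs = [val for sublist in devs for val in sublist]
    # If no deviceID is given, default to deviceID 0; otherwise use the first one
    return devs[0] if devs else 0

assert select_device_id([[2, 3], [4]]) == 2
assert select_device_id([]) == 0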
@@ -78,7 +84,7 @@ class ModelConfig:
             except:
                 deviceID = 0

-            self.cuda = {'gpu': sim_config.set_model_gpu(deviceID),
-                         'snapsgpu2cpu': False}
+            self.device = {'dev': sim_config.set_model_device(deviceID),
+                           'snapsgpu2cpu': False}

         # Total memory usage for all grids in the model. Starts with 50MB overhead.
@@ -88,29 +94,34 @@ class ModelConfig:
         self.reuse_geometry = False

         # String to print at start of each model run
-        s = f'\n--- Model {model_num + 1}/{sim_config.model_end}, input file: {sim_config.input_file_path}'
-        self.inputfilestr = Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" + Style.RESET_ALL
+        s = (f'\n--- Model {model_num + 1}/{sim_config.model_end}, '
+             f'input file: {sim_config.input_file_path}')
+        self.inputfilestr = (Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" +
+                             Style.RESET_ALL)

         # Output file path and name for specific model
         self.appendmodelnumber = '' if sim_config.single_model else str(model_num + 1)  # Indexed from 1
         self.set_output_file_path()

         # Numerical dispersion analysis parameters
-        # highestfreqthres: threshold (dB) down from maximum power (0dB) of main frequency used
-        #                   to calculate highest frequency for numerical dispersion analysis
-        # maxnumericaldisp: maximum allowable percentage physical phase-velocity phase error
-        # mingridsampling: minimum grid sampling of smallest wavelength for physical wave propagation
+        # highestfreqthres: threshold (dB) down from maximum power (0dB) of
+        #                   main frequency used to calculate highest
+        #                   frequency for numerical dispersion analysis.
+        # maxnumericaldisp: maximum allowable percentage physical
+        #                   phase-velocity phase error.
+        # mingridsampling: minimum grid sampling of smallest wavelength for
+        #                  physical wave propagation.
         self.numdispersion = {'highestfreqthres': 40,
                               'maxnumericaldisp': 2,
                               'mingridsampling': 3}

         # General information to configure materials
-        # maxpoles: Maximum number of dispersive material poles in a model
-        # dispersivedtype: Data type for dispersive materials
-        # dispersiveCdtype: Data type for dispersive materials in Cython
-        # drudelorentz: True/False model contains Drude or Lorentz materials
+        # maxpoles: Maximum number of dispersive material poles in a model.
+        # dispersivedtype: Data type for dispersive materials.
+        # dispersiveCdtype: Data type for dispersive materials in Cython.
+        # drudelorentz: True/False model contains Drude or Lorentz materials.
         # cudarealfunc: String to substitute into CUDA kernels for fields
-        #               dependent on dispersive material type
+        #               dependent on dispersive material type.
         self.materials = {'maxpoles': 0,
                           'dispersivedtype': None,
                           'dispersiveCdtype': None,
@@ -123,32 +134,32 @@ class ModelConfig:
         else: return None

     def get_usernamespace(self):
-        return {'c': c,  # Speed of light in free space (m/s)
-                'e0': e0,  # Permittivity of free space (F/m)
-                'm0': m0,  # Permeability of free space (H/m)
-                'z0': np.sqrt(m0 / e0),  # Impedance of free space (Ohms)
-                'number_model_runs': sim_config.model_end,
-                'current_model_run': model_num + 1,
-                'inputfile': sim_config.input_file_path.resolve()}
+        tmp = {'number_model_runs': sim_config.model_end,
+               'current_model_run': model_num + 1,
+               'inputfile': sim_config.input_file_path.resolve()}
+        return dict(**sim_config.em_consts, **tmp)
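get_usernamespace() now merges the per-model values with sim_config.em_consts. A hedged sketch of how em_consts is presumably populated elsewhere in SimulationConfig (not shown in this diff), based on the constants the old return statement listed inline:

import numpy as np
from scipy.constants import c
from scipy.constants import epsilon_0 as e0
from scipy.constants import mu_0 as m0

# Assumed contents of sim_config.em_consts
em_consts = {'c': c,                  # Speed of light in free space (m/s)
             'e0': e0,                # Permittivity of free space (F/m)
             'm0': m0,                # Permeability of free space (H/m)
             'z0': np.sqrt(m0 / e0)}  # Impedance of free space (Ohms)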
     def set_dispersive_material_types(self):
         """Set data type for disperive materials. Complex if Drude or Lorentz
             materials are present. Real if Debye materials.
         """
         if self.materials['drudelorentz']:
-            self.materials['cudarealfunc'] = '.real()'
+            self.materials['crealfunc'] = '.real()'
             self.materials['dispersivedtype'] = sim_config.dtypes['complex']
             self.materials['dispersiveCdtype'] = sim_config.dtypes['C_complex']
         else:
+            self.materials['crealfunc'] = ''
             self.materials['dispersivedtype'] = sim_config.dtypes['float_or_double']
             self.materials['dispersiveCdtype'] = sim_config.dtypes['C_float_or_double']
     def set_output_file_path(self, outputdir=None):
-        """Output file path can be provided by the user via the API or an input file
-            command. If they haven't provided one use the input file path instead.
+        """Output file path can be provided by the user via the API or an input
+            file command. If they haven't provided one use the input file path
+            instead.

         Args:
-            outputdir (str): Output file directory given from input file command.
+            outputdir: string of output file directory given by input file command.
         """

         if not outputdir:
@@ -171,7 +182,7 @@ class ModelConfig:
         """Set directory to store any snapshots.

         Returns:
-            snapshot_dir (Path): directory to store snapshot files in.
+            snapshot_dir: Path to directory to store snapshot files in.
         """

         parts = self.output_file_path.with_suffix('').parts
         snapshot_dir = Path(*parts[:-1], parts[-1] + '_snaps')
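For reference, the Path manipulation above derives a sibling '_snaps' directory from the model output file; a small worked example (the file name is made up):

from pathlib import Path

output_file_path = Path('models', 'cylinder_Ascan_2D.h5')
parts = output_file_path.with_suffix('').parts
snapshot_dir = Path(*parts[:-1], parts[-1] + '_snaps')
print(snapshot_dir)  # models/cylinder_Ascan_2D_snaps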
@@ -187,7 +198,7 @@ class SimulationConfig:
     def __init__(self, args):
         """
         Args:
-            args (Namespace): Arguments from either API or CLI.
+            args: Namespace with arguments from either API or CLI.
         """

         self.args = args
@@ -196,17 +207,19 @@ class SimulationConfig:
             logger.exception('The geometry fixed option cannot be used with MPI.')
             raise ValueError

+        if args.gpu and args.opencl:
+            logger.exception('You cannot use both CUDA and OpenCl simultaneously.')
+            raise ValueError
+
         # General settings for the simulation
-        # inputfilepath: path to inputfile location
-        # outputfilepath: path to outputfile location
-        # progressbars: whether to show progress bars on stdoout or not
-        # cpu, cuda, opencl: solver type
-        # subgrid: whether the simulation uses sub-grids
-        # precision: data type for electromagnetic field output (single/double)
-        self.general = {'cpu': True,
-                        'cuda': False,
-                        'opencl': False,
+        # inputfilepath: path to inputfile location.
+        # outputfilepath: path to outputfile location.
+        # progressbars: whether to show progress bars on stdoout or not.
+        # solver: cpu, cuda, opencl.
+        # subgrid: whether the simulation uses sub-grids.
+        # precision: data type for electromagnetic field output (single/double).
+        self.general = {'solver': 'cpu',
                         'subgrid': False,
                         'precision': 'single'}
@@ -222,29 +235,37 @@ class SimulationConfig:
         # Store information about host machine
         self.hostinfo = get_host_info()

-        # Information about any Nvidia GPUs
+        # CUDA
         if self.args.gpu is not None:
-            self.general['cuda'] = True
-            self.general['cpu'] = False
-            self.general['opencl'] = False
+            self.general['solver'] = 'cuda'
             # Both single and double precision are possible on GPUs, but single
             # provides best performance.
             self.general['precision'] = 'single'
-            self.cuda = {'gpus': [],  # gpus: list of GPU objects
-                         'nvcc_opts': None}  # nvcc_opts: nvcc compiler options
+            self.devices = {'devs': [],  # devs: list of pycuda device objects
+                            'nvcc_opts': None}  # nvcc_opts: nvcc compiler options
             # Suppress nvcc warnings on Microsoft Windows
             if sys.platform == 'win32': self.cuda['nvcc_opts'] = ['-w']
-            # List of GPU objects of available GPUs
-            self.cuda['gpus'] = detect_cuda_gpus()
+            # Add pycuda available GPU(s)
+            self.devices['devs'] = detect_cuda_gpus()
+
+        # OpenCL
+        if self.args.opencl is not None:
+            self.general['solver'] = 'opencl'
+            self.general['precision'] = 'single'
+            # List of pyopencl available device(s)
+            self.devices = {'devs': []}
+            self.devices['devs'] = detect_opencl()
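set_model_device() further down iterates self.devices['devs'].items(), so detect_opencl() is expected to return a dict mapping an integer deviceID to a pyopencl device object. A hedged sketch of what such a helper could look like; the real implementation lives in gprMax/utilities/host_info.py and may differ:

import pyopencl as cl

def detect_opencl_sketch():
    # Hypothetical stand-in for detect_opencl(): enumerate OpenCL devices
    # across all platforms and index them by a simple integer deviceID.
    devs = {}
    deviceID = 0
    for platform in cl.get_platforms():
        for dev in platform.get_devices():
            devs[deviceID] = dev
            deviceID += 1
    return devs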
         # Subgrid parameter may not exist if user enters via CLI
         try:
             self.general['subgrid'] = self.args.subgrid
             # Double precision should be used with subgrid for best accuracy
             self.general['precision'] = 'double'
-            if self.general['subgrid'] and self.general['cuda']:
-                logger.exception('The CUDA-based solver cannot currently be used with models that contain sub-grids.')
+            if ((self.general['subgrid'] and self.general['cuda']) or
+                (self.general['subgrid'] and self.general['opencl'])):
+                logger.exception('You cannot currently use CUDA or OpenCL-based '
+                                 'solvers with models that contain sub-grids.')
                 raise ValueError
         except AttributeError:
             self.general['subgrid'] = False
@@ -262,34 +283,35 @@ class SimulationConfig:
         self._set_model_start_end()
         self._set_single_model()

-    def set_model_gpu(self, deviceID):
-        """Specify GPU object for model.
+    def set_model_device(self, deviceID):
+        """Specify pycuda/pyopencl object for model.

         Args:
-            deviceID (int): Requested deviceID of GPU
+            deviceID: int of requested deviceID of compute device.

         Returns:
-            gpu (GPU object): Requested GPU object.
+            dev: requested pycuda/pyopencl device object.
         """

         found = False
-        for gpu in self.cuda['gpus']:
-            if gpu.deviceID == deviceID:
+        for ID, dev in self.devices['devs'].items():
+            if ID == deviceID:
                 found = True
-                return gpu
+                return dev

         if not found:
-            logger.exception(f'GPU with device ID {deviceID} does not exist')
+            logger.exception(f'Compute device with device ID {deviceID} does '
+                             'not exist.')
             raise ValueError
     def _set_precision(self):
         """Data type (precision) for electromagnetic field output.
-            Solid and ID arrays use 32-bit integers (0 to 4294967295)
-            Rigid arrays use 8-bit integers (the smallest available type to store true/false)
-            Fractal arrays use complex numbers
-            Dispersive coefficient arrays use either float or complex numbers
-            Main field arrays use floats
+            Solid and ID arrays use 32-bit integers (0 to 4294967295).
+            Rigid arrays use 8-bit integers (the smallest available type to store true/false).
+            Fractal arrays use complex numbers.
+            Dispersive coefficient arrays use either float or complex numbers.
+            Main field arrays use floats.
         """

         if self.general['precision'] == 'single':
@@ -298,16 +320,25 @@ class SimulationConfig:
                            'cython_float_or_double': cython.float,
                            'cython_complex': cython.floatcomplex,
                            'C_float_or_double': 'float',
-                           'C_complex': 'pycuda::complex<float>',
+                           'C_complex': None,
                            'vtk_float': 'Float32'}
+            if self.general['solver'] == 'cuda':
+                self.dtypes['C_complex'] = 'pycuda::complex<float>'
+            elif self.general['solver'] == 'opencl':
+                self.dtypes['C_complex'] = 'cfloat'

         elif self.general['precision'] == 'double':
             self.dtypes = {'float_or_double': np.float64,
                            'complex': np.complex128,
                            'cython_float_or_double': cython.double,
                            'cython_complex': cython.doublecomplex,
                            'C_float_or_double': 'double',
-                           'C_complex': 'pycuda::complex<double>',
+                           'C_complex': None,
                            'vtk_float': 'Float64'}
+            if self.general['solver'] == 'cuda':
+                self.dtypes['C_complex'] = 'pycuda::complex<double>'
+            elif self.general['solver'] == 'opencl':
+                self.dtypes['C_complex'] = 'cdouble'
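As a quick cross-check of the branches above, the complex C type substituted into the kernels now depends on both solver and precision:

# Summary derived from _set_precision() above; 'cfloat'/'cdouble' are the
# complex types used with pyopencl-complex.h.
C_COMPLEX = {('cuda', 'single'): 'pycuda::complex<float>',
             ('cuda', 'double'): 'pycuda::complex<double>',
             ('opencl', 'single'): 'cfloat',
             ('opencl', 'double'): 'cdouble'}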
     def _get_byteorder(self):
         """Check the byte order of system to use for VTK files, i.e. geometry

View file

@@ -25,18 +25,23 @@ import gprMax.config as config
 from ._version import __version__, codename
 from .model_build_run import ModelBuildRun
 from .solvers import create_G, create_solver
-from .utilities.utilities import get_terminal_width, human_size, logo, timer
+from .utilities.host_info import (detect_cuda_gpus, detect_opencl,
+                                  print_cuda_info, print_host_info,
+                                  print_opencl_info)
+from .utilities.utilities import get_terminal_width, logo, timer

 logger = logging.getLogger(__name__)


 class Context:
     """Standard context - models are run one after another and each model
-        can exploit parallelisation using either OpenMP (CPU) or CUDA (GPU).
+        can exploit parallelisation using either OpenMP (CPU), CUDA (GPU), or
+        OpenCL (CPU/GPU).
     """

     def __init__(self):
-        self.model_range = range(config.sim_config.model_start, config.sim_config.model_end)
+        self.model_range = range(config.sim_config.model_start,
+                                 config.sim_config.model_end)
         self.tsimend = None
         self.tsimstart = None
@@ -44,9 +49,11 @@ class Context:
         """Run the simulation in the correct context."""
         self.tsimstart = timer()
         self.print_logo_copyright()
-        self.print_host_info()
-        if config.sim_config.general['cuda']:
-            self.print_gpu_info()
+        print_host_info(config.sim_config.hostinfo)
+        if config.sim_config.general['solver'] == 'cuda':
+            print_cuda_info(config.sim_config.devices['devs'])
+        elif config.sim_config.general['solver'] == 'opencl':
+            print_opencl_info(config.sim_config.devices['devs'])

         # Clear list of model configs. It can be retained when gprMax is
         # called in a loop, and want to avoid this.
@@ -79,33 +86,23 @@ class Context:
         logo_copyright = logo(__version__ + ' (' + codename + ')')
         logger.basic(logo_copyright)

-    def print_host_info(self):
-        """Print information about the host machine."""
-        hyperthreadingstr = f", {config.sim_config.hostinfo['logicalcores']} cores with Hyper-Threading" if config.sim_config.hostinfo['hyperthreading'] else ''
-        logger.basic(f"\nHost: {config.sim_config.hostinfo['hostname']} | {config.sim_config.hostinfo['machineID']} | {config.sim_config.hostinfo['sockets']} x {config.sim_config.hostinfo['cpuID']} ({config.sim_config.hostinfo['physicalcores']} cores{hyperthreadingstr}) | {human_size(config.sim_config.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} RAM | {config.sim_config.hostinfo['osversion']}")
-
-    def print_gpu_info(self):
-        """Print information about any NVIDIA CUDA GPUs detected."""
-        gpus_info = []
-        for gpu in config.sim_config.cuda['gpus']:
-            gpus_info.append(f'{gpu.deviceID} - {gpu.name}, {human_size(gpu.totalmem, a_kilobyte_is_1024_bytes=True)}')
-        logger.basic(f"GPU resources: {' | '.join(gpus_info)}")
-
     def print_time_report(self):
         """Print the total simulation time based on context."""
-        s = f"\n=== Simulation completed in [HH:MM:SS]: {datetime.timedelta(seconds=self.tsimend - self.tsimstart)}"
+        s = ("\n=== Simulation completed in [HH:MM:SS]: "
+             f"{datetime.timedelta(seconds=self.tsimend - self.tsimstart)}")
         logger.basic(f"{s} {'=' * (get_terminal_width() - 1 - len(s))}\n")
 class MPIContext(Context):
     """Mixed mode MPI/OpenMP/CUDA context - MPI task farm is used to distribute
-        models, and each model parallelised using either OpenMP (CPU)
-        or CUDA (GPU).
+        models, and each model parallelised using either OpenMP (CPU),
+        CUDA (GPU), or OpenCL (CPU/GPU).
     """

     def __init__(self):
         super().__init__()
         from mpi4py import MPI
         from gprMax.mpi import MPIExecutor
         self.comm = MPI.COMM_WORLD
@@ -149,7 +146,9 @@ class MPIContext(Context):
         if executor.is_master():
             if config.sim_config.general['cuda']:
                 if executor.size - 1 > len(config.sim_config.cuda['gpus']):
-                    logger.exception('Not enough GPU resources for number of MPI tasks requested. Number of MPI tasks should be equal to number of GPUs + 1.')
+                    logger.exception('Not enough GPU resources for number of '
+                                     'MPI tasks requested. Number of MPI tasks '
+                                     'should be equal to number of GPUs + 1.')
                     raise ValueError

         # Create job list
@@ -175,7 +174,8 @@ class SPOTPYContext(Context):
     (https://github.com/thouska/spotpy). SPOTPY coupling can utilise 2 levels
     of MPI parallelism - where the top level is where SPOPTY optmisation
     algorithms can be parallelised, and the lower level is where gprMax
-    models can be parallelised using either OpenMP (CPU) or CUDA (GPU).
+    models can be parallelised using either OpenMP (CPU), CUDA (GPU), or
+    OpenCL (CPU/GPU).
     """

     def __init__(self):

View file

@@ -1,244 +0,0 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
kernel_template_fields = Template("""
#include <pycuda-complex.hpp>
// Macros for converting subscripts to linear index:
#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n)
#define INDEX2D_MATDISP(m, n) (m)*($NY_MATDISPCOEFFS)+(n)
#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k)
#define INDEX4D_ID(p, i, j, k) (p)*($NX_ID)*($NY_ID)*($NZ_ID)+(i)*($NY_ID)*($NZ_ID)+(j)*($NZ_ID)+(k)
#define INDEX4D_T(p, i, j, k) (p)*($NX_T)*($NY_T)*($NZ_T)+(i)*($NY_T)*($NZ_T)+(j)*($NZ_T)+(k)
// Material coefficients (read-only) in constant memory (64KB)_
__device__ __constant__ $REAL updatecoeffsE[$N_updatecoeffsE];
__device__ __constant__ $REAL updatecoeffsH[$N_updatecoeffsH];
///////////////////////////////////////////////
// Electric field updates - normal materials //
///////////////////////////////////////////////
__global__ void update_electric(int NX, int NY, int NZ, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
// This function updates electric field values.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// ID, E, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ($NY_FIELDS * $NZ_FIELDS);
int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]);
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]);
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]);
}
}
////////////////////////////
// Magnetic field updates //
////////////////////////////
__global__ void update_magnetic(int NX, int NY, int NZ, const unsigned int* __restrict__ ID, $REAL *Hx, $REAL *Hy, $REAL *Hz, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
// This function updates magnetic field values.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// ID, E, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ($NY_FIELDS * $NZ_FIELDS);
int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Hx component
if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) {
int materialHx = ID[INDEX4D_ID(3,i_ID,j_ID,k_ID)];
Hx[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHx,0)] * Hx[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHx,2)] * (Ez[INDEX3D_FIELDS(i,j+1,k)] - Ez[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHx,3)] * (Ey[INDEX3D_FIELDS(i,j,k+1)] - Ey[INDEX3D_FIELDS(i,j,k)]);
}
// Hy component
if (NY != 1 && i >= 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialHy = ID[INDEX4D_ID(4,i_ID,j_ID,k_ID)];
Hy[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHy,0)] * Hy[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHy,3)] * (Ex[INDEX3D_FIELDS(i,j,k+1)] - Ex[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHy,1)] * (Ez[INDEX3D_FIELDS(i+1,j,k)] - Ez[INDEX3D_FIELDS(i,j,k)]);
}
// Hz component
if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialHz = ID[INDEX4D_ID(5,i_ID,j_ID,k_ID)];
Hz[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHz,0)] * Hz[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHz,1)] * (Ey[INDEX3D_FIELDS(i+1,j,k)] - Ey[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHz,2)] * (Ex[INDEX3D_FIELDS(i,j+1,k)] - Ex[INDEX3D_FIELDS(i,j,k)]);
}
}
///////////////////////////////////////////////////
// Electric field updates - dispersive materials //
///////////////////////////////////////////////////
__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
// This function is part A of updates to electric field values when dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ($NY_FIELDS * $NZ_FIELDS);
int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T);
int j_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
int k_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
$REAL phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,pole*3)]$REALFUNC * Tx[INDEX4D_T(pole,i_T,j_T,k_T)]$REALFUNC;
Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,1+(pole*3))] * Tx[INDEX4D_T(pole,i_T,j_T,k_T)] + updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))] * Ex[INDEX3D_FIELDS(i,j,k)];
}
Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * phi;
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
$REAL phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,pole*3)]$REALFUNC * Ty[INDEX4D_T(pole,i_T,j_T,k_T)]$REALFUNC;
Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,1+(pole*3))] * Ty[INDEX4D_T(pole,i_T,j_T,k_T)] + updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))] * Ey[INDEX3D_FIELDS(i,j,k)];
}
Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * phi;
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
$REAL phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,pole*3)]$REALFUNC * Tz[INDEX4D_T(pole,i_T,j_T,k_T)]$REALFUNC;
Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,1+(pole*3))] * Tz[INDEX4D_T(pole,i_T,j_T,k_T)] + updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))] * Ez[INDEX3D_FIELDS(i,j,k)];
}
Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * phi;
}
}
__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
// This function is part B which updates the dispersive field arrays when dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ($NY_FIELDS * $NZ_FIELDS);
int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T);
int j_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
int k_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = Tx[INDEX4D_T(pole,i_T,j_T,k_T)] - updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))] * Ex[INDEX3D_FIELDS(i,j,k)];
}
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = Ty[INDEX4D_T(pole,i_T,j_T,k_T)] - updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))] * Ey[INDEX3D_FIELDS(i,j,k)];
}
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = Tz[INDEX4D_T(pole,i_T,j_T,k_T)] - updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))] * Ez[INDEX3D_FIELDS(i,j,k)];
}
}
}
""")

View file

@@ -18,7 +18,7 @@
 from string import Template

-kernel_template_store_snapshot = Template("""
+knl_template_store_snapshot = Template("""

 // Macros for converting subscripts to linear index:
 #define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k)

View file

@@ -18,7 +18,7 @@
 from string import Template

-kernel_template_sources = Template("""
+knl_template_sources = Template("""

 // Macros for converting subscripts to linear index:
 #define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n)

View file

@@ -0,0 +1,276 @@
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
{% block complex_header %}{% endblock complex_header %}
// Macros for converting subscripts to linear index:
#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define IDX2D_MATDISP(m, n) (m)*({{NY_MATDISPCOEFFS}})+(n)
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
#define IDX4D_T(p, i, j, k) (p)*({{NX_T}})*({{NY_T}})*({{NZ_T}})+(i)*({{NY_T}})*({{NZ_T}})+(j)*({{NZ_T}})+(k)
// Material coefficients (read-only) stored in constant memory of compute device
{% block constmem %}{% endblock constmem %}
///////////////////////////////////////////////
// Electric field updates - normal materials //
///////////////////////////////////////////////
{{KERNEL}} void update_electric(int NX,
int NY,
int NZ,{% filter indent(width=30) %}{% block electric_args %}{% endblock electric_args %}{% endfilter %}{
// This function updates electric field values.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// ID, E, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread
{% block threadidx %}{% endblock threadidx %}
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)];
Ex[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i,j,k-1)]);
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)];
Ey[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j,k-1)]) -
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i-1,j,k)]);
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)];
Ez[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i-1,j,k)]) -
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j-1,k)]);
}
}
////////////////////////////
// Magnetic field updates //
////////////////////////////
{{KERNEL}} void update_magnetic(int NX,
int NY,
int NZ,{% filter indent(width=30) %}{% block magnetic_args %}{% endblock magnetic_args %}{% endfilter %}{
// This function updates magnetic field values.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// ID, E, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread
{{self.threadidx()}}
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Hx component
if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) {
int materialHx = ID[IDX4D_ID(3,i_ID,j_ID,k_ID)];
Hx[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHx,0)] * Hx[IDX3D_FIELDS(i,j,k)] -
updatecoeffsH[IDX2D_MAT(materialHx,2)] * (Ez[IDX3D_FIELDS(i,j+1,k)] - Ez[IDX3D_FIELDS(i,j,k)]) +
updatecoeffsH[IDX2D_MAT(materialHx,3)] * (Ey[IDX3D_FIELDS(i,j,k+1)] - Ey[IDX3D_FIELDS(i,j,k)]);
}
// Hy component
if (NY != 1 && i >= 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialHy = ID[IDX4D_ID(4,i_ID,j_ID,k_ID)];
Hy[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHy,0)] * Hy[IDX3D_FIELDS(i,j,k)] -
updatecoeffsH[IDX2D_MAT(materialHy,3)] * (Ex[IDX3D_FIELDS(i,j,k+1)] - Ex[IDX3D_FIELDS(i,j,k)]) +
updatecoeffsH[IDX2D_MAT(materialHy,1)] * (Ez[IDX3D_FIELDS(i+1,j,k)] - Ez[IDX3D_FIELDS(i,j,k)]);
}
// Hz component
if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialHz = ID[IDX4D_ID(5,i_ID,j_ID,k_ID)];
Hz[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHz,0)] * Hz[IDX3D_FIELDS(i,j,k)] -
updatecoeffsH[IDX2D_MAT(materialHz,1)] * (Ey[IDX3D_FIELDS(i+1,j,k)] - Ey[IDX3D_FIELDS(i,j,k)]) +
updatecoeffsH[IDX2D_MAT(materialHz,2)] * (Ex[IDX3D_FIELDS(i,j+1,k)] - Ex[IDX3D_FIELDS(i,j,k)]);
}
}
///////////////////////////////////////////////////
// Electric field updates - dispersive materials //
///////////////////////////////////////////////////
{{KERNEL}} void update_electric_dispersive_A(int NX,
int NY,
int NZ,
int MAXPOLES,{% filter indent(width=43) %}{% block electric_dispersive_A_args %}{% endblock electric_dispersive_A_args %}{% endfilter %}{
// This function is part A of updates to electric field values when
// dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
{{self.threadidx()}}
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEx,pole*3)]{{REALFUNC}} * Tx[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}};
Tx[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEx,1+(pole*3))] * Tx[IDX4D_T(pole,i_T,j_T,k_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(i,j,k)];
}
Ex[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i,j,k-1)]) -
updatecoeffsE[IDX2D_MAT(materialEx,4)] * phi;
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEy,pole*3)]{{REALFUNC}} * Ty[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}};
Ty[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEy,1+(pole*3))] * Ty[IDX4D_T(pole,i_T,j_T,k_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(i,j,k)];
}
Ey[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j,k-1)]) -
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i-1,j,k)]) -
updatecoeffsE[IDX2D_MAT(materialEy,4)] * phi;
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEz,pole*3)]{{REALFUNC}} * Tz[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}};
Tz[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEz,1+(pole*3))] * Tz[IDX4D_T(pole,i_T,j_T,k_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(i,j,k)];
}
Ez[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i-1,j,k)]) -
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[IDX2D_MAT(materialEz,4)] * phi;
}
}
{{KERNEL}} void update_electric_dispersive_B(int NX,
int NY,
int NZ,
int MAXPOLES,{% filter indent(width=43) %}{% block electric_dispersive_B_args %}{% endblock electric_dispersive_B_args %}{% endfilter %}{
// This function is part B which updates the dispersive field arrays when
// dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
{{self.threadidx()}}
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tx[IDX4D_T(pole,i_T,j_T,k_T)] = Tx[IDX4D_T(pole,i_T,j_T,k_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(i,j,k)];
}
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Ty[IDX4D_T(pole,i_T,j_T,k_T)] = Ty[IDX4D_T(pole,i_T,j_T,k_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(i,j,k)];
}
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tz[IDX4D_T(pole,i_T,j_T,k_T)] = Tz[IDX4D_T(pole,i_T,j_T,k_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(i,j,k)];
}
}
}
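The IDX3D_FIELDS and IDX4D_ID macros in this template linearise multi-dimensional subscripts into the flat 1D arrays the kernels receive. The same arithmetic restated in plain Python, for reference:

def idx3d_fields(i, j, k, ny_fields, nz_fields):
    # (i, j, k) subscript -> linear index, as in IDX3D_FIELDS
    return i * ny_fields * nz_fields + j * nz_fields + k

def idx4d_id(p, i, j, k, nx_id, ny_id, nz_id):
    # (p, i, j, k) subscript -> linear index, as in IDX4D_ID
    return p * nx_id * ny_id * nz_id + i * ny_id * nz_id + j * nz_id + k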

View file

@@ -0,0 +1,65 @@
{% extends "fields_updates_base.tmpl" %}
{% block complex_header %}
#include <pycuda-complex.hpp>
{% endblock complex_header %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
{% endblock constmem %}
{% block threadidx %}
int idx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block electric_args %}
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
{{REAL}} *Ey,
{{REAL}} *Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz)
{% endblock electric_args %}
{% block magnetic_args %}
const unsigned int* __restrict__ ID,
{{REAL}} *Hx,
{{REAL}} *Hy,
{{REAL}} *Hz,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez)
{% endblock magnetic_args %}
{% block electric_dispersive_A_args %}
const {{COMPLEX}}* __restrict__ updatecoeffsdispersive,
{{COMPLEX}} *Tx,
{{COMPLEX}} *Ty,
{{COMPLEX}} *Tz,
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
{{REAL}} *Ey,
{{REAL}} *Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz)
{% endblock electric_dispersive_A_args %}
{% block electric_dispersive_B_args %}
const {{COMPLEX}}* __restrict__ updatecoeffsdispersive,
{{COMPLEX}} *Tx,
{{COMPLEX}} *Ty,
{{COMPLEX}} *Tz,
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez)
{% endblock electric_dispersive_B_args %}

View file

@@ -0,0 +1,77 @@
{% extends "fields_updates_base.tmpl" %}
{% block complex_header %}
#include <pyopencl-complex.h>
{% endblock complex_header %}
{% block constmem %}
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updatecoeffsE %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updatecoeffsH %}
{{i}},
{% endfor %}
};
{% endblock constmem %}
{% block threadidx %}
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block electric_args %}
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}} * restrict Hx,
__global const {{REAL}} * restrict Hy,
__global const {{REAL}} * restrict Hz)
{% endblock electric_args %}
{% block magnetic_args %}
__global const unsigned int* restrict ID,
__global {{REAL}} *Hx,
__global {{REAL}} *Hy,
__global {{REAL}} *Hz,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez)
{% endblock magnetic_args %}
{% block electric_dispersive_A_args %}
__global const {{COMPLEX}}* restrict updatecoeffsdispersive,
__global {{COMPLEX}} *Tx,
__global {{COMPLEX}} *Ty,
__global {{COMPLEX}} *Tz,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz)
{% endblock electric_dispersive_A_args %}
{% block electric_dispersive_B_args %}
__global const {{COMPLEX}}* restrict updatecoeffsdispersive,
__global {{COMPLEX}} *Tx,
__global {{COMPLEX}} *Ty,
__global {{COMPLEX}} *Tz,
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez)
{% endblock electric_dispersive_B_args %}
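The CUDA and OpenCL variants above both extend fields_updates_base.tmpl and only override the complex header, constant-memory declarations, thread-index computation and kernel argument lists. A hedged sketch of rendering one variant with Jinja2 (the package path, template file name and substitution values here are assumptions, not taken from this commit):

from jinja2 import Environment, PackageLoader

env = Environment(loader=PackageLoader('gprMax', 'cuda_opencl'))  # assumed template location
tmpl = env.get_template('fields_updates_cuda.tmpl')               # assumed file name
kernel_src = tmpl.render(KERNEL='__global__',
                         REAL='float',
                         COMPLEX='pycuda::complex<float>',
                         REALFUNC='.real()',
                         N_updatecoeffsE=30, N_updatecoeffsH=30,
                         NY_MATCOEFFS=5, NY_MATDISPCOEFFS=9,
                         NY_FIELDS=101, NZ_FIELDS=101,
                         NX_ID=101, NY_ID=101, NZ_ID=101,
                         NX_T=100, NY_T=100, NZ_T=100)
# kernel_src can then be compiled, e.g. with pycuda.compiler.SourceModule for
# the CUDA variant or pyopencl.Program for the OpenCL variant.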

File diff is too large to display.

View file

@@ -0,0 +1,62 @@
{% extends "pml_updates_electric_HORIPML_base.tmpl" %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
{% endblock constmem %}
{% block threadidx %}
int idx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block x_args %}
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
{{REAL}} *Ey,
{{REAL}} *Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock x_args %}
{% block y_args %}
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
const {{REAL}}* __restrict__ Ey,
{{REAL}} *Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock y_args %}
{% block z_args %}
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
{{REAL}} *Ey,
const {{REAL}}* __restrict__ Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock z_args %}

View file

@@ -0,0 +1,68 @@
{% extends "pml_updates_electric_HORIPML_base.tmpl" %}
{% block constmem %}
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updatecoeffsE %}
{{i}},
{% endfor %}
};
{% endblock constmem %}
{% block threadidx %}
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block x_args %}
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock x_args %}
{% block y_args %}
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global const {{REAL}}* restrict Ey,
__global {{REAL}} *Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock y_args %}
{% block z_args %}
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock z_args %}

File diff is too large to display.

View file

@@ -0,0 +1,62 @@
{% extends "pml_updates_magnetic_HORIPML_base.tmpl" %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
{% endblock constmem %}
{% block threadidx %}
int idx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block x_args %}
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
const {{REAL}}* __restrict__ Hx,
{{REAL}} *Hy,
{{REAL}} *Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock x_args %}
{% block y_args %}
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
{{REAL}} *Hx,
const {{REAL}}* __restrict__ Hy,
{{REAL}} *Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock y_args %}
{% block z_args %}
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
{{REAL}} *Hx,
{{REAL}} *Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock z_args %}

View file

@@ -0,0 +1,68 @@
{% extends "pml_updates_magnetic_HORIPML_base.tmpl" %}
{% block constmem %}
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updatecoeffsH %}
{{i}},
{% endfor %}
};
{% endblock constmem %}
{% block threadidx %}
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block x_args %}
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global {{REAL}} *Hy,
__global {{REAL}} *Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock x_args %}
{% block y_args %}
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global {{REAL}} *Hx,
__global const {{REAL}}* restrict Hy,
__global {{REAL}} *Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock y_args %}
{% block z_args %}
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global {{REAL}} *Hx,
__global {{REAL}} *Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock z_args %}

View file

@@ -0,0 +1,90 @@
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
// Macros for converting subscripts to linear index:
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define IDX4D_SNAPS(p, i, j, k) (p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k)
////////////////////
// Store snapshot //
////////////////////
{{KERNEL}} void store_snapshot(int p,
int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int dx,
int dy,
int dz,{% filter indent(width=29) %}{% block snap_args %}{% endblock snap_args %}{% endfilter %}{
// This function stores field values for a snapshot.
//
// Args:
// p: Snapshot number
//      xs, xf, ys, yf, zs, zf: Start and finish cell coordinates for snapshot
// dx, dy, dz: Sampling interval in cell coordinates for snapshot
// E, H: Access to field component arrays
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots
// Obtain the linear index corresponding to the current thread
{% block threadidx %}{% endblock threadidx %}
// Convert the linear index to subscripts for 4D SNAPS array
int i = (idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) / ({{NY_SNAPS}} * {{NZ_SNAPS}});
int j = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) / {{NZ_SNAPS}};
int k = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) % {{NZ_SNAPS}};
// Subscripts for field arrays
int ii, jj, kk;
if (i >= xs && i < xf && j >= ys && j < yf && k >= zs && k < zf) {
// Increment subscripts for field array to account for spatial sampling of snapshot
ii = (xs + i) * dx;
jj = (ys + j) * dy;
kk = (zs + k) * dz;
// The electric field component value at a point comes from an average of
// the 4 electric field component values in that cell
snapEx[IDX4D_SNAPS(p,i,j,k)] = (Ex[IDX3D_FIELDS(ii,jj,kk)] +
Ex[IDX3D_FIELDS(ii,jj+1,kk)] +
Ex[IDX3D_FIELDS(ii,jj,kk+1)] +
Ex[IDX3D_FIELDS(ii,jj+1,kk+1)]) / 4;
snapEy[IDX4D_SNAPS(p,i,j,k)] = (Ey[IDX3D_FIELDS(ii,jj,kk)] +
Ey[IDX3D_FIELDS(ii+1,jj,kk)] +
Ey[IDX3D_FIELDS(ii,jj,kk+1)] +
Ey[IDX3D_FIELDS(ii+1,jj,kk+1)]) / 4;
snapEz[IDX4D_SNAPS(p,i,j,k)] = (Ez[IDX3D_FIELDS(ii,jj,kk)] +
Ez[IDX3D_FIELDS(ii+1,jj,kk)] +
Ez[IDX3D_FIELDS(ii,jj+1,kk)] +
Ez[IDX3D_FIELDS(ii+1,jj+1,kk)]) / 4;
// The magnetic field component value at a point comes from an average of
// 2 magnetic field component values in that cell and the following cell
snapHx[IDX4D_SNAPS(p,i,j,k)] = (Hx[IDX3D_FIELDS(ii,jj,kk)] +
Hx[IDX3D_FIELDS(ii+1,jj,kk)]) / 2;
snapHy[IDX4D_SNAPS(p,i,j,k)] = (Hy[IDX3D_FIELDS(ii,jj,kk)] +
Hy[IDX3D_FIELDS(ii,jj+1,kk)]) / 2;
snapHz[IDX4D_SNAPS(p,i,j,k)] = (Hz[IDX3D_FIELDS(ii,jj,kk)] +
Hz[IDX3D_FIELDS(ii,jj,kk+1)]) / 2;
}
}
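
The averaging above reflects the Yee-cell layout: each electric component is recorded as the mean of the 4 collinear edge values in the sampled cell, and each magnetic component as the mean of the 2 face values straddling it. As an illustrative host-side equivalent (not part of the commit), the same operation on NumPy field arrays for one sampled point would be:

    # ii, jj, kk: field-array subscripts for the sampled snapshot point
    snapEx[p, i, j, k] = (Ex[ii, jj, kk] + Ex[ii, jj + 1, kk] +
                          Ex[ii, jj, kk + 1] + Ex[ii, jj + 1, kk + 1]) / 4
    snapHx[p, i, j, k] = (Hx[ii, jj, kk] + Hx[ii + 1, jj, kk]) / 2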

View file

@@ -0,0 +1,22 @@
{% extends "snapshots_base.tmpl" %}
{% block threadidx %}
int idx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block snap_args %}
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *snapEx,
{{REAL}} *snapEy,
{{REAL}} *snapEz,
{{REAL}} *snapHx,
{{REAL}} *snapHy,
{{REAL}} *snapHz
{% endblock snap_args %}

View file

@@ -0,0 +1,23 @@
{% extends "snapshots_base.tmpl" %}
{% block threadidx %}
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block snap_args %}
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *snapEx,
__global {{REAL}} *snapEy,
__global {{REAL}} *snapEz,
__global {{REAL}} *snapHx,
__global {{REAL}} *snapHy,
__global {{REAL}} *snapHz)
{% endblock snap_args %}

View file

@@ -0,0 +1,217 @@
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
// Macros for converting subscripts to linear index:
#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define IDX2D_SRCINFO(m, n) (m)*{{NY_SRCINFO}}+(n)
#define IDX2D_SRCWAVES(m, n) (m)*({{NY_SRCWAVES}})+(n)
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
// Material coefficients (read-only) stored in constant memory of compute device
{% block constmem %}{% endblock constmem %}
///////////////////////////////////////////
// Hertzian dipole electric field update //
///////////////////////////////////////////
{{KERNEL}} void update_hertzian_dipole(int NHERTZDIPOLE,
int iteration,
{{REAL}} dx,
{{REAL}} dy,
{{REAL}} dz,{% filter indent(width=37) %}{% block electric_source_args %}{% endblock electric_source_args %}{% endfilter %}{
// This function updates electric field values for Hertzian dipole sources.
//
// Args:
// NHERTZDIPOLE: Total number of Hertzian dipoles in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, E: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each source
{% block threadidx %}{% endblock threadidx %}
if (src < NHERTZDIPOLE) {
{{REAL}} dl;
int i, j, k, polarisation;
i = srcinfo1[IDX2D_SRCINFO(src,0)];
j = srcinfo1[IDX2D_SRCINFO(src,1)];
k = srcinfo1[IDX2D_SRCINFO(src,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(src,3)];
dl = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
int materialEx = ID[IDX4D_ID(0,i,j,k)];
Ex[IDX3D_FIELDS(i,j,k)] = Ex[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialEy = ID[IDX4D_ID(1,i,j,k)];
Ey[IDX3D_FIELDS(i,j,k)] = Ey[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialEz = ID[IDX4D_ID(2,i,j,k)];
Ez[IDX3D_FIELDS(i,j,k)] = Ez[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
}
}
///////////////////////////////////////////
// Magnetic dipole magnetic field update //
///////////////////////////////////////////
{{KERNEL}} void update_magnetic_dipole(int NMAGDIPOLE,
int iteration,
{{REAL}} dx,
{{REAL}} dy,
{{REAL}} dz,{% filter indent(width=37) %}{% block magnetic_source_args %}{% endblock magnetic_source_args %}{% endfilter %}{
// This function updates magnetic field values for magnetic dipole sources.
//
// Args:
// NMAGDIPOLE: Total number of magnetic dipoles in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each source
{{self.threadidx()}}
if (src < NMAGDIPOLE) {
int i, j, k, polarisation;
i = srcinfo1[IDX2D_SRCINFO(src,0)];
j = srcinfo1[IDX2D_SRCINFO(src,1)];
k = srcinfo1[IDX2D_SRCINFO(src,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(src,3)];
// 'x' polarised source
if (polarisation == 0) {
int materialHx = ID[IDX4D_ID(3,i,j,k)];
Hx[IDX3D_FIELDS(i,j,k)] = Hx[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialHy = ID[IDX4D_ID(4,i,j,k)];
Hy[IDX3D_FIELDS(i,j,k)] = Hy[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialHz = ID[IDX4D_ID(5,i,j,k)];
Hz[IDX3D_FIELDS(i,j,k)] = Hz[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
}
}
//////////////////////////////////////////
// Voltage source electric field update //
//////////////////////////////////////////
{{KERNEL}} void update_voltage_source(int NVOLTSRC,
int iteration,
{{REAL}} dx,
{{REAL}} dy,
{{REAL}} dz,{% filter indent(width=36) %}{{self.electric_source_args()}}{% endfilter %}{
// This function updates electric field values for voltage sources.
//
// Args:
// NVOLTSRC: Total number of voltage sources in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, E: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each source
{{self.threadidx()}}
if (src < NVOLTSRC) {
{{REAL}} resistance;
int i, j, k, polarisation;
i = srcinfo1[IDX2D_SRCINFO(src,0)];
j = srcinfo1[IDX2D_SRCINFO(src,1)];
k = srcinfo1[IDX2D_SRCINFO(src,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(src,3)];
resistance = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
if (resistance != 0) {
int materialEx = ID[IDX4D_ID(0,i,j,k)];
Ex[IDX3D_FIELDS(i,j,k)] = Ex[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dy * dz));
}
else {
Ex[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dx;
}
}
// 'y' polarised source
else if (polarisation == 1) {
if (resistance != 0) {
int materialEy = ID[IDX4D_ID(1,i,j,k)];
Ey[IDX3D_FIELDS(i,j,k)] = Ey[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dz));
}
else {
Ey[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dy;
}
}
// 'z' polarised source
else if (polarisation == 2) {
if (resistance != 0) {
int materialEz = ID[IDX4D_ID(2,i,j,k)];
Ez[IDX3D_FIELDS(i,j,k)] = Ez[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dy));
}
else {
Ez[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dz;
}
}
}
}
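
Written out as equations (an editor's summary of the kernels above, with $C_4$ the per-material source coefficient stored in column 4 of the update-coefficient array and $w_n$ the waveform value for the source at iteration $n$):

    E \leftarrow E - C_4\, w_n\, \frac{\Delta l}{\Delta x\,\Delta y\,\Delta z} \quad \text{(Hertzian dipole)}
    H \leftarrow H - C_4\, w_n\, \frac{1}{\Delta x\,\Delta y\,\Delta z} \quad \text{(magnetic dipole)}
    E \leftarrow E - C_4\, w_n\, \frac{1}{R\,\Delta_{t1}\,\Delta_{t2}} \quad (R \neq 0), \qquad E = -\frac{w_n}{\Delta_p} \quad (R = 0) \quad \text{(voltage source)}

where $\Delta_p$ is the cell size along the polarisation direction and $\Delta_{t1}$, $\Delta_{t2}$ are the two transverse cell sizes.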

View file

@@ -0,0 +1,34 @@
{% extends "source_updates_base.tmpl" %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
{% endblock constmem %}
{% block threadidx %}
int src = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block electric_source_args %}
const int* __restrict__ srcinfo1,
const {{REAL}}* __restrict__ srcinfo2,
const {{REAL}}* __restrict__ srcwaveforms,
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
{{REAL}} *Ey,
{{REAL}} *Ez)
{% endblock electric_source_args %}
{% block magnetic_source_args %}
const int* __restrict__ srcinfo1,
const {{REAL}}* __restrict__ srcinfo2,
const {{REAL}}* __restrict__ srcwaveforms,
const unsigned int* __restrict__ ID,
{{REAL}} *Hx,
{{REAL}} *Hy,
{{REAL}} *Hz)
{% endblock magnetic_source_args %}

View file

@@ -0,0 +1,46 @@
{% extends "source_updates_base.tmpl" %}
{% block constmem %}
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updatecoeffsE %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updatecoeffsH %}
{{i}},
{% endfor %}
};
{% endblock constmem %}
{% block threadidx %}
int src = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block electric_source_args %}
__global const int* restrict srcinfo1,
__global const {{REAL}}* restrict srcinfo2,
__global const {{REAL}}* restrict srcwaveforms,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez)
{% endblock electric_source_args %}
{% block magnetic_source_args %}
__global const int* restrict srcinfo1,
__global const {{REAL}}* restrict srcinfo2,
__global const {{REAL}}* restrict srcwaveforms,
__global const unsigned int* restrict ID,
__global {{REAL}} *Hx,
__global {{REAL}} *Hy,
__global {{REAL}} *Hz)
{% endblock magnetic_source_args %}

View file

@@ -0,0 +1,50 @@
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
#define IDX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n)
#define IDX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k)
#define IDX3D_FIELDS(i,j,k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
{{KERNEL}} void store_outputs(int NRX,
int iteration,{% filter indent(width=28) %}{% block rx_args %}{% endblock rx_args %}{% endfilter %}{
// This function stores field component values for every receiver in the model.
//
// Args:
// NRX: total number of receivers in the model.
// rxs: array to store field components for receivers - rows
//          are field components; columns are iterations; pages are receivers
// Obtain linear index corresponding to the current work item
{% block threadidx %}{% endblock threadidx %}
int i,j,k;
if (rx < NRX) {
i = rxcoords[IDX2D_RXCOORDS(rx,0)];
j = rxcoords[IDX2D_RXCOORDS(rx,1)];
k = rxcoords[IDX2D_RXCOORDS(rx,2)];
rxs[IDX3D_RXS(0,iteration,rx)] = Ex[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(1,iteration,rx)] = Ey[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(2,iteration,rx)] = Ez[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(3,iteration,rx)] = Hx[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(4,iteration,rx)] = Hy[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(5,iteration,rx)] = Hz[IDX3D_FIELDS(i,j,k)];
}
}

View file

@@ -0,0 +1,18 @@
{% extends "store_outputs_base.tmpl" %}
{% block threadidx %}
int rx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block rx_args %}
const int* __restrict__ rxcoords,
{{REAL}} *rxs,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz)
{% endblock rx_args %}

View file

@@ -0,0 +1,19 @@
{% extends "store_outputs_base.tmpl" %}
{% block threadidx %}
int rx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block rx_args %}
__global const int* restrict rxcoords,
__global {{REAL}} *rxs,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz)
{% endblock rx_args %}

View file

@@ -0,0 +1,22 @@
{% block complex_header %}{% endblock complex_header %}
// Macros for converting subscripts to linear index
#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define IDX2D_MATDISP(m, n) (m)*({{NY_MATDISPCOEFFS}})+(n)
#define IDX2D_R(m, n) (m)*(NY_R)+(n)
#define IDX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n)
#define IDX2D_SRCINFO(m, n) (m)*{{NY_SRCINFO}}+(n)
#define IDX2D_SRCWAVES(m, n) (m)*({{NY_SRCWAVES}})+(n)
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define IDX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k)
#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
#define IDX4D_SNAPS(p, i, j, k) (p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k)
#define IDX4D_T(p, i, j, k) (p)*({{NX_T}})*({{NY_T}})*({{NZ_T}})+(i)*({{NY_T}})*({{NZ_T}})+(j)*({{NZ_T}})+(k)
#define IDX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k)
#define IDX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k)
// Material coefficients (read-only) stored in constant memory of compute device
{% block constmem %}{% endblock constmem %}

View file

@@ -0,0 +1,11 @@
{% extends "knl_common_base.tmpl" %}
{% block complex_header %}
#include <pycuda-complex.hpp>
{% endblock complex_header %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
{% endblock constmem %}

View file

@@ -0,0 +1,22 @@
{% extends "knl_common_base.tmpl" %}
{% block complex_header %}
#include <pyopencl-complex.h>
{% endblock complex_header %}
{% block constmem %}
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updatecoeffsE %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updatecoeffsH %}
{{i}},
{% endfor %}
};
{% endblock constmem %}
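
The two common headers above handle the material coefficients differently: the CUDA version only declares empty __device__ __constant__ arrays, which the host fills after compilation, whereas OpenCL provides no host-side copy into program-scope __constant variables, so the values are rendered directly into the initialisers by Jinja. A hypothetical sketch of the CUDA-side fill with pycuda (variable names assumed, and an active CUDA context is presumed):

    import pycuda.driver as drv
    from pycuda.compiler import SourceModule

    mod = SourceModule(knl_src)                     # knl_src: rendered CUDA template source
    symbol, nbytes = mod.get_global('updatecoeffsE')
    drv.memcpy_htod(symbol, G.updatecoeffsE)        # G.updatecoeffsE: float32 numpy array on host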

View file

@@ -0,0 +1,233 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
update_electric = Template("""
// Electric field updates - normal materials.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain.
// ID, E, H: Access to ID and field component arrays.
// Convert the linear index to subscripts for 3D field arrays
int x = i / ($NY_FIELDS * $NZ_FIELDS);
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Ex component
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
Ex[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(x,y,z)] +
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x,y-1,z)]) -
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x,y,z-1)]);
}
// Ey component
if ((NX != 1 || NZ != 1) && x > 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) {
int materialEy = ID[IDX4D_ID(1,x_ID,y_ID,z_ID)];
Ey[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(x,y,z)] +
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y,z-1)]) -
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x-1,y,z)]);
}
// Ez component
if ((NX != 1 || NY != 1) && x > 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) {
int materialEz = ID[IDX4D_ID(2,x_ID,y_ID,z_ID)];
Ez[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(x,y,z)] +
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x-1,y,z)]) -
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y-1,z)]);
}
""")
update_magnetic = Template("""
// Magnetic field updates - normal materials.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain.
// ID, E, H: Access to ID and field component arrays.
// Convert the linear index to subscripts for 3D field arrays
int x = i / ($NY_FIELDS * $NZ_FIELDS);
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Hx component
if (NX != 1 && x > 0 && x < NX && y >= 0 && y < NY && z >= 0 && z < NZ) {
int materialHx = ID[IDX4D_ID(3,x_ID,y_ID,z_ID)];
Hx[IDX3D_FIELDS(x,y,z)] = updatecoeffsH[IDX2D_MAT(materialHx,0)] * Hx[IDX3D_FIELDS(x,y,z)] -
updatecoeffsH[IDX2D_MAT(materialHx,2)] * (Ez[IDX3D_FIELDS(x,y+1,z)] - Ez[IDX3D_FIELDS(x,y,z)]) +
updatecoeffsH[IDX2D_MAT(materialHx,3)] * (Ey[IDX3D_FIELDS(x,y,z+1)] - Ey[IDX3D_FIELDS(x,y,z)]);
}
// Hy component
if (NY != 1 && x >= 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) {
int materialHy = ID[IDX4D_ID(4,x_ID,y_ID,z_ID)];
Hy[IDX3D_FIELDS(x,y,z)] = updatecoeffsH[IDX2D_MAT(materialHy,0)] * Hy[IDX3D_FIELDS(x,y,z)] -
updatecoeffsH[IDX2D_MAT(materialHy,3)] * (Ex[IDX3D_FIELDS(x,y,z+1)] - Ex[IDX3D_FIELDS(x,y,z)]) +
updatecoeffsH[IDX2D_MAT(materialHy,1)] * (Ez[IDX3D_FIELDS(x+1,y,z)] - Ez[IDX3D_FIELDS(x,y,z)]);
}
// Hz component
if (NZ != 1 && x >= 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) {
int materialHz = ID[IDX4D_ID(5,x_ID,y_ID,z_ID)];
Hz[IDX3D_FIELDS(x,y,z)] = updatecoeffsH[IDX2D_MAT(materialHz,0)] * Hz[IDX3D_FIELDS(x,y,z)] -
updatecoeffsH[IDX2D_MAT(materialHz,1)] * (Ey[IDX3D_FIELDS(x+1,y,z)] - Ey[IDX3D_FIELDS(x,y,z)]) +
updatecoeffsH[IDX2D_MAT(materialHz,2)] * (Ex[IDX3D_FIELDS(x,y+1,z)] - Ex[IDX3D_FIELDS(x,y,z)]);
}
""")
update_electric_dispersive_A = Template("""
// Electric field updates - dispersive materials - part A of updates to electric
// field values when dispersive materials
// (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain.
// MAXPOLES: Maximum number of dispersive material poles present in model.
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients,
// dispersive, ID and field
// component arrays.
// Convert the linear index to subscripts for 3D field arrays
int x = i / ($NY_FIELDS * $NZ_FIELDS);
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Convert the linear index to subscripts for 4D dispersive array
int x_T = (i % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T);
int y_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
int z_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
// Ex component
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
$REAL phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEx,pole*3)]$REALFUNC * Tx[IDX4D_T(pole,x_T,y_T,z_T)]$REALFUNC;
Tx[IDX4D_T(pole,x_T,y_T,z_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEx,1+(pole*3))] * Tx[IDX4D_T(pole,x_T,y_T,z_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(x,y,z)];
}
Ex[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(x,y,z)] +
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x,y-1,z)]) -
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x,y,z-1)]) -
updatecoeffsE[IDX2D_MAT(materialEx,4)] * phi;
}
// Ey component
if ((NX != 1 || NZ != 1) && x > 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) {
int materialEy = ID[IDX4D_ID(1,x_ID,y_ID,z_ID)];
$REAL phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEy,pole*3)]$REALFUNC * Ty[IDX4D_T(pole,x_T,y_T,z_T)]$REALFUNC;
Ty[IDX4D_T(pole,x_T,y_T,z_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEy,1+(pole*3))] * Ty[IDX4D_T(pole,x_T,y_T,z_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(x,y,z)];
}
Ey[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(x,y,z)] +
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y,z-1)]) -
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x-1,y,z)]) -
updatecoeffsE[IDX2D_MAT(materialEy,4)] * phi;
}
// Ez component
if ((NX != 1 || NY != 1) && x > 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) {
int materialEz = ID[IDX4D_ID(2,x_ID,y_ID,z_ID)];
$REAL phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEz,pole*3)]$REALFUNC * Tz[IDX4D_T(pole,x_T,y_T,z_T)]$REALFUNC;
Tz[IDX4D_T(pole,x_T,y_T,z_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEz,1+(pole*3))] * Tz[IDX4D_T(pole,x_T,y_T,z_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(x,y,z)];
}
Ez[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(x,y,z)] +
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x-1,y,z)]) -
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y-1,z)]) -
updatecoeffsE[IDX2D_MAT(materialEz,4)] * phi;
}
""")
update_electric_dispersive_B = Template("""
// Electric field updates - dispersive materials - part B of updates to electric
// field values when dispersive materials
// (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain.
// MAXPOLES: Maximum number of dispersive material poles present in model.
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients,
// dispersive, ID and field
// component arrays.
// Convert the linear index to subscripts for 3D field arrays
int x = i / ($NY_FIELDS * $NZ_FIELDS);
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
// Convert the linear index to subscripts for 4D material ID array
int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
// Convert the linear index to subscripts for 4D dispersive array
int x_T = (i % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T);
int y_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
int z_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
// Ex component
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tx[IDX4D_T(pole,x_T,y_T,z_T)] = Tx[IDX4D_T(pole,x_T,y_T,z_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(x,y,z)];
}
}
// Ey component
if ((NX != 1 || NZ != 1) && x > 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) {
int materialEy = ID[IDX4D_ID(1,x_ID,y_ID,z_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Ty[IDX4D_T(pole,x_T,y_T,z_T)] = Ty[IDX4D_T(pole,x_T,y_T,z_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(x,y,z)];
}
}
// Ez component
if ((NX != 1 || NY != 1) && x > 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) {
int materialEz = ID[IDX4D_ID(2,x_ID,y_ID,z_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tz[IDX4D_T(pole,x_T,y_T,z_T)] = Tz[IDX4D_T(pole,x_T,y_T,z_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(x,y,z)];
}
}
""")

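Unlike the Jinja templates, these snippets use string.Template with $-style placeholders so they can be specialised without a template engine before being pasted into a full kernel. A minimal sketch of that substitution, with assumed grid dimensions nx, ny, nz, is:

    # safe_substitute leaves placeholders filled elsewhere (e.g. $REAL, $REALFUNC) untouched
    knl_body = update_electric.safe_substitute(
        NY_FIELDS=ny + 1, NZ_FIELDS=nz + 1,
        NX_ID=nx + 1, NY_ID=ny + 1, NZ_ID=nz + 1)
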
File diff is too large to display. Load diff.

File diff is too large to display. Load diff.

View file

@@ -0,0 +1,72 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
store_snapshot = Template("""
// Stores field values for a snapshot.
//
// Args:
// p: Snapshot number.
//     xs, xf, ys, yf, zs, zf: Start and finish cell coordinates for snapshot.
// dx, dy, dz: Sampling interval in cell coordinates for snapshot.
// E, H: Access to field component arrays.
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots.
// Convert the linear index to subscripts for 4D SNAPS array
int x = (i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) / ($NY_SNAPS * $NZ_SNAPS);
int y = ((i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) / $NZ_SNAPS;
int z = ((i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) % $NZ_SNAPS;
// Subscripts for field arrays
int xx, yy, zz;
if (x >= xs && x < xf && y >= ys && y < yf && z >= zs && z < zf) {
// Increment subscripts for field array to account for spatial sampling of snapshot
xx = (xs + x) * dx;
yy = (ys + y) * dy;
zz = (zs + z) * dz;
// The electric field component value at a point comes from an average of
// the 4 electric field component values in that cell
snapEx[IDX4D_SNAPS(p,x,y,z)] = (Ex[IDX3D_FIELDS(xx,yy,zz)] +
Ex[IDX3D_FIELDS(xx,yy+1,zz)] +
Ex[IDX3D_FIELDS(xx,yy,zz+1)] +
Ex[IDX3D_FIELDS(xx,yy+1,zz+1)]) / 4;
snapEy[IDX4D_SNAPS(p,x,y,z)] = (Ey[IDX3D_FIELDS(xx,yy,zz)] +
Ey[IDX3D_FIELDS(xx+1,yy,zz)] +
Ey[IDX3D_FIELDS(xx,yy,zz+1)] +
Ey[IDX3D_FIELDS(xx+1,yy,zz+1)]) / 4;
snapEz[IDX4D_SNAPS(p,x,y,z)] = (Ez[IDX3D_FIELDS(xx,yy,zz)] +
Ez[IDX3D_FIELDS(xx+1,yy,zz)] +
Ez[IDX3D_FIELDS(xx,yy+1,zz)] +
Ez[IDX3D_FIELDS(xx+1,yy+1,zz)]) / 4;
// The magnetic field component value at a point comes from an average of
// 2 magnetic field component values in that cell and the following cell
snapHx[IDX4D_SNAPS(p,x,y,z)] = (Hx[IDX3D_FIELDS(xx,yy,zz)] +
Hx[IDX3D_FIELDS(xx+1,yy,zz)]) / 2;
snapHy[IDX4D_SNAPS(p,x,y,z)] = (Hy[IDX3D_FIELDS(xx,yy,zz)] +
Hy[IDX3D_FIELDS(xx,yy+1,zz)]) / 2;
snapHz[IDX4D_SNAPS(p,x,y,z)] = (Hz[IDX3D_FIELDS(xx,yy,zz)] +
Hz[IDX3D_FIELDS(xx,yy,zz+1)]) / 2;
}
""")

View file

@@ -0,0 +1,173 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
update_hertzian_dipole = Template("""
// Updates electric field values for Hertzian dipole sources.
//
// Args:
// NHERTZDIPOLE: Total number of Hertzian dipoles in the model.
// iteration: Iteration number of simulation.
// dx, dy, dz: Spatial discretisations.
// srcinfo1: Source cell coordinates and polarisation information.
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values.
// ID, E: Access to ID and field component arrays.
if (i < NHERTZDIPOLE) {
$REAL dl;
int x, y, z, polarisation;
x = srcinfo1[IDX2D_SRCINFO(i,0)];
y = srcinfo1[IDX2D_SRCINFO(i,1)];
z = srcinfo1[IDX2D_SRCINFO(i,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(i,3)];
dl = srcinfo2[i];
// 'x' polarised source
if (polarisation == 0) {
int materialEx = ID[IDX4D_ID(0,x,y,z)];
Ex[IDX3D_FIELDS(x,y,z)] = Ex[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialEy = ID[IDX4D_ID(1,x,y,z)];
Ey[IDX3D_FIELDS(x,y,z)] = Ey[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialEz = ID[IDX4D_ID(2,x,y,z)];
Ez[IDX3D_FIELDS(x,y,z)] = Ez[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * dl * (1 / (dx * dy * dz));
}
}
""")
update_magnetic_dipole = Template("""
// Updates magnetic field values for magnetic dipole sources.
//
// Args:
// NMAGDIPOLE: Total number of magnetic dipoles in the model.
// iteration: Iteration number of simulation.
// dx, dy, dz: Spatial discretisations.
// srcinfo1: Source cell coordinates and polarisation information.
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values.
// ID, H: Access to ID and field component arrays.
if (i < NMAGDIPOLE) {
int x, y, z, polarisation;
x = srcinfo1[IDX2D_SRCINFO(i,0)];
y = srcinfo1[IDX2D_SRCINFO(i,1)];
z = srcinfo1[IDX2D_SRCINFO(i,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(i,3)];
// 'x' polarised source
if (polarisation == 0) {
int materialHx = ID[IDX4D_ID(3,x,y,z)];
Hx[IDX3D_FIELDS(x,y,z)] = Hx[IDX3D_FIELDS(x,y,z)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialHy = ID[IDX4D_ID(4,x,y,z)];
Hy[IDX3D_FIELDS(x,y,z)] = Hy[IDX3D_FIELDS(x,y,z)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialHz = ID[IDX4D_ID(5,x,y,z)];
Hz[IDX3D_FIELDS(x,y,z)] = Hz[IDX3D_FIELDS(x,y,z)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (dx * dy * dz));
}
}
""")
update_voltage_source = Template("""
// Updates electric field values for voltage sources.
//
// Args:
// NVOLTSRC: Total number of voltage sources in the model.
// iteration: Iteration number of simulation.
// dx, dy, dz: Spatial discretisations.
// srcinfo1: Source cell coordinates and polarisation information.
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values.
// ID, E: Access to ID and field component arrays.
if (i < NVOLTSRC) {
$REAL resistance;
int x, y, z, polarisation;
x = srcinfo1[IDX2D_SRCINFO(i,0)];
y = srcinfo1[IDX2D_SRCINFO(i,1)];
z = srcinfo1[IDX2D_SRCINFO(i,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(i,3)];
resistance = srcinfo2[i];
// 'x' polarised source
if (polarisation == 0) {
if (resistance != 0) {
int materialEx = ID[IDX4D_ID(0,x,y,z)];
Ex[IDX3D_FIELDS(x,y,z)] = Ex[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (resistance * dy * dz));
}
else {
Ex[IDX3D_FIELDS(x,y,z)] = -1 * srcwaveforms[IDX2D_SRCWAVES(i,iteration)] / dx;
}
}
// 'y' polarised source
else if (polarisation == 1) {
if (resistance != 0) {
int materialEy = ID[IDX4D_ID(1,x,y,z)];
Ey[IDX3D_FIELDS(x,y,z)] = Ey[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (resistance * dx * dz));
}
else {
Ey[IDX3D_FIELDS(x,y,z)] = -1 * srcwaveforms[IDX2D_SRCWAVES(i,iteration)] / dy;
}
}
// 'z' polarised source
else if (polarisation == 2) {
if (resistance != 0) {
int materialEz = ID[IDX4D_ID(2,x,y,z)];
Ez[IDX3D_FIELDS(x,y,z)] = Ez[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (resistance * dx * dy));
}
else {
Ez[IDX3D_FIELDS(x,y,z)] = -1 * srcwaveforms[IDX2D_SRCWAVES(i,iteration)] / dz;
}
}
}
""")

View file

@@ -0,0 +1,42 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
store_outputs = Template("""
// Stores field component values for every receiver in the model.
//
// Args:
// NRX: total number of receivers in the model.
// rxs: array to store field components for receivers - rows
//          are field components; columns are iterations; pages are receivers.
if (i < NRX) {
int x, y, z;
x = rxcoords[IDX2D_RXCOORDS(i,0)];
y = rxcoords[IDX2D_RXCOORDS(i,1)];
z = rxcoords[IDX2D_RXCOORDS(i,2)];
rxs[IDX3D_RXS(0,iteration,i)] = Ex[IDX3D_FIELDS(x,y,z)];
rxs[IDX3D_RXS(1,iteration,i)] = Ey[IDX3D_FIELDS(x,y,z)];
rxs[IDX3D_RXS(2,iteration,i)] = Ez[IDX3D_FIELDS(x,y,z)];
rxs[IDX3D_RXS(3,iteration,i)] = Hx[IDX3D_FIELDS(x,y,z)];
rxs[IDX3D_RXS(4,iteration,i)] = Hy[IDX3D_FIELDS(x,y,z)];
rxs[IDX3D_RXS(5,iteration,i)] = Hz[IDX3D_FIELDS(x,y,z)];
}
""")

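After the iteration loop the rxs array filled by this snippet holds, per receiver, rows of the six field components and columns of iterations. Copying it back to the host works the same way for both back ends, since pycuda GPUArray and pyopencl Array both provide .get(); a sketch with assumed variable names:

    rxs = rxs_dev.get()       # shape (6, iterations, NRX), following IDX3D_RXS
    ex_rx0 = rxs[0, :, 0]     # Ex time history at the first receiver
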
View file

@@ -56,7 +56,7 @@ def store_outputs(G):
            tl.Itotal[iteration] = tl.current[tl.antpos]

-kernel_template_store_outputs = Template("""
+knl_template_store_outputs = Template("""

// Macros for converting subscripts to linear index:
#define INDEX2D_RXCOORDS(m, n) (m)*($NY_RXCOORDS)+(n)

View file

@@ -32,6 +32,7 @@ args_defaults = {'scenes': None,
                 'restart': None,
                 'mpi': False,
                 'gpu': None,
+                'opencl': None,
                 'subgrid': False,
                 'autotranslate': False,
                 'geometry_only': False,
@@ -67,6 +68,8 @@ help_msg = {'scenes': '(list, opt): List of the scenes to run the model. '
                       'performance section of the User Guide.',
            'gpu': '(list/bool, opt): Flag to use NVIDIA GPU or list of NVIDIA '
                   'GPU device ID(s) for specific GPU card(s).',
+           'opencl': '(list/bool, opt): Flag to use OpenCL or list of OpenCL '
+                     'device ID(s) for specific compute device(s).',
            'subgrid': '(bool, opt): Flag to use sub-gridding.',
            'autotranslate': '(bool, opt): For sub-gridding - auto translate '
                             'objects with main grid coordinates to their '
@@ -92,6 +95,7 @@ def run(scenes=args_defaults['scenes'],
        restart=args_defaults['restart'],
        mpi=args_defaults['mpi'],
        gpu=args_defaults['gpu'],
+       opencl=args_defaults['opencl'],
        subgrid=args_defaults['subgrid'],
        autotranslate=args_defaults['autotranslate'],
        geometry_only=args_defaults['geometry_only'],
@@ -112,6 +116,7 @@ def run(scenes=args_defaults['scenes'],
            'restart': restart,
            'mpi': mpi,
            'gpu': gpu,
+           'opencl': opencl,
            'subgrid': subgrid,
            'autotranslate': autotranslate,
            'geometry_only': geometry_only,
@@ -139,6 +144,8 @@ def cli():
                        help=help_msg['mpi'])
    parser.add_argument('-gpu', type=int, action='append', nargs='*',
                        help=help_msg['gpu'])
+   parser.add_argument('-opencl', type=int, action='append', nargs='*',
+                       help=help_msg['opencl'])
    parser.add_argument('--geometry-only', action='store_true',
                        default=args_defaults['geometry_only'],
                        help=help_msg['geometry_only'])
@@ -176,11 +183,11 @@ def run_main(args):
    if args.spotpy:
        context = SPOTPYContext()
        context.run(args.i)
-   # MPI running with (OpenMP/CUDA)
+   # MPI running with (OpenMP/CUDA/OpenCL)
    elif config.sim_config.args.mpi:
        context = MPIContext()
        context.run()
-   # Standard running (OpenMP/CUDA)
+   # Standard running (OpenMP/CUDA/OpenCL)
    else:
        context = Context()
        context.run()
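
With these additions the OpenCL solver is requested in the same way as the CUDA one: from the API by passing opencl=[deviceID] alongside the existing run() arguments (mirroring the gpu keyword, with the device ID list optional and defaulting to device 0), or from the command line with the new -opencl option. An illustrative invocation, with the input filename assumed:

    python -m gprMax model.in -opencl 0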

View file

@@ -306,33 +306,74 @@ class CUDAGrid(FDTDGrid):
        self.bpg = (int(np.ceil(((self.nx + 1) * (self.ny + 1) *
                    (self.nz + 1)) / self.tpb[0])), 1, 1)

-   def htod_geometry_arrays(self):
-       """Initialise an array for cell edge IDs (ID) on GPU."""
-       import pycuda.gpuarray as gpuarray
-       self.ID_gpu = gpuarray.to_gpu(self.ID)
+   def htod_geometry_arrays(self, queue=None):
+       """Initialise an array for cell edge IDs (ID) on compute device.
+
+       Args:
+           queue: pyopencl queue.
+       """
+       if config.sim_config.general['solver'] == 'cuda':
+           import pycuda.gpuarray as gpuarray
+           self.ID_dev = gpuarray.to_gpu(self.ID)
+       elif config.sim_config.general['solver'] == 'opencl':
+           import pyopencl.array as clarray
+           self.ID_dev = clarray.to_device(queue, self.ID)

-   def htod_field_arrays(self):
-       """Initialise geometry and field arrays on GPU."""
-       import pycuda.gpuarray as gpuarray
-       self.Ex_gpu = gpuarray.to_gpu(self.Ex)
-       self.Ey_gpu = gpuarray.to_gpu(self.Ey)
-       self.Ez_gpu = gpuarray.to_gpu(self.Ez)
-       self.Hx_gpu = gpuarray.to_gpu(self.Hx)
-       self.Hy_gpu = gpuarray.to_gpu(self.Hy)
-       self.Hz_gpu = gpuarray.to_gpu(self.Hz)
+   def htod_field_arrays(self, queue=None):
+       """Initialise field arrays on compute device.
+
+       Args:
+           queue: pyopencl queue.
+       """
+       if config.sim_config.general['solver'] == 'cuda':
+           import pycuda.gpuarray as gpuarray
+           self.Ex_dev = gpuarray.to_gpu(self.Ex)
+           self.Ey_dev = gpuarray.to_gpu(self.Ey)
+           self.Ez_dev = gpuarray.to_gpu(self.Ez)
+           self.Hx_dev = gpuarray.to_gpu(self.Hx)
+           self.Hy_dev = gpuarray.to_gpu(self.Hy)
+           self.Hz_dev = gpuarray.to_gpu(self.Hz)
+       elif config.sim_config.general['solver'] == 'opencl':
+           import pyopencl.array as clarray
+           self.Ex_dev = clarray.to_device(queue, self.Ex)
+           self.Ey_dev = clarray.to_device(queue, self.Ey)
+           self.Ez_dev = clarray.to_device(queue, self.Ez)
+           self.Hx_dev = clarray.to_device(queue, self.Hx)
+           self.Hy_dev = clarray.to_device(queue, self.Hy)
+           self.Hz_dev = clarray.to_device(queue, self.Hz)

-   def htod_dispersive_arrays(self):
-       """Initialise dispersive material coefficient arrays on GPU."""
-       import pycuda.gpuarray as gpuarray
-       self.Tx_gpu = gpuarray.to_gpu(self.Tx)
-       self.Ty_gpu = gpuarray.to_gpu(self.Ty)
-       self.Tz_gpu = gpuarray.to_gpu(self.Tz)
-       self.updatecoeffsdispersive_gpu = gpuarray.to_gpu(self.updatecoeffsdispersive)
+   def htod_dispersive_arrays(self, queue=None):
+       """Initialise dispersive material coefficient arrays on compute device.
+
+       Args:
+           queue: pyopencl queue.
+       """
+       if config.sim_config.general['solver'] == 'cuda':
+           import pycuda.gpuarray as gpuarray
+           self.Tx_dev = gpuarray.to_gpu(self.Tx)
+           self.Ty_dev = gpuarray.to_gpu(self.Ty)
+           self.Tz_dev = gpuarray.to_gpu(self.Tz)
+           self.updatecoeffsdispersive_dev = gpuarray.to_gpu(self.updatecoeffsdispersive)
+       elif config.sim_config.general['solver'] == 'opencl':
+           import pyopencl.array as clarray
+           self.Tx_dev = clarray.to_device(queue, self.Tx)
+           self.Ty_dev = clarray.to_device(queue, self.Ty)
+           self.Tz_dev = clarray.to_device(queue, self.Tz)
+           self.updatecoeffsdispersive_dev = clarray.to_device(queue, self.updatecoeffsdispersive)
+
+
+class OpenCLGrid(CUDAGrid):
+   """Additional grid methods for solving on compute device using OpenCL."""
+
+   def __init__(self):
+       super().__init__()
+
+   def set_blocks_per_grid(self):
+       pass


def dispersion_analysis(G):
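
The htod_* methods now take an optional pyopencl command queue and branch on config.sim_config.general['solver'], so the same grid object serves both back ends. A hypothetical driver sequence for the OpenCL path (context and queue creation is not part of this hunk) would look like:

    import pyopencl as cl

    dev = config.get_model_config().device['dev']   # compute device selected earlier
    ctx = cl.Context(devices=[dev])
    queue = cl.CommandQueue(ctx)

    grid.htod_geometry_arrays(queue=queue)
    grid.htod_field_arrays(queue=queue)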

View file

@@ -149,7 +149,9 @@ class ModelBuildRun:
        # Check memory requirements
        total_mem, mem_strs = mem_check_all(grids)
-       logger.info(f'\nMemory required: {" + ".join(mem_strs)} + ~{human_size(config.get_model_config().mem_overhead)} overhead = {human_size(total_mem)}')
+       logger.info(f'\nMemory required: {" + ".join(mem_strs)} + '
+                   f'~{human_size(config.get_model_config().mem_overhead)} '
+                   f'overhead = {human_size(total_mem)}')

        # Build grids
        gridbuilders = [GridBuilder(grid) for grid in grids]
@@ -170,21 +172,41 @@ class ModelBuildRun:
            # Check to see if numerical dispersion might be a problem
            results = dispersion_analysis(gb.grid)
            if results['error']:
-               logger.warning(f"\nNumerical dispersion analysis [{gb.grid.name}] not carried out as {results['error']}")
+               logger.warning(f"\nNumerical dispersion analysis [{gb.grid.name}] "
+                              f"not carried out as {results['error']}")
            elif results['N'] < config.get_model_config().numdispersion['mingridsampling']:
-               logger.exception(f"\nNon-physical wave propagation in [{gb.grid.name}] detected. Material '{results['material'].ID}' has wavelength sampled by {results['N']} cells, less than required minimum for physical wave propagation. Maximum significant frequency estimated as {results['maxfreq']:g}Hz")
+               logger.exception(f"\nNon-physical wave propagation in [{gb.grid.name}] "
+                                f"detected. Material '{results['material'].ID}' "
+                                f"has wavelength sampled by {results['N']} cells, "
+                                f"less than required minimum for physical wave "
+                                f"propagation. Maximum significant frequency "
+                                f"estimated as {results['maxfreq']:g}Hz")
                raise ValueError
            elif (results['deltavp'] and np.abs(results['deltavp']) >
                  config.get_model_config().numdispersion['maxnumericaldisp']):
-               logger.warning(f"\n[{gb.grid.name}] has potentially significant numerical dispersion. Estimated largest physical phase-velocity error is {results['deltavp']:.2f}% in material '{results['material'].ID}' whose wavelength sampled by {results['N']} cells. Maximum significant frequency estimated as {results['maxfreq']:g}Hz")
+               logger.warning(f"\n[{gb.grid.name}] has potentially significant "
+                              f"numerical dispersion. Estimated largest physical "
+                              f"phase-velocity error is {results['deltavp']:.2f}% "
+                              f"in material '{results['material'].ID}' whose "
+                              f"wavelength sampled by {results['N']} cells. "
+                              f"Maximum significant frequency estimated as "
+                              f"{results['maxfreq']:g}Hz")
            elif results['deltavp']:
-               logger.info(f"\nNumerical dispersion analysis [{gb.grid.name}]: estimated largest physical phase-velocity error is {results['deltavp']:.2f}% in material '{results['material'].ID}' whose wavelength sampled by {results['N']} cells. Maximum significant frequency estimated as {results['maxfreq']:g}Hz")
+               logger.info(f"\nNumerical dispersion analysis [{gb.grid.name}]: "
+                           f"estimated largest physical phase-velocity error is "
+                           f"{results['deltavp']:.2f}% in material '{results['material'].ID}' "
+                           f"whose wavelength sampled by {results['N']} cells. "
+                           f"Maximum significant frequency estimated as "
+                           f"{results['maxfreq']:g}Hz")

    def reuse_geometry(self):
        # Reset iteration number
        self.G.iteration = 0
-       s = f'\n--- Model {config.get_model_config().appendmodelnumber}/{config.sim_config.model_end}, input file (not re-processed, i.e. geometry fixed): {config.sim_config.input_file_path}'
-       config.get_model_config().inputfilestr = Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" + Style.RESET_ALL
+       s = (f'\n--- Model {config.get_model_config().appendmodelnumber}/{config.sim_config.model_end}, '
+            f'input file (not re-processed, i.e. geometry fixed): '
+            f'{config.sim_config.input_file_path}')
+       config.get_model_config().inputfilestr = (Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" +
+                                                 Style.RESET_ALL)
        logger.basic(config.get_model_config().inputfilestr)
        for grid in [self.G] + self.G.subgrids:
            grid.reset_fields()
@@ -224,7 +246,9 @@ class ModelBuildRun:
                fn = snapshotdir / Path(snap.filename)
                snap.filename = fn.with_suffix(snap.fileext)
                pbar = tqdm(total=snap.vtkdatawritesize, leave=True, unit='byte',
-                           unit_scale=True, desc=f'Writing snapshot file {i + 1} of {len(self.G.snapshots)}, {snap.filename.name}',
+                           unit_scale=True, desc=f'Writing snapshot file {i + 1} '
+                                                 f'of {len(self.G.snapshots)}, '
+                                                 f'{snap.filename.name}',
                            ncols=get_terminal_width() - 1, file=sys.stdout,
                            disable=not config.sim_config.general['progressbars'])
                snap.write_file(pbar, self.G)
@@ -235,12 +259,12 @@ class ModelBuildRun:
        """Print resource information on runtime and memory usage.

        Args:
-           tsolve (float): Time taken to execute solving (seconds).
-           memsolve (float): Memory (RAM) used on GPU.
+           tsolve: float of time taken to execute solving (seconds).
+           memsolve: float of memory (RAM) used.
        """

        mem_str = ''
-       if config.sim_config.general['cuda']:
+       if config.sim_config.general['solver'] == 'cuda':
            mem_str = f' host + ~{human_size(memsolve)} GPU'
        logger.info(f'\nMemory used: ~{human_size(self.p.memory_full_info().uss)}{mem_str}')
@@ -250,24 +274,37 @@ class ModelBuildRun:
"""Solve using FDTD method. """Solve using FDTD method.
Args: Args:
solver (Solver): solver object. solver: solver object.
Returns: Returns:
tsolve (float): time taken to execute solving (seconds). tsolve: float of time taken to execute solving (seconds).
""" """
# Check number of OpenMP threads # Print information about and check OpenMP threads
if config.sim_config.general['cpu']: if config.sim_config.general['solver'] == 'cpu':
logger.basic(f"CPU solver using: {config.get_model_config().ompthreads} OpenMP thread(s) on {config.sim_config.hostinfo['hostname']}\n") logger.basic(f"OPENMP solver with {config.get_model_config().ompthreads} "
f"thread(s) on {config.sim_config.hostinfo['hostname']}\n")
if config.get_model_config().ompthreads > config.sim_config.hostinfo['physicalcores']: if config.get_model_config().ompthreads > config.sim_config.hostinfo['physicalcores']:
logger.warning(f"You have specified more threads ({config.get_model_config().ompthreads}) than available physical CPU cores ({config.sim_config.hostinfo['physicalcores']}). This may lead to degraded performance.") logger.warning(f"You have specified more threads ({config.get_model_config().ompthreads}) "
# Print information about any GPU in use f"than available physical CPU cores ({config.sim_config.hostinfo['physicalcores']}). "
elif config.sim_config.general['cuda']: f"This may lead to degraded performance.")
logger.basic(f"GPU solver using: {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} on {config.sim_config.hostinfo['hostname']}\n") # Print information about any compute device, e.g. GPU, in use
elif config.sim_config.general['solver'] == 'cuda' or config.sim_config.general['solver'] == 'opencl':
solvername = config.sim_config.general['solver'].upper()
hostname = config.sim_config.hostinfo['hostname']
if config.sim_config.general['solver'] == 'opencl':
platformname = ' on ' + ' '.join(config.get_model_config().device['dev'].platform.name.split()) + ' platform'
else:
platformname = ''
devicename = ' '.join(config.get_model_config().device['dev'].name.split())
logger.basic(f"{solvername} solver using {devicename}{platformname} "
f"on {hostname}\n")
# Prepare iterator # Prepare iterator
if config.sim_config.general['progressbars']: if config.sim_config.general['progressbars']:
iterator = tqdm(range(self.G.iterations), desc=f'Running model {config.model_num + 1}/{config.sim_config.model_end}', ncols=get_terminal_width() - 1, file=sys.stdout, disable=not config.sim_config.general['progressbars']) iterator = tqdm(range(self.G.iterations), desc=f'Running model {config.model_num + 1}/{config.sim_config.model_end}',
ncols=get_terminal_width() - 1, file=sys.stdout,
disable=not config.sim_config.general['progressbars'])
else: else:
iterator = range(self.G.iterations) iterator = range(self.G.iterations)
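The OpenCL kernels for this solver are Jinja2 templates (see the new files below), which the host code is expected to render and compile at run time with pyopencl. A minimal sketch of that flow, assuming a hypothetical template file name (update_field.cl.tmpl), a toy 9 x 9 x 9-cell grid and placeholder coefficient values:

import numpy as np
import pyopencl as cl
from jinja2 import Template

# Render the kernel template: substitute real/complex types, array dimensions
# and material update coefficients (all values here are illustrative only).
tmpl = Template(open('update_field.cl.tmpl').read())  # hypothetical file name
src = tmpl.render(REAL='float', COMPLEX='cfloat',
                  N_updatecoeffsE=5, updateEVal=[0.0] * 5,
                  N_updatecoeffsH=5, updateHVal=[0.0] * 5,
                  NY_MATCOEFFS=5, NY_MATDISPCOEFFS=3,
                  NY_FIELDS=10, NZ_FIELDS=10,
                  NX_ID=10, NY_ID=10, NZ_ID=10,
                  NX_T=1, NY_T=1, NZ_T=1)

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
prg = cl.Program(ctx, src).build()  # pyopencl supplies the include path for pyopencl-complex.h

# Host arrays for the toy grid (field arrays are one cell larger per dimension)
n = 10 * 10 * 10
fields = {name: np.zeros(n, dtype=np.float32)
          for name in ('Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz')}
ID = np.zeros(6 * n, dtype=np.uint32)  # every cell uses material 0

mf = cl.mem_flags
bufs = {name: cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=arr)
        for name, arr in fields.items()}
ID_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=ID)

# One electric-field update over a 1D global range covering every cell
prg.update_electric(queue, (n,), None,
                    np.int32(9), np.int32(9), np.int32(9), ID_buf,
                    bufs['Ex'], bufs['Ey'], bufs['Ez'],
                    bufs['Hx'], bufs['Hy'], bufs['Hz'])
cl.enqueue_copy(queue, fields['Ex'], bufs['Ex'])

Launching with a 1D global range keeps the index arithmetic in the kernels valid, since get_global_id(1) and get_global_id(2) are then zero and the linear index reduces to get_global_id(0).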

@@ -0,0 +1,311 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
#include <pyopencl-complex.h>
#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}}) + (n)
#define INDEX2D_MATDISP(m, n) (m)*({{NY_MATDISPCOEFFS}}) + (n)
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}}) + (i)*({{NY_ID}})*({{NZ_ID}}) + (j)*({{NZ_ID}}) + (k)
#define INDEX4D_T(p, i, j, k) (p)*({{NX_T}})*({{NY_T}})*({{NZ_T}}) + (i)*({{NY_T}})*({{NZ_T}}) + (j)*({{NZ_T}}) + (k)
// Material coefficients (read-only) in constant memory
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updateEVal %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updateHVal %}
{{i}},
{% endfor %}
};
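// For example (hypothetical values): rendering this template with REAL = "float",
// N_updatecoeffsE = 3 and updateEVal = [1.0, 0.5, 0.25] would expand the loop above to
//   __constant float updatecoeffsE[3] =
//   {
//     1.0,
//     0.5,
//     0.25,
//   };
// (a trailing comma is valid in a C initialiser list).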
///////////////////////////////////////////////
// Electric field updates - normal materials //
///////////////////////////////////////////////
__kernel void update_electric(int NX, int NY, int NZ,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}} * restrict Hx,
__global const {{REAL}} * restrict Hy,
__global const {{REAL}} * restrict Hz) {
// This function updates electric field values.
//
// Args:
// NX, NY, NZ: Number of cells of the model's domain.
// ID, E, H: Access to ID and field component arrays.
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}}*{{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}}*{{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID arrays
int i_ID = (idx%({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx%({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx%({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
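// Worked example (hypothetical sizes): with NY_FIELDS = 4 and NZ_FIELDS = 5, a
// work-item with idx = 37 maps to i = 37 / 20 = 1, j = (37 % 20) / 5 = 3 and
// k = (37 % 20) % 5 = 2, i.e. the cell (1, 3, 2) of the 3D field arrays; the
// 4D material ID subscripts are obtained in the same way.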
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]);
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]);
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]);
}
}
////////////////////////////
// Magnetic field updates //
////////////////////////////
__kernel void update_magnetic(int NX, int NY, int NZ,
__global const unsigned int* restrict ID,
__global {{REAL}} *Hx,
__global {{REAL}} *Hy,
__global {{REAL}} *Hz,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez) {
// This function updates magnetic field values.
//
// Args:
// NX, NY, NZ: number of cells of the model domain.
// ID, E, H: access to ID and field component arrays.
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(0);
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID arrays
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Hx component
if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) {
int materialHx = ID[INDEX4D_ID(3,i_ID,j_ID,k_ID)];
Hx[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHx,0)] * Hx[INDEX3D_FIELDS(i,j,k)] -
updatecoeffsH[INDEX2D_MAT(materialHx,2)] * (Ez[INDEX3D_FIELDS(i,j+1,k)] - Ez[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHx,3)] * (Ey[INDEX3D_FIELDS(i,j,k+1)] - Ey[INDEX3D_FIELDS(i,j,k)]);
}
// Hy component
if (NY != 1 && i >= 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialHy = ID[INDEX4D_ID(4,i_ID,j_ID,k_ID)];
Hy[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHy,0)] * Hy[INDEX3D_FIELDS(i,j,k)] -
updatecoeffsH[INDEX2D_MAT(materialHy,3)] * (Ex[INDEX3D_FIELDS(i,j,k+1)] - Ex[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHy,1)] * (Ez[INDEX3D_FIELDS(i+1,j,k)] - Ez[INDEX3D_FIELDS(i,j,k)]);
}
// Hz component
if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialHz = ID[INDEX4D_ID(5,i_ID,j_ID,k_ID)];
Hz[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHz,0)] * Hz[INDEX3D_FIELDS(i,j,k)] -
updatecoeffsH[INDEX2D_MAT(materialHz,1)] * (Ey[INDEX3D_FIELDS(i+1,j,k)] - Ey[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHz,2)] * (Ex[INDEX3D_FIELDS(i,j+1,k)] - Ex[INDEX3D_FIELDS(i,j,k)]);
}
}
///////////////////////////////////////////////////
// Electric field updates - dispersive materials //
///////////////////////////////////////////////////
__kernel void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES,
__global const {{COMPLEX-}}_t* restrict updatecoeffsdispersive,
__global {{COMPLEX-}}_t *Tx,
__global {{COMPLEX-}}_t *Ty,
__global {{COMPLEX-}}_t *Tz,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz) {
// This function is part A of updates to electric field values when dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,pole*3)].real * Tx[INDEX4D_T(pole,i_T,j_T,k_T)].real;
Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_add(cfloat_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,1+(pole*3))],
Tx[INDEX4D_T(pole,i_T,j_T,k_T)]),
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))],
Ex[INDEX3D_FIELDS(i,j,k)]));
}
Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * phi;
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,pole*3)].real * Ty[INDEX4D_T(pole,i_T,j_T,k_T)].real;
Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_add(cfloat_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,1+(pole*3))],
Ty[INDEX4D_T(pole,i_T,j_T,k_T)]),
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))],
Ey[INDEX3D_FIELDS(i,j,k)]));
}
Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * phi;
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,pole*3)].real * Tz[INDEX4D_T(pole,i_T,j_T,k_T)].real;
Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_add(cfloat_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,1+(pole*3))],
Tz[INDEX4D_T(pole,i_T,j_T,k_T)]),
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))],
Ez[INDEX3D_FIELDS(i,j,k)]));
}
Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * phi;
}
}
__kernel void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES,
__global const {{COMPLEX-}}_t* restrict updatecoeffsdispersive,
__global {{COMPLEX-}}_t *Tx,
__global {{COMPLEX-}}_t *Ty,
__global {{COMPLEX-}}_t *Tz,
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez) {
// This function is part B which updates the dispersive field arrays when dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_sub(Tx[INDEX4D_T(pole,i_T,j_T,k_T)],
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))],
Ex[INDEX3D_FIELDS(i,j,k)]));
}
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_sub(Ty[INDEX4D_T(pole,i_T,j_T,k_T)],
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))],
Ey[INDEX3D_FIELDS(i,j,k)]));
}
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_sub(Tz[INDEX4D_T(pole,i_T,j_T,k_T)],
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))],
Ez[INDEX3D_FIELDS(i,j,k)]));
}
}
}
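Restated in equation form (a summary of the two dispersive kernels above, not additional functionality): for each pole p, with c_{0,p}, c_{1,p}, c_{2,p} the per-pole entries of updatecoeffsdispersive and C_0 ... C_4 the per-material entries of updatecoeffsE, kernel A computes

$$\phi = \sum_{p=0}^{P-1} \Re(c_{0,p})\,\Re(T_p), \qquad
T_p \leftarrow c_{1,p}\,T_p + c_{2,p}\,E^{n}, \qquad
E^{n+1} = C_0\,E^{n} + (\text{curl terms}) - C_4\,\phi,$$

and kernel B, run once the new field values are available, completes the recursion with

$$T_p \leftarrow T_p - c_{2,p}\,E^{n+1}.$$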

@@ -0,0 +1,955 @@
// Macros for converting subscripts to linear index:
#define INDEX2D_R(m, n) (m)*(NY_R)+(n)
#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k)
#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k)
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updateEVal %}
{{i}},
{% endfor %}
};
__kernel void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d){
// This function updates the Ey and Ez field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHy, dHz;
{{REAL}} dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - i1;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - i2;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__kernel void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d){
// This function updates the Ey and Ez field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz;
{{REAL}} dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - i1;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - i2;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__kernel void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d){
// This function updates the Ey and Ez field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHy, dHz;
{{REAL}} dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__kernel void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ey and Ez field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz;
{{REAL}} dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__kernel void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ez field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHx, dHz;
{{REAL}} dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - j1;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - j2;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ez field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz;
{{REAL}} dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - j1;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - j2;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ez field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHx, dHz;
{{REAL}} dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ez field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz;
{{REAL}} dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ey field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHx, dHy;
{{REAL}} dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - k1;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - k2;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ey field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy;
{{REAL}} dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - k1;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - k2;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ey field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHx, dHy;
{{REAL}} dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ey field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy;
{{REAL}} dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
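// The second-order kernel above chains two first-order accumulators: PHI1[1] is
// driven by the already-filtered quantity RA0 * dHy + RB0 * PHI1[0], and the field
// correction uses (RA0 * RA1 - 1) with both accumulators, mirroring the first-order
// pattern noted earlier.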

View file

@@ -0,0 +1,962 @@
// Macros for converting subscripts to linear index:
#define INDEX2D_R(m, n) (m)*(NY_R)+(n)
#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k)
#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k)
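// These macros flatten multi-dimensional subscripts to the row-major linear index
// used for the 1D buffers. A worked example (template values assumed purely for
// illustration): with NY_FIELDS = 120 and NZ_FIELDS = 90,
//   INDEX3D_FIELDS(5, 7, 3) = 5*120*90 + 7*90 + 3 = 54000 + 630 + 3 = 54633
// The 4D ID and PHI macros follow the same pattern with an extra leading dimension.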
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updateHVal %}
{{i}},
{% endfor %}
};
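// The {{...}} and {% ... %} markers above are template placeholders that the host
// code fills in before this source is compiled; after substitution the block is
// plain OpenCL C. A sketch of one possible rendered form (type and values assumed
// purely for illustration):
//   __constant float updatecoeffsH[10] =
//   {
//     1.0f,
//     0.0008854f,
//     ...
//   };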
__kernel void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hy and Hz field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEy, dEz;
{{REAL}} dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - (i1 + 1);
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - (i2 + 1);
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__kernel void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hy and Hz field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz;
{{REAL}} dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - (i1 + 1);
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - (i2 + 1);
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__kernel void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hy and Hz field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEy, dEz;
{{REAL}} dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__kernel void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hy and Hz field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz;
{{REAL}} dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__kernel void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hz field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEx, dEz;
{{REAL}} dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - (j1 + 1);
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - (j2 + 1);
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hz field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz;
{{REAL}} dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - (j1 + 1);
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - (j2 + 1);
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hz field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEx, dEz;
{{REAL}} dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hz field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz;
{{REAL}} dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hy field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEx, dEy;
{{REAL}} dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - (k1 + 1);
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - (k2 + 1);
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hy field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy;
{{REAL}} dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - (k1 + 1);
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - (k2 + 1);
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hy field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEx, dEy;
{{REAL}} dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hy field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy;
{{REAL}} dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}

55
gprMax/opencl/snapshots.cl Normal file
View file

@@ -0,0 +1,55 @@
// Macros for converting subscripts to linear index:
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define INDEX4D_SNAPS(p, i, j, k) (p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k)
////////////////////
// Store snapshot //
////////////////////
__kernel void store_snapshot(int p, int xs, int xf, int ys, int yf, int zs, int zf, int dx, int dy, int dz,
__global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz,
__global {{REAL}} *snapEx, __global {{REAL}} *snapEy, __global {{REAL}} *snapEz,
__global {{REAL}} *snapHx, __global {{REAL}} *snapHy, __global {{REAL}} *snapHz) {
// This function stores field values for a snapshot.
//
// Args:
// p: Snapshot number
// xs, xf, ys, yf, zs, zf: Start and finish cell coordinates for snapshot
// dx, dy, dz: Sampling interval in cell coordinates for snapshot
// E, H: Access to field component arrays
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for 4D SNAPS array
int i = (idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) / ({{NY_SNAPS}} * {{NZ_SNAPS}});
int j = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) / {{NZ_SNAPS}};
int k = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) % {{NZ_SNAPS}};
// Subscripts for field arrays
int ii, jj, kk;
if (i >= xs && i < xf && j >= ys && j < yf && k >= zs && k < zf) {
// Increment subscripts for field array to account for spatial sampling of snapshot
ii = (xs + i) * dx;
jj = (ys + j) * dy;
kk = (zs + k) * dz;
// The electric field component value at a point comes from an average of
// the 4 electric field component values in that cell
snapEx[INDEX4D_SNAPS(p,i,j,k)] = (Ex[INDEX3D_FIELDS(ii,jj,kk)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk)] + Ex[INDEX3D_FIELDS(ii,jj,kk+1)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk+1)]) / 4;
snapEy[INDEX4D_SNAPS(p,i,j,k)] = (Ey[INDEX3D_FIELDS(ii,jj,kk)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk)] + Ey[INDEX3D_FIELDS(ii,jj,kk+1)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk+1)]) / 4;
snapEz[INDEX4D_SNAPS(p,i,j,k)] = (Ez[INDEX3D_FIELDS(ii,jj,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj,kk)] + Ez[INDEX3D_FIELDS(ii,jj+1,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj+1,kk)]) / 4;
// The magnetic field component value at a point comes from an average of the
// 2 magnetic field component values in that cell and the following cell
snapHx[INDEX4D_SNAPS(p,i,j,k)] = (Hx[INDEX3D_FIELDS(ii,jj,kk)] + Hx[INDEX3D_FIELDS(ii+1,jj,kk)]) / 2;
snapHy[INDEX4D_SNAPS(p,i,j,k)] = (Hy[INDEX3D_FIELDS(ii,jj,kk)] + Hy[INDEX3D_FIELDS(ii,jj+1,kk)]) / 2;
snapHz[INDEX4D_SNAPS(p,i,j,k)] = (Hz[INDEX3D_FIELDS(ii,jj,kk)] + Hz[INDEX3D_FIELDS(ii,jj,kk+1)]) / 2;
}
}

View file

@@ -0,0 +1,206 @@
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
// Macros for converting subscripts to linear index:
#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}}) + (n)
#define INDEX2D_SRCINFO(m, n) (m)*({{NY_SRCINFO}}) + (n)
#define INDEX2D_SRCWAVES(m, n) (m)*({{NY_SRCWAVES}}) + (n)
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}}) + (i)*({{NY_ID}})*({{NZ_ID}}) + (j)*({{NZ_ID}}) + (k)
// Material coefficients (read-only) in constant memory
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updateEVal %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updateHVal %}
{{i}},
{% endfor %}
};
///////////////////////////////////////////
// Hertzian dipole electric field update //
///////////////////////////////////////////
__kernel void update_hertzian_dipole(int NHERTZDIPOLE, int iteration,
{{REAL}} dx, {{REAL}} dy, {{REAL}} dz,
__global const int* restrict srcinfo1,
__global const {{REAL}}* restrict srcinfo2,
__global const {{REAL}}* restrict srcwaveforms,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez) {
// This function updates electric field values for Hertzian dipole sources.
//
// Args:
// NHERTZDIPOLE: Total number of Hertzian dipoles in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, E: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread
int src = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
if (src < NHERTZDIPOLE) {
{{REAL}} dl;
int i, j, k, polarisation;
i = srcinfo1[INDEX2D_SRCINFO(src,0)];
j = srcinfo1[INDEX2D_SRCINFO(src,1)];
k = srcinfo1[INDEX2D_SRCINFO(src,2)];
polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
dl = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
int materialEx = ID[INDEX4D_ID(0,i,j,k)];
Ex[INDEX3D_FIELDS(i,j,k)] = Ex[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialEy = ID[INDEX4D_ID(1,i,j,k)];
Ey[INDEX3D_FIELDS(i,j,k)] = Ey[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialEz = ID[INDEX4D_ID(2,i,j,k)];
Ez[INDEX3D_FIELDS(i,j,k)] = Ez[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
}
}
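// In effect each Hertzian dipole applies a soft-source injection along its
// polarisation axis; restated from the code above, with Ce the material's source
// coefficient updatecoeffsE[material][4]:
//   E -= Ce * waveform(iteration) * dl / (dx * dy * dz)
// i.e. the waveform sample is scaled by the dipole length dl and normalised by the
// cell volume before being subtracted from the field at the source cell.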
__kernel void update_magnetic_dipole(int NMAGDIPOLE, int iteration, {{REAL}} dx, {{REAL}} dy, {{REAL}} dz, __global const int* restrict srcinfo1, __global const {{REAL}}* restrict srcinfo2, __global const {{REAL}}* restrict srcwaveforms, __global const unsigned int* restrict ID, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz){
// This function updates magnetic field values for magnetic dipole sources.
//
// Args:
// NMAGDIPOLE: Total number of magnetic dipoles in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use it for each source
int src = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
if (src < NMAGDIPOLE) {
int i, j, k, polarisation;
i = srcinfo1[INDEX2D_SRCINFO(src,0)];
j = srcinfo1[INDEX2D_SRCINFO(src,1)];
k = srcinfo1[INDEX2D_SRCINFO(src,2)];
polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
// 'x' polarised source
if (polarisation == 0) {
int materialHx = ID[INDEX4D_ID(3,i,j,k)];
Hx[INDEX3D_FIELDS(i,j,k)] = Hx[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialHy = ID[INDEX4D_ID(4,i,j,k)];
Hy[INDEX3D_FIELDS(i,j,k)] = Hy[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialHz = ID[INDEX4D_ID(5,i,j,k)];
Hz[INDEX3D_FIELDS(i,j,k)] = Hz[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
}
}
__kernel void update_voltage_source(int NVOLTSRC, int iteration, {{REAL}} dx, {{REAL}} dy, {{REAL}} dz, __global const int* restrict srcinfo1, __global const {{REAL}}* restrict srcinfo2, __global const {{REAL}}* restrict srcwaveforms, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez){
// This function updates electric field values for voltage sources.
//
// Args:
// NVOLTSRC: Total number of voltage sources in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, E: Access to ID and field component arrays
// Obtain the linear index corresponding to the current work item and use it for each source
int src = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
if (src < NVOLTSRC) {
{{REAL}} resistance;
int i, j, k, polarisation;
i = srcinfo1[INDEX2D_SRCINFO(src,0)];
j = srcinfo1[INDEX2D_SRCINFO(src,1)];
k = srcinfo1[INDEX2D_SRCINFO(src,2)];
polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
resistance = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
if (resistance != 0) {
int materialEx = ID[INDEX4D_ID(0,i,j,k)];
Ex[INDEX3D_FIELDS(i,j,k)] = Ex[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dy * dz));
}
else {
Ex[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dx;
}
}
// 'y' polarised source
else if (polarisation == 1) {
if (resistance != 0) {
int materialEy = ID[INDEX4D_ID(1,i,j,k)];
Ey[INDEX3D_FIELDS(i,j,k)] = Ey[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dz));
}
else {
Ey[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dy;
}
}
// 'z' polarised source
else if (polarisation == 2) {
if (resistance != 0) {
int materialEz = ID[INDEX4D_ID(2,i,j,k)];
Ez[INDEX3D_FIELDS(i,j,k)] = Ez[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dy));
}
else {
Ez[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dz;
}
}
}
}
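Host-side, each of these kernels is launched with one work item per source, which is why each kernel guards on its source count (NHERTZDIPOLE, NMAGDIPOLE, NVOLTSRC). A minimal pyopencl sketch of the launch pattern, assuming a compiled program prg, single precision ({{REAL}} rendered as float), pyopencl.array device buffers named as in the rest of the commit, and an illustrative (not gprMax's actual) workgroup size:

import numpy as np

def launch_hertzian_dipoles(queue, prg, G, iteration, srcinfo1_dev, srcinfo2_dev,
                            srcwaves_dev, wgs=(128, 1, 1)):
    # Hypothetical launch: one work item per Hertzian dipole; the global size
    # is rounded up to a multiple of the workgroup size so the in-kernel
    # src < NHERTZDIPOLE guard discards the excess work items.
    nsrc = len(G.hertziandipoles)
    gws = (int(np.ceil(nsrc / wgs[0])) * wgs[0], 1, 1)
    event = prg.update_hertzian_dipole(queue, gws, wgs,
                                       np.int32(nsrc), np.int32(iteration),
                                       np.float32(G.dx), np.float32(G.dy), np.float32(G.dz),
                                       srcinfo1_dev.data, srcinfo2_dev.data, srcwaves_dev.data,
                                       G.ID_dev.data,
                                       G.Ex_dev.data, G.Ey_dev.data, G.Ez_dev.data)
    event.wait()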


@@ -0,0 +1,59 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
#define INDEX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n)
#define INDEX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k)
#define INDEX3D_FIELDS(i,j,k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
__kernel void store_outputs(int NRX, int iteration,
__global const int* restrict rxcoords,
__global {{REAL}} *rxs,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz) {
// This function stores field component values for every receiver in the model.
//
// Args:
// NRX: total number of receivers in the model.
// iteration: iteration number of simulation.
// rxcoords: array of receiver cell coordinates.
// rxs: array to store field components for receivers - rows
// are field components; columns are iterations; pages are receivers.
// Obtain linear index corresponding to the current work item
int rx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
int i,j,k;
if (rx < NRX) {
i = rxcoords[INDEX2D_RXCOORDS(rx,0)];
j = rxcoords[INDEX2D_RXCOORDS(rx,1)];
k = rxcoords[INDEX2D_RXCOORDS(rx,2)];
rxs[INDEX3D_RXS(0,iteration,rx)] = Ex[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(1,iteration,rx)] = Ey[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(2,iteration,rx)] = Ez[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(3,iteration,rx)] = Hx[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(4,iteration,rx)] = Hy[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(5,iteration,rx)] = Hz[INDEX3D_FIELDS(i,j,k)];
}
}
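The {{...}} placeholders are template variables filled in on the host before compilation (the commit adds jinja2 alongside the existing string.Template machinery). A hedged sketch of how this store_outputs source might be rendered and built with pyopencl; the render values mirror the array shapes used in htod_rx_arrays, while the field-array padding and the precision are assumptions:

import pyopencl as cl
from jinja2 import Template

def build_store_outputs(ctx, knl_source, G):
    # Hypothetical build: substitute the template placeholders for the current
    # grid/receiver dimensions and compile. 'float' assumes single precision;
    # the +1 padding on the field arrays is an assumption for illustration.
    src = Template(knl_source).render(REAL='float',
                                      NY_RXCOORDS=3,
                                      NY_RXS=G.iterations,
                                      NZ_RXS=len(G.rxs),
                                      NY_FIELDS=G.ny + 1,
                                      NZ_FIELDS=G.nz + 1)
    return cl.Program(ctx, src).build()

A program built this way would expose the kernel as prg.store_outputs, invoked once per iteration with one work item per receiver, mirroring the CUDA path.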


@@ -21,25 +21,31 @@ from importlib import import_module
import gprMax.config as config import gprMax.config as config
import numpy as np import numpy as np
from .utilities.utilities import timer
class CFSParameter: class CFSParameter:
"""Individual CFS parameter (e.g. alpha, kappa, or sigma).""" """Individual CFS parameter (e.g. alpha, kappa, or sigma)."""
# Allowable scaling profiles and directions # Allowable scaling profiles and directions
scalingprofiles = {'constant': 0, 'linear': 1, 'quadratic': 2, 'cubic': 3, scalingprofiles = {'constant': 0, 'linear': 1, 'quadratic': 2, 'cubic': 3,
'quartic': 4, 'quintic': 5, 'sextic': 6, 'septic': 7, 'octic': 8} 'quartic': 4, 'quintic': 5, 'sextic': 6, 'septic': 7,
'octic': 8}
scalingdirections = ['forward', 'reverse'] scalingdirections = ['forward', 'reverse']
def __init__(self, ID=None, scaling='polynomial', scalingprofile=None, def __init__(self, ID=None, scaling='polynomial', scalingprofile=None,
scalingdirection='forward', min=0, max=0): scalingdirection='forward', min=0, max=0):
""" """
Args: Args:
ID (str): Identifier for CFS parameter, can be: 'alpha', 'kappa' or 'sigma'. ID: string identifier for CFS parameter, can be: 'alpha', 'kappa' or
scaling (str): Type of scaling, can be: 'polynomial'. 'sigma'.
scalingprofile (str): Type of scaling profile from scalingprofiles. scaling: string for type of scaling, can be: 'polynomial'.
scalingdirection (str): Direction of scaling profile from scalingdirections. scalingprofile: string for type of scaling profile from
min (float): Minimum value for parameter. scalingprofiles.
max (float): Maximum value for parameter. scalingdirection: string for direction of scaling profile from
scalingdirections.
min: float for minimum value for parameter.
max: float for maximum value for parameter.
""" """
self.ID = ID self.ID = ID
@@ -56,9 +62,9 @@ class CFS:
def __init__(self): def __init__(self):
""" """
Args: Args:
alpha (CFSParameter): alpha parameter for CFS. alpha: CFSParameter alpha parameter for CFS.
kappa (CFSParameter): kappa parameter for CFS. kappa: CFSParameter kappa parameter for CFS.
sigma (CFSParameter): sigma parameter for CFS. sigma: CFSParameter sigma parameter for CFS.
""" """
self.alpha = CFSParameter(ID='alpha', scalingprofile='constant') self.alpha = CFSParameter(ID='alpha', scalingprofile='constant')
@@ -70,11 +76,11 @@ class CFS:
material properties. material properties.
Args: Args:
d (float): dx, dy, or dz in direction of PML. d: float for dx, dy, or dz in direction of PML.
er (float): Average permittivity of underlying material. er: float for average permittivity of underlying material.
mr (float): Average permeability of underlying material. mr: float for average permeability of underlying material.
G (class): Grid class instance - holds essential parameters G: FDTDGrid object that holds essential parameters describing the
describing the model. model.
""" """
# Calculation of the maximum value of sigma from http://dx.doi.org/10.1109/8.546249 # Calculation of the maximum value of sigma from http://dx.doi.org/10.1109/8.546249
@@ -86,16 +92,16 @@ class CFS:
electric and magnetic PML updates. electric and magnetic PML updates.
Args: Args:
order (int): Order of polynomial for scaling profile. order: int of order of polynomial for scaling profile.
Evalues (float): numpy array holding scaling profile values for Evalues: float array holding scaling profile values for
electric PML update. electric PML update.
Hvalues (float): numpy array holding scaling profile values for Hvalues: float array holding scaling profile values for
magnetic PML update. magnetic PML update.
Returns: Returns:
Evalues (float): numpy array holding scaling profile values for Evalues: float array holding scaling profile values for
electric PML update. electric PML update.
Hvalues (float): numpy array holding scaling profile values for Hvalues: float array holding scaling profile values for
magnetic PML update. magnetic PML update.
""" """
@@ -111,17 +117,18 @@ class CFS:
profile type and minimum and maximum values. profile type and minimum and maximum values.
Args: Args:
thickness (int): Thickness of PML in cells. thickness: int of thickness of PML in cells.
parameter (CFSParameter): Instance of CFSParameter parameter: instance of CFSParameter
Returns: Returns:
Evalues (float): numpy array holding profile value for electric Evalues: float array holding profile value for electric
PML update. PML update.
Hvalues (float): numpy array holding profile value for magnetic Hvalues: float array holding profile value for magnetic
PML update. PML update.
""" """
# Extra cell of thickness added to allow correct scaling of electric and magnetic values # Extra cell of thickness added to allow correct scaling of electric and
# magnetic values
Evalues = np.zeros(thickness + 1, dtype=config.sim_config.dtypes['float_or_double']) Evalues = np.zeros(thickness + 1, dtype=config.sim_config.dtypes['float_or_double'])
Hvalues = np.zeros(thickness + 1, dtype=config.sim_config.dtypes['float_or_double']) Hvalues = np.zeros(thickness + 1, dtype=config.sim_config.dtypes['float_or_double'])
@@ -146,10 +153,12 @@ class CFS:
if parameter.scalingdirection == 'reverse': if parameter.scalingdirection == 'reverse':
Evalues = Evalues[::-1] Evalues = Evalues[::-1]
Hvalues = Hvalues[::-1] Hvalues = Hvalues[::-1]
# Magnetic values must be shifted one element to the left after reversal # Magnetic values must be shifted one element to the left after
# reversal
Hvalues = np.roll(Hvalues, -1) Hvalues = np.roll(Hvalues, -1)
# Extra cell of thickness not required and therefore removed after scaling # Extra cell of thickness not required and therefore removed after
# scaling
Evalues = Evalues[:-1] Evalues = Evalues[:-1]
Hvalues = Hvalues[:-1] Hvalues = Hvalues[:-1]
@@ -168,17 +177,20 @@ class PML:
boundaryIDs = ['x0', 'y0', 'z0', 'xmax', 'ymax', 'zmax'] boundaryIDs = ['x0', 'y0', 'z0', 'xmax', 'ymax', 'zmax']
# Indicates direction of increasing absorption # Indicates direction of increasing absorption
# xminus, yminus, zminus - absorption increases in negative direction of x-axis, y-axis, or z-axis # xminus, yminus, zminus - absorption increases in negative direction of
# xplus, yplus, zplus - absorption increases in positive direction of x-axis, y-axis, or z-axis # x-axis, y-axis, or z-axis
# xplus, yplus, zplus - absorption increases in positive direction of
# x-axis, y-axis, or z-axis
directions = ['xminus', 'yminus', 'zminus', 'xplus', 'yplus', 'zplus'] directions = ['xminus', 'yminus', 'zminus', 'xplus', 'yplus', 'zplus']
def __init__(self, G, ID=None, direction=None, xs=0, xf=0, ys=0, yf=0, zs=0, zf=0): def __init__(self, G, ID=None, direction=None, xs=0, xf=0, ys=0, yf=0, zs=0, zf=0):
""" """
Args: Args:
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the
ID (str): Identifier for PML slab. model.
direction (str): Direction of increasing absorption. ID: string identifier for PML slab.
xs, xf, ys, yf, zs, zf (float): Extent of the PML slab. direction: string for direction of increasing absorption.
xs, xf, ys, yf, zs, zf: floats of extent of the PML slab.
""" """
self.G = G self.G = G
@@ -244,8 +256,8 @@ class PML:
"""Calculates electric and magnetic update coefficients for the PML. """Calculates electric and magnetic update coefficients for the PML.
Args: Args:
er (float): Average permittivity of underlying material er: float of average permittivity of underlying material
mr (float): Average permeability of underlying material mr: float of average permeability of underlying material
""" """
self.ERA = np.zeros((len(self.CFS), self.thickness), self.ERA = np.zeros((len(self.CFS), self.thickness),
@@ -275,19 +287,25 @@ class PML:
# Define different parameters depending on PML formulation # Define different parameters depending on PML formulation
if self.G.pmlformulation == 'HORIPML': if self.G.pmlformulation == 'HORIPML':
# HORIPML electric update coefficients # HORIPML electric update coefficients
tmp = (2 * config.sim_config.em_consts['e0'] * Ekappa) + self.G.dt * (Ealpha * Ekappa + Esigma) tmp = ((2 * config.sim_config.em_consts['e0'] * Ekappa) +
self.ERA[x, :] = (2 * config.sim_config.em_consts['e0'] + self.G.dt * Ealpha) / tmp self.G.dt * (Ealpha * Ekappa + Esigma))
self.ERB[x, :] = (2 * config.sim_config.em_consts['e0'] * Ekappa) / tmp self.ERA[x, :] = ((2 * config.sim_config.em_consts['e0'] +
self.ERE[x, :] = ((2 * config.sim_config.em_consts['e0'] * Ekappa) - self.G.dt self.G.dt * Ealpha) / tmp)
* (Ealpha * Ekappa + Esigma)) / tmp self.ERB[x, :] = ((2 * config.sim_config.em_consts['e0'] * Ekappa)
/ tmp)
self.ERE[x, :] = (((2 * config.sim_config.em_consts['e0'] * Ekappa) -
self.G.dt * (Ealpha * Ekappa + Esigma)) / tmp)
self.ERF[x, :] = (2 * Esigma * self.G.dt) / (Ekappa * tmp) self.ERF[x, :] = (2 * Esigma * self.G.dt) / (Ekappa * tmp)
# HORIPML magnetic update coefficients # HORIPML magnetic update coefficients
tmp = (2 * config.sim_config.em_consts['e0'] * Hkappa) + self.G.dt * (Halpha * Hkappa + Hsigma) tmp = ((2 * config.sim_config.em_consts['e0'] * Hkappa) +
self.HRA[x, :] = (2 * config.sim_config.em_consts['e0'] + self.G.dt * Halpha) / tmp self.G.dt * (Halpha * Hkappa + Hsigma))
self.HRB[x, :] = (2 * config.sim_config.em_consts['e0'] * Hkappa) / tmp self.HRA[x, :] = ((2 * config.sim_config.em_consts['e0'] +
self.HRE[x, :] = ((2 * config.sim_config.em_consts['e0'] * Hkappa) - self.G.dt self.G.dt * Halpha) / tmp)
* (Halpha * Hkappa + Hsigma)) / tmp self.HRB[x, :] = ((2 * config.sim_config.em_consts['e0'] * Hkappa)
/ tmp)
self.HRE[x, :] = (((2 * config.sim_config.em_consts['e0'] * Hkappa) -
self.G.dt * (Halpha * Hkappa + Hsigma)) / tmp)
self.HRF[x, :] = (2 * Hsigma * self.G.dt) / (Hkappa * tmp) self.HRF[x, :] = (2 * Hsigma * self.G.dt) / (Hkappa * tmp)
elif self.G.pmlformulation == 'MRIPML': elif self.G.pmlformulation == 'MRIPML':
@@ -295,31 +313,39 @@ class PML:
tmp = 2 * config.sim_config.em_consts['e0'] + self.G.dt * Ealpha tmp = 2 * config.sim_config.em_consts['e0'] + self.G.dt * Ealpha
self.ERA[x, :] = Ekappa + (self.G.dt * Esigma) / tmp self.ERA[x, :] = Ekappa + (self.G.dt * Esigma) / tmp
self.ERB[x, :] = (2 * config.sim_config.em_consts['e0']) / tmp self.ERB[x, :] = (2 * config.sim_config.em_consts['e0']) / tmp
self.ERE[x, :] = ((2 * config.sim_config.em_consts['e0']) - self.G.dt * Ealpha) / tmp self.ERE[x, :] = (((2 * config.sim_config.em_consts['e0'])
- self.G.dt * Ealpha) / tmp)
self.ERF[x, :] = (2 * Esigma * self.G.dt) / tmp self.ERF[x, :] = (2 * Esigma * self.G.dt) / tmp
# MRIPML magnetic update coefficients # MRIPML magnetic update coefficients
tmp = 2 * config.sim_config.em_consts['e0'] + self.G.dt * Halpha tmp = 2 * config.sim_config.em_consts['e0'] + self.G.dt * Halpha
self.HRA[x, :] = Hkappa + (self.G.dt * Hsigma) / tmp self.HRA[x, :] = Hkappa + (self.G.dt * Hsigma) / tmp
self.HRB[x, :] = (2 * config.sim_config.em_consts['e0']) / tmp self.HRB[x, :] = (2 * config.sim_config.em_consts['e0']) / tmp
self.HRE[x, :] = ((2 * config.sim_config.em_consts['e0']) - self.G.dt * Halpha) / tmp self.HRE[x, :] = (((2 * config.sim_config.em_consts['e0'])
- self.G.dt * Halpha) / tmp)
self.HRF[x, :] = (2 * Hsigma * self.G.dt) / tmp self.HRF[x, :] = (2 * Hsigma * self.G.dt) / tmp
def update_electric(self): def update_electric(self):
"""This functions updates electric field components with the PML correction.""" """This functions updates electric field components with the PML
correction.
"""
pmlmodule = 'gprMax.cython.pml_updates_electric_' + self.G.pmlformulation pmlmodule = 'gprMax.cython.pml_updates_electric_' + self.G.pmlformulation
func = getattr(import_module(pmlmodule), 'order' + str(len(self.CFS)) + '_' + self.direction) func = getattr(import_module(pmlmodule),
'order' + str(len(self.CFS)) + '_' + self.direction)
func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf, func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf,
config.get_model_config().ompthreads, self.G.updatecoeffsE, self.G.ID, config.get_model_config().ompthreads, self.G.updatecoeffsE, self.G.ID,
self.G.Ex, self.G.Ey, self.G.Ez, self.G.Hx, self.G.Hy, self.G.Hz, self.G.Ex, self.G.Ey, self.G.Ez, self.G.Hx, self.G.Hy, self.G.Hz,
self.EPhi1, self.EPhi2, self.ERA, self.ERB, self.ERE, self.ERF, self.d) self.EPhi1, self.EPhi2, self.ERA, self.ERB, self.ERE, self.ERF, self.d)
def update_magnetic(self): def update_magnetic(self):
"""This functions updates magnetic field components with the PML correction.""" """This functions updates magnetic field components with the PML
correction.
"""
pmlmodule = 'gprMax.cython.pml_updates_magnetic_' + self.G.pmlformulation pmlmodule = 'gprMax.cython.pml_updates_magnetic_' + self.G.pmlformulation
func = getattr(import_module(pmlmodule), 'order' + str(len(self.CFS)) + '_' + self.direction) func = getattr(import_module(pmlmodule),
'order' + str(len(self.CFS)) + '_' + self.direction)
func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf, func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf,
config.get_model_config().ompthreads, self.G.updatecoeffsH, self.G.ID, config.get_model_config().ompthreads, self.G.updatecoeffsH, self.G.ID,
self.G.Ex, self.G.Ey, self.G.Ez, self.G.Hx, self.G.Hy, self.G.Hz, self.G.Ex, self.G.Ey, self.G.Ez, self.G.Hx, self.G.Hy, self.G.Hz,
@@ -331,6 +357,9 @@ class CUDAPML(PML):
solving on GPU using CUDA. solving on GPU using CUDA.
""" """
def __init__(self, *args, **kwargs):
super(CUDAPML, self).__init__(*args, **kwargs)
def htod_field_arrays(self): def htod_field_arrays(self):
"""Initialise PML field and coefficient arrays on GPU.""" """Initialise PML field and coefficient arrays on GPU."""
@@ -360,9 +389,9 @@ class CUDAPML(PML):
"""Get update functions from PML kernels. """Get update functions from PML kernels.
Args: Args:
kernelselectric: PyCuda SourceModule containing PML kernels for kernelselectric: pycuda SourceModule containing PML kernels for
electric updates. electric updates.
kernelsmagnetic: PyCuda SourceModule containing PML kernels for kernelsmagnetic: pycuda SourceModule containing PML kernels for
magnetic updates. magnetic updates.
""" """
@@ -373,9 +402,12 @@ class CUDAPML(PML):
"""This functions updates electric field components with the PML """This functions updates electric field components with the PML
correction on the GPU. correction on the GPU.
""" """
self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), self.update_electric_gpu(np.int32(self.xs),
np.int32(self.ys), np.int32(self.yf), np.int32(self.xf),
np.int32(self.zs), np.int32(self.zf), np.int32(self.ys),
np.int32(self.yf),
np.int32(self.zs),
np.int32(self.zf),
np.int32(self.EPhi1_gpu.shape[1]), np.int32(self.EPhi1_gpu.shape[1]),
np.int32(self.EPhi1_gpu.shape[2]), np.int32(self.EPhi1_gpu.shape[2]),
np.int32(self.EPhi1_gpu.shape[3]), np.int32(self.EPhi1_gpu.shape[3]),
@@ -384,11 +416,18 @@ class CUDAPML(PML):
np.int32(self.EPhi2_gpu.shape[3]), np.int32(self.EPhi2_gpu.shape[3]),
np.int32(self.thickness), np.int32(self.thickness),
self.G.ID_gpu.gpudata, self.G.ID_gpu.gpudata,
self.G.Ex_gpu.gpudata, self.G.Ey_gpu.gpudata, self.G.Ez_gpu.gpudata, self.G.Ex_gpu.gpudata,
self.G.Hx_gpu.gpudata, self.G.Hy_gpu.gpudata, self.G.Hz_gpu.gpudata, self.G.Ey_gpu.gpudata,
self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, self.G.Ez_gpu.gpudata,
self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, self.G.Hx_gpu.gpudata,
self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, self.G.Hy_gpu.gpudata,
self.G.Hz_gpu.gpudata,
self.EPhi1_gpu.gpudata,
self.EPhi2_gpu.gpudata,
self.ERA_gpu.gpudata,
self.ERB_gpu.gpudata,
self.ERE_gpu.gpudata,
self.ERF_gpu.gpudata,
config.sim_config.dtypes['float_or_double'](self.d), config.sim_config.dtypes['float_or_double'](self.d),
block=self.G.tpb, grid=self.bpg) block=self.G.tpb, grid=self.bpg)
@@ -396,9 +435,12 @@ class CUDAPML(PML):
"""This functions updates magnetic field components with the PML """This functions updates magnetic field components with the PML
correction on the GPU. correction on the GPU.
""" """
self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), self.update_magnetic_gpu(np.int32(self.xs),
np.int32(self.ys), np.int32(self.yf), np.int32(self.xf),
np.int32(self.zs), np.int32(self.zf), np.int32(self.ys),
np.int32(self.yf),
np.int32(self.zs),
np.int32(self.zf),
np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[1]),
np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[2]),
np.int32(self.HPhi1_gpu.shape[3]), np.int32(self.HPhi1_gpu.shape[3]),
@@ -407,19 +449,147 @@ class CUDAPML(PML):
np.int32(self.HPhi2_gpu.shape[3]), np.int32(self.HPhi2_gpu.shape[3]),
np.int32(self.thickness), np.int32(self.thickness),
self.G.ID_gpu.gpudata, self.G.ID_gpu.gpudata,
self.G.Ex_gpu.gpudata, self.G.Ey_gpu.gpudata, self.G.Ez_gpu.gpudata, self.G.Ex_gpu.gpudata,
self.G.Hx_gpu.gpudata, self.G.Hy_gpu.gpudata, self.G.Hz_gpu.gpudata, self.G.Ey_gpu.gpudata,
self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.G.Ez_gpu.gpudata,
self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.G.Hx_gpu.gpudata,
self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, self.G.Hy_gpu.gpudata,
self.G.Hz_gpu.gpudata,
self.HPhi1_gpu.gpudata,
self.HPhi2_gpu.gpudata,
self.HRA_gpu.gpudata,
self.HRB_gpu.gpudata,
self.HRE_gpu.gpudata,
self.HRF_gpu.gpudata,
config.sim_config.dtypes['float_or_double'](self.d), config.sim_config.dtypes['float_or_double'](self.d),
block=self.G.tpb, grid=self.bpg) block=self.G.tpb, grid=self.bpg)
class OpenCLPML(PML):
"""Perfectly Matched Layer (PML) Absorbing Boundary Conditions (ABC) for
solving on compute device using OpenCL.
"""
def __init__(self, *args, **kwargs):
super(OpenCLPML, self).__init__(*args, **kwargs)
self.compute_time = 0
def set_queue(self, queue):
"""Passes in pyopencl queue.
Args:
queue: pyopencl queue.
"""
self.queue = queue
def htod_field_arrays(self):
"""Initialise PML field and coefficient arrays on compute device."""
import pyopencl.array as clarray
self.ERA_dev = clarray.to_device(self.queue, self.ERA)
self.ERB_dev = clarray.to_device(self.queue, self.ERB)
self.ERE_dev = clarray.to_device(self.queue, self.ERE)
self.ERF_dev = clarray.to_device(self.queue, self.ERF)
self.HRA_dev = clarray.to_device(self.queue, self.HRA)
self.HRB_dev = clarray.to_device(self.queue, self.HRB)
self.HRE_dev = clarray.to_device(self.queue, self.HRE)
self.HRF_dev = clarray.to_device(self.queue, self.HRF)
self.EPhi1_dev = clarray.to_device(self.queue, self.EPhi1)
self.EPhi2_dev = clarray.to_device(self.queue, self.EPhi2)
self.HPhi1_dev = clarray.to_device(self.queue, self.HPhi1)
self.HPhi2_dev = clarray.to_device(self.queue, self.HPhi2)
def set_blocks_per_grid(self):
    pass
def set_wgs(self):
"""Set the workgroup size used for updating the PML field arrays
on a compute device.
"""
self.wgs = (((int(np.ceil(((self.EPhi1_dev.shape[1] + 1) *
(self.EPhi1_dev.shape[2] + 1) *
(self.EPhi1_dev.shape[3] + 1)) / self.G.tpb[0]))) * 256), 1, 1)
def get_update_funcs(self):
    pass
def update_electric(self):
"""This functions updates electric field components with the PML
correction on the compute device.
"""
start_time = timer()
event = self.update_electric_dev(np.int32(self.xs),
np.int32(self.xf),
np.int32(self.ys),
np.int32(self.yf),
np.int32(self.zs),
np.int32(self.zf),
np.int32(self.EPhi1_dev.shape[1]),
np.int32(self.EPhi1_dev.shape[2]),
np.int32(self.EPhi1_dev.shape[3]),
np.int32(self.EPhi2_dev.shape[1]),
np.int32(self.EPhi2_dev.shape[2]),
np.int32(self.EPhi2_dev.shape[3]),
np.int32(self.thickness),
self.G.ID_dev,
self.G.Ex_dev,
self.G.Ey_dev,
self.G.Ez_dev,
self.G.Hx_dev,
self.G.Hy_dev,
self.G.Hz_dev,
self.EPhi1_dev,
self.EPhi2_dev,
self.ERA_dev,
self.ERB_dev,
self.ERE_dev,
self.ERF_dev,
config.sim_config.dtypes['float_or_double'](self.d))
event.wait()
self.compute_time += (timer() - start_time)
def update_magnetic(self):
"""This functions updates magnetic field components with the PML
correction on the compute device.
"""
start_time = timer()
event = self.update_magnetic_dev(np.int32(self.xs),
np.int32(self.xf),
np.int32(self.ys),
np.int32(self.yf),
np.int32(self.zs),
np.int32(self.zf),
np.int32(self.HPhi1_dev.shape[1]),
np.int32(self.HPhi1_dev.shape[2]),
np.int32(self.HPhi1_dev.shape[3]),
np.int32(self.HPhi2_dev.shape[1]),
np.int32(self.HPhi2_dev.shape[2]),
np.int32(self.HPhi2_dev.shape[3]),
np.int32(self.thickness),
self.G.ID_dev,
self.G.Ex_dev,
self.G.Ey_dev,
self.G.Ez_dev,
self.G.Hx_dev,
self.G.Hy_dev,
self.G.Hz_dev,
self.HPhi1_dev,
self.HPhi2_dev,
self.HRA_dev,
self.HRB_dev,
self.HRE_dev,
self.HRF_dev,
config.sim_config.dtypes['float_or_double'](self.d))
event.wait()
self.compute_time += (timer() - start_time)
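OpenCLPML leaves kernel compilation and binding to the updates class; a hedged sketch of the expected wiring. The kernel lookup tables are assumptions, and each entry must be a callable that accepts the argument list used in update_electric()/update_magnetic() above and returns a pyopencl event (e.g. a pyopencl.elementwise.ElementwiseKernel):

def wire_pml_kernels(G, queue, electric_knls, magnetic_knls):
    # Hypothetical wiring of PML device kernels for the OpenCL solver;
    # the name scheme mirrors the CUDA path ('order<n>_<direction>').
    for pml in G.pmls:
        pml.set_queue(queue)        # pyopencl CommandQueue shared with the grid arrays
        pml.htod_field_arrays()     # copy coefficient and Phi accumulator arrays to device
        name = 'order' + str(len(pml.CFS)) + '_' + pml.direction
        pml.update_electric_dev = electric_knls[name]
        pml.update_magnetic_dev = magnetic_knls[name]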
def print_pml_info(G): def print_pml_info(G):
"""Information about PMLs. """Information about PMLs.
Args: Args:
G (FDTDGrid): Parameters describing a grid in a model. G: FDTDGrid object that holds parameters describing a grid in a model.
""" """
# No PML # No PML
if all(value == 0 for value in G.pmlthickness.values()): if all(value == 0 for value in G.pmlthickness.values()):
@@ -433,7 +603,8 @@ def print_pml_info(G):
pmlinfo += f'{key}: {value}, ' pmlinfo += f'{key}: {value}, '
pmlinfo = pmlinfo[:-2] pmlinfo = pmlinfo[:-2]
return f'\nPML boundaries [{G.name}]: {{formulation: {G.pmlformulation}, order: {len(G.cfs)}, thickness (cells): {pmlinfo}}}' return (f'\nPML boundaries [{G.name}]: {{formulation: {G.pmlformulation}, '
f'order: {len(G.cfs)}, thickness (cells): {pmlinfo}}}')
def build_pml(G, key, value): def build_pml(G, key, value):
@@ -442,21 +613,28 @@ def build_pml(G, key, value):
(based on underlying material er and mr from solid array). (based on underlying material er and mr from solid array).
Args: Args:
G (FDTDGrid): Parameters describing a grid in a model. G: FDTDGrid object that holds parameters describing a grid in a model.
key (str): Identifier of PML slab. key: string identifier of PML slab.
value (int): Thickness of PML slab in cells. value: int with thickness of PML slab in cells.
""" """
pml_type = CUDAPML if config.sim_config.general['cuda'] else PML if config.sim_config.general['solver'] == 'cpu':
pml_type = PML
elif config.sim_config.general['solver'] == 'cuda':
pml_type = CUDAPML
elif config.sim_config.general['solver'] == 'opencl':
pml_type = OpenCLPML
sumer = 0 # Sum of relative permittivities in PML slab sumer = 0 # Sum of relative permittivities in PML slab
summr = 0 # Sum of relative permeabilities in PML slab summr = 0 # Sum of relative permeabilities in PML slab
if key[0] == 'x': if key[0] == 'x':
if key == 'x0': if key == 'x0':
pml = pml_type(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz) pml = pml_type(G, ID=key, direction='xminus',
xf=value, yf=G.ny, zf=G.nz)
elif key == 'xmax': elif key == 'xmax':
pml = pml_type(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz) pml = pml_type(G, ID=key, direction='xplus',
xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml) G.pmls.append(pml)
for j in range(G.ny): for j in range(G.ny):
for k in range(G.nz): for k in range(G.nz):
@@ -469,9 +647,11 @@ def build_pml(G, key, value):
elif key[0] == 'y': elif key[0] == 'y':
if key == 'y0': if key == 'y0':
pml = pml_type(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz) pml = pml_type(G, ID=key, direction='yminus',
yf=value, xf=G.nx, zf=G.nz)
elif key == 'ymax': elif key == 'ymax':
pml = pml_type(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz) pml = pml_type(G, ID=key, direction='yplus',
ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml) G.pmls.append(pml)
for i in range(G.nx): for i in range(G.nx):
for k in range(G.nz): for k in range(G.nz):
@@ -484,9 +664,11 @@ def build_pml(G, key, value):
elif key[0] == 'z': elif key[0] == 'z':
if key == 'z0': if key == 'z0':
pml = pml_type(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny) pml = pml_type(G, ID=key, direction='zminus',
zf=value, xf=G.nx, yf=G.ny)
elif key == 'zmax': elif key == 'zmax':
pml = pml_type(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz) pml = pml_type(G, ID=key, direction='zplus',
zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
G.pmls.append(pml) G.pmls.append(pml)
for i in range(G.nx): for i in range(G.nx):
for j in range(G.ny): for j in range(G.ny):


@@ -26,8 +26,8 @@ class Rx:
allowableoutputs = ['Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz', 'Ix', 'Iy', 'Iz'] allowableoutputs = ['Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz', 'Ix', 'Iy', 'Iz']
defaultoutputs = allowableoutputs[:-3] defaultoutputs = allowableoutputs[:-3]
allowableoutputs_gpu = allowableoutputs[:-3] allowableoutputs_dev = allowableoutputs[:-3]
maxnumoutputs_gpu = 0 maxnumoutputs_dev = 0
def __init__(self): def __init__(self):
@@ -41,57 +41,65 @@ class Rx:
self.zcoordorigin = None self.zcoordorigin = None
def htod_rx_arrays(G): def htod_rx_arrays(G, queue=None):
"""Initialise arrays on GPU for receiver coordinates and to store field """Initialise arrays on compute device for receiver coordinates and to store field
components for receivers. components for receivers.
Args: Args:
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the model.
queue: pyopencl queue.
Returns: Returns:
rxcoords_gpu (int): numpy array of receiver coordinates from GPU. rxcoords_dev: int array of receiver coordinates on compute device.
rxs_gpu (float): numpy array of receiver data from GPU - rows are field rxs_dev: float array of receiver data on compute device - rows are field
components; columns are iterations; pages are receivers. components; columns are iterations; pages are receivers.
""" """
import pycuda.gpuarray as gpuarray # Array to store receiver coordinates on compute device
# Array to store receiver coordinates on GPU
rxcoords = np.zeros((len(G.rxs), 3), dtype=np.int32) rxcoords = np.zeros((len(G.rxs), 3), dtype=np.int32)
for i, rx in enumerate(G.rxs): for i, rx in enumerate(G.rxs):
rxcoords[i, 0] = rx.xcoord rxcoords[i, 0] = rx.xcoord
rxcoords[i, 1] = rx.ycoord rxcoords[i, 1] = rx.ycoord
rxcoords[i, 2] = rx.zcoord rxcoords[i, 2] = rx.zcoord
# Store maximum number of output components # Store maximum number of output components
if len(rx.outputs) > Rx.maxnumoutputs_gpu: if len(rx.outputs) > Rx.maxnumoutputs_dev:
Rx.maxnumoutputs_gpu = len(rx.outputs) Rx.maxnumoutputs_dev = len(rx.outputs)
# Array to store field components for receivers on GPU - rows are field components; # Array to store field components for receivers on compute device -
# columns are iterations; pages are receivers # rows are field components; columns are iterations; pages are receivers
rxs = np.zeros((len(Rx.allowableoutputs_gpu), G.iterations, len(G.rxs)), rxs = np.zeros((len(Rx.allowableoutputs_dev), G.iterations, len(G.rxs)),
dtype=config.sim_config.dtypes['float_or_double']) dtype=config.sim_config.dtypes['float_or_double'])
# Copy arrays to GPU # Copy arrays to compute device
rxcoords_gpu = gpuarray.to_gpu(rxcoords) if config.sim_config.general['solver'] == 'cuda':
rxs_gpu = gpuarray.to_gpu(rxs) import pycuda.gpuarray as gpuarray
rxcoords_dev = gpuarray.to_gpu(rxcoords)
rxs_dev = gpuarray.to_gpu(rxs)
return rxcoords_gpu, rxs_gpu elif config.sim_config.general['solver'] == 'opencl':
import pyopencl.array as clarray
rxcoords_dev = clarray.to_device(queue, rxcoords)
rxs_dev = clarray.to_device(queue, rxs)
return rxcoords_dev, rxs_dev
def dtoh_rx_array(rxs_gpu, rxcoords_gpu, G): def dtoh_rx_array(rxs_dev, rxcoords_dev, G):
"""Copy output from receivers array used on GPU back to receiver objects. """Copy output from receivers array used on compute device back to receiver
objects.
Args: Args:
rxs_gpu (float): numpy array of receiver data from GPU - rows are field rxcoords_dev: int array of receiver coordinates on compute device.
rxs_dev: float array of receiver data on compute device - rows are field
components; columns are iterations; pages are receivers. components; columns are iterations; pages are receivers.
rxcoords_gpu (int): numpy array of receiver coordinates from GPU. G: FDTDGrid object that holds essential parameters describing the model.
G (FDTDGrid): Holds essential parameters describing the model.
""" """
for rx in G.rxs: for rx in G.rxs:
for rxgpu in range(len(G.rxs)): for rxd in range(len(G.rxs)):
if (rx.xcoord == rxcoords_gpu[rxgpu, 0] and if (rx.xcoord == rxcoords_dev[rxd, 0] and
rx.ycoord == rxcoords_gpu[rxgpu, 1] and rx.ycoord == rxcoords_dev[rxd, 1] and
rx.zcoord == rxcoords_gpu[rxgpu, 2]): rx.zcoord == rxcoords_dev[rxd, 2]):
for output in rx.outputs.keys(): for output in rx.outputs.keys():
rx.outputs[output] = rxs_gpu[Rx.allowableoutputs_gpu.index(output), :, rxgpu] rx.outputs[output] = rxs_dev[Rx.allowableoutputs_dev.index(output), :, rxd]


@@ -223,18 +223,17 @@ class Snapshot:
f.close() f.close()
def htod_snapshot_array(G): def htod_snapshot_array(G, queue=None):
"""Initialise array on GPU for to store field data for snapshots. """Initialise array on compute device for to store field data for snapshots.
Args: Args:
G (FDTDGrid): Parameters describing a grid in a model. G: FDTDGrid object with parameters describing a grid in a model.
queue: pyopencl queue.
Returns: Returns:
snapE_gpu, snapH_gpu (float): numpy arrays of snapshot data on GPU. snapE_dev, snapH_dev: float arrays of snapshot data on compute device.
""" """
import pycuda.gpuarray as gpuarray
# Get dimensions of largest requested snapshot # Get dimensions of largest requested snapshot
for snap in G.snapshots: for snap in G.snapshots:
if snap.nx > Snapshot.nx_max: if snap.nx > Snapshot.nx_max:
@@ -244,15 +243,21 @@ def htod_snapshot_array(G):
if snap.nz > Snapshot.nz_max: if snap.nz > Snapshot.nz_max:
Snapshot.nz_max = snap.nz Snapshot.nz_max = snap.nz
# GPU - blocks per grid - according to largest requested snapshot if config.sim_config.general['solver'] == 'cuda':
# Blocks per grid - according to largest requested snapshot
Snapshot.bpg = (int(np.ceil(((Snapshot.nx_max) * Snapshot.bpg = (int(np.ceil(((Snapshot.nx_max) *
(Snapshot.ny_max) * (Snapshot.ny_max) *
(Snapshot.nz_max)) / Snapshot.tpb[0])), 1, 1) (Snapshot.nz_max)) / Snapshot.tpb[0])), 1, 1)
elif config.sim_config.general['solver'] == 'opencl':
# Workgroup size - according to largest requested snapshot
Snapshot.wgs = (int(np.ceil(((Snapshot.nx_max) *
(Snapshot.ny_max) *
(Snapshot.nz_max)))), 1, 1)
# 4D arrays to store snapshots on GPU, e.g. snapEx(time, x, y, z); # 4D arrays to store snapshots on GPU, e.g. snapEx(time, x, y, z);
# if snapshots are not being stored on the GPU during the simulation then # if snapshots are not being stored on the GPU during the simulation then
# they are copied back to the host after each iteration, hence numsnaps = 1 # they are copied back to the host after each iteration, hence numsnaps = 1
numsnaps = 1 if config.get_model_config().cuda['snapsgpu2cpu'] else len(G.snapshots) numsnaps = 1 if config.get_model_config().device['snapsgpu2cpu'] else len(G.snapshots)
snapEx = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), snapEx = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max),
dtype=config.sim_config.dtypes['float_or_double']) dtype=config.sim_config.dtypes['float_or_double'])
snapEy = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), snapEy = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max),
@@ -266,29 +271,41 @@ def htod_snapshot_array(G):
snapHz = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), snapHz = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max),
dtype=config.sim_config.dtypes['float_or_double']) dtype=config.sim_config.dtypes['float_or_double'])
# Copy arrays to GPU # Copy arrays to compute device
snapEx_gpu = gpuarray.to_gpu(snapEx) if config.sim_config.general['solver'] == 'cuda':
snapEy_gpu = gpuarray.to_gpu(snapEy) import pycuda.gpuarray as gpuarray
snapEz_gpu = gpuarray.to_gpu(snapEz) snapEx_dev = gpuarray.to_gpu(snapEx)
snapHx_gpu = gpuarray.to_gpu(snapHx) snapEy_dev = gpuarray.to_gpu(snapEy)
snapHy_gpu = gpuarray.to_gpu(snapHy) snapEz_dev = gpuarray.to_gpu(snapEz)
snapHz_gpu = gpuarray.to_gpu(snapHz) snapHx_dev = gpuarray.to_gpu(snapHx)
snapHy_dev = gpuarray.to_gpu(snapHy)
snapHz_dev = gpuarray.to_gpu(snapHz)
return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu elif config.sim_config.general['solver'] == 'opencl':
import pyopencl.array as clarray
snapEx_dev = clarray.to_device(queue, snapEx)
snapEy_dev = clarray.to_device(queue, snapEy)
snapEz_dev = clarray.to_device(queue, snapEz)
snapHx_dev = clarray.to_device(queue, snapHx)
snapHy_dev = clarray.to_device(queue, snapHy)
snapHz_dev = clarray.to_device(queue, snapHz)
return snapEx_dev, snapEy_dev, snapEz_dev, snapHx_dev, snapHy_dev, snapHz_dev
def dtoh_snapshot_array(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap): def dtoh_snapshot_array(snapEx_dev, snapEy_dev, snapEz_dev, snapHx_dev, snapHy_dev, snapHz_dev, i, snap):
"""Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview. """Copy snapshot array used on compute device back to snapshot objects and
store in format for Paraview.
Args: Args:
snapE_gpu, snapH_gpu (float): numpy arrays of snapshot data from GPU. snapE_dev, snapH_dev: float arrays of snapshot data from compute device.
i (int): index for snapshot data on GPU array. i: int for index of snapshot data on compute device array.
snap (class): Snapshot class instance snap: Snapshot class instance
""" """
snap.Exsnap = snapEx_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] snap.Exsnap = snapEx_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Eysnap = snapEy_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] snap.Eysnap = snapEy_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Ezsnap = snapEz_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] snap.Ezsnap = snapEz_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Hxsnap = snapHx_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] snap.Hxsnap = snapHx_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Hysnap = snapHy_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] snap.Hysnap = snapHy_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
snap.Hzsnap = snapHz_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] snap.Hzsnap = snapHz_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
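Whether all requested snapshots can stay resident on the device is what drives the snapsgpu2cpu flag; a rough (assumed single-precision) estimate of the device memory needed by the six arrays allocated in htod_snapshot_array:

import numpy as np

def snapshot_device_memory(G, dtype=np.float32):
    # Rough estimate (bytes): six field-component arrays, each sized to the
    # largest requested snapshot, with one page per snapshot.
    nx = max(snap.nx for snap in G.snapshots)
    ny = max(snap.ny for snap in G.snapshots)
    nz = max(snap.nz for snap in G.snapshots)
    return 6 * len(G.snapshots) * nx * ny * nz * np.dtype(dtype).itemsize

If that exceeds what the device can spare, snapsgpu2cpu is set, only a single-snapshot buffer is kept on the device, and data are copied back to the host after each snapshot iteration.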


@@ -18,22 +18,24 @@
import gprMax.config as config import gprMax.config as config
from .grid import CUDAGrid, FDTDGrid from .grid import CUDAGrid, FDTDGrid, OpenCLGrid
from .subgrids.updates import create_updates as create_subgrid_updates from .subgrids.updates import create_updates as create_subgrid_updates
from .updates import CPUUpdates, CUDAUpdates from .updates import CPUUpdates, CUDAUpdates, OpenCLUpdates
def create_G(): def create_G():
"""Create grid object according to solver. """Create grid object according to solver.
Returns: Returns:
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid that holds essential parameters describing the model.
""" """
if config.sim_config.general['cpu']: if config.sim_config.general['solver'] == 'cpu':
G = FDTDGrid() G = FDTDGrid()
elif config.sim_config.general['cuda']: elif config.sim_config.general['solver'] == 'cuda':
G = CUDAGrid() G = CUDAGrid()
elif config.sim_config.general['solver'] == 'opencl':
G = OpenCLGrid()
return G return G
@@ -42,10 +44,10 @@ def create_solver(G):
"""Create configured solver object. """Create configured solver object.
Args: Args:
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid that holds essential parameters describing the model.
Returns: Returns:
solver (Solver): solver object. solver: Solver object.
""" """
if config.sim_config.general['subgrid']: if config.sim_config.general['subgrid']:
@@ -56,14 +58,17 @@ def create_solver(G):
# the required numerical precision and dispersive material type. # the required numerical precision and dispersive material type.
props = updates.adapt_dispersive_config() props = updates.adapt_dispersive_config()
updates.set_dispersive_updates(props) updates.set_dispersive_updates(props)
elif config.sim_config.general['cpu']: elif config.sim_config.general['solver'] == 'cpu':
updates = CPUUpdates(G) updates = CPUUpdates(G)
solver = Solver(updates) solver = Solver(updates)
props = updates.adapt_dispersive_config() props = updates.adapt_dispersive_config()
updates.set_dispersive_updates(props) updates.set_dispersive_updates(props)
elif config.sim_config.general['cuda']: elif config.sim_config.general['solver'] == 'cuda':
updates = CUDAUpdates(G) updates = CUDAUpdates(G)
solver = Solver(updates) solver = Solver(updates)
elif config.sim_config.general['solver'] == 'opencl':
updates = OpenCLUpdates(G)
solver = Solver(updates)
return solver return solver
@@ -74,8 +79,8 @@ class Solver:
def __init__(self, updates, hsg=False): def __init__(self, updates, hsg=False):
""" """
Args: Args:
updates (Updates): Updates contains methods to run FDTD algorithm. updates: Updates contains methods to run FDTD algorithm.
hsg (bool): Use sub-gridding. hsg: bool to use sub-gridding.
""" """
self.updates = updates self.updates = updates
@@ -85,13 +90,14 @@ class Solver:
"""Time step the FDTD model. """Time step the FDTD model.
Args: Args:
iterator (iterator): can be range() or tqdm() iterator: can be range() or tqdm()
Returns: Returns:
tsolve (float): Time taken to execute solving (seconds). tsolve: float for time taken to execute solving (seconds).
memsolve (float): Memory (RAM) used. memsolve: float for memory (RAM) used.
""" """
memsolve = 0
self.updates.time_start() self.updates.time_start()
for iteration in iterator: for iteration in iterator:
@@ -108,7 +114,8 @@ class Solver:
if self.hsg: if self.hsg:
self.updates.hsg_1() self.updates.hsg_1()
self.updates.update_electric_b() self.updates.update_electric_b()
memsolve = self.updates.calculate_memsolve(iteration) if config.sim_config.general['cuda'] else None if config.sim_config.general['solver'] == 'cuda':
memsolve = self.updates.calculate_memsolve(iteration)
self.updates.finalise() self.updates.finalise()
tsolve = self.updates.calculate_tsolve() tsolve = self.updates.calculate_tsolve()
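End-to-end, solver selection now keys off config.sim_config.general['solver'] rather than the old boolean flags; a minimal usage sketch (module path, iterator choice and model-building steps are assumed or elided):

import gprMax.config as config
from gprMax.solvers import create_G, create_solver   # assumed module path

config.sim_config.general['solver'] = 'opencl'   # 'cpu', 'cuda' or 'opencl'
G = create_G()                                   # FDTDGrid, CUDAGrid or OpenCLGrid
# ... build geometry, materials, sources and receivers on G ...
solver = create_solver(G)                        # wraps CPU/CUDA/OpenCL updates
tsolve, memsolve = solver.solve(range(G.iterations))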


@@ -45,7 +45,8 @@ class Source:
"""Calculates all waveform values for source for duration of simulation. """Calculates all waveform values for source for duration of simulation.
Args: Args:
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the
model.
""" """
# Waveform values for electric sources - calculated half a timestep later # Waveform values for electric sources - calculated half a timestep later
self.waveformvaluesJ = np.zeros((G.iterations), self.waveformvaluesJ = np.zeros((G.iterations),
@@ -82,13 +83,14 @@ class VoltageSource(Source):
"""Updates electric field values for a voltage source. """Updates electric field values for a voltage source.
Args: Args:
iteration (int): Current iteration (timestep). iteration: int of current iteration (timestep).
updatecoeffsE (memory view): numpy array of electric field update updatecoeffsE: memory view of array of electric field update
coefficients. coefficients.
ID (memory view): numpy array of numeric IDs corresponding to ID: memory view of array of numeric IDs corresponding to materials
materials in the model. in the model.
Ex, Ey, Ez (memory view): numpy array of electric field values. Ex, Ey, Ez: memory view of array of electric field values.
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the
model.
""" """
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop: if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
@@ -126,7 +128,8 @@ class VoltageSource(Source):
voltage source conductivity to the underlying parameters. voltage source conductivity to the underlying parameters.
Args: Args:
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the
model.
""" """
if self.resistance != 0: if self.resistance != 0:
@@ -166,13 +169,14 @@ class HertzianDipole(Source):
"""Updates electric field values for a Hertzian dipole. """Updates electric field values for a Hertzian dipole.
Args: Args:
iteration (int): Current iteration (timestep). iteration: int of current iteration (timestep).
updatecoeffsE (memory view): numpy array of electric field update updatecoeffsE: memory view of array of electric field update
coefficients. coefficients.
ID (memory view): numpy array of numeric IDs corresponding to ID: memory view of array of numeric IDs corresponding to materials
materials in the model. in the model.
Ex, Ey, Ez (memory view): numpy array of electric field values. Ex, Ey, Ez: memory view of array of electric field values.
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the
model.
""" """
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop: if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
@@ -203,13 +207,14 @@ class MagneticDipole(Source):
"""Updates magnetic field values for a magnetic dipole. """Updates magnetic field values for a magnetic dipole.
Args: Args:
iteration (int): Current iteration (timestep). iteration: int of current iteration (timestep).
updatecoeffsH (memory view): numpy array of magnetic field update updatecoeffsH: memory view of array of magnetic field update
coefficients. coefficients.
ID (memory view): numpy array of numeric IDs corresponding to ID: memory view of array of numeric IDs corresponding to materials
materials in the model. in the model.
Hx, Hy, Hz (memory view): numpy array of magnetic field values. Hx, Hy, Hz: memory view of array of magnetic field values.
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the
model.
""" """
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop: if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
@@ -234,24 +239,23 @@ class MagneticDipole(Source):
(1 / (G.dx * G.dy * G.dz))) (1 / (G.dx * G.dy * G.dz)))
def htod_src_arrays(sources, G): def htod_src_arrays(sources, G, queue=None):
"""Initialise arrays on GPU for source coordinates/polarisation, other """Initialise arrays on compute device for source coordinates/polarisation,
source information, and source waveform values. other source information, and source waveform values.
Args: Args:
sources (list): List of sources of one type, e.g. HertzianDipole sources: list of sources of one type, e.g. HertzianDipole
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the model.
queue: pyopencl queue.
Returns: Returns:
srcinfo1_gpu (int): numpy array of source cell coordinates and srcinfo1_dev: int array of source cell coordinates and polarisation
polarisation information. information.
srcinfo2_gpu (float): numpy array of other source information, srcinfo2_dev: float array of other source information, e.g. length,
e.g. length, resistance etc... resistance etc...
srcwaves_gpu (float): numpy array of source waveform values. srcwaves_dev: float array of source waveform values.
""" """
import pycuda.gpuarray as gpuarray
srcinfo1 = np.zeros((len(sources), 4), dtype=np.int32) srcinfo1 = np.zeros((len(sources), 4), dtype=np.int32)
srcinfo2 = np.zeros((len(sources)), dtype=config.sim_config.dtypes['float_or_double']) srcinfo2 = np.zeros((len(sources)), dtype=config.sim_config.dtypes['float_or_double'])
srcwaves = np.zeros((len(sources), G.iterations), dtype=config.sim_config.dtypes['float_or_double']) srcwaves = np.zeros((len(sources), G.iterations), dtype=config.sim_config.dtypes['float_or_double'])
@@ -276,11 +280,19 @@ def htod_src_arrays(sources, G):
elif src.__class__.__name__ == 'MagneticDipole': elif src.__class__.__name__ == 'MagneticDipole':
srcwaves[i, :] = src.waveformvaluesM srcwaves[i, :] = src.waveformvaluesM
srcinfo1_gpu = gpuarray.to_gpu(srcinfo1) # Copy arrays to compute device
srcinfo2_gpu = gpuarray.to_gpu(srcinfo2) if config.sim_config.general['solver'] == 'cuda':
srcwaves_gpu = gpuarray.to_gpu(srcwaves) import pycuda.gpuarray as gpuarray
srcinfo1_dev = gpuarray.to_gpu(srcinfo1)
srcinfo2_dev = gpuarray.to_gpu(srcinfo2)
srcwaves_dev = gpuarray.to_gpu(srcwaves)
elif config.sim_config.general['solver'] == 'opencl':
import pyopencl.array as clarray
srcinfo1_dev = clarray.to_device(queue, srcinfo1)
srcinfo2_dev = clarray.to_device(queue, srcinfo2)
srcwaves_dev = clarray.to_device(queue, srcwaves)
return srcinfo1_gpu, srcinfo2_gpu, srcwaves_gpu return srcinfo1_dev, srcinfo2_dev, srcwaves_dev
class TransmissionLine(Source): class TransmissionLine(Source):
@@ -291,7 +303,8 @@ class TransmissionLine(Source):
def __init__(self, G): def __init__(self, G):
""" """
Args: Args:
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the
model.
""" """
super().__init__() super().__init__()
@@ -328,7 +341,8 @@ class TransmissionLine(Source):
from: http://dx.doi.org/10.1002/mop.10415 from: http://dx.doi.org/10.1002/mop.10415
Args: Args:
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the
model.
""" """
for iteration in range(G.iterations): for iteration in range(G.iterations):
@@ -344,7 +358,8 @@ class TransmissionLine(Source):
"""Updates absorbing boundary condition at end of the transmission line. """Updates absorbing boundary condition at end of the transmission line.
Args: Args:
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the
model.
""" """
h = (config.c * G.dt - self.dl) / (config.c * G.dt + self.dl) h = (config.c * G.dt - self.dl) / (config.c * G.dt + self.dl)
@@ -357,8 +372,9 @@ class TransmissionLine(Source):
"""Updates voltage values along the transmission line. """Updates voltage values along the transmission line.
Args: Args:
iteration (int): Current iteration (timestep). iteration: int of current iteration (timestep).
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the
model.
""" """
# Update all the voltage values along the line # Update all the voltage values along the line
@@ -375,8 +391,9 @@ class TransmissionLine(Source):
"""Updates current values along the transmission line. """Updates current values along the transmission line.
Args: Args:
iteration (int): Current iteration (timestep). iteration: int of current iteration (timestep).
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the
model.
""" """
# Update all the current values along the line # Update all the current values along the line
@@ -393,13 +410,14 @@ class TransmissionLine(Source):
the transmission line. the transmission line.
Args: Args:
iteration (int): Current iteration (timestep). iteration: int of current iteration (timestep).
updatecoeffsE (memory view): numpy array of electric field update updatecoeffsE: memory view of array of electric field update
coefficients. coefficients.
ID (memory view): numpy array of numeric IDs corresponding to ID: memory view of array of numeric IDs corresponding to materials
materials in the model. in the model.
Ex, Ey, Ez (memory view): numpy array of electric field values. Ex, Ey, Ez: memory view of array of electric field values.
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the
model.
""" """
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop: if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
@@ -423,13 +441,14 @@ class TransmissionLine(Source):
in the main grid. in the main grid.
Args: Args:
iteration (int): Current iteration (timestep). iteration: int of current iteration (timestep).
updatecoeffsH (memory view): numpy array of magnetic field update updatecoeffsH: memory view of array of magnetic field update
coefficients. coefficients.
ID (memory view): numpy array of numeric IDs corresponding to ID: memory view of array of numeric IDs corresponding to materials
materials in the model. in the model.
Hx, Hy, Hz (memory view): numpy array of magnetic field values. Hx, Hy, Hz: memory view of array of magnetic field values.
G (FDTDGrid): Holds essential parameters describing the model. G: FDTDGrid object that holds essential parameters describing the
model.
""" """
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop: if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
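As with receivers and snapshots, htod_src_arrays is now shared between backends, with the pyopencl queue passed only for OpenCL; an assumed usage sketch:

# Hypothetical host-side staging of Hertzian dipole source arrays.
if config.sim_config.general['solver'] == 'cuda':
    srcinfo1_dev, srcinfo2_dev, srcwaves_dev = htod_src_arrays(G.hertziandipoles, G)
elif config.sim_config.general['solver'] == 'opencl':
    srcinfo1_dev, srcinfo2_dev, srcwaves_dev = htod_src_arrays(G.hertziandipoles, G, queue=queue)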


@@ -18,19 +18,21 @@
import logging import logging
from importlib import import_module from importlib import import_module
from string import Template
import numpy as np import numpy as np
from jinja2 import Environment, PackageLoader
import gprMax.config as config import gprMax.config as config
from .cuda.fields_updates import kernel_template_fields from .cuda.snapshots import knl_template_store_snapshot
from .cuda.snapshots import kernel_template_store_snapshot from .cuda_opencl_el import (knl_fields_updates, knl_snapshots,
from .cuda.source_updates import kernel_template_sources knl_source_updates, knl_store_outputs)
from .cython.fields_updates_normal import \ from .cython.fields_updates_normal import \
update_electric as update_electric_cpu update_electric as update_electric_cpu
from .cython.fields_updates_normal import \ from .cython.fields_updates_normal import \
update_magnetic as update_magnetic_cpu update_magnetic as update_magnetic_cpu
from .fields_outputs import kernel_template_store_outputs from .fields_outputs import knl_template_store_outputs
from .fields_outputs import store_outputs as store_outputs_cpu from .fields_outputs import store_outputs as store_outputs_cpu
from .receivers import dtoh_rx_array, htod_rx_arrays from .receivers import dtoh_rx_array, htod_rx_arrays
from .snapshots import Snapshot, dtoh_snapshot_array, htod_snapshot_array from .snapshots import Snapshot, dtoh_snapshot_array, htod_snapshot_array
@@ -256,27 +258,27 @@ class CUDAUpdates:
self.source_module = getattr(import_module('pycuda.compiler'), 'SourceModule') self.source_module = getattr(import_module('pycuda.compiler'), 'SourceModule')
self.drv.init() self.drv.init()
# Create device handle and context on specifc GPU device (and make it current context) # Create device handle and context on specific GPU device (and make it current context)
self.dev = self.drv.Device(config.get_model_config().cuda['gpu'].deviceID) self.dev = self.drv.Device(config.get_model_config().cuda['gpu'].deviceID)
self.ctx = self.dev.make_context() self.ctx = self.dev.make_context()
# Initialise arrays on GPU, prepare kernels, and get kernel functions # Initialise arrays on GPU, prepare kernels, and get kernel functions
self._set_field_kernels() self._set_field_knls()
if self.grid.pmls: if self.grid.pmls:
self._set_pml_kernels() self._set_pml_knls()
if self.grid.rxs: if self.grid.rxs:
self._set_rx_kernel() self._set_rx_knl()
if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles: if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles:
self._set_src_kernels() self._set_src_knls()
if self.grid.snapshots: if self.grid.snapshots:
self._set_snapshot_kernel() self._set_snapshot_knl()
def _set_field_kernels(self): def _set_field_knls(self):
"""Electric and magnetic field updates - prepare kernels, and """Electric and magnetic field updates - prepare kernels, and
get kernel functions. get kernel functions.
""" """
if config.get_model_config().materials['maxpoles'] > 0: if config.get_model_config().materials['maxpoles'] > 0:
kernels_fields = self.source_module(kernel_template_fields.substitute( knls_fields = self.source_module(knl_template_fields.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'], REAL=config.sim_config.dtypes['C_float_or_double'],
REALFUNC=config.get_model_config().materials['cudarealfunc'], REALFUNC=config.get_model_config().materials['cudarealfunc'],
COMPLEX=config.get_model_config().materials['dispersiveCdtype'], COMPLEX=config.get_model_config().materials['dispersiveCdtype'],
@@ -296,7 +298,7 @@ class CUDAUpdates:
options=config.sim_config.cuda['nvcc_opts']) options=config.sim_config.cuda['nvcc_opts'])
else: # Set to one any substitutions for dispersive materials. else: # Set to one any substitutions for dispersive materials.
# Value of COMPLEX is not relevant. # Value of COMPLEX is not relevant.
kernels_fields = self.source_module(kernel_template_fields.substitute( knls_fields = self.source_module(knl_template_fields.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'], REAL=config.sim_config.dtypes['C_float_or_double'],
REALFUNC=config.get_model_config().materials['cudarealfunc'], REALFUNC=config.get_model_config().materials['cudarealfunc'],
COMPLEX=config.sim_config.dtypes['C_float_or_double'], COMPLEX=config.sim_config.dtypes['C_float_or_double'],
@@ -314,17 +316,17 @@ class CUDAUpdates:
NY_T=1, NY_T=1,
NZ_T=1), NZ_T=1),
options=config.sim_config.cuda['nvcc_opts']) options=config.sim_config.cuda['nvcc_opts'])
self.update_electric_gpu = kernels_fields.get_function("update_electric") self.update_electric_gpu = knls_fields.get_function("update_electric")
self.update_magnetic_gpu = kernels_fields.get_function("update_magnetic") self.update_magnetic_gpu = knls_fields.get_function("update_magnetic")
self._copy_mat_coeffs(kernels_fields, kernels_fields) self._copy_mat_coeffs(knls_fields, knls_fields)
# Electric and magnetic field updates - dispersive materials # Electric and magnetic field updates - dispersive materials
# - get kernel functions and initialise array on GPU # - get kernel functions and initialise array on GPU
# If there are any dispersive materials (updates are split into two # If there are any dispersive materials (updates are split into two
# parts as they require present and updated electric field values). # parts as they require present and updated electric field values).
if config.get_model_config().materials['maxpoles'] > 0: if config.get_model_config().materials['maxpoles'] > 0:
self.dispersive_update_a = kernels_fields.get_function("update_electric_dispersive_A") self.dispersive_update_a = knls_fields.get_function("update_electric_dispersive_A")
self.dispersive_update_b = kernels_fields.get_function("update_electric_dispersive_B") self.dispersive_update_b = knls_fields.get_function("update_electric_dispersive_B")
# Electric and magnetic field updates - set blocks per grid and # Electric and magnetic field updates - set blocks per grid and
# initialise field arrays on GPU # initialise field arrays on GPU
@@ -334,17 +336,17 @@ class CUDAUpdates:
if config.get_model_config().materials['maxpoles'] > 0: if config.get_model_config().materials['maxpoles'] > 0:
self.grid.htod_dispersive_arrays() self.grid.htod_dispersive_arrays()
def _set_pml_kernels(self): def _set_pml_knls(self):
"""PMLS - prepare kernels and get kernel functions.""" """PMLS - prepare kernels and get kernel functions."""
pmlmodulelectric = 'gprMax.cuda.pml_updates_electric_' + self.grid.pmlformulation pmlmodulelectric = 'gprMax.cuda.pml_updates_electric_' + self.grid.pmlformulation
kernelelectricfunc = getattr(import_module(pmlmodulelectric), knlelectricfunc = getattr(import_module(pmlmodulelectric),
'kernels_template_pml_electric_' + 'knls_template_pml_electric_' +
self.grid.pmlformulation) self.grid.pmlformulation)
pmlmodulemagnetic = 'gprMax.cuda.pml_updates_magnetic_' + self.grid.pmlformulation pmlmodulemagnetic = 'gprMax.cuda.pml_updates_magnetic_' + self.grid.pmlformulation
kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic), knlmagneticfunc = getattr(import_module(pmlmodulemagnetic),
'kernels_template_pml_magnetic_' + 'knls_template_pml_magnetic_' +
self.grid.pmlformulation) self.grid.pmlformulation)
kernels_pml_electric = self.source_module(kernelelectricfunc.substitute( knls_pml_electric = self.source_module(knlelectricfunc.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'], REAL=config.sim_config.dtypes['C_float_or_double'],
N_updatecoeffsE=self.grid.updatecoeffsE.size, N_updatecoeffsE=self.grid.updatecoeffsE.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
@@ -355,7 +357,7 @@ class CUDAUpdates:
NY_ID=self.grid.ID.shape[2], NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3]), NZ_ID=self.grid.ID.shape[3]),
options=config.sim_config.cuda['nvcc_opts']) options=config.sim_config.cuda['nvcc_opts'])
kernels_pml_magnetic = self.source_module(kernelmagneticfunc.substitute( knls_pml_magnetic = self.source_module(knlmagneticfunc.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'], REAL=config.sim_config.dtypes['C_float_or_double'],
N_updatecoeffsH=self.grid.updatecoeffsH.size, N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1], NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1],
@@ -366,19 +368,19 @@ class CUDAUpdates:
NY_ID=self.grid.ID.shape[2], NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3]), NZ_ID=self.grid.ID.shape[3]),
options=config.sim_config.cuda['nvcc_opts']) options=config.sim_config.cuda['nvcc_opts'])
self._copy_mat_coeffs(kernels_pml_electric, kernels_pml_magnetic) self._copy_mat_coeffs(knls_pml_electric, knls_pml_magnetic)
# Set block per grid, initialise arrays on GPU, and get kernel functions # Set block per grid, initialise arrays on GPU, and get kernel functions
for pml in self.grid.pmls: for pml in self.grid.pmls:
pml.htod_field_arrays() pml.htod_field_arrays()
pml.set_blocks_per_grid() pml.set_blocks_per_grid()
pml.get_update_funcs(kernels_pml_electric, kernels_pml_magnetic) pml.get_update_funcs(knls_pml_electric, knls_pml_magnetic)
def _set_rx_kernel(self): def _set_rx_knl(self):
"""Receivers - initialise arrays on GPU, prepare kernel and get kernel """Receivers - initialise arrays on GPU, prepare kernel and get kernel
function. function.
""" """
self.rxcoords_gpu, self.rxs_gpu = htod_rx_arrays(self.grid) self.rxcoords_gpu, self.rxs_gpu = htod_rx_arrays(self.grid)
kernel_store_outputs = self.source_module(kernel_template_store_outputs.substitute( knl_store_outputs = self.source_module(knl_template_store_outputs.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'], REAL=config.sim_config.dtypes['C_float_or_double'],
NY_RXCOORDS=3, NY_RXCOORDS=3,
NX_RXS=6, NX_RXS=6,
@@ -388,13 +390,13 @@ class CUDAUpdates:
NY_FIELDS=self.grid.ny + 1, NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1), NZ_FIELDS=self.grid.nz + 1),
options=config.sim_config.cuda['nvcc_opts']) options=config.sim_config.cuda['nvcc_opts'])
self.store_outputs_gpu = kernel_store_outputs.get_function("store_outputs") self.store_outputs_gpu = knl_store_outputs.get_function("store_outputs")
def _set_src_kernels(self): def _set_src_knls(self):
"""Sources - initialise arrays on GPU, prepare kernel and get kernel """Sources - initialise arrays on GPU, prepare kernel and get kernel
function. function.
""" """
kernels_sources = self.source_module(kernel_template_sources.substitute( knls_sources = self.source_module(knl_template_sources.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'], REAL=config.sim_config.dtypes['C_float_or_double'],
N_updatecoeffsE=self.grid.updatecoeffsE.size, N_updatecoeffsE=self.grid.updatecoeffsE.size,
N_updatecoeffsH=self.grid.updatecoeffsH.size, N_updatecoeffsH=self.grid.updatecoeffsH.size,
@@ -408,23 +410,23 @@ class CUDAUpdates:
NY_ID=self.grid.ID.shape[2], NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3]), NZ_ID=self.grid.ID.shape[3]),
options=config.sim_config.cuda['nvcc_opts']) options=config.sim_config.cuda['nvcc_opts'])
self._copy_mat_coeffs(kernels_sources, kernels_sources) self._copy_mat_coeffs(knls_sources, knls_sources)
if self.grid.hertziandipoles: if self.grid.hertziandipoles:
self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = htod_src_arrays(self.grid.hertziandipoles, self.grid) self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = htod_src_arrays(self.grid.hertziandipoles, self.grid)
self.update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole") self.update_hertzian_dipole_gpu = knls_sources.get_function("update_hertzian_dipole")
if self.grid.magneticdipoles: if self.grid.magneticdipoles:
self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = htod_src_arrays(self.grid.magneticdipoles, self.grid) self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = htod_src_arrays(self.grid.magneticdipoles, self.grid)
self.update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole") self.update_magnetic_dipole_gpu = knls_sources.get_function("update_magnetic_dipole")
if self.grid.voltagesources: if self.grid.voltagesources:
self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = htod_src_arrays(self.grid.voltagesources, self.grid) self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = htod_src_arrays(self.grid.voltagesources, self.grid)
self.update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source") self.update_voltage_source_gpu = knls_sources.get_function("update_voltage_source")
def _set_snapshot_kernel(self): def _set_snapshot_knl(self):
"""Snapshots - initialise arrays on GPU, prepare kernel and get kernel """Snapshots - initialise arrays on GPU, prepare kernel and get kernel
function. function.
""" """
self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = htod_snapshot_array(self.grid) self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = htod_snapshot_array(self.grid)
kernel_store_snapshot = self.source_module(kernel_template_store_snapshot.substitute( knl_store_snapshot = self.source_module(knl_template_store_snapshot.substitute(
REAL=config.sim_config.dtypes['C_float_or_double'], REAL=config.sim_config.dtypes['C_float_or_double'],
NX_SNAPS=Snapshot.nx_max, NX_SNAPS=Snapshot.nx_max,
NY_SNAPS=Snapshot.ny_max, NY_SNAPS=Snapshot.ny_max,
@@ -433,25 +435,25 @@ class CUDAUpdates:
NY_FIELDS=self.grid.ny + 1, NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1), NZ_FIELDS=self.grid.nz + 1),
options=config.sim_config.cuda['nvcc_opts']) options=config.sim_config.cuda['nvcc_opts'])
self.store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot") self.store_snapshot_gpu = knl_store_snapshot.get_function("store_snapshot")
def _copy_mat_coeffs(self, kernelE, kernelH): def _copy_mat_coeffs(self, knlE, knlH):
"""Copy material coefficient arrays to constant memory of GPU """Copy material coefficient arrays to constant memory of GPU
(must be <64KB). (must be <64KB).
Args: Args:
kernelE (kernel): electric field kernel. knlE (kernel): electric field kernel.
kernelH (kernel): magnetic field kernel. knlH (kernel): magnetic field kernel.
""" """
# Check if coefficient arrays will fit on constant memory of GPU # Check if coefficient arrays will fit on constant memory of GPU
if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes
> config.get_model_config().cuda['gpu'].constmem): > config.get_model_config().cuda['gpu'].total_constant_memory):
logger.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.get_model_config().cuda['gpu'].constmem)} on {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU") logger.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.get_model_config().cuda['gpu'].total_constant_memory)} on {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU")
raise ValueError raise ValueError
updatecoeffsE = kernelE.get_global('updatecoeffsE')[0] updatecoeffsE = knlE.get_global('updatecoeffsE')[0]
updatecoeffsH = kernelH.get_global('updatecoeffsH')[0] updatecoeffsH = knlH.get_global('updatecoeffsH')[0]
self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE) self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE)
self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH) self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH)
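_copy_mat_coeffs above relies on pycuda locating a __constant__ array in the compiled module with get_global() and filling it with memcpy_htod(). A minimal, self-contained sketch of that pattern (not part of this commit; kernel and names are illustrative):

# Minimal sketch of the pycuda constant-memory pattern used by _copy_mat_coeffs.
# Illustrative only: assumes pycuda and an NVIDIA CUDA-capable GPU are available.
import numpy as np
import pycuda.autoinit                      # creates a CUDA context on the default GPU
import pycuda.driver as drv
from pycuda.compiler import SourceModule

mod = SourceModule("""
__constant__ float coeffs[4];
__global__ void scale(float *a, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) a[i] *= coeffs[0];
}
""")

coeffs = np.array([2, 0, 0, 0], dtype=np.float32)
const_ptr = mod.get_global('coeffs')[0]     # (device pointer, size in bytes)
drv.memcpy_htod(const_ptr, coeffs)          # fill constant memory (limited size, e.g. 64KB)

a = np.ones(8, dtype=np.float32)
a_dev = drv.mem_alloc(a.nbytes)
drv.memcpy_htod(a_dev, a)
mod.get_function('scale')(a_dev, np.int32(a.size), block=(8, 1, 1))
drv.memcpy_dtoh(a, a_dev)
print(a)                                    # every element scaled by coeffs[0]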
@@ -707,3 +709,595 @@ class CUDAUpdates:
# Remove context from top of stack and delete # Remove context from top of stack and delete
self.ctx.pop() self.ctx.pop()
del self.ctx del self.ctx
class OpenCLUpdates:
"""Defines update functions for OpenCL-based solver."""
def __init__(self, G):
"""
Args:
G: FDTDObject of parameters describing a grid in a model.
"""
self.grid = G
self.dispersive_update_a = None
self.dispersive_update_b = None
self.compute_time = 0
# Import pyopencl module
self.cl = import_module('pyopencl')
self.elwise = getattr(import_module('pyopencl.elementwise'), 'ElementwiseKernel')
# Select device, create context and command queue
self.dev = config.get_model_config().device['dev']
self.ctx = self.cl.Context(devices=[self.dev])
self.queue = self.cl.CommandQueue(self.ctx,
properties=self.cl.command_queue_properties.PROFILING_ENABLE)
# Environment for templating kernels
self.env = Environment(loader=PackageLoader('gprMax', 'cuda_opencl_el'))
# Initialise arrays on device, prepare kernels, and get kernel functions
self._set_field_knls()
if self.grid.pmls:
self._set_pml_knls()
if self.grid.rxs:
self._set_rx_knl()
if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles:
self._set_src_knls()
if self.grid.snapshots:
self._set_snapshot_knl()
def _set_field_knls(self):
"""Electric and magnetic field updates - prepare kernels, and
get kernel functions.
"""
if config.get_model_config().materials['maxpoles'] > 0:
NY_MATDISPCOEFFS = self.grid.updatecoeffsdispersive.shape[1]
NX_T = self.grid.Tx.shape[1]
NY_T = self.grid.Tx.shape[2]
NZ_T = self.grid.Tx.shape[3]
else: # Set to one any substitutions for dispersive materials.
NY_MATDISPCOEFFS = 1
NX_T = 1
NY_T = 1
NZ_T = 1
self.knl_common = self.env.get_template('knl_common_opencl.tmpl').render(
updatecoeffsE = self.grid.updatecoeffsE.ravel(),
updatecoeffsH = self.grid.updatecoeffsH.ravel(),
REAL=config.sim_config.dtypes['C_float_or_double'],
N_updatecoeffsE=self.grid.updatecoeffsE.size,
N_updatecoeffsH=self.grid.updatecoeffsH.size,
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
NY_MATDISPCOEFFS=NY_MATDISPCOEFFS,
NX_FIELDS=self.grid.nx + 1,
NY_FIELDS=self.grid.ny + 1,
NZ_FIELDS=self.grid.nz + 1,
NX_ID=self.grid.ID.shape[1],
NY_ID=self.grid.ID.shape[2],
NZ_ID=self.grid.ID.shape[3],
NX_T=NX_T,
NY_T=NY_T,
NZ_T=NZ_T,
NY_RXCOORDS=3,
NX_RXS=6,
NY_RXS=self.grid.iterations,
NZ_RXS=len(self.grid.rxs),
NY_SRCINFO=4,
NY_SRCWAVES=self.grid.iterations,
NX_SNAPS=Snapshot.nx_max,
NY_SNAPS=Snapshot.ny_max,
NZ_SNAPS=Snapshot.nz_max)
self.update_electric_dev = self.elwise(self.ctx,
Template("int NX, "
"int NY, "
"int NZ, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez, "
"__global const $REAL * restrict Hx, "
"__global const $REAL * restrict Hy, "
"__global const $REAL * restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_fields_updates.update_electric.substitute({
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}),
'update_electric', preamble=self.knl_common)
self.update_magnetic_dev = self.elwise(self.ctx,
Template("int NX, "
"int NY, "
"int NZ, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Hx, "
"__global $REAL *Hy, "
"__global $REAL *Hz, "
"__global const $REAL * restrict Ex, "
"__global const $REAL * restrict Ey, "
"__global const $REAL * restrict Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_fields_updates.update_magnetic.substitute({
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}),
'update_magnetic', preamble=self.knl_common)
# Electric and magnetic field updates - dispersive materials -
# get kernel functions
# If there are any dispersive materials (updates are split into two
# parts as they require present and updated electric field values).
if config.get_model_config().materials['maxpoles'] > 0:
self.dispersive_update_a = self.elwise(self.ctx,
Template("int NX, "
"int NY, "
"int NZ, "
"int MAXPOLES, "
"__global const $COMPLEX* restrict updatecoeffsdispersive, "
"__global $COMPLEX *Tx, "
"__global $COMPLEX *Ty, "
"__global $COMPLEX *Tz, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double'], 'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}),
knl_fields_updates.update_electric_dispersive_A.substitute({
'REAL': config.sim_config.dtypes['C_float_or_double'],
'REALFUNC': config.get_model_config().materials['crealfunc'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3],
'NX_T': NX_T,
'NY_T': NY_T,
'NZ_T': NZ_T}),
'update_electric_dispersive_A', preamble=self.knl_common)
self.dispersive_update_b = self.elwise(self.ctx,
Template("int NX, "
"int NY, "
"int NZ, "
"int MAXPOLES, "
"__global const $COMPLEX* restrict updatecoeffsdispersive, "
"__global $COMPLEX *Tx, "
"__global $COMPLEX *Ty, "
"__global $COMPLEX *Tz, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double'] ,'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}),
knl_fields_updates.update_electric_dispersive_B.substitute({
'REAL': config.sim_config.dtypes['C_float_or_double'],
'REALFUNC': config.get_model_config().materials['crealfunc'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3],
'NX_T': NX_T,
'NY_T': NY_T,
'NZ_T': NZ_T}),
'update_electric_dispersive_B', preamble=self.knl_common)
# Electric and magnetic field updates - initialise field arrays on
# compute device
self.grid.htod_geometry_arrays(self.queue)
self.grid.htod_field_arrays(self.queue)
if config.get_model_config().materials['maxpoles'] > 0:
self.grid.htod_dispersive_arrays(self.queue)
def _set_pml_knls(self):
"""PMLS - prepare kernels and get kernel functions."""
knl_pml_updates_electric = import_module('gprMax.cuda_opencl_el.knl_pml_updates_electric_' + self.grid.pmlformulation)
knl_pml_updates_magnetic = import_module('gprMax.cuda_opencl_el.knl_pml_updates_magnetic_' + self.grid.pmlformulation)
# Set workgroup size, initialise arrays on compute device, and get
# kernel functions
for pml in self.grid.pmls:
pml.set_queue(self.queue)
pml.htod_field_arrays()
pml.set_wgs()
knl_name = 'order' + str(len(pml.CFS)) + '_' + pml.direction
knl_electric_name = getattr(knl_pml_updates_electric, knl_name)
knl_magnetic_name = getattr(knl_pml_updates_magnetic, knl_name)
pml.update_electric_dev = self.elwise(self.ctx,
knl_electric_name['args'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_electric_name['func'].substitute({
'REAL': config.sim_config.dtypes['C_float_or_double'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}),
'pml_updates_electric_' + knl_name,
preamble=self.knl_common)
pml.update_magnetic_dev = self.elwise(self.ctx,
knl_magnetic_name['args'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_magnetic_name['func'].substitute({
'REAL': config.sim_config.dtypes['C_float_or_double'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}),
'pml_updates_magnetic_' + knl_name,
preamble=self.knl_common)
def _set_rx_knl(self):
"""Receivers - initialise arrays on compute device, prepare kernel and
get kernel function.
"""
self.rxcoords_dev, self.rxs_dev = htod_rx_arrays(self.grid, self.queue)
self.store_outputs_dev = self.elwise(self.ctx,
Template("int NRX, "
"int iteration, "
"__global const int* restrict rxcoords, "
"__global $REAL *rxs, "
"__global const $REAL* restrict Ex, "
"__global const $REAL* restrict Ey, "
"__global const $REAL* restrict Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_store_outputs.store_outputs.substitute(),
'store_outputs', preamble=self.knl_common)
def _set_src_knls(self):
"""Sources - initialise arrays on compute device, prepare kernel and
get kernel function.
"""
if self.grid.hertziandipoles:
self.srcinfo1_hertzian_dev, self.srcinfo2_hertzian_dev, self.srcwaves_hertzian_dev = htod_src_arrays(self.grid.hertziandipoles, self.grid, self.queue)
self.update_hertzian_dipole_dev = self.elwise(self.ctx,
Template("int NHERTZDIPOLE, "
"int iteration, "
"$REAL dx, "
"$REAL dy, "
"$REAL dz, "
"__global const int* restrict srcinfo1, "
"__global const $REAL* restrict srcinfo2, "
"__global const $REAL* restrict srcwaveforms, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_hertzian_dipole.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
'update_hertzian_dipole', preamble=self.knl_common)
if self.grid.magneticdipoles:
self.srcinfo1_magnetic_dev, self.srcinfo2_magnetic_dev, self.srcwaves_magnetic_dev = htod_src_arrays(self.grid.magneticdipoles, self.grid, self.queue)
self.update_magnetic_dipole_dev = self.elwise(self.ctx,
Template("int NMAGDIPOLE, "
"int iteration, "
"$REAL dx, "
"$REAL dy, "
"$REAL dz, "
"__global const int* restrict srcinfo1, "
"__global const $REAL* restrict srcinfo2, "
"__global const $REAL* restrict srcwaveforms, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Hx, "
"__global $REAL *Hy, "
"__global $REAL *Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_magnetic_dipole.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
'update_magnetic_dipole', preamble=self.knl_common)
if self.grid.voltagesources:
self.srcinfo1_voltage_dev, self.srcinfo2_voltage_dev, self.srcwaves_voltage_dev = htod_src_arrays(self.grid.voltagesources, self.grid, self.queue)
self.update_voltage_source_dev = self.elwise(self.ctx,
Template("int NVOLTSRC, "
"int iteration, "
"$REAL dx, "
"$REAL dy, "
"$REAL dz, "
"__global const int* restrict srcinfo1, "
"__global const $REAL* restrict srcinfo2, "
"__global const $REAL* restrict srcwaveforms, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_voltage_source.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), 'update_voltage_source', preamble=self.knl_common)
def _set_snapshot_knl(self):
"""Snapshots - initialise arrays on compute device, prepare kernel and
get kernel function.
"""
self.snapEx_dev, self.snapEy_dev, self.snapEz_dev, self.snapHx_dev, self.snapHy_dev, self.snapHz_dev = htod_snapshot_array(self.grid, self.queue)
self.store_snapshot_dev = self.elwise(self.ctx,
Template("int p, "
"int xs, "
"int xf, "
"int ys, "
"int yf, "
"int zs, "
"int zf, "
"int dx, "
"int dy, "
"int dz, "
"__global const $REAL* restrict Ex, "
"__global const $REAL* restrict Ey, "
"__global const $REAL* restrict Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz, "
"__global $REAL *snapEx, "
"__global $REAL *snapEy, "
"__global $REAL *snapEz, "
"__global $REAL *snapHx, "
"__global $REAL *snapHy, "
"__global $REAL *snapHz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_snapshots.store_snapshot.substitute({'NX_SNAPS': Snapshot.nx_max,
'NY_SNAPS': Snapshot.ny_max,
'NZ_SNAPS': Snapshot.nz_max}),
'store_snapshot', preamble=self.knl_common)
def store_outputs(self):
"""Store field component values for every receiver."""
if self.grid.rxs:
event = self.store_outputs_dev(np.int32(len(self.grid.rxs)),
np.int32(self.grid.iteration),
self.rxcoords_dev,
self.rxs_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
def store_snapshots(self, iteration):
"""Store any snapshots.
Args:
iteration: int for iteration number.
"""
for i, snap in enumerate(self.grid.snapshots):
if snap.time == iteration + 1:
snapno = 0 if config.get_model_config().device['snapsgpu2cpu'] else i
event = self.store_snapshot_dev(np.int32(snapno),
np.int32(snap.xs),
np.int32(snap.xf),
np.int32(snap.ys),
np.int32(snap.yf),
np.int32(snap.zs),
np.int32(snap.zf),
np.int32(snap.dx),
np.int32(snap.dy),
np.int32(snap.dz),
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev,
self.snapEx_dev,
self.snapEy_dev,
self.snapEz_dev,
self.snapHx_dev,
self.snapHy_dev,
self.snapHz_dev)
event.wait()
if config.get_model_config().device['snapsgpu2cpu']:
dtoh_snapshot_array(self.snapEx_dev.get(),
self.snapEy_dev.get(),
self.snapEz_dev.get(),
self.snapHx_dev.get(),
self.snapHy_dev.get(),
self.snapHz_dev.get(),
0,
snap)
self.compute_time += (event.profile.end - event.profile.start)*1e-9
def update_magnetic(self):
"""Update magnetic field components."""
event = self.update_magnetic_dev(np.int32(self.grid.nx),
np.int32(self.grid.ny),
np.int32(self.grid.nz),
self.grid.ID_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
def update_magnetic_pml(self):
"""Update magnetic field components with the PML correction."""
for pml in self.grid.pmls:
pml.update_magnetic()
self.compute_time += pml.compute_time
def update_magnetic_sources(self):
"""Update magnetic field components from sources."""
if self.grid.magneticdipoles:
event = self.update_magnetic_dipole_dev(np.int32(len(self.grid.magneticdipoles)),
np.int32(self.grid.iteration),
config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_magnetic_dev,
self.srcinfo2_magnetic_dev,
self.srcwaves_magnetic_dev,
self.grid.ID_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
def update_electric_a(self):
"""Update electric field components."""
# All materials are non-dispersive so do standard update.
if config.get_model_config().materials['maxpoles'] == 0:
event = self.update_electric_dev(np.int32(self.grid.nx),
np.int32(self.grid.ny),
np.int32(self.grid.nz),
self.grid.ID_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
# If there are any dispersive materials do 1st part of dispersive update
# (it is split into two parts as it requires present and updated electric field values).
else:
event = self.dispersive_update_a(np.int32(self.grid.nx),
np.int32(self.grid.ny),
np.int32(self.grid.nz),
np.int32(config.get_model_config().materials['maxpoles']),
self.grid.updatecoeffsdispersive_dev,
self.grid.Tx_dev,
self.grid.Ty_dev,
self.grid.Tz_dev,
self.grid.ID_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
def update_electric_pml(self):
"""Update electric field components with the PML correction."""
for pml in self.grid.pmls:
pml.update_electric()
self.compute_time += pml.compute_time
def update_electric_sources(self):
"""Update electric field components from sources -
update any Hertzian dipole sources last.
"""
if self.grid.voltagesources:
event = self.update_voltage_source_dev(np.int32(len(self.grid.voltagesources)),
np.int32(self.grid.iteration),
config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_voltage_dev,
self.srcinfo2_voltage_dev,
self.srcwaves_voltage_dev,
self.grid.ID_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
if self.grid.hertziandipoles:
event = self.update_hertzian_dipole_dev(np.int32(len(self.grid.hertziandipoles)),
np.int32(self.grid.iteration),
config.sim_config.dtypes['float_or_double'](self.grid.dx),
config.sim_config.dtypes['float_or_double'](self.grid.dy),
config.sim_config.dtypes['float_or_double'](self.grid.dz),
self.srcinfo1_hertzian_dev,
self.srcinfo2_hertzian_dev,
self.srcwaves_hertzian_dev,
self.grid.ID_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
self.grid.iteration += 1
def update_electric_b(self):
"""If there are any dispersive materials do 2nd part of dispersive
update - it is split into two parts as it requires present and
updated electric field values. Therefore it can only be completely
updated after the electric field has been updated by the PML and
source updates.
"""
if config.get_model_config().materials['maxpoles'] > 0:
event = self.dispersive_update_b(np.int32(self.grid.nx),
np.int32(self.grid.ny),
np.int32(self.grid.nz),
np.int32(config.get_model_config().materials['maxpoles']),
self.grid.updatecoeffsdispersive_dev,
self.grid.Tx_dev,
self.grid.Ty_dev,
self.grid.Tz_dev,
self.grid.ID_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9
def time_start(self):
pass
def calculate_memsolve(self, iteration):
"""Calculate memory used on last iteration.
Args:
iteration: int of iteration number.
Returns:
Memory (RAM) used on compute device.
"""
# if iteration == self.grid.iterations - 1:
# return self.drv.mem_get_info()[1] - self.drv.mem_get_info()[0]
logger.debug('Look at memory estimate for pyopencl')
pass
def calculate_tsolve(self):
"""Calculate solving time for model."""
return self.compute_time
def finalise(self):
"""Copy data from compute device back to CPU to save to file(s)."""
# Copy output from receivers array back to correct receiver objects
if self.grid.rxs:
dtoh_rx_array(self.rxs_dev.get(), self.rxcoords_dev.get(), self.grid)
# Copy data from any snapshots back to correct snapshot objects
if self.grid.snapshots and not config.get_model_config().device['snapsgpu2cpu']:
for i, snap in enumerate(self.grid.snapshots):
dtoh_snapshot_array(self.snapEx_dev.get(),
self.snapEy_dev.get(),
self.snapEz_dev.get(),
self.snapHx_dev.get(),
self.snapHy_dev.get(),
self.snapHz_dev.get(),
i, snap)
def cleanup(self):
"""Cleanup compute device context."""
logger.debug('Check if pyopencl needs explicit cleanup.')
# Remove context from top of stack and delete
# self.ctx.pop()
# del self.ctx
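The OpenCLUpdates class above builds every kernel with pyopencl's ElementwiseKernel (an argument string, a C body, a kernel name and a shared preamble) and accumulates compute_time from profiling events on the command queue. A minimal, self-contained sketch of that pattern (illustrative, not part of this commit):

# Minimal sketch of the ElementwiseKernel + profiling pattern used by OpenCLUpdates.
# Illustrative only: assumes pyopencl and at least one OpenCL-capable device.
import numpy as np
import pyopencl as cl
import pyopencl.array as cla
from pyopencl.elementwise import ElementwiseKernel

ctx = cl.create_some_context(interactive=False)
queue = cl.CommandQueue(ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)

axpy = ElementwiseKernel(ctx,
                         "float a, __global const float *x, __global float *y",
                         "y[i] = a * x[i] + y[i]",
                         "axpy")

x = cla.to_device(queue, np.ones(1024, dtype=np.float32))
y = cla.zeros(queue, 1024, dtype=np.float32)

event = axpy(np.float32(2.0), x, y)        # the call returns an event, like the *_dev calls above
event.wait()
compute_time = (event.profile.end - event.profile.start) * 1e-9   # nanoseconds -> seconds
print(y.get()[:4], compute_time)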

View File

@@ -26,7 +26,7 @@ import sys
import gprMax.config as config import gprMax.config as config
import psutil import psutil
from .utilities import human_size from .utilities import get_terminal_width, human_size
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -35,8 +35,8 @@ def get_host_info():
"""Get information about the machine, CPU, RAM, and OS. """Get information about the machine, CPU, RAM, and OS.
Returns: Returns:
hostinfo (dict): Manufacturer and model of machine; description of CPU hostinfo: dict containing manufacturer and model of machine;
type, speed, cores; RAM; name and description of CPU type, speed, cores; RAM; name and
version of operating system. version of operating system.
""" """
@@ -47,13 +47,17 @@ def get_host_info():
if sys.platform == 'win32': if sys.platform == 'win32':
# Manufacturer/model # Manufacturer/model
try: try:
manufacturer = subprocess.check_output("wmic csproduct get vendor", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() manufacturer = subprocess.check_output("wmic csproduct get vendor",
shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
manufacturer = manufacturer.split('\n') manufacturer = manufacturer.split('\n')
if len(manufacturer) > 1: if len(manufacturer) > 1:
manufacturer = manufacturer[1] manufacturer = manufacturer[1]
else: else:
manufacturer = manufacturer[0] manufacturer = manufacturer[0]
model = subprocess.check_output("wmic computersystem get model", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() model = subprocess.check_output("wmic computersystem get model",
shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
model = model.split('\n') model = model.split('\n')
if len(model) > 1: if len(model) > 1:
model = model[1] model = model[1]
@@ -61,16 +65,19 @@ def get_host_info():
model = model[0] model = model[0]
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
pass pass
machineID = manufacturer + ' ' + model machineID = ' '.join(manufacturer.split()) + ' ' + ' '.join(model.split())
# CPU information # CPU information
try: try:
allcpuinfo = subprocess.check_output("wmic cpu get Name", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() allcpuinfo = subprocess.check_output("wmic cpu get Name",
shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
allcpuinfo = allcpuinfo.split('\n') allcpuinfo = allcpuinfo.split('\n')
sockets = 0 sockets = 0
for line in allcpuinfo: for line in allcpuinfo:
if 'CPU' in line: if 'CPU' in line:
cpuID = line.strip() cpuID = line.strip()
cpuID = ' '.join(cpuID.split())
sockets += 1 sockets += 1
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
pass pass
@@ -93,16 +100,21 @@ def get_host_info():
# Manufacturer/model # Manufacturer/model
manufacturer = 'Apple' manufacturer = 'Apple'
try: try:
model = subprocess.check_output("sysctl -n hw.model", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() model = subprocess.check_output("sysctl -n hw.model", shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
pass pass
machineID = manufacturer + ' ' + model machineID = ' '.join(manufacturer.split()) + ' ' + ' '.join(model.split())
# CPU information # CPU information
try: try:
sockets = subprocess.check_output("sysctl -n hw.packages", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() sockets = subprocess.check_output("sysctl -n hw.packages",
shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
sockets = int(sockets) sockets = int(sockets)
cpuID = subprocess.check_output("sysctl -n machdep.cpu.brand_string", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() cpuID = subprocess.check_output("sysctl -n machdep.cpu.brand_string",
shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
cpuID = ' '.join(cpuID.split()) cpuID = ' '.join(cpuID.split())
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
pass pass
@@ -123,21 +135,30 @@ def get_host_info():
elif sys.platform == 'linux': elif sys.platform == 'linux':
# Manufacturer/model # Manufacturer/model
try: try:
manufacturer = subprocess.check_output("cat /sys/class/dmi/id/sys_vendor", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() manufacturer = subprocess.check_output("cat /sys/class/dmi/id/sys_vendor",
model = subprocess.check_output("cat /sys/class/dmi/id/product_name", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
model = subprocess.check_output("cat /sys/class/dmi/id/product_name",
shell=True,
stderr=subprocess.STDOUT).decode('utf-8').strip()
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
pass pass
machineID = manufacturer + ' ' + model machineID = ' '.join(manufacturer.split()) + ' ' + ' '.join(model.split())
# CPU information # CPU information
try: try:
# Locale to ensure English # Locale to ensure English
myenv = {**os.environ, 'LANG': 'en_US.utf8'} myenv = {**os.environ, 'LANG': 'en_US.utf8'}
cpuIDinfo = subprocess.check_output("cat /proc/cpuinfo", shell=True, stderr=subprocess.STDOUT, env=myenv).decode('utf-8').strip() cpuIDinfo = subprocess.check_output("cat /proc/cpuinfo", shell=True,
stderr=subprocess.STDOUT,
env=myenv).decode('utf-8').strip()
for line in cpuIDinfo.split('\n'): for line in cpuIDinfo.split('\n'):
if re.search('model name', line): if re.search('model name', line):
cpuID = re.sub('.*model name.*:', '', line, 1).strip() cpuID = re.sub('.*model name.*:', '', line, 1).strip()
allcpuinfo = subprocess.check_output("lscpu", shell=True, stderr=subprocess.STDOUT, env=myenv).decode('utf-8').strip() cpuID = ' '.join(cpuID.split())
allcpuinfo = subprocess.check_output("lscpu", shell=True,
stderr=subprocess.STDOUT,
env=myenv).decode('utf-8').strip()
for line in allcpuinfo.split('\n'): for line in allcpuinfo.split('\n'):
if 'Socket(s)' in line: if 'Socket(s)' in line:
sockets = int(line.strip()[-1]) sockets = int(line.strip()[-1])
@@ -177,11 +198,31 @@ def get_host_info():
return hostinfo return hostinfo
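For reference, the hostinfo dict returned above and consumed by print_host_info below carries entries of roughly the following shape (values are illustrative only, not output from a real machine):

hostinfo = {'hostname': 'my-workstation',             # illustrative values only
            'machineID': 'Dell Inc. Precision 7560',
            'sockets': 1,
            'cpuID': 'Intel(R) Core(TM) i9-11950H @ 2.60GHz',
            'physicalcores': 8,
            'logicalcores': 16,
            'hyperthreading': True,
            'ram': 34359738368,                        # bytes
            'osversion': 'Linux 5.15'}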
def print_host_info(hostinfo):
"""Print information about the machine, CPU, RAM, and OS.
Args:
hostinfo: dict containing manufacturer and model of machine;
description of CPU type, speed, cores; RAM; name and
version of operating system.
"""
hyperthreadingstr = (f", {config.sim_config.hostinfo['logicalcores']} "
f"cores with Hyper-Threading" if config.sim_config.hostinfo['hyperthreading'] else '')
logger.basic(f"\n{config.sim_config.hostinfo['hostname']} | "
f"{config.sim_config.hostinfo['machineID']} "
f"{hostinfo['sockets']} x {hostinfo['cpuID']} "
f"({hostinfo['physicalcores']} cores{hyperthreadingstr}) | "
f"{human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} | "
f"{hostinfo['osversion']}")
logger.basic(f"|--->OpenMP: {hostinfo['physicalcores']} threads")
def set_omp_threads(nthreads=None): def set_omp_threads(nthreads=None):
"""Sets the number of OpenMP CPU threads for parallelised parts of code. """Sets the number of OpenMP CPU threads for parallelised parts of code.
Returns: Returns:
nthreads (int): Number of OpenMP threads. nthreads: int for number of OpenMP threads.
""" """
if sys.platform == 'darwin': if sys.platform == 'darwin':
@@ -228,29 +269,39 @@ def mem_check_host(mem):
"""Check if the required amount of memory (RAM) is available on host. """Check if the required amount of memory (RAM) is available on host.
Args: Args:
mem (int): Memory required (bytes). mem: int for memory required (bytes).
""" """
if mem > config.sim_config.hostinfo['ram']: if mem > config.sim_config.hostinfo['ram']:
logger.exception(f"Memory (RAM) required ~{human_size(mem)} exceeds {human_size(config.sim_config.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} detected!\n") logger.exception(f"Memory (RAM) required ~{human_size(mem)} exceeds "
f"{human_size(config.sim_config.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} "
"detected!\n")
raise ValueError raise ValueError
def mem_check_gpu_snaps(total_mem, snaps_mem): def mem_check_device_snaps(total_mem, snaps_mem):
"""Check if the required amount of memory (RAM) for all snapshots can fit """Check if the required amount of memory (RAM) for all snapshots can fit
on specified GPU. on specified device.
Args: Args:
total_mem (int): Total memory required for model (bytes). total_mem: int for total memory required for model (bytes).
snaps_mem (int): Memory required for all snapshots (bytes). snaps_mem: int for memory required for all snapshots (bytes).
""" """
if total_mem - snaps_mem > config.get_model_config().cuda['gpu'].totalmem:
logger.exception(f"Memory (RAM) required ~{human_size(total_mem)} exceeds {human_size(config.get_model_config().cuda['gpu'].totalmem, a_kilobyte_is_1024_bytes=True)} detected on specified {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU!\n") if config.sim_config.general['solver'] == 'cuda':
device_mem = config.get_model_config().device['dev'].total_memory()
elif config.sim_config.general['solver'] == 'opencl':
device_mem = config.get_model_config().device['dev'].global_mem_size
if total_mem - snaps_mem > device_mem:
logger.exception(f"Memory (RAM) required ~{human_size(total_mem)} exceeds "
f"{human_size(device_mem, a_kilobyte_is_1024_bytes=True)} "
f"detected on specified {' '.join(config.get_model_config().device['dev'].name.split())} device!\n")
raise ValueError raise ValueError
# If the required memory without the snapshots will fit on the GPU then # If the required memory without the snapshots will fit on the GPU then
# transfer and store snaphots on host # transfer and store snaphots on host
if snaps_mem != 0 and total_mem - snaps_mem < config.get_model_config().cuda['gpu'].totalmem: if snaps_mem != 0 and total_mem - snaps_mem < device_mem:
config.get_model_config().cuda['snapsgpu2cpu'] = True config.get_model_config().device['snapsgpu2cpu'] = True
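A worked example of the fallback above, with purely illustrative numbers:

# Illustrative numbers only: the decision made by mem_check_device_snaps() above.
total_mem = 6 * 1024**3    # whole model, snapshots included
snaps_mem = 2 * 1024**3    # snapshots alone
device_mem = 5 * 1024**3   # device global memory

if total_mem - snaps_mem > device_mem:
    raise ValueError('Model will not fit on the device even without snapshots')
snapsgpu2cpu = snaps_mem != 0 and total_mem - snaps_mem < device_mem
print(snapsgpu2cpu)        # True: snapshots are staged back to the host as they are taken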
def mem_check_all(grids): def mem_check_all(grids):
@@ -259,11 +310,11 @@ def mem_check_all(grids):
memory. memory.
Args: Args:
grids (list): FDTDGrid objects. grids: list of FDTDGrid objects.
Returns: Returns:
total_mem (int): Total memory required for all grids. total_mem: int for total memory required for all grids.
mem_strs (list): Strings containing text of memory requirements for mem_str: list of strings containing text of memory requirements for
each grid. each grid.
""" """
@@ -297,59 +348,56 @@ def mem_check_all(grids):
mem_check_host(total_mem) mem_check_host(total_mem)
# Check if there is sufficient memory for any snapshots on GPU # Check if there is sufficient memory for any snapshots on GPU
if total_snaps_mem > 0 and config.sim_config.general['cuda']: if (total_snaps_mem > 0 and config.sim_config.general['solver'] == 'cuda' or
mem_check_gpu_snaps(total_mem, total_snaps_mem) config.sim_config.general['solver'] == 'opencl'):
mem_check_device_snaps(total_mem, total_snaps_mem)
return total_mem, mem_strs return total_mem, mem_strs
class GPU: def has_pycuda():
"""GPU information.""" """Check if pycuda module is installed."""
pycuda = True
try:
import pycuda
except ImportError:
pycuda = False
return pycuda
def __init__(self):
self.deviceID = None def has_pyopencl():
self.name = None """Check if pyopencl module is installed."""
self.pcibusID = None pyopencl = True
self.constmem = None try:
self.totalmem = None import pyopencl
except ImportError:
def get_cuda_gpu_info(self, drv, deviceID): pyopencl = False
"""Set information about GPU. return pyopencl
Args:
drv (object): pycuda driver.
deviceID (int): Device ID for GPU.
"""
self.deviceID = deviceID
self.name = drv.Device(self.deviceID).name()
self.pcibusID = drv.Device(self.deviceID).pci_bus_id()
self.constmem = drv.Device(self.deviceID).total_constant_memory
self.totalmem = drv.Device(self.deviceID).total_memory()
def detect_cuda_gpus(): def detect_cuda_gpus():
"""Get information about Nvidia GPU(s). """Get information about CUDA-capable GPU(s).
Returns: Returns:
gpus (list): Detected GPU(s) object(s). gpus: dict of detected pycuda device object(s) where device ID(s)
are keys.
""" """
try: gpus = {}
import pycuda.driver as drv
has_pycuda = True
except ImportError:
logger.warning('pycuda not detected - to use gprMax in GPU mode the pycuda package must be installed, and you must have a NVIDIA CUDA-Enabled GPU (https://developer.nvidia.com/cuda-gpus).')
has_pycuda = False
if has_pycuda: cuda_reqs = ('To use gprMax with CUDA you must:'
'\n 1) install pycuda'
'\n 2) install NVIDIA CUDA Toolkit (https://developer.nvidia.com/cuda-toolkit)'
'\n 3) have an NVIDIA CUDA-Enabled GPU (https://developer.nvidia.com/cuda-gpus)')
if has_pycuda():
import pycuda.driver as drv
drv.init() drv.init()
# Check and list any CUDA-Enabled GPUs # Check and list any CUDA-Enabled GPUs
deviceIDsavail = []
if drv.Device.count() == 0: if drv.Device.count() == 0:
logger.exception('No NVIDIA CUDA-Enabled GPUs detected (https://developer.nvidia.com/cuda-gpus)') logger.warning('No NVIDIA CUDA-Enabled GPUs detected!\n' + cuda_reqs)
raise ValueError
elif 'CUDA_VISIBLE_DEVICES' in os.environ: elif 'CUDA_VISIBLE_DEVICES' in os.environ:
deviceIDsavail = os.environ.get('CUDA_VISIBLE_DEVICES') deviceIDsavail = os.environ.get('CUDA_VISIBLE_DEVICES')
deviceIDsavail = [int(s) for s in deviceIDsavail.split(',')] deviceIDsavail = [int(s) for s in deviceIDsavail.split(',')]
@@ -357,33 +405,88 @@ def detect_cuda_gpus():
deviceIDsavail = range(drv.Device.count()) deviceIDsavail = range(drv.Device.count())
# Gather information about detected GPUs # Gather information about detected GPUs
gpus = []
for ID in deviceIDsavail: for ID in deviceIDsavail:
gpu = GPU() gpus[ID] = drv.Device(ID)
gpu.get_cuda_gpu_info(drv, ID)
gpus.append(gpu)
else: else:
gpus = None logger.warning('pycuda not detected!\n' + cuda_reqs)
return gpus return gpus
def print_cuda_info(devs):
""""Print info about detected CUDA-capable GPU(s).
Args:
devs: dict of detected pycuda device object(s) where where device ID(s)
are keys.
"""""
import pycuda
logger.basic('|--->CUDA:')
logger.debug(f'PyCUDA: {pycuda.VERSION_TEXT}')
for ID, gpu in devs.items():
logger.basic(f" |--->Device {ID}: {' '.join(gpu.name.split())} | "
f"{human_size(gpu.total_memory(), a_kilobyte_is_1024_bytes=True)}")
def detect_opencl(): def detect_opencl():
"""Get information about OpenCL platforms and devices. """Get information about OpenCL platforms and devices.
Returns: Returns:
gpus (list): Detected GPU(s) object(s). devs: dict of detected pyopencl device object(s) where device ID(s)
are keys.
""" """
try: devs = {}
import pyopencl as cl
has_pyopencl = True
except ImportError:
logger.warning('pyopencl not detected - to use gprMax with OpenCL, the pyopencl package must be installed, and you must have at least one OpenCL capable platform.')
has_pyopencl = False
if has_pyopencl: ocl_reqs = ('To use gprMax with OpenCL you must:'
platforms = cl.get_platforms() '\n 1) install pyopencl'
platform_names = [p.name for p in platforms] '\n 2) install appropriate OpenCL device driver(s)'
logger.info(platform_names) '\n 3) have at least one OpenCL-capable platform.')
if has_pyopencl():
import pyopencl as cl
try:
i = 0
for platform in cl.get_platforms():
for device in platform.get_devices():
devs[i] = device
i += 1
except:
logger.warning('No OpenCL-capable platforms detected!\n' + ocl_reqs)
else:
logger.warning('pyopencl not detected!\n' + ocl_reqs)
return devs
def print_opencl_info(devs):
""""Print info about detected OpenCL-capable device(s).
Args:
devs: dict of detected pyopencl device object(s) where where device ID(s)
are keys.
"""""
import pyopencl as cl
logger.basic('|--->OpenCL:')
logger.debug(f'PyOpenCL: {cl.VERSION_TEXT}')
for i, (ID, dev) in enumerate(devs.items()):
if i == 0:
platform = dev.platform.name
logger.basic(f' |--->Platform: {platform}')
if not platform == dev.platform.name:
logger.basic(f' |--->Platform: {dev.platform.name}')
types = cl.device_type.to_string(dev.type)
if 'CPU' in types:
type = 'CPU'
elif 'GPU' in types:
type = 'GPU'
else:
type = types
logger.basic(f" |--->Device {ID}: {type} | {' '.join(dev.name.split())} | "
f"{human_size(dev.global_mem_size, a_kilobyte_is_1024_bytes=True)}")

View File

@@ -50,7 +50,7 @@ def build_dispersive_material_templates():
env = Environment(loader = FileSystemLoader(os.path.join('gprMax', 'cython')), ) env = Environment(loader = FileSystemLoader(os.path.join('gprMax', 'cython')), )
template = env.get_template('fields_updates_dispersive_template') template = env.get_template('fields_updates_dispersive_template.jinja')
# Render dispersive template for different types # Render dispersive template for different types
r = template.render( r = template.render(
@@ -159,7 +159,7 @@ if 'cleanall' in sys.argv:
shutil.rmtree(p, ignore_errors=True) shutil.rmtree(p, ignore_errors=True)
print(f'Removed: {p}') print(f'Removed: {p}')
# Remove 'gprMax/cython/fields_updates_dispersive.pyx' if its there # Remove 'gprMax/cython/fields_updates_dispersive.jinja' if its there
if os.path.isfile(cython_disp_file): if os.path.isfile(cython_disp_file):
os.remove(cython_disp_file) os.remove(cython_disp_file)
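The dispersive-material template is now looked up with an explicit .jinja extension. A minimal sketch of the Jinja2 pattern setup.py uses (template name, directory and variables here are illustrative, not gprMax's own):

# Minimal sketch of rendering a Jinja2 template from a directory on disk.
from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader('templates'))
template = env.get_template('update_kernel.jinja')   # e.g. contains "{{ REAL }} v = {{ value }};"
rendered = template.render(REAL='float', value=1.0)
with open('update_kernel.c', 'w') as f:
    f.write(rendered)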

50
tools/get_host_spec.py Normal file
View File

@@ -0,0 +1,50 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from gprMax.utilities.host_info import (detect_cuda_gpus, detect_opencl,
get_host_info, print_cuda_info,
print_opencl_info)
from gprMax.utilities.utilities import get_terminal_width, human_size
# Host machine info.
hostinfo = get_host_info()
hyperthreadingstr = f", {hostinfo['logicalcores']} cores with Hyper-Threading" if hostinfo['hyperthreading'] else ''
hostname = (f"\n=== {hostinfo['hostname']}")
print(f"{hostname} {'=' * (get_terminal_width() - len(hostname) - 1)}")
print(f"\n{'Mfr/model:':<12} {hostinfo['machineID']}")
print(f"{'CPU:':<12} {hostinfo['sockets']} x {hostinfo['cpuID']} ({hostinfo['physicalcores']} cores{hyperthreadingstr})")
print(f"{'RAM:':<12} {human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True)}")
print(f"{'OS/Version:':<12} {hostinfo['osversion']}")
# OpenMP
print("\n\n=== OpenMP capabilities (gprMax will not use Hyper-Threading with OpenMP as there is no performance advantage)\n")
print(f"{'OpenMP threads: '} {hostinfo['physicalcores']}")
# CUDA
print("\n\n=== CUDA capabilities\n")
gpus = detect_cuda_gpus()
if gpus:
print_cuda_info(gpus)
# OpenCL
print("\n\n=== OpenCL capabilities\n")
devs = detect_opencl()
if devs:
print_opencl_info(devs)
print(f"\n{'=' * (get_terminal_width() - 1)}\n")