diff --git a/gprMax/config.py b/gprMax/config.py index eedb972b..f09b08ba 100644 --- a/gprMax/config.py +++ b/gprMax/config.py @@ -28,7 +28,7 @@ from scipy.constants import c from scipy.constants import epsilon_0 as e0 from scipy.constants import mu_0 as m0 -from .utilities.host_info import detect_cuda_gpus, get_host_info +from .utilities.host_info import detect_cuda_gpus, detect_opencl, get_host_info from .utilities.utilities import get_terminal_width logger = logging.getLogger(__name__) @@ -61,15 +61,21 @@ class ModelConfig: self.grids = [] self.ompthreads = None - # Store information for CUDA solver - # gpu: GPU object - # snapsgpu2cpu: copy snapshot data from GPU to CPU during simulation - # N.B. This will happen if the requested snapshots are too large to fit - # on the memory of the GPU. If True this will slow performance significantly - if sim_config.general['cuda']: - # If a list of lists of GPU deviceIDs is found, flatten it - if any(isinstance(element, list) for element in sim_config.args.gpu): - deviceID = [val for sublist in sim_config.args.gpu for val in sublist] + # Store information for CUDA or OpenCL solver + # dev: compute device object. + # snapsgpu2cpu: copy snapshot data from GPU to CPU during simulation. + # N.B. This will happen if the requested snapshots are too large to + # fit on the memory of the GPU. If True this will slow + # performance significantly. + if sim_config.general['solver'] == 'cuda' or sim_config.general['solver'] == 'opencl': + if sim_config.general['solver'] == 'cuda': + devs = sim_config.args.gpu + elif sim_config.general['solver'] == 'opencl': + devs = sim_config.args.opencl + + # If a list of lists of deviceIDs is found, flatten it + if any(isinstance(element, list) for element in devs): + deviceID = [val for sublist in devs for val in sublist] # If no deviceID is given default to using deviceID 0. Else if either # a single deviceID or list of deviceIDs is given use first one. 
@@ -78,8 +84,8 @@ class ModelConfig: except: deviceID = 0 - self.cuda = {'gpu': sim_config.set_model_gpu(deviceID), - 'snapsgpu2cpu': False} + self.device = {'dev': sim_config.set_model_device(deviceID), + 'snapsgpu2cpu': False} # Total memory usage for all grids in the model. Starts with 50MB overhead. self.mem_overhead = 50e6 @@ -88,29 +94,34 @@ class ModelConfig: self.reuse_geometry = False # String to print at start of each model run - s = f'\n--- Model {model_num + 1}/{sim_config.model_end}, input file: {sim_config.input_file_path}' - self.inputfilestr = Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" + Style.RESET_ALL + s = (f'\n--- Model {model_num + 1}/{sim_config.model_end}, ' + f'input file: {sim_config.input_file_path}') + self.inputfilestr = (Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" + + Style.RESET_ALL) # Output file path and name for specific model self.appendmodelnumber = '' if sim_config.single_model else str(model_num + 1) # Indexed from 1 self.set_output_file_path() # Numerical dispersion analysis parameters - # highestfreqthres: threshold (dB) down from maximum power (0dB) of main frequency used - # to calculate highest frequency for numerical dispersion analysis - # maxnumericaldisp: maximum allowable percentage physical phase-velocity phase error - # mingridsampling: minimum grid sampling of smallest wavelength for physical wave propagation + # highestfreqthres: threshold (dB) down from maximum power (0dB) of + # main frequency used to calculate highest + # frequency for numerical dispersion analysis. + # maxnumericaldisp: maximum allowable percentage physical + # phase-velocity phase error. + # mingridsampling: minimum grid sampling of smallest wavelength for + # physical wave propagation. 
self.numdispersion = {'highestfreqthres': 40, 'maxnumericaldisp': 2, 'mingridsampling': 3} # General information to configure materials - # maxpoles: Maximum number of dispersive material poles in a model - # dispersivedtype: Data type for dispersive materials - # dispersiveCdtype: Data type for dispersive materials in Cython - # drudelorentz: True/False model contains Drude or Lorentz materials + # maxpoles: Maximum number of dispersive material poles in a model. + # dispersivedtype: Data type for dispersive materials. + # dispersiveCdtype: Data type for dispersive materials in Cython. + # drudelorentz: True/False model contains Drude or Lorentz materials. # cudarealfunc: String to substitute into CUDA kernels for fields - # dependent on dispersive material type + # dependent on dispersive material type. self.materials = {'maxpoles': 0, 'dispersivedtype': None, 'dispersiveCdtype': None, @@ -123,32 +134,32 @@ class ModelConfig: else: return None def get_usernamespace(self): - return {'c': c, # Speed of light in free space (m/s) - 'e0': e0, # Permittivity of free space (F/m) - 'm0': m0, # Permeability of free space (H/m) - 'z0': np.sqrt(m0 / e0), # Impedance of free space (Ohms) - 'number_model_runs': sim_config.model_end, - 'current_model_run': model_num + 1, - 'inputfile': sim_config.input_file_path.resolve()} + tmp = {'number_model_runs': sim_config.model_end, + 'current_model_run': model_num + 1, + 'inputfile': sim_config.input_file_path.resolve()} + return dict(**sim_config.em_consts, **tmp) + def set_dispersive_material_types(self): """Set data type for disperive materials. Complex if Drude or Lorentz materials are present. Real if Debye materials. 
""" if self.materials['drudelorentz']: - self.materials['cudarealfunc'] = '.real()' + self.materials['crealfunc'] = '.real()' self.materials['dispersivedtype'] = sim_config.dtypes['complex'] self.materials['dispersiveCdtype'] = sim_config.dtypes['C_complex'] else: + self.materials['crealfunc'] = '' self.materials['dispersivedtype'] = sim_config.dtypes['float_or_double'] self.materials['dispersiveCdtype'] = sim_config.dtypes['C_float_or_double'] def set_output_file_path(self, outputdir=None): - """Output file path can be provided by the user via the API or an input file - command. If they haven't provided one use the input file path instead. + """Output file path can be provided by the user via the API or an input + file command. If they haven't provided one use the input file path + instead. Args: - outputdir (str): Output file directory given from input file command. + outputdir: string of output file directory given by input file command. """ if not outputdir: @@ -171,7 +182,7 @@ class ModelConfig: """Set directory to store any snapshots. Returns: - snapshot_dir (Path): directory to store snapshot files in. + snapshot_dir: Path to directory to store snapshot files in. """ parts = self.output_file_path.with_suffix('').parts snapshot_dir = Path(*parts[:-1], parts[-1] + '_snaps') @@ -187,7 +198,7 @@ class SimulationConfig: def __init__(self, args): """ Args: - args (Namespace): Arguments from either API or CLI. + args: Namespace with arguments from either API or CLI. 
""" self.args = args @@ -196,17 +207,19 @@ class SimulationConfig: logger.exception('The geometry fixed option cannot be used with MPI.') raise ValueError - # General settings for the simulation - # inputfilepath: path to inputfile location - # outputfilepath: path to outputfile location - # progressbars: whether to show progress bars on stdoout or not - # cpu, cuda, opencl: solver type - # subgrid: whether the simulation uses sub-grids - # precision: data type for electromagnetic field output (single/double) + if args.gpu and args.opencl: + logger.exception('You cannot use both CUDA and OpenCl simultaneously.') + raise ValueError - self.general = {'cpu': True, - 'cuda': False, - 'opencl': False, + # General settings for the simulation + # inputfilepath: path to inputfile location. + # outputfilepath: path to outputfile location. + # progressbars: whether to show progress bars on stdoout or not. + # solver: cpu, cuda, opencl. + # subgrid: whether the simulation uses sub-grids. + # precision: data type for electromagnetic field output (single/double). + + self.general = {'solver': 'cpu', 'subgrid': False, 'precision': 'single'} @@ -222,29 +235,37 @@ class SimulationConfig: # Store information about host machine self.hostinfo = get_host_info() - # Information about any Nvidia GPUs + # CUDA if self.args.gpu is not None: - self.general['cuda'] = True - self.general['cpu'] = False - self.general['opencl'] = False + self.general['solver'] = 'cuda' # Both single and double precision are possible on GPUs, but single # provides best performance. 
self.general['precision'] = 'single' - self.cuda = {'gpus': [], # gpus: list of GPU objects - 'nvcc_opts': None} # nvcc_opts: nvcc compiler options + self.devices = {'devs': [], # devs: list of pycuda device objects + 'nvcc_opts': None} # nvcc_opts: nvcc compiler options # Suppress nvcc warnings on Microsoft Windows if sys.platform == 'win32': - self.cuda['nvcc_opts'] = ['-w'] + self.devices['nvcc_opts'] = ['-w'] - # List of GPU objects of available GPUs - self.cuda['gpus'] = detect_cuda_gpus() + # Add pycuda available GPU(s) + self.devices['devs'] = detect_cuda_gpus() + + # OpenCL + if self.args.opencl is not None: + self.general['solver'] = 'opencl' + self.general['precision'] = 'single' + # List of pyopencl available device(s) + self.devices = {'devs': []} + self.devices['devs'] = detect_opencl() # Subgrid parameter may not exist if user enters via CLI try: self.general['subgrid'] = self.args.subgrid # Double precision should be used with subgrid for best accuracy self.general['precision'] = 'double' - if self.general['subgrid'] and self.general['cuda']: - logger.exception('The CUDA-based solver cannot currently be used with models that contain sub-grids.') + if (self.general['subgrid'] and + self.general['solver'] in ('cuda', 'opencl')): + logger.exception('You cannot currently use CUDA or OpenCL-based ' + 'solvers with models that contain sub-grids.') raise ValueError except AttributeError: self.general['subgrid'] = False @@ -262,34 +283,35 @@ class SimulationConfig: self._set_model_start_end() self._set_single_model() - def set_model_gpu(self, deviceID): - """Specify GPU object for model. + def set_model_device(self, deviceID): + """Specify pycuda/pyopencl object for model. Args: - deviceID (int): Requested deviceID of GPU + deviceID: int of requested deviceID of compute device. Returns: - gpu (GPU object): Requested GPU object. 
""" found = False - for gpu in self.cuda['gpus']: - if gpu.deviceID == deviceID: + for ID, dev in self.devices['devs'].items(): + if ID == deviceID: found = True - return gpu + return dev if not found: - logger.exception(f'GPU with device ID {deviceID} does not exist') + logger.exception(f'Compute device with device ID {deviceID} does ' + 'not exist.') raise ValueError def _set_precision(self): """Data type (precision) for electromagnetic field output. - Solid and ID arrays use 32-bit integers (0 to 4294967295) - Rigid arrays use 8-bit integers (the smallest available type to store true/false) - Fractal arrays use complex numbers - Dispersive coefficient arrays use either float or complex numbers - Main field arrays use floats + Solid and ID arrays use 32-bit integers (0 to 4294967295). + Rigid arrays use 8-bit integers (the smallest available type to store true/false). + Fractal arrays use complex numbers. + Dispersive coefficient arrays use either float or complex numbers. + Main field arrays use floats. 
""" if self.general['precision'] == 'single': @@ -298,16 +320,25 @@ class SimulationConfig: 'cython_float_or_double': cython.float, 'cython_complex': cython.floatcomplex, 'C_float_or_double': 'float', - 'C_complex': 'pycuda::complex', + 'C_complex': None, 'vtk_float': 'Float32'} + if self.general['solver'] == 'cuda': + self.dtypes['C_complex'] = 'pycuda::complex' + elif self.general['solver'] == 'opencl': + self.dtypes['C_complex'] = 'cfloat' + elif self.general['precision'] == 'double': self.dtypes = {'float_or_double': np.float64, 'complex': np.complex128, 'cython_float_or_double': cython.double, 'cython_complex': cython.doublecomplex, 'C_float_or_double': 'double', - 'C_complex': 'pycuda::complex', + 'C_complex': None, 'vtk_float': 'Float64'} + if self.general['solver'] == 'cuda': + self.dtypes['C_complex'] = 'pycuda::complex' + elif self.general['solver'] == 'opencl': + self.dtypes['C_complex'] = 'cdouble' def _get_byteorder(self): """Check the byte order of system to use for VTK files, i.e. geometry diff --git a/gprMax/contexts.py b/gprMax/contexts.py index 9ff1e2b7..34474aa9 100644 --- a/gprMax/contexts.py +++ b/gprMax/contexts.py @@ -25,18 +25,23 @@ import gprMax.config as config from ._version import __version__, codename from .model_build_run import ModelBuildRun from .solvers import create_G, create_solver -from .utilities.utilities import get_terminal_width, human_size, logo, timer +from .utilities.host_info import (detect_cuda_gpus, detect_opencl, + print_cuda_info, print_host_info, + print_opencl_info) +from .utilities.utilities import get_terminal_width, logo, timer logger = logging.getLogger(__name__) class Context: """Standard context - models are run one after another and each model - can exploit parallelisation using either OpenMP (CPU) or CUDA (GPU). + can exploit parallelisation using either OpenMP (CPU), CUDA (GPU), or + OpenCL (CPU/GPU). 
""" def __init__(self): - self.model_range = range(config.sim_config.model_start, config.sim_config.model_end) + self.model_range = range(config.sim_config.model_start, + config.sim_config.model_end) self.tsimend = None self.tsimstart = None @@ -44,10 +49,12 @@ class Context: """Run the simulation in the correct context.""" self.tsimstart = timer() self.print_logo_copyright() - self.print_host_info() - if config.sim_config.general['cuda']: - self.print_gpu_info() - + print_host_info(config.sim_config.hostinfo) + if config.sim_config.general['solver'] == 'cuda': + print_cuda_info(config.sim_config.devices['devs']) + elif config.sim_config.general['solver'] == 'opencl': + print_opencl_info(config.sim_config.devices['devs']) + # Clear list of model configs. It can be retained when gprMax is # called in a loop, and want to avoid this. config.model_configs = [] @@ -79,33 +86,23 @@ class Context: logo_copyright = logo(__version__ + ' (' + codename + ')') logger.basic(logo_copyright) - def print_host_info(self): - """Print information about the host machine.""" - hyperthreadingstr = f", {config.sim_config.hostinfo['logicalcores']} cores with Hyper-Threading" if config.sim_config.hostinfo['hyperthreading'] else '' - logger.basic(f"\nHost: {config.sim_config.hostinfo['hostname']} | {config.sim_config.hostinfo['machineID']} | {config.sim_config.hostinfo['sockets']} x {config.sim_config.hostinfo['cpuID']} ({config.sim_config.hostinfo['physicalcores']} cores{hyperthreadingstr}) | {human_size(config.sim_config.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} RAM | {config.sim_config.hostinfo['osversion']}") - - def print_gpu_info(self): - """Print information about any NVIDIA CUDA GPUs detected.""" - gpus_info = [] - for gpu in config.sim_config.cuda['gpus']: - gpus_info.append(f'{gpu.deviceID} - {gpu.name}, {human_size(gpu.totalmem, a_kilobyte_is_1024_bytes=True)}') - logger.basic(f"GPU resources: {' | '.join(gpus_info)}") - def print_time_report(self): """Print the total 
simulation time based on context.""" - s = f"\n=== Simulation completed in [HH:MM:SS]: {datetime.timedelta(seconds=self.tsimend - self.tsimstart)}" + s = ("\n=== Simulation completed in [HH:MM:SS]: " + f"{datetime.timedelta(seconds=self.tsimend - self.tsimstart)}") logger.basic(f"{s} {'=' * (get_terminal_width() - 1 - len(s))}\n") class MPIContext(Context): """Mixed mode MPI/OpenMP/CUDA context - MPI task farm is used to distribute - models, and each model parallelised using either OpenMP (CPU) - or CUDA (GPU). + models, and each model parallelised using either OpenMP (CPU), + CUDA (GPU), or OpenCL (CPU/GPU). """ def __init__(self): super().__init__() from mpi4py import MPI + from gprMax.mpi import MPIExecutor self.comm = MPI.COMM_WORLD @@ -149,7 +146,9 @@ class MPIContext(Context): if executor.is_master(): - if config.sim_config.general['cuda']: - if executor.size - 1 > len(config.sim_config.cuda['gpus']): + if config.sim_config.general['solver'] == 'cuda': + if executor.size - 1 > len(config.sim_config.devices['devs']): - logger.exception('Not enough GPU resources for number of MPI tasks requested. Number of MPI tasks should be equal to number of GPUs + 1.') + logger.exception('Not enough GPU resources for number of ' + 'MPI tasks requested. Number of MPI tasks ' + 'should be equal to number of GPUs + 1.') raise ValueError # Create job list @@ -175,7 +174,8 @@ class SPOTPYContext(Context): (https://github.com/thouska/spotpy). SPOTPY coupling can utilise 2 levels of MPI parallelism - where the top level is where SPOPTY optmisation algorithms can be parallelised, and the lower level is where gprMax - models can be parallelised using either OpenMP (CPU) or CUDA (GPU). + models can be parallelised using either OpenMP (CPU), CUDA (GPU), or + OpenCL (CPU/GPU). 
""" def __init__(self): diff --git a/gprMax/cuda/fields_updates.py b/gprMax/cuda/fields_updates.py deleted file mode 100644 index bd168b52..00000000 --- a/gprMax/cuda/fields_updates.py +++ /dev/null @@ -1,244 +0,0 @@ -# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom -# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley -# -# This file is part of gprMax. -# -# gprMax is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# gprMax is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with gprMax. If not, see . - -from string import Template - -kernel_template_fields = Template(""" - -#include - -// Macros for converting subscripts to linear index: -#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n) -#define INDEX2D_MATDISP(m, n) (m)*($NY_MATDISPCOEFFS)+(n) -#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k) -#define INDEX4D_ID(p, i, j, k) (p)*($NX_ID)*($NY_ID)*($NZ_ID)+(i)*($NY_ID)*($NZ_ID)+(j)*($NZ_ID)+(k) -#define INDEX4D_T(p, i, j, k) (p)*($NX_T)*($NY_T)*($NZ_T)+(i)*($NY_T)*($NZ_T)+(j)*($NZ_T)+(k) - -// Material coefficients (read-only) in constant memory (64KB)_ -__device__ __constant__ $REAL updatecoeffsE[$N_updatecoeffsE]; -__device__ __constant__ $REAL updatecoeffsH[$N_updatecoeffsH]; - - -/////////////////////////////////////////////// -// Electric field updates - normal materials // -/////////////////////////////////////////////// - -__global__ void update_electric(int NX, int NY, int NZ, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const 
$REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) { - - // This function updates electric field values. - // - // Args: - // NX, NY, NZ: Number of cells of the model domain - // ID, E, H: Access to ID and field component arrays - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for 3D field arrays - int i = idx / ($NY_FIELDS * $NZ_FIELDS); - int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS; - int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS; - - // Convert the linear index to subscripts for 4D material ID array - int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID); - int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID; - int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID; - - // Ex component - if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) { - int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)]; - Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]); - } - - // Ey component - if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) { - int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)]; - Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]); - } - - // Ez component - if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) { - int materialEz = 
ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)]; - Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]); - } -} - - -//////////////////////////// -// Magnetic field updates // -//////////////////////////// - -__global__ void update_magnetic(int NX, int NY, int NZ, const unsigned int* __restrict__ ID, $REAL *Hx, $REAL *Hy, $REAL *Hz, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) { - - // This function updates magnetic field values. - // - // Args: - // NX, NY, NZ: Number of cells of the model domain - // ID, E, H: Access to ID and field component arrays - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for 3D field arrays - int i = idx / ($NY_FIELDS * $NZ_FIELDS); - int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS; - int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS; - - // Convert the linear index to subscripts for 4D material ID array - int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID); - int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID; - int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID; - - // Hx component - if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) { - int materialHx = ID[INDEX4D_ID(3,i_ID,j_ID,k_ID)]; - Hx[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHx,0)] * Hx[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHx,2)] * (Ez[INDEX3D_FIELDS(i,j+1,k)] - Ez[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHx,3)] * (Ey[INDEX3D_FIELDS(i,j,k+1)] - Ey[INDEX3D_FIELDS(i,j,k)]); - } - - // Hy component - if (NY != 1 && i >= 0 && i < NX && j 
> 0 && j < NY && k >= 0 && k < NZ) { - int materialHy = ID[INDEX4D_ID(4,i_ID,j_ID,k_ID)]; - Hy[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHy,0)] * Hy[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHy,3)] * (Ex[INDEX3D_FIELDS(i,j,k+1)] - Ex[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHy,1)] * (Ez[INDEX3D_FIELDS(i+1,j,k)] - Ez[INDEX3D_FIELDS(i,j,k)]); - } - - // Hz component - if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) { - int materialHz = ID[INDEX4D_ID(5,i_ID,j_ID,k_ID)]; - Hz[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHz,0)] * Hz[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHz,1)] * (Ey[INDEX3D_FIELDS(i+1,j,k)] - Ey[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHz,2)] * (Ex[INDEX3D_FIELDS(i,j+1,k)] - Ex[INDEX3D_FIELDS(i,j,k)]); - } -} - - -/////////////////////////////////////////////////// -// Electric field updates - dispersive materials // -/////////////////////////////////////////////////// - -__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) { - - // This function is part A of updates to electric field values when dispersive materials (with multiple poles) are present. 
- // - // Args: - // NX, NY, NZ: Number of cells of the model domain - // MAXPOLES: Maximum number of dispersive material poles present in model - // updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for 3D field arrays - int i = idx / ($NY_FIELDS * $NZ_FIELDS); - int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS; - int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS; - - // Convert the linear index to subscripts for 4D material ID array - int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID); - int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID; - int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID; - - // Convert the linear index to subscripts for 4D dispersive array - int i_T = (idx % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T); - int j_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T; - int k_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T; - - // Ex component - if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) { - int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)]; - $REAL phi = 0; - for (int pole = 0; pole < MAXPOLES; pole++) { - phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,pole*3)]$REALFUNC * Tx[INDEX4D_T(pole,i_T,j_T,k_T)]$REALFUNC; - Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,1+(pole*3))] * Tx[INDEX4D_T(pole,i_T,j_T,k_T)] + updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))] * Ex[INDEX3D_FIELDS(i,j,k)]; - } - Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) - 
updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * phi; - } - - // Ey component - if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) { - int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)]; - $REAL phi = 0; - for (int pole = 0; pole < MAXPOLES; pole++) { - phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,pole*3)]$REALFUNC * Ty[INDEX4D_T(pole,i_T,j_T,k_T)]$REALFUNC; - Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,1+(pole*3))] * Ty[INDEX4D_T(pole,i_T,j_T,k_T)] + updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))] * Ey[INDEX3D_FIELDS(i,j,k)]; - } - Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * phi; - } - - // Ez component - if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) { - int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)]; - $REAL phi = 0; - for (int pole = 0; pole < MAXPOLES; pole++) { - phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,pole*3)]$REALFUNC * Tz[INDEX4D_T(pole,i_T,j_T,k_T)]$REALFUNC; - Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,1+(pole*3))] * Tz[INDEX4D_T(pole,i_T,j_T,k_T)] + updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))] * Ez[INDEX3D_FIELDS(i,j,k)]; - } - Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]) - 
updatecoeffsE[INDEX2D_MAT(materialEz,4)] * phi; - } -} - -__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) { - - // This function is part B which updates the dispersive field arrays when dispersive materials (with multiple poles) are present. - // - // Args: - // NX, NY, NZ: Number of cells of the model domain - // MAXPOLES: Maximum number of dispersive material poles present in model - // updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for 3D field arrays - int i = idx / ($NY_FIELDS * $NZ_FIELDS); - int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS; - int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS; - - // Convert the linear index to subscripts for 4D material ID array - int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID); - int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID; - int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID; - - // Convert the linear index to subscripts for 4D dispersive array - int i_T = (idx % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T); - int j_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T; - int k_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T; - - // Ex component - if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) { - int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)]; - for (int pole = 0; pole < MAXPOLES; pole++) { - Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = Tx[INDEX4D_T(pole,i_T,j_T,k_T)] - 
updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))] * Ex[INDEX3D_FIELDS(i,j,k)]; - } - } - - // Ey component - if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) { - int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)]; - for (int pole = 0; pole < MAXPOLES; pole++) { - Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = Ty[INDEX4D_T(pole,i_T,j_T,k_T)] - updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))] * Ey[INDEX3D_FIELDS(i,j,k)]; - } - } - - // Ez component - if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) { - int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)]; - for (int pole = 0; pole < MAXPOLES; pole++) { - Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = Tz[INDEX4D_T(pole,i_T,j_T,k_T)] - updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))] * Ez[INDEX3D_FIELDS(i,j,k)]; - } - } -} - -""") diff --git a/gprMax/cuda/snapshots.py b/gprMax/cuda/snapshots.py index e7fab069..6074ec40 100644 --- a/gprMax/cuda/snapshots.py +++ b/gprMax/cuda/snapshots.py @@ -18,7 +18,7 @@ from string import Template -kernel_template_store_snapshot = Template(""" +knl_template_store_snapshot = Template(""" // Macros for converting subscripts to linear index: #define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k) diff --git a/gprMax/cuda/source_updates.py b/gprMax/cuda/source_updates.py index e7a5ed70..751843a3 100644 --- a/gprMax/cuda/source_updates.py +++ b/gprMax/cuda/source_updates.py @@ -18,7 +18,7 @@ from string import Template -kernel_template_sources = Template(""" +knl_template_sources = Template(""" // Macros for converting subscripts to linear index: #define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n) diff --git a/gprMax/cuda_opencl/fields_updates_base.tmpl b/gprMax/cuda_opencl/fields_updates_base.tmpl new file mode 100644 index 00000000..5e066b66 --- /dev/null +++ b/gprMax/cuda_opencl/fields_updates_base.tmpl @@ -0,0 +1,276 @@ +// Copyright (C) 2015-2022: The University of Edinburgh, 
United Kingdom +// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +// +// This file is part of gprMax. +// +// gprMax is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// gprMax is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with gprMax. If not, see <http://www.gnu.org/licenses/>. + + +{% block complex_header %}{% endblock complex_header %} + +// Macros for converting subscripts to linear index: +#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n) +#define IDX2D_MATDISP(m, n) (m)*({{NY_MATDISPCOEFFS}})+(n) +#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k) +#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k) +#define IDX4D_T(p, i, j, k) (p)*({{NX_T}})*({{NY_T}})*({{NZ_T}})+(i)*({{NY_T}})*({{NZ_T}})+(j)*({{NZ_T}})+(k) + +// Material coefficients (read-only) stored in constant memory of compute device +{% block constmem %}{% endblock constmem %} + +/////////////////////////////////////////////// +// Electric field updates - normal materials // +/////////////////////////////////////////////// + +{{KERNEL}} void update_electric(int NX, + int NY, + int NZ,{% filter indent(width=30) %}{% block electric_args %}{% endblock electric_args %}{% endfilter %}{ + + // This function updates electric field values. 
+ // + // Args: + // NX, NY, NZ: Number of cells of the model domain + // ID, E, H: Access to ID and field component arrays + + // Obtain the linear index corresponding to the current thread + {% block threadidx %}{% endblock threadidx %} + + // Convert the linear index to subscripts for 3D field arrays + int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}}); + int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}}; + int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}}; + + // Convert the linear index to subscripts for 4D material ID array + int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}}); + int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}}; + int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}}; + + // Ex component + if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) { + int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)]; + Ex[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(i,j,k)] + + updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i,j-1,k)]) - + updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i,j,k-1)]); + } + + // Ey component + if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) { + int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)]; + Ey[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(i,j,k)] + + updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j,k-1)]) - + updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i-1,j,k)]); + } + + // Ez component + if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) { + int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)]; + Ez[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * 
Ez[IDX3D_FIELDS(i,j,k)] + + updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i-1,j,k)]) - + updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j-1,k)]); + } +} + + +//////////////////////////// +// Magnetic field updates // +//////////////////////////// + +{{KERNEL}} void update_magnetic(int NX, + int NY, + int NZ,{% filter indent(width=30) %}{% block magnetic_args %}{% endblock magnetic_args %}{% endfilter %}{ + + // This function updates magnetic field values. + // + // Args: + // NX, NY, NZ: Number of cells of the model domain + // ID, E, H: Access to ID and field component arrays + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for 3D field arrays + int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}}); + int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}}; + int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}}; + + // Convert the linear index to subscripts for 4D material ID array + int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}}); + int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}}; + int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}}; + + // Hx component + if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) { + int materialHx = ID[IDX4D_ID(3,i_ID,j_ID,k_ID)]; + Hx[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHx,0)] * Hx[IDX3D_FIELDS(i,j,k)] - + updatecoeffsH[IDX2D_MAT(materialHx,2)] * (Ez[IDX3D_FIELDS(i,j+1,k)] - Ez[IDX3D_FIELDS(i,j,k)]) + + updatecoeffsH[IDX2D_MAT(materialHx,3)] * (Ey[IDX3D_FIELDS(i,j,k+1)] - Ey[IDX3D_FIELDS(i,j,k)]); + } + + // Hy component + if (NY != 1 && i >= 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) { + int materialHy = ID[IDX4D_ID(4,i_ID,j_ID,k_ID)]; + Hy[IDX3D_FIELDS(i,j,k)] = 
updatecoeffsH[IDX2D_MAT(materialHy,0)] * Hy[IDX3D_FIELDS(i,j,k)] - + updatecoeffsH[IDX2D_MAT(materialHy,3)] * (Ex[IDX3D_FIELDS(i,j,k+1)] - Ex[IDX3D_FIELDS(i,j,k)]) + + updatecoeffsH[IDX2D_MAT(materialHy,1)] * (Ez[IDX3D_FIELDS(i+1,j,k)] - Ez[IDX3D_FIELDS(i,j,k)]); + } + + // Hz component + if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) { + int materialHz = ID[IDX4D_ID(5,i_ID,j_ID,k_ID)]; + Hz[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHz,0)] * Hz[IDX3D_FIELDS(i,j,k)] - + updatecoeffsH[IDX2D_MAT(materialHz,1)] * (Ey[IDX3D_FIELDS(i+1,j,k)] - Ey[IDX3D_FIELDS(i,j,k)]) + + updatecoeffsH[IDX2D_MAT(materialHz,2)] * (Ex[IDX3D_FIELDS(i,j+1,k)] - Ex[IDX3D_FIELDS(i,j,k)]); + } +} + + +/////////////////////////////////////////////////// +// Electric field updates - dispersive materials // +/////////////////////////////////////////////////// + +{{KERNEL}} void update_electric_dispersive_A(int NX, + int NY, + int NZ, + int MAXPOLES,{% filter indent(width=43) %}{% block electric_dispersive_A_args %}{% endblock electric_dispersive_A_args %}{% endfilter %}{ + + // This function is part A of updates to electric field values when + // dispersive materials (with multiple poles) are present. 
+ // + // Args: + // NX, NY, NZ: Number of cells of the model domain + // MAXPOLES: Maximum number of dispersive material poles present in model + // updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for 3D field arrays + int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}}); + int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}}; + int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}}; + + // Convert the linear index to subscripts for 4D material ID array + int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}}); + int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}}; + int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}}; + + // Convert the linear index to subscripts for 4D dispersive array + int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}}); + int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}}; + int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}}; + + // Ex component + if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) { + int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)]; + {{REAL}} phi = 0; + for (int pole = 0; pole < MAXPOLES; pole++) { + phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEx,pole*3)]{{REALFUNC}} * Tx[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}}; + Tx[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEx,1+(pole*3))] * Tx[IDX4D_T(pole,i_T,j_T,k_T)] + + updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(i,j,k)]; + } + Ex[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(i,j,k)] + + updatecoeffsE[IDX2D_MAT(materialEx,2)] * 
(Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i,j-1,k)]) - + updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i,j,k-1)]) - + updatecoeffsE[IDX2D_MAT(materialEx,4)] * phi; + } + + // Ey component + if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) { + int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)]; + {{REAL}} phi = 0; + for (int pole = 0; pole < MAXPOLES; pole++) { + phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEy,pole*3)]{{REALFUNC}} * Ty[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}}; + Ty[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEy,1+(pole*3))] * Ty[IDX4D_T(pole,i_T,j_T,k_T)] + + updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(i,j,k)]; + } + Ey[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(i,j,k)] + + updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j,k-1)]) - + updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i-1,j,k)]) - + updatecoeffsE[IDX2D_MAT(materialEy,4)] * phi; + } + + // Ez component + if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) { + int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)]; + {{REAL}} phi = 0; + for (int pole = 0; pole < MAXPOLES; pole++) { + phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEz,pole*3)]{{REALFUNC}} * Tz[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}}; + Tz[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEz,1+(pole*3))] * Tz[IDX4D_T(pole,i_T,j_T,k_T)] + + updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(i,j,k)]; + } + Ez[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(i,j,k)] + + updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i-1,j,k)]) - + updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j-1,k)]) 
- + updatecoeffsE[IDX2D_MAT(materialEz,4)] * phi; + } +} + +{{KERNEL}} void update_electric_dispersive_B(int NX, + int NY, + int NZ, + int MAXPOLES,{% filter indent(width=43) %}{% block electric_dispersive_B_args %}{% endblock electric_dispersive_B_args %}{% endfilter %}{ + + // This function is part B which updates the dispersive field arrays when + // dispersive materials (with multiple poles) are present. + // + // Args: + // NX, NY, NZ: Number of cells of the model domain + // MAXPOLES: Maximum number of dispersive material poles present in model + // updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for 3D field arrays + int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}}); + int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}}; + int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}}; + + // Convert the linear index to subscripts for 4D material ID array + int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}}); + int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}}; + int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}}; + + // Convert the linear index to subscripts for 4D dispersive array + int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}}); + int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}}; + int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}}; + + // Ex component + if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) { + int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)]; + for (int pole = 0; pole < MAXPOLES; pole++) { + Tx[IDX4D_T(pole,i_T,j_T,k_T)] = Tx[IDX4D_T(pole,i_T,j_T,k_T)] - + 
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(i,j,k)]; + } + } + + // Ey component + if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) { + int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)]; + for (int pole = 0; pole < MAXPOLES; pole++) { + Ty[IDX4D_T(pole,i_T,j_T,k_T)] = Ty[IDX4D_T(pole,i_T,j_T,k_T)] - + updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(i,j,k)]; + } + } + + // Ez component + if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) { + int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)]; + for (int pole = 0; pole < MAXPOLES; pole++) { + Tz[IDX4D_T(pole,i_T,j_T,k_T)] = Tz[IDX4D_T(pole,i_T,j_T,k_T)] - + updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(i,j,k)]; + } + } +} \ No newline at end of file diff --git a/gprMax/cuda_opencl/fields_updates_cuda.tmpl b/gprMax/cuda_opencl/fields_updates_cuda.tmpl new file mode 100644 index 00000000..fc0baf67 --- /dev/null +++ b/gprMax/cuda_opencl/fields_updates_cuda.tmpl @@ -0,0 +1,65 @@ +{% extends "fields_updates_base.tmpl" %} + +{% block complex_header %} +#include <pycuda-complex.hpp> +{% endblock complex_header %} + + +{% block constmem %} +__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}]; +__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}]; +{% endblock constmem %} + + +{% block threadidx %} + int idx = blockIdx.x * blockDim.x + threadIdx.x; +{% endblock threadidx %} + + +{% block electric_args %} +const unsigned int* __restrict__ ID, +{{REAL}} *Ex, +{{REAL}} *Ey, +{{REAL}} *Ez, +const {{REAL}}* __restrict__ Hx, +const {{REAL}}* __restrict__ Hy, +const {{REAL}}* __restrict__ Hz) +{% endblock electric_args %} + + +{% block magnetic_args %} +const unsigned int* __restrict__ ID, +{{REAL}} *Hx, +{{REAL}} *Hy, +{{REAL}} *Hz, +const {{REAL}}* __restrict__ Ex, +const {{REAL}}* __restrict__ Ey, +const {{REAL}}* __restrict__ Ez) +{% endblock magnetic_args %} 
+ + +{% block electric_dispersive_A_args %} +const {{COMPLEX}}* __restrict__ updatecoeffsdispersive, +{{COMPLEX}} *Tx, +{{COMPLEX}} *Ty, +{{COMPLEX}} *Tz, +const unsigned int* __restrict__ ID, +{{REAL}} *Ex, +{{REAL}} *Ey, +{{REAL}} *Ez, +const {{REAL}}* __restrict__ Hx, +const {{REAL}}* __restrict__ Hy, +const {{REAL}}* __restrict__ Hz) +{% endblock electric_dispersive_A_args %} + + +{% block electric_dispersive_B_args %} +const {{COMPLEX}}* __restrict__ updatecoeffsdispersive, +{{COMPLEX}} *Tx, +{{COMPLEX}} *Ty, +{{COMPLEX}} *Tz, +const unsigned int* __restrict__ ID, +const {{REAL}}* __restrict__ Ex, +const {{REAL}}* __restrict__ Ey, +const {{REAL}}* __restrict__ Ez) +{% endblock electric_dispersive_B_args %} \ No newline at end of file diff --git a/gprMax/cuda_opencl/fields_updates_opencl.tmpl b/gprMax/cuda_opencl/fields_updates_opencl.tmpl new file mode 100644 index 00000000..6c499de0 --- /dev/null +++ b/gprMax/cuda_opencl/fields_updates_opencl.tmpl @@ -0,0 +1,77 @@ +{% extends "fields_updates_base.tmpl" %} + +{% block complex_header %} +#include <pyopencl-complex.h> +{% endblock complex_header %} + + +{% block constmem %} +__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] = +{ + {% for i in updatecoeffsE %} + {{i}}, + {% endfor %} +}; + +__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] = +{ + {% for i in updatecoeffsH %} + {{i}}, + {% endfor %} +}; +{% endblock constmem %} + + +{% block threadidx %} + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + + get_global_id(1) * get_global_size(0) + get_global_id(0); +{% endblock threadidx %} + + +{% block electric_args %} +__global const unsigned int* restrict ID, +__global {{REAL}} *Ex, +__global {{REAL}} *Ey, +__global {{REAL}} *Ez, +__global const {{REAL}} * restrict Hx, +__global const {{REAL}} * restrict Hy, +__global const {{REAL}} * restrict Hz) +{% endblock electric_args %} + + +{% block magnetic_args %} +__global const unsigned int* restrict ID, +__global {{REAL}} *Hx, +__global {{REAL}} *Hy, 
+__global {{REAL}} *Hz, +__global const {{REAL}}* restrict Ex, +__global const {{REAL}}* restrict Ey, +__global const {{REAL}}* restrict Ez) +{% endblock magnetic_args %} + + +{% block electric_dispersive_A_args %} +__global const {{COMPLEX}}* restrict updatecoeffsdispersive, +__global {{COMPLEX}} *Tx, +__global {{COMPLEX}} *Ty, +__global {{COMPLEX}} *Tz, +__global const unsigned int* restrict ID, +__global {{REAL}} *Ex, +__global {{REAL}} *Ey, +__global {{REAL}} *Ez, +__global const {{REAL}}* restrict Hx, +__global const {{REAL}}* restrict Hy, +__global const {{REAL}}* restrict Hz) +{% endblock electric_dispersive_A_args %} + + +{% block electric_dispersive_B_args %} +__global const {{COMPLEX}}* restrict updatecoeffsdispersive, +__global {{COMPLEX}} *Tx, +__global {{COMPLEX}} *Ty, +__global {{COMPLEX}} *Tz, +__global const unsigned int* restrict ID, +__global const {{REAL}}* restrict Ex, +__global const {{REAL}}* restrict Ey, +__global const {{REAL}}* restrict Ez) +{% endblock electric_dispersive_B_args %} \ No newline at end of file diff --git a/gprMax/cuda_opencl/pml_updates_electric_HORIPML_base.tmpl b/gprMax/cuda_opencl/pml_updates_electric_HORIPML_base.tmpl new file mode 100644 index 00000000..cb40b9a9 --- /dev/null +++ b/gprMax/cuda_opencl/pml_updates_electric_HORIPML_base.tmpl @@ -0,0 +1,1180 @@ +// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +// +// This file is part of gprMax. +// +// gprMax is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// gprMax is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with gprMax. If not, see <http://www.gnu.org/licenses/>. + + +// Macros for converting subscripts to linear index: +#define IDX2D_R(m, n) (m)*(NY_R)+(n) +#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n) +#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k) +#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k) +#define IDX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k) +#define IDX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k) + +// Material coefficients (read-only) stored in constant memory of compute device +{% block constmem %}{% endblock constmem %} + + +{{KERNEL}} void order1_xminus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{% block x_args %}{% endblock x_args %}{% endfilter %} + {{REAL}} d) { + + // This function updates the Ey and Ez field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {% block threadidx %}{% endblock threadidx %} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dHy, dHz; + {{REAL}} dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - i1; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i1)] - 1; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - i2; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i2)] - 1; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHy = 
(Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +} + + +{{KERNEL}} void order2_xminus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.x_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Ey and Ez field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz; + {{REAL}} dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 
&& i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - i1; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i1)]; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + RA1 = RA[IDX2D_R(1,i1)]; + RB1 = RB[IDX2D_R(1,i1)]; + RE1 = RE[IDX2D_R(1,i1)]; + RF1 = RF[IDX2D_R(1,i1)]; + RA01 = RA[IDX2D_R(0,i1)] * RA[IDX2D_R(1,i1)] - 1; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - i2; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i2)]; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + RA1 = RA[IDX2D_R(1,i2)]; + RB1 = RB[IDX2D_R(1,i2)]; + RE1 = RE[IDX2D_R(1,i2)]; + RF1 = RF[IDX2D_R(1,i2)]; + RA01 = RA[IDX2D_R(0,i2)] * RA[IDX2D_R(1,i2)] - 1; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHy + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dHy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +} + + +{{KERNEL}} void order1_xplus(int xs, + int xf, + int ys, + int yf, + int zs, + int 
zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.x_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Ey and Ez field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dHy, dHz; + {{REAL}} dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i1)] - 1; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[IDX3D_FIELDS(ii,jj,kk)] = 
Ey[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i2)] - 1; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +} + + +{{KERNEL}} void order2_xplus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.x_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Ey and Ez field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz; + {{REAL}} dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i1)]; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + RA1 = RA[IDX2D_R(1,i1)]; + RB1 = RB[IDX2D_R(1,i1)]; + RE1 = RE[IDX2D_R(1,i1)]; + RF1 = RF[IDX2D_R(1,i1)]; + RA01 = RA[IDX2D_R(0,i1)] * RA[IDX2D_R(1,i1)] - 1; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx 
&& j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i2)]; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + RA1 = RA[IDX2D_R(1,i2)]; + RB1 = RB[IDX2D_R(1,i2)]; + RE1 = RE[IDX2D_R(1,i2)]; + RF1 = RF[IDX2D_R(1,i2)]; + RA01 = RA[IDX2D_R(0,i2)] * RA[IDX2D_R(1,i2)] - 1; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHy + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dHy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +} + + +{{KERNEL}} void order1_yminus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{% block y_args %}{% endblock y_args %}{% endfilter %} + {{REAL}} d) { + + // This function updates the Ex and Ez field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dHx, dHz; + {{REAL}} dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - j1; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j1)] - 1; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + + // Ex + materialEx = ID[IDX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - j2; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j2)] - 1; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - 
Hx[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +{{KERNEL}} void order2_yminus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.y_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Ex and Ez field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz; + {{REAL}} dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < 
nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - j1; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j1)]; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + RA1 = RA[IDX2D_R(1,j1)]; + RB1 = RB[IDX2D_R(1,j1)]; + RE1 = RE[IDX2D_R(1,j1)]; + RF1 = RF[IDX2D_R(1,j1)]; + RA01 = RA[IDX2D_R(0,j1)] * RA[IDX2D_R(1,j1)] - 1; + + // Ex + materialEx = ID[IDX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - j2; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j2)]; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + RA1 = RA[IDX2D_R(1,j2)]; + RB1 = RB[IDX2D_R(1,j2)]; + RE1 = RE[IDX2D_R(1,j2)]; + RF1 = RF[IDX2D_R(1,j2)]; + RA01 = RA[IDX2D_R(0,j2)] * RA[IDX2D_R(1,j2)] - 1; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +{{KERNEL}} void order1_yplus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int 
NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.y_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Ex and Ez field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dHx, dHz; + {{REAL}} dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j1)] - 1; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + + // Ex + materialEx = ID[IDX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] + 
updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j2)] - 1; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +{{KERNEL}} void order2_yplus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.y_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Ex and Ez field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz; + {{REAL}} dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j1)]; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + RA1 = RA[IDX2D_R(1,j1)]; + RB1 = RB[IDX2D_R(1,j1)]; + RE1 = RE[IDX2D_R(1,j1)]; + RF1 = RF[IDX2D_R(1,j1)]; + RA01 = RA[IDX2D_R(0,j1)] * RA[IDX2D_R(1,j1)] - 1; + + // Ex + materialEx = ID[IDX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx 
&& j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j2)]; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + RA1 = RA[IDX2D_R(1,j2)]; + RB1 = RB[IDX2D_R(1,j2)]; + RE1 = RE[IDX2D_R(1,j2)]; + RF1 = RF[IDX2D_R(1,j2)]; + RA01 = RA[IDX2D_R(0,j2)] * RA[IDX2D_R(1,j2)] - 1; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +{{KERNEL}} void order1_zminus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{% block z_args %}{% endblock z_args %}{% endfilter %} + {{REAL}} d) { + + // This function updates the Ex and Ey field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dHx, dHy; + {{REAL}} dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - k1; + + // PML coefficients + RA01 = RA[IDX2D_R(0,k1)] - 1; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + + // Ex + materialEx = ID[IDX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - k2; + + // PML coefficients + RA01 = RA[IDX2D_R(0,k2)] - 1; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - 
Hx[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +{{KERNEL}} void order2_zminus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.z_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Ex and Ey field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy; + {{REAL}} dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < 
nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - k1; + + // PML coefficients + RA0 = RA[IDX2D_R(0,k1)]; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + RA1 = RA[IDX2D_R(1,k1)]; + RB1 = RB[IDX2D_R(1,k1)]; + RE1 = RE[IDX2D_R(1,k1)]; + RF1 = RF[IDX2D_R(1,k1)]; + RA01 = RA[IDX2D_R(0,k1)] * RA[IDX2D_R(1,k1)] - 1; + + // Ex + materialEx = ID[IDX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHy + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dHy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - k2; + + // PML coefficients + RA0 = RA[IDX2D_R(0,k2)]; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + RA1 = RA[IDX2D_R(1,k2)]; + RB1 = RB[IDX2D_R(1,k2)]; + RE1 = RE[IDX2D_R(1,k2)]; + RF1 = RF[IDX2D_R(1,k2)]; + RA01 = RA[IDX2D_R(0,k2)] * RA[IDX2D_R(1,k2)] - 1; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +{{KERNEL}} void order1_zplus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int 
NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.z_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Ex and Ey field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dHx, dHy; + {{REAL}} dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,k1)] - 1; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + + // Ex + materialEx = ID[IDX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] - 
updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,k2)] - 1; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +{{KERNEL}} void order2_zplus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.z_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Ex and Ey field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy; + {{REAL}} dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,k1)]; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + RA1 = RA[IDX2D_R(1,k1)]; + RB1 = RB[IDX2D_R(1,k1)]; + RE1 = RE[IDX2D_R(1,k1)]; + RF1 = RF[IDX2D_R(1,k1)]; + RA01 = RA[IDX2D_R(0,k1)] * RA[IDX2D_R(1,k1)] - 1; + + // Ex + materialEx = ID[IDX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHy + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dHy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx 
&& j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,k2)]; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + RA1 = RA[IDX2D_R(1,k2)]; + RB1 = RB[IDX2D_R(1,k2)]; + RE1 = RE[IDX2D_R(1,k2)]; + RF1 = RF[IDX2D_R(1,k2)]; + RA01 = RA[IDX2D_R(0,k2)] * RA[IDX2D_R(1,k2)] - 1; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} \ No newline at end of file diff --git a/gprMax/cuda_opencl/pml_updates_electric_HORIPML_cuda.tmpl b/gprMax/cuda_opencl/pml_updates_electric_HORIPML_cuda.tmpl new file mode 100644 index 00000000..3b0efa25 --- /dev/null +++ b/gprMax/cuda_opencl/pml_updates_electric_HORIPML_cuda.tmpl @@ -0,0 +1,62 @@ +{% extends "pml_updates_electric_HORIPML_base.tmpl" %} + + +{% block constmem %} +__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}]; +{% endblock constmem %} + + +{% block threadidx %} + int idx = blockIdx.x * blockDim.x + threadIdx.x; +{% endblock threadidx %} + + +{% block x_args %} +const unsigned int* __restrict__ ID, +const {{REAL}}* __restrict__ Ex, +{{REAL}} *Ey, +{{REAL}} *Ez, +const {{REAL}}* __restrict__ Hx, +const {{REAL}}* __restrict__ Hy, +const {{REAL}}* __restrict__ Hz, +{{REAL}} *PHI1, +{{REAL}} *PHI2, +const {{REAL}}* __restrict__ RA, +const {{REAL}}* __restrict__ RB, +const {{REAL}}* __restrict__ RE, +const {{REAL}}* __restrict__ RF, +{% endblock x_args %} + + +{% block y_args %} +const unsigned int* __restrict__ 
ID, +{{REAL}} *Ex, +const {{REAL}}* __restrict__ Ey, +{{REAL}} *Ez, +const {{REAL}}* __restrict__ Hx, +const {{REAL}}* __restrict__ Hy, +const {{REAL}}* __restrict__ Hz, +{{REAL}} *PHI1, +{{REAL}} *PHI2, +const {{REAL}}* __restrict__ RA, +const {{REAL}}* __restrict__ RB, +const {{REAL}}* __restrict__ RE, +const {{REAL}}* __restrict__ RF, +{% endblock y_args %} + + +{% block z_args %} +const unsigned int* __restrict__ ID, +{{REAL}} *Ex, +{{REAL}} *Ey, +const {{REAL}}* __restrict__ Ez, +const {{REAL}}* __restrict__ Hx, +const {{REAL}}* __restrict__ Hy, +const {{REAL}}* __restrict__ Hz, +{{REAL}} *PHI1, +{{REAL}} *PHI2, +const {{REAL}}* __restrict__ RA, +const {{REAL}}* __restrict__ RB, +const {{REAL}}* __restrict__ RE, +const {{REAL}}* __restrict__ RF, +{% endblock z_args %} \ No newline at end of file diff --git a/gprMax/cuda_opencl/pml_updates_electric_HORIPML_opencl.tmpl b/gprMax/cuda_opencl/pml_updates_electric_HORIPML_opencl.tmpl new file mode 100644 index 00000000..b2636c19 --- /dev/null +++ b/gprMax/cuda_opencl/pml_updates_electric_HORIPML_opencl.tmpl @@ -0,0 +1,68 @@ +{% extends "pml_updates_electric_HORIPML_base.tmpl" %} + + +{% block constmem %} +__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] = +{ + {% for i in updatecoeffsE %} + {{i}}, + {% endfor %} +}; +{% endblock constmem %} + + +{% block threadidx %} + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + + get_global_id(1) * get_global_size(0) + get_global_id(0); +{% endblock threadidx %} + + +{% block x_args %} +__global const unsigned int* restrict ID, +__global const {{REAL}}* restrict Ex, +__global {{REAL}} *Ey, +__global {{REAL}} *Ez, +__global const {{REAL}}* restrict Hx, +__global const {{REAL}}* restrict Hy, +__global const {{REAL}}* restrict Hz, +__global {{REAL}} *PHI1, +__global {{REAL}} *PHI2, +__global const {{REAL}}* restrict RA, +__global const {{REAL}}* restrict RB, +__global const {{REAL}}* restrict RE, +__global const {{REAL}}* restrict RF, +{% endblock 
x_args %} + + +{% block y_args %} +__global const unsigned int* restrict ID, +__global {{REAL}} *Ex, +__global const {{REAL}}* restrict Ey, +__global {{REAL}} *Ez, +__global const {{REAL}}* restrict Hx, +__global const {{REAL}}* restrict Hy, +__global const {{REAL}}* restrict Hz, +__global {{REAL}} *PHI1, +__global {{REAL}} *PHI2, +__global const {{REAL}}* restrict RA, +__global const {{REAL}}* restrict RB, +__global const {{REAL}}* restrict RE, +__global const {{REAL}}* restrict RF, +{% endblock y_args %} + + +{% block z_args %} +__global const unsigned int* restrict ID, +__global {{REAL}} *Ex, +__global {{REAL}} *Ey, +__global const {{REAL}}* restrict Ez, +__global const {{REAL}}* restrict Hx, +__global const {{REAL}}* restrict Hy, +__global const {{REAL}}* restrict Hz, +__global {{REAL}} *PHI1, +__global {{REAL}} *PHI2, +__global const {{REAL}}* restrict RA, +__global const {{REAL}}* restrict RB, +__global const {{REAL}}* restrict RE, +__global const {{REAL}}* restrict RF, +{% endblock z_args %} \ No newline at end of file diff --git a/gprMax/cuda_opencl/pml_updates_magnetic_HORIPML_base.tmpl b/gprMax/cuda_opencl/pml_updates_magnetic_HORIPML_base.tmpl new file mode 100644 index 00000000..ded47efa --- /dev/null +++ b/gprMax/cuda_opencl/pml_updates_magnetic_HORIPML_base.tmpl @@ -0,0 +1,1178 @@ +// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +// +// This file is part of gprMax. +// +// gprMax is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// gprMax is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with gprMax. If not, see <http://www.gnu.org/licenses/>. + + +// Macros for converting subscripts to linear index: +#define IDX2D_R(m, n) (m)*(NY_R)+(n) +#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n) +#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k) +#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k) +#define IDX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k) +#define IDX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k) + +// Material coefficients (read-only) stored in constant memory of compute device +{% block constmem %}{% endblock constmem %} + + +{{KERNEL}} void order1_xminus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{% block x_args %}{% endblock x_args %}{% endfilter %} + {{REAL}} d) { + + // This function updates the Hy and Hz field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {% block threadidx %}{% endblock threadidx %} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dEy, dEz; + {{REAL}} dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - (i1 + 1); + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i1)] - 1; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii+1,jj,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - (i2 + 1); + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i2)] - 1; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEy =
(Ey[IDX3D_FIELDS(ii+1,jj,kk)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +} + + +{{KERNEL}} void order2_xminus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.x_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Hy and Hz field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz; + {{REAL}} dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0
&& i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - (i1 + 1); + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i1)]; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + RA1 = RA[IDX2D_R(1,i1)]; + RB1 = RB[IDX2D_R(1,i1)]; + RE1 = RE[IDX2D_R(1,i1)]; + RF1 = RF[IDX2D_R(1,i1)]; + RA01 = RA[IDX2D_R(0,i1)] * RA[IDX2D_R(1,i1)] - 1; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii+1,jj,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - (i2 + 1); + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i2)]; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + RA1 = RA[IDX2D_R(1,i2)]; + RB1 = RB[IDX2D_R(1,i2)]; + RE1 = RE[IDX2D_R(1,i2)]; + RF1 = RF[IDX2D_R(1,i2)]; + RA01 = RA[IDX2D_R(0,i2)] * RA[IDX2D_R(1,i2)] - 1; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii+1,jj,kk)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEy + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dEy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +} + + +{{KERNEL}} void order1_xplus(int xs, + int xf, + int ys, + int yf, + 
int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.x_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Hy and Hz field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dEy, dEz; + {{REAL}} dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i1)] - 1; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii+1,jj,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[IDX3D_FIELDS(ii,jj,kk)] =
Hy[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i2)] - 1; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii+1,jj,kk)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +} + + +{{KERNEL}} void order2_xplus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.x_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Hy and Hz field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz; + {{REAL}} dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i1)]; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + RA1 = RA[IDX2D_R(1,i1)]; + RB1 = RB[IDX2D_R(1,i1)]; + RE1 = RE[IDX2D_R(1,i1)]; + RF1 = RF[IDX2D_R(1,i1)]; + RA01 = RA[IDX2D_R(0,i1)] * RA[IDX2D_R(1,i1)] - 1; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii+1,jj,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx
&& j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i2)]; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + RA1 = RA[IDX2D_R(1,i2)]; + RB1 = RB[IDX2D_R(1,i2)]; + RE1 = RE[IDX2D_R(1,i2)]; + RF1 = RF[IDX2D_R(1,i2)]; + RA01 = RA[IDX2D_R(0,i2)] * RA[IDX2D_R(1,i2)] - 1; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii+1,jj,kk)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEy + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dEy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +} + + +{{KERNEL}} void order1_yminus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{% block y_args %}{% endblock y_args %}{% endfilter %} + {{REAL}} d) { + + // This function updates the Hx and Hz field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dEx, dEz; + {{REAL}} dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - (j1 + 1); + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j1)] - 1; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + + // Hx + materialHx = ID[IDX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii,jj+1,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - (j2 + 1); + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j2)] - 1; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj+1,kk)] -
Ex[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +{{KERNEL}} void order2_yminus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.y_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Hx and Hz field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz; + {{REAL}} dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 <
nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - (j1 + 1); + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j1)]; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + RA1 = RA[IDX2D_R(1,j1)]; + RB1 = RB[IDX2D_R(1,j1)]; + RE1 = RE[IDX2D_R(1,j1)]; + RF1 = RF[IDX2D_R(1,j1)]; + RA01 = RA[IDX2D_R(0,j1)] * RA[IDX2D_R(1,j1)] - 1; + + // Hx + materialHx = ID[IDX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii,jj+1,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - (j2 + 1); + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j2)]; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + RA1 = RA[IDX2D_R(1,j2)]; + RB1 = RB[IDX2D_R(1,j2)]; + RE1 = RE[IDX2D_R(1,j2)]; + RF1 = RF[IDX2D_R(1,j2)]; + RA01 = RA[IDX2D_R(0,j2)] * RA[IDX2D_R(1,j2)] - 1; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj+1,kk)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +{{KERNEL}} void order1_yplus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + 
int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.y_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Hx and Hz field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dEx, dEz; + {{REAL}} dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j1)] - 1; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + + // Hx + materialHx = ID[IDX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii,jj+1,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] -
updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j2)] - 1; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj+1,kk)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +{{KERNEL}} void order2_yplus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.y_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Hx and Hz field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz; + {{REAL}} dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j1)]; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + RA1 = RA[IDX2D_R(1,j1)]; + RB1 = RB[IDX2D_R(1,j1)]; + RE1 = RE[IDX2D_R(1,j1)]; + RF1 = RF[IDX2D_R(1,j1)]; + RA01 = RA[IDX2D_R(0,j1)] * RA[IDX2D_R(1,j1)] - 1; + + // Hx + materialHx = ID[IDX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii,jj+1,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx &&
j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j2)]; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + RA1 = RA[IDX2D_R(1,j2)]; + RB1 = RB[IDX2D_R(1,j2)]; + RE1 = RE[IDX2D_R(1,j2)]; + RF1 = RF[IDX2D_R(1,j2)]; + RA01 = RA[IDX2D_R(0,j2)] * RA[IDX2D_R(1,j2)] - 1; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj+1,kk)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +{{KERNEL}} void order1_zminus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{% block z_args %}{% endblock z_args %}{% endfilter %} + {{REAL}} d) { + + // This function updates the Hx and Hy field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dEx, dEy; + {{REAL}} dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - (k1 + 1); + + // PML coefficients + RA01 = RA[IDX2D_R(0,k1)] - 1; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + + // Hx + materialHx = ID[IDX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii,jj,kk+1)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - (k2 + 1); + + // PML coefficients + RA01 = RA[IDX2D_R(0,k2)] - 1; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj,kk+1)] -
Ex[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +{{KERNEL}} void order2_zminus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.z_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Hx and Hy field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy; + {{REAL}} dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 <
nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - (k1 + 1); + + // PML coefficients + RA0 = RA[IDX2D_R(0,k1)]; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + RA1 = RA[IDX2D_R(1,k1)]; + RB1 = RB[IDX2D_R(1,k1)]; + RE1 = RE[IDX2D_R(1,k1)]; + RF1 = RF[IDX2D_R(1,k1)]; + RA01 = RA[IDX2D_R(0,k1)] * RA[IDX2D_R(1,k1)] - 1; + + // Hx + materialHx = ID[IDX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii,jj,kk+1)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEy + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dEy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - (k2 + 1); + + // PML coefficients + RA0 = RA[IDX2D_R(0,k2)]; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + RA1 = RA[IDX2D_R(1,k2)]; + RB1 = RB[IDX2D_R(1,k2)]; + RE1 = RE[IDX2D_R(1,k2)]; + RF1 = RF[IDX2D_R(1,k2)]; + RA01 = RA[IDX2D_R(0,k2)] * RA[IDX2D_R(1,k2)] - 1; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj,kk+1)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +{{KERNEL}} void order1_zplus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, 
+ int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.z_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Hx and Hy field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dEx, dEy; + {{REAL}} dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,k1)] - 1; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + + // Hx + materialHx = ID[IDX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii,jj,kk+1)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] +
updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,k2)] - 1; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj,kk+1)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +{{KERNEL}} void order2_zplus(int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int NX_PHI1, + int NY_PHI1, + int NZ_PHI1, + int NX_PHI2, + int NY_PHI2, + int NZ_PHI2, + int NY_R,{% filter indent(width=30) %}{{self.z_args()}}{% endfilter %} + {{REAL}} d) { + + // This function updates the Hx and Hy field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + {{self.threadidx()}} + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy; + {{REAL}} dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,k1)]; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + RA1 = RA[IDX2D_R(1,k1)]; + RB1 = RB[IDX2D_R(1,k1)]; + RE1 = RE[IDX2D_R(1,k1)]; + RF1 = RF[IDX2D_R(1,k1)]; + RA01 = RA[IDX2D_R(0,k1)] * RA[IDX2D_R(1,k1)] - 1; + + // Hx + materialHx = ID[IDX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii,jj,kk+1)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEy + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dEy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx
&& j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,k2)]; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + RA1 = RA[IDX2D_R(1,k2)]; + RB1 = RB[IDX2D_R(1,k2)]; + RE1 = RE[IDX2D_R(1,k2)]; + RF1 = RF[IDX2D_R(1,k2)]; + RA01 = RA[IDX2D_R(0,k2)] * RA[IDX2D_R(1,k2)] - 1; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj,kk+1)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} \ No newline at end of file diff --git a/gprMax/cuda_opencl/pml_updates_magnetic_HORIPML_cuda.tmpl b/gprMax/cuda_opencl/pml_updates_magnetic_HORIPML_cuda.tmpl new file mode 100644 index 00000000..08628432 --- /dev/null +++ b/gprMax/cuda_opencl/pml_updates_magnetic_HORIPML_cuda.tmpl @@ -0,0 +1,62 @@ +{% extends "pml_updates_magnetic_HORIPML_base.tmpl" %} + + +{% block constmem %} +__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}]; +{% endblock constmem %} + + +{% block threadidx %} + int idx = blockIdx.x * blockDim.x + threadIdx.x; +{% endblock threadidx %} + + +{% block x_args %} +const unsigned int* __restrict__ ID, +const {{REAL}}* __restrict__ Ex, +const {{REAL}}* __restrict__ Ey, +const {{REAL}}* __restrict__ Ez, +const {{REAL}}* __restrict__ Hx, +{{REAL}} *Hy, +{{REAL}} *Hz, +{{REAL}} *PHI1, +{{REAL}} *PHI2, +const {{REAL}}* __restrict__ RA, +const {{REAL}}* __restrict__ RB, +const {{REAL}}* __restrict__ RE, +const {{REAL}}* __restrict__ RF, +{% endblock x_args %} + + +{% block y_args %} +const unsigned int* __restrict__ 
ID, +const {{REAL}}* __restrict__ Ex, +const {{REAL}}* __restrict__ Ey, +const {{REAL}}* __restrict__ Ez, +{{REAL}} *Hx, +const {{REAL}}* __restrict__ Hy, +{{REAL}} *Hz, +{{REAL}} *PHI1, +{{REAL}} *PHI2, +const {{REAL}}* __restrict__ RA, +const {{REAL}}* __restrict__ RB, +const {{REAL}}* __restrict__ RE, +const {{REAL}}* __restrict__ RF, +{% endblock y_args %} + + +{% block z_args %} +const unsigned int* __restrict__ ID, +const {{REAL}}* __restrict__ Ex, +const {{REAL}}* __restrict__ Ey, +const {{REAL}}* __restrict__ Ez, +{{REAL}} *Hx, +{{REAL}} *Hy, +const {{REAL}}* __restrict__ Hz, +{{REAL}} *PHI1, +{{REAL}} *PHI2, +const {{REAL}}* __restrict__ RA, +const {{REAL}}* __restrict__ RB, +const {{REAL}}* __restrict__ RE, +const {{REAL}}* __restrict__ RF, +{% endblock z_args %} \ No newline at end of file diff --git a/gprMax/cuda_opencl/pml_updates_magnetic_HORIPML_opencl.tmpl b/gprMax/cuda_opencl/pml_updates_magnetic_HORIPML_opencl.tmpl new file mode 100644 index 00000000..8e8b5899 --- /dev/null +++ b/gprMax/cuda_opencl/pml_updates_magnetic_HORIPML_opencl.tmpl @@ -0,0 +1,68 @@ +{% extends "pml_updates_magnetic_HORIPML_base.tmpl" %} + + +{% block constmem %} +__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] = +{ + {% for i in updatecoeffsH %} + {{i}}, + {% endfor %} +}; +{% endblock constmem %} + + +{% block threadidx %} + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + + get_global_id(1) * get_global_size(0) + get_global_id(0); +{% endblock threadidx %} + + +{% block x_args %} +__global const unsigned int* restrict ID, +__global const {{REAL}}* restrict Ex, +__global const {{REAL}}* restrict Ey, +__global const {{REAL}}* restrict Ez, +__global const {{REAL}}* restrict Hx, +__global {{REAL}} *Hy, +__global {{REAL}} *Hz, +__global {{REAL}} *PHI1, +__global {{REAL}} *PHI2, +__global const {{REAL}}* restrict RA, +__global const {{REAL}}* restrict RB, +__global const {{REAL}}* restrict RE, +__global const {{REAL}}* restrict RF, +{% endblock 
x_args %} + + +{% block y_args %} +__global const unsigned int* restrict ID, +__global const {{REAL}}* restrict Ex, +__global const {{REAL}}* restrict Ey, +__global const {{REAL}}* restrict Ez, +__global {{REAL}} *Hx, +__global const {{REAL}}* restrict Hy, +__global {{REAL}} *Hz, +__global {{REAL}} *PHI1, +__global {{REAL}} *PHI2, +__global const {{REAL}}* restrict RA, +__global const {{REAL}}* restrict RB, +__global const {{REAL}}* restrict RE, +__global const {{REAL}}* restrict RF, +{% endblock y_args %} + + +{% block z_args %} +__global const unsigned int* restrict ID, +__global const {{REAL}}* restrict Ex, +__global const {{REAL}}* restrict Ey, +__global const {{REAL}}* restrict Ez, +__global {{REAL}} *Hx, +__global {{REAL}} *Hy, +__global const {{REAL}}* restrict Hz, +__global {{REAL}} *PHI1, +__global {{REAL}} *PHI2, +__global const {{REAL}}* restrict RA, +__global const {{REAL}}* restrict RB, +__global const {{REAL}}* restrict RE, +__global const {{REAL}}* restrict RF, +{% endblock z_args %} \ No newline at end of file diff --git a/gprMax/cuda_opencl/snapshots_base.tmpl b/gprMax/cuda_opencl/snapshots_base.tmpl new file mode 100644 index 00000000..0ca588b6 --- /dev/null +++ b/gprMax/cuda_opencl/snapshots_base.tmpl @@ -0,0 +1,90 @@ +// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +// +// This file is part of gprMax. +// +// gprMax is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// gprMax is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with gprMax. If not, see <http://www.gnu.org/licenses/>. + + +// Macros for converting subscripts to linear index: +#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k) +#define IDX4D_SNAPS(p, i, j, k) (p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k) + +//////////////////// +// Store snapshot // +//////////////////// + +{{KERNEL}} void store_snapshot(int p, + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int dx, + int dy, + int dz,{% filter indent(width=29) %}{% block snap_args %}{% endblock snap_args %}{% endfilter %}{ + + // This function stores field values for a snapshot. + // + // Args: + // p: Snapshot number + // xs, xf, ys, yf, zs, zf: Start and finish cell coordinates for snapshot + // dx, dy, dz: Sampling interval in cell coordinates for snapshot + // E, H: Access to field component arrays + // snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots + + // Obtain the linear index corresponding to the current thread + {% block threadidx %}{% endblock threadidx %} + + // Convert the linear index to subscripts for 4D SNAPS array + int i = (idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) / ({{NY_SNAPS}} * {{NZ_SNAPS}}); + int j = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) / {{NZ_SNAPS}}; + int k = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) % {{NZ_SNAPS}}; + + // Subscripts for field arrays + int ii, jj, kk; + + if (i >= xs && i < xf && j >= ys && j < yf && k >= zs && k < zf) { + + // Increment subscripts for field array to account for spatial sampling of snapshot + ii = (xs + i) * dx; + jj = (ys + j) * dy; + kk = (zs + k) * dz; + + // The electric field component value at a point comes from an average of + // the 4 electric field component values in that cell +
snapEx[IDX4D_SNAPS(p,i,j,k)] = (Ex[IDX3D_FIELDS(ii,jj,kk)] + + Ex[IDX3D_FIELDS(ii,jj+1,kk)] + + Ex[IDX3D_FIELDS(ii,jj,kk+1)] + + Ex[IDX3D_FIELDS(ii,jj+1,kk+1)]) / 4; + snapEy[IDX4D_SNAPS(p,i,j,k)] = (Ey[IDX3D_FIELDS(ii,jj,kk)] + + Ey[IDX3D_FIELDS(ii+1,jj,kk)] + + Ey[IDX3D_FIELDS(ii,jj,kk+1)] + + Ey[IDX3D_FIELDS(ii+1,jj,kk+1)]) / 4; + snapEz[IDX4D_SNAPS(p,i,j,k)] = (Ez[IDX3D_FIELDS(ii,jj,kk)] + + Ez[IDX3D_FIELDS(ii+1,jj,kk)] + + Ez[IDX3D_FIELDS(ii,jj+1,kk)] + + Ez[IDX3D_FIELDS(ii+1,jj+1,kk)]) / 4; + + // The magnetic field component value at a point comes from average of + // 2 magnetic field component values in that cell and the following cell + snapHx[IDX4D_SNAPS(p,i,j,k)] = (Hx[IDX3D_FIELDS(ii,jj,kk)] + + Hx[IDX3D_FIELDS(ii+1,jj,kk)]) / 2; + snapHy[IDX4D_SNAPS(p,i,j,k)] = (Hy[IDX3D_FIELDS(ii,jj,kk)] + + Hy[IDX3D_FIELDS(ii,jj+1,kk)]) / 2; + snapHz[IDX4D_SNAPS(p,i,j,k)] = (Hz[IDX3D_FIELDS(ii,jj,kk)] + + Hz[IDX3D_FIELDS(ii,jj,kk+1)]) / 2; + } +} \ No newline at end of file diff --git a/gprMax/cuda_opencl/snapshots_cuda.tmpl b/gprMax/cuda_opencl/snapshots_cuda.tmpl new file mode 100644 index 00000000..02474ab2 --- /dev/null +++ b/gprMax/cuda_opencl/snapshots_cuda.tmpl @@ -0,0 +1,22 @@ +{% extends "snapshots_base.tmpl" %} + + +{% block threadidx %} + int idx = blockIdx.x * blockDim.x + threadIdx.x; +{% endblock threadidx %} + + +{% block snap_args %} +const {{REAL}}* __restrict__ Ex, +const {{REAL}}* __restrict__ Ey, +const {{REAL}}* __restrict__ Ez, +const {{REAL}}* __restrict__ Hx, +const {{REAL}}* __restrict__ Hy, +const {{REAL}}* __restrict__ Hz, +{{REAL}} *snapEx, +{{REAL}} *snapEy, +{{REAL}} *snapEz, +{{REAL}} *snapHx, +{{REAL}} *snapHy, +{{REAL}} *snapHz) +{% endblock snap_args %} \ No newline at end of file diff --git a/gprMax/cuda_opencl/snapshots_opencl.tmpl b/gprMax/cuda_opencl/snapshots_opencl.tmpl new file mode 100644 index 00000000..943f9651 --- /dev/null +++ b/gprMax/cuda_opencl/snapshots_opencl.tmpl @@ -0,0 +1,23 @@ +{% extends "snapshots_base.tmpl" %}
+ + +{% block threadidx %} + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + + get_global_id(1) * get_global_size(0) + get_global_id(0); +{% endblock threadidx %} + + +{% block snap_args %} +__global const {{REAL}}* restrict Ex, +__global const {{REAL}}* restrict Ey, +__global const {{REAL}}* restrict Ez, +__global const {{REAL}}* restrict Hx, +__global const {{REAL}}* restrict Hy, +__global const {{REAL}}* restrict Hz, +__global {{REAL}} *snapEx, +__global {{REAL}} *snapEy, +__global {{REAL}} *snapEz, +__global {{REAL}} *snapHx, +__global {{REAL}} *snapHy, +__global {{REAL}} *snapHz) +{% endblock snap_args %} \ No newline at end of file diff --git a/gprMax/cuda_opencl/source_updates_base.tmpl b/gprMax/cuda_opencl/source_updates_base.tmpl new file mode 100644 index 00000000..fd8d338c --- /dev/null +++ b/gprMax/cuda_opencl/source_updates_base.tmpl @@ -0,0 +1,217 @@ +// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +// +// This file is part of gprMax. +// +// gprMax is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// gprMax is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with gprMax. If not, see . 
+ + +// Macros for converting subscripts to linear index: +#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n) +#define IDX2D_SRCINFO(m, n) (m)*{{NY_SRCINFO}}+(n) +#define IDX2D_SRCWAVES(m, n) (m)*({{NY_SRCWAVES}})+(n) +#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k) +#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k) + +// Material coefficients (read-only) stored in constant memory of compute device +{% block constmem %}{% endblock constmem %} + +/////////////////////////////////////////// +// Hertzian dipole electric field update // +/////////////////////////////////////////// + +{{KERNEL}} void update_hertzian_dipole(int NHERTZDIPOLE, + int iteration, + {{REAL}} dx, + {{REAL}} dy, + {{REAL}} dz,{% filter indent(width=37) %}{% block electric_source_args %}{% endblock electric_source_args %}{% endfilter %}{ + + // This function updates electric field values for Hertzian dipole sources. + // + // Args: + // NHERTZDIPOLE: Total number of Hertzian dipoles in the model + // iteration: Iteration number of simulation + // dx, dy, dz: Spatial discretisations + // srcinfo1: Source cell coordinates and polarisation information + // srcinfo2: Other source information, e.g. length, resistance etc... 
+ // srcwaveforms: Source waveform values + // ID, E: Access to ID and field component arrays + + // Obtain the linear index corresponding to the current thread and use for each receiver + {% block threadidx %}{% endblock threadidx %} + + if (src < NHERTZDIPOLE) { + + {{REAL}} dl; + int i, j, k, polarisation; + + i = srcinfo1[IDX2D_SRCINFO(src,0)]; + j = srcinfo1[IDX2D_SRCINFO(src,1)]; + k = srcinfo1[IDX2D_SRCINFO(src,2)]; + polarisation = srcinfo1[IDX2D_SRCINFO(src,3)]; + dl = srcinfo2[src]; + + // 'x' polarised source + if (polarisation == 0) { + int materialEx = ID[IDX4D_ID(0,i,j,k)]; + Ex[IDX3D_FIELDS(i,j,k)] = Ex[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] * + srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz)); + } + + // 'y' polarised source + else if (polarisation == 1) { + int materialEy = ID[IDX4D_ID(1,i,j,k)]; + Ey[IDX3D_FIELDS(i,j,k)] = Ey[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] * + srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz)); + } + + // 'z' polarised source + else if (polarisation == 2) { + int materialEz = ID[IDX4D_ID(2,i,j,k)]; + Ez[IDX3D_FIELDS(i,j,k)] = Ez[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] * + srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz)); + } + } +} + + +/////////////////////////////////////////// +// Magnetic dipole magnetic field update // +/////////////////////////////////////////// + +{{KERNEL}} void update_magnetic_dipole(int NMAGDIPOLE, + int iteration, + {{REAL}} dx, + {{REAL}} dy, + {{REAL}} dz,{% filter indent(width=37) %}{% block magnetic_source_args %}{% endblock magnetic_source_args %}{% endfilter %}{ + + // This function updates magnetic field values for magnetic dipole sources. 
+ // + // Args: + // NMAGDIPOLE: Total number of magnetic dipoles in the model + // iteration: Iteration number of simulation + // dx, dy, dz: Spatial discretisations + // srcinfo1: Source cell coordinates and polarisation information + // srcinfo2: Other source information, e.g. length, resistance etc... + // srcwaveforms: Source waveform values + // ID, H: Access to ID and field component arrays + + // Obtain the linear index corresponding to the current thread and use for each receiver + {{self.threadidx()}} + + if (src < NMAGDIPOLE) { + + int i, j, k, polarisation; + + i = srcinfo1[IDX2D_SRCINFO(src,0)]; + j = srcinfo1[IDX2D_SRCINFO(src,1)]; + k = srcinfo1[IDX2D_SRCINFO(src,2)]; + polarisation = srcinfo1[IDX2D_SRCINFO(src,3)]; + + // 'x' polarised source + if (polarisation == 0) { + int materialHx = ID[IDX4D_ID(3,i,j,k)]; + Hx[IDX3D_FIELDS(i,j,k)] = Hx[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] * + srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz)); + } + + // 'y' polarised source + else if (polarisation == 1) { + int materialHy = ID[IDX4D_ID(4,i,j,k)]; + Hy[IDX3D_FIELDS(i,j,k)] = Hy[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] * + srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz)); + } + + // 'z' polarised source + else if (polarisation == 2) { + int materialHz = ID[IDX4D_ID(5,i,j,k)]; + Hz[IDX3D_FIELDS(i,j,k)] = Hz[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] * + srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz)); + } + } +} + + +////////////////////////////////////////// +// Voltage source electric field update // +////////////////////////////////////////// + +{{KERNEL}} void update_voltage_source(int NVOLTSRC, + int iteration, + {{REAL}} dx, + {{REAL}} dy, + {{REAL}} dz,{% filter indent(width=36) %}{{self.electric_source_args()}}{% endfilter %}{ + + // This function updates electric field values for voltage sources. 
+ // + // Args: + // NVOLTSRC: Total number of voltage sources in the model + // iteration: Iteration number of simulation + // dx, dy, dz: Spatial discretisations + // srcinfo1: Source cell coordinates and polarisation information + // srcinfo2: Other source information, e.g. length, resistance etc... + // srcwaveforms: Source waveform values + // ID, E: Access to ID and field component arrays + + // Obtain the linear index corresponding to the current thread and use for each receiver + {{self.threadidx()}} + + if (src < NVOLTSRC) { + + {{REAL}} resistance; + int i, j, k, polarisation; + + i = srcinfo1[IDX2D_SRCINFO(src,0)]; + j = srcinfo1[IDX2D_SRCINFO(src,1)]; + k = srcinfo1[IDX2D_SRCINFO(src,2)]; + polarisation = srcinfo1[IDX2D_SRCINFO(src,3)]; + resistance = srcinfo2[src]; + + // 'x' polarised source + if (polarisation == 0) { + if (resistance != 0) { + int materialEx = ID[IDX4D_ID(0,i,j,k)]; + Ex[IDX3D_FIELDS(i,j,k)] = Ex[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] * + srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dy * dz)); + } + else { + Ex[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dx; + } + } + + // 'y' polarised source + else if (polarisation == 1) { + if (resistance != 0) { + int materialEy = ID[IDX4D_ID(1,i,j,k)]; + Ey[IDX3D_FIELDS(i,j,k)] = Ey[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] * + srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dz)); + } + else { + Ey[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dy; + } + } + + // 'z' polarised source + else if (polarisation == 2) { + if (resistance != 0) { + int materialEz = ID[IDX4D_ID(2,i,j,k)]; + Ez[IDX3D_FIELDS(i,j,k)] = Ez[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] * + srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dy)); + } + else { + Ez[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dz; + } + } + } 
+} diff --git a/gprMax/cuda_opencl/source_updates_cuda.tmpl b/gprMax/cuda_opencl/source_updates_cuda.tmpl new file mode 100644 index 00000000..ae1ad77d --- /dev/null +++ b/gprMax/cuda_opencl/source_updates_cuda.tmpl @@ -0,0 +1,34 @@ +{% extends "source_updates_base.tmpl" %} + + +{% block constmem %} +__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}]; +__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}]; +{% endblock constmem %} + + +{% block threadidx %} + int src = blockIdx.x * blockDim.x + threadIdx.x; +{% endblock threadidx %} + + +{% block electric_source_args %} +const int* __restrict__ srcinfo1, +const {{REAL}}* __restrict__ srcinfo2, +const {{REAL}}* __restrict__ srcwaveforms, +const unsigned int* __restrict__ ID, +{{REAL}} *Ex, +{{REAL}} *Ey, +{{REAL}} *Ez) +{% endblock electric_source_args %} + + +{% block magnetic_source_args %} +const int* __restrict__ srcinfo1, +const {{REAL}}* __restrict__ srcinfo2, +const {{REAL}}* __restrict__ srcwaveforms, +const unsigned int* __restrict__ ID, +{{REAL}} *Hx, +{{REAL}} *Hy, +{{REAL}} *Hz) +{% endblock magnetic_source_args %} \ No newline at end of file diff --git a/gprMax/cuda_opencl/source_updates_opencl.tmpl b/gprMax/cuda_opencl/source_updates_opencl.tmpl new file mode 100644 index 00000000..b8da74d6 --- /dev/null +++ b/gprMax/cuda_opencl/source_updates_opencl.tmpl @@ -0,0 +1,46 @@ +{% extends "source_updates_base.tmpl" %} + + +{% block constmem %} +__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] = +{ + {% for i in updatecoeffsE %} + {{i}}, + {% endfor %} +}; + +__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] = +{ + {% for i in updatecoeffsH %} + {{i}}, + {% endfor %} +}; +{% endblock constmem %} + + +{% block threadidx %} + int src = get_global_id(2) * get_global_size(0) * get_global_size(1) + + get_global_id(1) * get_global_size(0) + get_global_id(0); +{% endblock threadidx %} + + +{% block electric_source_args %} +__global const int* restrict srcinfo1, +__global 
const {{REAL}}* restrict srcinfo2, +__global const {{REAL}}* restrict srcwaveforms, +__global const unsigned int* restrict ID, +__global {{REAL}} *Ex, +__global {{REAL}} *Ey, +__global {{REAL}} *Ez) +{% endblock electric_source_args %} + + +{% block magnetic_source_args %} +__global const int* restrict srcinfo1, +__global const {{REAL}}* restrict srcinfo2, +__global const {{REAL}}* restrict srcwaveforms, +__global const unsigned int* restrict ID, +__global {{REAL}} *Hx, +__global {{REAL}} *Hy, +__global {{REAL}} *Hz) +{% endblock magnetic_source_args %} \ No newline at end of file diff --git a/gprMax/cuda_opencl/store_outputs_base.tmpl b/gprMax/cuda_opencl/store_outputs_base.tmpl new file mode 100644 index 00000000..37eb24ea --- /dev/null +++ b/gprMax/cuda_opencl/store_outputs_base.tmpl @@ -0,0 +1,50 @@ +// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +// +// This file is part of gprMax. +// +// gprMax is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// gprMax is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with gprMax. If not, see . 
+ + +#define IDX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n) +#define IDX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k) +#define IDX3D_FIELDS(i,j,k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k) + +{{KERNEL}} void store_outputs(int NRX, + int iteration,{% filter indent(width=28) %}{% block rx_args %}{% endblock rx_args %}{% endfilter %}{ + + // This function stores field component values for every receiver in the model. + // + // Args: + // NRX: total number of receivers in the model. + // rxs: array to store field components for receivers - rows + // are field components; columns are iterations; pages are receiver + + // Obtain linear index corresponding to the current work item + {% block threadidx %}{% endblock threadidx %} + + int i,j,k; + + if (rx < NRX) { + i = rxcoords[IDX2D_RXCOORDS(rx,0)]; + j = rxcoords[IDX2D_RXCOORDS(rx,1)]; + k = rxcoords[IDX2D_RXCOORDS(rx,2)]; + rxs[IDX3D_RXS(0,iteration,rx)] = Ex[IDX3D_FIELDS(i,j,k)]; + rxs[IDX3D_RXS(1,iteration,rx)] = Ey[IDX3D_FIELDS(i,j,k)]; + rxs[IDX3D_RXS(2,iteration,rx)] = Ez[IDX3D_FIELDS(i,j,k)]; + rxs[IDX3D_RXS(3,iteration,rx)] = Hx[IDX3D_FIELDS(i,j,k)]; + rxs[IDX3D_RXS(4,iteration,rx)] = Hy[IDX3D_FIELDS(i,j,k)]; + rxs[IDX3D_RXS(5,iteration,rx)] = Hz[IDX3D_FIELDS(i,j,k)]; + } +} \ No newline at end of file diff --git a/gprMax/cuda_opencl/store_outputs_cuda.tmpl b/gprMax/cuda_opencl/store_outputs_cuda.tmpl new file mode 100644 index 00000000..f5d535e1 --- /dev/null +++ b/gprMax/cuda_opencl/store_outputs_cuda.tmpl @@ -0,0 +1,18 @@ +{% extends "store_outputs_base.tmpl" %} + + +{% block threadidx %} + int rx = blockIdx.x * blockDim.x + threadIdx.x; +{% endblock threadidx %} + + +{% block rx_args %} +const int* __restrict__ rxcoords, +{{REAL}} *rxs, +const {{REAL}}* __restrict__ Ex, +const {{REAL}}* __restrict__ Ey, +const {{REAL}}* __restrict__ Ez, +const {{REAL}}* __restrict__ Hx, +const {{REAL}}* __restrict__ Hy, +const {{REAL}}* __restrict__ Hz) +{% endblock rx_args %} \ 
No newline at end of file diff --git a/gprMax/cuda_opencl/store_outputs_opencl.tmpl b/gprMax/cuda_opencl/store_outputs_opencl.tmpl new file mode 100644 index 00000000..5d958274 --- /dev/null +++ b/gprMax/cuda_opencl/store_outputs_opencl.tmpl @@ -0,0 +1,19 @@ +{% extends "store_outputs_base.tmpl" %} + + +{% block threadidx %} + int rx = get_global_id(2) * get_global_size(0) * get_global_size(1) + + get_global_id(1) * get_global_size(0) + get_global_id(0); +{% endblock threadidx %} + + +{% block rx_args %} +__global const int* restrict rxcoords, +__global {{REAL}} *rxs, +__global const {{REAL}}* restrict Ex, +__global const {{REAL}}* restrict Ey, +__global const {{REAL}}* restrict Ez, +__global const {{REAL}}* restrict Hx, +__global const {{REAL}}* restrict Hy, +__global const {{REAL}}* restrict Hz) +{% endblock rx_args %} \ No newline at end of file diff --git a/gprMax/cuda_opencl_el/knl_common_base.tmpl b/gprMax/cuda_opencl_el/knl_common_base.tmpl new file mode 100644 index 00000000..d49a3e16 --- /dev/null +++ b/gprMax/cuda_opencl_el/knl_common_base.tmpl @@ -0,0 +1,22 @@ +{% block complex_header %}{% endblock complex_header %} + +// Macros for converting subscripts to linear index +#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n) +#define IDX2D_MATDISP(m, n) (m)*({{NY_MATDISPCOEFFS}})+(n) +#define IDX2D_R(m, n) (m)*(NY_R)+(n) +#define IDX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n) +#define IDX2D_SRCINFO(m, n) (m)*{{NY_SRCINFO}}+(n) +#define IDX2D_SRCWAVES(m, n) (m)*({{NY_SRCWAVES}})+(n) + +#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k) +#define IDX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k) + +#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k) +#define IDX4D_SNAPS(p, i, j, k) (p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k) +#define IDX4D_T(p, i, j, k) 
(p)*({{NX_T}})*({{NY_T}})*({{NZ_T}})+(i)*({{NY_T}})*({{NZ_T}})+(j)*({{NZ_T}})+(k) +#define IDX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k) +#define IDX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k) + + +// Material coefficients (read-only) stored in constant memory of compute device +{% block constmem %}{% endblock constmem %} diff --git a/gprMax/cuda_opencl_el/knl_common_cuda.tmpl b/gprMax/cuda_opencl_el/knl_common_cuda.tmpl new file mode 100644 index 00000000..7fdc89a0 --- /dev/null +++ b/gprMax/cuda_opencl_el/knl_common_cuda.tmpl @@ -0,0 +1,11 @@ +{% extends "knl_common_base.tmpl" %} + +{% block complex_header %} +#include <pycuda-complex.hpp> +{% endblock complex_header %} + + +{% block constmem %} +__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}]; +__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}]; +{% endblock constmem %} \ No newline at end of file diff --git a/gprMax/cuda_opencl_el/knl_common_opencl.tmpl b/gprMax/cuda_opencl_el/knl_common_opencl.tmpl new file mode 100644 index 00000000..5c16bfa8 --- /dev/null +++ b/gprMax/cuda_opencl_el/knl_common_opencl.tmpl @@ -0,0 +1,22 @@ +{% extends "knl_common_base.tmpl" %} + +{% block complex_header %} +#include <pyopencl-complex.h> +{% endblock complex_header %} + + +{% block constmem %} +__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] = +{ + {% for i in updatecoeffsE %} + {{i}}, + {% endfor %} +}; + +__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] = +{ + {% for i in updatecoeffsH %} + {{i}}, + {% endfor %} +}; +{% endblock constmem %} \ No newline at end of file diff --git a/gprMax/cuda_opencl_el/knl_fields_updates.py b/gprMax/cuda_opencl_el/knl_fields_updates.py new file mode 100644 index 00000000..644a6eac --- /dev/null +++ b/gprMax/cuda_opencl_el/knl_fields_updates.py @@ -0,0 +1,233 @@ +# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +# Authors: Craig Warren, Antonis Giannopoulos,
and John Hartley +# +# This file is part of gprMax. +# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see <http://www.gnu.org/licenses/>. + +from string import Template + +update_electric = Template(""" + // Electric field updates - normal materials. + // + // Args: + // NX, NY, NZ: Number of cells of the model domain. + // ID, E, H: Access to ID and field component arrays. + + // Convert the linear index to subscripts for 3D field arrays + int x = i / ($NY_FIELDS * $NZ_FIELDS); + int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS; + int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS; + + // Convert the linear index to subscripts for 4D material ID array + int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID); + int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID; + int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID; + + // Ex component + if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) { + int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)]; + Ex[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(x,y,z)] + + updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x,y-1,z)]) - + updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x,y,z-1)]); + } + + // Ey component + if ((NX != 1 || NZ != 1) && x > 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) { + int materialEy = 
ID[IDX4D_ID(1,x_ID,y_ID,z_ID)]; + Ey[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(x,y,z)] + + updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y,z-1)]) - + updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x-1,y,z)]); + } + + // Ez component + if ((NX != 1 || NY != 1) && x > 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) { + int materialEz = ID[IDX4D_ID(2,x_ID,y_ID,z_ID)]; + Ez[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(x,y,z)] + + updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x-1,y,z)]) - + updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y-1,z)]); + } +""") + +update_magnetic = Template(""" + // Magnetic field updates - normal materials. + // + // Args: + // NX, NY, NZ: Number of cells of the model domain. + // ID, E, H: Access to ID and field component arrays. + + // Convert the linear index to subscripts for 3D field arrays + int x = i / ($NY_FIELDS * $NZ_FIELDS); + int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS; + int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS; + + // Convert the linear index to subscripts for 4D material ID array + int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID); + int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID; + int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID; + + // Hx component + if (NX != 1 && x > 0 && x < NX && y >= 0 && y < NY && z >= 0 && z < NZ) { + int materialHx = ID[IDX4D_ID(3,x_ID,y_ID,z_ID)]; + Hx[IDX3D_FIELDS(x,y,z)] = updatecoeffsH[IDX2D_MAT(materialHx,0)] * Hx[IDX3D_FIELDS(x,y,z)] - + updatecoeffsH[IDX2D_MAT(materialHx,2)] * (Ez[IDX3D_FIELDS(x,y+1,z)] - Ez[IDX3D_FIELDS(x,y,z)]) + + updatecoeffsH[IDX2D_MAT(materialHx,3)] * (Ey[IDX3D_FIELDS(x,y,z+1)] - Ey[IDX3D_FIELDS(x,y,z)]); + } + + // Hy component + if (NY != 1 && x >= 0 && 
x < NX && y > 0 && y < NY && z >= 0 && z < NZ) { + int materialHy = ID[IDX4D_ID(4,x_ID,y_ID,z_ID)]; + Hy[IDX3D_FIELDS(x,y,z)] = updatecoeffsH[IDX2D_MAT(materialHy,0)] * Hy[IDX3D_FIELDS(x,y,z)] - + updatecoeffsH[IDX2D_MAT(materialHy,3)] * (Ex[IDX3D_FIELDS(x,y,z+1)] - Ex[IDX3D_FIELDS(x,y,z)]) + + updatecoeffsH[IDX2D_MAT(materialHy,1)] * (Ez[IDX3D_FIELDS(x+1,y,z)] - Ez[IDX3D_FIELDS(x,y,z)]); + } + + // Hz component + if (NZ != 1 && x >= 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) { + int materialHz = ID[IDX4D_ID(5,x_ID,y_ID,z_ID)]; + Hz[IDX3D_FIELDS(x,y,z)] = updatecoeffsH[IDX2D_MAT(materialHz,0)] * Hz[IDX3D_FIELDS(x,y,z)] - + updatecoeffsH[IDX2D_MAT(materialHz,1)] * (Ey[IDX3D_FIELDS(x+1,y,z)] - Ey[IDX3D_FIELDS(x,y,z)]) + + updatecoeffsH[IDX2D_MAT(materialHz,2)] * (Ex[IDX3D_FIELDS(x,y+1,z)] - Ex[IDX3D_FIELDS(x,y,z)]); + } +""") + +update_electric_dispersive_A = Template(""" + // Electric field updates - dispersive materials - part A of updates to electric + // field values when dispersive materials + // (with multiple poles) are present. + // + // Args: + // NX, NY, NZ: Number of cells of the model domain. + // MAXPOLES: Maximum number of dispersive material poles present in model. + // updatecoeffsdispersive, T, ID, E, H: Access to update coefficients, + // dispersive, ID and field + // component arrays. 
+ + + // Convert the linear index to subscripts for 3D field arrays + int x = i / ($NY_FIELDS * $NZ_FIELDS); + int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS; + int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS; + + // Convert the linear index to subscripts for 4D material ID array + int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID); + int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID; + int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID; + + // Convert the linear index to subscripts for 4D dispersive array + int x_T = (i % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T); + int y_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T; + int z_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T; + + // Ex component + if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) { + int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)]; + $REAL phi = 0; + for (int pole = 0; pole < MAXPOLES; pole++) { + phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEx,pole*3)]$REALFUNC * Tx[IDX4D_T(pole,x_T,y_T,z_T)]$REALFUNC; + Tx[IDX4D_T(pole,x_T,y_T,z_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEx,1+(pole*3))] * Tx[IDX4D_T(pole,x_T,y_T,z_T)] + + updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(x,y,z)]; + } + Ex[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(x,y,z)] + + updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x,y-1,z)]) - + updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x,y,z-1)]) - + updatecoeffsE[IDX2D_MAT(materialEx,4)] * phi; + } + + // Ey component + if ((NX != 1 || NZ != 1) && x > 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) { + int materialEy = ID[IDX4D_ID(1,x_ID,y_ID,z_ID)]; + $REAL phi = 0; + for (int pole = 0; pole < MAXPOLES; pole++) { + phi = phi + 
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,pole*3)]$REALFUNC * Ty[IDX4D_T(pole,x_T,y_T,z_T)]$REALFUNC; + Ty[IDX4D_T(pole,x_T,y_T,z_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEy,1+(pole*3))] * Ty[IDX4D_T(pole,x_T,y_T,z_T)] + + updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(x,y,z)]; + } + Ey[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(x,y,z)] + + updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y,z-1)]) - + updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x-1,y,z)]) - + updatecoeffsE[IDX2D_MAT(materialEy,4)] * phi; + } + + // Ez component + if ((NX != 1 || NY != 1) && x > 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) { + int materialEz = ID[IDX4D_ID(2,x_ID,y_ID,z_ID)]; + $REAL phi = 0; + for (int pole = 0; pole < MAXPOLES; pole++) { + phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEz,pole*3)]$REALFUNC * Tz[IDX4D_T(pole,x_T,y_T,z_T)]$REALFUNC; + Tz[IDX4D_T(pole,x_T,y_T,z_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEz,1+(pole*3))] * Tz[IDX4D_T(pole,x_T,y_T,z_T)] + + updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(x,y,z)]; + } + Ez[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(x,y,z)] + + updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x-1,y,z)]) - + updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y-1,z)]) - + updatecoeffsE[IDX2D_MAT(materialEz,4)] * phi; + } +""") + +update_electric_dispersive_B = Template(""" + // Electric field updates - dispersive materials - part B of updates to electric + // field values when dispersive materials + // (with multiple poles) are present. + // + // Args: + // NX, NY, NZ: Number of cells of the model domain. + // MAXPOLES: Maximum number of dispersive material poles present in model. 
+ // updatecoeffsdispersive, T, ID, E, H: Access to update coefficients, + // dispersive, ID and field + // component arrays. + + + // Convert the linear index to subscripts for 3D field arrays + int x = i / ($NY_FIELDS * $NZ_FIELDS); + int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS; + int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS; + + // Convert the linear index to subscripts for 4D material ID array + int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID); + int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID; + int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID; + + // Convert the linear index to subscripts for 4D dispersive array + int x_T = (i % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T); + int y_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T; + int z_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T; + + // Ex component + if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) { + int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)]; + for (int pole = 0; pole < MAXPOLES; pole++) { + Tx[IDX4D_T(pole,x_T,y_T,z_T)] = Tx[IDX4D_T(pole,x_T,y_T,z_T)] - + updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(x,y,z)]; + } + } + + // Ey component + if ((NX != 1 || NZ != 1) && x > 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) { + int materialEy = ID[IDX4D_ID(1,x_ID,y_ID,z_ID)]; + for (int pole = 0; pole < MAXPOLES; pole++) { + Ty[IDX4D_T(pole,x_T,y_T,z_T)] = Ty[IDX4D_T(pole,x_T,y_T,z_T)] - + updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(x,y,z)]; + } + } + + // Ez component + if ((NX != 1 || NY != 1) && x > 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) { + int materialEz = ID[IDX4D_ID(2,x_ID,y_ID,z_ID)]; + for (int pole = 0; pole < MAXPOLES; pole++) { + Tz[IDX4D_T(pole,x_T,y_T,z_T)] = Tz[IDX4D_T(pole,x_T,y_T,z_T)] - + updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * 
Ez[IDX3D_FIELDS(x,y,z)]; + } + } +""") \ No newline at end of file diff --git a/gprMax/cuda_opencl_el/knl_pml_updates_electric_HORIPML.py b/gprMax/cuda_opencl_el/knl_pml_updates_electric_HORIPML.py new file mode 100644 index 00000000..3a6f50c7 --- /dev/null +++ b/gprMax/cuda_opencl_el/knl_pml_updates_electric_HORIPML.py @@ -0,0 +1,1051 @@ +# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +# +# This file is part of gprMax. +# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see <http://www.gnu.org/licenses/>. 
+ +from string import Template + +x_args = Template("int xs, " + "int xf, " + "int ys, " + "int yf, " + "int zs, " + "int zf, " + "int NX_PHI1, " + "int NY_PHI1, " + "int NZ_PHI1, " + "int NX_PHI2, " + "int NY_PHI2, " + "int NZ_PHI2, " + "int NY_R, " + "__global const unsigned int* restrict ID, " + "__global const $REAL* restrict Ex, " + "__global $REAL *Ey, " + "__global $REAL *Ez, " + "__global const $REAL* restrict Hx, " + "__global const $REAL* restrict Hy, " + "__global const $REAL* restrict Hz, " + "__global $REAL *PHI1, " + "__global $REAL *PHI2, " + "__global const $REAL* restrict RA, " + "__global const $REAL* restrict RB, " + "__global const $REAL* restrict RE, " + "__global const $REAL* restrict RF, " + "$REAL d") + +y_args = Template("int xs, " + "int xf, " + "int ys, " + "int yf, " + "int zs, " + "int zf, " + "int NX_PHI1, " + "int NY_PHI1, " + "int NZ_PHI1, " + "int NX_PHI2, " + "int NY_PHI2, " + "int NZ_PHI2, " + "int NY_R, " + "__global const unsigned int* restrict ID, " + "__global $REAL *Ex, " + "__global const $REAL* restrict Ey, " + "__global $REAL *Ez, " + "__global const $REAL* restrict Hx, " + "__global const $REAL* restrict Hy, " + "__global const $REAL* restrict Hz, " + "__global $REAL *PHI1, " + "__global $REAL *PHI2, " + "__global const $REAL* restrict RA, " + "__global const $REAL* restrict RB, " + "__global const $REAL* restrict RE, " + "__global const $REAL* restrict RF, " + "$REAL d") + +z_args = Template("int xs, " + "int xf, " + "int ys, " + "int yf, " + "int zs, " + "int zf, " + "int NX_PHI1, " + "int NY_PHI1, " + "int NZ_PHI1, " + "int NX_PHI2, " + "int NY_PHI2, " + "int NZ_PHI2, " + "int NY_R, " + "__global const unsigned int* restrict ID, " + "__global $REAL *Ex, " + "__global $REAL *Ey, " + "__global const $REAL* restrict Ez, " + "__global const $REAL* restrict Hx, " + "__global const $REAL* restrict Hy, " + "__global const $REAL* restrict Hz, " + "__global $REAL *PHI1, " + "__global $REAL *PHI2, " + "__global const $REAL* 
restrict RA, " + "__global const $REAL* restrict RB, " + "__global const $REAL* restrict RE, " + "__global const $REAL* restrict RF, " + "$REAL d") + +order1_xminus = {'args': x_args, + 'func': Template(""" + // This function updates the Ey and Ez field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dHy, dHz; + $REAL dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - i1; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i1)] - 1; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHz + RB0 * 
PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - i2; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i2)] - 1; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +""")} + +order2_xminus = {'args': x_args, + 'func': Template(""" + // This function updates the Ey and Ez field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz; + $REAL dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - i1; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i1)]; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + RA1 = RA[IDX2D_R(1,i1)]; + RB1 = RB[IDX2D_R(1,i1)]; + RE1 = RE[IDX2D_R(1,i1)]; + RF1 = RF[IDX2D_R(1,i1)]; + RA01 = RA[IDX2D_R(0,i1)] * RA[IDX2D_R(1,i1)] - 1; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - i2; + jj = j2 + ys; + kk = k2 + zs; + + 
// PML coefficients + RA0 = RA[IDX2D_R(0,i2)]; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + RA1 = RA[IDX2D_R(1,i2)]; + RB1 = RB[IDX2D_R(1,i2)]; + RE1 = RE[IDX2D_R(1,i2)]; + RF1 = RF[IDX2D_R(1,i2)]; + RA01 = RA[IDX2D_R(0,i2)] * RA[IDX2D_R(1,i2)] - 1; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHy + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dHy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +""")} + +order1_xplus = {'args': x_args, + 'func': Template(""" + // This function updates the Ey and Ez field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dHy, dHz; + $REAL dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i1)] - 1; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i2)] - 1; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] + 
updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +""")} + +order2_xplus = {'args': x_args, + 'func': Template(""" + // This function updates the Ey and Ez field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz; + $REAL dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i1)]; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + RA1 = RA[IDX2D_R(1,i1)]; + RB1 = RB[IDX2D_R(1,i1)]; + RE1 = RE[IDX2D_R(1,i1)]; + RF1 = RF[IDX2D_R(1,i1)]; + RA01 = RA[IDX2D_R(0,i1)] * RA[IDX2D_R(1,i1)] - 1; + + // Ey + materialEy = 
ID[IDX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i2)]; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + RA1 = RA[IDX2D_R(1,i2)]; + RB1 = RB[IDX2D_R(1,i2)]; + RE1 = RE[IDX2D_R(1,i2)]; + RF1 = RF[IDX2D_R(1,i2)]; + RA01 = RA[IDX2D_R(0,i2)] * RA[IDX2D_R(1,i2)] - 1; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHy + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dHy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +""")} + +order1_yminus = {'args': y_args, + 'func': Template(""" + // This function updates the Ex and Ez field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dHx, dHz; + $REAL dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - j1; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j1)] - 1; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + + // Ex + materialEx = ID[IDX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - j2; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j2)] - 1; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] - 
updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +""")} + +order2_yminus = {'args': y_args, + 'func': Template(""" + // This function updates the Ex and Ez field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz; + $REAL dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - j1; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j1)]; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + RA1 = RA[IDX2D_R(1,j1)]; + RB1 = RB[IDX2D_R(1,j1)]; + RE1 = RE[IDX2D_R(1,j1)]; + RF1 = RF[IDX2D_R(1,j1)]; + RA01 = RA[IDX2D_R(0,j1)] * RA[IDX2D_R(1,j1)] - 1; + + // Ex + materialEx = 
ID[IDX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - j2; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j2)]; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + RA1 = RA[IDX2D_R(1,j2)]; + RB1 = RB[IDX2D_R(1,j2)]; + RE1 = RE[IDX2D_R(1,j2)]; + RF1 = RF[IDX2D_R(1,j2)]; + RA01 = RA[IDX2D_R(0,j2)] * RA[IDX2D_R(1,j2)] - 1; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +""")} + +order1_yplus = {'args': y_args, + 'func': Template(""" + // This function updates the Ex and Ez field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dHx, dHz; + $REAL dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j1)] - 1; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + + // Ex + materialEx = ID[IDX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j2)] - 1; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] - 
updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +""")} + +order2_yplus = {'args': y_args, + 'func': Template(""" + // This function updates the Ex and Ez field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz; + $REAL dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j1)]; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + RA1 = RA[IDX2D_R(1,j1)]; + RB1 = RB[IDX2D_R(1,j1)]; + RE1 = RE[IDX2D_R(1,j1)]; + RF1 = RF[IDX2D_R(1,j1)]; + RA01 = RA[IDX2D_R(0,j1)] * RA[IDX2D_R(1,j1)] - 1; + + // Ex + materialEx = 
ID[IDX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[IDX3D_FIELDS(ii,jj,kk)] - Hz[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dHz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j2)]; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + RA1 = RA[IDX2D_R(1,j2)]; + RB1 = RB[IDX2D_R(1,j2)]; + RE1 = RE[IDX2D_R(1,j2)]; + RF1 = RF[IDX2D_R(1,j2)]; + RA01 = RA[IDX2D_R(0,j2)] * RA[IDX2D_R(1,j2)] - 1; + + // Ez + materialEz = ID[IDX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[IDX3D_FIELDS(ii,jj,kk)] = Ez[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] * + (RA01 * dHx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +""")} + +order1_zminus = {'args': z_args, + 'func': Template(""" + // This function updates the Ex and Ey field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dHx, dHy; + $REAL dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - k1; + + // PML coefficients + RA01 = RA[IDX2D_R(0,k1)] - 1; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + + // Ex + materialEx = ID[IDX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - k2; + + // PML coefficients + RA01 = RA[IDX2D_R(0,k2)] - 1; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] + 
updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +""")} + +order2_zminus = {'args': z_args, + 'func': Template(""" + // This function updates the Ex and Ey field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy; + $REAL dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - k1; + + // PML coefficients + RA0 = RA[IDX2D_R(0,k1)]; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + RA1 = RA[IDX2D_R(1,k1)]; + RB1 = RB[IDX2D_R(1,k1)]; + RE1 = RE[IDX2D_R(1,k1)]; + RF1 = RF[IDX2D_R(1,k1)]; + RA01 = RA[IDX2D_R(0,k1)] * RA[IDX2D_R(1,k1)] - 1; + + // Ex + materialEx = 
ID[IDX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHy + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dHy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - k2; + + // PML coefficients + RA0 = RA[IDX2D_R(0,k2)]; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + RA1 = RA[IDX2D_R(1,k2)]; + RB1 = RB[IDX2D_R(1,k2)]; + RE1 = RE[IDX2D_R(1,k2)]; + RF1 = RF[IDX2D_R(1,k2)]; + RA01 = RA[IDX2D_R(0,k2)] * RA[IDX2D_R(1,k2)] - 1; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +""")} + +order1_zplus = {'args': z_args, + 'func': Template(""" + // This function updates the Ex and Ey field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dHx, dHy; + $REAL dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,k1)] - 1; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + + // Ex + materialEx = ID[IDX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,k2)] - 1; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] + 
updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +""")} + +order2_zplus = {'args': z_args, + 'func': Template(""" + // This function updates the Ex and Ey field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy; + $REAL dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,k1)]; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + RA1 = RA[IDX2D_R(1,k1)]; + RB1 = RB[IDX2D_R(1,k1)]; + RE1 = RE[IDX2D_R(1,k1)]; + RF1 = RF[IDX2D_R(1,k1)]; + RA01 = RA[IDX2D_R(0,k1)] * RA[IDX2D_R(1,k1)] - 1; + + // Ex + materialEx = 
ID[IDX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[IDX3D_FIELDS(ii,jj,kk)] - Hy[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[IDX3D_FIELDS(ii,jj,kk)] = Ex[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] * + (RA01 * dHy + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dHy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,k2)]; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + RA1 = RA[IDX2D_R(1,k2)]; + RB1 = RB[IDX2D_R(1,k2)]; + RE1 = RE[IDX2D_R(1,k2)]; + RF1 = RF[IDX2D_R(1,k2)]; + RA01 = RA[IDX2D_R(0,k2)] * RA[IDX2D_R(1,k2)] - 1; + + // Ey + materialEy = ID[IDX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[IDX3D_FIELDS(ii,jj,kk)] - Hx[IDX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[IDX3D_FIELDS(ii,jj,kk)] = Ey[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[IDX2D_MAT(materialEy,4)] * + (RA01 * dHx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +""")} + diff --git a/gprMax/cuda_opencl_el/knl_pml_updates_magnetic_HORIPML.py b/gprMax/cuda_opencl_el/knl_pml_updates_magnetic_HORIPML.py new file mode 100644 index 00000000..1201659d --- /dev/null +++ b/gprMax/cuda_opencl_el/knl_pml_updates_magnetic_HORIPML.py @@ -0,0 +1,1049 @@ +# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +# +# This file is part of gprMax. 
+# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see <http://www.gnu.org/licenses/>. + +from string import Template + +x_args = Template("int xs, " + "int xf, " + "int ys, " + "int yf, " + "int zs, " + "int zf, " + "int NX_PHI1, " + "int NY_PHI1, " + "int NZ_PHI1, " + "int NX_PHI2, " + "int NY_PHI2, " + "int NZ_PHI2, " + "int NY_R, " + "__global const unsigned int* restrict ID, " + "__global const $REAL* restrict Ex, " + "__global const $REAL* restrict Ey, " + "__global const $REAL* restrict Ez, " + "__global const $REAL* restrict Hx, " + "__global $REAL *Hy, " + "__global $REAL *Hz, " + "__global $REAL *PHI1, " + "__global $REAL *PHI2, " + "__global const $REAL* restrict RA, " + "__global const $REAL* restrict RB, " + "__global const $REAL* restrict RE, " + "__global const $REAL* restrict RF, " + "$REAL d") + +y_args = Template("int xs, " + "int xf, " + "int ys, " + "int yf, " + "int zs, " + "int zf, " + "int NX_PHI1, " + "int NY_PHI1, " + "int NZ_PHI1, " + "int NX_PHI2, " + "int NY_PHI2, " + "int NZ_PHI2, " + "int NY_R, " + "__global const unsigned int* restrict ID, " + "__global const $REAL* restrict Ex, " + "__global const $REAL* restrict Ey, " + "__global const $REAL* restrict Ez, " + "__global $REAL *Hx, " + "__global const $REAL* restrict Hy, " + "__global $REAL *Hz, " + "__global $REAL *PHI1, " + "__global $REAL *PHI2, " + "__global const $REAL* restrict RA, " + "__global const $REAL* restrict RB, " + "__global const $REAL* restrict RE, " + "__global
const $REAL* restrict RF, " + "$REAL d") + +z_args = Template("int xs, " + "int xf, " + "int ys, " + "int yf, " + "int zs, " + "int zf, " + "int NX_PHI1, " + "int NY_PHI1, " + "int NZ_PHI1, " + "int NX_PHI2, " + "int NY_PHI2, " + "int NZ_PHI2, " + "int NY_R, " + "__global const unsigned int* restrict ID, " + "__global const $REAL* restrict Ex, " + "__global const $REAL* restrict Ey, " + "__global const $REAL* restrict Ez, " + "__global $REAL *Hx, " + "__global $REAL *Hy, " + "__global const $REAL* restrict Hz, " + "__global $REAL *PHI1, " + "__global $REAL *PHI2, " + "__global const $REAL* restrict RA, " + "__global const $REAL* restrict RB, " + "__global const $REAL* restrict RE, " + "__global const $REAL* restrict RF, " + "$REAL d") + +order1_xminus = {'args': x_args, + 'func': Template(""" + // This function updates the Hy and Hz field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dEy, dEz; + $REAL dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - (i1 + 1); + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i1)] - 1; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii+1,jj,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - (i2 + 1); + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i2)] - 1; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii+1,jj,kk)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] - 
updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +""")} + +order2_xminus = {'args': x_args, + 'func': Template(""" + // This function updates the Hy and Hz field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz; + $REAL dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - (i1 + 1); + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i1)]; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + RA1 = RA[IDX2D_R(1,i1)]; + RB1 = RB[IDX2D_R(1,i1)]; + RE1 = RE[IDX2D_R(1,i1)]; + RF1 = RF[IDX2D_R(1,i1)]; + RA01 = RA[IDX2D_R(0,i1)] * RA[IDX2D_R(1,i1)] - 1; + + // Hy + materialHy = 
ID[IDX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii+1,jj,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - (i2 + 1); + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i2)]; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + RA1 = RA[IDX2D_R(1,i2)]; + RB1 = RB[IDX2D_R(1,i2)]; + RE1 = RE[IDX2D_R(1,i2)]; + RF1 = RF[IDX2D_R(1,i2)]; + RA01 = RA[IDX2D_R(0,i2)] * RA[IDX2D_R(1,i2)] - 1; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii+1,jj,kk)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEy + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dEy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +""")} + +order1_xplus = {'args': x_args, + 'func': Template(""" + // This function updates the Hy and Hz field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dEy, dEz; + $REAL dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i1)] - 1; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii+1,jj,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,i2)] - 1; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii+1,jj,kk)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] - 
updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +""")} + +order2_xplus = {'args': x_args, + 'func': Template(""" + // This function updates the Hy and Hz field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz; + $REAL dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i1)]; + RB0 = RB[IDX2D_R(0,i1)]; + RE0 = RE[IDX2D_R(0,i1)]; + RF0 = RF[IDX2D_R(0,i1)]; + RA1 = RA[IDX2D_R(1,i1)]; + RB1 = RB[IDX2D_R(1,i1)]; + RE1 = RE[IDX2D_R(1,i1)]; + RF1 = RF[IDX2D_R(1,i1)]; + RA01 = RA[IDX2D_R(0,i1)] * RA[IDX2D_R(1,i1)] - 1; + + // Hy + materialHy = 
ID[IDX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii+1,jj,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,i2)]; + RB0 = RB[IDX2D_R(0,i2)]; + RE0 = RE[IDX2D_R(0,i2)]; + RF0 = RF[IDX2D_R(0,i2)]; + RA1 = RA[IDX2D_R(1,i2)]; + RB1 = RB[IDX2D_R(1,i2)]; + RE1 = RE[IDX2D_R(1,i2)]; + RF1 = RF[IDX2D_R(1,i2)]; + RA01 = RA[IDX2D_R(0,i2)] * RA[IDX2D_R(1,i2)] - 1; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii+1,jj,kk)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEy + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dEy + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +""")} + +order1_yminus = {'args': y_args, + 'func': Template(""" + // This function updates the Hx and Hz field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dEx, dEz; + $REAL dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - (j1 + 1); + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j1)] - 1; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + + // Hx + materialHx = ID[IDX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii,jj+1,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - (j2 + 1); + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j2)] - 1; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj+1,kk)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] + 
updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +""")} + +order2_yminus = {'args': y_args, + 'func': Template(""" + // This function updates the Hx and Hz field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz; + $REAL dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - (j1 + 1); + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j1)]; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + RA1 = RA[IDX2D_R(1,j1)]; + RB1 = RB[IDX2D_R(1,j1)]; + RE1 = RE[IDX2D_R(1,j1)]; + RF1 = RF[IDX2D_R(1,j1)]; + RA01 = RA[IDX2D_R(0,j1)] * RA[IDX2D_R(1,j1)] - 1; + + // Hx + materialHx = 
ID[IDX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii,jj+1,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - (j2 + 1); + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j2)]; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + RA1 = RA[IDX2D_R(1,j2)]; + RB1 = RB[IDX2D_R(1,j2)]; + RE1 = RE[IDX2D_R(1,j2)]; + RF1 = RF[IDX2D_R(1,j2)]; + RA01 = RA[IDX2D_R(0,j2)] * RA[IDX2D_R(1,j2)] - 1; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj+1,kk)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +""")} + +order1_yplus = {'args': y_args, + 'func': Template(""" + // This function updates the Hx and Hz field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dEx, dEz; + $REAL dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j1)] - 1; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + + // Hx + materialHx = ID[IDX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii,jj+1,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,j2)] - 1; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj+1,kk)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] + 
updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +""")} + +order2_yplus = {'args': y_args, + 'func': Template(""" + // This function updates the Hx and Hz field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz; + $REAL dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j1)]; + RB0 = RB[IDX2D_R(0,j1)]; + RE0 = RE[IDX2D_R(0,j1)]; + RF0 = RF[IDX2D_R(0,j1)]; + RA1 = RA[IDX2D_R(1,j1)]; + RB1 = RB[IDX2D_R(1,j1)]; + RE1 = RE[IDX2D_R(1,j1)]; + RF1 = RF[IDX2D_R(1,j1)]; + RA01 = RA[IDX2D_R(0,j1)] * RA[IDX2D_R(1,j1)] - 1; + + // Hx + materialHx = 
ID[IDX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[IDX3D_FIELDS(ii,jj+1,kk)] - Ez[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEz + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dEz + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,j2)]; + RB0 = RB[IDX2D_R(0,j2)]; + RE0 = RE[IDX2D_R(0,j2)]; + RF0 = RF[IDX2D_R(0,j2)]; + RA1 = RA[IDX2D_R(1,j2)]; + RB1 = RB[IDX2D_R(1,j2)]; + RE1 = RE[IDX2D_R(1,j2)]; + RF1 = RF[IDX2D_R(1,j2)]; + RA01 = RA[IDX2D_R(0,j2)] * RA[IDX2D_R(1,j2)] - 1; + + // Hz + materialHz = ID[IDX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj+1,kk)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[IDX3D_FIELDS(ii,jj,kk)] = Hz[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHz,4)] * + (RA01 * dEx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +""")} + +order1_zminus = {'args': z_args, + 'func': Template(""" + // This function updates the Hx and Hy field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dEx, dEy; + $REAL dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - (k1 + 1); + + // PML coefficients + RA01 = RA[IDX2D_R(0,k1)] - 1; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + + // Hx + materialHx = ID[IDX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii,jj,kk+1)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - (k2 + 1); + + // PML coefficients + RA01 = RA[IDX2D_R(0,k2)] - 1; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj,kk+1)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] - 
updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +""")} + +order2_zminus = {'args': z_args, + 'func': Template(""" + // This function updates the Hx and Hy field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy; + $REAL dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - (k1 + 1); + + // PML coefficients + RA0 = RA[IDX2D_R(0,k1)]; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + RA1 = RA[IDX2D_R(1,k1)]; + RB1 = RB[IDX2D_R(1,k1)]; + RE1 = RE[IDX2D_R(1,k1)]; + RF1 = RF[IDX2D_R(1,k1)]; + RA01 = RA[IDX2D_R(0,k1)] * RA[IDX2D_R(1,k1)] - 1; + + // Hx + materialHx = 
ID[IDX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii,jj,kk+1)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEy + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dEy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - (k2 + 1); + + // PML coefficients + RA0 = RA[IDX2D_R(0,k2)]; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + RA1 = RA[IDX2D_R(1,k2)]; + RB1 = RB[IDX2D_R(1,k2)]; + RE1 = RE[IDX2D_R(1,k2)]; + RF1 = RF[IDX2D_R(1,k2)]; + RA01 = RA[IDX2D_R(0,k2)] * RA[IDX2D_R(1,k2)] - 1; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj,kk+1)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +""")} + +order1_zplus = {'args': z_args, + 'func': Template(""" + // This function updates the Hx and Hy field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dEx, dEy; + $REAL dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,k1)] - 1; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + + // Hx + materialHx = ID[IDX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii,jj,kk+1)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[IDX2D_R(0,k2)] - 1; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj,kk+1)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] - 
updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +""")} + +order2_zplus = {'args': z_args, + 'func': Template(""" + // This function updates the Hx and Hy field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = i / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((i % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy; + $REAL dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,k1)]; + RB0 = RB[IDX2D_R(0,k1)]; + RE0 = RE[IDX2D_R(0,k1)]; + RF0 = RF[IDX2D_R(0,k1)]; + RA1 = RA[IDX2D_R(1,k1)]; + RB1 = RB[IDX2D_R(1,k1)]; + RE1 = RE[IDX2D_R(1,k1)]; + RF1 = RF[IDX2D_R(1,k1)]; + RA01 = RA[IDX2D_R(0,k1)] * RA[IDX2D_R(1,k1)] - 1; + + // Hx + materialHx = 
ID[IDX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[IDX3D_FIELDS(ii,jj,kk+1)] - Ey[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[IDX3D_FIELDS(ii,jj,kk)] = Hx[IDX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[IDX2D_MAT(materialHx,4)] * + (RA01 * dEy + RA1 * RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] + + RB1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)]); + PHI1[IDX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[IDX4D_PHI1(1,i1,j1,k1)] - RF1 * + (RA0 * dEy + RB0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)]); + PHI1[IDX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[IDX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[IDX2D_R(0,k2)]; + RB0 = RB[IDX2D_R(0,k2)]; + RE0 = RE[IDX2D_R(0,k2)]; + RF0 = RF[IDX2D_R(0,k2)]; + RA1 = RA[IDX2D_R(1,k2)]; + RB1 = RB[IDX2D_R(1,k2)]; + RE1 = RE[IDX2D_R(1,k2)]; + RF1 = RF[IDX2D_R(1,k2)]; + RA01 = RA[IDX2D_R(0,k2)] * RA[IDX2D_R(1,k2)] - 1; + + // Hy + materialHy = ID[IDX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[IDX3D_FIELDS(ii,jj,kk+1)] - Ex[IDX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[IDX3D_FIELDS(ii,jj,kk)] = Hy[IDX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] * + (RA01 * dEx + RA1 * RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] + + RB1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)]); + PHI2[IDX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[IDX4D_PHI2(1,i2,j2,k2)] - RF1 * + (RA0 * dEx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]); + PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +""")} + diff --git a/gprMax/cuda_opencl_el/knl_snapshots.py b/gprMax/cuda_opencl_el/knl_snapshots.py new file mode 100644 index 00000000..2da13396 --- /dev/null +++ b/gprMax/cuda_opencl_el/knl_snapshots.py @@ -0,0 +1,72 @@ +# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +# +# This file is part of gprMax. 
+# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see <http://www.gnu.org/licenses/>. + +from string import Template + + +store_snapshot = Template(""" + // Stores field values for a snapshot. + // + // Args: + // p: Snapshot number. + // xs, xf, ys, yf, zs, zf: Start and finish cell coordinates for snapshot. + // dx, dy, dz: Sampling interval in cell coordinates for snapshot. + // E, H: Access to field component arrays. + // snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots.
+ + + // Convert the linear index to subscripts for 4D SNAPS array + int x = (i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) / ($NY_SNAPS * $NZ_SNAPS); + int y = ((i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) / $NZ_SNAPS; + int z = ((i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) % $NZ_SNAPS; + + // Subscripts for field arrays + int xx, yy, zz; + + if (x >= xs && x < xf && y >= ys && y < yf && z >= zs && z < zf) { + + // Increment subscripts for field array to account for spatial sampling of snapshot + xx = (xs + x) * dx; + yy = (ys + y) * dy; + zz = (zs + z) * dz; + + // The electric field component value at a point comes from an average of + // the 4 electric field component values in that cell + snapEx[IDX4D_SNAPS(p,x,y,z)] = (Ex[IDX3D_FIELDS(xx,yy,zz)] + + Ex[IDX3D_FIELDS(xx,yy+1,zz)] + + Ex[IDX3D_FIELDS(xx,yy,zz+1)] + + Ex[IDX3D_FIELDS(xx,yy+1,zz+1)]) / 4; + snapEy[IDX4D_SNAPS(p,x,y,z)] = (Ey[IDX3D_FIELDS(xx,yy,zz)] + + Ey[IDX3D_FIELDS(xx+1,yy,zz)] + + Ey[IDX3D_FIELDS(xx,yy,zz+1)] + + Ey[IDX3D_FIELDS(xx+1,yy,zz+1)]) / 4; + snapEz[IDX4D_SNAPS(p,x,y,z)] = (Ez[IDX3D_FIELDS(xx,yy,zz)] + + Ez[IDX3D_FIELDS(xx+1,yy,zz)] + + Ez[IDX3D_FIELDS(xx,yy+1,zz)] + + Ez[IDX3D_FIELDS(xx+1,yy+1,zz)]) / 4; + + // The magnetic field component value at a point comes from average of + // 2 magnetic field component values in that cell and the following cell + snapHx[IDX4D_SNAPS(p,x,y,z)] = (Hx[IDX3D_FIELDS(xx,yy,zz)] + + Hx[IDX3D_FIELDS(xx+1,yy,zz)]) / 2; + snapHy[IDX4D_SNAPS(p,x,y,z)] = (Hy[IDX3D_FIELDS(xx,yy,zz)] + + Hy[IDX3D_FIELDS(xx,yy+1,zz)]) / 2; + snapHz[IDX4D_SNAPS(p,x,y,z)] = (Hz[IDX3D_FIELDS(xx,yy,zz)] + + Hz[IDX3D_FIELDS(xx,yy,zz+1)]) / 2; + } +""") \ No newline at end of file diff --git a/gprMax/cuda_opencl_el/knl_source_updates.py b/gprMax/cuda_opencl_el/knl_source_updates.py new file mode 100644 index 00000000..a7ca3372 --- /dev/null +++ b/gprMax/cuda_opencl_el/knl_source_updates.py @@ -0,0 +1,173 @@ +# Copyright (C) 2015-2022: The 
University of Edinburgh, United Kingdom +# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +# +# This file is part of gprMax. +# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see <http://www.gnu.org/licenses/>. + +from string import Template + +update_hertzian_dipole = Template(""" + // Updates electric field values for Hertzian dipole sources. + // + // Args: + // NHERTZDIPOLE: Total number of Hertzian dipoles in the model. + // iteration: Iteration number of simulation. + // dx, dy, dz: Spatial discretisations. + // srcinfo1: Source cell coordinates and polarisation information. + // srcinfo2: Other source information, e.g. length, resistance etc... + // srcwaveforms: Source waveform values. + // ID, E: Access to ID and field component arrays.
+ + + if (i < NHERTZDIPOLE) { + + $REAL dl; + int x, y, z, polarisation; + + x = srcinfo1[IDX2D_SRCINFO(i,0)]; + y = srcinfo1[IDX2D_SRCINFO(i,1)]; + z = srcinfo1[IDX2D_SRCINFO(i,2)]; + polarisation = srcinfo1[IDX2D_SRCINFO(i,3)]; + dl = srcinfo2[i]; + + // 'x' polarised source + if (polarisation == 0) { + int materialEx = ID[IDX4D_ID(0,x,y,z)]; + Ex[IDX3D_FIELDS(x,y,z)] = Ex[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] * + srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * dl * (1 / (dx * dy * dz)); + } + + // 'y' polarised source + else if (polarisation == 1) { + int materialEy = ID[IDX4D_ID(1,x,y,z)]; + Ey[IDX3D_FIELDS(x,y,z)] = Ey[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] * + srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * dl * (1 / (dx * dy * dz)); + } + + // 'z' polarised source + else if (polarisation == 2) { + int materialEz = ID[IDX4D_ID(2,x,y,z)]; + Ez[IDX3D_FIELDS(x,y,z)] = Ez[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] * + srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * dl * (1 / (dx * dy * dz)); + } + } +""") + +update_magnetic_dipole = Template(""" + // Updates magnetic field values for magnetic dipole sources. + // + // Args: + // NMAGDIPOLE: Total number of magnetic dipoles in the model. + // iteration: Iteration number of simulation. + // dx, dy, dz: Spatial discretisations. + // srcinfo1: Source cell coordinates and polarisation information. + // srcinfo2: Other source information, e.g. length, resistance etc... + // srcwaveforms: Source waveform values. + // ID, H: Access to ID and field component arrays.
+ + + if (i < NMAGDIPOLE) { + + int x, y, z, polarisation; + + x = srcinfo1[IDX2D_SRCINFO(i,0)]; + y = srcinfo1[IDX2D_SRCINFO(i,1)]; + z = srcinfo1[IDX2D_SRCINFO(i,2)]; + polarisation = srcinfo1[IDX2D_SRCINFO(i,3)]; + + // 'x' polarised source + if (polarisation == 0) { + int materialHx = ID[IDX4D_ID(3,x,y,z)]; + Hx[IDX3D_FIELDS(x,y,z)] = Hx[IDX3D_FIELDS(x,y,z)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] * + srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (dx * dy * dz)); + } + + // 'y' polarised source + else if (polarisation == 1) { + int materialHy = ID[IDX4D_ID(4,x,y,z)]; + Hy[IDX3D_FIELDS(x,y,z)] = Hy[IDX3D_FIELDS(x,y,z)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] * + srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (dx * dy * dz)); + } + + // 'z' polarised source + else if (polarisation == 2) { + int materialHz = ID[IDX4D_ID(5,x,y,z)]; + Hz[IDX3D_FIELDS(x,y,z)] = Hz[IDX3D_FIELDS(x,y,z)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] * + srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (dx * dy * dz)); + } + } +""") + +update_voltage_source = Template(""" + // Updates electric field values for voltage sources. + // + // Args: + // NVOLTSRC: Total number of voltage sources in the model. + // iteration: Iteration number of simulation. + // dx, dy, dz: Spatial discretisations. + // srcinfo1: Source cell coordinates and polarisation information. + // srcinfo2: Other source information, e.g. length, resistance etc... + // srcwaveforms: Source waveform values. + // ID, E: Access to ID and field component arrays. 
+ + + if (i < NVOLTSRC) { + + $REAL resistance; + int x, y, z, polarisation; + + x = srcinfo1[IDX2D_SRCINFO(i,0)]; + y = srcinfo1[IDX2D_SRCINFO(i,1)]; + z = srcinfo1[IDX2D_SRCINFO(i,2)]; + polarisation = srcinfo1[IDX2D_SRCINFO(i,3)]; + resistance = srcinfo2[i]; + + // 'x' polarised source + if (polarisation == 0) { + if (resistance != 0) { + int materialEx = ID[IDX4D_ID(0,x,y,z)]; + Ex[IDX3D_FIELDS(x,y,z)] = Ex[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] * + srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (resistance * dy * dz)); + } + else { + Ex[IDX3D_FIELDS(x,y,z)] = -1 * srcwaveforms[IDX2D_SRCWAVES(i,iteration)] / dx; + } + } + + // 'y' polarised source + else if (polarisation == 1) { + if (resistance != 0) { + int materialEy = ID[IDX4D_ID(1,x,y,z)]; + Ey[IDX3D_FIELDS(x,y,z)] = Ey[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] * + srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (resistance * dx * dz)); + } + else { + Ey[IDX3D_FIELDS(x,y,z)] = -1 * srcwaveforms[IDX2D_SRCWAVES(i,iteration)] / dy; + } + } + + // 'z' polarised source + else if (polarisation == 2) { + if (resistance != 0) { + int materialEz = ID[IDX4D_ID(2,x,y,z)]; + Ez[IDX3D_FIELDS(x,y,z)] = Ez[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] * + srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (resistance * dx * dy)); + } + else { + Ez[IDX3D_FIELDS(x,y,z)] = -1 * srcwaveforms[IDX2D_SRCWAVES(i,iteration)] / dz; + } + } + } +""") \ No newline at end of file diff --git a/gprMax/cuda_opencl_el/knl_store_outputs.py b/gprMax/cuda_opencl_el/knl_store_outputs.py new file mode 100644 index 00000000..496ea3d9 --- /dev/null +++ b/gprMax/cuda_opencl_el/knl_store_outputs.py @@ -0,0 +1,42 @@ +# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +# +# This file is part of gprMax. 
+# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see <http://www.gnu.org/licenses/>. + +from string import Template + + +store_outputs = Template(""" + // Stores field component values for every receiver in the model. + // + // Args: + // NRX: total number of receivers in the model. + // rxs: array to store field components for receivers - rows + // are field components; columns are iterations; pages are receiver. + + if (i < NRX) { + int x, y, z; + x = rxcoords[IDX2D_RXCOORDS(i,0)]; + y = rxcoords[IDX2D_RXCOORDS(i,1)]; + z = rxcoords[IDX2D_RXCOORDS(i,2)]; + rxs[IDX3D_RXS(0,iteration,i)] = Ex[IDX3D_FIELDS(x,y,z)]; + rxs[IDX3D_RXS(1,iteration,i)] = Ey[IDX3D_FIELDS(x,y,z)]; + rxs[IDX3D_RXS(2,iteration,i)] = Ez[IDX3D_FIELDS(x,y,z)]; + rxs[IDX3D_RXS(3,iteration,i)] = Hx[IDX3D_FIELDS(x,y,z)]; + rxs[IDX3D_RXS(4,iteration,i)] = Hy[IDX3D_FIELDS(x,y,z)]; + rxs[IDX3D_RXS(5,iteration,i)] = Hz[IDX3D_FIELDS(x,y,z)]; + } +""") \ No newline at end of file diff --git a/gprMax/cython/fields_updates_dispersive_template b/gprMax/cython/fields_updates_dispersive_template.jinja similarity index 100% rename from gprMax/cython/fields_updates_dispersive_template rename to gprMax/cython/fields_updates_dispersive_template.jinja diff --git a/gprMax/fields_outputs.py b/gprMax/fields_outputs.py index 87a5500e..0d1891e8 100644 --- a/gprMax/fields_outputs.py +++ b/gprMax/fields_outputs.py @@ -56,7 +56,7 @@ def store_outputs(G): tl.Itotal[iteration] = tl.current[tl.antpos]
-kernel_template_store_outputs = Template(""" +knl_template_store_outputs = Template(""" // Macros for converting subscripts to linear index: #define INDEX2D_RXCOORDS(m, n) (m)*($NY_RXCOORDS)+(n) diff --git a/gprMax/gprMax.py b/gprMax/gprMax.py index add69ce4..0c2b600b 100644 --- a/gprMax/gprMax.py +++ b/gprMax/gprMax.py @@ -32,6 +32,7 @@ args_defaults = {'scenes': None, 'restart': None, 'mpi': False, 'gpu': None, + 'opencl': None, 'subgrid': False, 'autotranslate': False, 'geometry_only': False, @@ -67,6 +68,8 @@ help_msg = {'scenes': '(list, opt): List of the scenes to run the model. ' 'performance section of the User Guide.', 'gpu': '(list/bool, opt): Flag to use NVIDIA GPU or list of NVIDIA ' 'GPU device ID(s) for specific GPU card(s).', + 'opencl': '(list/bool, opt): Flag to use OpenCL or list of OpenCL ' + 'device ID(s) for specific compute device(s).', 'subgrid': '(bool, opt): Flag to use sub-gridding.', 'autotranslate': '(bool, opt): For sub-gridding - auto translate ' 'objects with main grid coordinates to their ' @@ -92,6 +95,7 @@ def run(scenes=args_defaults['scenes'], restart=args_defaults['restart'], mpi=args_defaults['mpi'], gpu=args_defaults['gpu'], + opencl=args_defaults['opencl'], subgrid=args_defaults['subgrid'], autotranslate=args_defaults['autotranslate'], geometry_only=args_defaults['geometry_only'], @@ -112,6 +116,7 @@ def run(scenes=args_defaults['scenes'], 'restart': restart, 'mpi': mpi, 'gpu': gpu, + 'opencl': opencl, 'subgrid': subgrid, 'autotranslate': autotranslate, 'geometry_only': geometry_only, @@ -139,6 +144,8 @@ def cli(): help=help_msg['mpi']) parser.add_argument('-gpu', type=int, action='append', nargs='*', help=help_msg['gpu']) + parser.add_argument('-opencl', type=int, action='append', nargs='*', + help=help_msg['opencl']) parser.add_argument('--geometry-only', action='store_true', default=args_defaults['geometry_only'], help=help_msg['geometry_only']) @@ -176,11 +183,11 @@ def run_main(args): if args.spotpy: context = 
SPOTPYContext() context.run(args.i) - # MPI running with (OpenMP/CUDA) + # MPI running with (OpenMP/CUDA/OpenCL) elif config.sim_config.args.mpi: context = MPIContext() context.run() - # Standard running (OpenMP/CUDA) + # Standard running (OpenMP/CUDA/OpenCL) else: context = Context() context.run() diff --git a/gprMax/grid.py b/gprMax/grid.py index 32b34adc..9446b55a 100644 --- a/gprMax/grid.py +++ b/gprMax/grid.py @@ -306,33 +306,74 @@ class CUDAGrid(FDTDGrid): self.bpg = (int(np.ceil(((self.nx + 1) * (self.ny + 1) * (self.nz + 1)) / self.tpb[0])), 1, 1) - def htod_geometry_arrays(self): - """Initialise an array for cell edge IDs (ID) on GPU.""" - import pycuda.gpuarray as gpuarray + def htod_geometry_arrays(self, queue=None): + """Initialise an array for cell edge IDs (ID) on compute device. + + Args: + queue: pyopencl queue. + """ - self.ID_gpu = gpuarray.to_gpu(self.ID) + if config.sim_config.general['solver'] == 'cuda': + import pycuda.gpuarray as gpuarray + self.ID_dev = gpuarray.to_gpu(self.ID) - def htod_field_arrays(self): - """Initialise geometry and field arrays on GPU.""" + elif config.sim_config.general['solver'] == 'opencl': + import pyopencl.array as clarray + self.ID_dev = clarray.to_device(queue, self.ID) - import pycuda.gpuarray as gpuarray + def htod_field_arrays(self, queue=None): + """Initialise field arrays on compute device. + + Args: + queue: pyopencl queue. 
+ """ - self.Ex_gpu = gpuarray.to_gpu(self.Ex) - self.Ey_gpu = gpuarray.to_gpu(self.Ey) - self.Ez_gpu = gpuarray.to_gpu(self.Ez) - self.Hx_gpu = gpuarray.to_gpu(self.Hx) - self.Hy_gpu = gpuarray.to_gpu(self.Hy) - self.Hz_gpu = gpuarray.to_gpu(self.Hz) + if config.sim_config.general['solver'] == 'cuda': + import pycuda.gpuarray as gpuarray + self.Ex_dev = gpuarray.to_gpu(self.Ex) + self.Ey_dev = gpuarray.to_gpu(self.Ey) + self.Ez_dev = gpuarray.to_gpu(self.Ez) + self.Hx_dev = gpuarray.to_gpu(self.Hx) + self.Hy_dev = gpuarray.to_gpu(self.Hy) + self.Hz_dev = gpuarray.to_gpu(self.Hz) + elif config.sim_config.general['solver'] == 'opencl': + import pyopencl.array as clarray + self.Ex_dev = clarray.to_device(queue, self.Ex) + self.Ey_dev = clarray.to_device(queue, self.Ey) + self.Ez_dev = clarray.to_device(queue, self.Ez) + self.Hx_dev = clarray.to_device(queue, self.Hx) + self.Hy_dev = clarray.to_device(queue, self.Hy) + self.Hz_dev = clarray.to_device(queue, self.Hz) - def htod_dispersive_arrays(self): - """Initialise dispersive material coefficient arrays on GPU.""" + def htod_dispersive_arrays(self, queue=None): + """Initialise dispersive material coefficient arrays on compute device. + + Args: + queue: pyopencl queue. 
+ """ - import pycuda.gpuarray as gpuarray + if config.sim_config.general['solver'] == 'cuda': + import pycuda.gpuarray as gpuarray + self.Tx_dev = gpuarray.to_gpu(self.Tx) + self.Ty_dev = gpuarray.to_gpu(self.Ty) + self.Tz_dev = gpuarray.to_gpu(self.Tz) + self.updatecoeffsdispersive_dev = gpuarray.to_gpu(self.updatecoeffsdispersive) + elif config.sim_config.general['solver'] == 'opencl': + import pyopencl.array as clarray + self.Tx_dev = clarray.to_device(queue, self.Tx) + self.Ty_dev = clarray.to_device(queue, self.Ty) + self.Tz_dev = clarray.to_device(queue, self.Tz) + self.updatecoeffsdispersive_dev = clarray.to_device(queue, self.updatecoeffsdispersive) - self.Tx_gpu = gpuarray.to_gpu(self.Tx) - self.Ty_gpu = gpuarray.to_gpu(self.Ty) - self.Tz_gpu = gpuarray.to_gpu(self.Tz) - self.updatecoeffsdispersive_gpu = gpuarray.to_gpu(self.updatecoeffsdispersive) + +class OpenCLGrid(CUDAGrid): + """Additional grid methods for solving on compute device using OpenCL.""" + + def __init__(self): + super().__init__() + + def set_blocks_per_grid(self): + pass def dispersion_analysis(G): diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py index 688259b1..13329d7c 100644 --- a/gprMax/model_build_run.py +++ b/gprMax/model_build_run.py @@ -149,7 +149,9 @@ class ModelBuildRun: # Check memory requirements total_mem, mem_strs = mem_check_all(grids) - logger.info(f'\nMemory required: {" + ".join(mem_strs)} + ~{human_size(config.get_model_config().mem_overhead)} overhead = {human_size(total_mem)}') + logger.info(f'\nMemory required: {" + ".join(mem_strs)} + ' + f'~{human_size(config.get_model_config().mem_overhead)} ' + f'overhead = {human_size(total_mem)}') # Build grids gridbuilders = [GridBuilder(grid) for grid in grids] @@ -170,21 +172,41 @@ class ModelBuildRun: # Check to see if numerical dispersion might be a problem results = dispersion_analysis(gb.grid) if results['error']: - logger.warning(f"\nNumerical dispersion analysis [{gb.grid.name}] not carried out as 
{results['error']}") + logger.warning(f"\nNumerical dispersion analysis [{gb.grid.name}] " + f"not carried out as {results['error']}") elif results['N'] < config.get_model_config().numdispersion['mingridsampling']: - logger.exception(f"\nNon-physical wave propagation in [{gb.grid.name}] detected. Material '{results['material'].ID}' has wavelength sampled by {results['N']} cells, less than required minimum for physical wave propagation. Maximum significant frequency estimated as {results['maxfreq']:g}Hz") + logger.exception(f"\nNon-physical wave propagation in [{gb.grid.name}] " + f"detected. Material '{results['material'].ID}' " + f"has wavelength sampled by {results['N']} cells, " + f"less than required minimum for physical wave " + f"propagation. Maximum significant frequency " + f"estimated as {results['maxfreq']:g}Hz") raise ValueError elif (results['deltavp'] and np.abs(results['deltavp']) > config.get_model_config().numdispersion['maxnumericaldisp']): - logger.warning(f"\n[{gb.grid.name}] has potentially significant numerical dispersion. Estimated largest physical phase-velocity error is {results['deltavp']:.2f}% in material '{results['material'].ID}' whose wavelength sampled by {results['N']} cells. Maximum significant frequency estimated as {results['maxfreq']:g}Hz") + logger.warning(f"\n[{gb.grid.name}] has potentially significant " + f"numerical dispersion. Estimated largest physical " + f"phase-velocity error is {results['deltavp']:.2f}% " + f"in material '{results['material'].ID}' whose " + f"wavelength sampled by {results['N']} cells. " + f"Maximum significant frequency estimated as " + f"{results['maxfreq']:g}Hz") elif results['deltavp']: - logger.info(f"\nNumerical dispersion analysis [{gb.grid.name}]: estimated largest physical phase-velocity error is {results['deltavp']:.2f}% in material '{results['material'].ID}' whose wavelength sampled by {results['N']} cells. 
Maximum significant frequency estimated as {results['maxfreq']:g}Hz") + logger.info(f"\nNumerical dispersion analysis [{gb.grid.name}]: " + f"estimated largest physical phase-velocity error is " + f"{results['deltavp']:.2f}% in material '{results['material'].ID}' " + f"whose wavelength sampled by {results['N']} cells. " + f"Maximum significant frequency estimated as " + f"{results['maxfreq']:g}Hz") def reuse_geometry(self): # Reset iteration number self.G.iteration = 0 - s = f'\n--- Model {config.get_model_config().appendmodelnumber}/{config.sim_config.model_end}, input file (not re-processed, i.e. geometry fixed): {config.sim_config.input_file_path}' - config.get_model_config().inputfilestr = Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" + Style.RESET_ALL + s = (f'\n--- Model {config.get_model_config().appendmodelnumber}/{config.sim_config.model_end}, ' + f'input file (not re-processed, i.e. geometry fixed): ' + f'{config.sim_config.input_file_path}') + config.get_model_config().inputfilestr = (Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" + + Style.RESET_ALL) logger.basic(config.get_model_config().inputfilestr) for grid in [self.G] + self.G.subgrids: grid.reset_fields() @@ -224,7 +246,9 @@ class ModelBuildRun: fn = snapshotdir / Path(snap.filename) snap.filename = fn.with_suffix(snap.fileext) pbar = tqdm(total=snap.vtkdatawritesize, leave=True, unit='byte', - unit_scale=True, desc=f'Writing snapshot file {i + 1} of {len(self.G.snapshots)}, {snap.filename.name}', + unit_scale=True, desc=f'Writing snapshot file {i + 1} ' + f'of {len(self.G.snapshots)}, ' + f'{snap.filename.name}', ncols=get_terminal_width() - 1, file=sys.stdout, disable=not config.sim_config.general['progressbars']) snap.write_file(pbar, self.G) @@ -235,12 +259,12 @@ class ModelBuildRun: """Print resource information on runtime and memory usage. Args: - tsolve (float): Time taken to execute solving (seconds). - memsolve (float): Memory (RAM) used on GPU. 
+ tsolve: float of time taken to execute solving (seconds). + memsolve: float of memory (RAM) used. """ mem_str = '' - if config.sim_config.general['cuda']: + if config.sim_config.general['solver'] == 'cuda': mem_str = f' host + ~{human_size(memsolve)} GPU' logger.info(f'\nMemory used: ~{human_size(self.p.memory_full_info().uss)}{mem_str}') @@ -250,24 +274,37 @@ class ModelBuildRun: """Solve using FDTD method. Args: - solver (Solver): solver object. + solver: solver object. Returns: - tsolve (float): time taken to execute solving (seconds). + tsolve: float of time taken to execute solving (seconds). """ - # Check number of OpenMP threads - if config.sim_config.general['cpu']: - logger.basic(f"CPU solver using: {config.get_model_config().ompthreads} OpenMP thread(s) on {config.sim_config.hostinfo['hostname']}\n") + # Print information about and check OpenMP threads + if config.sim_config.general['solver'] == 'cpu': + logger.basic(f"OPENMP solver with {config.get_model_config().ompthreads} " + f"thread(s) on {config.sim_config.hostinfo['hostname']}\n") if config.get_model_config().ompthreads > config.sim_config.hostinfo['physicalcores']: - logger.warning(f"You have specified more threads ({config.get_model_config().ompthreads}) than available physical CPU cores ({config.sim_config.hostinfo['physicalcores']}). This may lead to degraded performance.") - # Print information about any GPU in use - elif config.sim_config.general['cuda']: - logger.basic(f"GPU solver using: {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} on {config.sim_config.hostinfo['hostname']}\n") + logger.warning(f"You have specified more threads ({config.get_model_config().ompthreads}) " + f"than available physical CPU cores ({config.sim_config.hostinfo['physicalcores']}). " + f"This may lead to degraded performance.") + # Print information about any compute device, e.g. 
GPU, in use + elif config.sim_config.general['solver'] == 'cuda' or config.sim_config.general['solver'] == 'opencl': + solvername = config.sim_config.general['solver'].upper() + hostname = config.sim_config.hostinfo['hostname'] + if config.sim_config.general['solver'] == 'opencl': + platformname = ' on ' + ' '.join(config.get_model_config().device['dev'].platform.name.split()) + ' platform' + else: + platformname = '' + devicename = ' '.join(config.get_model_config().device['dev'].name.split()) + logger.basic(f"{solvername} solver using {devicename}{platformname} " + f"on {hostname}\n") # Prepare iterator if config.sim_config.general['progressbars']: - iterator = tqdm(range(self.G.iterations), desc=f'Running model {config.model_num + 1}/{config.sim_config.model_end}', ncols=get_terminal_width() - 1, file=sys.stdout, disable=not config.sim_config.general['progressbars']) + iterator = tqdm(range(self.G.iterations), desc=f'Running model {config.model_num + 1}/{config.sim_config.model_end}', + ncols=get_terminal_width() - 1, file=sys.stdout, + disable=not config.sim_config.general['progressbars']) else: iterator = range(self.G.iterations) diff --git a/gprMax/opencl/fields_updates.cl b/gprMax/opencl/fields_updates.cl new file mode 100644 index 00000000..5f05fd09 --- /dev/null +++ b/gprMax/opencl/fields_updates.cl @@ -0,0 +1,311 @@ +// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +// +// This file is part of gprMax. +// +// gprMax is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// gprMax is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with gprMax. If not, see <http://www.gnu.org/licenses/>. + + +#include <pyopencl-complex.h> + +#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}}) + (n) +#define INDEX2D_MATDISP(m, n) (m)*({{NY_MATDISPCOEFFS}}) + (n) +#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k) +#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}}) + (i)*({{NY_ID}})*({{NZ_ID}}) + (j)*({{NZ_ID}}) + (k) +#define INDEX4D_T(p, i, j, k) (p)*({{NX_T}})*({{NY_T}})*({{NZ_T}}) + (i)*({{NY_T}})*({{NZ_T}}) + (j)*({{NZ_T}}) + (k) + +// Material coefficients (read-only) in constant memory +__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] = +{ + {% for i in updateEVal %} + {{i}}, + {% endfor %} +}; + +__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] = +{ + {% for i in updateHVal %} + {{i}}, + {% endfor %} +}; + + +/////////////////////////////////////////////// +// Electric field updates - normal materials // +/////////////////////////////////////////////// + +__kernel void update_electric(int NX, int NY, int NZ, + __global const unsigned int* restrict ID, + __global {{REAL}} *Ex, + __global {{REAL}} *Ey, + __global {{REAL}} *Ez, + __global const {{REAL}} * restrict Hx, + __global const {{REAL}} * restrict Hy, + __global const {{REAL}} * restrict Hz) { + + // This function updates electric field values. + // + // Args: + // NX, NY, NZ: Number of cells of the model domain. + // ID, E, H: Access to ID and field component arrays.
+ + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for 3D field arrays + int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}}); + int j = (idx % ({{NY_FIELDS}}*{{NZ_FIELDS}})) / {{NZ_FIELDS}}; + int k = (idx % ({{NY_FIELDS}}*{{NZ_FIELDS}})) % {{NZ_FIELDS}}; + + // Convert the linear index to subscripts for 4D material ID arrays + int i_ID = (idx%({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}}); + int j_ID = ((idx%({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}}; + int k_ID = ((idx%({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}}; + + // Ex component + if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) { + int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)]; + Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] + + updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) - + updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]); + } + + // Ey component + if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) { + int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)]; + Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] + + updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]); + } + + // Ez component + if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) { + int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)]; + Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] 
+ + updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]); + } +} + + +//////////////////////////// +// Magnetic field updates // +//////////////////////////// + +__kernel void update_magnetic(int NX, int NY, int NZ, + __global const unsigned int* restrict ID, + __global {{REAL}} *Hx, + __global {{REAL}} *Hy, + __global {{REAL}} *Hz, + __global const {{REAL}}* restrict Ex, + __global const {{REAL}}* restrict Ey, + __global const {{REAL}}* restrict Ez) { + + // This function updates magnetic field values. + // + // Args: + // NX, NY, NZ: number of cells of the model domain. + // ID, E, H: access to ID and field component arrays. + + // Obtain the linear index corresponding to the current thread (flattened over all three NDRange dimensions, as in update_electric; equals get_global_id(0) for a 1D launch) + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // convert the linear index to subscripts to 3D field arrays + int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}}); + int j = (idx%({{NY_FIELDS}}*{{NZ_FIELDS}})) / {{NZ_FIELDS}}; + int k = (idx%({{NY_FIELDS}}*{{NZ_FIELDS}})) % {{NZ_FIELDS}}; + + // convert the linear index to subscripts to 4D material ID arrays + int i_ID = ( idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}}); + int j_ID = (( idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}}; + int k_ID = (( idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}}; + + // Hx component + if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) { + int materialHx = ID[INDEX4D_ID(3,i_ID,j_ID,k_ID)]; + Hx[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHx,0)] * Hx[INDEX3D_FIELDS(i,j,k)] - + updatecoeffsH[INDEX2D_MAT(materialHx,2)] * (Ez[INDEX3D_FIELDS(i,j+1,k)] - Ez[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHx,3)] * (Ey[INDEX3D_FIELDS(i,j,k+1)] - Ey[INDEX3D_FIELDS(i,j,k)]); + } + + // Hy component + if (NY != 1 && i >= 0 && i < NX && j > 
0 && j < NY && k >= 0 && k < NZ) { + int materialHy = ID[INDEX4D_ID(4,i_ID,j_ID,k_ID)]; + Hy[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHy,0)] * Hy[INDEX3D_FIELDS(i,j,k)] - + updatecoeffsH[INDEX2D_MAT(materialHy,3)] * (Ex[INDEX3D_FIELDS(i,j,k+1)] - Ex[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHy,1)] * (Ez[INDEX3D_FIELDS(i+1,j,k)] - Ez[INDEX3D_FIELDS(i,j,k)]); + } + + // Hz component + if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) { + int materialHz = ID[INDEX4D_ID(5,i_ID,j_ID,k_ID)]; + Hz[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHz,0)] * Hz[INDEX3D_FIELDS(i,j,k)] - + updatecoeffsH[INDEX2D_MAT(materialHz,1)] * (Ey[INDEX3D_FIELDS(i+1,j,k)] - Ey[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHz,2)] * (Ex[INDEX3D_FIELDS(i,j+1,k)] - Ex[INDEX3D_FIELDS(i,j,k)]); + } +} + + +/////////////////////////////////////////////////// +// Electric field updates - dispersive materials // +/////////////////////////////////////////////////// + +__kernel void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, + __global const {{COMPLEX-}}_t* restrict updatecoeffsdispersive, + __global {{COMPLEX-}}_t *Tx, + __global {{COMPLEX-}}_t *Ty, + __global {{COMPLEX-}}_t *Tz, + __global const unsigned int* restrict ID, + __global {{REAL}} *Ex, + __global {{REAL}} *Ey, + __global {{REAL}} *Ez, + __global const {{REAL}}* restrict Hx, + __global const {{REAL}}* restrict Hy, + __global const {{REAL}}* restrict Hz) { + + // This function is part A of updates to electric field values when dispersive materials (with multiple poles) are present. 
+ // + // Args: + // NX, NY, NZ: Number of cells of the model domain + // MAXPOLES: Maximum number of dispersive material poles present in model + // updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for 3D field arrays + int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}}); + int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}}; + int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}}; + + // Convert the linear index to subscripts for 4D material ID array + int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}}); + int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}}; + int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}}; + + // Convert the linear index to subscripts for 4D dispersive array + int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}}); + int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}}; + int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}}; + + // Ex component + if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) { + int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)]; + {{REAL}} phi = 0; + for (int pole = 0; pole < MAXPOLES; pole++) { + phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,pole*3)].real * Tx[INDEX4D_T(pole,i_T,j_T,k_T)].real; + Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_add(cfloat_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,1+(pole*3))], + Tx[INDEX4D_T(pole,i_T,j_T,k_T)]), + cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))], + 
Ex[INDEX3D_FIELDS(i,j,k)])); + } + Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] + + updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * phi; + } + + // Ey component + if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) { + int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)]; + {{REAL}} phi = 0; + for (int pole = 0; pole < MAXPOLES; pole++) { + phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,pole*3)].real * Ty[INDEX4D_T(pole,i_T,j_T,k_T)].real; + Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_add(cfloat_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,1+(pole*3))], + Ty[INDEX4D_T(pole,i_T,j_T,k_T)]), + cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))], + Ey[INDEX3D_FIELDS(i,j,k)])); + } + Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] + + updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * phi; + } + + // Ez component + if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) { + int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)]; + {{REAL}} phi = 0; + for (int pole = 0; pole < MAXPOLES; pole++) { + phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,pole*3)].real * Tz[INDEX4D_T(pole,i_T,j_T,k_T)].real; + Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_add(cfloat_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,1+(pole*3))], + Tz[INDEX4D_T(pole,i_T,j_T,k_T)]), + cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))], + Ez[INDEX3D_FIELDS(i,j,k)])); + } + 
Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] + + updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * phi; + } +} + + +__kernel void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, + __global const {{COMPLEX-}}_t* restrict updatecoeffsdispersive, + __global {{COMPLEX-}}_t *Tx, + __global {{COMPLEX-}}_t *Ty, + __global {{COMPLEX-}}_t *Tz, + __global const unsigned int* restrict ID, + __global const {{REAL}}* restrict Ex, + __global const {{REAL}}* restrict Ey, + __global const {{REAL}}* restrict Ez) { + + // This function is part B which updates the dispersive field arrays when dispersive materials (with multiple poles) are present. + // + // Args: + // NX, NY, NZ: Number of cells of the model domain + // MAXPOLES: Maximum number of dispersive material poles present in model + // updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for 3D field arrays + int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}}); + int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}}; + int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}}; + + // Convert the linear index to subscripts for 4D material ID array + int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}}); + int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}}; + int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}}; + + // Convert the linear index 
to subscripts for 4D dispersive array + int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}}); + int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}}; + int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}}; + + // Ex component + if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) { + int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)]; + for (int pole = 0; pole < MAXPOLES; pole++) { + Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_sub(Tx[INDEX4D_T(pole,i_T,j_T,k_T)], + cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))], + Ex[INDEX3D_FIELDS(i,j,k)])); + } + } + + // Ey component + if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) { + int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)]; + for (int pole = 0; pole < MAXPOLES; pole++) { + Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_sub(Ty[INDEX4D_T(pole,i_T,j_T,k_T)], + cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))], + Ey[INDEX3D_FIELDS(i,j,k)])); + } + } + + // Ez component + if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) { + int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)]; + for (int pole = 0; pole < MAXPOLES; pole++) { + Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_sub(Tz[INDEX4D_T(pole,i_T,j_T,k_T)], + cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))], + Ez[INDEX3D_FIELDS(i,j,k)])); + } + } +} \ No newline at end of file diff --git a/gprMax/opencl/pml_updates_electric_HORIPML.cl b/gprMax/opencl/pml_updates_electric_HORIPML.cl new file mode 100644 index 00000000..6182bb75 --- /dev/null +++ b/gprMax/opencl/pml_updates_electric_HORIPML.cl @@ -0,0 +1,955 @@ +// Macros for converting subscripts to linear index: +#define INDEX2D_R(m, n) (m)*(NY_R)+(n) +#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n) +#define INDEX3D_FIELDS(i, j, k) 
(i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k) +#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k) +#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k) +#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k) + +__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] = +{ + {% for i in updateEVal %} + {{i}}, + {% endfor %} +}; + + +__kernel void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d){ + + // This function updates the Ey and Ez field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dHy, dHz; + {{REAL}} dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - i1; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i1)] - 1; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - i2; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i2)] - 1; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = 
RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +} + +__kernel void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d){ + // This function updates the Ey and Ez field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz; + {{REAL}} dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - i1; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i1)]; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + RA1 = RA[INDEX2D_R(1,i1)]; + RB1 = RB[INDEX2D_R(1,i1)]; + RE1 = RE[INDEX2D_R(1,i1)]; + RF1 = RF[INDEX2D_R(1,i1)]; + RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - i2; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i2)]; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + RA1 = RA[INDEX2D_R(1,i2)]; + RB1 = RB[INDEX2D_R(1,i2)]; + RE1 = RE[INDEX2D_R(1,i2)]; + RF1 = RF[INDEX2D_R(1,i2)]; + RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +} + +__kernel void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d){ + // This function updates the Ey and Ez field components for the xplus slab. 
+ // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dHy, dHz; + {{REAL}} dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i1)] - 1; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i2)] - 1; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +} + +__kernel void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Ey and Ez field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz; + {{REAL}} dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i1)]; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + RA1 = RA[INDEX2D_R(1,i1)]; + RB1 = RB[INDEX2D_R(1,i1)]; + RE1 = RE[INDEX2D_R(1,i1)]; + RF1 = RF[INDEX2D_R(1,i1)]; + RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i2)]; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + RA1 = RA[INDEX2D_R(1,i2)]; + RB1 = RB[INDEX2D_R(1,i2)]; + RE1 = RE[INDEX2D_R(1,i2)]; + RF1 = RF[INDEX2D_R(1,i2)]; + RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +} + + +__kernel void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Ex and Ez field components for the yminus slab. 
+ // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dHx, dHz; + {{REAL}} dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - j1; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j1)] - 1; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - j2; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j2)] - 1; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + +__kernel void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Ex and Ez field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz; + {{REAL}} dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - j1; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j1)]; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + RA1 = RA[INDEX2D_R(1,j1)]; + RB1 = RB[INDEX2D_R(1,j1)]; + RE1 = RE[INDEX2D_R(1,j1)]; + RF1 = RF[INDEX2D_R(1,j1)]; + RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - j2; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j2)]; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + RA1 = RA[INDEX2D_R(1,j2)]; + RB1 = RB[INDEX2D_R(1,j2)]; + RE1 = RE[INDEX2D_R(1,j2)]; + RF1 = RF[INDEX2D_R(1,j2)]; + RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +__kernel void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Ex and Ez field components for the yplus slab. 
+ // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dHx, dHz; + {{REAL}} dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j1)] - 1; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j2)] - 1; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +__kernel void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Ex and Ez field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz; + {{REAL}} dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j1)]; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + RA1 = RA[INDEX2D_R(1,j1)]; + RB1 = RB[INDEX2D_R(1,j1)]; + RE1 = RE[INDEX2D_R(1,j1)]; + RF1 = RF[INDEX2D_R(1,j1)]; + RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j2)]; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + RA1 = RA[INDEX2D_R(1,j2)]; + RB1 = RB[INDEX2D_R(1,j2)]; + RE1 = RE[INDEX2D_R(1,j2)]; + RF1 = RF[INDEX2D_R(1,j2)]; + RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + +__kernel void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Ex and Ey field components for the zminus slab. 
+ // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dHx, dHy; + {{REAL}} dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - k1; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k1)] - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - k2; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k2)] - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + +__kernel void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Ex and Ey field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy; + {{REAL}} dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - k1; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k1)]; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RA1 = RA[INDEX2D_R(1,k1)]; + RB1 = RB[INDEX2D_R(1,k1)]; + RE1 = RE[INDEX2D_R(1,k1)]; + RF1 = RF[INDEX2D_R(1,k1)]; + RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - k2; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k2)]; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RA1 = RA[INDEX2D_R(1,k2)]; + RB1 = RB[INDEX2D_R(1,k2)]; + RE1 = RE[INDEX2D_R(1,k2)]; + RF1 = RF[INDEX2D_R(1,k2)]; + RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +__kernel void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Ex and Ey field components for the zplus slab. 
+ // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dHx, dHy; + {{REAL}} dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k1)] - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k2)] - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +__kernel void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Ex and Ey field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy; + {{REAL}} dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k1)]; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RA1 = RA[INDEX2D_R(1,k1)]; + RB1 = RB[INDEX2D_R(1,k1)]; + RE1 = RE[INDEX2D_R(1,k1)]; + RF1 = RF[INDEX2D_R(1,k1)]; + RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k2)]; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RA1 = RA[INDEX2D_R(1,k2)]; + RB1 = RB[INDEX2D_R(1,k2)]; + RE1 = RE[INDEX2D_R(1,k2)]; + RF1 = RF[INDEX2D_R(1,k2)]; + RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + diff --git a/gprMax/opencl/pml_updates_magnetic_HORIPML.cl b/gprMax/opencl/pml_updates_magnetic_HORIPML.cl new file mode 100644 index 00000000..1d6e7a56 --- /dev/null +++ b/gprMax/opencl/pml_updates_magnetic_HORIPML.cl @@ -0,0 +1,962 @@ +// Macros for converting subscripts to linear index: +#define INDEX2D_R(m, n) (m)*(NY_R)+(n) +#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n) +#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k) +#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k) +#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k) +#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k) + + +__constant {{REAL}} 
updatecoeffsH[{{N_updatecoeffsH}}] = +{ + {% for i in updateHVal %} + {{i}}, + {% endfor %} +}; + +__kernel void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Hy and Hz field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dEy, dEz; + {{REAL}} dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - (i1 + 1); + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i1)] - 1; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - (i2 + 1); + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i2)] - 1; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = 
RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +} + + +__kernel void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Hy and Hz field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz; + {{REAL}} dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - (i1 + 1); + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i1)]; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + RA1 = RA[INDEX2D_R(1,i1)]; + RB1 = RB[INDEX2D_R(1,i1)]; + RE1 = RE[INDEX2D_R(1,i1)]; + RF1 = RF[INDEX2D_R(1,i1)]; + RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + 
RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - (i2 + 1); + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i2)]; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + RA1 = RA[INDEX2D_R(1,i2)]; + RB1 = RB[INDEX2D_R(1,i2)]; + RE1 = RE[INDEX2D_R(1,i2)]; + RF1 = RF[INDEX2D_R(1,i2)]; + RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +} + + +__kernel void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Hy and Hz field components for the xplus slab. 
+ // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dEy, dEz; + {{REAL}} dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i1)] - 1; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i2)] - 1; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +} + + +__kernel void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Hy and Hz field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz; + {{REAL}} dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i1)]; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + RA1 = RA[INDEX2D_R(1,i1)]; + RB1 = RB[INDEX2D_R(1,i1)]; + RE1 = RE[INDEX2D_R(1,i1)]; + RF1 = RF[INDEX2D_R(1,i1)]; + RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i2)]; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + RA1 = RA[INDEX2D_R(1,i2)]; + RB1 = RB[INDEX2D_R(1,i2)]; + RE1 = RE[INDEX2D_R(1,i2)]; + RF1 = RF[INDEX2D_R(1,i2)]; + RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +} + + +__kernel void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Hx and Hz field components for the yminus slab. 
+ // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dEx, dEz; + {{REAL}} dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - (j1 + 1); + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j1)] - 1; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - (j2 + 1); + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j2)] - 1; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__kernel void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Hx and Hz field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz; + {{REAL}} dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - (j1 + 1); + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j1)]; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + RA1 = RA[INDEX2D_R(1,j1)]; + RB1 = RB[INDEX2D_R(1,j1)]; + RE1 = RE[INDEX2D_R(1,j1)]; + RF1 = RF[INDEX2D_R(1,j1)]; + RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + 
RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - (j2 + 1); + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j2)]; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + RA1 = RA[INDEX2D_R(1,j2)]; + RB1 = RB[INDEX2D_R(1,j2)]; + RE1 = RE[INDEX2D_R(1,j2)]; + RF1 = RF[INDEX2D_R(1,j2)]; + RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__kernel void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Hx and Hz field components for the yplus slab. 
+ // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation (used as dy in this kernel) + + // Obtain the linear index corresponding to the current work-item (thread) + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dEx, dEz; + {{REAL}} dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j1)] - 1; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); +
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j2)] - 1; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__kernel void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Hx and Hz field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current work-item (thread) + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz; + {{REAL}} dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j1)]; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + RA1 = RA[INDEX2D_R(1,j1)]; + RB1 = RB[INDEX2D_R(1,j1)]; + RE1 = RE[INDEX2D_R(1,j1)]; + RF1 = RF[INDEX2D_R(1,j1)]; + RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 *
PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j2)]; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + RA1 = RA[INDEX2D_R(1,j2)]; + RB1 = RB[INDEX2D_R(1,j2)]; + RE1 = RE[INDEX2D_R(1,j2)]; + RF1 = RF[INDEX2D_R(1,j2)]; + RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__kernel void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Hx and Hy field components for the zminus slab. 
+ // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation (used as dz in this kernel) + + // Obtain the linear index corresponding to the current work-item (thread) + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dEx, dEy; + {{REAL}} dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays (k is mirrored: k1 of 0 maps to the cell at zf - 1) + ii = i1 + xs; + jj = j1 + ys; + kk = zf - (k1 + 1); + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k1)] - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); +
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - (k2 + 1); + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k2)] - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__kernel void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Hx and Hy field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current work-item (thread) + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy; + {{REAL}} dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays (k is mirrored: k1 of 0 maps to the cell at zf - 1) + ii = i1 + xs; + jj = j1 + ys; + kk = zf - (k1 + 1); + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k1)]; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RA1 = RA[INDEX2D_R(1,k1)]; + RB1 = RB[INDEX2D_R(1,k1)]; + RE1 = RE[INDEX2D_R(1,k1)]; + RF1 = RF[INDEX2D_R(1,k1)]; + RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy +
RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - (k2 + 1); + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k2)]; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RA1 = RA[INDEX2D_R(1,k2)]; + RB1 = RB[INDEX2D_R(1,k2)]; + RE1 = RE[INDEX2D_R(1,k2)]; + RF1 = RF[INDEX2D_R(1,k2)]; + RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__kernel void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Hx and Hy field components for the zplus slab. 
+ // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation (used as dz in this kernel) + + // Obtain the linear index corresponding to the current work-item (thread) + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA01, RB0, RE0, RF0, dEx, dEy; + {{REAL}} dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k1)] - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); +
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k2)] - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__kernel void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) { + + // This function updates the Hx and Hy field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current work-item (thread) + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + {{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy; + {{REAL}} dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k1)]; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RA1 = RA[INDEX2D_R(1,k1)]; + RB1 = RB[INDEX2D_R(1,k1)]; + RE1 = RE[INDEX2D_R(1,k1)]; + RF1 = RF[INDEX2D_R(1,k1)]; + RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy + RB0 *
PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k2)]; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RA1 = RA[INDEX2D_R(1,k2)]; + RB1 = RB[INDEX2D_R(1,k2)]; + RE1 = RE[INDEX2D_R(1,k2)]; + RF1 = RF[INDEX2D_R(1,k2)]; + RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} \ No newline at end of file diff --git a/gprMax/opencl/snapshots.cl b/gprMax/opencl/snapshots.cl new file mode 100644 index 00000000..b1f9bf08 --- /dev/null +++ b/gprMax/opencl/snapshots.cl @@ -0,0 +1,55 @@ +// Macros for converting subscripts to linear index: +#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k) +#define INDEX4D_SNAPS(p, i, j, k) (p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k) + +//////////////////// +// Store snapshot // +//////////////////// + +__kernel void store_snapshot(int p, int xs, int xf, int ys, int yf, int zs, int zf, int dx, int dy, int dz, + __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, + __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, + __global const {{REAL}}*
restrict Hy, __global const {{REAL}}* restrict Hz, + __global {{REAL}} *snapEx, __global {{REAL}} *snapEy, __global {{REAL}} *snapEz, + __global {{REAL}} *snapHx, __global {{REAL}} *snapHy, __global {{REAL}} *snapHz) { + + // This function stores field values for a snapshot. + // + // Args: + // p: Snapshot number + // xs, xf, ys, yf, zs, zf: Start and finish cell coordinates for snapshot + // dx, dy, dz: Sampling interval in cell coordinates for snapshot + // E, H: Access to field component arrays (read-only; OpenCL C spells the qualifier restrict) + // snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots + + // Obtain the linear index corresponding to the current work-item (thread) + int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + // Convert the linear index to subscripts for 4D SNAPS array + int i = (idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) / ({{NY_SNAPS}} * {{NZ_SNAPS}}); + int j = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) / {{NZ_SNAPS}}; + int k = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) % {{NZ_SNAPS}}; + + // Subscripts for field arrays + int ii, jj, kk; + + if (i >= xs && i < xf && j >= ys && j < yf && k >= zs && k < zf) { + + // Increment subscripts for field array to account for spatial sampling of snapshot + ii = (xs + i) * dx; + jj = (ys + j) * dy; + kk = (zs + k) * dz; + + // The electric field component value at a point comes from an average of + // the 4 electric field component values in that cell + snapEx[INDEX4D_SNAPS(p,i,j,k)] = (Ex[INDEX3D_FIELDS(ii,jj,kk)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk)] + Ex[INDEX3D_FIELDS(ii,jj,kk+1)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk+1)]) / 4; + snapEy[INDEX4D_SNAPS(p,i,j,k)] = (Ey[INDEX3D_FIELDS(ii,jj,kk)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk)] + Ey[INDEX3D_FIELDS(ii,jj,kk+1)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk+1)]) / 4; + snapEz[INDEX4D_SNAPS(p,i,j,k)] =
(Ez[INDEX3D_FIELDS(ii,jj,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj,kk)] + Ez[INDEX3D_FIELDS(ii,jj+1,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj+1,kk)]) / 4; + + // The magnetic field component value at a point comes from average of + // 2 magnetic field component values in that cell and the following cell + snapHx[INDEX4D_SNAPS(p,i,j,k)] = (Hx[INDEX3D_FIELDS(ii,jj,kk)] + Hx[INDEX3D_FIELDS(ii+1,jj,kk)]) / 2; + snapHy[INDEX4D_SNAPS(p,i,j,k)] = (Hy[INDEX3D_FIELDS(ii,jj,kk)] + Hy[INDEX3D_FIELDS(ii,jj+1,kk)]) / 2; + snapHz[INDEX4D_SNAPS(p,i,j,k)] = (Hz[INDEX3D_FIELDS(ii,jj,kk)] + Hz[INDEX3D_FIELDS(ii,jj,kk+1)]) / 2; + } +} \ No newline at end of file diff --git a/gprMax/opencl/source_updates.cl b/gprMax/opencl/source_updates.cl new file mode 100644 index 00000000..e838ea21 --- /dev/null +++ b/gprMax/opencl/source_updates.cl @@ -0,0 +1,206 @@ +// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +// +// This file is part of gprMax. +// +// gprMax is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// gprMax is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
+ + +// Macros for converting subscripts to linear index: +#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}}) + (n) +#define INDEX2D_SRCINFO(m, n) (m)*({{NY_SRCINFO}}) + (n) +#define INDEX2D_SRCWAVES(m, n) (m)*({{NY_SRCWAVES}}) + (n) +#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k) +#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}}) + (i)*({{NY_ID}})*({{NZ_ID}}) + (j)*({{NZ_ID}}) + (k) + +// Material coefficients (read-only) in constant memory +__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] = +{ + {% for i in updateEVal %} + {{i}}, + {% endfor %} +}; + +__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] = +{ + {% for i in updateHVal %} + {{i}}, + {% endfor %} +}; + + +/////////////////////////////////////////// +// Hertzian dipole electric field update // +/////////////////////////////////////////// + +__kernel void update_hertzian_dipole(int NHERTZDIPOLE, int iteration, + {{REAL}} dx, {{REAL}} dy, {{REAL}} dz, + __global const int* restrict srcinfo1, + __global const {{REAL}}* restrict srcinfo2, + __global const {{REAL}}* restrict srcwaveforms, + __global const unsigned int* restrict ID, + __global {{REAL}} *Ex, + __global {{REAL}} *Ey, + __global {{REAL}} *Ez) { + + // This function updates electric field values for Hertzian dipole sources. 
+ // + // Args: + // NHERTZDIPOLE: Total number of Hertzian dipoles in the model + // iteration: Iteration number of simulation + // dx, dy, dz: Spatial discretisations + // srcinfo1: Source cell coordinates and polarisation information + // srcinfo2: Other source information, e.g. length, resistance etc... (read here as dl) + // srcwaveforms: Source waveform values + // ID, E: Access to ID and field component arrays + + // Obtain the linear index corresponding to the current work-item and use for each source + int src = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + if (src < NHERTZDIPOLE) { + {{REAL}} dl; + int i, j, k, polarisation; + + i = srcinfo1[INDEX2D_SRCINFO(src,0)]; + j = srcinfo1[INDEX2D_SRCINFO(src,1)]; + k = srcinfo1[INDEX2D_SRCINFO(src,2)]; + + polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)]; + dl = srcinfo2[src]; + + // 'x' polarised source + if (polarisation == 0) { + int materialEx = ID[INDEX4D_ID(0,i,j,k)]; + Ex[INDEX3D_FIELDS(i,j,k)] = Ex[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz)); + } + + // 'y' polarised source + else if (polarisation == 1) { + int materialEy = ID[INDEX4D_ID(1,i,j,k)]; + Ey[INDEX3D_FIELDS(i,j,k)] = Ey[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz)); + } + + // 'z' polarised source + else if (polarisation == 2) { + int materialEz = ID[INDEX4D_ID(2,i,j,k)]; + Ez[INDEX3D_FIELDS(i,j,k)] = Ez[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz)); + } + } +} + +__kernel void update_magnetic_dipole(int NMAGDIPOLE, int iteration, {{REAL}} dx, {{REAL}} dy, {{REAL}} dz, __global const int* restrict srcinfo1, __global const {{REAL}}* restrict srcinfo2, __global const {{REAL}}* restrict srcwaveforms, __global const unsigned int* restrict ID, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz){ + // This
function updates magnetic field values for magnetic dipole sources. + // + // Args: + // NMAGDIPOLE: Total number of magnetic dipoles in the model + // iteration: Iteration number of simulation + // dx, dy, dz: Spatial discretisations + // srcinfo1: Source cell coordinates and polarisation information + // srcinfo2: Other source information, e.g. length, resistance etc... + // srcwaveforms: Source waveform values + // ID, H: Access to ID and field component arrays + + // Obtain the linear index corresponding to the current thread and use for each receiver + int src = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + if (src < NMAGDIPOLE) { + + int i, j, k, polarisation; + + i = srcinfo1[INDEX2D_SRCINFO(src,0)]; + j = srcinfo1[INDEX2D_SRCINFO(src,1)]; + k = srcinfo1[INDEX2D_SRCINFO(src,2)]; + polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)]; + + // 'x' polarised source + if (polarisation == 0) { + int materialHx = ID[INDEX4D_ID(3,i,j,k)]; + Hx[INDEX3D_FIELDS(i,j,k)] = Hx[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz)); + } + + // 'y' polarised source + else if (polarisation == 1) { + int materialHy = ID[INDEX4D_ID(4,i,j,k)]; + Hy[INDEX3D_FIELDS(i,j,k)] = Hy[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz)); + } + + // 'z' polarised source + else if (polarisation == 2) { + int materialHz = ID[INDEX4D_ID(5,i,j,k)]; + Hz[INDEX3D_FIELDS(i,j,k)] = Hz[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz)); + } + } +} + +__kernel void update_voltage_source(int NVOLTSRC, int iteration, {{REAL}} dx, {{REAL}} dy, {{REAL}} dz, __global const int* restrict srcinfo1, __global const {{REAL}}* restrict srcinfo2, __global const {{REAL}}* restrict 
srcwaveforms, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez){ + + // This function updates electric field values for voltage sources. + // + // Args: + // NVOLTSRC: Total number of voltage sources in the model + // iteration: Iteration number of simulation + // dx, dy, dz: Spatial discretisations + // srcinfo1: Source cell coordinates and polarisation information + // srcinfo2: Other source information, e.g. length, resistance etc... + // srcwaveforms: Source waveform values + // ID, E: Access to ID and field component arrays + + // Obtain the linear index corresponding to the current thread and use for each receiver + int src = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0); + + if (src < NVOLTSRC) { + + {{REAL}} resistance; + int i, j, k, polarisation; + + i = srcinfo1[INDEX2D_SRCINFO(src,0)]; + j = srcinfo1[INDEX2D_SRCINFO(src,1)]; + k = srcinfo1[INDEX2D_SRCINFO(src,2)]; + polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)]; + resistance = srcinfo2[src]; + + // 'x' polarised source + if (polarisation == 0) { + if (resistance != 0) { + int materialEx = ID[INDEX4D_ID(0,i,j,k)]; + Ex[INDEX3D_FIELDS(i,j,k)] = Ex[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dy * dz)); + } + else { + Ex[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dx; + } + } + + // 'y' polarised source + else if (polarisation == 1) { + if (resistance != 0) { + int materialEy = ID[INDEX4D_ID(1,i,j,k)]; + Ey[INDEX3D_FIELDS(i,j,k)] = Ey[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dz)); + } + else { + Ey[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dy; + } + } + + // 'z' polarised source + else if (polarisation == 2) { + if 
(resistance != 0) { + int materialEz = ID[INDEX4D_ID(2,i,j,k)]; + Ez[INDEX3D_FIELDS(i,j,k)] = Ez[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dy)); + } + else { + Ez[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dz; + } + } + } +} \ No newline at end of file diff --git a/gprMax/opencl/store_outputs.cl b/gprMax/opencl/store_outputs.cl new file mode 100644 index 00000000..5facdf22 --- /dev/null +++ b/gprMax/opencl/store_outputs.cl @@ -0,0 +1,59 @@ +# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +# +# This file is part of gprMax. +# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see . 
+ + +#define INDEX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n) +#define INDEX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k) +#define INDEX3D_FIELDS(i,j,k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k) + +__kernel void store_outputs(int NRX, int iteration, + __global const int* restrict rxcoords, + __global {{REAL}} *rxs, + __global const {{REAL}}* restrict Ex, + __global const {{REAL}}* restrict Ey, + __global const {{REAL}}* restrict Ez, + __global const {{REAL}}* restrict Hx, + __global const {{REAL}}* restrict Hy, + __global const {{REAL}}* restrict Hz) { + + // This function stores field component values for every receiver in the model. + // + // Args: + // NRX: total number of receivers in the model. + // rxs: array to store field components for receivers - rows + // are field components; columns are iterations; pages are receiver + + // Obtain linear index corresponding to the current work item + int rx = get_global_id(2) * get_global_size(0) * get_global_size(1) + + get_global_id(1) * get_global_size(0) + get_global_id(0); + + int i,j,k; + + if (rx < NRX) { + i = rxcoords[INDEX2D_RXCOORDS(rx,0)]; + j = rxcoords[INDEX2D_RXCOORDS(rx,1)]; + k = rxcoords[INDEX2D_RXCOORDS(rx,2)]; + rxs[INDEX3D_RXS(0,iteration,rx)] = Ex[INDEX3D_FIELDS(i,j,k)]; + rxs[INDEX3D_RXS(1,iteration,rx)] = Ey[INDEX3D_FIELDS(i,j,k)]; + rxs[INDEX3D_RXS(2,iteration,rx)] = Ez[INDEX3D_FIELDS(i,j,k)]; + rxs[INDEX3D_RXS(3,iteration,rx)] = Hx[INDEX3D_FIELDS(i,j,k)]; + rxs[INDEX3D_RXS(4,iteration,rx)] = Hy[INDEX3D_FIELDS(i,j,k)]; + rxs[INDEX3D_RXS(5,iteration,rx)] = Hz[INDEX3D_FIELDS(i,j,k)]; + } +} + diff --git a/gprMax/pml.py b/gprMax/pml.py index ff879f84..19170f46 100644 --- a/gprMax/pml.py +++ b/gprMax/pml.py @@ -21,25 +21,31 @@ from importlib import import_module import gprMax.config as config import numpy as np +from .utilities.utilities import timer + class CFSParameter: """Individual CFS parameter (e.g. 
alpha, kappa, or sigma).""" # Allowable scaling profiles and directions scalingprofiles = {'constant': 0, 'linear': 1, 'quadratic': 2, 'cubic': 3, - 'quartic': 4, 'quintic': 5, 'sextic': 6, 'septic': 7, 'octic': 8} + 'quartic': 4, 'quintic': 5, 'sextic': 6, 'septic': 7, + 'octic': 8} scalingdirections = ['forward', 'reverse'] def __init__(self, ID=None, scaling='polynomial', scalingprofile=None, scalingdirection='forward', min=0, max=0): """ Args: - ID (str): Identifier for CFS parameter, can be: 'alpha', 'kappa' or 'sigma'. - scaling (str): Type of scaling, can be: 'polynomial'. - scalingprofile (str): Type of scaling profile from scalingprofiles. - scalingdirection (str): Direction of scaling profile from scalingdirections. - min (float): Minimum value for parameter. - max (float): Maximum value for parameter. + ID: string identifier for CFS parameter, can be: 'alpha', 'kappa' or + 'sigma'. + scaling: string for type of scaling, can be: 'polynomial'. + scalingprofile: string for type of scaling profile from + scalingprofiles. + scalingdirection: string for direction of scaling profile from + scalingdirections. + min: float for minimum value for parameter. + max: float for maximum value for parameter. """ self.ID = ID @@ -56,9 +62,9 @@ class CFS: def __init__(self): """ Args: - alpha (CFSParameter): alpha parameter for CFS. - kappa (CFSParameter): kappa parameter for CFS. - sigma (CFSParameter): sigma parameter for CFS. + alpha: CFSParameter alpha parameter for CFS. + kappa: CFSParameter kappa parameter for CFS. + sigma: CFSParameter sigma parameter for CFS. """ self.alpha = CFSParameter(ID='alpha', scalingprofile='constant') @@ -70,11 +76,11 @@ class CFS: material properties. Args: - d (float): dx, dy, or dz in direction of PML. - er (float): Average permittivity of underlying material. - mr (float): Average permeability of underlying material. - G (class): Grid class instance - holds essential parameters - describing the model. 
+ d: float for dx, dy, or dz in direction of PML. + er: float for average permittivity of underlying material. + mr: float for average permeability of underlying material. + G: FDTDGrid object that holds essential parameters describing the + model. """ # Calculation of the maximum value of sigma from http://dx.doi.org/10.1109/8.546249 @@ -86,17 +92,17 @@ class CFS: electric and magnetic PML updates. Args: - order (int): Order of polynomial for scaling profile. - Evalues (float): numpy array holding scaling profile values for - electric PML update. - Hvalues (float): numpy array holding scaling profile values for - magnetic PML update. + order: int of order of polynomial for scaling profile. + Evalues: float array holding scaling profile values for + electric PML update. + Hvalues: float array holding scaling profile values for + magnetic PML update. Returns: - Evalues (float): numpy array holding scaling profile values for - electric PML update. - Hvalues (float): numpy array holding scaling profile values for - magnetic PML update. + Evalues: float array holding scaling profile values for + electric PML update. + Hvalues: float array holding scaling profile values for + magnetic PML update. """ tmp = (np.linspace(0, (len(Evalues) - 1) + 0.5, num=2 * len(Evalues)) @@ -111,17 +117,18 @@ class CFS: profile type and minimum and maximum values. Args: - thickness (int): Thickness of PML in cells. - parameter (CFSParameter): Instance of CFSParameter + thickness: int of thickness of PML in cells. + parameter: instance of CFSParameter Returns: - Evalues (float): numpy array holding profile value for electric - PML update. - Hvalues (float): numpy array holding profile value for magnetic - PML update. + Evalues: float array holding profile value for electric + PML update. + Hvalues: float array holding profile value for magnetic + PML update. 
""" - # Extra cell of thickness added to allow correct scaling of electric and magnetic values + # Extra cell of thickness added to allow correct scaling of electric and + # magnetic values Evalues = np.zeros(thickness + 1, dtype=config.sim_config.dtypes['float_or_double']) Hvalues = np.zeros(thickness + 1, dtype=config.sim_config.dtypes['float_or_double']) @@ -146,10 +153,12 @@ class CFS: if parameter.scalingdirection == 'reverse': Evalues = Evalues[::-1] Hvalues = Hvalues[::-1] - # Magnetic values must be shifted one element to the left after reversal + # Magnetic values must be shifted one element to the left after + # reversal Hvalues = np.roll(Hvalues, -1) - # Extra cell of thickness not required and therefore removed after scaling + # Extra cell of thickness not required and therefore removed after + # scaling Evalues = Evalues[:-1] Hvalues = Hvalues[:-1] @@ -168,17 +177,20 @@ class PML: boundaryIDs = ['x0', 'y0', 'z0', 'xmax', 'ymax', 'zmax'] # Indicates direction of increasing absorption - # xminus, yminus, zminus - absorption increases in negative direction of x-axis, y-axis, or z-axis - # xplus, yplus, zplus - absorption increases in positive direction of x-axis, y-axis, or z-axis + # xminus, yminus, zminus - absorption increases in negative direction of + # x-axis, y-axis, or z-axis + # xplus, yplus, zplus - absorption increases in positive direction of + # x-axis, y-axis, or z-axis directions = ['xminus', 'yminus', 'zminus', 'xplus', 'yplus', 'zplus'] def __init__(self, G, ID=None, direction=None, xs=0, xf=0, ys=0, yf=0, zs=0, zf=0): """ Args: - G (FDTDGrid): Holds essential parameters describing the model. - ID (str): Identifier for PML slab. - direction (str): Direction of increasing absorption. - xs, xf, ys, yf, zs, zf (float): Extent of the PML slab. + G: FDTDGrid object that holds essential parameters describing the + model. + ID: string identifier for PML slab. + direction: string for direction of increasing absorption. 
+ xs, xf, ys, yf, zs, zf: floats of extent of the PML slab. """ self.G = G @@ -244,8 +256,8 @@ class PML: """Calculates electric and magnetic update coefficients for the PML. Args: - er (float): Average permittivity of underlying material - mr (float): Average permeability of underlying material + er: float of average permittivity of underlying material + mr: float of average permeability of underlying material """ self.ERA = np.zeros((len(self.CFS), self.thickness), @@ -275,19 +287,25 @@ class PML: # Define different parameters depending on PML formulation if self.G.pmlformulation == 'HORIPML': # HORIPML electric update coefficients - tmp = (2 * config.sim_config.em_consts['e0'] * Ekappa) + self.G.dt * (Ealpha * Ekappa + Esigma) - self.ERA[x, :] = (2 * config.sim_config.em_consts['e0'] + self.G.dt * Ealpha) / tmp - self.ERB[x, :] = (2 * config.sim_config.em_consts['e0'] * Ekappa) / tmp - self.ERE[x, :] = ((2 * config.sim_config.em_consts['e0'] * Ekappa) - self.G.dt - * (Ealpha * Ekappa + Esigma)) / tmp + tmp = ((2 * config.sim_config.em_consts['e0'] * Ekappa) + + self.G.dt * (Ealpha * Ekappa + Esigma)) + self.ERA[x, :] = ((2 * config.sim_config.em_consts['e0'] + + self.G.dt * Ealpha) / tmp) + self.ERB[x, :] = ((2 * config.sim_config.em_consts['e0'] * Ekappa) + / tmp) + self.ERE[x, :] = (((2 * config.sim_config.em_consts['e0'] * Ekappa) - + self.G.dt * (Ealpha * Ekappa + Esigma)) / tmp) self.ERF[x, :] = (2 * Esigma * self.G.dt) / (Ekappa * tmp) # HORIPML magnetic update coefficients - tmp = (2 * config.sim_config.em_consts['e0'] * Hkappa) + self.G.dt * (Halpha * Hkappa + Hsigma) - self.HRA[x, :] = (2 * config.sim_config.em_consts['e0'] + self.G.dt * Halpha) / tmp - self.HRB[x, :] = (2 * config.sim_config.em_consts['e0'] * Hkappa) / tmp - self.HRE[x, :] = ((2 * config.sim_config.em_consts['e0'] * Hkappa) - self.G.dt - * (Halpha * Hkappa + Hsigma)) / tmp + tmp = ((2 * config.sim_config.em_consts['e0'] * Hkappa) + + self.G.dt * (Halpha * Hkappa + Hsigma)) + 
self.HRA[x, :] = ((2 * config.sim_config.em_consts['e0'] + + self.G.dt * Halpha) / tmp) + self.HRB[x, :] = ((2 * config.sim_config.em_consts['e0'] * Hkappa) + / tmp) + self.HRE[x, :] = (((2 * config.sim_config.em_consts['e0'] * Hkappa) - + self.G.dt * (Halpha * Hkappa + Hsigma)) / tmp) self.HRF[x, :] = (2 * Hsigma * self.G.dt) / (Hkappa * tmp) elif self.G.pmlformulation == 'MRIPML': @@ -295,31 +313,39 @@ class PML: tmp = 2 * config.sim_config.em_consts['e0'] + self.G.dt * Ealpha self.ERA[x, :] = Ekappa + (self.G.dt * Esigma) / tmp self.ERB[x, :] = (2 * config.sim_config.em_consts['e0']) / tmp - self.ERE[x, :] = ((2 * config.sim_config.em_consts['e0']) - self.G.dt * Ealpha) / tmp + self.ERE[x, :] = (((2 * config.sim_config.em_consts['e0']) + - self.G.dt * Ealpha) / tmp) self.ERF[x, :] = (2 * Esigma * self.G.dt) / tmp # MRIPML magnetic update coefficients tmp = 2 * config.sim_config.em_consts['e0'] + self.G.dt * Halpha self.HRA[x, :] = Hkappa + (self.G.dt * Hsigma) / tmp self.HRB[x, :] = (2 * config.sim_config.em_consts['e0']) / tmp - self.HRE[x, :] = ((2 * config.sim_config.sim_config.em_consts['e0']) - self.G.dt * Halpha) / tmp + self.HRE[x, :] = (((2 * config.sim_config.em_consts['e0']) + - self.G.dt * Halpha) / tmp) self.HRF[x, :] = (2 * Hsigma * self.G.dt) / tmp def update_electric(self): - """This functions updates electric field components with the PML correction.""" + """This function updates electric field components with the PML + correction.
+ """ pmlmodule = 'gprMax.cython.pml_updates_electric_' + self.G.pmlformulation - func = getattr(import_module(pmlmodule), 'order' + str(len(self.CFS)) + '_' + self.direction) + func = getattr(import_module(pmlmodule), + 'order' + str(len(self.CFS)) + '_' + self.direction) func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf, config.get_model_config().ompthreads, self.G.updatecoeffsE, self.G.ID, self.G.Ex, self.G.Ey, self.G.Ez, self.G.Hx, self.G.Hy, self.G.Hz, self.EPhi1, self.EPhi2, self.ERA, self.ERB, self.ERE, self.ERF, self.d) def update_magnetic(self): - """This functions updates magnetic field components with the PML correction.""" + """This functions updates magnetic field components with the PML + correction. + """ pmlmodule = 'gprMax.cython.pml_updates_magnetic_' + self.G.pmlformulation - func = getattr(import_module(pmlmodule), 'order' + str(len(self.CFS)) + '_' + self.direction) + func = getattr(import_module(pmlmodule), + 'order' + str(len(self.CFS)) + '_' + self.direction) func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf, config.get_model_config().ompthreads, self.G.updatecoeffsH, self.G.ID, self.G.Ex, self.G.Ey, self.G.Ez, self.G.Hx, self.G.Hy, self.G.Hz, @@ -331,6 +357,9 @@ class CUDAPML(PML): solving on GPU using CUDA. """ + def __init__(self, *args, **kwargs): + super(CUDAPML, self).__init__(*args, **kwargs) + def htod_field_arrays(self): """Initialise PML field and coefficient arrays on GPU.""" @@ -360,9 +389,9 @@ class CUDAPML(PML): """Get update functions from PML kernels. Args: - kernelselectric: PyCuda SourceModule containing PML kernels for + kernelselectric: pycuda SourceModule containing PML kernels for electric updates. - kernelsmagnetic: PyCuda SourceModule containing PML kernels for + kernelsmagnetic: pycuda SourceModule containing PML kernels for magnetic updates. """ @@ -373,9 +402,12 @@ class CUDAPML(PML): """This functions updates electric field components with the PML correction on the GPU. 
""" - self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), - np.int32(self.ys), np.int32(self.yf), - np.int32(self.zs), np.int32(self.zf), + self.update_electric_gpu(np.int32(self.xs), + np.int32(self.xf), + np.int32(self.ys), + np.int32(self.yf), + np.int32(self.zs), + np.int32(self.zf), np.int32(self.EPhi1_gpu.shape[1]), np.int32(self.EPhi1_gpu.shape[2]), np.int32(self.EPhi1_gpu.shape[3]), @@ -384,11 +416,18 @@ class CUDAPML(PML): np.int32(self.EPhi2_gpu.shape[3]), np.int32(self.thickness), self.G.ID_gpu.gpudata, - self.G.Ex_gpu.gpudata, self.G.Ey_gpu.gpudata, self.G.Ez_gpu.gpudata, - self.G.Hx_gpu.gpudata, self.G.Hy_gpu.gpudata, self.G.Hz_gpu.gpudata, - self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, - self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, - self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, + self.G.Ex_gpu.gpudata, + self.G.Ey_gpu.gpudata, + self.G.Ez_gpu.gpudata, + self.G.Hx_gpu.gpudata, + self.G.Hy_gpu.gpudata, + self.G.Hz_gpu.gpudata, + self.EPhi1_gpu.gpudata, + self.EPhi2_gpu.gpudata, + self.ERA_gpu.gpudata, + self.ERB_gpu.gpudata, + self.ERE_gpu.gpudata, + self.ERF_gpu.gpudata, config.sim_config.dtypes['float_or_double'](self.d), block=self.G.tpb, grid=self.bpg) @@ -396,9 +435,12 @@ class CUDAPML(PML): """This functions updates magnetic field components with the PML correction on the GPU. 
""" - self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), - np.int32(self.ys), np.int32(self.yf), - np.int32(self.zs), np.int32(self.zf), + self.update_magnetic_gpu(np.int32(self.xs), + np.int32(self.xf), + np.int32(self.ys), + np.int32(self.yf), + np.int32(self.zs), + np.int32(self.zf), np.int32(self.HPhi1_gpu.shape[1]), np.int32(self.HPhi1_gpu.shape[2]), np.int32(self.HPhi1_gpu.shape[3]), @@ -407,19 +449,147 @@ class CUDAPML(PML): np.int32(self.HPhi2_gpu.shape[3]), np.int32(self.thickness), self.G.ID_gpu.gpudata, - self.G.Ex_gpu.gpudata, self.G.Ey_gpu.gpudata, self.G.Ez_gpu.gpudata, - self.G.Hx_gpu.gpudata, self.G.Hy_gpu.gpudata, self.G.Hz_gpu.gpudata, - self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, - self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, - self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, + self.G.Ex_gpu.gpudata, + self.G.Ey_gpu.gpudata, + self.G.Ez_gpu.gpudata, + self.G.Hx_gpu.gpudata, + self.G.Hy_gpu.gpudata, + self.G.Hz_gpu.gpudata, + self.HPhi1_gpu.gpudata, + self.HPhi2_gpu.gpudata, + self.HRA_gpu.gpudata, + self.HRB_gpu.gpudata, + self.HRE_gpu.gpudata, + self.HRF_gpu.gpudata, config.sim_config.dtypes['float_or_double'](self.d), block=self.G.tpb, grid=self.bpg) + +class OpenCLPML(PML): + """Perfectly Matched Layer (PML) Absorbing Boundary Conditions (ABC) for + solving on compute device using OpenCL. + """ + + def __init__(self, *args, **kwargs): + super(OpenCLPML, self).__init__(*args, **kwargs) + self.compute_time = 0 + + def set_queue(self, queue): + """Passes in pyopencl queue. + + Args: + queue: pyopencl queue. 
+ """ + self.queue = queue + + def htod_field_arrays(self): + """Initialise PML field and coefficient arrays on compute device.""" + + import pyopencl.array as clarray + + self.ERA_dev = clarray.to_device(self.queue, self.ERA) + self.ERB_dev = clarray.to_device(self.queue, self.ERB) + self.ERE_dev = clarray.to_device(self.queue, self.ERE) + self.ERF_dev = clarray.to_device(self.queue, self.ERF) + self.HRA_dev = clarray.to_device(self.queue, self.HRA) + self.HRB_dev = clarray.to_device(self.queue, self.HRB) + self.HRE_dev = clarray.to_device(self.queue, self.HRE) + self.HRF_dev = clarray.to_device(self.queue, self.HRF) + self.EPhi1_dev = clarray.to_device(self.queue, self.EPhi1) + self.EPhi2_dev = clarray.to_device(self.queue, self.EPhi2) + self.HPhi1_dev = clarray.to_device(self.queue, self.HPhi1) + self.HPhi2_dev = clarray.to_device(self.queue, self.HPhi2) + + def set_blocks_per_grid(self): + pass + + def set_wgs(self): + """Set the workgroup size used for updating the PML field arrays + on a compute device. + """ + self.wgs = (((int(np.ceil(((self.EPhi1_dev.shape[1] + 1) * + (self.EPhi1_dev.shape[2] + 1) * + (self.EPhi1_dev.shape[3] + 1)) / self.G.tpb[0]))) * 256), 1, 1) + + def get_update_funcs(self): + pass + + def update_electric(self): + """This functions updates electric field components with the PML + correction on the compute device.
+ """ + start_time = timer() + event = self.update_electric_dev(np.int32(self.xs), + np.int32(self.xf), + np.int32(self.ys), + np.int32(self.yf), + np.int32(self.zs), + np.int32(self.zf), + np.int32(self.EPhi1_dev.shape[1]), + np.int32(self.EPhi1_dev.shape[2]), + np.int32(self.EPhi1_dev.shape[3]), + np.int32(self.EPhi2_dev.shape[1]), + np.int32(self.EPhi2_dev.shape[2]), + np.int32(self.EPhi2_dev.shape[3]), + np.int32(self.thickness), + self.G.ID_dev, + self.G.Ex_dev, + self.G.Ey_dev, + self.G.Ez_dev, + self.G.Hx_dev, + self.G.Hy_dev, + self.G.Hz_dev, + self.EPhi1_dev, + self.EPhi2_dev, + self.ERA_dev, + self.ERB_dev, + self.ERE_dev, + self.ERF_dev, + config.sim_config.dtypes['float_or_double'](self.d)) + event.wait() + self.compute_time += (timer() - start_time) + + def update_magnetic(self): + """This functions updates magnetic field components with the PML + correction on the compute device. + """ + start_time = timer() + event = self.update_magnetic_dev(np.int32(self.xs), + np.int32(self.xf), + np.int32(self.ys), + np.int32(self.yf), + np.int32(self.zs), + np.int32(self.zf), + np.int32(self.HPhi1_dev.shape[1]), + np.int32(self.HPhi1_dev.shape[2]), + np.int32(self.HPhi1_dev.shape[3]), + np.int32(self.HPhi2_dev.shape[1]), + np.int32(self.HPhi2_dev.shape[2]), + np.int32(self.HPhi2_dev.shape[3]), + np.int32(self.thickness), + self.G.ID_dev, + self.G.Ex_dev, + self.G.Ey_dev, + self.G.Ez_dev, + self.G.Hx_dev, + self.G.Hy_dev, + self.G.Hz_dev, + self.HPhi1_dev, + self.HPhi2_dev, + self.HRA_dev, + self.HRB_dev, + self.HRE_dev, + self.HRF_dev, + config.sim_config.dtypes['float_or_double'](self.d)) + event.wait() + self.compute_time += (timer() - start_time) + + def print_pml_info(G): """Information about PMLs. Args: - G (FDTDGrid): Parameters describing a grid in a model. + G: FDTDGrid objects that holds parameters describing a grid in a model. 
""" # No PML if all(value == 0 for value in G.pmlthickness.values()): @@ -433,7 +603,8 @@ def print_pml_info(G): pmlinfo += f'{key}: {value}, ' pmlinfo = pmlinfo[:-2] - return f'\nPML boundaries [{G.name}]: {{formulation: {G.pmlformulation}, order: {len(G.cfs)}, thickness (cells): {pmlinfo}}}' + return (f'\nPML boundaries [{G.name}]: {{formulation: {G.pmlformulation}, ' + f'order: {len(G.cfs)}, thickness (cells): {pmlinfo}}}') def build_pml(G, key, value): @@ -442,21 +613,28 @@ def build_pml(G, key, value): (based on underlying material er and mr from solid array). Args: - G (FDTDGrid): Parameters describing a grid in a model. - key (str): Identifier of PML slab. - value (int): Thickness of PML slab in cells. + G: FDTDGrid object that holds parameters describing a grid in a model. + key: string identifier of PML slab. + value: int with thickness of PML slab in cells. """ - pml_type = CUDAPML if config.sim_config.general['cuda'] else PML + if config.sim_config.general['solver'] == 'cpu': + pml_type = PML + elif config.sim_config.general['solver'] == 'cuda': + pml_type = CUDAPML + elif config.sim_config.general['solver'] == 'opencl': + pml_type = OpenCLPML sumer = 0 # Sum of relative permittivities in PML slab summr = 0 # Sum of relative permeabilities in PML slab if key[0] == 'x': if key == 'x0': - pml = pml_type(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz) + pml = pml_type(G, ID=key, direction='xminus', + xf=value, yf=G.ny, zf=G.nz) elif key == 'xmax': - pml = pml_type(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz) + pml = pml_type(G, ID=key, direction='xplus', + xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz) G.pmls.append(pml) for j in range(G.ny): for k in range(G.nz): @@ -469,9 +647,11 @@ def build_pml(G, key, value): elif key[0] == 'y': if key == 'y0': - pml = pml_type(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz) + pml = pml_type(G, ID=key, direction='yminus', + yf=value, xf=G.nx, zf=G.nz) elif key == 'ymax':
- pml = pml_type(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz) + pml = pml_type(G, ID=key, direction='yplus', + ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz) G.pmls.append(pml) for i in range(G.nx): for k in range(G.nz): @@ -484,9 +664,11 @@ def build_pml(G, key, value): elif key[0] == 'z': if key == 'z0': - pml = pml_type(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny) + pml = pml_type(G, ID=key, direction='zminus', + zf=value, xf=G.nx, yf=G.ny) elif key == 'zmax': - pml = pml_type(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz) + pml = pml_type(G, ID=key, direction='zplus', + zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz) G.pmls.append(pml) for i in range(G.nx): for j in range(G.ny): diff --git a/gprMax/receivers.py b/gprMax/receivers.py index 7da6f923..16053069 100644 --- a/gprMax/receivers.py +++ b/gprMax/receivers.py @@ -26,8 +26,8 @@ class Rx: allowableoutputs = ['Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz', 'Ix', 'Iy', 'Iz'] defaultoutputs = allowableoutputs[:-3] - allowableoutputs_gpu = allowableoutputs[:-3] - maxnumoutputs_gpu = 0 + allowableoutputs_dev = allowableoutputs[:-3] + maxnumoutputs_dev = 0 def __init__(self): @@ -41,57 +41,65 @@ class Rx: self.zcoordorigin = None -def htod_rx_arrays(G): - """Initialise arrays on GPU for receiver coordinates and to store field +def htod_rx_arrays(G, queue=None): + """Initialise arrays on compute device for receiver coordinates and to store field components for receivers. Args: - G (FDTDGrid): Holds essential parameters describing the model. + G: FDTDGrid object that holds essential parameters describing the model. + queue: pyopencl queue. Returns: - rxcoords_gpu (int): numpy array of receiver coordinates from GPU. - rxs_gpu (float): numpy array of receiver data from GPU - rows are field - components; columns are iterations; pages are receivers. + rxcoords_dev: int array of receiver coordinates on compute device. 
+ rxs_dev: float array of receiver data on compute device - rows are field + components; columns are iterations; pages are receivers. """ - import pycuda.gpuarray as gpuarray - - # Array to store receiver coordinates on GPU + # Array to store receiver coordinates on compute device rxcoords = np.zeros((len(G.rxs), 3), dtype=np.int32) for i, rx in enumerate(G.rxs): rxcoords[i, 0] = rx.xcoord rxcoords[i, 1] = rx.ycoord rxcoords[i, 2] = rx.zcoord # Store maximum number of output components - if len(rx.outputs) > Rx.maxnumoutputs_gpu: - Rx.maxnumoutputs_gpu = len(rx.outputs) + if len(rx.outputs) > Rx.maxnumoutputs_dev: + Rx.maxnumoutputs_dev = len(rx.outputs) - # Array to store field components for receivers on GPU - rows are field components; - # columns are iterations; pages are receivers - rxs = np.zeros((len(Rx.allowableoutputs_gpu), G.iterations, len(G.rxs)), + # Array to store field components for receivers on compute device - + # rows are field components; columns are iterations; pages are receivers + rxs = np.zeros((len(Rx.allowableoutputs_dev), G.iterations, len(G.rxs)), dtype=config.sim_config.dtypes['float_or_double']) - # Copy arrays to GPU - rxcoords_gpu = gpuarray.to_gpu(rxcoords) - rxs_gpu = gpuarray.to_gpu(rxs) + # Copy arrays to compute device + if config.sim_config.general['solver'] == 'cuda': + import pycuda.gpuarray as gpuarray + rxcoords_dev = gpuarray.to_gpu(rxcoords) + rxs_dev = gpuarray.to_gpu(rxs) - return rxcoords_gpu, rxs_gpu + elif config.sim_config.general['solver'] == 'opencl': + import pyopencl.array as clarray + rxcoords_dev = clarray.to_device(queue, rxcoords) + rxs_dev = clarray.to_device(queue, rxs) + + return rxcoords_dev, rxs_dev -def dtoh_rx_array(rxs_gpu, rxcoords_gpu, G): - """Copy output from receivers array used on GPU back to receiver objects. +def dtoh_rx_array(rxs_dev, rxcoords_dev, G): + """Copy output from receivers array used on compute device back to receiver + objects. 
Args: - rxs_gpu (float): numpy array of receiver data from GPU - rows are field - components; columns are iterations; pages are receivers. - rxcoords_gpu (int): numpy array of receiver coordinates from GPU. - G (FDTDGrid): Holds essential parameters describing the model. + rxcoords_dev: int array of receiver coordinates on compute device. + rxs_dev: float array of receiver data on compute device - rows are field + components; columns are iterations; pages are receivers. + G: FDTDGrid object that holds essential parameters describing the model. + """ for rx in G.rxs: - for rxgpu in range(len(G.rxs)): - if (rx.xcoord == rxcoords_gpu[rxgpu, 0] and - rx.ycoord == rxcoords_gpu[rxgpu, 1] and - rx.zcoord == rxcoords_gpu[rxgpu, 2]): + for rxd in range(len(G.rxs)): + if (rx.xcoord == rxcoords_dev[rxd, 0] and + rx.ycoord == rxcoords_dev[rxd, 1] and + rx.zcoord == rxcoords_dev[rxd, 2]): for output in rx.outputs.keys(): - rx.outputs[output] = rxs_gpu[Rx.allowableoutputs_gpu.index(output), :, rxgpu] + rx.outputs[output] = rxs_dev[Rx.allowableoutputs_dev.index(output), :, rxd] diff --git a/gprMax/snapshots.py b/gprMax/snapshots.py index 70356ac6..9e5bb014 100644 --- a/gprMax/snapshots.py +++ b/gprMax/snapshots.py @@ -223,18 +223,17 @@ class Snapshot: f.close() -def htod_snapshot_array(G): - """Initialise array on GPU for to store field data for snapshots. +def htod_snapshot_array(G, queue=None): + """Initialise array on compute device for to store field data for snapshots. Args: - G (FDTDGrid): Parameters describing a grid in a model. + G: FDTDGrid object with parameters describing a grid in a model. + queue: pyopencl queue. Returns: - snapE_gpu, snapH_gpu (float): numpy arrays of snapshot data on GPU. + snapE_dev, snapH_dev: float arrays of snapshot data on compute device. 
""" - import pycuda.gpuarray as gpuarray - # Get dimensions of largest requested snapshot for snap in G.snapshots: if snap.nx > Snapshot.nx_max: @@ -244,15 +243,21 @@ def htod_snapshot_array(G): if snap.nz > Snapshot.nz_max: Snapshot.nz_max = snap.nz - # GPU - blocks per grid - according to largest requested snapshot - Snapshot.bpg = (int(np.ceil(((Snapshot.nx_max) * - (Snapshot.ny_max) * - (Snapshot.nz_max)) / Snapshot.tpb[0])), 1, 1) + if config.sim_config.general['solver'] == 'cuda': + # Blocks per grid - according to largest requested snapshot + Snapshot.bpg = (int(np.ceil(((Snapshot.nx_max) * + (Snapshot.ny_max) * + (Snapshot.nz_max)) / Snapshot.tpb[0])), 1, 1) + elif config.sim_config.general['solver'] == 'opencl': + # Workgroup size - according to largest requested snapshot + Snapshot.wgs = (int(np.ceil(((Snapshot.nx_max) * + (Snapshot.ny_max) * + (Snapshot.nz_max)))), 1, 1) # 4D arrays to store snapshots on GPU, e.g. snapEx(time, x, y, z); # if snapshots are not being stored on the GPU during the simulation then # they are copied back to the host after each iteration, hence numsnaps = 1 - numsnaps = 1 if config.get_model_config().cuda['snapsgpu2cpu'] else len(G.snapshots) + numsnaps = 1 if config.get_model_config().device['snapsgpu2cpu'] else len(G.snapshots) snapEx = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=config.sim_config.dtypes['float_or_double']) snapEy = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), @@ -266,29 +271,41 @@ def htod_snapshot_array(G): snapHz = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=config.sim_config.dtypes['float_or_double']) - # Copy arrays to GPU - snapEx_gpu = gpuarray.to_gpu(snapEx) - snapEy_gpu = gpuarray.to_gpu(snapEy) - snapEz_gpu = gpuarray.to_gpu(snapEz) - snapHx_gpu = gpuarray.to_gpu(snapHx) - snapHy_gpu = gpuarray.to_gpu(snapHy) - snapHz_gpu = gpuarray.to_gpu(snapHz) + # Copy arrays to compute device + if 
config.sim_config.general['solver'] == 'cuda': + import pycuda.gpuarray as gpuarray + snapEx_dev = gpuarray.to_gpu(snapEx) + snapEy_dev = gpuarray.to_gpu(snapEy) + snapEz_dev = gpuarray.to_gpu(snapEz) + snapHx_dev = gpuarray.to_gpu(snapHx) + snapHy_dev = gpuarray.to_gpu(snapHy) + snapHz_dev = gpuarray.to_gpu(snapHz) - return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu + elif config.sim_config.general['solver'] == 'opencl': + import pyopencl.array as clarray + snapEx_dev = clarray.to_device(queue, snapEx) + snapEy_dev = clarray.to_device(queue, snapEy) + snapEz_dev = clarray.to_device(queue, snapEz) + snapHx_dev = clarray.to_device(queue, snapHx) + snapHy_dev = clarray.to_device(queue, snapHy) + snapHz_dev = clarray.to_device(queue, snapHz) + + return snapEx_dev, snapEy_dev, snapEz_dev, snapHx_dev, snapHy_dev, snapHz_dev -def dtoh_snapshot_array(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap): - """Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview. +def dtoh_snapshot_array(snapEx_dev, snapEy_dev, snapEz_dev, snapHx_dev, snapHy_dev, snapHz_dev, i, snap): + """Copy snapshot array used on compute device back to snapshot objects and + store in format for Paraview. Args: - snapE_gpu, snapH_gpu (float): numpy arrays of snapshot data from GPU. - i (int): index for snapshot data on GPU array. - snap (class): Snapshot class instance + snapE_dev, snapH_dev: float arrays of snapshot data from compute device. + i: int for index of snapshot data on compute device array. 
+ snap: Snapshot class instance """ - snap.Exsnap = snapEx_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] - snap.Eysnap = snapEy_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] - snap.Ezsnap = snapEz_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] - snap.Hxsnap = snapHx_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] - snap.Hysnap = snapHy_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] - snap.Hzsnap = snapHz_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] + snap.Exsnap = snapEx_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] + snap.Eysnap = snapEy_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] + snap.Ezsnap = snapEz_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] + snap.Hxsnap = snapHx_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] + snap.Hysnap = snapHy_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] + snap.Hzsnap = snapHz_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf] diff --git a/gprMax/solvers.py b/gprMax/solvers.py index 24538861..2975efbe 100644 --- a/gprMax/solvers.py +++ b/gprMax/solvers.py @@ -18,22 +18,24 @@ import gprMax.config as config -from .grid import CUDAGrid, FDTDGrid +from .grid import CUDAGrid, FDTDGrid, OpenCLGrid from .subgrids.updates import create_updates as create_subgrid_updates -from .updates import CPUUpdates, CUDAUpdates +from .updates import CPUUpdates, CUDAUpdates, OpenCLUpdates def create_G(): """Create grid object according to solver. Returns: - G (FDTDGrid): Holds essential parameters describing the model. + G: FDTDGrid that holds essential parameters describing the model. 
""" - if config.sim_config.general['cpu']: + if config.sim_config.general['solver'] == 'cpu': G = FDTDGrid() - elif config.sim_config.general['cuda']: + elif config.sim_config.general['solver'] == 'cuda': G = CUDAGrid() + elif config.sim_config.general['solver'] == 'opencl': + G = OpenCLGrid() return G @@ -42,10 +44,10 @@ def create_solver(G): """Create configured solver object. Args: - G (FDTDGrid): Holds essential parameters describing the model. + G: FDTDGrid that holds essential parameters describing the model. Returns: - solver (Solver): solver object. + solver: Solver object. """ if config.sim_config.general['subgrid']: @@ -56,14 +58,17 @@ def create_solver(G): # the required numerical precision and dispersive material type. props = updates.adapt_dispersive_config() updates.set_dispersive_updates(props) - elif config.sim_config.general['cpu']: + elif config.sim_config.general['solver'] == 'cpu': updates = CPUUpdates(G) solver = Solver(updates) props = updates.adapt_dispersive_config() updates.set_dispersive_updates(props) - elif config.sim_config.general['cuda']: + elif config.sim_config.general['solver'] == 'cuda': updates = CUDAUpdates(G) solver = Solver(updates) + elif config.sim_config.general['solver'] == 'opencl': + updates = OpenCLUpdates(G) + solver = Solver(updates) return solver @@ -74,8 +79,8 @@ class Solver: def __init__(self, updates, hsg=False): """ Args: - updates (Updates): Updates contains methods to run FDTD algorithm. - hsg (bool): Use sub-gridding. + updates: Updates contains methods to run FDTD algorithm. + hsg: bool to use sub-gridding. """ self.updates = updates @@ -85,13 +90,14 @@ class Solver: """Time step the FDTD model. Args: - iterator (iterator): can be range() or tqdm() + iterator: can be range() or tqdm() Returns: - tsolve (float): Time taken to execute solving (seconds). - memsolve (float): Memory (RAM) used. + tsolve: float for time taken to execute solving (seconds). + memsolve: float for memory (RAM) used. 
""" + memsolve = 0 self.updates.time_start() for iteration in iterator: @@ -108,7 +114,8 @@ class Solver: if self.hsg: self.updates.hsg_1() self.updates.update_electric_b() - memsolve = self.updates.calculate_memsolve(iteration) if config.sim_config.general['cuda'] else None + if config.sim_config.general['solver'] == 'cuda': + memsolve = self.updates.calculate_memsolve(iteration) self.updates.finalise() tsolve = self.updates.calculate_tsolve() diff --git a/gprMax/sources.py b/gprMax/sources.py index 75772e55..c4b786a2 100644 --- a/gprMax/sources.py +++ b/gprMax/sources.py @@ -45,7 +45,8 @@ class Source: """Calculates all waveform values for source for duration of simulation. Args: - G (FDTDGrid): Holds essential parameters describing the model. + G: FDTDGrid object that olds essential parameters describing the + model. """ # Waveform values for electric sources - calculated half a timestep later self.waveformvaluesJ = np.zeros((G.iterations), @@ -82,13 +83,14 @@ class VoltageSource(Source): """Updates electric field values for a voltage source. Args: - iteration (int): Current iteration (timestep). - updatecoeffsE (memory view): numpy array of electric field update - coefficients. - ID (memory view): numpy array of numeric IDs corresponding to - materials in the model. - Ex, Ey, Ez (memory view): numpy array of electric field values. - G (FDTDGrid): Holds essential parameters describing the model. + iteration: int of current iteration (timestep). + updatecoeffsE: memory view of array of electric field update + coefficients. + ID: memory view of array of numeric IDs corresponding to materials + in the model. + Ex, Ey, Ez: memory view of array of electric field values. + G: FDTDGrid object that olds essential parameters describing the + model. """ if iteration * G.dt >= self.start and iteration * G.dt <= self.stop: @@ -126,7 +128,8 @@ class VoltageSource(Source): voltage source conductivity to the underlying parameters. 
Args: - G (FDTDGrid): Holds essential parameters describing the model. + G: FDTDGrid object that olds essential parameters describing the + model. """ if self.resistance != 0: @@ -166,13 +169,14 @@ class HertzianDipole(Source): """Updates electric field values for a Hertzian dipole. Args: - iteration (int): Current iteration (timestep). - updatecoeffsE (memory view): numpy array of electric field update - coefficients. - ID (memory view): numpy array of numeric IDs corresponding to - materials in the model. - Ex, Ey, Ez (memory view): numpy array of electric field values. - G (FDTDGrid): Holds essential parameters describing the model. + iteration: int of current iteration (timestep). + updatecoeffsE: memory view of array of electric field update + coefficients. + ID: memory view of array of numeric IDs corresponding to materials + in the model. + Ex, Ey, Ez: memory view of array of electric field values. + G: FDTDGrid object that olds essential parameters describing the + model. """ if iteration * G.dt >= self.start and iteration * G.dt <= self.stop: @@ -203,13 +207,14 @@ class MagneticDipole(Source): """Updates magnetic field values for a magnetic dipole. Args: - iteration (int): Current iteration (timestep). - updatecoeffsH (memory view): numpy array of magnetic field update - coefficients. - ID (memory view): numpy array of numeric IDs corresponding to - materials in the model. - Hx, Hy, Hz (memory view): numpy array of magnetic field values. - G (FDTDGrid): Holds essential parameters describing the model. + iteration: int of current iteration (timestep). + updatecoeffsH: memory view of array of magnetic field update + coefficients. + ID: memory view of array of numeric IDs corresponding to materials + in the model. + Hx, Hy, Hz: memory view of array of magnetic field values. + G: FDTDGrid object that olds essential parameters describing the + model. 
""" if iteration * G.dt >= self.start and iteration * G.dt <= self.stop: @@ -234,24 +239,23 @@ class MagneticDipole(Source): (1 / (G.dx * G.dy * G.dz))) -def htod_src_arrays(sources, G): - """Initialise arrays on GPU for source coordinates/polarisation, other - source information, and source waveform values. +def htod_src_arrays(sources, G, queue=None): + """Initialise arrays on compute device for source coordinates/polarisation, + other source information, and source waveform values. Args: - sources (list): List of sources of one type, e.g. HertzianDipole - G (FDTDGrid): Holds essential parameters describing the model. + sources: list of sources of one type, e.g. HertzianDipole + G: FDTDGrid object that holds essential parameters describing the model. + queue: pyopencl queue. Returns: - srcinfo1_gpu (int): numpy array of source cell coordinates and - polarisation information. - srcinfo2_gpu (float): numpy array of other source information, - e.g. length, resistance etc... - srcwaves_gpu (float): numpy array of source waveform values. + srcinfo1_dev: int array of source cell coordinates and polarisation + information. + srcinfo2_dev: float array of other source information, e.g. length, + resistance etc... + srcwaves_dev: float array of source waveform values. 
""" - import pycuda.gpuarray as gpuarray - srcinfo1 = np.zeros((len(sources), 4), dtype=np.int32) srcinfo2 = np.zeros((len(sources)), dtype=config.sim_config.dtypes['float_or_double']) srcwaves = np.zeros((len(sources), G.iterations), dtype=config.sim_config.dtypes['float_or_double']) @@ -276,11 +280,19 @@ def htod_src_arrays(sources, G): elif src.__class__.__name__ == 'MagneticDipole': srcwaves[i, :] = src.waveformvaluesM - srcinfo1_gpu = gpuarray.to_gpu(srcinfo1) - srcinfo2_gpu = gpuarray.to_gpu(srcinfo2) - srcwaves_gpu = gpuarray.to_gpu(srcwaves) + # Copy arrays to compute device + if config.sim_config.general['solver'] == 'cuda': + import pycuda.gpuarray as gpuarray + srcinfo1_dev = gpuarray.to_gpu(srcinfo1) + srcinfo2_dev = gpuarray.to_gpu(srcinfo2) + srcwaves_dev = gpuarray.to_gpu(srcwaves) + elif config.sim_config.general['solver'] == 'opencl': + import pyopencl.array as clarray + srcinfo1_dev = clarray.to_device(queue, srcinfo1) + srcinfo2_dev = clarray.to_device(queue, srcinfo2) + srcwaves_dev = clarray.to_device(queue, srcwaves) - return srcinfo1_gpu, srcinfo2_gpu, srcwaves_gpu + return srcinfo1_dev, srcinfo2_dev, srcwaves_dev class TransmissionLine(Source): @@ -291,7 +303,8 @@ class TransmissionLine(Source): def __init__(self, G): """ Args: - G (FDTDGrid): Holds essential parameters describing the model. + G: FDTDGrid object that holds essential parameters describing the + model. """ super().__init__() @@ -328,7 +341,8 @@ class TransmissionLine(Source): from: http://dx.doi.org/10.1002/mop.10415 Args: - G (FDTDGrid): Holds essential parameters describing the model. + G: FDTDGrid object that holds essential parameters describing the + model. """ for iteration in range(G.iterations): @@ -344,7 +358,8 @@ class TransmissionLine(Source): """Updates absorbing boundary condition at end of the transmission line. Args: - G (FDTDGrid): Holds essential parameters describing the model. + G: FDTDGrid object that holds essential parameters describing the + model. 
""" h = (config.c * G.dt - self.dl) / (config.c * G.dt + self.dl) @@ -357,8 +372,9 @@ class TransmissionLine(Source): """Updates voltage values along the transmission line. Args: - iteration (int): Current iteration (timestep). - G (FDTDGrid): Holds essential parameters describing the model. + iteration: int of current iteration (timestep). + G: FDTDGrid object that holds essential parameters describing the + model. """ # Update all the voltage values along the line @@ -375,8 +391,9 @@ class TransmissionLine(Source): """Updates current values along the transmission line. Args: - iteration (int): Current iteration (timestep). - G (FDTDGrid): Holds essential parameters describing the model. + iteration: int of current iteration (timestep). + G: FDTDGrid object that holds essential parameters describing the + model. """ # Update all the current values along the line @@ -393,13 +410,14 @@ class TransmissionLine(Source): the transmission line. Args: - iteration (int): Current iteration (timestep). - updatecoeffsE (memory view): numpy array of electric field update - coefficients. - ID (memory view): numpy array of numeric IDs corresponding to - materials in the model. - Ex, Ey, Ez (memory view): numpy array of electric field values. - G (FDTDGrid): Holds essential parameters describing the model. + iteration: int of current iteration (timestep). + updatecoeffsE: memory view of array of electric field update + coefficients. + ID: memory view of array of numeric IDs corresponding to materials + in the model. + Ex, Ey, Ez: memory view of array of electric field values. + G: FDTDGrid object that olds essential parameters describing the + model. """ if iteration * G.dt >= self.start and iteration * G.dt <= self.stop: @@ -423,13 +441,14 @@ class TransmissionLine(Source): in the main grid. Args: - iteration (int): Current iteration (timestep). - updatecoeffsH (memory view): numpy array of magnetic field update - coefficients. 
- ID (memory view): numpy array of numeric IDs corresponding to - materials in the model. - Hx, Hy, Hz (memory view): numpy array of magnetic field values. - G (FDTDGrid): Holds essential parameters describing the model. + iteration: int of current iteration (timestep). + updatecoeffsH: memory view of array of magnetic field update + coefficients. + ID: memory view of array of numeric IDs corresponding to materials + in the model. + Hx, Hy, Hz: memory view of array of magnetic field values. + G: FDTDGrid object that olds essential parameters describing the + model. """ if iteration * G.dt >= self.start and iteration * G.dt <= self.stop: diff --git a/gprMax/updates.py b/gprMax/updates.py index cce55bbf..94722557 100644 --- a/gprMax/updates.py +++ b/gprMax/updates.py @@ -18,19 +18,21 @@ import logging from importlib import import_module +from string import Template import numpy as np +from jinja2 import Environment, PackageLoader import gprMax.config as config -from .cuda.fields_updates import kernel_template_fields -from .cuda.snapshots import kernel_template_store_snapshot -from .cuda.source_updates import kernel_template_sources +from .cuda.snapshots import knl_template_store_snapshot +from .cuda_opencl_el import (knl_fields_updates, knl_snapshots, + knl_source_updates, knl_store_outputs) from .cython.fields_updates_normal import \ update_electric as update_electric_cpu from .cython.fields_updates_normal import \ update_magnetic as update_magnetic_cpu -from .fields_outputs import kernel_template_store_outputs +from .fields_outputs import knl_template_store_outputs from .fields_outputs import store_outputs as store_outputs_cpu from .receivers import dtoh_rx_array, htod_rx_arrays from .snapshots import Snapshot, dtoh_snapshot_array, htod_snapshot_array @@ -256,27 +258,27 @@ class CUDAUpdates: self.source_module = getattr(import_module('pycuda.compiler'), 'SourceModule') self.drv.init() - # Create device handle and context on specifc GPU device (and make it current 
context) + # Create device handle and context on specific GPU device (and make it current context) self.dev = self.drv.Device(config.get_model_config().cuda['gpu'].deviceID) self.ctx = self.dev.make_context() # Initialise arrays on GPU, prepare kernels, and get kernel functions - self._set_field_kernels() + self._set_field_knls() if self.grid.pmls: - self._set_pml_kernels() + self._set_pml_knls() if self.grid.rxs: - self._set_rx_kernel() + self._set_rx_knl() if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles: - self._set_src_kernels() + self._set_src_knls() if self.grid.snapshots: - self._set_snapshot_kernel() + self._set_snapshot_knl() - def _set_field_kernels(self): + def _set_field_knls(self): """Electric and magnetic field updates - prepare kernels, and get kernel functions. """ if config.get_model_config().materials['maxpoles'] > 0: - kernels_fields = self.source_module(kernel_template_fields.substitute( + knls_fields = self.source_module(knl_template_fields.substitute( REAL=config.sim_config.dtypes['C_float_or_double'], REALFUNC=config.get_model_config().materials['cudarealfunc'], COMPLEX=config.get_model_config().materials['dispersiveCdtype'], @@ -296,7 +298,7 @@ class CUDAUpdates: options=config.sim_config.cuda['nvcc_opts']) else: # Set to one any substitutions for dispersive materials. # Value of COMPLEX is not relevant. 
- kernels_fields = self.source_module(kernel_template_fields.substitute( + knls_fields = self.source_module(knl_template_fields.substitute( REAL=config.sim_config.dtypes['C_float_or_double'], REALFUNC=config.get_model_config().materials['cudarealfunc'], COMPLEX=config.sim_config.dtypes['C_float_or_double'], @@ -314,17 +316,17 @@ class CUDAUpdates: NY_T=1, NZ_T=1), options=config.sim_config.cuda['nvcc_opts']) - self.update_electric_gpu = kernels_fields.get_function("update_electric") - self.update_magnetic_gpu = kernels_fields.get_function("update_magnetic") - self._copy_mat_coeffs(kernels_fields, kernels_fields) + self.update_electric_gpu = knls_fields.get_function("update_electric") + self.update_magnetic_gpu = knls_fields.get_function("update_magnetic") + self._copy_mat_coeffs(knls_fields, knls_fields) # Electric and magnetic field updates - dispersive materials # - get kernel functions and initialise array on GPU # If there are any dispersive materials (updates are split into two # parts as they require present and updated electric field values). 
if config.get_model_config().materials['maxpoles'] > 0: - self.dispersive_update_a = kernels_fields.get_function("update_electric_dispersive_A") - self.dispersive_update_b = kernels_fields.get_function("update_electric_dispersive_B") + self.dispersive_update_a = knls_fields.get_function("update_electric_dispersive_A") + self.dispersive_update_b = knls_fields.get_function("update_electric_dispersive_B") # Electric and magnetic field updates - set blocks per grid and # initialise field arrays on GPU @@ -334,17 +336,17 @@ class CUDAUpdates: if config.get_model_config().materials['maxpoles'] > 0: self.grid.htod_dispersive_arrays() - def _set_pml_kernels(self): + def _set_pml_knls(self): """PMLS - prepare kernels and get kernel functions.""" pmlmodulelectric = 'gprMax.cuda.pml_updates_electric_' + self.grid.pmlformulation - kernelelectricfunc = getattr(import_module(pmlmodulelectric), - 'kernels_template_pml_electric_' + + knlelectricfunc = getattr(import_module(pmlmodulelectric), + 'knls_template_pml_electric_' + self.grid.pmlformulation) pmlmodulemagnetic = 'gprMax.cuda.pml_updates_magnetic_' + self.grid.pmlformulation - kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic), - 'kernels_template_pml_magnetic_' + + knlmagneticfunc = getattr(import_module(pmlmodulemagnetic), + 'knls_template_pml_magnetic_' + self.grid.pmlformulation) - kernels_pml_electric = self.source_module(kernelelectricfunc.substitute( + knls_pml_electric = self.source_module(knlelectricfunc.substitute( REAL=config.sim_config.dtypes['C_float_or_double'], N_updatecoeffsE=self.grid.updatecoeffsE.size, NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], @@ -355,7 +357,7 @@ class CUDAUpdates: NY_ID=self.grid.ID.shape[2], NZ_ID=self.grid.ID.shape[3]), options=config.sim_config.cuda['nvcc_opts']) - kernels_pml_magnetic = self.source_module(kernelmagneticfunc.substitute( + knls_pml_magnetic = self.source_module(knlmagneticfunc.substitute( REAL=config.sim_config.dtypes['C_float_or_double'], 
N_updatecoeffsH=self.grid.updatecoeffsH.size, NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1], @@ -366,19 +368,19 @@ class CUDAUpdates: NY_ID=self.grid.ID.shape[2], NZ_ID=self.grid.ID.shape[3]), options=config.sim_config.cuda['nvcc_opts']) - self._copy_mat_coeffs(kernels_pml_electric, kernels_pml_magnetic) + self._copy_mat_coeffs(knls_pml_electric, knls_pml_magnetic) # Set block per grid, initialise arrays on GPU, and get kernel functions for pml in self.grid.pmls: pml.htod_field_arrays() pml.set_blocks_per_grid() - pml.get_update_funcs(kernels_pml_electric, kernels_pml_magnetic) + pml.get_update_funcs(knls_pml_electric, knls_pml_magnetic) - def _set_rx_kernel(self): + def _set_rx_knl(self): """Receivers - initialise arrays on GPU, prepare kernel and get kernel function. """ self.rxcoords_gpu, self.rxs_gpu = htod_rx_arrays(self.grid) - kernel_store_outputs = self.source_module(kernel_template_store_outputs.substitute( + knl_store_outputs = self.source_module(knl_template_store_outputs.substitute( REAL=config.sim_config.dtypes['C_float_or_double'], NY_RXCOORDS=3, NX_RXS=6, @@ -388,13 +390,13 @@ class CUDAUpdates: NY_FIELDS=self.grid.ny + 1, NZ_FIELDS=self.grid.nz + 1), options=config.sim_config.cuda['nvcc_opts']) - self.store_outputs_gpu = kernel_store_outputs.get_function("store_outputs") + self.store_outputs_gpu = knl_store_outputs.get_function("store_outputs") - def _set_src_kernels(self): + def _set_src_knls(self): """Sources - initialise arrays on GPU, prepare kernel and get kernel function. 
""" - kernels_sources = self.source_module(kernel_template_sources.substitute( + knls_sources = self.source_module(knl_template_sources.substitute( REAL=config.sim_config.dtypes['C_float_or_double'], N_updatecoeffsE=self.grid.updatecoeffsE.size, N_updatecoeffsH=self.grid.updatecoeffsH.size, @@ -408,23 +410,23 @@ class CUDAUpdates: NY_ID=self.grid.ID.shape[2], NZ_ID=self.grid.ID.shape[3]), options=config.sim_config.cuda['nvcc_opts']) - self._copy_mat_coeffs(kernels_sources, kernels_sources) + self._copy_mat_coeffs(knls_sources, knls_sources) if self.grid.hertziandipoles: self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = htod_src_arrays(self.grid.hertziandipoles, self.grid) - self.update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole") + self.update_hertzian_dipole_gpu = knls_sources.get_function("update_hertzian_dipole") if self.grid.magneticdipoles: self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = htod_src_arrays(self.grid.magneticdipoles, self.grid) - self.update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole") + self.update_magnetic_dipole_gpu = knls_sources.get_function("update_magnetic_dipole") if self.grid.voltagesources: self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = htod_src_arrays(self.grid.voltagesources, self.grid) - self.update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source") + self.update_voltage_source_gpu = knls_sources.get_function("update_voltage_source") - def _set_snapshot_kernel(self): + def _set_snapshot_knl(self): """Snapshots - initialise arrays on GPU, prepare kernel and get kernel function. 
""" self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = htod_snapshot_array(self.grid) - kernel_store_snapshot = self.source_module(kernel_template_store_snapshot.substitute( + knl_store_snapshot = self.source_module(knl_template_store_snapshot.substitute( REAL=config.sim_config.dtypes['C_float_or_double'], NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, @@ -433,25 +435,25 @@ class CUDAUpdates: NY_FIELDS=self.grid.ny + 1, NZ_FIELDS=self.grid.nz + 1), options=config.sim_config.cuda['nvcc_opts']) - self.store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot") + self.store_snapshot_gpu = knl_store_snapshot.get_function("store_snapshot") - def _copy_mat_coeffs(self, kernelE, kernelH): + def _copy_mat_coeffs(self, knlE, knlH): """Copy material coefficient arrays to constant memory of GPU (must be <64KB). Args: - kernelE (kernel): electric field kernel. - kernelH (kernel): magnetic field kernel. + knlE (kernel): electric field kernel. + knlH (kernel): magnetic field kernel. 
""" # Check if coefficient arrays will fit on constant memory of GPU if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes - > config.get_model_config().cuda['gpu'].constmem): - logger.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.get_model_config().cuda['gpu'].constmem)} on {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU") + > config.get_model_config().cuda['gpu'].total_constant_memory): + logger.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.get_model_config().cuda['gpu'].total_constant_memory)} on {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU") raise ValueError - updatecoeffsE = kernelE.get_global('updatecoeffsE')[0] - updatecoeffsH = kernelH.get_global('updatecoeffsH')[0] + updatecoeffsE = knlE.get_global('updatecoeffsE')[0] + updatecoeffsH = knlH.get_global('updatecoeffsH')[0] self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE) self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH) @@ -707,3 +709,595 @@ class CUDAUpdates: # Remove context from top of stack and delete self.ctx.pop() del self.ctx + + +class OpenCLUpdates: + """Defines update functions for OpenCL-based solver.""" + + def __init__(self, G): + """ + Args: + G: FDTDObject of parameters describing a grid in a model. 
+ """ + + self.grid = G + self.dispersive_update_a = None + self.dispersive_update_b = None + self.compute_time = 0 + + # Import pyopencl module + self.cl = import_module('pyopencl') + self.elwise = getattr(import_module('pyopencl.elementwise'), 'ElementwiseKernel') + + # Select device, create context and command queue + self.dev = config.get_model_config().device['dev'] + self.ctx = self.cl.Context(devices=[self.dev]) + self.queue = self.cl.CommandQueue(self.ctx, + properties=self.cl.command_queue_properties.PROFILING_ENABLE) + + # Enviroment for templating kernels + self.env = Environment(loader=PackageLoader('gprMax', 'cuda_opencl_el')) + + # Initialise arrays on device, prepare kernels, and get kernel functions + self._set_field_knls() + if self.grid.pmls: + self._set_pml_knls() + if self.grid.rxs: + self._set_rx_knl() + if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles: + self._set_src_knls() + if self.grid.snapshots: + self._set_snapshot_knl() + + def _set_field_knls(self): + """Electric and magnetic field updates - prepare kernels, and + get kernel functions. + """ + if config.get_model_config().materials['maxpoles'] > 0: + NY_MATDISPCOEFFS = self.grid.updatecoeffsdispersive.shape[1] + NX_T = self.grid.Tx.shape[1] + NY_T = self.grid.Tx.shape[2] + NZ_T = self.grid.Tx.shape[3] + else: # Set to one any substitutions for dispersive materials. 
+ NY_MATDISPCOEFFS = 1 + NX_T = 1 + NY_T = 1 + NZ_T = 1 + + self.knl_common = self.env.get_template('knl_common_opencl.tmpl').render( + updatecoeffsE = self.grid.updatecoeffsE.ravel(), + updatecoeffsH = self.grid.updatecoeffsH.ravel(), + REAL=config.sim_config.dtypes['C_float_or_double'], + N_updatecoeffsE=self.grid.updatecoeffsE.size, + N_updatecoeffsH=self.grid.updatecoeffsH.size, + NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], + NY_MATDISPCOEFFS=NY_MATDISPCOEFFS, + NX_FIELDS=self.grid.nx + 1, + NY_FIELDS=self.grid.ny + 1, + NZ_FIELDS=self.grid.nz + 1, + NX_ID=self.grid.ID.shape[1], + NY_ID=self.grid.ID.shape[2], + NZ_ID=self.grid.ID.shape[3], + NX_T=NX_T, + NY_T=NY_T, + NZ_T=NZ_T, + NY_RXCOORDS=3, + NX_RXS=6, + NY_RXS=self.grid.iterations, + NZ_RXS=len(self.grid.rxs), + NY_SRCINFO=4, + NY_SRCWAVES=self.grid.iterations, + NX_SNAPS=Snapshot.nx_max, + NY_SNAPS=Snapshot.ny_max, + NZ_SNAPS=Snapshot.nz_max) + + self.update_electric_dev = self.elwise(self.ctx, + Template("int NX, " + "int NY, " + "int NZ, " + "__global const unsigned int* restrict ID, " + "__global $REAL *Ex, " + "__global $REAL *Ey, " + "__global $REAL *Ez, " + "__global const $REAL * restrict Hx, " + "__global const $REAL * restrict Hy, " + "__global const $REAL * restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), + knl_fields_updates.update_electric.substitute({ + 'NX_FIELDS': self.grid.nx + 1, + 'NY_FIELDS': self.grid.ny + 1, + 'NZ_FIELDS': self.grid.nz + 1, + 'NX_ID': self.grid.ID.shape[1], + 'NY_ID': self.grid.ID.shape[2], + 'NZ_ID': self.grid.ID.shape[3]}), + 'update_electric', preamble=self.knl_common) + + self.update_magnetic_dev = self.elwise(self.ctx, + Template("int NX, " + "int NY, " + "int NZ, " + "__global const unsigned int* restrict ID, " + "__global $REAL *Hx, " + "__global $REAL *Hy, " + "__global $REAL *Hz, " + "__global const $REAL * restrict Ex, " + "__global const $REAL * restrict Ey, " + "__global const $REAL * restrict Ez").substitute({'REAL': 
config.sim_config.dtypes['C_float_or_double']}), + knl_fields_updates.update_magnetic.substitute({ + 'NX_FIELDS': self.grid.nx + 1, + 'NY_FIELDS': self.grid.ny + 1, + 'NZ_FIELDS': self.grid.nz + 1, + 'NX_ID': self.grid.ID.shape[1], + 'NY_ID': self.grid.ID.shape[2], + 'NZ_ID': self.grid.ID.shape[3]}), + 'update_magnetic', preamble=self.knl_common) + + # Electric and magnetic field updates - dispersive materials - + # get kernel functions + # If there are any dispersive materials (updates are split into two + # parts as they require present and updated electric field values). + if config.get_model_config().materials['maxpoles'] > 0: + self.dispersive_update_a = self.elwise(self.ctx, + Template("int NX, " + "int NY, " + "int NZ, " + "int MAXPOLES, " + "__global const $COMPLEX* restrict updatecoeffsdispersive, " + "__global $COMPLEX *Tx, " + "__global $COMPLEX *Ty, " + "__global $COMPLEX *Tz, " + "__global const unsigned int* restrict ID, " + "__global $REAL *Ex, " + "__global $REAL *Ey, " + "__global $REAL *Ez, " + "__global const $REAL* restrict Hx, " + "__global const $REAL* restrict Hy, " + "__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double'], 'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}), + knl_fields_updates.update_electric_dispersive_A.substitute({ + 'REAL': config.sim_config.dtypes['C_float_or_double'], + 'REALFUNC': config.get_model_config().materials['crealfunc'], + 'NX_FIELDS': self.grid.nx + 1, + 'NY_FIELDS': self.grid.ny + 1, + 'NZ_FIELDS': self.grid.nz + 1, + 'NX_ID': self.grid.ID.shape[1], + 'NY_ID': self.grid.ID.shape[2], + 'NZ_ID': self.grid.ID.shape[3], + 'NX_T': NX_T, + 'NY_T': NY_T, + 'NZ_T': NZ_T}), + 'update_electric_dispersive_A', preamble=self.knl_common) + self.dispersive_update_b = self.elwise(self.ctx, + Template("int NX, " + "int NY, " + "int NZ, " + "int MAXPOLES, " + "__global const $COMPLEX* restrict updatecoeffsdispersive, " + "__global $COMPLEX *Tx, " + "__global 
$COMPLEX *Ty, " + "__global $COMPLEX *Tz, " + "__global const unsigned int* restrict ID, " + "__global $REAL *Ex, " + "__global $REAL *Ey, " + "__global $REAL *Ez, " + "__global const $REAL* restrict Hx, " + "__global const $REAL* restrict Hy, " + "__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double'] ,'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}), + knl_fields_updates.update_electric_dispersive_B.substitute({ + 'REAL': config.sim_config.dtypes['C_float_or_double'], + 'REALFUNC': config.get_model_config().materials['crealfunc'], + 'NX_FIELDS': self.grid.nx + 1, + 'NY_FIELDS': self.grid.ny + 1, + 'NZ_FIELDS': self.grid.nz + 1, + 'NX_ID': self.grid.ID.shape[1], + 'NY_ID': self.grid.ID.shape[2], + 'NZ_ID': self.grid.ID.shape[3], + 'NX_T': NX_T, + 'NY_T': NY_T, + 'NZ_T': NZ_T}), + 'update_electric_dispersive_B', preamble=self.knl_common) + + # Electric and magnetic field updates - initialise field arrays on + # compute device + self.grid.htod_geometry_arrays(self.queue) + self.grid.htod_field_arrays(self.queue) + if config.get_model_config().materials['maxpoles'] > 0: + self.grid.htod_dispersive_arrays(self.queue) + + def _set_pml_knls(self): + """PMLS - prepare kernels and get kernel functions.""" + knl_pml_updates_electric = import_module('gprMax.cuda_opencl_el.knl_pml_updates_electric_' + self.grid.pmlformulation) + knl_pml_updates_magnetic = import_module('gprMax.cuda_opencl_el.knl_pml_updates_magnetic_' + self.grid.pmlformulation) + + # Set workgroup size, initialise arrays on compute device, and get + # kernel functions + for pml in self.grid.pmls: + pml.set_queue(self.queue) + pml.htod_field_arrays() + pml.set_wgs() + knl_name = 'order' + str(len(pml.CFS)) + '_' + pml.direction + knl_electric_name = getattr(knl_pml_updates_electric, knl_name) + knl_magnetic_name = getattr(knl_pml_updates_magnetic, knl_name) + + pml.update_electric_dev = self.elwise(self.ctx, + 
knl_electric_name['args'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), + knl_electric_name['func'].substitute({ + 'REAL': config.sim_config.dtypes['C_float_or_double'], + 'NX_FIELDS': self.grid.nx + 1, + 'NY_FIELDS': self.grid.ny + 1, + 'NZ_FIELDS': self.grid.nz + 1, + 'NX_ID': self.grid.ID.shape[1], + 'NY_ID': self.grid.ID.shape[2], + 'NZ_ID': self.grid.ID.shape[3]}), + 'pml_updates_electric_' + knl_name, + preamble=self.knl_common) + + pml.update_magnetic_dev = self.elwise(self.ctx, + knl_magnetic_name['args'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), + knl_magnetic_name['func'].substitute({ + 'REAL': config.sim_config.dtypes['C_float_or_double'], + 'NX_FIELDS': self.grid.nx + 1, + 'NY_FIELDS': self.grid.ny + 1, + 'NZ_FIELDS': self.grid.nz + 1, + 'NX_ID': self.grid.ID.shape[1], + 'NY_ID': self.grid.ID.shape[2], + 'NZ_ID': self.grid.ID.shape[3]}), + 'pml_updates_magnetic_' + knl_name, + preamble=self.knl_common) + + def _set_rx_knl(self): + """Receivers - initialise arrays on compute device, prepare kernel and + get kernel function. + """ + self.rxcoords_dev, self.rxs_dev = htod_rx_arrays(self.grid, self.queue) + self.store_outputs_dev = self.elwise(self.ctx, + Template("int NRX, " + "int iteration, " + "__global const int* restrict rxcoords, " + "__global $REAL *rxs, " + "__global const $REAL* restrict Ex, " + "__global const $REAL* restrict Ey, " + "__global const $REAL* restrict Ez, " + "__global const $REAL* restrict Hx, " + "__global const $REAL* restrict Hy, " + "__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), + knl_store_outputs.store_outputs.substitute(), + 'store_outputs', preamble=self.knl_common) + + def _set_src_knls(self): + """Sources - initialise arrays on compute device, prepare kernel and + get kernel function. 
+ """ + if self.grid.hertziandipoles: + self.srcinfo1_hertzian_dev, self.srcinfo2_hertzian_dev, self.srcwaves_hertzian_dev = htod_src_arrays(self.grid.hertziandipoles, self.grid, self.queue) + self.update_hertzian_dipole_dev = self.elwise(self.ctx, + Template("int NHERTZDIPOLE, " + "int iteration, " + "$REAL dx, " + "$REAL dy, " + "$REAL dz, " + "__global const int* restrict srcinfo1, " + "__global const $REAL* restrict srcinfo2, " + "__global const $REAL* restrict srcwaveforms, " + "__global const unsigned int* restrict ID, " + "__global $REAL *Ex, " + "__global $REAL *Ey, " + "__global $REAL *Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), + knl_source_updates.update_hertzian_dipole.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), + 'update_hertzian_dipole', preamble=self.knl_common) + if self.grid.magneticdipoles: + self.srcinfo1_magnetic_dev, self.srcinfo2_magnetic_dev, self.srcwaves_magnetic_dev = htod_src_arrays(self.grid.magneticdipoles, self.grid, self.queue) + self.update_magnetic_dipole_dev = self.elwise(self.ctx, + Template("int NMAGDIPOLE, " + "int iteration, " + "$REAL dx, " + "$REAL dy, " + "$REAL dz, " + "__global const int* restrict srcinfo1, " + "__global const $REAL* restrict srcinfo2, " + "__global const $REAL* restrict srcwaveforms, " + "__global const unsigned int* restrict ID, " + "__global $REAL *Hx, " + "__global $REAL *Hy, " + "__global $REAL *Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), + knl_source_updates.update_magnetic_dipole.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), + 'update_magnetic_dipole', preamble=self.knl_common) + if self.grid.voltagesources: + self.srcinfo1_voltage_dev, self.srcinfo2_voltage_dev,self.srcwaves_voltage_dev = htod_src_arrays(self.grid.voltagesources, self.grid, self.queue) + self.update_voltage_source_dev = self.elwise(self.ctx, + Template("int NVOLTSRC, " + "int iteration, " + "$REAL dx, " + "$REAL dy, " + 
"$REAL dz, " + "__global const int* restrict srcinfo1, " + "__global const $REAL* restrict srcinfo2, " + "__global const $REAL* restrict srcwaveforms, " + "__global const unsigned int* restrict ID, " + "__global $REAL *Ex, " + "__global $REAL *Ey, " + "__global $REAL *Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), + knl_source_updates.update_voltage_source.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), 'update_voltage_source', preamble=self.knl_common) + + def _set_snapshot_knl(self): + """Snapshots - initialise arrays on compute device, prepare kernel and + get kernel function. + """ + self.snapEx_dev, self.snapEy_dev, self.snapEz_dev, self.snapHx_dev, self.snapHy_dev, self.snapHz_dev = htod_snapshot_array(self.grid, self.queue) + self.store_snapshot_dev = self.elwise(self.ctx, + Template("int p, " + "int xs, " + "int xf, " + "int ys, " + "int yf, " + "int zs, " + "int zf, " + "int dx, " + "int dy, " + "int dz, " + "__global const $REAL* restrict Ex, " + "__global const $REAL* restrict Ey, " + "__global const $REAL* restrict Ez, " + "__global const $REAL* restrict Hx, " + "__global const $REAL* restrict Hy, " + "__global const $REAL* restrict Hz, " + "__global $REAL *snapEx, " + "__global $REAL *snapEy, " + "__global $REAL *snapEz, " + "__global $REAL *snapHx, " + "__global $REAL *snapHy, " + "__global $REAL *snapHz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), + knl_snapshots.store_snapshot.substitute({'NX_SNAPS': Snapshot.nx_max, + 'NY_SNAPS': Snapshot.ny_max, + 'NZ_SNAPS': Snapshot.nz_max}), + 'store_snapshot', preamble=self.knl_common) + + def store_outputs(self): + """Store field component values for every receiver.""" + if self.grid.rxs: + event = self.store_outputs_dev(np.int32(len(self.grid.rxs)), + np.int32(self.grid.iteration), + self.rxcoords_dev, + self.rxs_dev, + self.grid.Ex_dev, + self.grid.Ey_dev, + self.grid.Ez_dev, + self.grid.Hx_dev, + self.grid.Hy_dev, + self.grid.Hz_dev) 
+ event.wait() + self.compute_time += (event.profile.end - event.profile.start)*1e-9 + + def store_snapshots(self, iteration): + """Store any snapshots. + + Args: + iteration: int for iteration number. + """ + + for i, snap in enumerate(self.grid.snapshots): + if snap.time == iteration + 1: + snapno = 0 if config.get_model_config().device['snapsgpu2cpu'] else i + event = self.store_snapshot_dev(np.int32(snapno), + np.int32(snap.xs), + np.int32(snap.xf), + np.int32(snap.ys), + np.int32(snap.yf), + np.int32(snap.zs), + np.int32(snap.zf), + np.int32(snap.dx), + np.int32(snap.dy), + np.int32(snap.dz), + self.grid.Ex_dev, + self.grid.Ey_dev, + self.grid.Ez_dev, + self.grid.Hx_dev, + self.grid.Hy_dev, + self.grid.Hz_dev, + self.snapEx_dev, + self.snapEy_dev, + self.snapEz_dev, + self.snapHx_dev, + self.snapHy_dev, + self.snapHz_dev) + event.wait() + if config.get_model_config().device['snapsgpu2cpu']: + dtoh_snapshot_array(self.snapEx_dev.get(), + self.snapEy_dev.get(), + self.snapEz_dev.get(), + self.snapHx_dev.get(), + self.snapHy_dev.get(), + self.snapHz_dev.get(), + 0, + snap) + self.compute_time += (event.profile.end - event.profile.start)*1e-9 + + def update_magnetic(self): + """Update magnetic field components.""" + event = self.update_magnetic_dev(np.int32(self.grid.nx), + np.int32(self.grid.ny), + np.int32(self.grid.nz), + self.grid.ID_dev, + self.grid.Hx_dev, + self.grid.Hy_dev, + self.grid.Hz_dev, + self.grid.Ex_dev, + self.grid.Ey_dev, + self.grid.Ez_dev) + event.wait() + self.compute_time += (event.profile.end - event.profile.start)*1e-9 + + def update_magnetic_pml(self): + """Update magnetic field components with the PML correction.""" + for pml in self.grid.pmls: + pml.update_magnetic() + self.compute_time += pml.compute_time + + def update_magnetic_sources(self): + """Update magnetic field components from sources.""" + if self.grid.magneticdipoles: + event = self.update_magnetic_dipole_dev(np.int32(len(self.grid.magneticdipoles)), + 
np.int32(self.grid.iteration), + config.sim_config.dtypes['float_or_double'](self.grid.dx), + config.sim_config.dtypes['float_or_double'](self.grid.dy), + config.sim_config.dtypes['float_or_double'](self.grid.dz), + self.srcinfo1_magnetic_dev, + self.srcinfo2_magnetic_dev, + self.srcwaves_magnetic_dev, + self.grid.ID_dev, + self.grid.Hx_dev, + self.grid.Hy_dev, + self.grid.Hz_dev) + event.wait() + self.compute_time += (event.profile.end - event.profile.start)*1e-9 + + def update_electric_a(self): + """Update electric field components.""" + # All materials are non-dispersive so do standard update. + if config.get_model_config().materials['maxpoles'] == 0: + event = self.update_electric_dev(np.int32(self.grid.nx), + np.int32(self.grid.ny), + np.int32(self.grid.nz), + self.grid.ID_dev, + self.grid.Ex_dev, + self.grid.Ey_dev, + self.grid.Ez_dev, + self.grid.Hx_dev, + self.grid.Hy_dev, + self.grid.Hz_dev) + event.wait() + self.compute_time += (event.profile.end - event.profile.start)*1e-9 + + # If there are any dispersive materials do 1st part of dispersive update + # (it is split into two parts as it requires present and updated electric field values). 
+ else: + event = self.dispersive_update_a(np.int32(self.grid.nx), + np.int32(self.grid.ny), + np.int32(self.grid.nz), + np.int32(config.get_model_config().materials['maxpoles']), + self.grid.updatecoeffsdispersive_dev, + self.grid.Tx_dev, + self.grid.Ty_dev, + self.grid.Tz_dev, + self.grid.ID_dev, + self.grid.Ex_dev, + self.grid.Ey_dev, + self.grid.Ez_dev, + self.grid.Hx_dev, + self.grid.Hy_dev, + self.grid.Hz_dev) + event.wait() + self.compute_time += (event.profile.end - event.profile.start)*1e-9 + + def update_electric_pml(self): + """Update electric field components with the PML correction.""" + for pml in self.grid.pmls: + pml.update_electric() + self.compute_time += pml.compute_time + + def update_electric_sources(self): + """Update electric field components from sources - + update any Hertzian dipole sources last. + """ + if self.grid.voltagesources: + event = self.update_voltage_source_dev(np.int32(len(self.grid.voltagesources)), + np.int32(self.grid.iteration), + config.sim_config.dtypes['float_or_double'](self.grid.dx), + config.sim_config.dtypes['float_or_double'](self.grid.dy), + config.sim_config.dtypes['float_or_double'](self.grid.dz), + self.srcinfo1_voltage_dev, + self.srcinfo2_voltage_dev, + self.srcwaves_voltage_dev, + self.grid.ID_dev, + self.grid.Ex_dev, + self.grid.Ey_dev, + self.grid.Ez_dev) + event.wait() + self.compute_time += (event.profile.end - event.profile.start)*1e-9 + + if self.grid.hertziandipoles: + event = self.update_hertzian_dipole_dev(np.int32(len(self.grid.hertziandipoles)), + np.int32(self.grid.iteration), + config.sim_config.dtypes['float_or_double'](self.grid.dx), + config.sim_config.dtypes['float_or_double'](self.grid.dy), + config.sim_config.dtypes['float_or_double'](self.grid.dz), + self.srcinfo1_hertzian_dev, + self.srcinfo2_hertzian_dev, + self.srcwaves_hertzian_dev, + self.grid.ID_dev, + self.grid.Ex_dev, + self.grid.Ey_dev, + self.grid.Ez_dev) + event.wait() + self.compute_time += (event.profile.end - 
event.profile.start)*1e-9 + + self.grid.iteration += 1 + + def update_electric_b(self): + """If there are any dispersive materials do 2nd part of dispersive + update - it is split into two parts as it requires present and + updated electric field values. Therefore it can only be completely + updated after the electric field has been updated by the PML and + source updates. + """ + if config.get_model_config().materials['maxpoles'] > 0: + event = self.dispersive_update_b(np.int32(self.grid.nx), + np.int32(self.grid.ny), + np.int32(self.grid.nz), + np.int32(config.get_model_config().materials['maxpoles']), + self.grid.updatecoeffsdispersive_dev, + self.grid.Tx_dev, + self.grid.Ty_dev, + self.grid.Tz_dev, + self.grid.ID_dev, + self.grid.Ex_dev, + self.grid.Ey_dev, + self.grid.Ez_dev, + self.grid.Hx_dev, + self.grid.Hy_dev, + self.grid.Hz_dev) + event.wait() + self.compute_time += (event.profile.end - event.profile.start)*1e-9 + + def time_start(self): + pass + + def calculate_memsolve(self, iteration): + """Calculate memory used on last iteration. + + Args: + iteration: int of iteration number. + + Returns: + Memory (RAM) used on compute device. 
+ """ + # if iteration == self.grid.iterations - 1: + # return self.drv.mem_get_info()[1] - self.drv.mem_get_info()[0] + logger.debug('Look at memory estimate for pyopencl') + pass + + def calculate_tsolve(self): + """Calculate solving time for model.""" + return self.compute_time + + def finalise(self): + """Copy data from compute device back to CPU to save to file(s).""" + # Copy output from receivers array back to correct receiver objects + if self.grid.rxs: + dtoh_rx_array(self.rxs_dev.get(), self.rxcoords_dev.get(), self.grid) + + # Copy data from any snapshots back to correct snapshot objects + if self.grid.snapshots and not config.get_model_config().device['snapsgpu2cpu']: + for i, snap in enumerate(self.grid.snapshots): + dtoh_snapshot_array(self.snapEx_dev.get(), + self.snapEy_dev.get(), + self.snapEz_dev.get(), + self.snapHx_dev.get(), + self.snapHy_dev.get(), + self.snapHz_dev.get(), + i, snap) + + def cleanup(self): + """Cleanup compute device context.""" + logger.debug('Check if pyopencl needs explicit cleanup.') + # Remove context from top of stack and delete + # self.ctx.pop() + # del self.ctx diff --git a/gprMax/utilities/host_info.py b/gprMax/utilities/host_info.py index d427cc13..0e860fb4 100644 --- a/gprMax/utilities/host_info.py +++ b/gprMax/utilities/host_info.py @@ -26,7 +26,7 @@ import sys import gprMax.config as config import psutil -from .utilities import human_size +from .utilities import get_terminal_width, human_size logger = logging.getLogger(__name__) @@ -35,9 +35,9 @@ def get_host_info(): """Get information about the machine, CPU, RAM, and OS. Returns: - hostinfo (dict): Manufacturer and model of machine; description of CPU - type, speed, cores; RAM; name and - version of operating system. + hostinfo: dict containing manufacturer and model of machine; + description of CPU type, speed, cores; RAM; name and + version of operating system. 
""" # Default to 'unknown' if any of the detection fails @@ -47,13 +47,17 @@ def get_host_info(): if sys.platform == 'win32': # Manufacturer/model try: - manufacturer = subprocess.check_output("wmic csproduct get vendor", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() + manufacturer = subprocess.check_output("wmic csproduct get vendor", + shell=True, + stderr=subprocess.STDOUT).decode('utf-8').strip() manufacturer = manufacturer.split('\n') if len(manufacturer) > 1: manufacturer = manufacturer[1] else: manufacturer = manufacturer[0] - model = subprocess.check_output("wmic computersystem get model", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() + model = subprocess.check_output("wmic computersystem get model", + shell=True, + stderr=subprocess.STDOUT).decode('utf-8').strip() model = model.split('\n') if len(model) > 1: model = model[1] @@ -61,16 +65,19 @@ def get_host_info(): model = model[0] except subprocess.CalledProcessError: pass - machineID = manufacturer + ' ' + model + machineID = ' '.join(manufacturer.split()) + ' ' + ' '.join(model.split()) # CPU information try: - allcpuinfo = subprocess.check_output("wmic cpu get Name", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() + allcpuinfo = subprocess.check_output("wmic cpu get Name", + shell=True, + stderr=subprocess.STDOUT).decode('utf-8').strip() allcpuinfo = allcpuinfo.split('\n') sockets = 0 for line in allcpuinfo: if 'CPU' in line: cpuID = line.strip() + cpuID = ' '.join(cpuID.split()) sockets += 1 except subprocess.CalledProcessError: pass @@ -93,16 +100,21 @@ def get_host_info(): # Manufacturer/model manufacturer = 'Apple' try: - model = subprocess.check_output("sysctl -n hw.model", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() + model = subprocess.check_output("sysctl -n hw.model", shell=True, + stderr=subprocess.STDOUT).decode('utf-8').strip() except subprocess.CalledProcessError: pass - machineID = manufacturer + ' ' + model + machineID = ' 
'.join(manufacturer.split()) + ' ' + ' '.join(model.split()) # CPU information try: - sockets = subprocess.check_output("sysctl -n hw.packages", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() + sockets = subprocess.check_output("sysctl -n hw.packages", + shell=True, + stderr=subprocess.STDOUT).decode('utf-8').strip() sockets = int(sockets) - cpuID = subprocess.check_output("sysctl -n machdep.cpu.brand_string", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() + cpuID = subprocess.check_output("sysctl -n machdep.cpu.brand_string", + shell=True, + stderr=subprocess.STDOUT).decode('utf-8').strip() cpuID = ' '.join(cpuID.split()) except subprocess.CalledProcessError: pass @@ -123,21 +135,30 @@ def get_host_info(): elif sys.platform == 'linux': # Manufacturer/model try: - manufacturer = subprocess.check_output("cat /sys/class/dmi/id/sys_vendor", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() - model = subprocess.check_output("cat /sys/class/dmi/id/product_name", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip() + manufacturer = subprocess.check_output("cat /sys/class/dmi/id/sys_vendor", + shell=True, + stderr=subprocess.STDOUT).decode('utf-8').strip() + model = subprocess.check_output("cat /sys/class/dmi/id/product_name", + shell=True, + stderr=subprocess.STDOUT).decode('utf-8').strip() except subprocess.CalledProcessError: pass - machineID = manufacturer + ' ' + model + machineID = ' '.join(manufacturer.split()) + ' ' + ' '.join(model.split()) # CPU information try: # Locale to ensure English myenv = {**os.environ, 'LANG': 'en_US.utf8'} - cpuIDinfo = subprocess.check_output("cat /proc/cpuinfo", shell=True, stderr=subprocess.STDOUT, env=myenv).decode('utf-8').strip() + cpuIDinfo = subprocess.check_output("cat /proc/cpuinfo", shell=True, + stderr=subprocess.STDOUT, + env=myenv).decode('utf-8').strip() for line in cpuIDinfo.split('\n'): if re.search('model name', line): cpuID = re.sub('.*model name.*:', '', line, 
1).strip() - allcpuinfo = subprocess.check_output("lscpu", shell=True, stderr=subprocess.STDOUT, env=myenv).decode('utf-8').strip() + cpuID = ' '.join(cpuID.split()) + allcpuinfo = subprocess.check_output("lscpu", shell=True, + stderr=subprocess.STDOUT, + env=myenv).decode('utf-8').strip() for line in allcpuinfo.split('\n'): if 'Socket(s)' in line: sockets = int(line.strip()[-1]) @@ -177,11 +198,31 @@ def get_host_info(): return hostinfo +def print_host_info(hostinfo): + """Print information about the machine, CPU, RAM, and OS. + + Args: + hostinfo: dict containing manufacturer and model of machine; + description of CPU type, speed, cores; RAM; name and + version of operating system. + """ + + hyperthreadingstr = (f", {hostinfo['logicalcores']} " + f"cores with Hyper-Threading" if hostinfo['hyperthreading'] else '') + logger.basic(f"\n{hostinfo['hostname']} | " + f"{hostinfo['machineID']} | " + f"{hostinfo['sockets']} x {hostinfo['cpuID']} " + f"({hostinfo['physicalcores']} cores{hyperthreadingstr}) | " + f"{human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} | " + f"{hostinfo['osversion']}") + logger.basic(f"|--->OpenMP: {hostinfo['physicalcores']} threads") + + def set_omp_threads(nthreads=None): """Sets the number of OpenMP CPU threads for parallelised parts of code. Returns: - nthreads (int): Number of OpenMP threads. + nthreads: int for number of OpenMP threads. """ if sys.platform == 'darwin': @@ -228,29 +269,39 @@ def mem_check_host(mem): """Check if the required amount of memory (RAM) is available on host. Args: - mem (int): Memory required (bytes). + mem: int for memory required (bytes). 
""" if mem > config.sim_config.hostinfo['ram']: - logger.exception(f"Memory (RAM) required ~{human_size(mem)} exceeds {human_size(config.sim_config.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} detected!\n") + logger.exception(f"Memory (RAM) required ~{human_size(mem)} exceeds " + f"{human_size(config.sim_config.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} " + "detected!\n") raise ValueError -def mem_check_gpu_snaps(total_mem, snaps_mem): +def mem_check_device_snaps(total_mem, snaps_mem): """Check if the required amount of memory (RAM) for all snapshots can fit - on specified GPU. + on specified device. Args: - total_mem (int): Total memory required for model (bytes). - snaps_mem (int): Memory required for all snapshots (bytes). + total_mem: int for total memory required for model (bytes). + snaps_mem: int for memory required for all snapshots (bytes). """ - if total_mem - snaps_mem > config.get_model_config().cuda['gpu'].totalmem: - logger.exception(f"Memory (RAM) required ~{human_size(total_mem)} exceeds {human_size(config.get_model_config().cuda['gpu'].totalmem, a_kilobyte_is_1024_bytes=True)} detected on specified {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU!\n") + + dev = config.get_model_config().device['dev'] + if config.sim_config.general['solver'] == 'cuda': + device_mem, dev_name = dev.total_memory(), dev.name() # pycuda Device exposes these as methods + elif config.sim_config.general['solver'] == 'opencl': + device_mem, dev_name = dev.global_mem_size, dev.name # pyopencl Device exposes these as attributes + if total_mem - snaps_mem > device_mem: + logger.exception(f"Memory (RAM) required ~{human_size(total_mem)} exceeds " + f"{human_size(device_mem, a_kilobyte_is_1024_bytes=True)} " + f"detected on specified {' '.join(dev_name.split())} device!\n") raise ValueError # If the required memory without the snapshots will fit on the GPU then # transfer and store snaphots on host - if snaps_mem != 0 and total_mem - 
snaps_mem <
config.get_model_config().cuda['gpu'].totalmem: - config.get_model_config().cuda['snapsgpu2cpu'] = True + if snaps_mem != 0 and total_mem - snaps_mem < device_mem: + config.get_model_config().device['snapsgpu2cpu'] = True def mem_check_all(grids): @@ -259,12 +310,12 @@ def mem_check_all(grids): memory. Args: - grids (list): FDTDGrid objects. + grids: list of FDTDGrid objects. Returns: - total_mem (int): Total memory required for all grids. - mem_strs (list): Strings containing text of memory requirements for - each grid. + total_mem: int for total memory required for all grids. + mem_strs: list of strings containing text of memory requirements for + each grid. """ total_snaps_mem = 0 @@ -297,59 +348,56 @@ def mem_check_all(grids): mem_check_host(total_mem) # Check if there is sufficient memory for any snapshots on GPU - if total_snaps_mem > 0 and config.sim_config.general['cuda']: - mem_check_gpu_snaps(total_mem, total_snaps_mem) + if (total_snaps_mem > 0 and + config.sim_config.general['solver'] in ('cuda', 'opencl')): + mem_check_device_snaps(total_mem, total_snaps_mem) return total_mem, mem_strs -class GPU: - """GPU information.""" +def has_pycuda(): + """Check if pycuda module is installed.""" + pycuda = True + try: + import pycuda + except ImportError: + pycuda = False + return pycuda - def __init__(self): - self.deviceID = None - self.name = None - self.pcibusID = None - self.constmem = None - self.totalmem = None - - def get_cuda_gpu_info(self, drv, deviceID): - """Set information about GPU. - - Args: - drv (object): pycuda driver. - deviceID (int): Device ID for GPU. 
- """ - - self.deviceID = deviceID - self.name = drv.Device(self.deviceID).name() - self.pcibusID = drv.Device(self.deviceID).pci_bus_id() - self.constmem = drv.Device(self.deviceID).total_constant_memory - self.totalmem = drv.Device(self.deviceID).total_memory() +def has_pyopencl(): + """Check if pyopencl module is installed.""" + pyopencl = True + try: + import pyopencl + except ImportError: + pyopencl = False + return pyopencl def detect_cuda_gpus(): - """Get information about Nvidia GPU(s). + """Get information about CUDA-capable GPU(s). Returns: - gpus (list): Detected GPU(s) object(s). + gpus: dict of detected pycuda device object(s) where where device ID(s) + are keys. """ - try: + gpus = {} + + cuda_reqs = ('To use gprMax with CUDA you must:' + '\n 1) install pycuda' + '\n 2) install NVIDIA CUDA Toolkit (https://developer.nvidia.com/cuda-toolkit)' + '\n 3) have an NVIDIA CUDA-Enabled GPU (https://developer.nvidia.com/cuda-gpus)') + + if has_pycuda(): import pycuda.driver as drv - has_pycuda = True - except ImportError: - logger.warning('pycuda not detected - to use gprMax in GPU mode the pycuda package must be installed, and you must have a NVIDIA CUDA-Enabled GPU (https://developer.nvidia.com/cuda-gpus).') - has_pycuda = False - - if has_pycuda: drv.init() # Check and list any CUDA-Enabled GPUs + deviceIDsavail = [] if drv.Device.count() == 0: - logger.exception('No NVIDIA CUDA-Enabled GPUs detected (https://developer.nvidia.com/cuda-gpus)') - raise ValueError + logger.warning('No NVIDIA CUDA-Enabled GPUs detected!\n' + cuda_reqs) elif 'CUDA_VISIBLE_DEVICES' in os.environ: deviceIDsavail = os.environ.get('CUDA_VISIBLE_DEVICES') deviceIDsavail = [int(s) for s in deviceIDsavail.split(',')] @@ -357,33 +405,88 @@ def detect_cuda_gpus(): deviceIDsavail = range(drv.Device.count()) # Gather information about detected GPUs - gpus = [] for ID in deviceIDsavail: - gpu = GPU() - gpu.get_cuda_gpu_info(drv, ID) - gpus.append(gpu) + gpus[ID] = drv.Device(ID) else: - 
gpus = None + logger.warning('pycuda not detected!\n' + cuda_reqs) return gpus +def print_cuda_info(devs): + """Print info about detected CUDA-capable GPU(s). + + Args: + devs: dict of detected pycuda device object(s) where device ID(s) + are keys. + """ + + import pycuda + + logger.basic('|--->CUDA:') + logger.debug(f'PyCUDA: {pycuda.VERSION_TEXT}') + + for ID, gpu in devs.items(): # pycuda Device.name() is a method, unlike the pyopencl name attribute + logger.basic(f" |--->Device {ID}: {' '.join(gpu.name().split())} | " + f"{human_size(gpu.total_memory(), a_kilobyte_is_1024_bytes=True)}") + + def detect_opencl(): """Get information about OpenCL platforms and devices. Returns: - gpus (list): Detected GPU(s) object(s). + devs: dict of detected pyopencl device object(s) where device ID(s) + are keys. """ - try: - import pyopencl as cl - has_pyopencl = True - except ImportError: - logger.warning('pyopencl not detected - to use gprMax with OpenCL, the pyopencl package must be installed, and you must have at least one OpenCL capable platform.') - has_pyopencl = False + devs = {} - if has_pyopencl: - platforms = cl.get_platforms() - platform_names = [p.name for p in platforms] - logger.info(platform_names) + ocl_reqs = ('To use gprMax with OpenCL you must:' + '\n 1) install pyopencl' + '\n 2) install appropriate OpenCL device driver(s)' + '\n 3) have at least one OpenCL-capable platform.') + + if has_pyopencl(): + import pyopencl as cl + try: + i = 0 + for platform in cl.get_platforms(): + for device in platform.get_devices(): + devs[i] = device + i += 1 + except Exception: # best-effort detection, but let KeyboardInterrupt/SystemExit propagate + logger.warning('No OpenCL-capable platforms detected!\n' + ocl_reqs) + + else: + logger.warning('pyopencl not detected!\n' + ocl_reqs) + + return devs + + +def print_opencl_info(devs): + """Print info about detected OpenCL-capable device(s). + + Args: + devs: dict of detected pyopencl device object(s) where device ID(s) + are keys. 
+ """ + + import pyopencl as cl + + logger.basic('|--->OpenCL:') + logger.debug(f'PyOpenCL: {cl.VERSION_TEXT}') + + platform = None + for ID, dev in devs.items(): + # Print a platform header only when the platform changes, so devices + # that share a platform are listed under a single header + if dev.platform.name != platform: + platform = dev.platform.name + logger.basic(f' |--->Platform: {platform}') + types = cl.device_type.to_string(dev.type) + # Report the raw type string for devices that are neither CPU nor + # GPU, rather than leaving the label unset + devtype = 'GPU' if 'GPU' in types else 'CPU' if 'CPU' in types else types + logger.basic(f" |--->Device {ID}: {devtype} | {' '.join(dev.name.split())} | " + f"{human_size(dev.global_mem_size, a_kilobyte_is_1024_bytes=True)}") diff --git a/setup.py b/setup.py index c70f4c1a..21d64ec8 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ def build_dispersive_material_templates(): env = Environment(loader = FileSystemLoader(os.path.join('gprMax', 'cython')), ) - template = env.get_template('fields_updates_dispersive_template') + template = env.get_template('fields_updates_dispersive_template.jinja') # Render dispersive template for different types r = template.render( @@ -159,7 +159,7 @@ if 'cleanall' in sys.argv: shutil.rmtree(p, ignore_errors=True) print(f'Removed: {p}') - # Remove 'gprMax/cython/fields_updates_dispersive.pyx' if its there + # Remove 'gprMax/cython/fields_updates_dispersive.jinja' if its there if os.path.isfile(cython_disp_file): os.remove(cython_disp_file) diff --git a/tools/get_host_spec.py b/tools/get_host_spec.py new file mode 100644 index 00000000..06af0145 --- /dev/null +++ b/tools/get_host_spec.py @@ -0,0 +1,50 @@ +# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom +# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley +# +# This file is part of gprMax. 
+# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# + +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see <http://www.gnu.org/licenses/>. + +from gprMax.utilities.host_info import (detect_cuda_gpus, detect_opencl, + get_host_info, print_cuda_info, + print_opencl_info) +from gprMax.utilities.utilities import get_terminal_width, human_size + +# Host machine info. +hostinfo = get_host_info() +hyperthreadingstr = f", {hostinfo['logicalcores']} cores with Hyper-Threading" if hostinfo['hyperthreading'] else '' +hostname = (f"\n=== {hostinfo['hostname']}") +print(f"{hostname} {'=' * (get_terminal_width() - len(hostname) - 1)}") +print(f"\n{'Mfr/model:':<12} {hostinfo['machineID']}") +print(f"{'CPU:':<12} {hostinfo['sockets']} x {hostinfo['cpuID']} ({hostinfo['physicalcores']} cores{hyperthreadingstr})") +print(f"{'RAM:':<12} {human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True)}") +print(f"{'OS/Version:':<12} {hostinfo['osversion']}") + +# OpenMP +print("\n\n=== OpenMP capabilities (gprMax will not use Hyper-Threading with OpenMP as there is no performance advantage)\n") +print(f"{'OpenMP threads: '} {hostinfo['physicalcores']}") + +# CUDA +print("\n\n=== CUDA capabilities\n") +gpus = detect_cuda_gpus() +if gpus: + print_cuda_info(gpus) + +# OpenCL +print("\n\n=== OpenCL capabilities\n") +devs = detect_opencl() +if devs: + print_opencl_info(devs) + +print(f"\n{'=' * (get_terminal_width() - 1)}\n")