diff --git a/docs/source/input.rst b/docs/source/input.rst index 47cda5d2..b5e9623b 100644 --- a/docs/source/input.rst +++ b/docs/source/input.rst @@ -904,12 +904,12 @@ For example to save a snapshot of the electromagnetic fields in the model at a s PML commands ============ -The default behaviour is for gprMax to use a first order CFS PML that has a thickness of 10 cells on each of the six sides of the model domain. This can be altered by using the following commands. +The default behaviour for the absorbing boundary conditions (ABC) is first order Complex Frequency Shifted (CFS) Perfectly Matched Layers (PML), with thicknesses of 10 cells on each of the six sides of the model domain. This can be altered by using the following commands. #pml_cells: ------------ -Allows you to control the number of cells of PML that are used on the six sides of the model domain. The PML is defined within the model domain, i.e. it is not added to the domain size. The syntax of the command is: +Allows you to control the number of cells (thickness) of PML that are used on the six sides of the model domain. The PML is defined within the model domain, i.e. it is not added to the domain size. The syntax of the command is: .. code-block:: none @@ -929,6 +929,23 @@ For example to use a PML with 20 cells (thicker than the default 10 cells) on on #pml_cells: 10 10 20 10 10 20 +#pml_formulation: +----------------- + +Allows you to alter the formulation used for the PML. The current options are to use the Higher Order RIPML (HORIPML) - https://doi.org/10.1109/TAP.2011.2180344, or Multipole RIPML (MRIPML) - https://doi.org/10.1109/TAP.2018.2823864. The syntax of the command is: + +.. code-block:: none + + #pml_formulation: str + +* ``str`` can be either 'HORIPML' or 'MRIPML' + +For example to use the Multipole RIPML: + +.. 
code-block:: none + + #pml_formulation: MRIPML + #pml_cfs: --------- diff --git a/gprMax/geometry_outputs.py b/gprMax/geometry_outputs.py index a8055e19..21269f67 100644 --- a/gprMax/geometry_outputs.py +++ b/gprMax/geometry_outputs.py @@ -74,9 +74,9 @@ class GeometryView(object): self.vtk_nycells = round_value(self.ny / self.dy) self.vtk_nzcells = round_value(self.nz / self.dz) self.vtk_ncells = self.vtk_nxcells * self.vtk_nycells * self.vtk_nzcells - self.datawritesize = (np.dtype(np.uint32).itemsize * self.vtk_ncells + - 2 * np.dtype(np.int8).itemsize * self.vtk_ncells + - 3 * np.dtype(np.uint32).itemsize) + self.datawritesize = (np.dtype(np.uint32).itemsize * self.vtk_ncells + + 2 * np.dtype(np.int8).itemsize * self.vtk_ncells + + 3 * np.dtype(np.uint32).itemsize) elif self.fileext == '.vtp': self.vtk_numpoints = (self.nx + 1) * (self.ny + 1) * (self.nz + 1) @@ -86,23 +86,23 @@ class GeometryView(object): self.vtk_nylines = self.ny * (self.nx + 1) * (self.nz + 1) self.vtk_nzlines = self.nz * (self.nx + 1) * (self.ny + 1) self.vtk_numlines = self.vtk_nxlines + self.vtk_nylines + self.vtk_nzlines - self.vtk_connectivity_offset = round_value((self.vtk_numpoints * - self.vtk_numpoint_components * - np.dtype(np.float32).itemsize) + - np.dtype(np.uint32).itemsize) - self.vtk_offsets_offset = round_value(self.vtk_connectivity_offset + - (self.vtk_numlines * self.vtk_numline_components * np.dtype(np.uint32).itemsize) + - np.dtype(np.uint32).itemsize) - self.vtk_materials_offset = round_value(self.vtk_offsets_offset + - (self.vtk_numlines * np.dtype(np.uint32).itemsize) + - np.dtype(np.uint32).itemsize) - vtk_cell_offsets = ((self.vtk_numline_components * self.vtk_numlines) + - self.vtk_numline_components - self.vtk_numline_components - 1) // self.vtk_numline_components + 1 - self.datawritesize = (np.dtype(np.float32).itemsize * self.vtk_numpoints * self.vtk_numpoint_components + - np.dtype(np.uint32).itemsize * self.vtk_numlines * self.vtk_numline_components + - 
np.dtype(np.uint32).itemsize * self.vtk_numlines + - np.dtype(np.uint32).itemsize * vtk_cell_offsets + - np.dtype(np.uint32).itemsize * 4) + self.vtk_connectivity_offset = round_value((self.vtk_numpoints + * self.vtk_numpoint_components + * np.dtype(np.float32).itemsize) + + np.dtype(np.uint32).itemsize) + self.vtk_offsets_offset = round_value(self.vtk_connectivity_offset + + (self.vtk_numlines * self.vtk_numline_components * np.dtype(np.uint32).itemsize) + + np.dtype(np.uint32).itemsize) + self.vtk_materials_offset = round_value(self.vtk_offsets_offset + + (self.vtk_numlines * np.dtype(np.uint32).itemsize) + + np.dtype(np.uint32).itemsize) + vtk_cell_offsets = ((self.vtk_numline_components * self.vtk_numlines) + + self.vtk_numline_components - self.vtk_numline_components - 1) // self.vtk_numline_components + 1 + self.datawritesize = (np.dtype(np.float32).itemsize * self.vtk_numpoints * self.vtk_numpoint_components + + np.dtype(np.uint32).itemsize * self.vtk_numlines * self.vtk_numline_components + + np.dtype(np.uint32).itemsize * self.vtk_numlines + + np.dtype(np.uint32).itemsize * vtk_cell_offsets + + np.dtype(np.uint32).itemsize * 4) def set_filename(self, appendmodelnumber, G): """ diff --git a/gprMax/input_cmds_file.py b/gprMax/input_cmds_file.py index 5d38326f..424d4550 100644 --- a/gprMax/input_cmds_file.py +++ b/gprMax/input_cmds_file.py @@ -192,7 +192,7 @@ def check_cmd_names(processedlines, checkessential=True): essentialcmds = ['#domain', '#dx_dy_dz', '#time_window'] # Commands that there should only be one instance of in a model - singlecmds = dict.fromkeys(['#domain', '#dx_dy_dz', '#time_window', '#title', '#messages', '#num_threads', '#time_step_stability_factor', '#pml_cells', '#excitation_file', '#src_steps', '#rx_steps', '#taguchi', '#end_taguchi', '#output_dir'], None) + singlecmds = dict.fromkeys(['#domain', '#dx_dy_dz', '#time_window', '#title', '#messages', '#num_threads', '#time_step_stability_factor', '#pml_formulation', '#pml_cells', 
'#excitation_file', '#src_steps', '#rx_steps', '#taguchi', '#end_taguchi', '#output_dir'], None) # Commands that there can be multiple instances of in a model - these will be lists within the dictionary multiplecmds = {key: [] for key in ['#geometry_view', '#geometry_objects_write', '#material', '#soil_peplinski', '#add_dispersion_debye', '#add_dispersion_lorentz', '#add_dispersion_drude', '#waveform', '#voltage_source', '#hertzian_dipole', '#magnetic_dipole', '#transmission_line', '#rx', '#rx_array', '#snapshot', '#pml_cfs', '#include_file']} diff --git a/gprMax/input_cmds_multiuse.py b/gprMax/input_cmds_multiuse.py index ceae8f70..3240effc 100644 --- a/gprMax/input_cmds_multiuse.py +++ b/gprMax/input_cmds_multiuse.py @@ -815,8 +815,8 @@ def process_multicmds(multicmds, G): if tmp[1] not in CFSParameter.scalingdirections or tmp[5] not in CFSParameter.scalingdirections or tmp[9] not in CFSParameter.scalingdirections: raise CmdInputError("'" + cmdname + ': ' + ' '.join(tmp) + "'" + ' must have scaling type {}'.format(','.join(CFSParameter.scalingdirections))) if float(tmp[2]) < 0 or float(tmp[3]) < 0 or float(tmp[6]) < 0 or float(tmp[7]) < 0 or float(tmp[10]) < 0: - raise CmdInputError("'" + cmdname + ': ' + ' '.join(tmp) + "'" + ' minimum and maximum scaling values must be greater than zero') - if float(tmp[6]) < 1: + raise CmdInputError("'" + cmdname + ': ' + ' '.join(tmp) + "'" + ' minimum and maximum scaling values must be positive') + if float(tmp[6]) < 1 and G.pmlformulation == 'HORIPML': raise CmdInputError("'" + cmdname + ': ' + ' '.join(tmp) + "'" + ' minimum scaling value for kappa must be greater than or equal to one') cfsalpha = CFSParameter() diff --git a/gprMax/input_cmds_singleuse.py b/gprMax/input_cmds_singleuse.py index 8b163dd9..adba1a30 100644 --- a/gprMax/input_cmds_singleuse.py +++ b/gprMax/input_cmds_singleuse.py @@ -32,6 +32,7 @@ from gprMax.constants import c from gprMax.constants import floattype from gprMax.exceptions import CmdInputError 
from gprMax.exceptions import GeneralError +from gprMax.pml import PML from gprMax.utilities import get_host_info from gprMax.utilities import human_size from gprMax.utilities import round_value @@ -205,7 +206,7 @@ def process_singlecmds(singlecmds, G): if G.messages: print('Time window: {:g} secs ({} iterations)'.format(G.timewindow, G.iterations)) - # PML + # PML cells cmd = '#pml_cells' if singlecmds[cmd] is not None: tmp = singlecmds[cmd].split() @@ -224,6 +225,17 @@ def process_singlecmds(singlecmds, G): if 2 * G.pmlthickness['x0'] >= G.nx or 2 * G.pmlthickness['y0'] >= G.ny or 2 * G.pmlthickness['z0'] >= G.nz or 2 * G.pmlthickness['xmax'] >= G.nx or 2 * G.pmlthickness['ymax'] >= G.ny or 2 * G.pmlthickness['zmax'] >= G.nz: raise CmdInputError(cmd + ' has too many cells for the domain size') + # PML formulation + cmd = '#pml_formulation' + if singlecmds[cmd] is not None: + tmp = singlecmds[cmd].split() + if len(tmp) != 1: + raise CmdInputError(cmd + ' requires exactly one parameter') + if singlecmds[cmd].upper() in PML.formulations: + G.pmlformulation = singlecmds[cmd].upper() + else: + raise CmdInputError(cmd + ' PML formulation is not found') + # src_steps cmd = '#src_steps' if singlecmds[cmd] is not None: diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py index 4c4ba19c..5a560345 100644 --- a/gprMax/model_build_run.py +++ b/gprMax/model_build_run.py @@ -63,7 +63,8 @@ from gprMax.materials import process_materials from gprMax.pml import CFS from gprMax.pml import PML from gprMax.pml import build_pmls -from gprMax.pml_updates_gpu import kernels_template_pml +from gprMax.pml_updates.pml_updates_electric_HORIPML_gpu import kernels_template_pml_electric_HORIPML +from gprMax.pml_updates.pml_updates_magnetic_HORIPML_gpu import kernels_template_pml_magnetic_HORIPML from gprMax.receivers import gpu_initialise_rx_arrays from gprMax.receivers import gpu_get_rx_array from gprMax.snapshots import Snapshot @@ -528,17 +529,18 @@ def 
solve_gpu(currentmodelrun, modelend, G): # PML updates if G.pmls: # Prepare kernels - kernels_pml = SourceModule(kernels_template_pml.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_R=G.pmls[0].ERA.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) + kernels_pml_electric = SourceModule(kernels_template_pml_electric_HORIPML.substitute(REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_R=G.pmls[0].ERA.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) + kernels_pml_magnetic = SourceModule(kernels_template_pml_magnetic_HORIPML.substitute(REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_R=G.pmls[0].ERA.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts) # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for PML kernels - updatecoeffsE = kernels_pml.get_global('updatecoeffsE')[0] - updatecoeffsH = kernels_pml.get_global('updatecoeffsH')[0] + updatecoeffsE = kernels_pml_electric.get_global('updatecoeffsE')[0] + updatecoeffsH = kernels_pml_magnetic.get_global('updatecoeffsH')[0] drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE) drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH) # Set block per grid, initialise arrays on GPU, and get kernel functions for pml in G.pmls: pml.gpu_set_blocks_per_grid(G) pml.gpu_initialise_arrays() - pml.gpu_get_update_funcs(kernels_pml) + pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic) # Receivers if G.rxs: diff --git a/gprMax/pml.py b/gprMax/pml.py 
index 8f99fa7b..c647c4c6 100644 --- a/gprMax/pml.py +++ b/gprMax/pml.py @@ -24,13 +24,14 @@ from tqdm import tqdm from gprMax.constants import e0 from gprMax.constants import z0 from gprMax.constants import floattype +from gprMax.exceptions import GeneralError class CFSParameter(object): """Individual CFS parameter (e.g. alpha, kappa, or sigma).""" # Allowable scaling profiles and directions - scalingprofiles = {'constant': 0, 'linear': 1, 'quadratic': 2, 'cubic': 3, 'quartic': 4, 'quintic': 5, 'sextic': 6} + scalingprofiles = {'constant': 0, 'linear': 1, 'quadratic': 2, 'cubic': 3, 'quartic': 4, 'quintic': 5, 'sextic': 6, 'septic': 7, 'octic': 8} scalingdirections = ['forward', 'reverse'] def __init__(self, ID=None, scaling='polynomial', scalingprofile=None, scalingdirection='forward', min=0, max=0): @@ -112,8 +113,9 @@ class CFS(object): Hvalues (float): numpy array holding profile value for magnetic PML update. """ - Evalues = np.zeros(thickness, dtype=floattype) - Hvalues = np.zeros(thickness, dtype=floattype) + # Extra cell of thickness added to allow correct scaling of electric and magnetic values + Evalues = np.zeros(thickness + 1, dtype=floattype) + Hvalues = np.zeros(thickness + 1, dtype=floattype) if parameter.scalingprofile == 'constant': Evalues += parameter.max @@ -134,12 +136,23 @@ class CFS(object): if parameter.scalingdirection == 'reverse': Evalues = Evalues[::-1] Hvalues = Hvalues[::-1] + # Magnetic values must be shifted one element to the left after reversal + Hvalues = np.roll(Hvalues, -1) + + # Extra cell of thickness not required and therefore removed after scaling + Evalues = Evalues[:-1] + Hvalues = Hvalues[:-1] return Evalues, Hvalues class PML(object): - """PML - the implementation comes from the derivation in: http://dx.doi.org/10.1109/TAP.2011.2180344""" + """Perfectly Matched Layer (PML) Absorbing Boundary Conditions (ABC)""" + + # Available PML formulations: + # Higher Order RIPML (HORIPML) see: 
https://doi.org/10.1109/TAP.2011.2180344 + # Multipole RIPML (MRIPML) see: https://doi.org/10.1109/TAP.2018.2823864 + formulations = ['HORIPML', 'MRIPML'] # PML slabs IDs at boundaries of domain. boundaryIDs = ['x0', 'y0', 'z0', 'xmax', 'ymax', 'zmax'] @@ -170,21 +183,18 @@ class PML(object): self.ny = yf - ys self.nz = zf - zs - # Spatial discretisation and thickness (one extra cell of thickness - # required for interpolation of electric and magnetic scaling values) + # Spatial discretisation and thickness if self.direction[0] == 'x': self.d = G.dx - self.thickness = self.nx + 1 + self.thickness = self.nx elif self.direction[0] == 'y': self.d = G.dy - self.thickness = self.ny + 1 + self.thickness = self.ny elif self.direction[0] == 'z': self.d = G.dz - self.thickness = self.nz + 1 + self.thickness = self.nz self.CFS = G.cfs - if not self.CFS: - self.CFS = [CFS()] self.initialise_field_arrays() @@ -232,19 +242,35 @@ class PML(object): Ekappa, Hkappa = cfs.calculate_values(self.thickness, cfs.kappa) Esigma, Hsigma = cfs.calculate_values(self.thickness, cfs.sigma) - # Electric PML update coefficients - tmp = (2 * e0 * Ekappa) + G.dt * (Ealpha * Ekappa + Esigma) - self.ERA[x, :] = (2 * e0 + G.dt * Ealpha) / tmp - self.ERB[x, :] = (2 * e0 * Ekappa) / tmp - self.ERE[x, :] = ((2 * e0 * Ekappa) - G.dt * (Ealpha * Ekappa + Esigma)) / tmp - self.ERF[x, :] = (2 * Esigma * G.dt) / (Ekappa * tmp) + # Define different parameters depending on PML formulation + if G.pmlformulation == 'HORIPML': + # HORIPML electric update coefficients + tmp = (2 * e0 * Ekappa) + G.dt * (Ealpha * Ekappa + Esigma) + self.ERA[x, :] = (2 * e0 + G.dt * Ealpha) / tmp + self.ERB[x, :] = (2 * e0 * Ekappa) / tmp + self.ERE[x, :] = ((2 * e0 * Ekappa) - G.dt * (Ealpha * Ekappa + Esigma)) / tmp + self.ERF[x, :] = (2 * Esigma * G.dt) / (Ekappa * tmp) - # Magnetic PML update coefficients - tmp = (2 * e0 * Hkappa) + G.dt * (Halpha * Hkappa + Hsigma) - self.HRA[x, :] = (2 * e0 + G.dt * Halpha) / tmp - 
self.HRB[x, :] = (2 * e0 * Hkappa) / tmp - self.HRE[x, :] = ((2 * e0 * Hkappa) - G.dt * (Halpha * Hkappa + Hsigma)) / tmp - self.HRF[x, :] = (2 * Hsigma * G.dt) / (Hkappa * tmp) + # HORIPML magnetic update coefficients + tmp = (2 * e0 * Hkappa) + G.dt * (Halpha * Hkappa + Hsigma) + self.HRA[x, :] = (2 * e0 + G.dt * Halpha) / tmp + self.HRB[x, :] = (2 * e0 * Hkappa) / tmp + self.HRE[x, :] = ((2 * e0 * Hkappa) - G.dt * (Halpha * Hkappa + Hsigma)) / tmp + self.HRF[x, :] = (2 * Hsigma * G.dt) / (Hkappa * tmp) + + elif G.pmlformulation == 'MRIPML': + tmp = 2 * e0 + G.dt * Ealpha + self.ERA[x, :] = Ekappa + (G.dt * Esigma) / tmp + self.ERB[x, :] = (2 * e0) / tmp + self.ERE[x, :] = ((2 * e0) - G.dt * Ealpha) / tmp + self.ERF[x, :] = (2 * Esigma * G.dt) / tmp + + # MRIPML magnetic update coefficients + tmp = 2 * e0 + G.dt * Halpha + self.HRA[x, :] = Hkappa + (G.dt * Hsigma) / tmp + self.HRB[x, :] = (2 * e0) / tmp + self.HRE[x, :] = ((2 * e0) - G.dt * Halpha) / tmp + self.HRF[x, :] = (2 * Hsigma * G.dt) / tmp def update_electric(self, G): """This functions updates electric field components with the PML correction. @@ -253,7 +279,8 @@ class PML(object): G (class): Grid class instance - holds essential parameters describing the model. """ - func = getattr(import_module('gprMax.pml_updates_ext'), 'update_pml_' + str(len(self.CFS)) + 'order_electric_' + self.direction) + pmlmodule = 'gprMax.pml_updates.pml_updates_electric_' + G.pmlformulation + '_ext' + func = getattr(import_module(pmlmodule), 'order' + str(len(self.CFS)) + '_' + self.direction) func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf, G.nthreads, G.updatecoeffsE, G.ID, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, self.EPhi1, self.EPhi2, self.ERA, self.ERB, self.ERE, self.ERF, self.d) def update_magnetic(self, G): @@ -263,7 +290,8 @@ class PML(object): G (class): Grid class instance - holds essential parameters describing the model. 
""" - func = getattr(import_module('gprMax.pml_updates_ext'), 'update_pml_' + str(len(self.CFS)) + 'order_magnetic_' + self.direction) + pmlmodule = 'gprMax.pml_updates.pml_updates_magnetic_' + G.pmlformulation + '_ext' + func = getattr(import_module(pmlmodule), 'order' + str(len(self.CFS)) + '_' + self.direction) func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf, G.nthreads, G.updatecoeffsH, G.ID, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, self.HPhi1, self.HPhi2, self.HRA, self.HRB, self.HRE, self.HRF, self.d) def gpu_set_blocks_per_grid(self, G): @@ -293,17 +321,18 @@ class PML(object): self.HRE_gpu = gpuarray.to_gpu(self.HRE) self.HRF_gpu = gpuarray.to_gpu(self.HRF) - def gpu_get_update_funcs(self, kernels): + def gpu_get_update_funcs(self, kernelselectric, kernelsmagnetic): """Get update functions from PML kernels. Args: - kernels: PyCuda SourceModule containing PML kernels. + kernelselectric: PyCuda SourceModule containing PML kernels for electric updates. + kernelsmagnetic: PyCuda SourceModule containing PML kernels for magnetic updates. """ from pycuda.compiler import SourceModule - self.update_electric_gpu = kernels.get_function('update_pml_' + str(len(self.CFS)) + 'order_electric_' + self.direction) - self.update_magnetic_gpu = kernels.get_function('update_pml_' + str(len(self.CFS)) + 'order_magnetic_' + self.direction) + self.update_electric_gpu = kernelselectric.get_function('order' + str(len(self.CFS)) + '_' + self.direction) + self.update_magnetic_gpu = kernelsmagnetic.get_function('order' + str(len(self.CFS)) + '_' + self.direction) def gpu_update_electric(self, G): """This functions updates electric field components with the PML correction on the GPU. 
diff --git a/gprMax/pml_updates/__init__.py b/gprMax/pml_updates/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/gprMax/pml_updates/pml_updates_electric_HORIPML_ext.pyx b/gprMax/pml_updates/pml_updates_electric_HORIPML_ext.pyx new file mode 100644 index 00000000..b91dfbf4 --- /dev/null +++ b/gprMax/pml_updates/pml_updates_electric_HORIPML_ext.pyx @@ -0,0 +1,827 @@ +# Copyright (C) 2015-2019: The University of Edinburgh +# Authors: Craig Warren and Antonis Giannopoulos +# +# This file is part of gprMax. +# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see <http://www.gnu.org/licenses/>. + +import numpy as np +cimport numpy as np +from cython.parallel import prange + +from gprMax.constants cimport floattype_t + + +cpdef void order1_xminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ey and Ez field components for the xminus slab.
+ + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + EPhi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEy, materialEz + cdef floattype_t dx, dHy, dHz, RA01, RB0, RE0, RF0 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + RA01 = RA[0, i] - 1 + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + ii = xf - i + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + # Ey + materialEy = ID[1, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii - 1, jj, kk]) / dx + Ey[ii, jj, kk] = Ey[ii, jj, kk] - updatecoeffsE[materialEy, 4] * (RA01 * dHz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dHz + # Ez + materialEz = ID[2, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii - 1, jj, kk]) / dx + Ez[ii, jj, kk] = Ez[ii, jj, kk] + updatecoeffsE[materialEz, 4] * (RA01 * dHy + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dHy + +cpdef void order2_xminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ey and Ez field components for the xminus slab. 
+ + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + EPhi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEy, materialEz + cdef floattype_t dx, dHy, dHz, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + RA0 = RA[0, i] + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + RA1 = RA[1, i] + RB1 = RB[1, i] + RE1 = RE[1, i] + RF1 = RF[1, i] + RA01 = RA[0, i] * RA[1, i] - 1 + ii = xf - i + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + # Ey + materialEy = ID[1, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii - 1, jj, kk]) / dx + Ey[ii, jj, kk] = Ey[ii, jj, kk] - updatecoeffsE[materialEy, 4] * (RA01 * dHz + RA1 * RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k]) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] - RF1 * (RA0 * dHz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dHz + # Ez + materialEz = ID[2, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii - 1, jj, kk]) / dx + Ez[ii, jj, kk] = Ez[ii, jj, kk] + updatecoeffsE[materialEz, 4] * (RA01 * dHy + RA1 * RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k]) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] - RF1 * (RA0 * dHy + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dHy + + +cpdef void order1_xplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] 
Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ey and Ez field components for the xplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + EPhi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEy, materialEz + cdef floattype_t dx, dHy, dHz, RA01, RB0, RE0, RF0 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + RA01 = RA[0, i] - 1 + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + # Ey + materialEy = ID[1, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii - 1, jj, kk]) / dx + Ey[ii, jj, kk] = Ey[ii, jj, kk] - updatecoeffsE[materialEy, 4] * (RA01 * dHz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dHz + # Ez + materialEz = ID[2, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii - 1, jj, kk]) / dx + Ez[ii, jj, kk] = Ez[ii, jj, kk] + updatecoeffsE[materialEz, 4] * (RA01 * dHy + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dHy + +cpdef void order2_xplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] 
RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ey and Ez field components for the xplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + EPhi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEy, materialEz + cdef floattype_t dx, dHy, dHz, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + RA0 = RA[0, i] + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + RA1 = RA[1, i] + RB1 = RB[1, i] + RE1 = RE[1, i] + RF1 = RF[1, i] + RA01 = RA[0, i] * RA[1, i] - 1 + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + # Ey + materialEy = ID[1, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii - 1, jj, kk]) / dx + Ey[ii, jj, kk] = Ey[ii, jj, kk] - updatecoeffsE[materialEy, 4] * (RA01 * dHz + RA1 * RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k]) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] - RF1 * (RA0 * dHz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dHz + # Ez + materialEz = ID[2, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii - 1, jj, kk]) / dx + Ez[ii, jj, kk] = Ez[ii, jj, kk] + updatecoeffsE[materialEz, 4] * (RA01 * dHy + RA1 * RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k]) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] - RF1 * (RA0 * dHy + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dHy + + +cpdef void order1_yminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] 
ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ez field components for the yminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + EPhi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEz + cdef floattype_t dy, dHx, dHz, RA01, RB0, RE0, RF0 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = yf - j + RA01 = RA[0, j] - 1 + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + for k in range(0, nz): + kk = k + zs + # Ex + materialEx = ID[0, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii, jj - 1, kk]) / dy + Ex[ii, jj, kk] = Ex[ii, jj, kk] + updatecoeffsE[materialEx, 4] * (RA01 * dHz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dHz + # Ez + materialEz = ID[2, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj - 1, kk]) / dy + Ez[ii, jj, kk] = Ez[ii, jj, kk] - updatecoeffsE[materialEz, 4] * (RA01 * dHx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dHx + +cpdef void order2_yminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + 
floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ez field components for the yminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + EPhi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEz + cdef floattype_t dy, dHx, dHz, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = yf - j + RA0 = RA[0, j] + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + RA1 = RA[1, j] + RB1 = RB[1, j] + RE1 = RE[1, j] + RF1 = RF[1, j] + RA01 = RA[0, j] * RA[1, j] - 1 + for k in range(0, nz): + kk = k + zs + # Ex + materialEx = ID[0, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii, jj - 1, kk]) / dy + Ex[ii, jj, kk] = Ex[ii, jj, kk] + updatecoeffsE[materialEx, 4] * (RA01 * dHz + RA1 * RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k]) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] - RF1 * (RA0 * dHz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dHz + # Ez + materialEz = ID[2, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj - 1, kk]) / dy + Ez[ii, jj, kk] = Ez[ii, jj, kk] - updatecoeffsE[materialEz, 4] * (RA01 * dHx + RA1 * RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k]) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] - RF1 * (RA0 * dHx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * 
dHx + + +cpdef void order1_yplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ez field components for the yplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + EPhi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEz + cdef floattype_t dy, dHx, dHz, RA01, RB0, RE0, RF0 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + RA01 = RA[0, j] - 1 + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + for k in range(0, nz): + kk = k + zs + # Ex + materialEx = ID[0, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii, jj - 1, kk]) / dy + Ex[ii, jj, kk] = Ex[ii, jj, kk] + updatecoeffsE[materialEx, 4] * (RA01 * dHz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dHz + # Ez + materialEz = ID[2, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj - 1, kk]) / dy + Ez[ii, jj, kk] = Ez[ii, jj, kk] - updatecoeffsE[materialEz, 4] * (RA01 * dHx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dHx + +cpdef void order2_yplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + 
int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ez field components for the yplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsE, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEz + cdef floattype_t dy, dHx, dHz, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + RA0 = RA[0, j] + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + RA1 = RA[1, j] + RB1 = RB[1, j] + RE1 = RE[1, j] + RF1 = RF[1, j] + RA01 = RA[0, j] * RA[1, j] - 1 + for k in range(0, nz): + kk = k + zs + # Ex + materialEx = ID[0, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii, jj - 1, kk]) / dy + Ex[ii, jj, kk] = Ex[ii, jj, kk] + updatecoeffsE[materialEx, 4] * (RA01 * dHz + RA1 * RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k]) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] - RF1 * (RA0 * dHz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dHz + # Ez + materialEz = ID[2, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj - 1, kk]) / dy + Ez[ii, jj, kk] = Ez[ii, jj, kk] - updatecoeffsE[materialEz, 4] * (RA01 * dHx + RA1 * RB0 * Phi2[0, i, 
j, k] + RB1 * Phi2[1, i, j, k]) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] - RF1 * (RA0 * dHx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dHx + + +cpdef void order1_zminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ey field components for the zminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsE, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. 
dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEy + cdef floattype_t dz, dHx, dHy, RA01, RB0, RE0, RF0 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = zf - k + RA01 = RA[0, k] - 1 + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + # Ex + materialEx = ID[0, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii, jj, kk - 1]) / dz + Ex[ii, jj, kk] = Ex[ii, jj, kk] - updatecoeffsE[materialEx, 4] * (RA01 * dHy + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dHy + # Ey + materialEy = ID[1, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj, kk - 1]) / dz + Ey[ii, jj, kk] = Ey[ii, jj, kk] + updatecoeffsE[materialEy, 4] * (RA01 * dHx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dHx + +cpdef void order2_zminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ey field components for the zminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsE, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. 
dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEy + cdef floattype_t dz, dHx, dHy, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = zf - k + RA0 = RA[0, k] + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + RA1 = RA[1, k] + RB1 = RB[1, k] + RE1 = RE[1, k] + RF1 = RF[1, k] + RA01 = RA[0, k] * RA[1, k] - 1 + # Ex + materialEx = ID[0, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii, jj, kk - 1]) / dz + Ex[ii, jj, kk] = Ex[ii, jj, kk] - updatecoeffsE[materialEx, 4] * (RA01 * dHy + RA1 * RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k]) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] - RF1 * (RA0 * dHy + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dHy + # Ey + materialEy = ID[1, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj, kk - 1]) / dz + Ey[ii, jj, kk] = Ey[ii, jj, kk] + updatecoeffsE[materialEy, 4] * (RA01 * dHx + RA1 * RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k]) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] - RF1 * (RA0 * dHx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dHx + + +cpdef void order1_zplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ey field components for the zplus slab. 
+ + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsE, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEy + cdef floattype_t dz, dHx, dHy, RA01, RB0, RE0, RF0 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + RA01 = RA[0, k] - 1 + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + # Ex + materialEx = ID[0, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii, jj, kk - 1]) / dz + Ex[ii, jj, kk] = Ex[ii, jj, kk] - updatecoeffsE[materialEx, 4] * (RA01 * dHy + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dHy + # Ey + materialEy = ID[1, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj, kk - 1]) / dz + Ey[ii, jj, kk] = Ey[ii, jj, kk] + updatecoeffsE[materialEy, 4] * (RA01 * dHx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dHx + +cpdef void order2_zplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ey field components for the zplus slab. 
+ + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsE, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEy + cdef floattype_t dz, dHx, dHy, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + RA0 = RA[0, k] + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + RA1 = RA[1, k] + RB1 = RB[1, k] + RE1 = RE[1, k] + RF1 = RF[1, k] + RA01 = RA[0, k] * RA[1, k] - 1 + # Ex + materialEx = ID[0, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii, jj, kk - 1]) / dz + Ex[ii, jj, kk] = Ex[ii, jj, kk] - updatecoeffsE[materialEx, 4] * (RA01 * dHy + RA1 * RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k]) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] - RF1 * (RA0 * dHy + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dHy + # Ey + materialEy = ID[1, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj, kk - 1]) / dz + Ey[ii, jj, kk] = Ey[ii, jj, kk] + updatecoeffsE[materialEy, 4] * (RA01 * dHx + RA1 * RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k]) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] - RF1 * (RA0 * dHx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dHx diff --git a/gprMax/pml_updates/pml_updates_electric_HORIPML_gpu.py b/gprMax/pml_updates/pml_updates_electric_HORIPML_gpu.py new file mode 100644 index 00000000..4d4e2439 --- /dev/null +++ b/gprMax/pml_updates/pml_updates_electric_HORIPML_gpu.py @@ -0,0 +1,982 @@ +# Copyright (C) 2015-2019: The University of Edinburgh +# Authors: Craig Warren 
and Antonis Giannopoulos +# +# This file is part of gprMax. +# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see http://www.gnu.org/licenses/. + +from string import Template + +kernels_template_pml_electric_HORIPML = Template(""" + +// Macros for converting subscripts to linear index: +#define INDEX2D_R(m, n) (m)*($NY_R)+(n) +#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n) +#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k) +#define INDEX4D_ID(p, i, j, k) (p)*($NX_ID)*($NY_ID)*($NZ_ID)+(i)*($NY_ID)*($NZ_ID)+(j)*($NZ_ID)+(k) +#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k) +#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k) + +// Material coefficients (read-only) in constant memory (64KB) +__device__ __constant__ $REAL updatecoeffsE[$N_updatecoeffsE]; + + +__global__ void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ey and Ez field components for the xminus 
slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dHy, dHz; + $REAL dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - i1; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i1)] - 1; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 
< nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - i2; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i2)] - 1; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +} + + +__global__ void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ey and Ez field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz; + $REAL dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - i1; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i1)]; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + RA1 = RA[INDEX2D_R(1,i1)]; + RB1 = RB[INDEX2D_R(1,i1)]; + RE1 = RE[INDEX2D_R(1,i1)]; + RF1 = RF[INDEX2D_R(1,i1)]; + RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - i2; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i2)]; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + RA1 = RA[INDEX2D_R(1,i2)]; + RB1 = RB[INDEX2D_R(1,i2)]; + RE1 = RE[INDEX2D_R(1,i2)]; + RF1 = RF[INDEX2D_R(1,i2)]; + RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +} + + +__global__ void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ey and Ez field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dHy, dHz; + $REAL dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i1)] - 1; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i2)] - 1; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + 
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +} + + +__global__ void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ey and Ez field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz; + $REAL dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i1)]; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + RA1 = RA[INDEX2D_R(1,i1)]; + RB1 = RB[INDEX2D_R(1,i1)]; + RE1 = RE[INDEX2D_R(1,i1)]; + RF1 = RF[INDEX2D_R(1,i1)]; + RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i2)]; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + RA1 = RA[INDEX2D_R(1,i2)]; + RB1 = RB[INDEX2D_R(1,i2)]; + RE1 = RE[INDEX2D_R(1,i2)]; + RF1 = RF[INDEX2D_R(1,i2)]; + RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy; + } +} + + +__global__ void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ez field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dHx, dHz; + $REAL dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - j1; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j1)] - 1; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - j2; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j2)] - 1; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + 
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +__global__ void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ez field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz; + $REAL dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - j1; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j1)]; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + RA1 = RA[INDEX2D_R(1,j1)]; + RB1 = RB[INDEX2D_R(1,j1)]; + RE1 = RE[INDEX2D_R(1,j1)]; + RF1 = RF[INDEX2D_R(1,j1)]; + RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - j2; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j2)]; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + RA1 = RA[INDEX2D_R(1,j2)]; + RB1 = RB[INDEX2D_R(1,j2)]; + RE1 = RE[INDEX2D_R(1,j2)]; + RF1 = RF[INDEX2D_R(1,j2)]; + RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +__global__ void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ez field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dHx, dHz; + $REAL dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j1)] - 1; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j2)] - 1; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + 
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +__global__ void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ez field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // PHI1, PHI2, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz; + $REAL dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j1)]; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + RA1 = RA[INDEX2D_R(1,j1)]; + RB1 = RB[INDEX2D_R(1,j1)]; + RE1 = RE[INDEX2D_R(1,j1)]; + RF1 = RF[INDEX2D_R(1,j1)]; + RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j2)]; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + RA1 = RA[INDEX2D_R(1,j2)]; + RB1 = RB[INDEX2D_R(1,j2)]; + RE1 = RE[INDEX2D_R(1,j2)]; + RF1 = RF[INDEX2D_R(1,j2)]; + RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +__global__ void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ey field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dHx, dHy; + $REAL dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - k1; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k1)] - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - k2; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k2)] - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + 
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +__global__ void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ey field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy; + $REAL dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - k1; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k1)]; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RA1 = RA[INDEX2D_R(1,k1)]; + RB1 = RB[INDEX2D_R(1,k1)]; + RE1 = RE[INDEX2D_R(1,k1)]; + RF1 = RF[INDEX2D_R(1,k1)]; + RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - k2; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k2)]; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RA1 = RA[INDEX2D_R(1,k2)]; + RB1 = RB[INDEX2D_R(1,k2)]; + RE1 = RE[INDEX2D_R(1,k2)]; + RF1 = RF[INDEX2D_R(1,k2)]; + RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +__global__ void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ey field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dHx, dHy; + $REAL dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k1)] - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k2)] - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + 
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + + +__global__ void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ey field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy; + $REAL dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k1)]; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RA1 = RA[INDEX2D_R(1,k1)]; + RB1 = RB[INDEX2D_R(1,k1)]; + RE1 = RE[INDEX2D_R(1,k1)]; + RF1 = RF[INDEX2D_R(1,k1)]; + RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k2)]; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RA1 = RA[INDEX2D_R(1,k2)]; + RB1 = RB[INDEX2D_R(1,k2)]; + RE1 = RE[INDEX2D_R(1,k2)]; + RF1 = RF[INDEX2D_R(1,k2)]; + RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx; + } +} + +""") diff --git a/gprMax/pml_updates/pml_updates_electric_MRIPML_ext.pyx b/gprMax/pml_updates/pml_updates_electric_MRIPML_ext.pyx new file mode 100644 index 00000000..5a292ad3 --- /dev/null +++ b/gprMax/pml_updates/pml_updates_electric_MRIPML_ext.pyx @@ -0,0 +1,857 @@ +# Copyright (C) 2015-2019: The University of Edinburgh +# Authors: Craig Warren and Antonis Giannopoulos +# +# This file is part of gprMax. +# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see <http://www.gnu.org/licenses/>. + +import numpy as np +cimport numpy as np +from cython.parallel import prange + +from gprMax.constants cimport floattype_t + + +cpdef void order1_xminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ey and Ez field components for the xminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. 
dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEy, materialEz + cdef floattype_t dx, dHy, dHz, IRA, IRA1, RB0, RC0, RE0, RF0 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + IRA = 1 / RA[0, i] + IRA1 = IRA - 1 + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + RC0 = IRA * RB0 * RF0 + ii = xf - i + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + # Ey + materialEy = ID[1, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii - 1, jj, kk]) / dx + Ey[ii, jj, kk] = Ey[ii, jj, kk] - updatecoeffsE[materialEy, 4] * (IRA1 * dHz - IRA * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * dHz - RC0 * Phi1[0, i, j, k] + # Ez + materialEz = ID[2, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii - 1, jj, kk]) / dx + Ez[ii, jj, kk] = Ez[ii, jj, kk] + updatecoeffsE[materialEz, 4] * (IRA1 * dHy - IRA * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * dHy - RC0 * Phi2[0, i, j, k] + +cpdef void order2_xminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ey and Ez field components for the xminus slab. 
+ + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEy, materialEz + cdef floattype_t dx, dHy, dHz, IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + IRA = 1 / (RA[0, i] + RA[1, i]) + IRA1 = IRA - 1 + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + RC0 = IRA * RF0 + RB1 = RB[1, i] + RE1 = RE[1, i] + RF1 = RF[1, i] + RC1 = IRA * RF1 + ii = xf - i + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + Psi1 = RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k] + Psi2 = RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k] + # Ey + materialEy = ID[1, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii - 1, jj, kk]) / dx + Ey[ii, jj, kk] = Ey[ii, jj, kk] - updatecoeffsE[materialEy, 4] * (IRA1 * dHz - IRA * Psi1) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] + RC1 * (dHz - Psi1) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * (dHz - Psi1) + # Ez + materialEz = ID[2, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii - 1, jj, kk]) / dx + Ez[ii, jj, kk] = Ez[ii, jj, kk] + updatecoeffsE[materialEz, 4] * (IRA1 * dHy - IRA * Psi2) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] + RC1 * (dHy - Psi2) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * (dHy - Psi2) + + +cpdef void order1_xplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, 
::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ey and Ez field components for the xplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEy, materialEz + cdef floattype_t dx, dHy, dHz, IRA, IRA1, RB0, RC0, RE0, RF0 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + IRA = 1 / RA[0, i] + IRA1 = IRA - 1 + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + RC0 = IRA * RB0 * RF0 + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + # Ey + materialEy = ID[1, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii - 1, jj, kk]) / dx + Ey[ii, jj, kk] = Ey[ii, jj, kk] - updatecoeffsE[materialEy, 4] * (IRA1 * dHz - IRA * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * dHz - RC0 * Phi1[0, i, j, k] + # Ez + materialEz = ID[2, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii - 1, jj, kk]) / dx + Ez[ii, jj, kk] = Ez[ii, jj, kk] + updatecoeffsE[materialEz, 4] * (IRA1 * dHy - IRA * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * dHy - RC0 * Phi2[0, i, j, k] + +cpdef void order2_xplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, 
:, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ey and Ez field components for the xplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEy, materialEz + cdef floattype_t dx, dHy, dHz, IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + IRA = 1 / (RA[0, i] + RA[1, i]) + IRA1 = IRA - 1 + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + RC0 = IRA * RF0 + RB1 = RB[1, i] + RE1 = RE[1, i] + RF1 = RF[1, i] + RC1 = IRA * RF1 + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + Psi1 = RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k] + Psi2 = RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k] + # Ey + materialEy = ID[1, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii - 1, jj, kk]) / dx + Ey[ii, jj, kk] = Ey[ii, jj, kk] - updatecoeffsE[materialEy, 4] * (IRA1 * dHz - IRA * Psi1) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] + RC1 * (dHz - Psi1) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * (dHz - Psi1) + # Ez + materialEz = ID[2, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii - 1, jj, kk]) / dx + Ez[ii, jj, kk] = Ez[ii, jj, kk] + updatecoeffsE[materialEz, 4] * (IRA1 * dHy - IRA * Psi2) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] + RC1 * (dHy - Psi2) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * (dHy - Psi2) + + 
+cpdef void order1_yminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ez field components for the yminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEz + cdef floattype_t dy, dHx, dHz, IRA, IRA1, RB0, RC0, RE0, RF0 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = yf - j + IRA = 1 / RA[0, j] + IRA1 = IRA - 1 + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + RC0 = IRA * RB0 * RF0 + for k in range(0, nz): + kk = k + zs + # Ex + materialEx = ID[0, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii, jj - 1, kk]) / dy + Ex[ii, jj, kk] = Ex[ii, jj, kk] + updatecoeffsE[materialEx, 4] * (IRA1 * dHz - IRA * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * dHz - RC0 * Phi1[0, i, j, k] + # Ez + materialEz = ID[2, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj - 1, kk]) / dy + Ez[ii, jj, kk] = Ez[ii, jj, kk] - updatecoeffsE[materialEz, 4] * (IRA1 * dHx - IRA * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * dHx - RC0 * Phi2[0, i, j, 
k] + +cpdef void order2_yminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ez field components for the yminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEz + cdef floattype_t dy, dHx, dHz, IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = yf - j + IRA = 1 / (RA[0, j] + RA[1, j]) + IRA1 = IRA - 1 + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + RC0 = IRA * RF0 + RB1 = RB[1, j] + RE1 = RE[1, j] + RF1 = RF[1, j] + RC1 = IRA * RF1 + for k in range(0, nz): + kk = k + zs + Psi1 = RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k] + Psi2 = RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k] + # Ex + materialEx = ID[0, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii, jj - 1, kk]) / dy + Ex[ii, jj, kk] = Ex[ii, jj, kk] + updatecoeffsE[materialEx, 4] * (IRA1 * dHz - IRA * Psi1) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] + RC1 * (dHz - Psi1) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * (dHz - Psi1) + # Ez + 
materialEz = ID[2, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj - 1, kk]) / dy + Ez[ii, jj, kk] = Ez[ii, jj, kk] - updatecoeffsE[materialEz, 4] * (IRA1 * dHx - IRA * Psi2) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] + RC1 * (dHx - Psi2) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * (dHx - Psi2) + + +cpdef void order1_yplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ez field components for the yplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. 
dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEz + cdef floattype_t dy, dHx, dHz, IRA, IRA1, RB0, RC0, RE0, RF0 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + IRA = 1 / RA[0, j] + IRA1 = IRA - 1 + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + RC0 = IRA * RB0 * RF0 + for k in range(0, nz): + kk = k + zs + # Ex + materialEx = ID[0, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii, jj - 1, kk]) / dy + Ex[ii, jj, kk] = Ex[ii, jj, kk] + updatecoeffsE[materialEx, 4] * (IRA1 * dHz - IRA * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * dHz - RC0 * Phi1[0, i, j, k] + # Ez + materialEz = ID[2, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj - 1, kk]) / dy + Ez[ii, jj, kk] = Ez[ii, jj, kk] - updatecoeffsE[materialEz, 4] * (IRA1 * dHx - IRA * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * dHx - RC0 * Phi2[0, i, j, k] + +cpdef void order2_yplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ez field components for the yplus slab. 
+ + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEz + cdef floattype_t dy, dHx, dHz, IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + IRA = 1 / (RA[0, j] + RA[1, j]) + IRA1 = IRA - 1 + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + RC0 = IRA * RF0 + RB1 = RB[1, j] + RE1 = RE[1, j] + RF1 = RF[1, j] + RC1 = IRA * RF1 + for k in range(0, nz): + kk = k + zs + Psi1 = RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k] + Psi2 = RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k] + # Ex + materialEx = ID[0, ii, jj, kk] + dHz = (Hz[ii, jj, kk] - Hz[ii, jj - 1, kk]) / dy + Ex[ii, jj, kk] = Ex[ii, jj, kk] + updatecoeffsE[materialEx, 4] * (IRA1 * dHz - IRA * Psi1) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] + RC1 * (dHz - Psi1) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * (dHz - Psi1) + # Ez + materialEz = ID[2, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj - 1, kk]) / dy + Ez[ii, jj, kk] = Ez[ii, jj, kk] - updatecoeffsE[materialEz, 4] * (IRA1 * dHx - IRA * Psi2) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] + RC1 * (dHx - Psi2) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * (dHx - Psi2) + + +cpdef void order1_zminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, 
::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ey field components for the zminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEy + cdef floattype_t dz, dHx, dHy, IRA, IRA1, RB0, RC0, RE0, RF0 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = zf - k + IRA = 1 / RA[0, k] + IRA1 = IRA - 1 + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + RC0 = IRA * RB0 * RF0 + # Ex + materialEx = ID[0, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii, jj, kk - 1]) / dz + Ex[ii, jj, kk] = Ex[ii, jj, kk] - updatecoeffsE[materialEx, 4] * (IRA1 * dHy - IRA * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * dHy - RC0 * Phi1[0, i, j, k] + # Ey + materialEy = ID[1, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj, kk - 1]) / dz + Ey[ii, jj, kk] = Ey[ii, jj, kk] + updatecoeffsE[materialEy, 4] * (IRA1 * dHx - IRA * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * dHx - RC0 * Phi2[0, i, j, k] + +cpdef void order2_zminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + 
floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ey field components for the zminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEy + cdef floattype_t dz, dHx, dHy, IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = zf - k + IRA = 1 / (RA[0, k] + RA[1, k]) + IRA1 = IRA - 1 + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + RC0 = IRA * RF0 + RB1 = RB[1, k] + RE1 = RE[1, k] + RF1 = RF[1, k] + RC1 = IRA * RF1 + Psi1 = RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k] + Psi2 = RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k] + # Ex + materialEx = ID[0, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii, jj, kk - 1]) / dz + Ex[ii, jj, kk] = Ex[ii, jj, kk] - updatecoeffsE[materialEx, 4] * (IRA1 * dHy - IRA * Psi1) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] + RC1 * (dHy - Psi1) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * (dHy - Psi1) + # Ey + materialEy = ID[1, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj, kk - 1]) / dz + Ey[ii, jj, kk] = Ey[ii, jj, kk] + updatecoeffsE[materialEy, 4] * (IRA1 * dHx - IRA * Psi2) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] + RC1 * (dHx - Psi2) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * 
(dHx - Psi2) + + +cpdef void order1_zplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ey field components for the zplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEy + cdef floattype_t dz, dHx, dHy, IRA, IRA1, RB0, RC0, RE0, RF0 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + IRA = 1 / RA[0, k] + IRA1 = IRA - 1 + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + RC0 = IRA * RB0 * RF0 + # Ex + materialEx = ID[0, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii, jj, kk - 1]) / dz + Ex[ii, jj, kk] = Ex[ii, jj, kk] - updatecoeffsE[materialEx, 4] * (IRA1 * dHy - IRA * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * dHy - RC0 * Phi1[0, i, j, k] + # Ey + materialEy = ID[1, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj, kk - 1]) / dz + Ey[ii, jj, kk] = Ey[ii, jj, kk] + updatecoeffsE[materialEy, 4] * (IRA1 * dHx - IRA * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * dHx - RC0 
* Phi2[0, i, j, k] + +cpdef void order2_zplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsE, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Ex and Ey field components for the zplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialEx, materialEy + cdef floattype_t dz, dHx, dHy, IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + IRA = 1 / (RA[0, k] + RA[1, k]) + IRA1 = IRA - 1 + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + RC0 = IRA * RF0 + RB1 = RB[1, k] + RE1 = RE[1, k] + RF1 = RF[1, k] + RC1 = IRA * RF1 + Psi1 = RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k] + Psi2 = RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k] + # Ex + materialEx = ID[0, ii, jj, kk] + dHy = (Hy[ii, jj, kk] - Hy[ii, jj, kk - 1]) / dz + Ex[ii, jj, kk] = Ex[ii, jj, kk] - updatecoeffsE[materialEx, 4] * (IRA1 * dHy - IRA * Psi1) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] + RC1 * (dHy - Psi1) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * (dHy - 
Psi1) + # Ey + materialEy = ID[1, ii, jj, kk] + dHx = (Hx[ii, jj, kk] - Hx[ii, jj, kk - 1]) / dz + Ey[ii, jj, kk] = Ey[ii, jj, kk] + updatecoeffsE[materialEy, 4] * (IRA1 * dHx - IRA * Psi2) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] + RC1 * (dHx - Psi2) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * (dHx - Psi2) diff --git a/gprMax/pml_updates/pml_updates_electric_MRIPML_gpu.py b/gprMax/pml_updates/pml_updates_electric_MRIPML_gpu.py new file mode 100644 index 00000000..2712e8bc --- /dev/null +++ b/gprMax/pml_updates/pml_updates_electric_MRIPML_gpu.py @@ -0,0 +1,1030 @@ +# Copyright (C) 2015-2019: The University of Edinburgh +# Authors: Craig Warren and Antonis Giannopoulos +# +# This file is part of gprMax. +# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see <http://www.gnu.org/licenses/>. 
+ +from string import Template + +kernels_template_pml_electric_MRIPML = Template(""" + +// Macros for converting subscripts to linear index: +#define INDEX2D_R(m, n) (m)*($NY_R)+(n) +#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n) +#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k) +#define INDEX4D_ID(p, i, j, k) (p)*($NX_ID)*($NY_ID)*($NZ_ID)+(i)*($NY_ID)*($NZ_ID)+(j)*($NZ_ID)+(k) +#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k) +#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k) + +// Material coefficients (read-only) in constant memory (64KB) +__device__ __constant__ $REAL updatecoeffsE[$N_updatecoeffsE]; + + +__global__ void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ey and Ez field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, dHy, dHz; + $REAL dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - i1; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + IRA = 1 / RA[INDEX2D_R(0,i1)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + RC0 = IRA * RB0 * RF0; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (IRA1 * dHz - IRA * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * dHz - RC0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - i2; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + IRA = 1 / RA[INDEX2D_R(0,i2)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,i2)]; + 
RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + RC0 = IRA * RB0 * RF0; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (IRA1 * dHy - IRA * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * dHy - RC0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]; + } +} + + +__global__ void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ey and Ez field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2, dHy, dHz; + $REAL dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - i1; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,i1)] + RA[INDEX2D_R(1,i1)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,i1)]; + RE1 = RE[INDEX2D_R(1,i1)]; + RF1 = RF[INDEX2D_R(1,i1)]; + RC1 = IRA * RF1; + + // Ey + Psi1 = RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]; + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (IRA1 * dHz - IRA * Psi1); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] + RC1 * (dHz - Psi1); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * (dHz - Psi1); + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - i2; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,i2)] + RA[INDEX2D_R(1,i2)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,i2)]; + RE1 = RE[INDEX2D_R(1,i2)]; + RF1 = RF[INDEX2D_R(1,i2)]; + RC1 = IRA * RF1; + + // Ez + Psi2 = RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]; + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (IRA1 * dHy - IRA * Psi2); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] + RC1 * (dHy - Psi2); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * (dHy - Psi2); + } +} + + +__global__ void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ey and Ez field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, dHy, dHz; + $REAL dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + IRA = 1 / RA[INDEX2D_R(0,i1)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + RC0 = IRA * RB0 * RF0; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (IRA1 * dHz - IRA * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * dHz - RC0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + IRA = 1 / RA[INDEX2D_R(0,i2)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 
= RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + RC0 = IRA * RB0 * RF0; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (IRA1 * dHy - IRA * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * dHy - RC0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]; + } +} + + +__global__ void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ey and Ez field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2, dHy, dHz; + $REAL dx = d; + int ii, jj, kk, materialEy, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,i1)] + RA[INDEX2D_R(1,i1)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,i1)]; + RE1 = RE[INDEX2D_R(1,i1)]; + RF1 = RF[INDEX2D_R(1,i1)]; + RC1 = IRA * RF1; + + // Ey + Psi1 = RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]; + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (IRA1 * dHz - IRA * Psi1); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] + RC1 * (dHz - Psi1); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * (dHz - Psi1); + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,i2)] + RA[INDEX2D_R(1,i2)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,i2)]; + RE1 = RE[INDEX2D_R(1,i2)]; + RF1 = RF[INDEX2D_R(1,i2)]; + RC1 = IRA * RF1; + + // Ez + Psi2 = RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]; + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (IRA1 * dHy - IRA * Psi2); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] + RC1 * (dHy - Psi2); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * (dHy - Psi2); + } +} + + +__global__ void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ez field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, dHx, dHz; + $REAL dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - j1; + kk = k1 + zs; + + // PML coefficients + IRA = 1 / RA[INDEX2D_R(0,j1)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + RC0 = IRA * RB0 * RF0; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (IRA1 * dHz - IRA * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * dHz - RC0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - j2; + kk = k2 + zs; + + // PML coefficients + IRA = 1 / RA[INDEX2D_R(0,j2)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 
= RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + RC0 = IRA * RB0 * RF0; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (IRA1 * dHx - IRA * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * dHx - RC0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]; + } +} + + +__global__ void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ez field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2, dHx, dHz; + $REAL dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - j1; + kk = k1 + zs; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,j1)] + RA[INDEX2D_R(1,j1)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,j1)]; + RE1 = RE[INDEX2D_R(1,j1)]; + RF1 = RF[INDEX2D_R(1,j1)]; + RC1 = IRA * RF1; + + // Ex + Psi1 = RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]; + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (IRA1 * dHz - IRA * Psi1); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] + RC1 * (dHz - Psi1); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * (dHz - Psi1); + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - j2; + kk = k2 + zs; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,j2)] + RA[INDEX2D_R(1,j2)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,j2)]; + RE1 = RE[INDEX2D_R(1,j2)]; + RF1 = RF[INDEX2D_R(1,j2)]; + RC1 = IRA * RF1; + + // Ez + Psi2 = RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]; + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (IRA1 * dHx - IRA * Psi2); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] + RC1 * (dHx - Psi2); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * (dHx - Psi2); + } +} + + +__global__ void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ez field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, dHx, dHz; + $REAL dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays: slab-local subscripts offset by the slab start (xs, ys, zs) + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients, looked up by the y subscript (j1) + IRA = 1 / RA[INDEX2D_R(0,j1)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + RC0 = IRA * RB0 * RF0; + + // Ex: update the field, then advance the PHI1 auxiliary term + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (IRA1 * dHz - IRA * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * dHz - RC0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays: slab-local subscripts offset by the slab start (xs, ys, zs) + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients, looked up by the y subscript (j2) + IRA = 1 / RA[INDEX2D_R(0,j2)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 
= RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + RC0 = IRA * RB0 * RF0; + + // Ez + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (IRA1 * dHx - IRA * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * dHx - RC0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]; + } +} + + +__global__ void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ez field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2, dHx, dHz; + $REAL dy = d; + int ii, jj, kk, materialEx, materialEz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,j1)] + RA[INDEX2D_R(1,j1)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,j1)]; + RE1 = RE[INDEX2D_R(1,j1)]; + RF1 = RF[INDEX2D_R(1,j1)]; + RC1 = IRA * RF1; + + // Ex + Psi1 = RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]; + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (IRA1 * dHz - IRA * Psi1); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] + RC1 * (dHz - Psi1); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * (dHz - Psi1); + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,j2)] + RA[INDEX2D_R(1,j2)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,j2)]; + RE1 = RE[INDEX2D_R(1,j2)]; + RF1 = RF[INDEX2D_R(1,j2)]; + RC1 = IRA * RF1; + + // Ez + Psi2 = RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]; + materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; + Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (IRA1 * dHx - IRA * Psi2); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] + RC1 * (dHx - Psi2); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * (dHx - Psi2); + } +} + + +__global__ void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ey field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, dHx, dHy; + $REAL dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - k1; + + // PML coefficients + IRA = 1 / RA[INDEX2D_R(0,k1)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RC0 = IRA * RB0 * RF0; + + // Ex + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (IRA1 * dHy - IRA * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * dHy - RC0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - k2; + + // PML coefficients + IRA = 1 / RA[INDEX2D_R(0,k2)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 
= RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RC0 = IRA * RB0 * RF0; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (IRA1 * dHx - IRA * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * dHx - RC0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]; + } +} + + +__global__ void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ey field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2, dHx, dHy; + $REAL dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - k1; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,k1)] + RA[INDEX2D_R(1,k1)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,k1)]; + RE1 = RE[INDEX2D_R(1,k1)]; + RF1 = RF[INDEX2D_R(1,k1)]; + RC1 = IRA * RF1; + + // Ex + Psi1 = RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]; + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (IRA1 * dHy - IRA * Psi1); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] + RC1 * (dHy - Psi1); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * (dHy - Psi1); + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - k2; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,k2)] + RA[INDEX2D_R(1,k2)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,k2)]; + RE1 = RE[INDEX2D_R(1,k2)]; + RF1 = RF[INDEX2D_R(1,k2)]; + RC1 = IRA * RF1; + + // Ey + Psi2 = RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]; + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (IRA1 * dHx - IRA * Psi2); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] + RC1 * (dHx - Psi2); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * (dHx - Psi2); + } +} + + +__global__ void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ey field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, dHx, dHy; + $REAL dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays: slab-local subscripts offset by the slab start (xs, ys, zs) + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients, looked up by the z subscript (k1) + IRA = 1 / RA[INDEX2D_R(0,k1)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RC0 = IRA * RB0 * RF0; + + // Ex: update the field, then advance the PHI1 auxiliary term + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (IRA1 * dHy - IRA * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * dHy - RC0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays: slab-local subscripts offset by the slab start (xs, ys, zs) + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients, looked up by the z subscript (k2) + IRA = 1 / RA[INDEX2D_R(0,k2)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 
= RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RC0 = IRA * RB0 * RF0; + + // Ey + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (IRA1 * dHx - IRA * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * dHx - RC0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]; + } +} + + +__global__ void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Ex and Ey field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2, dHx, dHy; + $REAL dz = d; + int ii, jj, kk, materialEx, materialEy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,k1)] + RA[INDEX2D_R(1,k1)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,k1)]; + RE1 = RE[INDEX2D_R(1,k1)]; + RF1 = RF[INDEX2D_R(1,k1)]; + RC1 = IRA * RF1; + + // Ex + Psi1 = RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]; + materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; + dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (IRA1 * dHy - IRA * Psi1); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] + RC1 * (dHy - Psi1); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * (dHy - Psi1); + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,k2)] + RA[INDEX2D_R(1,k2)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,k2)]; + RE1 = RE[INDEX2D_R(1,k2)]; + RF1 = RF[INDEX2D_R(1,k2)]; + RC1 = IRA * RF1; + + // Ey + Psi2 = RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]; + materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; + dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; + Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (IRA1 * dHx - IRA * Psi2); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] + RC1 * (dHx - Psi2); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * (dHx - Psi2); + } +} + +""") diff --git a/gprMax/pml_updates/pml_updates_magnetic_HORIPML_ext.pyx b/gprMax/pml_updates/pml_updates_magnetic_HORIPML_ext.pyx new file mode 100644 index 00000000..0604a6f5 --- /dev/null +++ b/gprMax/pml_updates/pml_updates_magnetic_HORIPML_ext.pyx @@ -0,0 +1,827 @@ +# Copyright (C) 2015-2019: The University of Edinburgh +# Authors: Craig Warren and Antonis Giannopoulos +# +# This file is part of gprMax. +# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see . + +import numpy as np +cimport numpy as np +from cython.parallel import prange + +from gprMax.constants cimport floattype_t + + +cpdef void order1_xminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hy and Hz field components for the xminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + ntREads (int): Number of tREads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + HPhi, RA, RB, ERE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. 
dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHy, materialHz + cdef floattype_t dx, dEy, dEz, RA01, RB0, RE0, RF0 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = xf - (i + 1) + RA01 = RA[0, i] - 1 + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + # Hy + materialHy = ID[4, ii, jj, kk] + dEz = (Ez[ii + 1, jj, kk] - Ez[ii, jj, kk]) / dx + Hy[ii, jj, kk] = Hy[ii, jj, kk] + updatecoeffsH[materialHy, 4] * (RA01 * dEz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dEz + # Hz + materialHz = ID[5, ii, jj, kk] + dEy = (Ey[ii + 1, jj, kk] - Ey[ii, jj, kk]) / dx + Hz[ii, jj, kk] = Hz[ii, jj, kk] - updatecoeffsH[materialHz, 4] * (RA01 * dEy + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dEy + +cpdef void order2_xminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hy and Hz field components for the xminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsH, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g.
dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHy, materialHz + cdef floattype_t dx, dEy, dEz, RA0, RA01, RB0, RE0, RF0, RA1, RB1, RE1, RF1 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = xf - (i + 1) + RA0 = RA[0, i] + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + RA1 = RA[1, i] + RB1 = RB[1, i] + RE1 = RE[1, i] + RF1 = RF[1, i] + RA01 = RA[0, i] * RA[1, i] - 1 + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + # Hy + materialHy = ID[4, ii, jj, kk] + dEz = (Ez[ii + 1, jj, kk] - Ez[ii, jj, kk]) / dx + Hy[ii, jj, kk] = Hy[ii, jj, kk] + updatecoeffsH[materialHy, 4] * (RA01 * dEz + RA1 * RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k]) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] - RF1 * (RA0 * dEz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dEz + # Hz + materialHz = ID[5, ii, jj, kk] + dEy = (Ey[ii + 1, jj, kk] - Ey[ii, jj, kk]) / dx + Hz[ii, jj, kk] = Hz[ii, jj, kk] - updatecoeffsH[materialHz, 4] * (RA01 * dEy + RA1 * RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k]) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] - RF1 * (RA0 * dEy + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dEy + + +cpdef void order1_xplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hy and Hz field components for the xplus slab. 
+ + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsH, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHy, materialHz + cdef floattype_t dx, dEy, dEz, RA01, RB0, RE0, RF0 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + RA01 = RA[0, i] - 1 + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + # Hy + materialHy = ID[4, ii, jj, kk] + dEz = (Ez[ii + 1, jj, kk] - Ez[ii, jj, kk]) / dx + Hy[ii, jj, kk] = Hy[ii, jj, kk] + updatecoeffsH[materialHy, 4] * (RA01 * dEz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dEz + # Hz + materialHz = ID[5, ii, jj, kk] + dEy = (Ey[ii + 1, jj, kk] - Ey[ii, jj, kk]) / dx + Hz[ii, jj, kk] = Hz[ii, jj, kk] - updatecoeffsH[materialHz, 4] * (RA01 * dEy + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dEy + +cpdef void order2_xplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hy and Hz field components for the xplus slab.
+ + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsH, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHy, materialHz + cdef floattype_t dx, dEy, dEz, RA0, RA01, RB0, RE0, RF0, RA1, RB1, RE1, RF1 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + RA0 = RA[0, i] + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + RA1 = RA[1, i] + RB1 = RB[1, i] + RE1 = RE[1, i] + RF1 = RF[1, i] + RA01 = RA[0, i] * RA[1, i] - 1 + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + # Hy + materialHy = ID[4, ii, jj, kk] + dEz = (Ez[ii + 1, jj, kk] - Ez[ii, jj, kk]) / dx + Hy[ii, jj, kk] = Hy[ii, jj, kk] + updatecoeffsH[materialHy, 4] * (RA01 * dEz + RA1 * RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k]) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] - RF1 * (RA0 * dEz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dEz + # Hz + materialHz = ID[5, ii, jj, kk] + dEy = (Ey[ii + 1, jj, kk] - Ey[ii, jj, kk]) / dx + Hz[ii, jj, kk] = Hz[ii, jj, kk] - updatecoeffsH[materialHz, 4] * (RA01 * dEy + RA1 * RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k]) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] - RF1 * (RA0 * dEy + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dEy + + +cpdef void order1_yminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :,
::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hz field components for the yminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsH, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHz + cdef floattype_t dy, dEx, dEz, RA01, RB0, RE0, RF0 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = yf - (j + 1) + RA01 = RA[0, j] - 1 + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + for k in range(0, nz): + kk = k + zs + # Hx + materialHx = ID[3, ii, jj, kk] + dEz = (Ez[ii, jj + 1, kk] - Ez[ii, jj, kk]) / dy + Hx[ii, jj, kk] = Hx[ii, jj, kk] - updatecoeffsH[materialHx, 4] * (RA01 * dEz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dEz + # Hz + materialHz = ID[5, ii, jj, kk] + dEx = (Ex[ii, jj + 1, kk] - Ex[ii, jj, kk]) / dy + Hz[ii, jj, kk] = Hz[ii, jj, kk] + updatecoeffsH[materialHz, 4] * (RA01 * dEx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dEx + +cpdef void order2_yminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, +
floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hz field components for the yminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsH, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHz + cdef floattype_t dy, dEx, dEz, RA0, RA01, RB0, RE0, RF0, RA1, RB1, RE1, RF1 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = yf - (j + 1) + RA0 = RA[0, j] + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + RA1 = RA[1, j] + RB1 = RB[1, j] + RE1 = RE[1, j] + RF1 = RF[1, j] + RA01 = RA[0, j] * RA[1, j] - 1 + for k in range(0, nz): + kk = k + zs + # Hx + materialHx = ID[3, ii, jj, kk] + dEz = (Ez[ii, jj + 1, kk] - Ez[ii, jj, kk]) / dy + Hx[ii, jj, kk] = Hx[ii, jj, kk] - updatecoeffsH[materialHx, 4] * (RA01 * dEz + RA1 * RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k]) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] - RF1 * (RA0 * dEz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dEz + # Hz + materialHz = ID[5, ii, jj, kk] + dEx = (Ex[ii, jj + 1, kk] - Ex[ii, jj, kk]) / dy + Hz[ii, jj, kk] = Hz[ii, jj, kk] + updatecoeffsH[materialHz, 4] * (RA01 * dEx + RA1 * RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k]) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] - RF1 * (RA0 * dEx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dEx + + +cpdef void order1_yplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1]
updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hz field components for the yplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsH, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHz + cdef floattype_t dy, dEx, dEz, RA01, RB0, RE0, RF0 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + RA01 = RA[0, j] - 1 + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + for k in range(0, nz): + kk = k + zs + # Hx + materialHx = ID[3, ii, jj, kk] + dEz = (Ez[ii, jj + 1, kk] - Ez[ii, jj, kk]) / dy + Hx[ii, jj, kk] = Hx[ii, jj, kk] - updatecoeffsH[materialHx, 4] * (RA01 * dEz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dEz + # Hz + materialHz = ID[5, ii, jj, kk] + dEx = (Ex[ii, jj + 1, kk] - Ex[ii, jj, kk]) / dy + Hz[ii, jj, kk] = Hz[ii, jj, kk] + updatecoeffsH[materialHz, 4] * (RA01 * dEx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dEx + +cpdef void order2_yplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, +
floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hz field components for the yplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsH, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHz + cdef floattype_t dy, dEx, dEz, RA0, RA01, RB0, RE0, RF0, RA1, RB1, RE1, RF1 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + RA0 = RA[0, j] + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + RA1 = RA[1, j] + RB1 = RB[1, j] + RE1 = RE[1, j] + RF1 = RF[1, j] + RA01 = RA[0, j] * RA[1, j] - 1 + for k in range(0, nz): + kk = k + zs + # Hx + materialHx = ID[3, ii, jj, kk] + dEz = (Ez[ii, jj + 1, kk] - Ez[ii, jj, kk]) / dy + Hx[ii, jj, kk] = Hx[ii, jj, kk] - updatecoeffsH[materialHx, 4] * (RA01 * dEz + RA1 * RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k]) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] - RF1 * (RA0 * dEz + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dEz + # Hz + materialHz = ID[5, ii, jj, kk] + dEx = (Ex[ii, jj + 1, kk] - Ex[ii, jj, kk]) / dy + Hz[ii, jj, kk] = Hz[ii, jj, kk] + updatecoeffsH[materialHz, 4] * (RA01 * dEx + RA1 * RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k]) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] - RF1 * (RA0 * dEx + RB0 * Phi2[0,
i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dEx + + +cpdef void order1_zminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hy field components for the zminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsH, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g.
dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHy + cdef floattype_t dz, dEx, dEy, RA01, RB0, RE0, RF0 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = zf - (k + 1) + RA01 = RA[0, k] - 1 + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + # Hx + materialHx = ID[3, ii, jj, kk] + dEy = (Ey[ii, jj, kk + 1] - Ey[ii, jj, kk]) / dz + Hx[ii, jj, kk] = Hx[ii, jj, kk] + updatecoeffsH[materialHx, 4] * (RA01 * dEy + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dEy + # Hy + materialHy = ID[4, ii, jj, kk] + dEx = (Ex[ii, jj, kk + 1] - Ex[ii, jj, kk]) / dz + Hy[ii, jj, kk] = Hy[ii, jj, kk] - updatecoeffsH[materialHy, 4] * (RA01 * dEx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dEx + +cpdef void order2_zminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hy field components for the zminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsH, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g.
dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHy + cdef floattype_t dz, dEx, dEy, RA0, RA01, RB0, RE0, RF0, RA1, RB1, RE1, RF1 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = zf - (k + 1) + RA0 = RA[0, k] + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + RA1 = RA[1, k] + RB1 = RB[1, k] + RE1 = RE[1, k] + RF1 = RF[1, k] + RA01 = RA[0, k] * RA[1, k] - 1 + # Hx + materialHx = ID[3, ii, jj, kk] + dEy = (Ey[ii, jj, kk + 1] - Ey[ii, jj, kk]) / dz + Hx[ii, jj, kk] = Hx[ii, jj, kk] + updatecoeffsH[materialHx, 4] * (RA01 * dEy + RA1 * RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k]) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] - RF1 * (RA0 * dEy + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dEy + # Hy + materialHy = ID[4, ii, jj, kk] + dEx = (Ex[ii, jj, kk + 1] - Ex[ii, jj, kk]) / dz + Hy[ii, jj, kk] = Hy[ii, jj, kk] - updatecoeffsH[materialHy, 4] * (RA01 * dEx + RA1 * RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k]) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] - RF1 * (RA0 * dEx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dEx + + +cpdef void order1_zplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hy field components for the zplus slab. 
+ + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsH, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHy + cdef floattype_t dz, dEx, dEy, RA01, RB0, RE0, RF0 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + RA01 = RA[0, k] - 1 + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + # Hx + materialHx = ID[3, ii, jj, kk] + dEy = (Ey[ii, jj, kk + 1] - Ey[ii, jj, kk]) / dz + Hx[ii, jj, kk] = Hx[ii, jj, kk] + updatecoeffsH[materialHx, 4] * (RA01 * dEy + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dEy + # Hy + materialHy = ID[4, ii, jj, kk] + dEx = (Ex[ii, jj, kk + 1] - Ex[ii, jj, kk]) / dz + Hy[ii, jj, kk] = Hy[ii, jj, kk] - updatecoeffsH[materialHy, 4] * (RA01 * dEx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dEx + +cpdef void order2_zplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hy field components for the zplus slab.
+ + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffsH, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi1, Phi2, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHy + cdef floattype_t dz, dEx, dEy, RA0, RA01, RB0, RE0, RF0, RA1, RB1, RE1, RF1 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + RA0 = RA[0, k] + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + RA1 = RA[1, k] + RB1 = RB[1, k] + RE1 = RE[1, k] + RF1 = RF[1, k] + RA01 = RA[0, k] * RA[1, k] - 1 + # Hx + materialHx = ID[3, ii, jj, kk] + dEy = (Ey[ii, jj, kk + 1] - Ey[ii, jj, kk]) / dz + Hx[ii, jj, kk] = Hx[ii, jj, kk] + updatecoeffsH[materialHx, 4] * (RA01 * dEy + RA1 * RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k]) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] - RF1 * (RA0 * dEy + RB0 * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] - RF0 * dEy + # Hy + materialHy = ID[4, ii, jj, kk] + dEx = (Ex[ii, jj, kk + 1] - Ex[ii, jj, kk]) / dz + Hy[ii, jj, kk] = Hy[ii, jj, kk] - updatecoeffsH[materialHy, 4] * (RA01 * dEx + RA1 * RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k]) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] - RF1 * (RA0 * dEx + RB0 * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] - RF0 * dEx diff --git a/gprMax/pml_updates/pml_updates_magnetic_HORIPML_gpu.py b/gprMax/pml_updates/pml_updates_magnetic_HORIPML_gpu.py new file mode 100644 index 00000000..6758e317 --- /dev/null +++ b/gprMax/pml_updates/pml_updates_magnetic_HORIPML_gpu.py @@ -0,0 +1,982 @@ +# Copyright (C) 2015-2019: The University of Edinburgh +# Authors: Craig
Warren and Antonis Giannopoulos +# +# This file is part of gprMax. +# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see . + +from string import Template + +kernels_template_pml_magnetic_HORIPML = Template(""" + +// Macros for converting subscripts to linear index: +#define INDEX2D_R(m, n) (m)*($NY_R)+(n) +#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n) +#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k) +#define INDEX4D_ID(p, i, j, k) (p)*($NX_ID)*($NY_ID)*($NZ_ID)+(i)*($NY_ID)*($NZ_ID)+(j)*($NZ_ID)+(k) +#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k) +#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k) + +// Material coefficients (read-only) in constant memory (64KB) +__device__ __constant__ $REAL updatecoeffsH[$N_updatecoeffsH]; + + +__global__ void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hy and Hz field components for 
the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dEy, dEz; + $REAL dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - (i1 + 1); + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i1)] - 1; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if
(p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - (i2 + 1); + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i2)] - 1; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +} + + +__global__ void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hy and Hz field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz; + $REAL dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = xf - (i1 + 1); + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i1)]; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + RA1 = RA[INDEX2D_R(1,i1)]; + RB1 = RB[INDEX2D_R(1,i1)]; + RE1 = RE[INDEX2D_R(1,i1)]; + RF1 = RF[INDEX2D_R(1,i1)]; + RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 *
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = xf - (i2 + 1); + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i2)]; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + RA1 = RA[INDEX2D_R(1,i2)]; + RB1 = RB[INDEX2D_R(1,i2)]; + RE1 = RE[INDEX2D_R(1,i2)]; + RF1 = RF[INDEX2D_R(1,i2)]; + RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +} + + +__global__ void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hy and Hz field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g.
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dEy, dEz; + $REAL dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i1)] - 1; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,i2)] - 1; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + 
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +} + + +__global__ void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hy and Hz field components for the xplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g.
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz; + $REAL dx = d; + int ii, jj, kk, materialHy, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i1)]; + RB0 = RB[INDEX2D_R(0,i1)]; + RE0 = RE[INDEX2D_R(0,i1)]; + RF0 = RF[INDEX2D_R(0,i1)]; + RA1 = RA[INDEX2D_R(1,i1)]; + RB1 = RB[INDEX2D_R(1,i1)]; + RE1 = RE[INDEX2D_R(1,i1)]; + RF1 = RF[INDEX2D_R(1,i1)]; + RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,i2)]; + RB0 = RB[INDEX2D_R(0,i2)]; + RE0 = RE[INDEX2D_R(0,i2)]; + RF0 = RF[INDEX2D_R(0,i2)]; + RA1 = RA[INDEX2D_R(1,i2)]; + RB1 = RB[INDEX2D_R(1,i2)]; + RE1 = RE[INDEX2D_R(1,i2)]; + RF1 = RF[INDEX2D_R(1,i2)]; + RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy; + } +} + + +__global__ void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hx and Hz field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g.
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dEx, dEz; + $REAL dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - (j1 + 1); + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j1)] - 1; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - (j2 + 1); + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j2)] - 1; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + + // Hz + materialHz = 
ID[INDEX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__global__ void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hx and Hz field components for the yminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g.
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz; + $REAL dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = yf - (j1 + 1); + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j1)]; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + RA1 = RA[INDEX2D_R(1,j1)]; + RB1 = RB[INDEX2D_R(1,j1)]; + RE1 = RE[INDEX2D_R(1,j1)]; + RF1 = RF[INDEX2D_R(1,j1)]; + RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = yf - (j2 + 1); + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j2)]; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + RA1 = RA[INDEX2D_R(1,j2)]; + RB1 = RB[INDEX2D_R(1,j2)]; + RE1 = RE[INDEX2D_R(1,j2)]; + RF1 = RF[INDEX2D_R(1,j2)]; + RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__global__ void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hx and Hz field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g.
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dEx, dEz; + $REAL dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j1)] - 1; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,j2)] - 1; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + 
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__global__ void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hx and Hz field components for the yplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g.
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz; + $REAL dy = d; + int ii, jj, kk, materialHx, materialHz; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j1)]; + RB0 = RB[INDEX2D_R(0,j1)]; + RE0 = RE[INDEX2D_R(0,j1)]; + RF0 = RF[INDEX2D_R(0,j1)]; + RA1 = RA[INDEX2D_R(1,j1)]; + RB1 = RB[INDEX2D_R(1,j1)]; + RE1 = RE[INDEX2D_R(1,j1)]; + RF1 = RF[INDEX2D_R(1,j1)]; + RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,j2)]; + RB0 = RB[INDEX2D_R(0,j2)]; + RE0 = RE[INDEX2D_R(0,j2)]; + RF0 = RF[INDEX2D_R(0,j2)]; + RA1 = RA[INDEX2D_R(1,j2)]; + RB1 = RB[INDEX2D_R(1,j2)]; + RE1 = RE[INDEX2D_R(1,j2)]; + RF1 = RF[INDEX2D_R(1,j2)]; + RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1; + + // Hz + materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy; + Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__global__ void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hx and Hy field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g.
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dEx, dEy; + $REAL dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - (k1 + 1); + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k1)] - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - (k2 + 1); + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k2)] - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + + // Hy + materialHy = 
ID[INDEX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__global__ void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hx and Hy field components for the zminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g.
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy; + $REAL dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - (k1 + 1); + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k1)]; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RA1 = RA[INDEX2D_R(1,k1)]; + RB1 = RB[INDEX2D_R(1,k1)]; + RE1 = RE[INDEX2D_R(1,k1)]; + RF1 = RF[INDEX2D_R(1,k1)]; + RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - (k2 + 1); + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k2)]; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RA1 = RA[INDEX2D_R(1,k2)]; + RB1 = RB[INDEX2D_R(1,k2)]; + RE1 = RE[INDEX2D_R(1,k2)]; + RF1 = RF[INDEX2D_R(1,k2)]; + RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__global__ void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hx and Hy field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g.
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA01, RB0, RE0, RF0, dEx, dEy; + $REAL dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k1)] - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA01 = RA[INDEX2D_R(0,k2)] - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + 
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + + +__global__ void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hx and Hy field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current thread + int idx = blockIdx.x * blockDim.x + threadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy; + $REAL dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k1)]; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RA1 = RA[INDEX2D_R(1,k1)]; + RB1 = RB[INDEX2D_R(1,k1)]; + RE1 = RE[INDEX2D_R(1,k1)]; + RF1 = RF[INDEX2D_R(1,k1)]; + RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + RA0 = RA[INDEX2D_R(0,k2)]; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RA1 = RA[INDEX2D_R(1,k2)]; + RB1 = RB[INDEX2D_R(1,k2)]; + RE1 = RE[INDEX2D_R(1,k2)]; + RF1 = RF[INDEX2D_R(1,k2)]; + RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx; + } +} + +""") diff --git a/gprMax/pml_updates/pml_updates_magnetic_MRIPML_ext.pyx b/gprMax/pml_updates/pml_updates_magnetic_MRIPML_ext.pyx new file mode 100644 index 00000000..0204f1cb --- /dev/null +++ b/gprMax/pml_updates/pml_updates_magnetic_MRIPML_ext.pyx @@ -0,0 +1,857 @@ +# Copyright (C) 2015-2019: The University of Edinburgh +# Authors: Craig Warren and Antonis Giannopoulos +# +# This file is part of gprMax. +# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see . + +import numpy as np +cimport numpy as np +from cython.parallel import prange + +from gprMax.constants cimport floattype_t + + +cpdef void order1_xminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hy and Hz field components for the xminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. 
dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHy, materialHz + cdef floattype_t dx, dEy, dEz, IRA, IRA1, RB0, RC0, RE0, RF0 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = xf - (i + 1) + IRA = 1 / RA[0, i] + IRA1 = IRA - 1 + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + RC0 = IRA * RB0 * RF0 + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + # Hy + materialHy = ID[4, ii, jj, kk] + dEz = (Ez[ii + 1, jj, kk] - Ez[ii, jj, kk]) / dx + Hy[ii, jj, kk] = Hy[ii, jj, kk] + updatecoeffsH[materialHy, 4] * (IRA1 * dEz - IRA * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * dEz - RC0 * Phi1[0, i, j, k] + # Hz + materialHz = ID[5, ii, jj, kk] + dEy = (Ey[ii + 1, jj, kk] - Ey[ii, jj, kk]) / dx + Hz[ii, jj, kk] = Hz[ii, jj, kk] - updatecoeffsH[materialHz, 4] * (IRA1 * dEy - IRA * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * dEy - RC0 * Phi2[0, i, j, k] + +cpdef void order2_xminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hy and Hz field components for the xminus slab. 
+ + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHy, materialHz + cdef floattype_t dx, dEy, dEz, IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = xf - (i + 1) + IRA = 1 / (RA[0, i] + RA[1, i]) + IRA1 = IRA - 1 + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + RC0 = IRA * RF0 + RB1 = RB[1, i] + RE1 = RE[1, i] + RF1 = RF[1, i] + RC1 = IRA * RF1 + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + Psi1 = RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k] + Psi2 = RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k] + # Hy + materialHy = ID[4, ii, jj, kk] + dEz = (Ez[ii + 1, jj, kk] - Ez[ii, jj, kk]) / dx + Hy[ii, jj, kk] = Hy[ii, jj, kk] + updatecoeffsH[materialHy, 4] * (IRA1 * dEz - IRA * Psi1) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] + RC1 * (dEz - Psi1) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * (dEz - Psi1) + # Hz + materialHz = ID[5, ii, jj, kk] + dEy = (Ey[ii + 1, jj, kk] - Ey[ii, jj, kk]) / dx + Hz[ii, jj, kk] = Hz[ii, jj, kk] - updatecoeffsH[materialHz, 4] * (IRA1 * dEy - IRA * Psi2) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] + RC1 * (dEy - Psi2) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * (dEy - Psi2) + + +cpdef void order1_xplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + 
floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hy and Hz field components for the xplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHy, materialHz + cdef floattype_t dx, dEy, dEz, IRA, IRA1, RB0, RC0, RE0, RF0 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + IRA = 1 / RA[0, i] + IRA1 = IRA - 1 + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + RC0 = IRA * RB0 * RF0 + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + # Hy + materialHy = ID[4, ii, jj, kk] + dEz = (Ez[ii + 1, jj, kk] - Ez[ii, jj, kk]) / dx + Hy[ii, jj, kk] = Hy[ii, jj, kk] + updatecoeffsH[materialHy, 4] * (IRA1 * dEz - IRA * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * dEz - RC0 * Phi1[0, i, j, k] + # Hz + materialHz = ID[5, ii, jj, kk] + dEy = (Ey[ii + 1, jj, kk] - Ey[ii, jj, kk]) / dx + Hz[ii, jj, kk] = Hz[ii, jj, kk] - updatecoeffsH[materialHz, 4] * (IRA1 * dEy - IRA * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * dEy - RC0 * Phi2[0, i, j, k] + +cpdef void order2_xplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] 
Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hy and Hz field components for the xplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHy, materialHz + cdef floattype_t dx, dEy, dEz, IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2 + dx = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + IRA = 1 / (RA[0, i] + RA[1, i]) + IRA1 = IRA - 1 + RB0 = RB[0, i] + RE0 = RE[0, i] + RF0 = RF[0, i] + RC0 = IRA * RF0 + RB1 = RB[1, i] + RE1 = RE[1, i] + RF1 = RF[1, i] + RC1 = IRA * RF1 + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + Psi1 = RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k] + Psi2 = RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k] + # Hy + materialHy = ID[4, ii, jj, kk] + dEz = (Ez[ii + 1, jj, kk] - Ez[ii, jj, kk]) / dx + Hy[ii, jj, kk] = Hy[ii, jj, kk] + updatecoeffsH[materialHy, 4] * (IRA1 * dEz - IRA * Psi1) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] + RC1 * (dEz - Psi1) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * (dEz - Psi1) + # Hz + materialHz = ID[5, ii, jj, kk] + dEy = (Ey[ii + 1, jj, kk] - Ey[ii, jj, kk]) / dx + Hz[ii, jj, kk] = Hz[ii, jj, kk] - updatecoeffsH[materialHz, 4] * (IRA1 * dEy - IRA * Psi2) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] + RC1 * (dEy - Psi2) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 
* (dEy - Psi2) + + +cpdef void order1_yminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hz field components for the yminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHz + cdef floattype_t dy, dEx, dEz, IRA, IRA1, RB0, RC0, RE0, RF0 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = yf - (j + 1) + IRA = 1 / RA[0, j] + IRA1 = IRA - 1 + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + RC0 = IRA * RB0 * RF0 + for k in range(0, nz): + kk = k + zs + # Hx + materialHx = ID[3, ii, jj, kk] + dEz = (Ez[ii, jj + 1, kk] - Ez[ii, jj, kk]) / dy + Hx[ii, jj, kk] = Hx[ii, jj, kk] - updatecoeffsH[materialHx, 4] * (IRA1 * dEz - IRA * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * dEz - RC0 * Phi1[0, i, j, k] + # Hz + materialHz = ID[5, ii, jj, kk] + dEx = (Ex[ii, jj + 1, kk] - Ex[ii, jj, kk]) / dy + Hz[ii, jj, kk] = Hz[ii, jj, kk] + updatecoeffsH[materialHz, 4] * (IRA1 * dEx - IRA * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * 
dEx - RC0 * Phi2[0, i, j, k] + +cpdef void order2_yminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hz field components for the yminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHz + cdef floattype_t dy, dEx, dEz, IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = yf - (j + 1) + IRA = 1 / (RA[0, j] + RA[1, j]) + IRA1 = IRA - 1 + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + RC0 = IRA * RF0 + RB1 = RB[1, j] + RE1 = RE[1, j] + RF1 = RF[1, j] + RC1 = IRA * RF1 + for k in range(0, nz): + kk = k + zs + Psi1 = RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k] + Psi2 = RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k] + # Hx + materialHx = ID[3, ii, jj, kk] + dEz = (Ez[ii, jj + 1, kk] - Ez[ii, jj, kk]) / dy + Hx[ii, jj, kk] = Hx[ii, jj, kk] - updatecoeffsH[materialHx, 4] * (IRA1 * dEz - IRA * Psi1) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] + RC1 * (dEz - Psi1) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, 
k] + RC0 * (dEz - Psi1) + # Hz + materialHz = ID[5, ii, jj, kk] + dEx = (Ex[ii, jj + 1, kk] - Ex[ii, jj, kk]) / dy + Hz[ii, jj, kk] = Hz[ii, jj, kk] + updatecoeffsH[materialHz, 4] * (IRA1 * dEx - IRA * Psi2) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] + RC1 * (dEx - Psi2) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * (dEx - Psi2) + + +cpdef void order1_yplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hz field components for the yplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. 
dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHz + cdef floattype_t dy, dEx, dEz, IRA, IRA1, RB0, RC0, RE0, RF0 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + IRA = 1 / RA[0, j] + IRA1 = IRA - 1 + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + RC0 = IRA * RB0 * RF0 + for k in range(0, nz): + kk = k + zs + # Hx + materialHx = ID[3, ii, jj, kk] + dEz = (Ez[ii, jj + 1, kk] - Ez[ii, jj, kk]) / dy + Hx[ii, jj, kk] = Hx[ii, jj, kk] - updatecoeffsH[materialHx, 4] * (IRA1 * dEz - IRA * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * dEz - RC0 * Phi1[0, i, j, k] + # Hz + materialHz = ID[5, ii, jj, kk] + dEx = (Ex[ii, jj + 1, kk] - Ex[ii, jj, kk]) / dy + Hz[ii, jj, kk] = Hz[ii, jj, kk] + updatecoeffsH[materialHz, 4] * (IRA1 * dEx - IRA * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * dEx - RC0 * Phi2[0, i, j, k] + +cpdef void order2_yplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hz field components for the yplus slab. 
+ + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHz + cdef floattype_t dy, dEx, dEz, IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2 + dy = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + IRA = 1 / (RA[0, j] + RA[1, j]) + IRA1 = IRA - 1 + RB0 = RB[0, j] + RE0 = RE[0, j] + RF0 = RF[0, j] + RC0 = IRA * RF0 + RB1 = RB[1, j] + RE1 = RE[1, j] + RF1 = RF[1, j] + RC1 = IRA * RF1 + for k in range(0, nz): + kk = k + zs + Psi1 = RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k] + Psi2 = RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k] + # Hx + materialHx = ID[3, ii, jj, kk] + dEz = (Ez[ii, jj + 1, kk] - Ez[ii, jj, kk]) / dy + Hx[ii, jj, kk] = Hx[ii, jj, kk] - updatecoeffsH[materialHx, 4] * (IRA1 * dEz - IRA * Psi1) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] + RC1 * (dEz - Psi1) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * (dEz - Psi1) + # Hz + materialHz = ID[5, ii, jj, kk] + dEx = (Ex[ii, jj + 1, kk] - Ex[ii, jj, kk]) / dy + Hz[ii, jj, kk] = Hz[ii, jj, kk] + updatecoeffsH[materialHz, 4] * (IRA1 * dEx - IRA * Psi2) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] + RC1 * (dEx - Psi2) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * (dEx - Psi2) + + +cpdef void order1_zminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, 
::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hy field components for the zminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHy + cdef floattype_t dz, dEx, dEy, IRA, IRA1, RB0, RC0, RE0, RF0 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = zf - (k + 1) + IRA = 1 / RA[0, k] + IRA1 = IRA - 1 + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + RC0 = IRA * RB0 * RF0 + # Hx + materialHx = ID[3, ii, jj, kk] + dEy = (Ey[ii, jj, kk + 1] - Ey[ii, jj, kk]) / dz + Hx[ii, jj, kk] = Hx[ii, jj, kk] + updatecoeffsH[materialHx, 4] * (IRA1 * dEy - IRA * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * dEy - RC0 * Phi1[0, i, j, k] + # Hy + materialHy = ID[4, ii, jj, kk] + dEx = (Ex[ii, jj, kk + 1] - Ex[ii, jj, kk]) / dz + Hy[ii, jj, kk] = Hy[ii, jj, kk] - updatecoeffsH[materialHy, 4] * (IRA1 * dEx - IRA * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * dEx - RC0 * Phi2[0, i, j, k] + +cpdef void order2_zminus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + 
floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hy field components for the zminus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHy + cdef floattype_t dz, dEx, dEy, IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = zf - (k + 1) + IRA = 1 / (RA[0, k] + RA[1, k]) + IRA1 = IRA - 1 + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + RC0 = IRA * RF0 + RB1 = RB[1, k] + RE1 = RE[1, k] + RF1 = RF[1, k] + RC1 = IRA * RF1 + Psi1 = RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k] + Psi2 = RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k] + # Hx + materialHx = ID[3, ii, jj, kk] + dEy = (Ey[ii, jj, kk + 1] - Ey[ii, jj, kk]) / dz + Hx[ii, jj, kk] = Hx[ii, jj, kk] + updatecoeffsH[materialHx, 4] * (IRA1 * dEy - IRA * Psi1) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] + RC1 * (dEy - Psi1) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * (dEy - Psi1) + # Hy + materialHy = ID[4, ii, jj, kk] + dEx = (Ex[ii, jj, kk + 1] - Ex[ii, jj, kk]) / dz + Hy[ii, jj, kk] = Hy[ii, jj, kk] - updatecoeffsH[materialHy, 4] * (IRA1 * dEx - IRA * Psi2) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] + RC1 * (dEx - Psi2) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 
* (dEx - Psi2) + + +cpdef void order1_zplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hy field components for the zplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHy + cdef floattype_t dz, dEx, dEy, IRA, IRA1, RB0, RC0, RE0, RF0 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + IRA = 1 / RA[0, k] + IRA1 = IRA - 1 + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + RC0 = IRA * RB0 * RF0 + # Hx + materialHx = ID[3, ii, jj, kk] + dEy = (Ey[ii, jj, kk + 1] - Ey[ii, jj, kk]) / dz + Hx[ii, jj, kk] = Hx[ii, jj, kk] + updatecoeffsH[materialHx, 4] * (IRA1 * dEy - IRA * Phi1[0, i, j, k]) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * dEy - RC0 * Phi1[0, i, j, k] + # Hy + materialHy = ID[4, ii, jj, kk] + dEx = (Ex[ii, jj, kk + 1] - Ex[ii, jj, kk]) / dz + Hy[ii, jj, kk] = Hy[ii, jj, kk] - updatecoeffsH[materialHy, 4] * (IRA1 * dEx - IRA * Phi2[0, i, j, k]) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * dEx - 
RC0 * Phi2[0, i, j, k] + +cpdef void order2_zplus( + int xs, + int xf, + int ys, + int yf, + int zs, + int zf, + int nthreads, + floattype_t[:, ::1] updatecoeffsH, + np.uint32_t[:, :, :, ::1] ID, + floattype_t[:, :, ::1] Ex, + floattype_t[:, :, ::1] Ey, + floattype_t[:, :, ::1] Ez, + floattype_t[:, :, ::1] Hx, + floattype_t[:, :, ::1] Hy, + floattype_t[:, :, ::1] Hz, + floattype_t[:, :, :, ::1] Phi1, + floattype_t[:, :, :, ::1] Phi2, + floattype_t[:, ::1] RA, + floattype_t[:, ::1] RB, + floattype_t[:, ::1] RE, + floattype_t[:, ::1] RF, + float d + ): + """This function updates the Hx and Hy field components for the zplus slab. + + Args: + xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box + nthreads (int): Number of threads to use + updatecoeffs, ID, E, H (memoryviews): Access to update coefficients, ID and field component arrays + Phi, RA, RB, RE, RF (memoryviews): Access to PML coefficient arrays + d (float): Spatial discretisation, e.g. dx, dy or dz + """ + + cdef Py_ssize_t i, j, k, ii, jj, kk + cdef int nx, ny, nz, materialHx, materialHy + cdef floattype_t dz, dEx, dEy, IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2 + dz = d + nx = xf - xs + ny = yf - ys + nz = zf - zs + + for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): + ii = i + xs + for j in range(0, ny): + jj = j + ys + for k in range(0, nz): + kk = k + zs + IRA = 1 / (RA[0, k] + RA[1, k]) + IRA1 = IRA - 1 + RB0 = RB[0, k] + RE0 = RE[0, k] + RF0 = RF[0, k] + RC0 = IRA * RF0 + RB1 = RB[1, k] + RE1 = RE[1, k] + RF1 = RF[1, k] + RC1 = IRA * RF1 + Psi1 = RB0 * Phi1[0, i, j, k] + RB1 * Phi1[1, i, j, k] + Psi2 = RB0 * Phi2[0, i, j, k] + RB1 * Phi2[1, i, j, k] + # Hx + materialHx = ID[3, ii, jj, kk] + dEy = (Ey[ii, jj, kk + 1] - Ey[ii, jj, kk]) / dz + Hx[ii, jj, kk] = Hx[ii, jj, kk] + updatecoeffsH[materialHx, 4] * (IRA1 * dEy - IRA * Psi1) + Phi1[1, i, j, k] = RE1 * Phi1[1, i, j, k] + RC1 * (dEy - Psi1) + Phi1[0, i, j, k] = RE0 * Phi1[0, i, j, k] + RC0 * (dEy 
- Psi1) + # Hy + materialHy = ID[4, ii, jj, kk] + dEx = (Ex[ii, jj, kk + 1] - Ex[ii, jj, kk]) / dz + Hy[ii, jj, kk] = Hy[ii, jj, kk] - updatecoeffsH[materialHy, 4] * (IRA1 * dEx - IRA * Psi2) + Phi2[1, i, j, k] = RE1 * Phi2[1, i, j, k] + RC1 * (dEx - Psi2) + Phi2[0, i, j, k] = RE0 * Phi2[0, i, j, k] + RC0 * (dEx - Psi2) diff --git a/gprMax/pml_updates/pml_updates_magnetic_MRIPML_gpu.py b/gprMax/pml_updates/pml_updates_magnetic_MRIPML_gpu.py new file mode 100644 index 00000000..79f3f302 --- /dev/null +++ b/gprMax/pml_updates/pml_updates_magnetic_MRIPML_gpu.py @@ -0,0 +1,1030 @@ +# Copyright (C) 2015-2019: The University of Edinburgh +# Authors: Craig Warren and Antonis Giannopoulos +# +# This file is part of gprMax. +# +# gprMax is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# gprMax is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with gprMax. If not, see . 
+ +from string import Template + +kernels_template_pml_magnetic_MRIPML = Template(""" + +// Macros for converting subscripts to linear index: +#define INDEX2D_R(m, n) (m)*($NY_R)+(n) +#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n) +#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k) +#define INDEX4D_ID(p, i, j, k) (p)*($NX_ID)*($NY_ID)*($NZ_ID)+(i)*($NY_ID)*($NZ_ID)+(j)*($NZ_ID)+(k) +#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k) +#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k) + +// Material coefficients (read-only) in constant memory (64KB) +__device__ __constant__ $REAL updatecoeffsH[$N_updatecoeffsH]; + + +__global__ void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hy and Hz field components for the xminus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
//      dx, dy or dz

    // Obtain the linear index corresponding to the current thread
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Convert the linear index to subscripts for PML PHI1 (4D) arrays
    int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
    int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
    int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;

    // Convert the linear index to subscripts for PML PHI2 (4D) arrays
    int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
    int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
    int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;

    $REAL IRA, IRA1, RB0, RC0, RE0, RF0, dEy, dEz;
    $REAL dx = d;
    int ii, jj, kk, materialHy, materialHz;
    int nx = xf - xs;
    int ny = yf - ys;
    int nz = zf - zs;

    // Only threads whose index lies in the first pole plane (p1 == 0) and
    // inside the slab extents do any work
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays (x runs backwards: PML index 0 maps to field index xf - 1)
        ii = xf - (i1 + 1);
        jj = j1 + ys;
        kk = k1 + zs;

        // PML coefficients (vary along x only)
        IRA = 1 / RA[INDEX2D_R(0,i1)];
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,i1)];
        RE0 = RE[INDEX2D_R(0,i1)];
        RF0 = RF[INDEX2D_R(0,i1)];
        RC0 = IRA * RB0 * RF0;

        // Hy
        materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
        dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
        Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (IRA1 * dEz - IRA * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
        // The auxiliary variable is advanced after the field update, which uses its previous value
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * dEz - RC0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)];
    }

    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays (x runs backwards: PML index 0 maps to field index xf - 1)
        ii = xf - (i2 + 1);
        jj = j2 + ys;
        kk = k2 + zs;

        // PML coefficients (vary along x only)
        IRA = 1 / RA[INDEX2D_R(0,i2)];
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,i2)];
        RE0 = RE[INDEX2D_R(0,i2)];
        RF0 = RF[INDEX2D_R(0,i2)];
        RC0 = IRA * RB0 * RF0;

        // Hz
        materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
        dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
        Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (IRA1 * dEy - IRA * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * dEy - RC0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)];
    }
}


__global__ void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {

    //  This function updates the Hy and Hz field components for the xminus slab.
    //
    //  Args:
    //      xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //      NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays
    //      ID, E, H: Access to ID and field component arrays
    //      PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //      d: Spatial discretisation, e.g.
//      dx, dy or dz

    // Obtain the linear index corresponding to the current thread
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Convert the linear index to subscripts for PML PHI1 (4D) arrays
    int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
    int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
    int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;

    // Convert the linear index to subscripts for PML PHI2 (4D) arrays
    int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
    int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
    int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;

    $REAL IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2, dEy, dEz;
    $REAL dx = d;
    int ii, jj, kk, materialHy, materialHz;
    int nx = xf - xs;
    int ny = yf - ys;
    int nz = zf - zs;

    // One thread handles both poles of a cell: only threads whose index lies in
    // the first pole plane (p1 == 0) and inside the slab extents do any work
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays (x runs backwards: PML index 0 maps to field index xf - 1)
        ii = xf - (i1 + 1);
        jj = j1 + ys;
        kk = k1 + zs;

        // PML coefficients for pole 0 and pole 1 (vary along x only)
        IRA = 1 / (RA[INDEX2D_R(0,i1)] + RA[INDEX2D_R(1,i1)]);
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,i1)];
        RE0 = RE[INDEX2D_R(0,i1)];
        RF0 = RF[INDEX2D_R(0,i1)];
        RC0 = IRA * RF0;
        RB1 = RB[INDEX2D_R(1,i1)];
        RE1 = RE[INDEX2D_R(1,i1)];
        RF1 = RF[INDEX2D_R(1,i1)];
        RC1 = IRA * RF1;

        // Hy
        // Psi1 is the RB-weighted sum of both poles, taken before either pole is advanced
        Psi1 = RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)];
        materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
        dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
        Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (IRA1 * dEz - IRA * Psi1);
        PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] + RC1 * (dEz - Psi1);
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * (dEz - Psi1);
    }

    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays (x runs backwards: PML index 0 maps to field index xf - 1)
        ii = xf - (i2 + 1);
        jj = j2 + ys;
        kk = k2 + zs;

        // PML coefficients for pole 0 and pole 1 (vary along x only)
        IRA = 1 / (RA[INDEX2D_R(0,i2)] + RA[INDEX2D_R(1,i2)]);
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,i2)];
        RE0 = RE[INDEX2D_R(0,i2)];
        RF0 = RF[INDEX2D_R(0,i2)];
        RC0 = IRA * RF0;
        RB1 = RB[INDEX2D_R(1,i2)];
        RE1 = RE[INDEX2D_R(1,i2)];
        RF1 = RF[INDEX2D_R(1,i2)];
        RC1 = IRA * RF1;

        // Hz
        Psi2 = RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)];
        materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
        dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
        // The field derivative is scaled by IRA1 (= IRA - 1), exactly as in the Hy update of this kernel
        Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (IRA1 * dEy - IRA * Psi2);
        PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] + RC1 * (dEy - Psi2);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * (dEy - Psi2);
    }
}


__global__ void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {

    //  This function updates the Hy and Hz field components for the xplus slab.
    //
    //  Args:
    //      xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //      NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays
    //      ID, E, H: Access to ID and field component arrays
    //      PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //      d: Spatial discretisation, e.g.
//      dx, dy or dz

    // Obtain the linear index corresponding to the current thread
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Convert the linear index to subscripts for PML PHI1 (4D) arrays
    int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
    int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
    int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;

    // Convert the linear index to subscripts for PML PHI2 (4D) arrays
    int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
    int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
    int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;

    $REAL IRA, IRA1, RB0, RC0, RE0, RF0, dEy, dEz;
    $REAL dx = d;
    int ii, jj, kk, materialHy, materialHz;
    int nx = xf - xs;
    int ny = yf - ys;
    int nz = zf - zs;

    // Only threads whose index lies in the first pole plane (p1 == 0) and
    // inside the slab extents do any work
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays (PML index 0 maps to field index xs)
        ii = i1 + xs;
        jj = j1 + ys;
        kk = k1 + zs;

        // PML coefficients (vary along x only)
        IRA = 1 / RA[INDEX2D_R(0,i1)];
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,i1)];
        RE0 = RE[INDEX2D_R(0,i1)];
        RF0 = RF[INDEX2D_R(0,i1)];
        RC0 = IRA * RB0 * RF0;

        // Hy
        materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
        dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
        Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (IRA1 * dEz - IRA * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
        // The auxiliary variable is advanced after the field update, which uses its previous value
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * dEz - RC0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)];
    }

    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays (PML index 0 maps to field index xs)
        ii = i2 + xs;
        jj = j2 + ys;
        kk = k2 + zs;

        // PML coefficients (vary along x only)
        IRA = 1 / RA[INDEX2D_R(0,i2)];
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,i2)];
        RE0 = RE[INDEX2D_R(0,i2)];
        RF0 = RF[INDEX2D_R(0,i2)];
        RC0 = IRA * RB0 * RF0;

        // Hz
        materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
        dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
        Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (IRA1 * dEy - IRA * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * dEy - RC0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)];
    }
}


__global__ void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {

    //  This function updates the Hy and Hz field components for the xplus slab.
    //
    //  Args:
    //      xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //      NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays
    //      ID, E, H: Access to ID and field component arrays
    //      PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //      d: Spatial discretisation, e.g.
//      dx, dy or dz

    // Obtain the linear index corresponding to the current thread
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Convert the linear index to subscripts for PML PHI1 (4D) arrays
    int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
    int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
    int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;

    // Convert the linear index to subscripts for PML PHI2 (4D) arrays
    int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
    int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
    int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;

    $REAL IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2, dEy, dEz;
    $REAL dx = d;
    int ii, jj, kk, materialHy, materialHz;
    int nx = xf - xs;
    int ny = yf - ys;
    int nz = zf - zs;

    // One thread handles both poles of a cell: only threads whose index lies in
    // the first pole plane (p1 == 0) and inside the slab extents do any work
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays (PML index 0 maps to field index xs)
        ii = i1 + xs;
        jj = j1 + ys;
        kk = k1 + zs;

        // PML coefficients for pole 0 and pole 1 (vary along x only)
        IRA = 1 / (RA[INDEX2D_R(0,i1)] + RA[INDEX2D_R(1,i1)]);
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,i1)];
        RE0 = RE[INDEX2D_R(0,i1)];
        RF0 = RF[INDEX2D_R(0,i1)];
        RC0 = IRA * RF0;
        RB1 = RB[INDEX2D_R(1,i1)];
        RE1 = RE[INDEX2D_R(1,i1)];
        RF1 = RF[INDEX2D_R(1,i1)];
        RC1 = IRA * RF1;

        // Hy
        // Psi1 is the RB-weighted sum of both poles, taken before either pole is advanced
        Psi1 = RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)];
        materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
        dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
        Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (IRA1 * dEz - IRA * Psi1);
        PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] + RC1 * (dEz - Psi1);
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * (dEz - Psi1);
    }

    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays (PML index 0 maps to field index xs)
        ii = i2 + xs;
        jj = j2 + ys;
        kk = k2 + zs;

        // PML coefficients for pole 0 and pole 1 (vary along x only)
        IRA = 1 / (RA[INDEX2D_R(0,i2)] + RA[INDEX2D_R(1,i2)]);
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,i2)];
        RE0 = RE[INDEX2D_R(0,i2)];
        RF0 = RF[INDEX2D_R(0,i2)];
        RC0 = IRA * RF0;
        RB1 = RB[INDEX2D_R(1,i2)];
        RE1 = RE[INDEX2D_R(1,i2)];
        RF1 = RF[INDEX2D_R(1,i2)];
        RC1 = IRA * RF1;

        // Hz
        Psi2 = RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)];
        materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
        dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
        Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (IRA1 * dEy - IRA * Psi2);
        PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] + RC1 * (dEy - Psi2);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * (dEy - Psi2);
    }
}


__global__ void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {

    //  This function updates the Hx and Hz field components for the yminus slab.
    //
    //  Args:
    //      xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //      NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays
    //      ID, E, H: Access to ID and field component arrays
    //      PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //      d: Spatial discretisation, e.g.
//      dx, dy or dz

    // Obtain the linear index corresponding to the current thread
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Convert the linear index to subscripts for PML PHI1 (4D) arrays
    int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
    int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
    int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;

    // Convert the linear index to subscripts for PML PHI2 (4D) arrays
    int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
    int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
    int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;

    $REAL IRA, IRA1, RB0, RC0, RE0, RF0, dEx, dEz;
    $REAL dy = d;
    int ii, jj, kk, materialHx, materialHz;
    int nx = xf - xs;
    int ny = yf - ys;
    int nz = zf - zs;

    // Only threads whose index lies in the first pole plane (p1 == 0) and
    // inside the slab extents do any work
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays (y runs backwards: PML index 0 maps to field index yf - 1)
        ii = i1 + xs;
        jj = yf - (j1 + 1);
        kk = k1 + zs;

        // PML coefficients (vary along y only)
        IRA = 1 / RA[INDEX2D_R(0,j1)];
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,j1)];
        RE0 = RE[INDEX2D_R(0,j1)];
        RF0 = RF[INDEX2D_R(0,j1)];
        RC0 = IRA * RB0 * RF0;

        // Hx
        materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
        dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
        Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (IRA1 * dEz - IRA * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
        // The auxiliary variable is advanced after the field update, which uses its previous value
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * dEz - RC0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)];
    }

    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays (y runs backwards: PML index 0 maps to field index yf - 1)
        ii = i2 + xs;
        jj = yf - (j2 + 1);
        kk = k2 + zs;

        // PML coefficients (vary along y only)
        IRA = 1 / RA[INDEX2D_R(0,j2)];
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,j2)];
        RE0 = RE[INDEX2D_R(0,j2)];
        RF0 = RF[INDEX2D_R(0,j2)];
        RC0 = IRA * RB0 * RF0;

        // Hz
        materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
        dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
        Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (IRA1 * dEx - IRA * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * dEx - RC0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)];
    }
}


__global__ void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {

    //  This function updates the Hx and Hz field components for the yminus slab.
    //
    //  Args:
    //      xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //      NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays
    //      ID, E, H: Access to ID and field component arrays
    //      PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //      d: Spatial discretisation, e.g.
//      dx, dy or dz

    // Obtain the linear index corresponding to the current thread
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Convert the linear index to subscripts for PML PHI1 (4D) arrays
    int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
    int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
    int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;

    // Convert the linear index to subscripts for PML PHI2 (4D) arrays
    int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
    int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
    int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;

    $REAL IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2, dEx, dEz;
    $REAL dy = d;
    int ii, jj, kk, materialHx, materialHz;
    int nx = xf - xs;
    int ny = yf - ys;
    int nz = zf - zs;

    // One thread handles both poles of a cell: only threads whose index lies in
    // the first pole plane (p1 == 0) and inside the slab extents do any work
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays (y runs backwards: PML index 0 maps to field index yf - 1)
        ii = i1 + xs;
        jj = yf - (j1 + 1);
        kk = k1 + zs;

        // PML coefficients for pole 0 and pole 1 (vary along y only)
        IRA = 1 / (RA[INDEX2D_R(0,j1)] + RA[INDEX2D_R(1,j1)]);
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,j1)];
        RE0 = RE[INDEX2D_R(0,j1)];
        RF0 = RF[INDEX2D_R(0,j1)];
        RC0 = IRA * RF0;
        RB1 = RB[INDEX2D_R(1,j1)];
        RE1 = RE[INDEX2D_R(1,j1)];
        RF1 = RF[INDEX2D_R(1,j1)];
        RC1 = IRA * RF1;

        // Hx
        // Psi1 is the RB-weighted sum of both poles, taken before either pole is advanced
        Psi1 = RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)];
        materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
        dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
        Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (IRA1 * dEz - IRA * Psi1);
        PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] + RC1 * (dEz - Psi1);
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * (dEz - Psi1);
    }

    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays (y runs backwards: PML index 0 maps to field index yf - 1)
        ii = i2 + xs;
        jj = yf - (j2 + 1);
        kk = k2 + zs;

        // PML coefficients for pole 0 and pole 1 (vary along y only)
        IRA = 1 / (RA[INDEX2D_R(0,j2)] + RA[INDEX2D_R(1,j2)]);
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,j2)];
        RE0 = RE[INDEX2D_R(0,j2)];
        RF0 = RF[INDEX2D_R(0,j2)];
        RC0 = IRA * RF0;
        RB1 = RB[INDEX2D_R(1,j2)];
        RE1 = RE[INDEX2D_R(1,j2)];
        RF1 = RF[INDEX2D_R(1,j2)];
        RC1 = IRA * RF1;

        // Hz
        Psi2 = RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)];
        materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
        dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
        Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (IRA1 * dEx - IRA * Psi2);
        PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] + RC1 * (dEx - Psi2);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * (dEx - Psi2);
    }
}


__global__ void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {

    //  This function updates the Hx and Hz field components for the yplus slab.
    //
    //  Args:
    //      xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //      NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays
    //      ID, E, H: Access to ID and field component arrays
    //      PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //      d: Spatial discretisation, e.g.
//      dx, dy or dz

    // Obtain the linear index corresponding to the current thread
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Convert the linear index to subscripts for PML PHI1 (4D) arrays
    int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
    int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
    int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;

    // Convert the linear index to subscripts for PML PHI2 (4D) arrays
    int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
    int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
    int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;

    $REAL IRA, IRA1, RB0, RC0, RE0, RF0, dEx, dEz;
    $REAL dy = d;
    int ii, jj, kk, materialHx, materialHz;
    int nx = xf - xs;
    int ny = yf - ys;
    int nz = zf - zs;

    // Only threads whose index lies in the first pole plane (p1 == 0) and
    // inside the slab extents do any work
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays (PML index 0 maps to field index ys)
        ii = i1 + xs;
        jj = j1 + ys;
        kk = k1 + zs;

        // PML coefficients (vary along y only)
        IRA = 1 / RA[INDEX2D_R(0,j1)];
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,j1)];
        RE0 = RE[INDEX2D_R(0,j1)];
        RF0 = RF[INDEX2D_R(0,j1)];
        RC0 = IRA * RB0 * RF0;

        // Hx
        materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
        dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
        Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (IRA1 * dEz - IRA * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
        // The auxiliary variable is advanced after the field update, which uses its previous value
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * dEz - RC0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)];
    }

    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays (PML index 0 maps to field index ys)
        ii = i2 + xs;
        jj = j2 + ys;
        kk = k2 + zs;

        // PML coefficients (vary along y only)
        IRA = 1 / RA[INDEX2D_R(0,j2)];
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,j2)];
        RE0 = RE[INDEX2D_R(0,j2)];
        RF0 = RF[INDEX2D_R(0,j2)];
        RC0 = IRA * RB0 * RF0;

        // Hz
        materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
        dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
        Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (IRA1 * dEx - IRA * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * dEx - RC0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)];
    }
}


__global__ void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {

    //  This function updates the Hx and Hz field components for the yplus slab.
    //
    //  Args:
    //      xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //      NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays
    //      ID, E, H: Access to ID and field component arrays
    //      PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //      d: Spatial discretisation, e.g.
//      dx, dy or dz

    // Obtain the linear index corresponding to the current thread
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Convert the linear index to subscripts for PML PHI1 (4D) arrays
    int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
    int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
    int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;

    // Convert the linear index to subscripts for PML PHI2 (4D) arrays
    int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
    int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
    int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;

    $REAL IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2, dEx, dEz;
    $REAL dy = d;
    int ii, jj, kk, materialHx, materialHz;
    int nx = xf - xs;
    int ny = yf - ys;
    int nz = zf - zs;

    // One thread handles both poles of a cell: only threads whose index lies in
    // the first pole plane (p1 == 0) and inside the slab extents do any work
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays (PML index 0 maps to field index ys)
        ii = i1 + xs;
        jj = j1 + ys;
        kk = k1 + zs;

        // PML coefficients for pole 0 and pole 1 (vary along y only)
        IRA = 1 / (RA[INDEX2D_R(0,j1)] + RA[INDEX2D_R(1,j1)]);
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,j1)];
        RE0 = RE[INDEX2D_R(0,j1)];
        RF0 = RF[INDEX2D_R(0,j1)];
        RC0 = IRA * RF0;
        RB1 = RB[INDEX2D_R(1,j1)];
        RE1 = RE[INDEX2D_R(1,j1)];
        RF1 = RF[INDEX2D_R(1,j1)];
        RC1 = IRA * RF1;

        // Hx
        // Psi1 is the RB-weighted sum of both poles, taken before either pole is advanced
        Psi1 = RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)];
        materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
        dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
        Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (IRA1 * dEz - IRA * Psi1);
        PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] + RC1 * (dEz - Psi1);
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * (dEz - Psi1);
    }

    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays (PML index 0 maps to field index ys)
        ii = i2 + xs;
        jj = j2 + ys;
        kk = k2 + zs;

        // PML coefficients for pole 0 and pole 1 (vary along y only)
        IRA = 1 / (RA[INDEX2D_R(0,j2)] + RA[INDEX2D_R(1,j2)]);
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,j2)];
        RE0 = RE[INDEX2D_R(0,j2)];
        RF0 = RF[INDEX2D_R(0,j2)];
        RC0 = IRA * RF0;
        RB1 = RB[INDEX2D_R(1,j2)];
        RE1 = RE[INDEX2D_R(1,j2)];
        RF1 = RF[INDEX2D_R(1,j2)];
        RC1 = IRA * RF1;

        // Hz
        Psi2 = RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)];
        materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
        dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
        Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (IRA1 * dEx - IRA * Psi2);
        PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] + RC1 * (dEx - Psi2);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * (dEx - Psi2);
    }
}


__global__ void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {

    //  This function updates the Hx and Hy field components for the zminus slab.
    //
    //  Args:
    //      xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //      NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays
    //      ID, E, H: Access to ID and field component arrays
    //      PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //      d: Spatial discretisation, e.g.
//      dx, dy or dz

    // Obtain the linear index corresponding to the current thread
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Convert the linear index to subscripts for PML PHI1 (4D) arrays
    int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
    int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
    int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;

    // Convert the linear index to subscripts for PML PHI2 (4D) arrays
    int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
    int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
    int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;

    $REAL IRA, IRA1, RB0, RC0, RE0, RF0, dEx, dEy;
    $REAL dz = d;
    int ii, jj, kk, materialHx, materialHy;
    int nx = xf - xs;
    int ny = yf - ys;
    int nz = zf - zs;

    // Only threads whose index lies in the first pole plane (p1 == 0) and
    // inside the slab extents do any work
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays (z runs backwards: PML index 0 maps to field index zf - 1)
        ii = i1 + xs;
        jj = j1 + ys;
        kk = zf - (k1 + 1);

        // PML coefficients (vary along z only)
        IRA = 1 / RA[INDEX2D_R(0,k1)];
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,k1)];
        RE0 = RE[INDEX2D_R(0,k1)];
        RF0 = RF[INDEX2D_R(0,k1)];
        RC0 = IRA * RB0 * RF0;

        // Hx
        materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
        dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
        Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (IRA1 * dEy - IRA * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
        // The auxiliary variable is advanced after the field update, which uses its previous value
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * dEy - RC0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)];
    }

    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays (z runs backwards: PML index 0 maps to field index zf - 1)
        ii = i2 + xs;
        jj = j2 + ys;
        kk = zf - (k2 + 1);

        // PML coefficients (vary along z only)
        IRA = 1 / RA[INDEX2D_R(0,k2)];
        IRA1 = IRA - 1;
        RB0 = RB[INDEX2D_R(0,k2)];
        RE0 = RE[INDEX2D_R(0,k2)];
        RF0 = RF[INDEX2D_R(0,k2)];
        RC0 = IRA * RB0 * RF0;

        // Hy
        materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
        dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
        Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (IRA1 * dEx - IRA * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * dEx - RC0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)];
    }
}


__global__ void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {

    //  This function updates the Hx and Hy field components for the zminus slab.
    //
    //  Args:
    //      xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //      NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of PHI1 and PHI2 PML arrays
    //      ID, E, H: Access to ID and field component arrays
    //      PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //      d: Spatial discretisation, e.g.
dx, dy or dz + + // Obtain the linear index corresponding to the current tREad + int idx = blockIdx.x * blockDim.x + tREadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2, dEx, dEy; + $REAL dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = zf - (k1 + 1); + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,k1)] + RA[INDEX2D_R(1,k1)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,k1)]; + RE1 = RE[INDEX2D_R(1,k1)]; + RF1 = RF[INDEX2D_R(1,k1)]; + RC1 = IRA * RF1; + + // Hx + Psi1 = RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]; + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (IRA1 * dEy - IRA * Psi1); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] + RC1 * (dEy - Psi1); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * (dEy - Psi1); + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = zf - (k2 + 1); + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,k2)] + RA[INDEX2D_R(1,k2)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,k2)]; + RE1 = RE[INDEX2D_R(1,k2)]; + RF1 = RF[INDEX2D_R(1,k2)]; + RC1 = IRA * RF1; + + // Hy + Psi2 = RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]; + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (IRA1 * dEx - IRA * Psi2); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] + RC1 * (dEx - Psi2); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * (dEx - Psi2); + } +} + + +__global__ void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hx and Hy field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current tREad + int idx = blockIdx.x * blockDim.x + tREadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, dEx, dEy; + $REAL dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + IRA = RA[INDEX2D_R(0,k1)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RC0 = IRA * RB0 * RF0; + + // Hx + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (IRA1 * dEy - IRA * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * dEy - RC0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]; + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + IRA = RA[INDEX2D_R(0,k2)]; + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = 
RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RC0 = IRA * RB0 * RF0; + + // Hy + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (IRA1 * dEx - IRA * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * dEx - RC0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]; + } +} + + +__global__ void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) { + + // This function updates the Hx and Hy field components for the zplus slab. + // + // Args: + // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab + // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of PHI1 and PHI2 PML arrays + // ID, E, H: Access to ID and field component arrays + // Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays + // d: Spatial discretisation, e.g. 
dx, dy or dz + + // Obtain the linear index corresponding to the current tREad + int idx = blockIdx.x * blockDim.x + tREadIdx.x; + + // Convert the linear index to subscripts for PML PHI1 (4D) arrays + int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1); + int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1); + int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1; + int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1; + + // Convert the linear index to subscripts for PML PHI2 (4D) arrays + int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2); + int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2); + int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2; + int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2; + + $REAL IRA, IRA1, RB0, RC0, RE0, RF0, RB1, RC1, RE1, RF1, Psi1, Psi2, dEx, dEy; + $REAL dz = d; + int ii, jj, kk, materialHx, materialHy; + int nx = xf - xs; + int ny = yf - ys; + int nz = zf - zs; + + if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { + // Subscripts for field arrays + ii = i1 + xs; + jj = j1 + ys; + kk = k1 + zs; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,k1)] + RA[INDEX2D_R(0,k1)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k1)]; + RE0 = RE[INDEX2D_R(0,k1)]; + RF0 = RF[INDEX2D_R(0,k1)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,k1)]; + RE1 = RE[INDEX2D_R(1,k1)]; + RF1 = RF[INDEX2D_R(1,k1)]; + RC1 = IRA * RF1; + + // Hx + Psi1 = RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]; + materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; + dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (IRA1 * dEy - IRA * Psi1); + PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] + RC1 * (dEy - Psi1); + PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * 
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RC0 * (dEy - Psi1); + } + + if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { + // Subscripts for field arrays + ii = i2 + xs; + jj = j2 + ys; + kk = k2 + zs; + + // PML coefficients + IRA = 1 / (RA[INDEX2D_R(0,k2)] + RA[INDEX2D_R(0,k2)]); + IRA1 = IRA - 1; + RB0 = RB[INDEX2D_R(0,k2)]; + RE0 = RE[INDEX2D_R(0,k2)]; + RF0 = RF[INDEX2D_R(0,k2)]; + RC0 = IRA * RF0; + RB1 = RB[INDEX2D_R(1,k2)]; + RE1 = RE[INDEX2D_R(1,k2)]; + RF1 = RF[INDEX2D_R(1,k2)]; + RC1 = IRA * RF1; + + // Hy + Psi2 = RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]; + materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; + dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; + Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (IRA1 * dEx - IRA * Psi2); + PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] + RC1 * (dEx - Psi2); + PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RC0 * (dEx - Psi2); + } +} + +""") diff --git a/gprMax/pml_updates_ext.pyx b/gprMax/pml_updates_ext.pyx deleted file mode 100644 index 2080808f..00000000 --- a/gprMax/pml_updates_ext.pyx +++ /dev/null @@ -1,1715 +0,0 @@ -# Copyright (C) 2015-2019: The University of Edinburgh -# Authors: Craig Warren and Antonis Giannopoulos -# -# This file is part of gprMax. -# -# gprMax is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# gprMax is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with gprMax. If not, see . 
- -import numpy as np -cimport numpy as np -from cython.parallel import prange - -from gprMax.constants cimport floattype_t, complextype_t - - -######################################################## -# Electric field PML updates - 1st order - xminus slab # -######################################################## -cpdef void update_pml_1order_electric_xminus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsE, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] EPhi1, - floattype_t[:, :, :, ::1] EPhi2, - floattype_t[:, ::1] ERA, - floattype_t[:, ::1] ERB, - floattype_t[:, ::1] ERE, - floattype_t[:, ::1] ERF, - float d - ): - """This function updates the Ey and Ez field components for the xminus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, ERA, ERB, ERE, ERF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. 
dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialEy, materialEz - cdef float dx, dHy, dHz, RA0, RB0, RE0, RF0 - dx = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - RA0 = (ERA[0, i] - 1) - RB0 = ERB[0, i] - RE0 = ERE[0, i] - RF0 = ERF[0, i] - ii = xf - i - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = k + zs - # Ey - materialEy = ID[1, ii, jj, kk] - dHz = (Hz[ii, jj, kk] - Hz[ii - 1, jj, kk]) / dx - Ey[ii, jj, kk] = Ey[ii, jj, kk] - updatecoeffsE[materialEy, 4] * (RA0 * dHz + RB0 * EPhi1[0, i, j, k]) - EPhi1[0, i, j, k] = RE0 * EPhi1[0, i, j, k] - RF0 * dHz - # Ez - materialEz = ID[2, ii, jj, kk] - dHy = (Hy[ii, jj, kk] - Hy[ii - 1, jj, kk]) / dx - Ez[ii, jj, kk] = Ez[ii, jj, kk] + updatecoeffsE[materialEz, 4] * (RA0 * dHy + RB0 * EPhi2[0, i, j, k]) - EPhi2[0, i, j, k] = RE0 * EPhi2[0, i, j, k] - RF0 * dHy - - -####################################################### -# Electric field PML updates - 1st order - xplus slab # -####################################################### -cpdef void update_pml_1order_electric_xplus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsE, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] EPhi1, - floattype_t[:, :, :, ::1] EPhi2, - floattype_t[:, ::1] ERA, - floattype_t[:, ::1] ERB, - floattype_t[:, ::1] ERE, - floattype_t[:, ::1] ERF, - float d - ): - """This function updates the Ey and Ez field components for the xplus slab. 
- - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, ERA, ERB, ERE, ERF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialEy, materialEz - cdef float dx, dHy, dHz, RA0, RB0, RE0, RF0 - dx = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - RA0 = (ERA[0, i] - 1) - RB0 = ERB[0, i] - RE0 = ERE[0, i] - RF0 = ERF[0, i] - ii = i + xs - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = k + zs - # Ey - materialEy = ID[1, ii, jj, kk] - dHz = (Hz[ii, jj, kk] - Hz[ii - 1, jj, kk]) / dx - Ey[ii, jj, kk] = Ey[ii, jj, kk] - updatecoeffsE[materialEy, 4] * (RA0 * dHz + RB0 * EPhi1[0, i, j, k]) - EPhi1[0, i, j, k] = RE0 * EPhi1[0, i, j, k] - RF0 * dHz - # Ez - materialEz = ID[2, ii, jj, kk] - dHy = (Hy[ii, jj, kk] - Hy[ii - 1, jj, kk]) / dx - Ez[ii, jj, kk] = Ez[ii, jj, kk] + updatecoeffsE[materialEz, 4] * (RA0 * dHy + RB0 * EPhi2[0, i, j, k]) - EPhi2[0, i, j, k] = RE0 * EPhi2[0, i, j, k] - RF0 * dHy - - -######################################################## -# Electric field PML updates - 1st order - yminus slab # -######################################################## -cpdef void update_pml_1order_electric_yminus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsE, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] EPhi1, - floattype_t[:, :, :, ::1] EPhi2, - floattype_t[:, ::1] ERA, - floattype_t[:, ::1] ERB, - floattype_t[:, ::1] ERE, - 
floattype_t[:, ::1] ERF, - float d - ): - """This function updates the Ex and Ez field components for the yminus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, ERA, ERB, ERE, ERF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialEx, materialEz - cdef float dy, dHx, dHz, RA0, RB0, RE0, RF0 - dy = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = yf - j - RA0 = (ERA[0, j] - 1) - RB0 = ERB[0, j] - RE0 = ERE[0, j] - RF0 = ERF[0, j] - for k in range(0, nz): - kk = k + zs - # Ex - materialEx = ID[0, ii, jj, kk] - dHz = (Hz[ii, jj, kk] - Hz[ii, jj - 1, kk]) / dy - Ex[ii, jj, kk] = Ex[ii, jj, kk] + updatecoeffsE[materialEx, 4] * (RA0 * dHz + RB0 * EPhi1[0, i, j, k]) - EPhi1[0, i, j, k] = RE0 * EPhi1[0, i, j, k] - RF0 * dHz - # Ez - materialEz = ID[2, ii, jj, kk] - dHx = (Hx[ii, jj, kk] - Hx[ii, jj - 1, kk]) / dy - Ez[ii, jj, kk] = Ez[ii, jj, kk] - updatecoeffsE[materialEz, 4] * (RA0 * dHx + RB0 * EPhi2[0, i, j, k]) - EPhi2[0, i, j, k] = RE0 * EPhi2[0, i, j, k] - RF0 * dHx - - -####################################################### -# Electric field PML updates - 1st order - yplus slab # -####################################################### -cpdef void update_pml_1order_electric_yplus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsE, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] EPhi1, - 
floattype_t[:, :, :, ::1] EPhi2, - floattype_t[:, ::1] ERA, - floattype_t[:, ::1] ERB, - floattype_t[:, ::1] ERE, - floattype_t[:, ::1] ERF, - float d - ): - """This function updates the Ex and Ez field components for the yplus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, ERA, ERB, ERE, ERF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialEx, materialEz - cdef float dy, dHx, dHz, RA0, RB0, RE0, RF0 - dy = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = j + ys - RA0 = (ERA[0, j] - 1) - RB0 = ERB[0, j] - RE0 = ERE[0, j] - RF0 = ERF[0, j] - for k in range(0, nz): - kk = k + zs - # Ex - materialEx = ID[0, ii, jj, kk] - dHz = (Hz[ii, jj, kk] - Hz[ii, jj - 1, kk]) / dy - Ex[ii, jj, kk] = Ex[ii, jj, kk] + updatecoeffsE[materialEx, 4] * (RA0 * dHz + RB0 * EPhi1[0, i, j, k]) - EPhi1[0, i, j, k] = RE0 * EPhi1[0, i, j, k] - RF0 * dHz - # Ez - materialEz = ID[2, ii, jj, kk] - dHx = (Hx[ii, jj, kk] - Hx[ii, jj - 1, kk]) / dy - Ez[ii, jj, kk] = Ez[ii, jj, kk] - updatecoeffsE[materialEz, 4] * (RA0 * dHx + RB0 * EPhi2[0, i, j, k]) - EPhi2[0, i, j, k] = RE0 * EPhi2[0, i, j, k] - RF0 * dHx - - -######################################################## -# Electric field PML updates - 1st order - zminus slab # -######################################################## -cpdef void update_pml_1order_electric_zminus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsE, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - 
floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] EPhi1, - floattype_t[:, :, :, ::1] EPhi2, - floattype_t[:, ::1] ERA, - floattype_t[:, ::1] ERB, - floattype_t[:, ::1] ERE, - floattype_t[:, ::1] ERF, - float d - ): - """This function updates the Ex and Ey field components for the zminus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, ERA, ERB, ERE, ERF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialEx, materialEy - cdef float dz, dHx, dHy, RA0, RB0, RE0, RF0 - dz = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = zf - k - RA0 = (ERA[0, k] - 1) - RB0 = ERB[0, k] - RE0 = ERE[0, k] - RF0 = ERF[0, k] - # Ex - materialEx = ID[0, ii, jj, kk] - dHy = (Hy[ii, jj, kk] - Hy[ii, jj, kk - 1]) / dz - Ex[ii, jj, kk] = Ex[ii, jj, kk] - updatecoeffsE[materialEx, 4] * (RA0 * dHy + RB0 * EPhi1[0, i, j, k]) - EPhi1[0, i, j, k] = RE0 * EPhi1[0, i, j, k] - RF0 * dHy - # Ey - materialEy = ID[1, ii, jj, kk] - dHx = (Hx[ii, jj, kk] - Hx[ii, jj, kk - 1]) / dz - Ey[ii, jj, kk] = Ey[ii, jj, kk] + updatecoeffsE[materialEy, 4] * (RA0 * dHx + RB0 * EPhi2[0, i, j, k]) - EPhi2[0, i, j, k] = RE0 * EPhi2[0, i, j, k] - RF0 * dHx - - -####################################################### -# Electric field PML updates - 1st order - zplus slab # -####################################################### -cpdef void update_pml_1order_electric_zplus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsE, - 
np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] EPhi1, - floattype_t[:, :, :, ::1] EPhi2, - floattype_t[:, ::1] ERA, - floattype_t[:, ::1] ERB, - floattype_t[:, ::1] ERE, - floattype_t[:, ::1] ERF, - float d - ): - """This function updates the Ex and Ey field components for the zplus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, ERA, ERB, ERE, ERF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialEx, materialEy - cdef float dz, dHx, dHy, RA0, RB0, RE0, RF0 - dz = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = k + zs - RA0 = (ERA[0, k] - 1) - RB0 = ERB[0, k] - RE0 = ERE[0, k] - RF0 = ERF[0, k] - # Ex - materialEx = ID[0, ii, jj, kk] - dHy = (Hy[ii, jj, kk] - Hy[ii, jj, kk - 1]) / dz - Ex[ii, jj, kk] = Ex[ii, jj, kk] - updatecoeffsE[materialEx, 4] * (RA0 * dHy + RB0 * EPhi1[0, i, j, k]) - EPhi1[0, i, j, k] = RE0 * EPhi1[0, i, j, k] - RF0 * dHy - # Ey - materialEy = ID[1, ii, jj, kk] - dHx = (Hx[ii, jj, kk] - Hx[ii, jj, kk - 1]) / dz - Ey[ii, jj, kk] = Ey[ii, jj, kk] + updatecoeffsE[materialEy, 4] * (RA0 * dHx + RB0 * EPhi2[0, i, j, k]) - EPhi2[0, i, j, k] = RE0 * EPhi2[0, i, j, k] - RF0 * dHx - - -######################################################## -# Magnetic field PML updates - 1st order - xminus slab # -######################################################## -cpdef void 
update_pml_1order_magnetic_xminus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsH, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] HPhi1, - floattype_t[:, :, :, ::1] HPhi2, - floattype_t[:, ::1] HRA, - floattype_t[:, ::1] HRB, - floattype_t[:, ::1] HRE, - floattype_t[:, ::1] HRF, - float d - ): - """This function updates the Hy and Hz field components for the xminus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, HRA, HRB, ERE, HRF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialHy, materialHz - cdef float dx, dEy, dEz, RA0, RB0, RE0, RF0 - dx = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = xf - (i + 1) - RA0 = (HRA[0, i] - 1) - RB0 = HRB[0, i] - RE0 = HRE[0, i] - RF0 = HRF[0, i] - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = k + zs - # Hy - materialHy = ID[4, ii, jj, kk] - dEz = (Ez[ii + 1, jj, kk] - Ez[ii, jj, kk]) / dx - Hy[ii, jj, kk] = Hy[ii, jj, kk] + updatecoeffsH[materialHy, 4] * (RA0 * dEz + RB0 * HPhi1[0, i, j, k]) - HPhi1[0, i, j, k] = RE0 * HPhi1[0, i, j, k] - RF0 * dEz - # Hz - materialHz = ID[5, ii, jj, kk] - dEy = (Ey[ii + 1, jj, kk] - Ey[ii, jj, kk]) / dx - Hz[ii, jj, kk] = Hz[ii, jj, kk] - updatecoeffsH[materialHz, 4] * (RA0 * dEy + RB0 * HPhi2[0, i, j, k]) - HPhi2[0, i, j, k] = RE0 * HPhi2[0, i, j, k] - RF0 * dEy - - -####################################################### -# 
Magnetic field PML updates - 1st order - xplus slab # -####################################################### -cpdef void update_pml_1order_magnetic_xplus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsH, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] HPhi1, - floattype_t[:, :, :, ::1] HPhi2, - floattype_t[:, ::1] HRA, - floattype_t[:, ::1] HRB, - floattype_t[:, ::1] HRE, - floattype_t[:, ::1] HRF, - float d - ): - """This function updates the Hy and Hz field components for the xplus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, HRA, HRB, HRE, HRF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. 
dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialHy, materialHz - cdef float dx, dEy, dEz, RA0, RB0, RE0, RF0 - dx = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - RA0 = (HRA[0, i] - 1) - RB0 = HRB[0, i] - RE0 = HRE[0, i] - RF0 = HRF[0, i] - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = k + zs - # Hy - materialHy = ID[4, ii, jj, kk] - dEz = (Ez[ii + 1, jj, kk] - Ez[ii, jj, kk]) / dx - Hy[ii, jj, kk] = Hy[ii, jj, kk] + updatecoeffsH[materialHy, 4] * (RA0 * dEz + RB0 * HPhi1[0, i, j, k]) - HPhi1[0, i, j, k] = RE0 * HPhi1[0, i, j, k] - RF0 * dEz - # Hz - materialHz = ID[5, ii, jj, kk] - dEy = (Ey[ii + 1, jj, kk] - Ey[ii, jj, kk]) / dx - Hz[ii, jj, kk] = Hz[ii, jj, kk] - updatecoeffsH[materialHz, 4] * (RA0 * dEy + RB0 * HPhi2[0, i, j, k]) - HPhi2[0, i, j, k] = RE0 * HPhi2[0, i, j, k] - RF0 * dEy - - -######################################################## -# Magnetic field PML updates - 1st order - yminus slab # -######################################################## -cpdef void update_pml_1order_magnetic_yminus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsH, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] HPhi1, - floattype_t[:, :, :, ::1] HPhi2, - floattype_t[:, ::1] HRA, - floattype_t[:, ::1] HRB, - floattype_t[:, ::1] HRE, - floattype_t[:, ::1] HRF, - float d - ): - """This function updates the Hx and Hz field components for the yminus slab. 
- - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, HRA, HRB, HRE, HRF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialHx, materialHz - cdef float dy, dEx, dEz, RA0, RB0, RE0, RF0 - dy = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = yf - (j + 1) - RA0 = (HRA[0, j] - 1) - RB0 = HRB[0, j] - RE0 = HRE[0, j] - RF0 = HRF[0, j] - for k in range(0, nz): - kk = k + zs - # Hx - materialHx = ID[3, ii, jj, kk] - dEz = (Ez[ii, jj + 1, kk] - Ez[ii, jj, kk]) / dy - Hx[ii, jj, kk] = Hx[ii, jj, kk] - updatecoeffsH[materialHx, 4] * (RA0 * dEz + RB0 * HPhi1[0, i, j, k]) - HPhi1[0, i, j, k] = RE0 * HPhi1[0, i, j, k] - RF0 * dEz - # Hz - materialHz = ID[5, ii, jj, kk] - dEx = (Ex[ii, jj + 1, kk] - Ex[ii, jj, kk]) / dy - Hz[ii, jj, kk] = Hz[ii, jj, kk] + updatecoeffsH[materialHz, 4] * (RA0 * dEx + RB0 * HPhi2[0, i, j, k]) - HPhi2[0, i, j, k] = RE0 * HPhi2[0, i, j, k] - RF0 * dEx - - -####################################################### -# Magnetic field PML updates - 1st order - yplus slab # -####################################################### -cpdef void update_pml_1order_magnetic_yplus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsH, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] HPhi1, - floattype_t[:, :, :, ::1] HPhi2, - floattype_t[:, ::1] HRA, - floattype_t[:, ::1] HRB, - floattype_t[:, ::1] HRE, 
- floattype_t[:, ::1] HRF, - float d - ): - """This function updates the Hx and Hz field components for the yplus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, HRA, HRB, HRE, HRF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialHx, materialHz - cdef float dy, dEx, dEz, RA0, RB0, RE0, RF0 - dy = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = j + ys - RA0 = (HRA[0, j] - 1) - RB0 = HRB[0, j] - RE0 = HRE[0, j] - RF0 = HRF[0, j] - for k in range(0, nz): - kk = k + zs - # Hx - materialHx = ID[3, ii, jj, kk] - dEz = (Ez[ii, jj + 1, kk] - Ez[ii, jj, kk]) / dy - Hx[ii, jj, kk] = Hx[ii, jj, kk] - updatecoeffsH[materialHx, 4] * (RA0 * dEz + RB0 * HPhi1[0, i, j, k]) - HPhi1[0, i, j, k] = RE0 * HPhi1[0, i, j, k] - RF0 * dEz - # Hz - materialHz = ID[5, ii, jj, kk] - dEx = (Ex[ii, jj + 1, kk] - Ex[ii, jj, kk]) / dy - Hz[ii, jj, kk] = Hz[ii, jj, kk] + updatecoeffsH[materialHz, 4] * (RA0 * dEx + RB0 * HPhi2[0, i, j, k]) - HPhi2[0, i, j, k] = RE0 * HPhi2[0, i, j, k] - RF0 * dEx - - -######################################################## -# Magnetic field PML updates - 1st order - zminus slab # -######################################################## -cpdef void update_pml_1order_magnetic_zminus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsH, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] 
HPhi1, - floattype_t[:, :, :, ::1] HPhi2, - floattype_t[:, ::1] HRA, - floattype_t[:, ::1] HRB, - floattype_t[:, ::1] HRE, - floattype_t[:, ::1] HRF, - float d - ): - """This function updates the Hx and Hy field components for the zminus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, HRA, HRB, HRE, HRF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialHx, materialHy - cdef float dz, dEx, dEy, RA0, RB0, RE0, RF0 - dz = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = zf - (k + 1) - RA0 = (HRA[0, k] - 1) - RB0 = HRB[0, k] - RE0 = HRE[0, k] - RF0 = HRF[0, k] - # Hx - materialHx = ID[3, ii, jj, kk] - dEy = (Ey[ii, jj, kk + 1] - Ey[ii, jj, kk]) / dz - Hx[ii, jj, kk] = Hx[ii, jj, kk] + updatecoeffsH[materialHx, 4] * (RA0 * dEy + RB0 * HPhi1[0, i, j, k]) - HPhi1[0, i, j, k] = RE0 * HPhi1[0, i, j, k] - RF0 * dEy - # Hy - materialHy = ID[4, ii, jj, kk] - dEx = (Ex[ii, jj, kk + 1] - Ex[ii, jj, kk]) / dz - Hy[ii, jj, kk] = Hy[ii, jj, kk] - updatecoeffsH[materialHy, 4] * (RA0 * dEx + RB0 * HPhi2[0, i, j, k]) - HPhi2[0, i, j, k] = RE0 * HPhi2[0, i, j, k] - RF0 * dEx - - -####################################################### -# Magnetic field PML updates - 1st order - zplus slab # -####################################################### -cpdef void update_pml_1order_magnetic_zplus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsH, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, 
::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] HPhi1, - floattype_t[:, :, :, ::1] HPhi2, - floattype_t[:, ::1] HRA, - floattype_t[:, ::1] HRB, - floattype_t[:, ::1] HRE, - floattype_t[:, ::1] HRF, - float d - ): - """This function updates the Hx and Hy field components for the zplus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, HRA, HRB, HRE, HRF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialHx, materialHy - cdef float dz, dEx, dEy, RA0, RB0, RE0, RF0 - dz = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = k + zs - RA0 = (HRA[0, k] - 1) - RB0 = HRB[0, k] - RE0 = HRE[0, k] - RF0 = HRF[0, k] - # Hx - materialHx = ID[3, ii, jj, kk] - dEy = (Ey[ii, jj, kk + 1] - Ey[ii, jj, kk]) / dz - Hx[ii, jj, kk] = Hx[ii, jj, kk] + updatecoeffsH[materialHx, 4] * (RA0 * dEy + RB0 * HPhi1[0, i, j, k]) - HPhi1[0, i, j, k] = RE0 * HPhi1[0, i, j, k] - RF0 * dEy - # Hy - materialHy = ID[4, ii, jj, kk] - dEx = (Ex[ii, jj, kk + 1] - Ex[ii, jj, kk]) / dz - Hy[ii, jj, kk] = Hy[ii, jj, kk] - updatecoeffsH[materialHy, 4] * (RA0 * dEx + RB0 * HPhi2[0, i, j, k]) - HPhi2[0, i, j, k] = RE0 * HPhi2[0, i, j, k] - RF0 * dEx - - -######################################################## -# Electric field PML updates - 2nd order - xminus slab # -######################################################## -cpdef void update_pml_2order_electric_xminus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] 
updatecoeffsE, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] EPhi1, - floattype_t[:, :, :, ::1] EPhi2, - floattype_t[:, ::1] ERA, - floattype_t[:, ::1] ERB, - floattype_t[:, ::1] ERE, - floattype_t[:, ::1] ERF, - float d - ): - """This function updates the Ey and Ez field components for the xminus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, ERA, ERB, ERE, ERF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialEy, materialEz - cdef float dx, dHy, dHz, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 - dx = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - RA0 = ERA[0, i] - RB0 = ERB[0, i] - RE0 = ERE[0, i] - RF0 = ERF[0, i] - RA1 = ERA[1, i] - RB1 = ERB[1, i] - RE1 = ERE[1, i] - RF1 = ERF[1, i] - RA01 = ERA[0, i] * ERA[1, i] - 1 - ii = xf - i - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = k + zs - # Ey - materialEy = ID[1, ii, jj, kk] - dHz = (Hz[ii, jj, kk] - Hz[ii - 1, jj, kk]) / dx - Ey[ii, jj, kk] = Ey[ii, jj, kk] - updatecoeffsE[materialEy, 4] * (RA01 * dHz + RA1 * RB0 * EPhi1[0, i, j, k] + RB1 * EPhi1[1, i, j, k]) - EPhi1[1, i, j, k] = RE1 * EPhi1[1, i, j, k] - RF1 * (RA0 * dHz + RB0 * EPhi1[0, i, j, k]) - EPhi1[0, i, j, k] = RE0 * EPhi1[0, i, j, k] - RF0 * dHz - # Ez - materialEz = ID[2, ii, jj, kk] - dHy = (Hy[ii, jj, kk] - Hy[ii - 1, jj, kk]) / dx - Ez[ii, jj, kk] = Ez[ii, jj, kk] + updatecoeffsE[materialEz, 4] * (RA01 * dHy + RA1 * RB0 * EPhi2[0, i, j, k] + 
RB1 * EPhi2[1, i, j, k]) - EPhi2[1, i, j, k] = RE1 * EPhi2[1, i, j, k] - RF1 * (RA0 * dHy + RB0 * EPhi2[0, i, j, k]) - EPhi2[0, i, j, k] = RE0 * EPhi2[0, i, j, k] - RF0 * dHy - - -####################################################### -# Electric field PML updates - 2nd order - xplus slab # -####################################################### -cpdef void update_pml_2order_electric_xplus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsE, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] EPhi1, - floattype_t[:, :, :, ::1] EPhi2, - floattype_t[:, ::1] ERA, - floattype_t[:, ::1] ERB, - floattype_t[:, ::1] ERE, - floattype_t[:, ::1] ERF, - float d - ): - """This function updates the Ey and Ez field components for the xplus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, ERA, ERB, ERE, ERF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. 
dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialEy, materialEz - cdef float dx, dHy, dHz, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 - dx = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - RA0 = ERA[0, i] - RB0 = ERB[0, i] - RE0 = ERE[0, i] - RF0 = ERF[0, i] - RA1 = ERA[1, i] - RB1 = ERB[1, i] - RE1 = ERE[1, i] - RF1 = ERF[1, i] - RA01 = ERA[0, i] * ERA[1, i] - 1 - ii = i + xs - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = k + zs - # Ey - materialEy = ID[1, ii, jj, kk] - dHz = (Hz[ii, jj, kk] - Hz[ii - 1, jj, kk]) / dx - Ey[ii, jj, kk] = Ey[ii, jj, kk] - updatecoeffsE[materialEy, 4] * (RA01 * dHz + RA1 * RB0 * EPhi1[0, i, j, k] + RB1 * EPhi1[1, i, j, k]) - EPhi1[1, i, j, k] = RE1 * EPhi1[1, i, j, k] - RF1 * (RA0 * dHz + RB0 * EPhi1[0, i, j, k]) - EPhi1[0, i, j, k] = RE0 * EPhi1[0, i, j, k] - RF0 * dHz - # Ez - materialEz = ID[2, ii, jj, kk] - dHy = (Hy[ii, jj, kk] - Hy[ii - 1, jj, kk]) / dx - Ez[ii, jj, kk] = Ez[ii, jj, kk] + updatecoeffsE[materialEz, 4] * (RA01 * dHy + RA1 * RB0 * EPhi2[0, i, j, k] + RB1 * EPhi2[1, i, j, k]) - EPhi2[1, i, j, k] = RE1 * EPhi2[1, i, j, k] - RF1 * (RA0 * dHy + RB0 * EPhi2[0, i, j, k]) - EPhi2[0, i, j, k] = RE0 * EPhi2[0, i, j, k] - RF0 * dHy - - -######################################################## -# Electric field PML updates - 2nd order - yminus slab # -######################################################## -cpdef void update_pml_2order_electric_yminus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsE, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] EPhi1, - floattype_t[:, :, :, ::1] EPhi2, - floattype_t[:, ::1] ERA, - floattype_t[:, 
::1] ERB, - floattype_t[:, ::1] ERE, - floattype_t[:, ::1] ERF, - float d - ): - """This function updates the Ex and Ez field components for the yminus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, ERA, ERB, ERE, ERF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialEx, materialEz - cdef float dy, dHx, dHz, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 - dy = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = yf - j - RA0 = ERA[0, j] - RB0 = ERB[0, j] - RE0 = ERE[0, j] - RF0 = ERF[0, j] - RA1 = ERA[1, j] - RB1 = ERB[1, j] - RE1 = ERE[1, j] - RF1 = ERF[1, j] - RA01 = ERA[0, j] * ERA[1, j] - 1 - for k in range(0, nz): - kk = k + zs - # Ex - materialEx = ID[0, ii, jj, kk] - dHz = (Hz[ii, jj, kk] - Hz[ii, jj - 1, kk]) / dy - Ex[ii, jj, kk] = Ex[ii, jj, kk] + updatecoeffsE[materialEx, 4] * (RA01 * dHz + RA1 * RB0 * EPhi1[0, i, j, k] + RB1 * EPhi1[1, i, j, k]) - EPhi1[1, i, j, k] = RE1 * EPhi1[1, i, j, k] - RF1 * (RA0 * dHz + RB0 * EPhi1[0, i, j, k]) - EPhi1[0, i, j, k] = RE0 * EPhi1[0, i, j, k] - RF0 * dHz - # Ez - materialEz = ID[2, ii, jj, kk] - dHx = (Hx[ii, jj, kk] - Hx[ii, jj - 1, kk]) / dy - Ez[ii, jj, kk] = Ez[ii, jj, kk] - updatecoeffsE[materialEz, 4] * (RA01 * dHx + RA1 * RB0 * EPhi2[0, i, j, k] + RB1 * EPhi2[1, i, j, k]) - EPhi2[1, i, j, k] = RE1 * EPhi2[1, i, j, k] - RF1 * (RA0 * dHx + RB0 * EPhi2[0, i, j, k]) - EPhi2[0, i, j, k] = RE0 * EPhi2[0, i, j, k] - RF0 * dHx - - -####################################################### -# Electric field PML updates - 2nd order - yplus slab # 
-####################################################### -cpdef void update_pml_2order_electric_yplus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsE, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] EPhi1, - floattype_t[:, :, :, ::1] EPhi2, - floattype_t[:, ::1] ERA, - floattype_t[:, ::1] ERB, - floattype_t[:, ::1] ERE, - floattype_t[:, ::1] ERF, - float d - ): - """This function updates the Ex and Ez field components for the yplus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, ERA, ERB, ERE, ERF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. 
dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialEx, materialEz - cdef float dy, dHx, dHz, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 - dy = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = j + ys - RA0 = ERA[0, j] - RB0 = ERB[0, j] - RE0 = ERE[0, j] - RF0 = ERF[0, j] - RA1 = ERA[1, j] - RB1 = ERB[1, j] - RE1 = ERE[1, j] - RF1 = ERF[1, j] - RA01 = ERA[0, j] * ERA[1, j] - 1 - for k in range(0, nz): - kk = k + zs - # Ex - materialEx = ID[0, ii, jj, kk] - dHz = (Hz[ii, jj, kk] - Hz[ii, jj - 1, kk]) / dy - Ex[ii, jj, kk] = Ex[ii, jj, kk] + updatecoeffsE[materialEx, 4] * (RA01 * dHz + RA1 * RB0 * EPhi1[0, i, j, k] + RB1 * EPhi1[1, i, j, k]) - EPhi1[1, i, j, k] = RE1 * EPhi1[1, i, j, k] - RF1 * (RA0 * dHz + RB0 * EPhi1[0, i, j, k]) - EPhi1[0, i, j, k] = RE0 * EPhi1[0, i, j, k] - RF0 * dHz - # Ez - materialEz = ID[2, ii, jj, kk] - dHx = (Hx[ii, jj, kk] - Hx[ii, jj - 1, kk]) / dy - Ez[ii, jj, kk] = Ez[ii, jj, kk] - updatecoeffsE[materialEz, 4] * (RA01 * dHx + RA1 * RB0 * EPhi2[0, i, j, k] + RB1 * EPhi2[1, i, j, k]) - EPhi2[1, i, j, k] = RE1 * EPhi2[1, i, j, k] - RF1 * (RA0 * dHx + RB0 * EPhi2[0, i, j, k]) - EPhi2[0, i, j, k] = RE0 * EPhi2[0, i, j, k] - RF0 * dHx - - -######################################################## -# Electric field PML updates - 2nd order - zminus slab # -######################################################## -cpdef void update_pml_2order_electric_zminus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsE, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] EPhi1, - floattype_t[:, :, :, ::1] EPhi2, - floattype_t[:, ::1] ERA, - floattype_t[:, 
::1] ERB, - floattype_t[:, ::1] ERE, - floattype_t[:, ::1] ERF, - float d - ): - """This function updates the Ex and Ey field components for the zminus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, ERA, ERB, ERE, ERF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialEx, materialEy - cdef float dz, dHx, dHy, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 - dz = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = zf - k - RA0 = ERA[0, k] - RB0 = ERB[0, k] - RE0 = ERE[0, k] - RF0 = ERF[0, k] - RA1 = ERA[1, k] - RB1 = ERB[1, k] - RE1 = ERE[1, k] - RF1 = ERF[1, k] - RA01 = ERA[0, k] * ERA[1, k] - 1 - # Ex - materialEx = ID[0, ii, jj, kk] - dHy = (Hy[ii, jj, kk] - Hy[ii, jj, kk - 1]) / dz - Ex[ii, jj, kk] = Ex[ii, jj, kk] - updatecoeffsE[materialEx, 4] * (RA01 * dHy + RA1 * RB0 * EPhi1[0, i, j, k] + RB1 * EPhi1[1, i, j, k]) - EPhi1[1, i, j, k] = RE1 * EPhi1[1, i, j, k] - RF1 * (RA0 * dHy + RB0 * EPhi1[0, i, j, k]) - EPhi1[0, i, j, k] = RE0 * EPhi1[0, i, j, k] - RF0 * dHy - # Ey - materialEy = ID[1, ii, jj, kk] - dHx = (Hx[ii, jj, kk] - Hx[ii, jj, kk - 1]) / dz - Ey[ii, jj, kk] = Ey[ii, jj, kk] + updatecoeffsE[materialEy, 4] * (RA01 * dHx + RA1 * RB0 * EPhi2[0, i, j, k] + RB1 * EPhi2[1, i, j, k]) - EPhi2[1, i, j, k] = RE1 * EPhi2[1, i, j, k] - RF1 * (RA0 * dHx + RB0 * EPhi2[0, i, j, k]) - EPhi2[0, i, j, k] = RE0 * EPhi2[0, i, j, k] - RF0 * dHx - - -####################################################### -# Electric field PML updates - 2nd order - zplus slab # 
-####################################################### -cpdef void update_pml_2order_electric_zplus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsE, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] EPhi1, - floattype_t[:, :, :, ::1] EPhi2, - floattype_t[:, ::1] ERA, - floattype_t[:, ::1] ERB, - floattype_t[:, ::1] ERE, - floattype_t[:, ::1] ERF, - float d - ): - """This function updates the Ex and Ey field components for the zplus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, ERA, ERB, ERE, ERF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. 
dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialEx, materialEy - cdef float dz, dHx, dHy, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 - dz = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = k + zs - RA0 = ERA[0, k] - RB0 = ERB[0, k] - RE0 = ERE[0, k] - RF0 = ERF[0, k] - RA1 = ERA[1, k] - RB1 = ERB[1, k] - RE1 = ERE[1, k] - RF1 = ERF[1, k] - RA01 = ERA[0, k] * ERA[1, k] - 1 - # Ex - materialEx = ID[0, ii, jj, kk] - dHy = (Hy[ii, jj, kk] - Hy[ii, jj, kk - 1]) / dz - Ex[ii, jj, kk] = Ex[ii, jj, kk] - updatecoeffsE[materialEx, 4] * (RA01 * dHy + RA1 * RB0 * EPhi1[0, i, j, k] + RB1 * EPhi1[1, i, j, k]) - EPhi1[1, i, j, k] = RE1 * EPhi1[1, i, j, k] - RF1 * (RA0 * dHy + RB0 * EPhi1[0, i, j, k]) - EPhi1[0, i, j, k] = RE0 * EPhi1[0, i, j, k] - RF0 * dHy - # Ey - materialEy = ID[1, ii, jj, kk] - dHx = (Hx[ii, jj, kk] - Hx[ii, jj, kk - 1]) / dz - Ey[ii, jj, kk] = Ey[ii, jj, kk] + updatecoeffsE[materialEy, 4] * (RA01 * dHx + RA1 * RB0 * EPhi2[0, i, j, k] + RB1 * EPhi2[1, i, j, k]) - EPhi2[1, i, j, k] = RE1 * EPhi2[1, i, j, k] - RF1 * (RA0 * dHx + RB0 * EPhi2[0, i, j, k]) - EPhi2[0, i, j, k] = RE0 * EPhi2[0, i, j, k] - RF0 * dHx - - -######################################################## -# Magnetic field PML updates - 2nd order - xminus slab # -######################################################## -cpdef void update_pml_2order_magnetic_xminus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsH, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] HPhi1, - floattype_t[:, :, :, ::1] HPhi2, - floattype_t[:, ::1] HRA, - floattype_t[:, 
::1] HRB, - floattype_t[:, ::1] HRE, - floattype_t[:, ::1] HRF, - float d - ): - """This function updates the Hy and Hz field components for the xminus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, HRA, HRB, ERE, HRF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialHy, materialHz - cdef float dx, dEy, dEz, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 - dx = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = xf - (i + 1) - RA0 = HRA[0, i] - RB0 = HRB[0, i] - RE0 = HRE[0, i] - RF0 = HRF[0, i] - RA1 = HRA[1, i] - RB1 = HRB[1, i] - RE1 = HRE[1, i] - RF1 = HRF[1, i] - RA01 = HRA[0, i] * HRA[1, i] - 1 - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = k + zs - # Hy - materialHy = ID[4, ii, jj, kk] - dEz = (Ez[ii + 1, jj, kk] - Ez[ii, jj, kk]) / dx - Hy[ii, jj, kk] = Hy[ii, jj, kk] + updatecoeffsH[materialHy, 4] * (RA01 * dEz + RA1 * RB0 * HPhi1[0, i, j, k] + RB1 * HPhi1[1, i, j, k]) - HPhi1[1, i, j, k] = RE1 * HPhi1[1, i, j, k] - RF1 * (RA0 * dEz + RB0 * HPhi1[0, i, j, k]) - HPhi1[0, i, j, k] = RE0 * HPhi1[0, i, j, k] - RF0 * dEz - # Hz - materialHz = ID[5, ii, jj, kk] - dEy = (Ey[ii + 1, jj, kk] - Ey[ii, jj, kk]) / dx - Hz[ii, jj, kk] = Hz[ii, jj, kk] - updatecoeffsH[materialHz, 4] * (RA01 * dEy + RA1 * RB0 * HPhi2[0, i, j, k] + RB1 * HPhi2[1, i, j, k]) - HPhi2[1, i, j, k] = RE1 * HPhi2[1, i, j, k] - RF1 * (RA0 * dEy + RB0 * HPhi2[0, i, j, k]) - HPhi2[0, i, j, k] = RE0 * HPhi2[0, i, j, k] - RF0 * dEy - - -####################################################### -# Magnetic field PML updates - 2nd order - xplus slab # 
-####################################################### -cpdef void update_pml_2order_magnetic_xplus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsH, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] HPhi1, - floattype_t[:, :, :, ::1] HPhi2, - floattype_t[:, ::1] HRA, - floattype_t[:, ::1] HRB, - floattype_t[:, ::1] HRE, - floattype_t[:, ::1] HRF, - float d - ): - """This function updates the Hy and Hz field components for the xplus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, HRA, HRB, HRE, HRF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. 
dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialHy, materialHz - cdef float dx, dEy, dEz, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 - dx = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - RA0 = HRA[0, i] - RB0 = HRB[0, i] - RE0 = HRE[0, i] - RF0 = HRF[0, i] - RA1 = HRA[1, i] - RB1 = HRB[1, i] - RE1 = HRE[1, i] - RF1 = HRF[1, i] - RA01 = HRA[0, i] * HRA[1, i] - 1 - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = k + zs - # Hy - materialHy = ID[4, ii, jj, kk] - dEz = (Ez[ii + 1, jj, kk] - Ez[ii, jj, kk]) / dx - Hy[ii, jj, kk] = Hy[ii, jj, kk] + updatecoeffsH[materialHy, 4] * (RA01 * dEz + RA1 * RB0 * HPhi1[0, i, j, k] + RB1 * HPhi1[1, i, j, k]) - HPhi1[1, i, j, k] = RE1 * HPhi1[1, i, j, k] - RF1 * (RA0 * dEz + RB0 * HPhi1[0, i, j, k]) - HPhi1[0, i, j, k] = RE0 * HPhi1[0, i, j, k] - RF0 * dEz - # Hz - materialHz = ID[5, ii, jj, kk] - dEy = (Ey[ii + 1, jj, kk] - Ey[ii, jj, kk]) / dx - Hz[ii, jj, kk] = Hz[ii, jj, kk] - updatecoeffsH[materialHz, 4] * (RA01 * dEy + RA1 * RB0 * HPhi2[0, i, j, k] + RB1 * HPhi2[1, i, j, k]) - HPhi2[1, i, j, k] = RE1 * HPhi2[1, i, j, k] - RF1 * (RA0 * dEy + RB0 * HPhi2[0, i, j, k]) - HPhi2[0, i, j, k] = RE0 * HPhi2[0, i, j, k] - RF0 * dEy - - -######################################################## -# Magnetic field PML updates - 2nd order - yminus slab # -######################################################## -cpdef void update_pml_2order_magnetic_yminus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsH, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] HPhi1, - floattype_t[:, :, :, ::1] HPhi2, - floattype_t[:, ::1] HRA, - floattype_t[:, 
::1] HRB, - floattype_t[:, ::1] HRE, - floattype_t[:, ::1] HRF, - float d - ): - """This function updates the Hx and Hz field components for the yminus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, HRA, HRB, HRE, HRF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialHx, materialHz - cdef float dy, dEx, dEz, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 - dy = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = yf - (j + 1) - RA0 = HRA[0, j] - RB0 = HRB[0, j] - RE0 = HRE[0, j] - RF0 = HRF[0, j] - RA1 = HRA[1, j] - RB1 = HRB[1, j] - RE1 = HRE[1, j] - RF1 = HRF[1, j] - RA01 = HRA[0, j] * HRA[1, j] - 1 - for k in range(0, nz): - kk = k + zs - # Hx - materialHx = ID[3, ii, jj, kk] - dEz = (Ez[ii, jj + 1, kk] - Ez[ii, jj, kk]) / dy - Hx[ii, jj, kk] = Hx[ii, jj, kk] - updatecoeffsH[materialHx, 4] * (RA01 * dEz + RA1 * RB0 * HPhi1[0, i, j, k] + RB1 * HPhi1[1, i, j, k]) - HPhi1[1, i, j, k] = RE1 * HPhi1[1, i, j, k] - RF1 * (RA0 * dEz + RB0 * HPhi1[0, i, j, k]) - HPhi1[0, i, j, k] = RE0 * HPhi1[0, i, j, k] - RF0 * dEz - # Hz - materialHz = ID[5, ii, jj, kk] - dEx = (Ex[ii, jj + 1, kk] - Ex[ii, jj, kk]) / dy - Hz[ii, jj, kk] = Hz[ii, jj, kk] + updatecoeffsH[materialHz, 4] * (RA01 * dEx + RA1 * RB0 * HPhi2[0, i, j, k] + RB1 * HPhi2[1, i, j, k]) - HPhi2[1, i, j, k] = RE1 * HPhi2[1, i, j, k] - RF1 * (RA0 * dEx + RB0 * HPhi2[0, i, j, k]) - HPhi2[0, i, j, k] = RE0 * HPhi2[0, i, j, k] - RF0 * dEx - - -####################################################### -# Magnetic field PML updates - 2nd order - yplus slab # 
-####################################################### -cpdef void update_pml_2order_magnetic_yplus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsH, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] HPhi1, - floattype_t[:, :, :, ::1] HPhi2, - floattype_t[:, ::1] HRA, - floattype_t[:, ::1] HRB, - floattype_t[:, ::1] HRE, - floattype_t[:, ::1] HRF, - float d - ): - """This function updates the Hx and Hz field components for the yplus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, HRA, HRB, HRE, HRF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. 
dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialHx, materialHz - cdef float dy, dEx, dEz, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 - dy = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = j + ys - RA0 = HRA[0, j] - RB0 = HRB[0, j] - RE0 = HRE[0, j] - RF0 = HRF[0, j] - RA1 = HRA[1, j] - RB1 = HRB[1, j] - RE1 = HRE[1, j] - RF1 = HRF[1, j] - RA01 = HRA[0, j] * HRA[1, j] - 1 - for k in range(0, nz): - kk = k + zs - # Hx - materialHx = ID[3, ii, jj, kk] - dEz = (Ez[ii, jj + 1, kk] - Ez[ii, jj, kk]) / dy - Hx[ii, jj, kk] = Hx[ii, jj, kk] - updatecoeffsH[materialHx, 4] * (RA01 * dEz + RA1 * RB0 * HPhi1[0, i, j, k] + RB1 * HPhi1[1, i, j, k]) - HPhi1[1, i, j, k] = RE1 * HPhi1[1, i, j, k] - RF1 * (RA0 * dEz + RB0 * HPhi1[0, i, j, k]) - HPhi1[0, i, j, k] = RE0 * HPhi1[0, i, j, k] - RF0 * dEz - # Hz - materialHz = ID[5, ii, jj, kk] - dEx = (Ex[ii, jj + 1, kk] - Ex[ii, jj, kk]) / dy - Hz[ii, jj, kk] = Hz[ii, jj, kk] + updatecoeffsH[materialHz, 4] * (RA01 * dEx + RA1 * RB0 * HPhi2[0, i, j, k] + RB1 * HPhi2[1, i, j, k]) - HPhi2[1, i, j, k] = RE1 * HPhi2[1, i, j, k] - RF1 * (RA0 * dEx + RB0 * HPhi2[0, i, j, k]) - HPhi2[0, i, j, k] = RE0 * HPhi2[0, i, j, k] - RF0 * dEx - - -######################################################## -# Magnetic field PML updates - 2nd order - zminus slab # -######################################################## -cpdef void update_pml_2order_magnetic_zminus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsH, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] HPhi1, - floattype_t[:, :, :, ::1] HPhi2, - floattype_t[:, ::1] HRA, - floattype_t[:, 
::1] HRB, - floattype_t[:, ::1] HRE, - floattype_t[:, ::1] HRF, - float d - ): - """This function updates the Hx and Hy field components for the zminus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, HRA, HRB, HRE, HRF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialHx, materialHy - cdef float dz, dEx, dEy, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 - dz = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = zf - (k + 1) - RA0 = HRA[0, k] - RB0 = HRB[0, k] - RE0 = HRE[0, k] - RF0 = HRF[0, k] - RA1 = HRA[1, k] - RB1 = HRB[1, k] - RE1 = HRE[1, k] - RF1 = HRF[1, k] - RA01 = HRA[0, k] * HRA[1, k] - 1 - # Hx - materialHx = ID[3, ii, jj, kk] - dEy = (Ey[ii, jj, kk + 1] - Ey[ii, jj, kk]) / dz - Hx[ii, jj, kk] = Hx[ii, jj, kk] + updatecoeffsH[materialHx, 4] * (RA01 * dEy + RA1 * RB0 * HPhi1[0, i, j, k] + RB1 * HPhi1[1, i, j, k]) - HPhi1[1, i, j, k] = RE1 * HPhi1[1, i, j, k] - RF1 * (RA0 * dEy + RB0 * HPhi1[0, i, j, k]) - HPhi1[0, i, j, k] = RE0 * HPhi1[0, i, j, k] - RF0 * dEy - # Hy - materialHy = ID[4, ii, jj, kk] - dEx = (Ex[ii, jj, kk + 1] - Ex[ii, jj, kk]) / dz - Hy[ii, jj, kk] = Hy[ii, jj, kk] - updatecoeffsH[materialHy, 4] * (RA01 * dEx + RA1 * RB0 * HPhi2[0, i, j, k] + RB1 * HPhi2[1, i, j, k]) - HPhi2[1, i, j, k] = RE1 * HPhi2[1, i, j, k] - RF1 * (RA0 * dEx + RB0 * HPhi2[0, i, j, k]) - HPhi2[0, i, j, k] = RE0 * HPhi2[0, i, j, k] - RF0 * dEx - - -####################################################### -# Magnetic field PML updates - 2nd order - zplus slab # 
-####################################################### -cpdef void update_pml_2order_magnetic_zplus( - int xs, - int xf, - int ys, - int yf, - int zs, - int zf, - int nthreads, - floattype_t[:, ::1] updatecoeffsH, - np.uint32_t[:, :, :, ::1] ID, - floattype_t[:, :, ::1] Ex, - floattype_t[:, :, ::1] Ey, - floattype_t[:, :, ::1] Ez, - floattype_t[:, :, ::1] Hx, - floattype_t[:, :, ::1] Hy, - floattype_t[:, :, ::1] Hz, - floattype_t[:, :, :, ::1] HPhi1, - floattype_t[:, :, :, ::1] HPhi2, - floattype_t[:, ::1] HRA, - floattype_t[:, ::1] HRB, - floattype_t[:, ::1] HRE, - floattype_t[:, ::1] HRF, - float d - ): - """This function updates the Hx and Hy field components for the zplus slab. - - Args: - xs, xf, ys, yf, zs, zf (int): Cell coordinates of entire box - nthreads (int): Number of threads to use - updatecoeffs, ID, E, H (memoryviews): Access to update coeffients, ID and field component arrays - EPhi, HPhi, HRA, HRB, HRE, HRF (memoryviews): Access to PML coefficient arrays - d (float): Spatial discretisation, e.g. 
dx, dy or dz - """ - - cdef Py_ssize_t i, j, k, ii, jj, kk - cdef int nx, ny, nz, materialHx, materialHy - cdef float dz, dEx, dEy, RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01 - dz = d - nx = xf - xs - ny = yf - ys - nz = zf - zs - - for i in prange(0, nx, nogil=True, schedule='static', num_threads=nthreads): - ii = i + xs - for j in range(0, ny): - jj = j + ys - for k in range(0, nz): - kk = k + zs - RA0 = HRA[0, k] - RB0 = HRB[0, k] - RE0 = HRE[0, k] - RF0 = HRF[0, k] - RA1 = HRA[1, k] - RB1 = HRB[1, k] - RE1 = HRE[1, k] - RF1 = HRF[1, k] - RA01 = HRA[0, k] * HRA[1, k] - 1 - # Hx - materialHx = ID[3, ii, jj, kk] - dEy = (Ey[ii, jj, kk + 1] - Ey[ii, jj, kk]) / dz - Hx[ii, jj, kk] = Hx[ii, jj, kk] + updatecoeffsH[materialHx, 4] * (RA01 * dEy + RA1 * RB0 * HPhi1[0, i, j, k] + RB1 * HPhi1[1, i, j, k]) - HPhi1[1, i, j, k] = RE1 * HPhi1[1, i, j, k] - RF1 * (RA0 * dEy + RB0 * HPhi1[0, i, j, k]) - HPhi1[0, i, j, k] = RE0 * HPhi1[0, i, j, k] - RF0 * dEy - # Hy - materialHy = ID[4, ii, jj, kk] - dEx = (Ex[ii, jj, kk + 1] - Ex[ii, jj, kk]) / dz - Hy[ii, jj, kk] = Hy[ii, jj, kk] - updatecoeffsH[materialHy, 4] * (RA01 * dEx + RA1 * RB0 * HPhi2[0, i, j, k] + RB1 * HPhi2[1, i, j, k]) - HPhi2[1, i, j, k] = RE1 * HPhi2[1, i, j, k] - RF1 * (RA0 * dEx + RB0 * HPhi2[0, i, j, k]) - HPhi2[0, i, j, k] = RE0 * HPhi2[0, i, j, k] - RF0 * dEx diff --git a/gprMax/pml_updates_gpu.py b/gprMax/pml_updates_gpu.py deleted file mode 100644 index cd5571c8..00000000 --- a/gprMax/pml_updates_gpu.py +++ /dev/null @@ -1,2027 +0,0 @@ -# Copyright (C) 2015-2019: The University of Edinburgh -# Authors: Craig Warren and Antonis Giannopoulos -# -# This file is part of gprMax. -# -# gprMax is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
-# -# gprMax is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with gprMax. If not, see . - -from string import Template - -kernels_template_pml = Template(""" - -// Macros for converting subscripts to linear index: -#define INDEX2D_R(m, n) (m)*($NY_R)+(n) -#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n) -#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k) -#define INDEX4D_ID(p, i, j, k) (p)*($NX_ID)*($NY_ID)*($NZ_ID)+(i)*($NY_ID)*($NZ_ID)+(j)*($NZ_ID)+(k) -#define INDEX4D_EPHI1(p, i, j, k) (p)*(NX_EPHI1)*(NY_EPHI1)*(NZ_EPHI1)+(i)*(NY_EPHI1)*(NZ_EPHI1)+(j)*(NZ_EPHI1)+(k) -#define INDEX4D_EPHI2(p, i, j, k) (p)*(NX_EPHI2)*(NY_EPHI2)*(NZ_EPHI2)+(i)*(NY_EPHI2)*(NZ_EPHI2)+(j)*(NZ_EPHI2)+(k) -#define INDEX4D_HPHI1(p, i, j, k) (p)*(NX_HPHI1)*(NY_HPHI1)*(NZ_HPHI1)+(i)*(NY_HPHI1)*(NZ_HPHI1)+(j)*(NZ_HPHI1)+(k) -#define INDEX4D_HPHI2(p, i, j, k) (p)*(NX_HPHI2)*(NY_HPHI2)*(NZ_HPHI2)+(i)*(NY_HPHI2)*(NZ_HPHI2)+(j)*(NZ_HPHI2)+(k) - -// Material coefficients (read-only) in constant memory (64KB) -__device__ __constant__ $REAL updatecoeffsE[$N_updatecoeffsE]; -__device__ __constant__ $REAL updatecoeffsH[$N_updatecoeffsH]; - -////////////////////////////////////////////////////////// -// Electric field PML updates - 1st order - xminus slab // -////////////////////////////////////////////////////////// - -__global__ void update_pml_1order_electric_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_EPHI1, int NY_EPHI1, int NZ_EPHI1, int NX_EPHI2, int NY_EPHI2, int NZ_EPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *EPhi1, $REAL *EPhi2, const 
$REAL* __restrict__ ERA, const $REAL* __restrict__ ERB, const $REAL* __restrict__ ERE, const $REAL* __restrict__ ERF, $REAL d) { - - // This function updates the Ey and Ez field components for the xminus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of EPhi1 and EPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // EPhi, ERA, ERB, ERE, ERF: Access to PML electric coefficient arrays - // d: Spatial discretisation, e.g. dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML EPhi1 (4D) arrays - int p1 = idx / (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1); - int i1 = (idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) / (NY_EPHI1 * NZ_EPHI1); - int j1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) / NZ_EPHI1; - int k1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) % NZ_EPHI1; - - // Convert the linear index to subscripts for PML EPhi2 (4D) arrays - int p2 = idx / (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2); - int i2 = (idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) / (NY_EPHI2 * NZ_EPHI2); - int j2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) / NZ_EPHI2; - int k2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) % NZ_EPHI2; - - $REAL RA0, RB0, RE0, RF0, dHy, dHz; - $REAL dx = d; - int ii, jj, kk, materialEy, materialEz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = xf - i1; - jj = j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,i1)] - 1; - RB0 = ERB[INDEX2D_R(0,i1)]; - RE0 = ERE[INDEX2D_R(0,i1)]; - RF0 = ERF[INDEX2D_R(0,i1)]; - - // Ey - materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; - dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; - 
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA0 * dHz + RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] = RE0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] - RF0 * dHz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = xf - i2; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,i2)] - 1; - RB0 = ERB[INDEX2D_R(0,i2)]; - RE0 = ERE[INDEX2D_R(0,i2)]; - RF0 = ERF[INDEX2D_R(0,i2)]; - - // Ez - materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; - dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; - Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA0 * dHy + RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] = RE0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] - RF0 * dHy; - } -} - - -///////////////////////////////////////////////////////// -// Electric field PML updates - 1st order - xplus slab // -///////////////////////////////////////////////////////// - -__global__ void update_pml_1order_electric_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_EPHI1, int NY_EPHI1, int NZ_EPHI1, int NX_EPHI2, int NY_EPHI2, int NZ_EPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *EPhi1, $REAL *EPhi2, const $REAL* __restrict__ ERA, const $REAL* __restrict__ ERB, const $REAL* __restrict__ ERE, const $REAL* __restrict__ ERF, $REAL d) { - - // This function updates the Ey and Ez field components for the xplus slab. 
- // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of EPhi1 and EPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // EPhi, ERA, ERB, ERE, ERF: Access to PML electric coefficient arrays - // d: Spatial discretisation, e.g. dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML EPhi1 (4D) arrays - int p1 = idx / (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1); - int i1 = (idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) / (NY_EPHI1 * NZ_EPHI1); - int j1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) / NZ_EPHI1; - int k1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) % NZ_EPHI1; - - // Convert the linear index to subscripts for PML EPhi2 (4D) arrays - int p2 = idx / (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2); - int i2 = (idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) / (NY_EPHI2 * NZ_EPHI2); - int j2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) / NZ_EPHI2; - int k2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) % NZ_EPHI2; - - $REAL RA0, RB0, RE0, RF0, dHy, dHz; - $REAL dx = d; - int ii, jj, kk, materialEy, materialEz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,i1)] - 1; - RB0 = ERB[INDEX2D_R(0,i1)]; - RE0 = ERE[INDEX2D_R(0,i1)]; - RF0 = ERF[INDEX2D_R(0,i1)]; - - // Ey - materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; - dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; - Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA0 * dHz + RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] = RE0 * 
EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] - RF0 * dHz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,i2)] - 1; - RB0 = ERB[INDEX2D_R(0,i2)]; - RE0 = ERE[INDEX2D_R(0,i2)]; - RF0 = ERF[INDEX2D_R(0,i2)]; - - // Ez - materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; - dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; - Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA0 * dHy + RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] = RE0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] - RF0 * dHy; - } -} - - -////////////////////////////////////////////////////////// -// Electric field PML updates - 1st order - yminus slab // -////////////////////////////////////////////////////////// - -__global__ void update_pml_1order_electric_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_EPHI1, int NY_EPHI1, int NZ_EPHI1, int NX_EPHI2, int NY_EPHI2, int NZ_EPHI2, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *EPhi1, $REAL *EPhi2, const $REAL* __restrict__ ERA, const $REAL* __restrict__ ERB, const $REAL* __restrict__ ERE, const $REAL* __restrict__ ERF, $REAL d) { - - // This function updates the Ex and Ez field components for the yminus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of EPhi1 and EPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // EPhi, ERA, ERB, ERE, ERF: Access to PML electric coefficient arrays - // d: Spatial discretisation, e.g. 
dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML EPhi1 (4D) arrays - int p1 = idx / (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1); - int i1 = (idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) / (NY_EPHI1 * NZ_EPHI1); - int j1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) / NZ_EPHI1; - int k1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) % NZ_EPHI1; - - // Convert the linear index to subscripts for PML EPhi2 (4D) arrays - int p2 = idx / (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2); - int i2 = (idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) / (NY_EPHI2 * NZ_EPHI2); - int j2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) / NZ_EPHI2; - int k2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) % NZ_EPHI2; - - $REAL RA0, RB0, RE0, RF0, dHx, dHz; - $REAL dy = d; - int ii, jj, kk, materialEx, materialEz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = yf - j1; - kk = k1 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,j1)] - 1; - RB0 = ERB[INDEX2D_R(0,j1)]; - RE0 = ERE[INDEX2D_R(0,j1)]; - RF0 = ERF[INDEX2D_R(0,j1)]; - - // Ex - materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; - dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; - Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA0 * dHz + RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] = RE0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] - RF0 * dHz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = yf - j2; - kk = k2 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,j2)] - 1; - RB0 = ERB[INDEX2D_R(0,j2)]; - RE0 = ERE[INDEX2D_R(0,j2)]; - RF0 = ERF[INDEX2D_R(0,j2)]; - - 
// Ez - materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; - dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; - Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA0 * dHx + RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] = RE0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] - RF0 * dHx; - } -} - - -///////////////////////////////////////////////////////// -// Electric field PML updates - 1st order - yplus slab // -///////////////////////////////////////////////////////// - -__global__ void update_pml_1order_electric_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_EPHI1, int NY_EPHI1, int NZ_EPHI1, int NX_EPHI2, int NY_EPHI2, int NZ_EPHI2, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *EPhi1, $REAL *EPhi2, const $REAL* __restrict__ ERA, const $REAL* __restrict__ ERB, const $REAL* __restrict__ ERE, const $REAL* __restrict__ ERF, $REAL d) { - - // This function updates the Ex and Ez field components for the yplus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of EPhi1 and EPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // EPhi, ERA, ERB, ERE, ERF: Access to PML electric coefficient arrays - // d: Spatial discretisation, e.g. 
dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML EPhi1 (4D) arrays - int p1 = idx / (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1); - int i1 = (idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) / (NY_EPHI1 * NZ_EPHI1); - int j1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) / NZ_EPHI1; - int k1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) % NZ_EPHI1; - - // Convert the linear index to subscripts for PML EPhi2 (4D) arrays - int p2 = idx / (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2); - int i2 = (idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) / (NY_EPHI2 * NZ_EPHI2); - int j2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) / NZ_EPHI2; - int k2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) % NZ_EPHI2; - - $REAL RA0, RB0, RE0, RF0, dHx, dHz; - $REAL dy = d; - int ii, jj, kk, materialEx, materialEz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,j1)] - 1; - RB0 = ERB[INDEX2D_R(0,j1)]; - RE0 = ERE[INDEX2D_R(0,j1)]; - RF0 = ERF[INDEX2D_R(0,j1)]; - - // Ex - materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; - dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; - Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA0 * dHz + RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] = RE0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] - RF0 * dHz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,j2)] - 1; - RB0 = ERB[INDEX2D_R(0,j2)]; - RE0 = ERE[INDEX2D_R(0,j2)]; - RF0 = ERF[INDEX2D_R(0,j2)]; - - 
// Ez - materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; - dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; - Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA0 * dHx + RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] = RE0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] - RF0 * dHx; - } -} - - -////////////////////////////////////////////////////////// -// Electric field PML updates - 1st order - zminus slab // -////////////////////////////////////////////////////////// - -__global__ void update_pml_1order_electric_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_EPHI1, int NY_EPHI1, int NZ_EPHI1, int NX_EPHI2, int NY_EPHI2, int NZ_EPHI2, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *EPhi1, $REAL *EPhi2, const $REAL* __restrict__ ERA, const $REAL* __restrict__ ERB, const $REAL* __restrict__ ERE, const $REAL* __restrict__ ERF, $REAL d) { - - // This function updates the Ex and Ey field components for the zminus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of EPhi1 and EPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // EPhi, ERA, ERB, ERE, ERF: Access to PML electric coefficient arrays - // d: Spatial discretisation, e.g. 
dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML EPhi1 (4D) arrays - int p1 = idx / (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1); - int i1 = (idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) / (NY_EPHI1 * NZ_EPHI1); - int j1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) / NZ_EPHI1; - int k1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) % NZ_EPHI1; - - // Convert the linear index to subscripts for PML EPhi2 (4D) arrays - int p2 = idx / (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2); - int i2 = (idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) / (NY_EPHI2 * NZ_EPHI2); - int j2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) / NZ_EPHI2; - int k2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) % NZ_EPHI2; - - $REAL RA0, RB0, RE0, RF0, dHx, dHy; - $REAL dz = d; - int ii, jj, kk, materialEx, materialEy; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 + ys; - kk = zf - k1; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,k1)] - 1; - RB0 = ERB[INDEX2D_R(0,k1)]; - RE0 = ERE[INDEX2D_R(0,k1)]; - RF0 = ERF[INDEX2D_R(0,k1)]; - - // Ex - materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; - dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; - Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA0 * dHy + RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] = RE0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] - RF0 * dHy; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + xs; - kk = zf - k2; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,k2)] - 1; - RB0 = ERB[INDEX2D_R(0,k2)]; - RE0 = ERE[INDEX2D_R(0,k2)]; - RF0 = ERF[INDEX2D_R(0,k2)]; - - 
// Ey - materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; - dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; - Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA0 * dHx + RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] = RE0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] - RF0 * dHx; - } -} - - -///////////////////////////////////////////////////////// -// Electric field PML updates - 1st order - zplus slab // -///////////////////////////////////////////////////////// - -__global__ void update_pml_1order_electric_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_EPHI1, int NY_EPHI1, int NZ_EPHI1, int NX_EPHI2, int NY_EPHI2, int NZ_EPHI2, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *EPhi1, $REAL *EPhi2, const $REAL* __restrict__ ERA, const $REAL* __restrict__ ERB, const $REAL* __restrict__ ERE, const $REAL* __restrict__ ERF, $REAL d) { - - // This function updates the Ex and Ey field components for the zplus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of EPhi1 and EPhi2 PML arrays - // updatecoeffs, ID, E, H: Access to ID and field component arrays - // EPhi, ERA, ERB, ERE, ERF: Access to PML electric coefficient arrays - // d: Spatial discretisation, e.g. 
dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML EPhi1 (4D) arrays - int p1 = idx / (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1); - int i1 = (idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) / (NY_EPHI1 * NZ_EPHI1); - int j1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) / NZ_EPHI1; - int k1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) % NZ_EPHI1; - - // Convert the linear index to subscripts for PML EPhi2 (4D) arrays - int p2 = idx / (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2); - int i2 = (idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) / (NY_EPHI2 * NZ_EPHI2); - int j2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) / NZ_EPHI2; - int k2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) % NZ_EPHI2; - - $REAL RA0, RB0, RE0, RF0, dHx, dHy; - $REAL dz = d; - int ii, jj, kk, materialEx, materialEy; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,k1)] - 1; - RB0 = ERB[INDEX2D_R(0,k1)]; - RE0 = ERE[INDEX2D_R(0,k1)]; - RF0 = ERF[INDEX2D_R(0,k1)]; - - // Ex - materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; - dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; - Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA0 * dHy + RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] = RE0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] - RF0 * dHy; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,k2)] - 1; - RB0 = ERB[INDEX2D_R(0,k2)]; - RE0 = ERE[INDEX2D_R(0,k2)]; - RF0 = ERF[INDEX2D_R(0,k2)]; - - 
// Ey - materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; - dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; - Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA0 * dHx + RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] = RE0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] - RF0 * dHx; - } -} - - -////////////////////////////////////////////////////////// -// Magnetic field PML updates - 1st order - xminus slab // -////////////////////////////////////////////////////////// - -__global__ void update_pml_1order_magnetic_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_HPHI1, int NY_HPHI1, int NZ_HPHI1, int NX_HPHI2, int NY_HPHI2, int NZ_HPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *HPhi1, $REAL *HPhi2, const $REAL* __restrict__ HRA, const $REAL* __restrict__ HRB, const $REAL* __restrict__ HRE, const $REAL* __restrict__ HRF, $REAL d) { - - // This function updates the Hy and Hz field components for the xminus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of HPhi1 and HPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // HPhi, HRA, HRB, HRE, HRF: Access to PML magnetic coefficient arrays - // d: Spatial discretisation, e.g. 
dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML HPhi1 (4D) arrays - int p1 = idx / (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1); - int i1 = (idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) / (NY_HPHI1 * NZ_HPHI1); - int j1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) / NZ_HPHI1; - int k1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) % NZ_HPHI1; - - // Convert the linear index to subscripts for PML HPhi2 (4D) arrays - int p2 = idx / (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2); - int i2 = (idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) / (NY_HPHI2 * NZ_HPHI2); - int j2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) / NZ_HPHI2; - int k2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) % NZ_HPHI2; - - $REAL RA0, RB0, RE0, RF0, dEy, dEz; - $REAL dx = d; - int ii, jj, kk, materialHy, materialHz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = xf - (i1 + 1); - jj = j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,i1)] - 1; - RB0 = HRB[INDEX2D_R(0,i1)]; - RE0 = HRE[INDEX2D_R(0,i1)]; - RF0 = HRF[INDEX2D_R(0,i1)]; - - // Hy - materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; - dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx; - Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA0 * dEz + RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] = RE0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] - RF0 * dEz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = xf - (i2 + 1); - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,i2)] - 1; - RB0 = HRB[INDEX2D_R(0,i2)]; - RE0 = HRE[INDEX2D_R(0,i2)]; - RF0 = 
HRF[INDEX2D_R(0,i2)]; - - // Hz - materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; - dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx; - Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA0 * dEy + RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] = RE0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] - RF0 * dEy; - } -} - - -///////////////////////////////////////////////////////// -// Magnetic field PML updates - 1st order - xplus slab // -///////////////////////////////////////////////////////// - -__global__ void update_pml_1order_magnetic_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_HPHI1, int NY_HPHI1, int NZ_HPHI1, int NX_HPHI2, int NY_HPHI2, int NZ_HPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *HPhi1, $REAL *HPhi2, const $REAL* __restrict__ HRA, const $REAL* __restrict__ HRB, const $REAL* __restrict__ HRE, const $REAL* __restrict__ HRF, $REAL d) { - - // This function updates the Hy and Hz field components for the xplus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of HPhi1 and HPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // HPhi, HRA, HRB, HRE, HRF: Access to PML magnetic coefficient arrays - // d: Spatial discretisation, e.g. 
dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML HPhi1 (4D) arrays - int p1 = idx / (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1); - int i1 = (idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) / (NY_HPHI1 * NZ_HPHI1); - int j1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) / NZ_HPHI1; - int k1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) % NZ_HPHI1; - - // Convert the linear index to subscripts for PML HPhi2 (4D) arrays - int p2 = idx / (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2); - int i2 = (idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) / (NY_HPHI2 * NZ_HPHI2); - int j2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) / NZ_HPHI2; - int k2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) % NZ_HPHI2; - - $REAL RA0, RB0, RE0, RF0, dEy, dEz; - $REAL dx = d; - int ii, jj, kk, materialHy, materialHz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,i1)] - 1; - RB0 = HRB[INDEX2D_R(0,i1)]; - RE0 = HRE[INDEX2D_R(0,i1)]; - RF0 = HRF[INDEX2D_R(0,i1)]; - - // Hy - materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; - dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx; - Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA0 * dEz + RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] = RE0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] - RF0 * dEz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,i2)] - 1; - RB0 = HRB[INDEX2D_R(0,i2)]; - RE0 = HRE[INDEX2D_R(0,i2)]; - RF0 = HRF[INDEX2D_R(0,i2)]; - - 
// Hz - materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; - dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx; - Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA0 * dEy + RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] = RE0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] - RF0 * dEy; - } -} - - -////////////////////////////////////////////////////////// -// Magnetic field PML updates - 1st order - yminus slab // -////////////////////////////////////////////////////////// - -__global__ void update_pml_1order_magnetic_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_HPHI1, int NY_HPHI1, int NZ_HPHI1, int NX_HPHI2, int NY_HPHI2, int NZ_HPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *HPhi1, $REAL *HPhi2, const $REAL* __restrict__ HRA, const $REAL* __restrict__ HRB, const $REAL* __restrict__ HRE, const $REAL* __restrict__ HRF, $REAL d) { - - // This function updates the Hx and Hz field components for the yminus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of HPhi1 and HPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // HPhi, HRA, HRB, HRE, HRF: Access to PML magnetic coefficient arrays - // d: Spatial discretisation, e.g. 
dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML HPhi1 (4D) arrays - int p1 = idx / (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1); - int i1 = (idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) / (NY_HPHI1 * NZ_HPHI1); - int j1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) / NZ_HPHI1; - int k1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) % NZ_HPHI1; - - // Convert the linear index to subscripts for PML HPhi2 (4D) arrays - int p2 = idx / (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2); - int i2 = (idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) / (NY_HPHI2 * NZ_HPHI2); - int j2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) / NZ_HPHI2; - int k2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) % NZ_HPHI2; - - $REAL RA0, RB0, RE0, RF0, dEx, dEz; - $REAL dy = d; - int ii, jj, kk, materialHx, materialHz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = yf - (j1 + 1); - kk = k1 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,j1)] - 1; - RB0 = HRB[INDEX2D_R(0,j1)]; - RE0 = HRE[INDEX2D_R(0,j1)]; - RF0 = HRF[INDEX2D_R(0,j1)]; - - // Hx - materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; - dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy; - Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA0 * dEz + RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] = RE0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] - RF0 * dEz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = yf - (j2 + 1); - kk = k2 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,j2)] - 1; - RB0 = HRB[INDEX2D_R(0,j2)]; - RE0 = HRE[INDEX2D_R(0,j2)]; - RF0 = 
HRF[INDEX2D_R(0,j2)]; - - // Hz - materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; - dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy; - Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA0 * dEx + RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] = RE0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] - RF0 * dEx; - } -} - - -///////////////////////////////////////////////////////// -// Magnetic field PML updates - 1st order - yplus slab // -///////////////////////////////////////////////////////// - -__global__ void update_pml_1order_magnetic_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_HPHI1, int NY_HPHI1, int NZ_HPHI1, int NX_HPHI2, int NY_HPHI2, int NZ_HPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *HPhi1, $REAL *HPhi2, const $REAL* __restrict__ HRA, const $REAL* __restrict__ HRB, const $REAL* __restrict__ HRE, const $REAL* __restrict__ HRF, $REAL d) { - - // This function updates the Hx and Hz field components for the yplus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of HPhi1 and HPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // HPhi, HRA, HRB, HRE, HRF: Access to PML magnetic coefficient arrays - // d: Spatial discretisation, e.g. 
dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML HPhi1 (4D) arrays - int p1 = idx / (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1); - int i1 = (idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) / (NY_HPHI1 * NZ_HPHI1); - int j1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) / NZ_HPHI1; - int k1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) % NZ_HPHI1; - - // Convert the linear index to subscripts for PML HPhi2 (4D) arrays - int p2 = idx / (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2); - int i2 = (idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) / (NY_HPHI2 * NZ_HPHI2); - int j2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) / NZ_HPHI2; - int k2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) % NZ_HPHI2; - - $REAL RA0, RB0, RE0, RF0, dEx, dEz; - $REAL dy = d; - int ii, jj, kk, materialHx, materialHz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,j1)] - 1; - RB0 = HRB[INDEX2D_R(0,j1)]; - RE0 = HRE[INDEX2D_R(0,j1)]; - RF0 = HRF[INDEX2D_R(0,j1)]; - - // Hx - materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; - dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy; - Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA0 * dEz + RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] = RE0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] - RF0 * dEz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,j2)] - 1; - RB0 = HRB[INDEX2D_R(0,j2)]; - RE0 = HRE[INDEX2D_R(0,j2)]; - RF0 = HRF[INDEX2D_R(0,j2)]; - - 
// Hz - materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; - dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy; - Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA0 * dEx + RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] = RE0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] - RF0 * dEx; - } -} - - -////////////////////////////////////////////////////////// -// Magnetic field PML updates - 1st order - zminus slab // -////////////////////////////////////////////////////////// - -__global__ void update_pml_1order_magnetic_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_HPHI1, int NY_HPHI1, int NZ_HPHI1, int NX_HPHI2, int NY_HPHI2, int NZ_HPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *HPhi1, $REAL *HPhi2, const $REAL* __restrict__ HRA, const $REAL* __restrict__ HRB, const $REAL* __restrict__ HRE, const $REAL* __restrict__ HRF, $REAL d) { - - // This function updates the Hx and Hy field components for the zminus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of HPhi1 and HPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // HPhi, HRA, HRB, HRE, HRF: Access to PML magnetic coefficient arrays - // d: Spatial discretisation, e.g. 
dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML HPhi1 (4D) arrays - int p1 = idx / (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1); - int i1 = (idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) / (NY_HPHI1 * NZ_HPHI1); - int j1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) / NZ_HPHI1; - int k1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) % NZ_HPHI1; - - // Convert the linear index to subscripts for PML HPhi2 (4D) arrays - int p2 = idx / (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2); - int i2 = (idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) / (NY_HPHI2 * NZ_HPHI2); - int j2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) / NZ_HPHI2; - int k2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) % NZ_HPHI2; - - $REAL RA0, RB0, RE0, RF0, dEx, dEy; - $REAL dz = d; - int ii, jj, kk, materialHx, materialHy; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 + ys; - kk = zf - (k1 + 1); - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,k1)] - 1; - RB0 = HRB[INDEX2D_R(0,k1)]; - RE0 = HRE[INDEX2D_R(0,k1)]; - RF0 = HRF[INDEX2D_R(0,k1)]; - - // Hx - materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; - dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; - Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA0 * dEy + RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] = RE0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] - RF0 * dEy; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = zf - (k2 + 1); - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,k2)] - 1; - RB0 = HRB[INDEX2D_R(0,k2)]; - RE0 = HRE[INDEX2D_R(0,k2)]; - RF0 = 
HRF[INDEX2D_R(0,k2)]; - - // Hy - materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; - dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; - Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA0 * dEx + RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] = RE0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] - RF0 * dEx; - } -} - - -///////////////////////////////////////////////////////// -// Magnetic field PML updates - 1st order - zplus slab // -///////////////////////////////////////////////////////// - -__global__ void update_pml_1order_magnetic_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_HPHI1, int NY_HPHI1, int NZ_HPHI1, int NX_HPHI2, int NY_HPHI2, int NZ_HPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *HPhi1, $REAL *HPhi2, const $REAL* __restrict__ HRA, const $REAL* __restrict__ HRB, const $REAL* __restrict__ HRE, const $REAL* __restrict__ HRF, $REAL d) { - - // This function updates the Hx and Hy field components for the zplus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of HPhi1 and HPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // HPhi, HRA, HRB, HRE, HRF: Access to PML magnetic coefficient arrays - // d: Spatial discretisation, e.g. 
dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML HPhi1 (4D) arrays - int p1 = idx / (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1); - int i1 = (idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) / (NY_HPHI1 * NZ_HPHI1); - int j1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) / NZ_HPHI1; - int k1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) % NZ_HPHI1; - - // Convert the linear index to subscripts for PML HPhi2 (4D) arrays - int p2 = idx / (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2); - int i2 = (idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) / (NY_HPHI2 * NZ_HPHI2); - int j2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) / NZ_HPHI2; - int k2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) % NZ_HPHI2; - - $REAL RA0, RB0, RE0, RF0, dEx, dEy; - $REAL dz = d; - int ii, jj, kk, materialHx, materialHy; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,k1)] - 1; - RB0 = HRB[INDEX2D_R(0,k1)]; - RE0 = HRE[INDEX2D_R(0,k1)]; - RF0 = HRF[INDEX2D_R(0,k1)]; - - // Hx - materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; - dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; - Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA0 * dEy + RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] = RE0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] - RF0 * dEy; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,k2)] - 1; - RB0 = HRB[INDEX2D_R(0,k2)]; - RE0 = HRE[INDEX2D_R(0,k2)]; - RF0 = HRF[INDEX2D_R(0,k2)]; - - 
// Hy - materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; - dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; - Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA0 * dEx + RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] = RE0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] - RF0 * dEx; - } -} - - -////////////////////////////////////////////////////////// -// Electric field PML updates - 2nd order - xminus slab // -////////////////////////////////////////////////////////// - -__global__ void update_pml_2order_electric_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_EPHI1, int NY_EPHI1, int NZ_EPHI1, int NX_EPHI2, int NY_EPHI2, int NZ_EPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *EPhi1, $REAL *EPhi2, const $REAL* __restrict__ ERA, const $REAL* __restrict__ ERB, const $REAL* __restrict__ ERE, const $REAL* __restrict__ ERF, $REAL d) { - - // This function updates the Ey and Ez field components for the xminus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of EPhi1 and EPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // EPhi, ERA, ERB, ERE, ERF: Access to PML electric coefficient arrays - // d: Spatial discretisation, e.g. 
dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML EPhi1 (4D) arrays - int p1 = idx / (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1); - int i1 = (idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) / (NY_EPHI1 * NZ_EPHI1); - int j1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) / NZ_EPHI1; - int k1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) % NZ_EPHI1; - - // Convert the linear index to subscripts for PML EPhi2 (4D) arrays - int p2 = idx / (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2); - int i2 = (idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) / (NY_EPHI2 * NZ_EPHI2); - int j2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) / NZ_EPHI2; - int k2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) % NZ_EPHI2; - - $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz; - $REAL dx = d; - int ii, jj, kk, materialEy, materialEz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = xf - i1; - jj = j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,i1)]; - RB0 = ERB[INDEX2D_R(0,i1)]; - RE0 = ERE[INDEX2D_R(0,i1)]; - RF0 = ERF[INDEX2D_R(0,i1)]; - RA1 = ERA[INDEX2D_R(1,i1)]; - RB1 = ERB[INDEX2D_R(1,i1)]; - RE1 = ERE[INDEX2D_R(1,i1)]; - RF1 = ERF[INDEX2D_R(1,i1)]; - RA01 = ERA[INDEX2D_R(0,i1)] * ERA[INDEX2D_R(1,i1)] - 1; - - // Ey - materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; - dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; - Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] + RB1 * EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)] = RE1 * EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * 
EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] = RE0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] - RF0 * dHz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = xf - i2; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,i2)]; - RB0 = ERB[INDEX2D_R(0,i2)]; - RE0 = ERE[INDEX2D_R(0,i2)]; - RF0 = ERF[INDEX2D_R(0,i2)]; - RA1 = ERA[INDEX2D_R(1,i2)]; - RB1 = ERB[INDEX2D_R(1,i2)]; - RE1 = ERE[INDEX2D_R(1,i2)]; - RF1 = ERF[INDEX2D_R(1,i2)]; - RA01 = ERA[INDEX2D_R(0,i2)] * ERA[INDEX2D_R(1,i2)] - 1; - - // Ez - materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; - dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; - Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] + RB1 * EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)] = RE1 * EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] = RE0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] - RF0 * dHy; - } -} - - -///////////////////////////////////////////////////////// -// Electric field PML updates - 2nd order - xplus slab // -///////////////////////////////////////////////////////// - -__global__ void update_pml_2order_electric_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_EPHI1, int NY_EPHI1, int NZ_EPHI1, int NX_EPHI2, int NY_EPHI2, int NZ_EPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *EPhi1, $REAL *EPhi2, const $REAL* __restrict__ ERA, const $REAL* __restrict__ ERB, const $REAL* __restrict__ ERE, const $REAL* __restrict__ ERF, $REAL d) { - - // This function updates the Ey and Ez field components for the xplus slab. 
- // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of EPhi1 and EPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // EPhi, ERA, ERB, ERE, ERF: Access to PML electric coefficient arrays - // d: Spatial discretisation, e.g. dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML EPhi1 (4D) arrays - int p1 = idx / (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1); - int i1 = (idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) / (NY_EPHI1 * NZ_EPHI1); - int j1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) / NZ_EPHI1; - int k1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) % NZ_EPHI1; - - // Convert the linear index to subscripts for PML EPhi2 (4D) arrays - int p2 = idx / (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2); - int i2 = (idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) / (NY_EPHI2 * NZ_EPHI2); - int j2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) / NZ_EPHI2; - int k2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) % NZ_EPHI2; - - $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz; - $REAL dx = d; - int ii, jj, kk, materialEy, materialEz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,i1)]; - RB0 = ERB[INDEX2D_R(0,i1)]; - RE0 = ERE[INDEX2D_R(0,i1)]; - RF0 = ERF[INDEX2D_R(0,i1)]; - RA1 = ERA[INDEX2D_R(1,i1)]; - RB1 = ERB[INDEX2D_R(1,i1)]; - RE1 = ERE[INDEX2D_R(1,i1)]; - RF1 = ERF[INDEX2D_R(1,i1)]; - RA01 = ERA[INDEX2D_R(0,i1)] * ERA[INDEX2D_R(1,i1)] - 1; - - // Ey - materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; - dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; - 
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] + RB1 * EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)] = RE1 * EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] = RE0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] - RF0 * dHz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,i2)]; - RB0 = ERB[INDEX2D_R(0,i2)]; - RE0 = ERE[INDEX2D_R(0,i2)]; - RF0 = ERF[INDEX2D_R(0,i2)]; - RA1 = ERA[INDEX2D_R(1,i2)]; - RB1 = ERB[INDEX2D_R(1,i2)]; - RE1 = ERE[INDEX2D_R(1,i2)]; - RF1 = ERF[INDEX2D_R(1,i2)]; - RA01 = ERA[INDEX2D_R(0,i2)] * ERA[INDEX2D_R(1,i2)] - 1; - - // Ez - materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; - dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx; - Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] + RB1 * EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)] = RE1 * EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] = RE0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] - RF0 * dHy; - } -} - - -////////////////////////////////////////////////////////// -// Electric field PML updates - 2nd order - yminus slab // -////////////////////////////////////////////////////////// - -__global__ void update_pml_2order_electric_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_EPHI1, int NY_EPHI1, int NZ_EPHI1, int NX_EPHI2, int NY_EPHI2, int NZ_EPHI2, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* 
__restrict__ Hz, $REAL *EPhi1, $REAL *EPhi2, const $REAL* __restrict__ ERA, const $REAL* __restrict__ ERB, const $REAL* __restrict__ ERE, const $REAL* __restrict__ ERF, $REAL d) { - - // This function updates the Ex and Ez field components for the yminus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of EPhi1 and EPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // EPhi, ERA, ERB, ERE, ERF: Access to PML electric coefficient arrays - // d: Spatial discretisation, e.g. dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML EPhi1 (4D) arrays - int p1 = idx / (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1); - int i1 = (idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) / (NY_EPHI1 * NZ_EPHI1); - int j1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) / NZ_EPHI1; - int k1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) % NZ_EPHI1; - - // Convert the linear index to subscripts for PML EPhi2 (4D) arrays - int p2 = idx / (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2); - int i2 = (idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) / (NY_EPHI2 * NZ_EPHI2); - int j2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) / NZ_EPHI2; - int k2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) % NZ_EPHI2; - - $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz; - $REAL dy = d; - int ii, jj, kk, materialEx, materialEz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = yf - j1; - kk = k1 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,j1)]; - RB0 = ERB[INDEX2D_R(0,j1)]; - RE0 = ERE[INDEX2D_R(0,j1)]; - RF0 = ERF[INDEX2D_R(0,j1)]; - RA1 = ERA[INDEX2D_R(1,j1)]; - RB1 = ERB[INDEX2D_R(1,j1)]; - RE1 = 
ERE[INDEX2D_R(1,j1)]; - RF1 = ERF[INDEX2D_R(1,j1)]; - RA01 = ERA[INDEX2D_R(0,j1)] * ERA[INDEX2D_R(1,j1)] - 1; - - // Ex - materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; - dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; - Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] + RB1 * EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)] = RE1 * EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] = RE0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] - RF0 * dHz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = yf - j2; - kk = k2 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,j2)]; - RB0 = ERB[INDEX2D_R(0,j2)]; - RE0 = ERE[INDEX2D_R(0,j2)]; - RF0 = ERF[INDEX2D_R(0,j2)]; - RA1 = ERA[INDEX2D_R(1,j2)]; - RB1 = ERB[INDEX2D_R(1,j2)]; - RE1 = ERE[INDEX2D_R(1,j2)]; - RF1 = ERF[INDEX2D_R(1,j2)]; - RA01 = ERA[INDEX2D_R(0,j2)] * ERA[INDEX2D_R(1,j2)] - 1; - - // Ez - materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; - dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; - Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] + RB1 * EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)] = RE1 * EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] = RE0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] - RF0 * dHx; - } -} - - -///////////////////////////////////////////////////////// -// Electric field PML updates - 2nd order - yplus slab // -///////////////////////////////////////////////////////// - -__global__ void update_pml_2order_electric_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int 
NX_EPHI1, int NY_EPHI1, int NZ_EPHI1, int NX_EPHI2, int NY_EPHI2, int NZ_EPHI2, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *EPhi1, $REAL *EPhi2, const $REAL* __restrict__ ERA, const $REAL* __restrict__ ERB, const $REAL* __restrict__ ERE, const $REAL* __restrict__ ERF, $REAL d) { - - // This function updates the Ex and Ez field components for the yplus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of EPhi1 and EPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // EPhi, ERA, ERB, ERE, ERF: Access to PML electric coefficient arrays - // d: Spatial discretisation, e.g. dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML EPhi1 (4D) arrays - int p1 = idx / (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1); - int i1 = (idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) / (NY_EPHI1 * NZ_EPHI1); - int j1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) / NZ_EPHI1; - int k1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) % NZ_EPHI1; - - // Convert the linear index to subscripts for PML EPhi2 (4D) arrays - int p2 = idx / (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2); - int i2 = (idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) / (NY_EPHI2 * NZ_EPHI2); - int j2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) / NZ_EPHI2; - int k2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) % NZ_EPHI2; - - $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz; - $REAL dy = d; - int ii, jj, kk, materialEx, materialEz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 
+ ys; - kk = k1 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,j1)]; - RB0 = ERB[INDEX2D_R(0,j1)]; - RE0 = ERE[INDEX2D_R(0,j1)]; - RF0 = ERF[INDEX2D_R(0,j1)]; - RA1 = ERA[INDEX2D_R(1,j1)]; - RB1 = ERB[INDEX2D_R(1,j1)]; - RE1 = ERE[INDEX2D_R(1,j1)]; - RF1 = ERF[INDEX2D_R(1,j1)]; - RA01 = ERA[INDEX2D_R(0,j1)] * ERA[INDEX2D_R(1,j1)] - 1; - - // Ex - materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; - dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; - Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] + RB1 * EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)] = RE1 * EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] = RE0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] - RF0 * dHz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,j2)]; - RB0 = ERB[INDEX2D_R(0,j2)]; - RE0 = ERE[INDEX2D_R(0,j2)]; - RF0 = ERF[INDEX2D_R(0,j2)]; - RA1 = ERA[INDEX2D_R(1,j2)]; - RB1 = ERB[INDEX2D_R(1,j2)]; - RE1 = ERE[INDEX2D_R(1,j2)]; - RF1 = ERF[INDEX2D_R(1,j2)]; - RA01 = ERA[INDEX2D_R(0,j2)] * ERA[INDEX2D_R(1,j2)] - 1; - - // Ez - materialEz = ID[INDEX4D_ID(2,ii,jj,kk)]; - dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy; - Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] + RB1 * EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)] = RE1 * EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] = RE0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] - RF0 * dHx; - } -} - - 
-////////////////////////////////////////////////////////// -// Electric field PML updates - 2nd order - zminus slab // -////////////////////////////////////////////////////////// - -__global__ void update_pml_2order_electric_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_EPHI1, int NY_EPHI1, int NZ_EPHI1, int NX_EPHI2, int NY_EPHI2, int NZ_EPHI2, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *EPhi1, $REAL *EPhi2, const $REAL* __restrict__ ERA, const $REAL* __restrict__ ERB, const $REAL* __restrict__ ERE, const $REAL* __restrict__ ERF, $REAL d) { - - // This function updates the Ex and Ey field components for the zminus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of EPhi1 and EPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // EPhi, ERA, ERB, ERE, ERF: Access to PML electric coefficient arrays - // d: Spatial discretisation, e.g. 
dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML EPhi1 (4D) arrays - int p1 = idx / (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1); - int i1 = (idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) / (NY_EPHI1 * NZ_EPHI1); - int j1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) / NZ_EPHI1; - int k1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) % NZ_EPHI1; - - // Convert the linear index to subscripts for PML EPhi2 (4D) arrays - int p2 = idx / (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2); - int i2 = (idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) / (NY_EPHI2 * NZ_EPHI2); - int j2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) / NZ_EPHI2; - int k2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) % NZ_EPHI2; - - $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy; - $REAL dz = d; - int ii, jj, kk, materialEx, materialEy; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 + ys; - kk = zf - k1; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,k1)]; - RB0 = ERB[INDEX2D_R(0,k1)]; - RE0 = ERE[INDEX2D_R(0,k1)]; - RF0 = ERF[INDEX2D_R(0,k1)]; - RA1 = ERA[INDEX2D_R(1,k1)]; - RB1 = ERB[INDEX2D_R(1,k1)]; - RE1 = ERE[INDEX2D_R(1,k1)]; - RF1 = ERF[INDEX2D_R(1,k1)]; - RA01 = ERA[INDEX2D_R(0,k1)] * ERA[INDEX2D_R(1,k1)] - 1; - - // Ex - materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; - dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; - Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] + RB1 * EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)] = RE1 * EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * 
EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] = RE0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] - RF0 * dHy; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + xs; - kk = zf - k2; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,k2)]; - RB0 = ERB[INDEX2D_R(0,k2)]; - RE0 = ERE[INDEX2D_R(0,k2)]; - RF0 = ERF[INDEX2D_R(0,k2)]; - RA1 = ERA[INDEX2D_R(1,k2)]; - RB1 = ERB[INDEX2D_R(1,k2)]; - RE1 = ERE[INDEX2D_R(1,k2)]; - RF1 = ERF[INDEX2D_R(1,k2)]; - RA01 = ERA[INDEX2D_R(0,k2)] * ERA[INDEX2D_R(1,k2)] - 1; - - // Ey - materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; - dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; - Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] + RB1 * EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)] = RE1 * EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] = RE0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] - RF0 * dHx; - } -} - - -///////////////////////////////////////////////////////// -// Electric field PML updates - 2nd order - zplus slab // -///////////////////////////////////////////////////////// - -__global__ void update_pml_2order_electric_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_EPHI1, int NY_EPHI1, int NZ_EPHI1, int NX_EPHI2, int NY_EPHI2, int NZ_EPHI2, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *EPhi1, $REAL *EPhi2, const $REAL* __restrict__ ERA, const $REAL* __restrict__ ERB, const $REAL* __restrict__ ERE, const $REAL* __restrict__ ERF, $REAL d) { - - // This function updates the Ex and Ey field components for the zplus slab. 
- // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_EPHI, NY_EPHI, NZ_EPHI: Dimensions of EPhi1 and EPhi2 PML arrays - // updatecoeffs, ID, E, H: Access to ID and field component arrays - // EPhi, ERA, ERB, ERE, ERF: Access to PML electric coefficient arrays - // d: Spatial discretisation, e.g. dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML EPhi1 (4D) arrays - int p1 = idx / (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1); - int i1 = (idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) / (NY_EPHI1 * NZ_EPHI1); - int j1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) / NZ_EPHI1; - int k1 = ((idx % (NX_EPHI1 * NY_EPHI1 * NZ_EPHI1)) % (NY_EPHI1 * NZ_EPHI1)) % NZ_EPHI1; - - // Convert the linear index to subscripts for PML EPhi2 (4D) arrays - int p2 = idx / (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2); - int i2 = (idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) / (NY_EPHI2 * NZ_EPHI2); - int j2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) / NZ_EPHI2; - int k2 = ((idx % (NX_EPHI2 * NY_EPHI2 * NZ_EPHI2)) % (NY_EPHI2 * NZ_EPHI2)) % NZ_EPHI2; - - $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy; - $REAL dz = d; - int ii, jj, kk, materialEx, materialEy; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,k1)]; - RB0 = ERB[INDEX2D_R(0,k1)]; - RE0 = ERE[INDEX2D_R(0,k1)]; - RF0 = ERF[INDEX2D_R(0,k1)]; - RA1 = ERA[INDEX2D_R(1,k1)]; - RB1 = ERB[INDEX2D_R(1,k1)]; - RE1 = ERE[INDEX2D_R(1,k1)]; - RF1 = ERF[INDEX2D_R(1,k1)]; - RA01 = ERA[INDEX2D_R(0,k1)] * ERA[INDEX2D_R(1,k1)] - 1; - - // Ex - materialEx = ID[INDEX4D_ID(0,ii,jj,kk)]; - dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; - 
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] + RB1 * EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)] = RE1 * EPhi1[INDEX4D_EPHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)]); - EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] = RE0 * EPhi1[INDEX4D_EPHI1(0,i1,j1,k1)] - RF0 * dHy; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = ERA[INDEX2D_R(0,k2)]; - RB0 = ERB[INDEX2D_R(0,k2)]; - RE0 = ERE[INDEX2D_R(0,k2)]; - RF0 = ERF[INDEX2D_R(0,k2)]; - RA1 = ERA[INDEX2D_R(1,k2)]; - RB1 = ERB[INDEX2D_R(1,k2)]; - RE1 = ERE[INDEX2D_R(1,k2)]; - RF1 = ERF[INDEX2D_R(1,k2)]; - RA01 = ERA[INDEX2D_R(0,k2)] * ERA[INDEX2D_R(1,k2)] - 1; - - // Ey - materialEy = ID[INDEX4D_ID(1,ii,jj,kk)]; - dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz; - Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] + RB1 * EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)] = RE1 * EPhi2[INDEX4D_EPHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)]); - EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] = RE0 * EPhi2[INDEX4D_EPHI2(0,i2,j2,k2)] - RF0 * dHx; - } -} - - -////////////////////////////////////////////////////////// -// Magnetic field PML updates - 2nd order - xminus slab // -////////////////////////////////////////////////////////// - -__global__ void update_pml_2order_magnetic_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_HPHI1, int NY_HPHI1, int NZ_HPHI1, int NX_HPHI2, int NY_HPHI2, int NZ_HPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL 
*Hy, $REAL *Hz, $REAL *HPhi1, $REAL *HPhi2, const $REAL* __restrict__ HRA, const $REAL* __restrict__ HRB, const $REAL* __restrict__ HRE, const $REAL* __restrict__ HRF, $REAL d) { - - // This function updates the Hy and Hz field components for the xminus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of HPhi1 and HPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // HPhi, HRA, HRB, HRE, HRF: Access to PML magnetic coefficient arrays - // d: Spatial discretisation, e.g. dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML HPhi1 (4D) arrays - int p1 = idx / (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1); - int i1 = (idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) / (NY_HPHI1 * NZ_HPHI1); - int j1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) / NZ_HPHI1; - int k1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) % NZ_HPHI1; - - // Convert the linear index to subscripts for PML HPhi2 (4D) arrays - int p2 = idx / (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2); - int i2 = (idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) / (NY_HPHI2 * NZ_HPHI2); - int j2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) / NZ_HPHI2; - int k2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) % NZ_HPHI2; - - $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz; - $REAL dx = d; - int ii, jj, kk, materialHy, materialHz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = xf - (i1 + 1); - jj = j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,i1)]; - RB0 = HRB[INDEX2D_R(0,i1)]; - RE0 = HRE[INDEX2D_R(0,i1)]; - RF0 = HRF[INDEX2D_R(0,i1)]; - RA1 = HRA[INDEX2D_R(1,i1)]; - RB1 = HRB[INDEX2D_R(1,i1)]; - RE1 = 
HRE[INDEX2D_R(1,i1)]; - RF1 = HRF[INDEX2D_R(1,i1)]; - RA01 = HRA[INDEX2D_R(0,i1)] * HRA[INDEX2D_R(1,i1)] - 1; - - // Hy - materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; - dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx; - Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] + RB1 * HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)] = RE1 * HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] = RE0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] - RF0 * dEz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = xf - (i2 + 1); - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,i2)]; - RB0 = HRB[INDEX2D_R(0,i2)]; - RE0 = HRE[INDEX2D_R(0,i2)]; - RF0 = HRF[INDEX2D_R(0,i2)]; - RA1 = HRA[INDEX2D_R(1,i2)]; - RB1 = HRB[INDEX2D_R(1,i2)]; - RE1 = HRE[INDEX2D_R(1,i2)]; - RF1 = HRF[INDEX2D_R(1,i2)]; - RA01 = HRA[INDEX2D_R(0,i2)] * HRA[INDEX2D_R(1,i2)] - 1; - - // Hz - materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; - dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx; - Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] + RB1 * HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)] = RE1 * HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] = RE0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] - RF0 * dEy; - } -} - - -///////////////////////////////////////////////////////// -// Magnetic field PML updates - 2nd order - xplus slab // -///////////////////////////////////////////////////////// - -__global__ void update_pml_2order_magnetic_xplus(int xs, int xf, int ys, int yf, int zs, int zf, 
int NX_HPHI1, int NY_HPHI1, int NZ_HPHI1, int NX_HPHI2, int NY_HPHI2, int NZ_HPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *HPhi1, $REAL *HPhi2, const $REAL* __restrict__ HRA, const $REAL* __restrict__ HRB, const $REAL* __restrict__ HRE, const $REAL* __restrict__ HRF, $REAL d) { - - // This function updates the Hy and Hz field components for the xplus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of HPhi1 and HPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // HPhi, HRA, HRB, HRE, HRF: Access to PML magnetic coefficient arrays - // d: Spatial discretisation, e.g. dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML HPhi1 (4D) arrays - int p1 = idx / (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1); - int i1 = (idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) / (NY_HPHI1 * NZ_HPHI1); - int j1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) / NZ_HPHI1; - int k1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) % NZ_HPHI1; - - // Convert the linear index to subscripts for PML HPhi2 (4D) arrays - int p2 = idx / (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2); - int i2 = (idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) / (NY_HPHI2 * NZ_HPHI2); - int j2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) / NZ_HPHI2; - int k2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) % NZ_HPHI2; - - $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz; - $REAL dx = d; - int ii, jj, kk, materialHy, materialHz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj 
= j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,i1)]; - RB0 = HRB[INDEX2D_R(0,i1)]; - RE0 = HRE[INDEX2D_R(0,i1)]; - RF0 = HRF[INDEX2D_R(0,i1)]; - RA1 = HRA[INDEX2D_R(1,i1)]; - RB1 = HRB[INDEX2D_R(1,i1)]; - RE1 = HRE[INDEX2D_R(1,i1)]; - RF1 = HRF[INDEX2D_R(1,i1)]; - RA01 = HRA[INDEX2D_R(0,i1)] * HRA[INDEX2D_R(1,i1)] - 1; - - // Hy - materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; - dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx; - Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] + RB1 * HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)] = RE1 * HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] = RE0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] - RF0 * dEz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,i2)]; - RB0 = HRB[INDEX2D_R(0,i2)]; - RE0 = HRE[INDEX2D_R(0,i2)]; - RF0 = HRF[INDEX2D_R(0,i2)]; - RA1 = HRA[INDEX2D_R(1,i2)]; - RB1 = HRB[INDEX2D_R(1,i2)]; - RE1 = HRE[INDEX2D_R(1,i2)]; - RF1 = HRF[INDEX2D_R(1,i2)]; - RA01 = HRA[INDEX2D_R(0,i2)] * HRA[INDEX2D_R(1,i2)] - 1; - - // Hz - materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; - dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx; - Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] + RB1 * HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)] = RE1 * HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] = RE0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] - RF0 * dEy; - } -} - - 
-////////////////////////////////////////////////////////// -// Magnetic field PML updates - 2nd order - yminus slab // -////////////////////////////////////////////////////////// - -__global__ void update_pml_2order_magnetic_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_HPHI1, int NY_HPHI1, int NZ_HPHI1, int NX_HPHI2, int NY_HPHI2, int NZ_HPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *HPhi1, $REAL *HPhi2, const $REAL* __restrict__ HRA, const $REAL* __restrict__ HRB, const $REAL* __restrict__ HRE, const $REAL* __restrict__ HRF, $REAL d) { - - // This function updates the Hx and Hz field components for the yminus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of HPhi1 and HPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // HPhi, HRA, HRB, HRE, HRF: Access to PML magnetic coefficient arrays - // d: Spatial discretisation, e.g. 
dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML HPhi1 (4D) arrays - int p1 = idx / (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1); - int i1 = (idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) / (NY_HPHI1 * NZ_HPHI1); - int j1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) / NZ_HPHI1; - int k1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) % NZ_HPHI1; - - // Convert the linear index to subscripts for PML HPhi2 (4D) arrays - int p2 = idx / (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2); - int i2 = (idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) / (NY_HPHI2 * NZ_HPHI2); - int j2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) / NZ_HPHI2; - int k2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) % NZ_HPHI2; - - $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz; - $REAL dy = d; - int ii, jj, kk, materialHx, materialHz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = yf - (j1 + 1); - kk = k1 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,j1)]; - RB0 = HRB[INDEX2D_R(0,j1)]; - RE0 = HRE[INDEX2D_R(0,j1)]; - RF0 = HRF[INDEX2D_R(0,j1)]; - RA1 = HRA[INDEX2D_R(1,j1)]; - RB1 = HRB[INDEX2D_R(1,j1)]; - RE1 = HRE[INDEX2D_R(1,j1)]; - RF1 = HRF[INDEX2D_R(1,j1)]; - RA01 = HRA[INDEX2D_R(0,j1)] * HRA[INDEX2D_R(1,j1)] - 1; - - // Hx - materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; - dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy; - Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] + RB1 * HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)] = RE1 * HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * 
HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] = RE0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] - RF0 * dEz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = yf - (j2 + 1); - kk = k2 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,j2)]; - RB0 = HRB[INDEX2D_R(0,j2)]; - RE0 = HRE[INDEX2D_R(0,j2)]; - RF0 = HRF[INDEX2D_R(0,j2)]; - RA1 = HRA[INDEX2D_R(1,j2)]; - RB1 = HRB[INDEX2D_R(1,j2)]; - RE1 = HRE[INDEX2D_R(1,j2)]; - RF1 = HRF[INDEX2D_R(1,j2)]; - RA01 = HRA[INDEX2D_R(0,j2)] * HRA[INDEX2D_R(1,j2)] - 1; - - // Hz - materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; - dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy; - Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] + RB1 * HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)] = RE1 * HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] = RE0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] - RF0 * dEx; - } -} - - -///////////////////////////////////////////////////////// -// Magnetic field PML updates - 2nd order - yplus slab // -///////////////////////////////////////////////////////// - -__global__ void update_pml_2order_magnetic_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_HPHI1, int NY_HPHI1, int NZ_HPHI1, int NX_HPHI2, int NY_HPHI2, int NZ_HPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *HPhi1, $REAL *HPhi2, const $REAL* __restrict__ HRA, const $REAL* __restrict__ HRB, const $REAL* __restrict__ HRE, const $REAL* __restrict__ HRF, $REAL d) { - - // This function updates the Hx and Hz field components for the yplus slab. 
- // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of HPhi1 and HPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // HPhi, HRA, HRB, HRE, HRF: Access to PML magnetic coefficient arrays - // d: Spatial discretisation, e.g. dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML HPhi1 (4D) arrays - int p1 = idx / (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1); - int i1 = (idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) / (NY_HPHI1 * NZ_HPHI1); - int j1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) / NZ_HPHI1; - int k1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) % NZ_HPHI1; - - // Convert the linear index to subscripts for PML HPhi2 (4D) arrays - int p2 = idx / (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2); - int i2 = (idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) / (NY_HPHI2 * NZ_HPHI2); - int j2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) / NZ_HPHI2; - int k2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) % NZ_HPHI2; - - $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz; - $REAL dy = d; - int ii, jj, kk, materialHx, materialHz; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,j1)]; - RB0 = HRB[INDEX2D_R(0,j1)]; - RE0 = HRE[INDEX2D_R(0,j1)]; - RF0 = HRF[INDEX2D_R(0,j1)]; - RA1 = HRA[INDEX2D_R(1,j1)]; - RB1 = HRB[INDEX2D_R(1,j1)]; - RE1 = HRE[INDEX2D_R(1,j1)]; - RF1 = HRF[INDEX2D_R(1,j1)]; - RA01 = HRA[INDEX2D_R(0,j1)] * HRA[INDEX2D_R(1,j1)] - 1; - - // Hx - materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; - dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy; - 
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] + RB1 * HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)] = RE1 * HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] = RE0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] - RF0 * dEz; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,j2)]; - RB0 = HRB[INDEX2D_R(0,j2)]; - RE0 = HRE[INDEX2D_R(0,j2)]; - RF0 = HRF[INDEX2D_R(0,j2)]; - RA1 = HRA[INDEX2D_R(1,j2)]; - RB1 = HRB[INDEX2D_R(1,j2)]; - RE1 = HRE[INDEX2D_R(1,j2)]; - RF1 = HRF[INDEX2D_R(1,j2)]; - RA01 = HRA[INDEX2D_R(0,j2)] * HRA[INDEX2D_R(1,j2)] - 1; - - // Hz - materialHz = ID[INDEX4D_ID(5,ii,jj,kk)]; - dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy; - Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] + RB1 * HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)] = RE1 * HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] = RE0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] - RF0 * dEx; - } -} - - -////////////////////////////////////////////////////////// -// Magnetic field PML updates - 2nd order - zminus slab // -////////////////////////////////////////////////////////// - -__global__ void update_pml_2order_magnetic_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_HPHI1, int NY_HPHI1, int NZ_HPHI1, int NX_HPHI2, int NY_HPHI2, int NZ_HPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* 
__restrict__ Hz, $REAL *HPhi1, $REAL *HPhi2, const $REAL* __restrict__ HRA, const $REAL* __restrict__ HRB, const $REAL* __restrict__ HRE, const $REAL* __restrict__ HRF, $REAL d) { - - // This function updates the Hx and Hy field components for the zminus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of HPhi1 and HPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // HPhi, HRA, HRB, HRE, HRF: Access to PML magnetic coefficient arrays - // d: Spatial discretisation, e.g. dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML HPhi1 (4D) arrays - int p1 = idx / (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1); - int i1 = (idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) / (NY_HPHI1 * NZ_HPHI1); - int j1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) / NZ_HPHI1; - int k1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) % NZ_HPHI1; - - // Convert the linear index to subscripts for PML HPhi2 (4D) arrays - int p2 = idx / (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2); - int i2 = (idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) / (NY_HPHI2 * NZ_HPHI2); - int j2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) / NZ_HPHI2; - int k2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) % NZ_HPHI2; - - $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy; - $REAL dz = d; - int ii, jj, kk, materialHx, materialHy; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj = j1 + ys; - kk = zf - (k1 + 1); - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,k1)]; - RB0 = HRB[INDEX2D_R(0,k1)]; - RE0 = HRE[INDEX2D_R(0,k1)]; - RF0 = HRF[INDEX2D_R(0,k1)]; - RA1 = HRA[INDEX2D_R(1,k1)]; - RB1 = HRB[INDEX2D_R(1,k1)]; - RE1 = 
HRE[INDEX2D_R(1,k1)]; - RF1 = HRF[INDEX2D_R(1,k1)]; - RA01 = HRA[INDEX2D_R(0,k1)] * HRA[INDEX2D_R(1,k1)] - 1; - - // Hx - materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; - dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; - Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] + RB1 * HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)] = RE1 * HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy + RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] = RE0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] - RF0 * dEy; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = zf - (k2 + 1); - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,k2)]; - RB0 = HRB[INDEX2D_R(0,k2)]; - RE0 = HRE[INDEX2D_R(0,k2)]; - RF0 = HRF[INDEX2D_R(0,k2)]; - RA1 = HRA[INDEX2D_R(1,k2)]; - RB1 = HRB[INDEX2D_R(1,k2)]; - RE1 = HRE[INDEX2D_R(1,k2)]; - RF1 = HRF[INDEX2D_R(1,k2)]; - RA01 = HRA[INDEX2D_R(0,k2)] * HRA[INDEX2D_R(1,k2)] - 1; - - // Hy - materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; - dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; - Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] + RB1 * HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)] = RE1 * HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] = RE0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] - RF0 * dEx; - } -} - - -///////////////////////////////////////////////////////// -// Magnetic field PML updates - 2nd order - zplus slab // -///////////////////////////////////////////////////////// - -__global__ void update_pml_2order_magnetic_zplus(int xs, int xf, int ys, int yf, int zs, int zf, 
int NX_HPHI1, int NY_HPHI1, int NZ_HPHI1, int NX_HPHI2, int NY_HPHI2, int NZ_HPHI2, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *HPhi1, $REAL *HPhi2, const $REAL* __restrict__ HRA, const $REAL* __restrict__ HRB, const $REAL* __restrict__ HRE, const $REAL* __restrict__ HRF, $REAL d) { - - // This function updates the Hx and Hy field components for the zplus slab. - // - // Args: - // xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab - // NX_HPHI, NY_HPHI, NZ_HPHI: Dimensions of HPhi1 and HPhi2 PML arrays - // ID, E, H: Access to ID and field component arrays - // HPhi, HRA, HRB, HRE, HRF: Access to PML magnetic coefficient arrays - // d: Spatial discretisation, e.g. dx, dy or dz - - // Obtain the linear index corresponding to the current thread - int idx = blockIdx.x * blockDim.x + threadIdx.x; - - // Convert the linear index to subscripts for PML HPhi1 (4D) arrays - int p1 = idx / (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1); - int i1 = (idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) / (NY_HPHI1 * NZ_HPHI1); - int j1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) / NZ_HPHI1; - int k1 = ((idx % (NX_HPHI1 * NY_HPHI1 * NZ_HPHI1)) % (NY_HPHI1 * NZ_HPHI1)) % NZ_HPHI1; - - // Convert the linear index to subscripts for PML HPhi2 (4D) arrays - int p2 = idx / (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2); - int i2 = (idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) / (NY_HPHI2 * NZ_HPHI2); - int j2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) / NZ_HPHI2; - int k2 = ((idx % (NX_HPHI2 * NY_HPHI2 * NZ_HPHI2)) % (NY_HPHI2 * NZ_HPHI2)) % NZ_HPHI2; - - $REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy; - $REAL dz = d; - int ii, jj, kk, materialHx, materialHy; - int nx = xf - xs; - int ny = yf - ys; - int nz = zf - zs; - - if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) { - // Subscripts for field arrays - ii = i1 + xs; - jj 
= j1 + ys; - kk = k1 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,k1)]; - RB0 = HRB[INDEX2D_R(0,k1)]; - RE0 = HRE[INDEX2D_R(0,k1)]; - RF0 = HRF[INDEX2D_R(0,k1)]; - RA1 = HRA[INDEX2D_R(1,k1)]; - RB1 = HRB[INDEX2D_R(1,k1)]; - RE1 = HRE[INDEX2D_R(1,k1)]; - RF1 = HRF[INDEX2D_R(1,k1)]; - RA01 = HRA[INDEX2D_R(0,k1)] * HRA[INDEX2D_R(1,k1)] - 1; - - // Hx - materialHx = ID[INDEX4D_ID(3,ii,jj,kk)]; - dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz; - Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] + RB1 * HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)] = RE1 * HPhi1[INDEX4D_HPHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy + RB0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)]); - HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] = RE0 * HPhi1[INDEX4D_HPHI1(0,i1,j1,k1)] - RF0 * dEy; - } - - if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) { - // Subscripts for field arrays - ii = i2 + xs; - jj = j2 + ys; - kk = k2 + zs; - - // PML coefficients - RA0 = HRA[INDEX2D_R(0,k2)]; - RB0 = HRB[INDEX2D_R(0,k2)]; - RE0 = HRE[INDEX2D_R(0,k2)]; - RF0 = HRF[INDEX2D_R(0,k2)]; - RA1 = HRA[INDEX2D_R(1,k2)]; - RB1 = HRB[INDEX2D_R(1,k2)]; - RE1 = HRE[INDEX2D_R(1,k2)]; - RF1 = HRF[INDEX2D_R(1,k2)]; - RA01 = HRA[INDEX2D_R(0,k2)] * HRA[INDEX2D_R(1,k2)] - 1; - - // Hy - materialHy = ID[INDEX4D_ID(4,ii,jj,kk)]; - dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz; - Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] + RB1 * HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)]); - HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)] = RE1 * HPhi2[INDEX4D_HPHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)]); HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] = RE0 * HPhi2[INDEX4D_HPHI2(0,i2,j2,k2)] - RF0 * dEx; - } -} - -""") diff --git a/setup.py b/setup.py index 
abf76b18..0baa6bd0 100644 --- a/setup.py +++ b/setup.py @@ -68,16 +68,12 @@ if '--no-cython' in sys.argv: else: USE_CYTHON = True -# Build a list of all the files that need to be Cythonized looking in gprMax directory and user_libs +# Build a list of all the files that need to be Cythonized looking in gprMax directory cythonfiles = [] -for root, dirs, files in os.walk(os.path.join(os.getcwd(), packagename)): +for root, dirs, files in os.walk(os.path.join(os.getcwd(), packagename), topdown=True): for file in files: if file.endswith('.pyx'): - cythonfiles.append(os.path.join(packagename, file)) -for root, dirs, files in os.walk(os.path.join(os.getcwd(), 'user_libs')): - for file in files: - if file.endswith('.pyx'): - cythonfiles.append(os.path.join('user_libs', file)) + cythonfiles.append(os.path.relpath(os.path.join(root, file))) # Process 'cleanall' command line argument - cleanup Cython files if 'cleanall' in sys.argv: diff --git a/tests/models_basic/pmls/pmls.in b/tests/models_basic/pmls/pmls.in deleted file mode 100755 index 2cce905a..00000000 --- a/tests/models_basic/pmls/pmls.in +++ /dev/null @@ -1,26 +0,0 @@ -#title: PMLs -#domain: 0.051 0.126 0.026 -#dx_dy_dz: 0.001 0.001 0.001 -#time_window: 2100 - -#pml_cells: 0 0 0 0 0 10 - -## Built-in 1st order PML -pml_cfs: constant forward 0 0 constant forward 1 1 quartic forward 0 None - -## PMLs from http://dx.doi.org/10.1109/TAP.2011.2180344 -## Standard PML -pml_cfs: constant forward 0 0 quartic forward 1 11 quartic forward 0 7.427 - -## CFS PML -pml_cfs: constant forward 0.05 0.05 quartic forward 1 7 quartic forward 0 11.671 - -## O2 RIPML -#pml_cfs: constant forward 0 0 constant forward 1 1 sextic forward 0 0.5836 -#pml_cfs: constant forward 0.05 0.05 cubic forward 1 8 quadratic forward 0 5.8357 - -#waveform: gaussiandotnorm 1 9.42e9 mypulse -#hertzian_dipole: z 0.013 0.013 0.015 mypulse -#rx: 0.037 0.112 0.015 - -#box: 0.013 0.013 0.013 0.038 0.113 0.014 pec diff --git 
a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_x0.png b/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_x0.png deleted file mode 100644 index c7ab8078..00000000 Binary files a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_x0.png and /dev/null differ diff --git a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_xmax.png b/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_xmax.png deleted file mode 100644 index 1eafc26b..00000000 Binary files a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_xmax.png and /dev/null differ diff --git a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_y0.png b/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_y0.png deleted file mode 100644 index 9b66f6fa..00000000 Binary files a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_y0.png and /dev/null differ diff --git a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_ymax.png b/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_ymax.png deleted file mode 100644 index 26547d35..00000000 Binary files a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_ymax.png and /dev/null differ diff --git a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_z0.png b/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_z0.png deleted file mode 100644 index 270c259b..00000000 Binary files a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_z0.png and /dev/null differ diff --git a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_zmax.png b/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_zmax.png deleted file mode 100644 index 5503f4a2..00000000 Binary files a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2CPU_zmax.png and /dev/null differ diff --git a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_x0.png b/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_x0.png deleted file mode 100644 index eb21eb94..00000000 Binary files a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_x0.png and /dev/null differ diff --git a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_xmax.png 
b/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_xmax.png deleted file mode 100644 index 7b526fba..00000000 Binary files a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_xmax.png and /dev/null differ diff --git a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_y0.png b/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_y0.png deleted file mode 100644 index 675f593b..00000000 Binary files a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_y0.png and /dev/null differ diff --git a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_ymax.png b/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_ymax.png deleted file mode 100644 index e0b4fa77..00000000 Binary files a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_ymax.png and /dev/null differ diff --git a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_z0.png b/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_z0.png deleted file mode 100644 index 663a06a6..00000000 Binary files a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_z0.png and /dev/null differ diff --git a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_zmax.png b/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_zmax.png deleted file mode 100644 index ead62386..00000000 Binary files a/tests/models_basic/pmls/pmls_diffs_stdCPUvsO2GPU_zmax.png and /dev/null differ diff --git a/tests/models_pmls/plot_pml_comparison.py b/tests/models_pmls/plot_pml_comparison.py new file mode 100644 index 00000000..57925c88 --- /dev/null +++ b/tests/models_pmls/plot_pml_comparison.py @@ -0,0 +1,108 @@ +import itertools +from operator import add +import os +import sys + +from colorama import init, Fore, Style +init() +import h5py +import matplotlib.pyplot as plt +import numpy as np + +# Create/setup plot figure +#colors = ['#E60D30', '#5CB7C6', '#A21797', '#A3B347'] # Plot colours from http://tools.medialab.sciences-po.fr/iwanthue/index.php +#colorIDs = ["#62a85b", "#9967c7", "#b3943f", "#6095cd", "#cb5c42", "#c95889"] +colorIDs = ["#79c72e", "#5774ff", "#ff7c2c", "#4b4e80",
"#d7004e", "#007545", "#ff83ec"] +#colorIDs = ["#ba0044", "#b2d334", "#470055", "#185300", "#ff96b1", "#3e2700", "#0162a9", "#fdb786"] +colors = itertools.cycle(colorIDs) +# for i in range(2): +# next(colors) +lines = itertools.cycle(('--', ':', '-.', '-')) +markers = ['o', 'd', '^', 's', '*'] + +basepath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'pml_3D_pec_plate') +path = 'rxs/rx1/' +refmodel = 'pml_3D_pec_plate_ref' +PMLIDs = ['CFS-PML', 'HORIPML-1', 'HORIPML-2', 'MRIPML-1', 'MRIPML-2'] +maxerrors = [] +testmodels = ['pml_3D_pec_plate_' + s for s in PMLIDs] + +fig, ax = plt.subplots(subplot_kw=dict(xlabel='Iterations', ylabel='Error [dB]'), figsize=(20, 10), facecolor='w', edgecolor='w') + +for x, model in enumerate(testmodels): + # Get output for model and reference files + fileref = h5py.File(os.path.join(basepath, refmodel + '.out'), 'r') + filetest = h5py.File(os.path.join(basepath, model + '.out'), 'r') + + # Get available field output component names + outputsref = list(fileref[path].keys()) + outputstest = list(filetest[path].keys()) + if outputsref != outputstest: + raise ValueError('Field output components do not match reference solution') + + # Check that type of float used to store fields matches + if filetest[path + outputstest[0]].dtype != fileref[path + outputsref[0]].dtype: + print(Fore.RED + 'WARNING: Type of floating point number in test model ({}) does not match type in reference solution ({})\n'.format(filetest[path + outputstest[0]].dtype, fileref[path + outputsref[0]].dtype) + Style.RESET_ALL) + floattyperef = fileref[path + outputsref[0]].dtype + floattypetest = filetest[path + outputstest[0]].dtype + # print('Data type: {}'.format(floattypetest)) + + # Arrays for storing time + # timeref = np.zeros((fileref.attrs['Iterations']), dtype=floattyperef) + # timeref = np.linspace(0, (fileref.attrs['Iterations'] - 1) * fileref.attrs['dt'], num=fileref.attrs['Iterations']) / 1e-9 + # timetest =
np.zeros((filetest.attrs['Iterations']), dtype=floattypetest) + # timetest = np.linspace(0, (filetest.attrs['Iterations'] - 1) * filetest.attrs['dt'], num=filetest.attrs['Iterations']) / 1e-9 + timeref = np.zeros((fileref.attrs['Iterations']), dtype=floattyperef) + timeref = np.linspace(0, (fileref.attrs['Iterations'] - 1), num=fileref.attrs['Iterations']) + timetest = np.zeros((filetest.attrs['Iterations']), dtype=floattypetest) + timetest = np.linspace(0, (filetest.attrs['Iterations'] - 1), num=filetest.attrs['Iterations']) + + # Arrays for storing field data + dataref = np.zeros((fileref.attrs['Iterations'], len(outputsref)), dtype=floattyperef) + datatest = np.zeros((filetest.attrs['Iterations'], len(outputstest)), dtype=floattypetest) + for ID, name in enumerate(outputsref): + dataref[:, ID] = fileref[path + str(name)][:] + datatest[:, ID] = filetest[path + str(name)][:] + if np.any(np.isnan(datatest[:, ID])): + raise ValueError('Test data contains NaNs') + + fileref.close() + filetest.close() + + # Diffs + datadiffs = np.zeros(datatest.shape, dtype=np.float64) + for i in range(len(outputstest)): + max = np.amax(np.abs(dataref[:, i])) + datadiffs[:, i] = np.divide(np.abs(datatest[:, i] - dataref[:, i]), max, out=np.zeros_like(dataref[:, i]), where=max != 0) # Replace any division by zero with zero + + # Calculate power (ignore warning from taking a log of any zero values) + with np.errstate(divide='ignore'): + datadiffs[:, i] = 20 * np.log10(datadiffs[:, i]) + # Replace any NaNs or Infs from zero division + datadiffs[:, i][np.invert(np.isfinite(datadiffs[:, i]))] = 0 + + # Print maximum error value + start = 210 + maxerrors.append(': {:.1f} [dB]'.format(np.amax(datadiffs[start::, 1]))) + print('{}: Max. error {}'.format(model, maxerrors[x])) + + # Plot diffs (select column to choose field component, 0-Ex, 1-Ey etc..) 
+ ax.plot(timeref[start::], datadiffs[start::, 1], color=next(colors), lw=2, ls=next(lines), label=model) + ax.set_xticks(np.arange(0, 2200, step=100)) + ax.set_xlim([0, 2100]) + ax.set_yticks(np.arange(-160, 0, step=20)) + ax.set_ylim([-160, -20]) + ax.set_axisbelow(True) + ax.grid(color=(0.75,0.75,0.75), linestyle='dashed') + +mylegend = list(map(add, PMLIDs, maxerrors)) +legend = ax.legend(mylegend, loc=1, fontsize=14) +frame = legend.get_frame() +frame.set_edgecolor('white') +frame.set_alpha(0) + +plt.show() + +# Save a PDF/PNG of the figure +fig.savefig(basepath + '.pdf', dpi=None, format='pdf', bbox_inches='tight', pad_inches=0.1) +#fig.savefig(savename + '.png', dpi=150, format='png', bbox_inches='tight', pad_inches=0.1) diff --git a/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_CFS-PML.in b/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_CFS-PML.in new file mode 100755 index 00000000..24f7fd37 --- /dev/null +++ b/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_CFS-PML.in @@ -0,0 +1,34 @@ +#title: Response from an elongated thin PEC plate +#domain: 0.051 0.126 0.026 +#dx_dy_dz: 0.001 0.001 0.001 +#time_window: 2100 +#time_step_stability_factor: 0.99 + +################################################ +## PML parameters +## CFS (alpha, kappa, sigma) +## sigma_max = (0.8 * (m + 1)) / (z0 * d * np.sqrt(er * mr)) +## z0 = 376.73, d = 0.001 +################################################ + +#pml_cells: 10 + +############# +## CFS PML ## +############# +#python: +import numpy as np + +# Parameters from http://dx.doi.org/10.1109/TAP.2018.2823864 +smax = 1.1 * ((4 + 1) / (150 * np.pi * 0.001)) +print('#pml_cfs: constant forward 0.05 0.05 quartic forward 1 8 quartic forward 0 {}'.format(smax)) +#end_python: + +#waveform: gaussiandotnorm 1 9.42e9 mypulse +#hertzian_dipole: z 0.013 0.013 0.014 mypulse +#rx: 0.038 0.114 0.013 + +#plate: 0.013 0.013 0.013 0.038 0.113 0.013 pec + +geometry_view: 0 0 0 0.051 0.126 0.026 0.001 0.001 0.001 pml_3D_pec_plate_f 
f +geometry_view: 0 0 0 0.051 0.126 0.026 0.001 0.001 0.001 pml_3D_pec_plate_n n diff --git a/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_HORIPML-1.in b/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_HORIPML-1.in new file mode 100755 index 00000000..cd44206c --- /dev/null +++ b/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_HORIPML-1.in @@ -0,0 +1,34 @@ +#title: Response from an elongated thin PEC plate +#domain: 0.051 0.126 0.026 +#dx_dy_dz: 0.001 0.001 0.001 +#time_window: 2100 +#time_step_stability_factor: 0.99 + +################################################ +## PML parameters +## CFS (alpha, kappa, sigma) +## sigma_max = (0.8 * (m + 1)) / (z0 * d * np.sqrt(er * mr)) +## z0 = 376.73, d = 0.001 +################################################ + +#pml_cells: 10 + +################################################ +## 1st order (default) HORIPML - Standard PML ## +################################################ +#python: +import numpy as np + +# Parameters from http://dx.doi.org/10.1109/TAP.2011.2180344 +smax = 0.7 * ((4 + 1) / (150 * np.pi * 0.001)) +print('#pml_cfs: constant forward 0 0 quartic forward 1 12 quartic forward 0 {}'.format(smax)) +#end_python: + +#waveform: gaussiandotnorm 1 9.42e9 mypulse +#hertzian_dipole: z 0.013 0.013 0.014 mypulse +#rx: 0.038 0.114 0.013 + +#plate: 0.013 0.013 0.013 0.038 0.113 0.013 pec + +geometry_view: 0 0 0 0.051 0.126 0.026 0.001 0.001 0.001 pml_3D_pec_plate_f f +geometry_view: 0 0 0 0.051 0.126 0.026 0.001 0.001 0.001 pml_3D_pec_plate_n n diff --git a/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_HORIPML-2.in b/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_HORIPML-2.in new file mode 100755 index 00000000..4c553080 --- /dev/null +++ b/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_HORIPML-2.in @@ -0,0 +1,37 @@ +#title: Response from an elongated thin PEC plate +#domain: 0.051 0.126 0.026 +#dx_dy_dz: 0.001 0.001 0.001 +#time_window: 2100 +#time_step_stability_factor: 0.99 + 
+################################################ +## PML parameters +## CFS (alpha, kappa, sigma) +## sigma_max = (0.8 * (m + 1)) / (z0 * d * np.sqrt(er * mr)) +## z0 = 376.73, d = 0.001 +################################################ + +#pml_cells: 10 + +####################### +## 2nd order HORIPML ## +####################### +#python: +import numpy as np + +# Parameters from http://dx.doi.org/10.1109/TAP.2018.2823864 +smax1 = 0.275 / (150 * np.pi * 0.001) +smax2 = 2.75 / (150 * np.pi * 0.001) +a0 = 0.07 +print('#pml_cfs: constant forward 0 0 constant forward 1 1 sextic forward 0 {}'.format(smax1)) +print('#pml_cfs: sextic forward {} {} cubic forward 1 8 quadratic forward 0 {}'.format(a0, a0 + smax1, smax2)) +#end_python: + +#waveform: gaussiandotnorm 1 9.42e9 mypulse +#hertzian_dipole: z 0.013 0.013 0.014 mypulse +#rx: 0.038 0.114 0.013 + +#plate: 0.013 0.013 0.013 0.038 0.113 0.013 pec + +geometry_view: 0 0 0 0.051 0.126 0.026 0.001 0.001 0.001 pml_3D_pec_plate_f f +geometry_view: 0 0 0 0.051 0.126 0.026 0.001 0.001 0.001 pml_3D_pec_plate_n n diff --git a/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_MRIPML-1.in b/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_MRIPML-1.in new file mode 100755 index 00000000..5131a43e --- /dev/null +++ b/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_MRIPML-1.in @@ -0,0 +1,36 @@ +#title: Response from an elongated thin PEC plate +#domain: 0.051 0.126 0.026 +#dx_dy_dz: 0.001 0.001 0.001 +#time_window: 2100 +#time_step_stability_factor: 0.99 + +################################################ +## PML parameters +## CFS (alpha, kappa, sigma) +## sigma_max = (0.8 * (m + 1)) / (z0 * d * np.sqrt(er * mr)) +## z0 = 376.73, d = 0.001 +################################################ + +#pml_cells: 10 + +###################### +## 1st order MRIPML ## +###################### +#pml_formulation: MRIPML + +#python: +import numpy as np + +# Parameters from Antonis' MATLAB script (M3Dparams.m) +smax = 1.1 * ((4 + 1) / (150 * 
np.pi * 0.001)) +print('#pml_cfs: constant forward 0.05 0.05 quartic forward 1 8 quartic forward 0 {}'.format(smax)) +#end_python: + +#waveform: gaussiandotnorm 1 9.42e9 mypulse +#hertzian_dipole: z 0.013 0.013 0.014 mypulse +#rx: 0.038 0.114 0.013 + +#plate: 0.013 0.013 0.013 0.038 0.113 0.013 pec + +geometry_view: 0 0 0 0.051 0.126 0.026 0.001 0.001 0.001 pml_3D_pec_plate_f f +geometry_view: 0 0 0 0.051 0.126 0.026 0.001 0.001 0.001 pml_3D_pec_plate_n n diff --git a/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_MRIPML-2.in b/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_MRIPML-2.in new file mode 100755 index 00000000..d2c83c07 --- /dev/null +++ b/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_MRIPML-2.in @@ -0,0 +1,39 @@ +#title: Response from an elongated thin PEC plate +#domain: 0.051 0.126 0.026 +#dx_dy_dz: 0.001 0.001 0.001 +#time_window: 2100 +#time_step_stability_factor: 0.99 + +################################################ +## PML parameters +## CFS (alpha, kappa, sigma) +## sigma_max = (0.8 * (m + 1)) / (z0 * d * np.sqrt(er * mr)) +## z0 = 376.73, d = 0.001 +################################################ + +#pml_cells: 10 + +###################### +## 2nd order MRIPML ## +###################### +#pml_formulation: MRIPML + +#python: +import numpy as np + +# Parameters from http://dx.doi.org/10.1109/TAP.2018.2823864 +smax1 = 0.65 * ((4 + 1) / (150 * np.pi * 0.001)) +smax2 = 0.65 * ((2 + 1) / (150 * np.pi * 0.001)) +print('#pml_cfs: quadratic reverse 0 0.15 quartic forward 1 12 quartic forward 0 {}'.format(smax1)) +print('#pml_cfs: linear reverse 0 0.8 constant forward 0 0 quadratic forward 0 {}'.format(smax2)) +#end_python: + + +#waveform: gaussiandotnorm 1 9.42e9 mypulse +#hertzian_dipole: z 0.013 0.013 0.014 mypulse +#rx: 0.038 0.114 0.013 + +#plate: 0.013 0.013 0.013 0.038 0.113 0.013 pec + +geometry_view: 0 0 0 0.051 0.126 0.026 0.001 0.001 0.001 pml_3D_pec_plate_f f +geometry_view: 0 0 0 0.051 0.126 0.026 0.001 0.001 0.001 
pml_3D_pec_plate_n n diff --git a/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_ref.in b/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_ref.in new file mode 100755 index 00000000..4f8aaeca --- /dev/null +++ b/tests/models_pmls/pml_3D_pec_plate/pml_3D_pec_plate_ref.in @@ -0,0 +1,34 @@ +#title: Standard PML of response from an elongated thin PEC plate +#domain: 0.201 0.276 0.176 +#dx_dy_dz: 0.001 0.001 0.001 +#time_window: 2100 +#time_step_stability_factor: 0.99 + +################################################ +## PML parameters +## CFS (alpha, kappa, sigma) +## sigma_max = (0.8 * (m + 1)) / (z0 * d * np.sqrt(er * mr)) +## z0 = 376.73, d = 0.001 +################################################ + +#pml_cells: 10 + +############# +## CFS PML ## +############# +#python: +import numpy as np + +# Parameters from http://dx.doi.org/10.1109/TAP.2018.2823864 +smax = 1.1 * ((4 + 1) / (150 * np.pi * 0.001)) +print('#pml_cfs: constant forward 0.05 0.05 quartic forward 1 8 quartic forward 0 {}'.format(smax)) +#end_python: + +#waveform: gaussiandotnorm 1 9.42e9 mypulse +#hertzian_dipole: z 0.088 0.088 0.089 mypulse +#rx: 0.113 0.189 0.088 + +#plate: 0.088 0.088 0.088 0.113 0.188 0.088 pec + +geometry_view: 0 0 0 0.201 0.276 0.176 0.001 0.001 0.001 pml_3D_pec_plate_ref_f f +geometry_view: 0 0 0 0.201 0.276 0.176 0.001 0.001 0.001 pml_3D_pec_plate_ref_n n diff --git a/tests/models_pmls/pml_off.in b/tests/models_pmls/pml_off.in new file mode 100755 index 00000000..8070050c --- /dev/null +++ b/tests/models_pmls/pml_off.in @@ -0,0 +1,10 @@ +#title: PML test none +#domain: 0.100 0.100 0.100 +#dx_dy_dz: 0.001 0.001 0.001 +#time_window: 3e-9 + +#waveform: gaussiandot 1 1e9 myWave +#hertzian_dipole: z 0.050 0.050 0.050 myWave +#rx: 0.070 0.070 0.070 + +#pml_cells: 0 diff --git a/tests/models_pmls/pml_x0/pml_x0.in b/tests/models_pmls/pml_x0/pml_x0.in new file mode 100755 index 00000000..0c1a8851 --- /dev/null +++ b/tests/models_pmls/pml_x0/pml_x0.in @@ -0,0 +1,26 @@ +#title: 
PML test x0 slab +#domain: 0.100 0.100 0.100 +#dx_dy_dz: 0.001 0.001 0.001 +#time_window: 3e-9 + +#waveform: gaussiandot 1 1e9 myWave +#hertzian_dipole: z 0.050 0.050 0.050 myWave +#rx: 0.070 0.070 0.070 + +#pml_cells: 10 0 0 0 0 0 + +#pml_formulation: HORIPML + +## Built-in 1st order PML +#pml_cfs: constant forward 0 0 constant forward 1 1 quartic forward 0 None + +## PMLs from http://dx.doi.org/10.1109/TAP.2011.2180344 +## Standard PML +pml_cfs: constant forward 0 0 quartic forward 1 11 quartic forward 0 7.427 + +## CFS PML +pml_cfs: constant forward 0.05 0.05 quartic forward 1 7 quartic forward 0 11.671 + +## 2nd order RIPML +pml_cfs: constant forward 0 0 constant forward 1 1 sextic forward 0 0.5836 +pml_cfs: constant forward 0.05 0.05 cubic forward 1 8 quadratic forward 0 5.8357 diff --git a/tests/models_pmls/pml_xmax/pml_xmax.in b/tests/models_pmls/pml_xmax/pml_xmax.in new file mode 100755 index 00000000..26b42aa7 --- /dev/null +++ b/tests/models_pmls/pml_xmax/pml_xmax.in @@ -0,0 +1,26 @@ +#title: PML test xmax slab +#domain: 0.100 0.100 0.100 +#dx_dy_dz: 0.001 0.001 0.001 +#time_window: 3e-9 + +#waveform: gaussiandot 1 1e9 myWave +#hertzian_dipole: z 0.050 0.050 0.050 myWave +#rx: 0.070 0.070 0.070 + +#pml_cells: 0 0 0 10 0 0 + +#pml_formulation: HORIPML + +## Built-in 1st order PML +#pml_cfs: constant forward 0 0 constant forward 1 1 quartic forward 0 None + +## PMLs from http://dx.doi.org/10.1109/TAP.2011.2180344 +## Standard PML +pml_cfs: constant forward 0 0 quartic forward 1 11 quartic forward 0 7.427 + +## CFS PML +pml_cfs: constant forward 0.05 0.05 quartic forward 1 7 quartic forward 0 11.671 + +## 2nd order RIPML +pml_cfs: constant forward 0 0 constant forward 1 1 sextic forward 0 0.5836 +pml_cfs: constant forward 0.05 0.05 cubic forward 1 8 quadratic forward 0 5.8357 diff --git a/tests/models_pmls/pml_y0/pml_y0.in b/tests/models_pmls/pml_y0/pml_y0.in new file mode 100755 index 00000000..76c8e040 --- /dev/null +++ b/tests/models_pmls/pml_y0/pml_y0.in 
@@ -0,0 +1,26 @@ +#title: PML test y0 slab +#domain: 0.100 0.100 0.100 +#dx_dy_dz: 0.001 0.001 0.001 +#time_window: 3e-9 + +#waveform: gaussiandot 1 1e9 myWave +#hertzian_dipole: z 0.050 0.050 0.050 myWave +#rx: 0.070 0.070 0.070 + +#pml_cells: 0 10 0 0 0 0 + +#pml_formulation: HORIPML + +## Built-in 1st order PML +#pml_cfs: constant forward 0 0 constant forward 1 1 quartic forward 0 None + +## PMLs from http://dx.doi.org/10.1109/TAP.2011.2180344 +## Standard PML +pml_cfs: constant forward 0 0 quartic forward 1 11 quartic forward 0 7.427 + +## CFS PML +pml_cfs: constant forward 0.05 0.05 quartic forward 1 7 quartic forward 0 11.671 + +## 2nd order RIPML +pml_cfs: constant forward 0 0 constant forward 1 1 sextic forward 0 0.5836 +pml_cfs: constant forward 0.05 0.05 cubic forward 1 8 quadratic forward 0 5.8357 diff --git a/tests/models_pmls/pml_ymax/pml_ymax.in b/tests/models_pmls/pml_ymax/pml_ymax.in new file mode 100755 index 00000000..24bd1846 --- /dev/null +++ b/tests/models_pmls/pml_ymax/pml_ymax.in @@ -0,0 +1,26 @@ +#title: PML test ymax slab +#domain: 0.100 0.100 0.100 +#dx_dy_dz: 0.001 0.001 0.001 +#time_window: 3e-9 + +#waveform: gaussiandot 1 1e9 myWave +#hertzian_dipole: z 0.050 0.050 0.050 myWave +#rx: 0.070 0.070 0.070 + +#pml_cells: 0 0 0 0 10 0 + +#pml_formulation: HORIPML + +## Built-in 1st order PML +#pml_cfs: constant forward 0 0 constant forward 1 1 quartic forward 0 None + +## PMLs from http://dx.doi.org/10.1109/TAP.2011.2180344 +## Standard PML +pml_cfs: constant forward 0 0 quartic forward 1 11 quartic forward 0 7.427 + +## CFS PML +pml_cfs: constant forward 0.05 0.05 quartic forward 1 7 quartic forward 0 11.671 + +## 2nd order RIPML +pml_cfs: constant forward 0 0 constant forward 1 1 sextic forward 0 0.5836 +pml_cfs: constant forward 0.05 0.05 cubic forward 1 8 quadratic forward 0 5.8357 diff --git a/tests/models_pmls/pml_z0/pml_z0.in b/tests/models_pmls/pml_z0/pml_z0.in new file mode 100755 index 00000000..269d7e65 --- /dev/null +++ 
b/tests/models_pmls/pml_z0/pml_z0.in @@ -0,0 +1,26 @@ +#title: PML test z0 slab +#domain: 0.100 0.100 0.100 +#dx_dy_dz: 0.001 0.001 0.001 +#time_window: 3e-9 + +#waveform: gaussiandot 1 1e9 myWave +#hertzian_dipole: z 0.050 0.050 0.050 myWave +#rx: 0.070 0.070 0.070 + +#pml_cells: 0 0 10 0 0 0 + +#pml_formulation: HORIPML + +## Built-in 1st order PML +#pml_cfs: constant forward 0 0 constant forward 1 1 quartic forward 0 None + +## PMLs from http://dx.doi.org/10.1109/TAP.2011.2180344 +## Standard PML +pml_cfs: constant forward 0 0 quartic forward 1 11 quartic forward 0 7.427 + +## CFS PML +pml_cfs: constant forward 0.05 0.05 quartic forward 1 7 quartic forward 0 11.671 + +## 2nd order RIPML +pml_cfs: constant forward 0 0 constant forward 1 1 sextic forward 0 0.5836 +pml_cfs: constant forward 0.05 0.05 cubic forward 1 8 quadratic forward 0 5.8357 diff --git a/tests/models_pmls/pml_zmax/pml_zmax.in b/tests/models_pmls/pml_zmax/pml_zmax.in new file mode 100755 index 00000000..797a568a --- /dev/null +++ b/tests/models_pmls/pml_zmax/pml_zmax.in @@ -0,0 +1,26 @@ +#title: PML test zmax slab +#domain: 0.100 0.100 0.100 +#dx_dy_dz: 0.001 0.001 0.001 +#time_window: 3e-9 + +#waveform: gaussiandot 1 1e9 myWave +#hertzian_dipole: z 0.050 0.050 0.050 myWave +#rx: 0.070 0.070 0.070 + +#pml_cells: 0 0 0 0 0 10 + +#pml_formulation: HORIPML + +## Built-in 1st order PML +#pml_cfs: constant forward 0 0 constant forward 1 1 quartic forward 0 None + +## PMLs from http://dx.doi.org/10.1109/TAP.2011.2180344 +## Standard PML +pml_cfs: constant forward 0 0 quartic forward 1 11 quartic forward 0 7.427 + +## CFS PML +pml_cfs: constant forward 0.05 0.05 quartic forward 1 7 quartic forward 0 11.671 + +## 2nd order RIPML +pml_cfs: constant forward 0 0 constant forward 1 1 sextic forward 0 0.5836 +pml_cfs: constant forward 0.05 0.05 cubic forward 1 8 quadratic forward 0 5.8357 diff --git a/tests/test_models.py b/tests/test_models.py index 6ee91588..542bbc19 100644 --- a/tests/test_models.py +++ 
b/tests/test_models.py @@ -40,18 +40,22 @@ from tests.analytical_solutions import hertzian_dipole_fs """ basepath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models_') -basepath += 'basic' +# basepath += 'basic' # basepath += 'advanced' +basepath += 'pmls' # List of available basic test models -testmodels = ['hertzian_dipole_fs_analytical', '2D_ExHyHz', '2D_EyHxHz', '2D_EzHxHy', 'cylinder_Ascan_2D', 'hertzian_dipole_fs', 'hertzian_dipole_hs', 'hertzian_dipole_dispersive', 'magnetic_dipole_fs', 'pmls'] +# testmodels = ['hertzian_dipole_fs_analytical', '2D_ExHyHz', '2D_EyHxHz', '2D_EzHxHy', 'cylinder_Ascan_2D', 'hertzian_dipole_fs', 'hertzian_dipole_hs', 'hertzian_dipole_dispersive', 'magnetic_dipole_fs', 'pmls'] # List of available advanced test models # testmodels = ['antenna_GSSI_1500_fs', 'antenna_MALA_1200_fs'] +# List of available PML models +testmodels = ['pml_x0', 'pml_y0', 'pml_z0', 'pml_xmax', 'pml_ymax', 'pml_zmax', 'pml_3D_pec_plate'] + # Select a specific model if desired -testmodels = testmodels[:-1] -# testmodels = [testmodels[0]] +# testmodels = testmodels[:-1] +testmodels = [testmodels[6]] testresults = dict.fromkeys(testmodels) path = '/rxs/rx1/' @@ -63,7 +67,8 @@ for i, model in enumerate(testmodels): testresults[model] = {} # Run model - api(os.path.join(basepath, model + os.path.sep + model + '.in'), gpu=None) + inputfile = os.path.join(basepath, model + os.path.sep + model + '.in') + api(inputfile, gpu=[None]) # Special case for analytical comparison if model == 'hertzian_dipole_fs_analytical': @@ -76,8 +81,7 @@ for i, model in enumerate(testmodels): # Arrays for storing time floattype = filetest[path + outputstest[0]].dtype - timetest = np.zeros((filetest.attrs['Iterations']), dtype=floattype) - timetest = np.arange(0, filetest.attrs['dt'] * filetest.attrs['Iterations'], filetest.attrs['dt']) / 1e-9 + timetest = np.linspace(0, (filetest.attrs['Iterations'] - 1) * filetest.attrs['dt'], num=filetest.attrs['Iterations']) / 1e-9 
timeref = timetest # Arrays for storing field data @@ -118,9 +122,9 @@ for i, model in enumerate(testmodels): # Arrays for storing time timeref = np.zeros((fileref.attrs['Iterations']), dtype=floattyperef) - timeref = np.arange(0, fileref.attrs['dt'] * fileref.attrs['Iterations'], fileref.attrs['dt']) / 1e-9 + timeref = np.linspace(0, (fileref.attrs['Iterations'] - 1) * fileref.attrs['dt'], num=fileref.attrs['Iterations']) / 1e-9 timetest = np.zeros((filetest.attrs['Iterations']), dtype=floattypetest) - timetest = np.arange(0, filetest.attrs['dt'] * filetest.attrs['Iterations'], filetest.attrs['dt']) / 1e-9 + timetest = np.linspace(0, (filetest.attrs['Iterations'] - 1) * filetest.attrs['dt'], num=filetest.attrs['Iterations']) / 1e-9 # Arrays for storing field data dataref = np.zeros((fileref.attrs['Iterations'], len(outputsref)), dtype=floattyperef)