你已经派生过 gprMax
镜像自地址
https://gitee.com/sunhf/gprMax.git
已同步 2025-08-07 15:10:13 +08:00
Work to implement OpenCL solver - temporarily breaks CUDA
这个提交包含在:
173
gprMax/config.py
173
gprMax/config.py
@@ -28,7 +28,7 @@ from scipy.constants import c
|
||||
from scipy.constants import epsilon_0 as e0
|
||||
from scipy.constants import mu_0 as m0
|
||||
|
||||
from .utilities.host_info import detect_cuda_gpus, get_host_info
|
||||
from .utilities.host_info import detect_cuda_gpus, detect_opencl, get_host_info
|
||||
from .utilities.utilities import get_terminal_width
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -61,15 +61,21 @@ class ModelConfig:
|
||||
self.grids = []
|
||||
self.ompthreads = None
|
||||
|
||||
# Store information for CUDA solver
|
||||
# gpu: GPU object
|
||||
# snapsgpu2cpu: copy snapshot data from GPU to CPU during simulation
|
||||
# N.B. This will happen if the requested snapshots are too large to fit
|
||||
# on the memory of the GPU. If True this will slow performance significantly
|
||||
if sim_config.general['cuda']:
|
||||
# If a list of lists of GPU deviceIDs is found, flatten it
|
||||
if any(isinstance(element, list) for element in sim_config.args.gpu):
|
||||
deviceID = [val for sublist in sim_config.args.gpu for val in sublist]
|
||||
# Store information for CUDA or OpenCL solver
|
||||
# dev: compute device object.
|
||||
# snapsgpu2cpu: copy snapshot data from GPU to CPU during simulation.
|
||||
# N.B. This will happen if the requested snapshots are too large to
|
||||
# fit on the memory of the GPU. If True this will slow
|
||||
# performance significantly.
|
||||
if sim_config.general['solver'] == 'cuda' or sim_config.general['solver'] == 'opencl':
|
||||
if sim_config.general['solver'] == 'cuda':
|
||||
devs = sim_config.args.gpu
|
||||
elif sim_config.general['solver'] == 'opencl':
|
||||
devs = sim_config.args.opencl
|
||||
|
||||
# If a list of lists of deviceIDs is found, flatten it
|
||||
if any(isinstance(element, list) for element in devs):
|
||||
deviceID = [val for sublist in devs for val in sublist]
|
||||
|
||||
# If no deviceID is given default to using deviceID 0. Else if either
|
||||
# a single deviceID or list of deviceIDs is given use first one.
|
||||
@@ -78,8 +84,8 @@ class ModelConfig:
|
||||
except:
|
||||
deviceID = 0
|
||||
|
||||
self.cuda = {'gpu': sim_config.set_model_gpu(deviceID),
|
||||
'snapsgpu2cpu': False}
|
||||
self.device = {'dev': sim_config.set_model_device(deviceID),
|
||||
'snapsgpu2cpu': False}
|
||||
|
||||
# Total memory usage for all grids in the model. Starts with 50MB overhead.
|
||||
self.mem_overhead = 50e6
|
||||
@@ -88,29 +94,34 @@ class ModelConfig:
|
||||
self.reuse_geometry = False
|
||||
|
||||
# String to print at start of each model run
|
||||
s = f'\n--- Model {model_num + 1}/{sim_config.model_end}, input file: {sim_config.input_file_path}'
|
||||
self.inputfilestr = Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" + Style.RESET_ALL
|
||||
s = (f'\n--- Model {model_num + 1}/{sim_config.model_end}, '
|
||||
f'input file: {sim_config.input_file_path}')
|
||||
self.inputfilestr = (Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" +
|
||||
Style.RESET_ALL)
|
||||
|
||||
# Output file path and name for specific model
|
||||
self.appendmodelnumber = '' if sim_config.single_model else str(model_num + 1) # Indexed from 1
|
||||
self.set_output_file_path()
|
||||
|
||||
# Numerical dispersion analysis parameters
|
||||
# highestfreqthres: threshold (dB) down from maximum power (0dB) of main frequency used
|
||||
# to calculate highest frequency for numerical dispersion analysis
|
||||
# maxnumericaldisp: maximum allowable percentage physical phase-velocity phase error
|
||||
# mingridsampling: minimum grid sampling of smallest wavelength for physical wave propagation
|
||||
# highestfreqthres: threshold (dB) down from maximum power (0dB) of
|
||||
# main frequency used to calculate highest
|
||||
# frequency for numerical dispersion analysis.
|
||||
# maxnumericaldisp: maximum allowable percentage physical
|
||||
# phase-velocity phase error.
|
||||
# mingridsampling: minimum grid sampling of smallest wavelength for
|
||||
# physical wave propagation.
|
||||
self.numdispersion = {'highestfreqthres': 40,
|
||||
'maxnumericaldisp': 2,
|
||||
'mingridsampling': 3}
|
||||
|
||||
# General information to configure materials
|
||||
# maxpoles: Maximum number of dispersive material poles in a model
|
||||
# dispersivedtype: Data type for dispersive materials
|
||||
# dispersiveCdtype: Data type for dispersive materials in Cython
|
||||
# drudelorentz: True/False model contains Drude or Lorentz materials
|
||||
# maxpoles: Maximum number of dispersive material poles in a model.
|
||||
# dispersivedtype: Data type for dispersive materials.
|
||||
# dispersiveCdtype: Data type for dispersive materials in Cython.
|
||||
# drudelorentz: True/False model contains Drude or Lorentz materials.
|
||||
# cudarealfunc: String to substitute into CUDA kernels for fields
|
||||
# dependent on dispersive material type
|
||||
# dependent on dispersive material type.
|
||||
self.materials = {'maxpoles': 0,
|
||||
'dispersivedtype': None,
|
||||
'dispersiveCdtype': None,
|
||||
@@ -123,32 +134,32 @@ class ModelConfig:
|
||||
else: return None
|
||||
|
||||
def get_usernamespace(self):
|
||||
return {'c': c, # Speed of light in free space (m/s)
|
||||
'e0': e0, # Permittivity of free space (F/m)
|
||||
'm0': m0, # Permeability of free space (H/m)
|
||||
'z0': np.sqrt(m0 / e0), # Impedance of free space (Ohms)
|
||||
'number_model_runs': sim_config.model_end,
|
||||
'current_model_run': model_num + 1,
|
||||
'inputfile': sim_config.input_file_path.resolve()}
|
||||
tmp = {'number_model_runs': sim_config.model_end,
|
||||
'current_model_run': model_num + 1,
|
||||
'inputfile': sim_config.input_file_path.resolve()}
|
||||
return dict(**sim_config.em_consts, **tmp)
|
||||
|
||||
|
||||
def set_dispersive_material_types(self):
|
||||
"""Set data type for dispersive materials. Complex if Drude or Lorentz
|
||||
materials are present. Real if Debye materials.
|
||||
"""
|
||||
if self.materials['drudelorentz']:
|
||||
self.materials['cudarealfunc'] = '.real()'
|
||||
self.materials['crealfunc'] = '.real()'
|
||||
self.materials['dispersivedtype'] = sim_config.dtypes['complex']
|
||||
self.materials['dispersiveCdtype'] = sim_config.dtypes['C_complex']
|
||||
else:
|
||||
self.materials['crealfunc'] = ''
|
||||
self.materials['dispersivedtype'] = sim_config.dtypes['float_or_double']
|
||||
self.materials['dispersiveCdtype'] = sim_config.dtypes['C_float_or_double']
|
||||
|
||||
def set_output_file_path(self, outputdir=None):
|
||||
"""Output file path can be provided by the user via the API or an input file
|
||||
command. If they haven't provided one use the input file path instead.
|
||||
"""Output file path can be provided by the user via the API or an input
|
||||
file command. If they haven't provided one use the input file path
|
||||
instead.
|
||||
|
||||
Args:
|
||||
outputdir (str): Output file directory given from input file command.
|
||||
outputdir: string of output file directory given by input file command.
|
||||
"""
|
||||
|
||||
if not outputdir:
|
||||
@@ -171,7 +182,7 @@ class ModelConfig:
|
||||
"""Set directory to store any snapshots.
|
||||
|
||||
Returns:
|
||||
snapshot_dir (Path): directory to store snapshot files in.
|
||||
snapshot_dir: Path to directory to store snapshot files in.
|
||||
"""
|
||||
parts = self.output_file_path.with_suffix('').parts
|
||||
snapshot_dir = Path(*parts[:-1], parts[-1] + '_snaps')
|
||||
@@ -187,7 +198,7 @@ class SimulationConfig:
|
||||
def __init__(self, args):
|
||||
"""
|
||||
Args:
|
||||
args (Namespace): Arguments from either API or CLI.
|
||||
args: Namespace with arguments from either API or CLI.
|
||||
"""
|
||||
|
||||
self.args = args
|
||||
@@ -196,17 +207,19 @@ class SimulationConfig:
|
||||
logger.exception('The geometry fixed option cannot be used with MPI.')
|
||||
raise ValueError
|
||||
|
||||
# General settings for the simulation
|
||||
# inputfilepath: path to inputfile location
|
||||
# outputfilepath: path to outputfile location
|
||||
# progressbars: whether to show progress bars on stdout or not
|
||||
# cpu, cuda, opencl: solver type
|
||||
# subgrid: whether the simulation uses sub-grids
|
||||
# precision: data type for electromagnetic field output (single/double)
|
||||
if args.gpu and args.opencl:
|
||||
logger.exception('You cannot use both CUDA and OpenCl simultaneously.')
|
||||
raise ValueError
|
||||
|
||||
self.general = {'cpu': True,
|
||||
'cuda': False,
|
||||
'opencl': False,
|
||||
# General settings for the simulation
|
||||
# inputfilepath: path to inputfile location.
|
||||
# outputfilepath: path to outputfile location.
|
||||
# progressbars: whether to show progress bars on stdout or not.
|
||||
# solver: cpu, cuda, opencl.
|
||||
# subgrid: whether the simulation uses sub-grids.
|
||||
# precision: data type for electromagnetic field output (single/double).
|
||||
|
||||
self.general = {'solver': 'cpu',
|
||||
'subgrid': False,
|
||||
'precision': 'single'}
|
||||
|
||||
@@ -222,29 +235,37 @@ class SimulationConfig:
|
||||
# Store information about host machine
|
||||
self.hostinfo = get_host_info()
|
||||
|
||||
# Information about any Nvidia GPUs
|
||||
# CUDA
|
||||
if self.args.gpu is not None:
|
||||
self.general['cuda'] = True
|
||||
self.general['cpu'] = False
|
||||
self.general['opencl'] = False
|
||||
self.general['solver'] = 'cuda'
|
||||
# Both single and double precision are possible on GPUs, but single
|
||||
# provides best performance.
|
||||
self.general['precision'] = 'single'
|
||||
self.cuda = {'gpus': [], # gpus: list of GPU objects
|
||||
'nvcc_opts': None} # nvcc_opts: nvcc compiler options
|
||||
self.devices = {'devs': [], # devs: list of pycuda device objects
|
||||
'nvcc_opts': None} # nvcc_opts: nvcc compiler options
|
||||
# Suppress nvcc warnings on Microsoft Windows
|
||||
if sys.platform == 'win32': self.cuda['nvcc_opts'] = ['-w']
|
||||
|
||||
# List of GPU objects of available GPUs
|
||||
self.cuda['gpus'] = detect_cuda_gpus()
|
||||
# Add pycuda available GPU(s)
|
||||
self.devices['devs'] = detect_cuda_gpus()
|
||||
|
||||
# OpenCL
|
||||
if self.args.opencl is not None:
|
||||
self.general['solver'] = 'opencl'
|
||||
self.general['precision'] = 'single'
|
||||
# List of pyopencl available device(s)
|
||||
self.devices = {'devs': []}
|
||||
self.devices['devs'] = detect_opencl()
|
||||
|
||||
# Subgrid parameter may not exist if user enters via CLI
|
||||
try:
|
||||
self.general['subgrid'] = self.args.subgrid
|
||||
# Double precision should be used with subgrid for best accuracy
|
||||
self.general['precision'] = 'double'
|
||||
if self.general['subgrid'] and self.general['cuda']:
|
||||
logger.exception('The CUDA-based solver cannot currently be used with models that contain sub-grids.')
|
||||
if ((self.general['subgrid'] and self.general['cuda']) or
|
||||
(self.general['subgrid'] and self.general['opencl'])):
|
||||
logger.exception('You cannot currently use CUDA or OpenCL-based '
|
||||
'solvers with models that contain sub-grids.')
|
||||
raise ValueError
|
||||
except AttributeError:
|
||||
self.general['subgrid'] = False
|
||||
@@ -262,34 +283,35 @@ class SimulationConfig:
|
||||
self._set_model_start_end()
|
||||
self._set_single_model()
|
||||
|
||||
def set_model_gpu(self, deviceID):
|
||||
"""Specify GPU object for model.
|
||||
def set_model_device(self, deviceID):
|
||||
"""Specify pycuda/pyopencl object for model.
|
||||
|
||||
Args:
|
||||
deviceID (int): Requested deviceID of GPU
|
||||
deviceID: int of requested deviceID of compute device.
|
||||
|
||||
Returns:
|
||||
gpu (GPU object): Requested GPU object.
|
||||
dev: requested pycuda/pyopencl device object.
|
||||
"""
|
||||
|
||||
found = False
|
||||
for gpu in self.cuda['gpus']:
|
||||
if gpu.deviceID == deviceID:
|
||||
for ID, dev in self.devices['devs'].items():
|
||||
if ID == deviceID:
|
||||
found = True
|
||||
return gpu
|
||||
return dev
|
||||
|
||||
if not found:
|
||||
logger.exception(f'GPU with device ID {deviceID} does not exist')
|
||||
logger.exception(f'Compute device with device ID {deviceID} does '
|
||||
'not exist.')
|
||||
raise ValueError
|
||||
|
||||
def _set_precision(self):
|
||||
"""Data type (precision) for electromagnetic field output.
|
||||
|
||||
Solid and ID arrays use 32-bit integers (0 to 4294967295)
|
||||
Rigid arrays use 8-bit integers (the smallest available type to store true/false)
|
||||
Fractal arrays use complex numbers
|
||||
Dispersive coefficient arrays use either float or complex numbers
|
||||
Main field arrays use floats
|
||||
Solid and ID arrays use 32-bit integers (0 to 4294967295).
|
||||
Rigid arrays use 8-bit integers (the smallest available type to store true/false).
|
||||
Fractal arrays use complex numbers.
|
||||
Dispersive coefficient arrays use either float or complex numbers.
|
||||
Main field arrays use floats.
|
||||
"""
|
||||
|
||||
if self.general['precision'] == 'single':
|
||||
@@ -298,16 +320,25 @@ class SimulationConfig:
|
||||
'cython_float_or_double': cython.float,
|
||||
'cython_complex': cython.floatcomplex,
|
||||
'C_float_or_double': 'float',
|
||||
'C_complex': 'pycuda::complex<float>',
|
||||
'C_complex': None,
|
||||
'vtk_float': 'Float32'}
|
||||
if self.general['solver'] == 'cuda':
|
||||
self.dtypes['C_complex'] = 'pycuda::complex<float>'
|
||||
elif self.general['solver'] == 'opencl':
|
||||
self.dtypes['C_complex'] = 'cfloat'
|
||||
|
||||
elif self.general['precision'] == 'double':
|
||||
self.dtypes = {'float_or_double': np.float64,
|
||||
'complex': np.complex128,
|
||||
'cython_float_or_double': cython.double,
|
||||
'cython_complex': cython.doublecomplex,
|
||||
'C_float_or_double': 'double',
|
||||
'C_complex': 'pycuda::complex<double>',
|
||||
'C_complex': None,
|
||||
'vtk_float': 'Float64'}
|
||||
if self.general['solver'] == 'cuda':
|
||||
self.dtypes['C_complex'] = 'pycuda::complex<double>'
|
||||
elif self.general['solver'] == 'opencl':
|
||||
self.dtypes['C_complex'] = 'cdouble'
|
||||
|
||||
def _get_byteorder(self):
|
||||
"""Check the byte order of system to use for VTK files, i.e. geometry
|
||||
|
@@ -25,18 +25,23 @@ import gprMax.config as config
|
||||
from ._version import __version__, codename
|
||||
from .model_build_run import ModelBuildRun
|
||||
from .solvers import create_G, create_solver
|
||||
from .utilities.utilities import get_terminal_width, human_size, logo, timer
|
||||
from .utilities.host_info import (detect_cuda_gpus, detect_opencl,
|
||||
print_cuda_info, print_host_info,
|
||||
print_opencl_info)
|
||||
from .utilities.utilities import get_terminal_width, logo, timer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Context:
|
||||
"""Standard context - models are run one after another and each model
|
||||
can exploit parallelisation using either OpenMP (CPU) or CUDA (GPU).
|
||||
can exploit parallelisation using either OpenMP (CPU), CUDA (GPU), or
|
||||
OpenCL (CPU/GPU).
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.model_range = range(config.sim_config.model_start, config.sim_config.model_end)
|
||||
self.model_range = range(config.sim_config.model_start,
|
||||
config.sim_config.model_end)
|
||||
self.tsimend = None
|
||||
self.tsimstart = None
|
||||
|
||||
@@ -44,10 +49,12 @@ class Context:
|
||||
"""Run the simulation in the correct context."""
|
||||
self.tsimstart = timer()
|
||||
self.print_logo_copyright()
|
||||
self.print_host_info()
|
||||
if config.sim_config.general['cuda']:
|
||||
self.print_gpu_info()
|
||||
|
||||
print_host_info(config.sim_config.hostinfo)
|
||||
if config.sim_config.general['solver'] == 'cuda':
|
||||
print_cuda_info(config.sim_config.devices['devs'])
|
||||
elif config.sim_config.general['solver'] == 'opencl':
|
||||
print_opencl_info(config.sim_config.devices['devs'])
|
||||
|
||||
# Clear list of model configs. It can be retained when gprMax is
|
||||
# called in a loop, and want to avoid this.
|
||||
config.model_configs = []
|
||||
@@ -79,33 +86,23 @@ class Context:
|
||||
logo_copyright = logo(__version__ + ' (' + codename + ')')
|
||||
logger.basic(logo_copyright)
|
||||
|
||||
def print_host_info(self):
|
||||
"""Print information about the host machine."""
|
||||
hyperthreadingstr = f", {config.sim_config.hostinfo['logicalcores']} cores with Hyper-Threading" if config.sim_config.hostinfo['hyperthreading'] else ''
|
||||
logger.basic(f"\nHost: {config.sim_config.hostinfo['hostname']} | {config.sim_config.hostinfo['machineID']} | {config.sim_config.hostinfo['sockets']} x {config.sim_config.hostinfo['cpuID']} ({config.sim_config.hostinfo['physicalcores']} cores{hyperthreadingstr}) | {human_size(config.sim_config.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} RAM | {config.sim_config.hostinfo['osversion']}")
|
||||
|
||||
def print_gpu_info(self):
|
||||
"""Print information about any NVIDIA CUDA GPUs detected."""
|
||||
gpus_info = []
|
||||
for gpu in config.sim_config.cuda['gpus']:
|
||||
gpus_info.append(f'{gpu.deviceID} - {gpu.name}, {human_size(gpu.totalmem, a_kilobyte_is_1024_bytes=True)}')
|
||||
logger.basic(f"GPU resources: {' | '.join(gpus_info)}")
|
||||
|
||||
def print_time_report(self):
|
||||
"""Print the total simulation time based on context."""
|
||||
s = f"\n=== Simulation completed in [HH:MM:SS]: {datetime.timedelta(seconds=self.tsimend - self.tsimstart)}"
|
||||
s = ("\n=== Simulation completed in [HH:MM:SS]: "
|
||||
f"{datetime.timedelta(seconds=self.tsimend - self.tsimstart)}")
|
||||
logger.basic(f"{s} {'=' * (get_terminal_width() - 1 - len(s))}\n")
|
||||
|
||||
|
||||
class MPIContext(Context):
|
||||
"""Mixed mode MPI/OpenMP/CUDA context - MPI task farm is used to distribute
|
||||
models, and each model parallelised using either OpenMP (CPU)
|
||||
or CUDA (GPU).
|
||||
models, and each model parallelised using either OpenMP (CPU),
|
||||
CUDA (GPU), or OpenCL (CPU/GPU).
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
from mpi4py import MPI
|
||||
|
||||
from gprMax.mpi import MPIExecutor
|
||||
|
||||
self.comm = MPI.COMM_WORLD
|
||||
@@ -149,7 +146,9 @@ class MPIContext(Context):
|
||||
if executor.is_master():
|
||||
if config.sim_config.general['cuda']:
|
||||
if executor.size - 1 > len(config.sim_config.cuda['gpus']):
|
||||
logger.exception('Not enough GPU resources for number of MPI tasks requested. Number of MPI tasks should be equal to number of GPUs + 1.')
|
||||
logger.exception('Not enough GPU resources for number of '
|
||||
'MPI tasks requested. Number of MPI tasks '
|
||||
'should be equal to number of GPUs + 1.')
|
||||
raise ValueError
|
||||
|
||||
# Create job list
|
||||
@@ -175,7 +174,8 @@ class SPOTPYContext(Context):
|
||||
(https://github.com/thouska/spotpy). SPOTPY coupling can utilise 2 levels
|
||||
of MPI parallelism - where the top level is where SPOTPY optimisation
|
||||
algorithms can be parallelised, and the lower level is where gprMax
|
||||
models can be parallelised using either OpenMP (CPU) or CUDA (GPU).
|
||||
models can be parallelised using either OpenMP (CPU), CUDA (GPU), or
|
||||
OpenCL (CPU/GPU).
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
|
@@ -1,244 +0,0 @@
|
||||
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
|
||||
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
|
||||
#
|
||||
# This file is part of gprMax.
|
||||
#
|
||||
# gprMax is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# gprMax is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from string import Template
|
||||
|
||||
kernel_template_fields = Template("""
|
||||
|
||||
#include <pycuda-complex.hpp>
|
||||
|
||||
// Macros for converting subscripts to linear index:
|
||||
#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n)
|
||||
#define INDEX2D_MATDISP(m, n) (m)*($NY_MATDISPCOEFFS)+(n)
|
||||
#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k)
|
||||
#define INDEX4D_ID(p, i, j, k) (p)*($NX_ID)*($NY_ID)*($NZ_ID)+(i)*($NY_ID)*($NZ_ID)+(j)*($NZ_ID)+(k)
|
||||
#define INDEX4D_T(p, i, j, k) (p)*($NX_T)*($NY_T)*($NZ_T)+(i)*($NY_T)*($NZ_T)+(j)*($NZ_T)+(k)
|
||||
|
||||
// Material coefficients (read-only) in constant memory (64KB)_
|
||||
__device__ __constant__ $REAL updatecoeffsE[$N_updatecoeffsE];
|
||||
__device__ __constant__ $REAL updatecoeffsH[$N_updatecoeffsH];
|
||||
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Electric field updates - normal materials //
|
||||
///////////////////////////////////////////////
|
||||
|
||||
__global__ void update_electric(int NX, int NY, int NZ, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
|
||||
|
||||
// This function updates electric field values.
|
||||
//
|
||||
// Args:
|
||||
// NX, NY, NZ: Number of cells of the model domain
|
||||
// ID, E, H: Access to ID and field component arrays
|
||||
|
||||
// Obtain the linear index corresponding to the current thread
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
// Convert the linear index to subscripts for 3D field arrays
|
||||
int i = idx / ($NY_FIELDS * $NZ_FIELDS);
|
||||
int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
|
||||
int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
|
||||
|
||||
// Convert the linear index to subscripts for 4D material ID array
|
||||
int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
|
||||
int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
|
||||
int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
|
||||
|
||||
// Ex component
|
||||
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
|
||||
Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]);
|
||||
}
|
||||
|
||||
// Ey component
|
||||
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
|
||||
Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]);
|
||||
}
|
||||
|
||||
// Ez component
|
||||
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
|
||||
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
|
||||
Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////
|
||||
// Magnetic field updates //
|
||||
////////////////////////////
|
||||
|
||||
__global__ void update_magnetic(int NX, int NY, int NZ, const unsigned int* __restrict__ ID, $REAL *Hx, $REAL *Hy, $REAL *Hz, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
|
||||
|
||||
// This function updates magnetic field values.
|
||||
//
|
||||
// Args:
|
||||
// NX, NY, NZ: Number of cells of the model domain
|
||||
// ID, E, H: Access to ID and field component arrays
|
||||
|
||||
// Obtain the linear index corresponding to the current thread
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
// Convert the linear index to subscripts for 3D field arrays
|
||||
int i = idx / ($NY_FIELDS * $NZ_FIELDS);
|
||||
int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
|
||||
int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
|
||||
|
||||
// Convert the linear index to subscripts for 4D material ID array
|
||||
int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
|
||||
int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
|
||||
int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
|
||||
|
||||
// Hx component
|
||||
if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) {
|
||||
int materialHx = ID[INDEX4D_ID(3,i_ID,j_ID,k_ID)];
|
||||
Hx[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHx,0)] * Hx[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHx,2)] * (Ez[INDEX3D_FIELDS(i,j+1,k)] - Ez[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHx,3)] * (Ey[INDEX3D_FIELDS(i,j,k+1)] - Ey[INDEX3D_FIELDS(i,j,k)]);
|
||||
}
|
||||
|
||||
// Hy component
|
||||
if (NY != 1 && i >= 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
|
||||
int materialHy = ID[INDEX4D_ID(4,i_ID,j_ID,k_ID)];
|
||||
Hy[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHy,0)] * Hy[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHy,3)] * (Ex[INDEX3D_FIELDS(i,j,k+1)] - Ex[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHy,1)] * (Ez[INDEX3D_FIELDS(i+1,j,k)] - Ez[INDEX3D_FIELDS(i,j,k)]);
|
||||
}
|
||||
|
||||
// Hz component
|
||||
if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialHz = ID[INDEX4D_ID(5,i_ID,j_ID,k_ID)];
|
||||
Hz[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHz,0)] * Hz[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHz,1)] * (Ey[INDEX3D_FIELDS(i+1,j,k)] - Ey[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHz,2)] * (Ex[INDEX3D_FIELDS(i,j+1,k)] - Ex[INDEX3D_FIELDS(i,j,k)]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// Electric field updates - dispersive materials //
|
||||
///////////////////////////////////////////////////
|
||||
|
||||
__global__ void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz) {
|
||||
|
||||
// This function is part A of updates to electric field values when dispersive materials (with multiple poles) are present.
|
||||
//
|
||||
// Args:
|
||||
// NX, NY, NZ: Number of cells of the model domain
|
||||
// MAXPOLES: Maximum number of dispersive material poles present in model
|
||||
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
|
||||
|
||||
// Obtain the linear index corresponding to the current thread
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
// Convert the linear index to subscripts for 3D field arrays
|
||||
int i = idx / ($NY_FIELDS * $NZ_FIELDS);
|
||||
int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
|
||||
int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
|
||||
|
||||
// Convert the linear index to subscripts for 4D material ID array
|
||||
int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
|
||||
int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
|
||||
int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
|
||||
|
||||
// Convert the linear index to subscripts for 4D dispersive array
|
||||
int i_T = (idx % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T);
|
||||
int j_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
|
||||
int k_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
|
||||
|
||||
// Ex component
|
||||
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
|
||||
$REAL phi = 0;
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,pole*3)]$REALFUNC * Tx[INDEX4D_T(pole,i_T,j_T,k_T)]$REALFUNC;
|
||||
Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,1+(pole*3))] * Tx[INDEX4D_T(pole,i_T,j_T,k_T)] + updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))] * Ex[INDEX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * phi;
|
||||
}
|
||||
|
||||
// Ey component
|
||||
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
|
||||
$REAL phi = 0;
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,pole*3)]$REALFUNC * Ty[INDEX4D_T(pole,i_T,j_T,k_T)]$REALFUNC;
|
||||
Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,1+(pole*3))] * Ty[INDEX4D_T(pole,i_T,j_T,k_T)] + updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))] * Ey[INDEX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * phi;
|
||||
}
|
||||
|
||||
// Ez component
|
||||
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
|
||||
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
|
||||
$REAL phi = 0;
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,pole*3)]$REALFUNC * Tz[INDEX4D_T(pole,i_T,j_T,k_T)]$REALFUNC;
|
||||
Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,1+(pole*3))] * Tz[INDEX4D_T(pole,i_T,j_T,k_T)] + updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))] * Ez[INDEX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] + updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * phi;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES, const $COMPLEX* __restrict__ updatecoeffsdispersive, $COMPLEX *Tx, $COMPLEX *Ty, $COMPLEX *Tz, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez) {
|
||||
|
||||
// This function is part B which updates the dispersive field arrays when dispersive materials (with multiple poles) are present.
|
||||
//
|
||||
// Args:
|
||||
// NX, NY, NZ: Number of cells of the model domain
|
||||
// MAXPOLES: Maximum number of dispersive material poles present in model
|
||||
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
|
||||
|
||||
// Obtain the linear index corresponding to the current thread
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
// Convert the linear index to subscripts for 3D field arrays
|
||||
int i = idx / ($NY_FIELDS * $NZ_FIELDS);
|
||||
int j = (idx % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
|
||||
int k = (idx % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
|
||||
|
||||
// Convert the linear index to subscripts for 4D material ID array
|
||||
int i_ID = (idx % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
|
||||
int j_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
|
||||
int k_ID = ((idx % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
|
||||
|
||||
// Convert the linear index to subscripts for 4D dispersive array
|
||||
int i_T = (idx % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T);
|
||||
int j_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
|
||||
int k_T = ((idx % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
|
||||
|
||||
// Ex component
|
||||
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = Tx[INDEX4D_T(pole,i_T,j_T,k_T)] - updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))] * Ex[INDEX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
}
|
||||
|
||||
// Ey component
|
||||
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = Ty[INDEX4D_T(pole,i_T,j_T,k_T)] - updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))] * Ey[INDEX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
}
|
||||
|
||||
// Ez component
|
||||
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
|
||||
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = Tz[INDEX4D_T(pole,i_T,j_T,k_T)] - updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))] * Ez[INDEX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
""")
|
@@ -18,7 +18,7 @@
|
||||
|
||||
from string import Template
|
||||
|
||||
kernel_template_store_snapshot = Template("""
|
||||
knl_template_store_snapshot = Template("""
|
||||
|
||||
// Macros for converting subscripts to linear index:
|
||||
#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k)
|
||||
|
@@ -18,7 +18,7 @@
|
||||
|
||||
from string import Template
|
||||
|
||||
kernel_template_sources = Template("""
|
||||
knl_template_sources = Template("""
|
||||
|
||||
// Macros for converting subscripts to linear index:
|
||||
#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n)
|
||||
|
@@ -0,0 +1,276 @@
|
||||
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
|
||||
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
|
||||
//
|
||||
// This file is part of gprMax.
|
||||
//
|
||||
// gprMax is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// gprMax is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
{% block complex_header %}{% endblock complex_header %}
|
||||
|
||||
// Macros for converting subscripts to linear index:
// Every replacement list is wrapped in parentheses so that an expansion stays
// a single operand when it is used inside a larger expression (for example
// multiplied, negated or taken modulo something), not only inside [ ].
|
||||
#define IDX2D_MAT(m, n) ((m)*({{NY_MATCOEFFS}})+(n))
|
||||
#define IDX2D_MATDISP(m, n) ((m)*({{NY_MATDISPCOEFFS}})+(n))
|
||||
#define IDX3D_FIELDS(i, j, k) ((i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k))
|
||||
#define IDX4D_ID(p, i, j, k) ((p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k))
|
||||
#define IDX4D_T(p, i, j, k) ((p)*({{NX_T}})*({{NY_T}})*({{NZ_T}})+(i)*({{NY_T}})*({{NZ_T}})+(j)*({{NZ_T}})+(k))
|
||||
|
||||
// Material coefficients (read-only) stored in constant memory of compute device
|
||||
{% block constmem %}{% endblock constmem %}
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Electric field updates - normal materials //
|
||||
///////////////////////////////////////////////
|
||||
|
||||
{{KERNEL}} void update_electric(int NX,
|
||||
int NY,
|
||||
int NZ,{% filter indent(width=30) %}{% block electric_args %}{% endblock electric_args %}{% endfilter %}{
|
||||
|
||||
// This function updates electric field values.
// Each thread converts its linear index into one (i, j, k) cell and updates
// Ex, Ey and Ez of that cell in place from differences of the H components.
// Threads whose subscripts fail the range tests below write nothing.
|
||||
//
|
||||
// Args:
|
||||
// NX, NY, NZ: Number of cells of the model domain
|
||||
// ID, E, H: Access to ID and field component arrays (declared by the
// backend-specific electric_args template block)
|
||||
|
||||
// Obtain the linear index corresponding to the current thread
// (the backend-specific threadidx template block has to declare int idx)
|
||||
{% block threadidx %}{% endblock threadidx %}
|
||||
|
||||
// Convert the linear index to subscripts for 3D field arrays
|
||||
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
|
||||
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
|
||||
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
|
||||
|
||||
// Convert the linear index to subscripts for 4D material ID array
// (the leading component subscript is passed to IDX4D_ID as a literal:
// 0, 1, 2 for Ex, Ey, Ez)
|
||||
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
|
||||
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
|
||||
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
|
||||
|
||||
// Ex component
// Skipped when NY and NZ are both 1. j and k start at 1 because the update
// reads H at j-1 and k-1. In updatecoeffsE, column 0 scales the old field
// value and columns 1, 2, 3 scale H differences taken along i, j, k.
|
||||
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)];
|
||||
Ex[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(i,j,k)] +
|
||||
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i,j-1,k)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i,j,k-1)]);
|
||||
}
|
||||
|
||||
// Ey component
// Skipped when NX and NZ are both 1. i and k start at 1 because the update
// reads H at i-1 and k-1.
|
||||
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)];
|
||||
Ey[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(i,j,k)] +
|
||||
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j,k-1)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i-1,j,k)]);
|
||||
}
|
||||
|
||||
// Ez component
// Skipped when NX and NY are both 1. i and j start at 1 because the update
// reads H at i-1 and j-1.
|
||||
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
|
||||
int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)];
|
||||
Ez[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(i,j,k)] +
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i-1,j,k)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j-1,k)]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////
|
||||
// Magnetic field updates //
|
||||
////////////////////////////
|
||||
|
||||
{{KERNEL}} void update_magnetic(int NX,
|
||||
int NY,
|
||||
int NZ,{% filter indent(width=30) %}{% block magnetic_args %}{% endblock magnetic_args %}{% endfilter %}{
|
||||
|
||||
// This function updates magnetic field values.
// Each thread converts its linear index into one (i, j, k) cell and updates
// Hx, Hy and Hz of that cell in place from forward differences of the E
// components. Threads whose subscripts fail the range tests write nothing.
|
||||
//
|
||||
// Args:
|
||||
// NX, NY, NZ: Number of cells of the model domain
|
||||
// ID, E, H: Access to ID and field component arrays (declared by the
// backend-specific magnetic_args template block)
|
||||
|
||||
// Obtain the linear index corresponding to the current thread
// (re-uses the threadidx template block, which has to declare int idx)
|
||||
{{self.threadidx()}}
|
||||
|
||||
// Convert the linear index to subscripts for 3D field arrays
|
||||
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
|
||||
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
|
||||
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
|
||||
|
||||
// Convert the linear index to subscripts for 4D material ID array
// (the leading component subscript is passed to IDX4D_ID as a literal:
// 3, 4, 5 for Hx, Hy, Hz)
|
||||
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
|
||||
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
|
||||
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
|
||||
|
||||
// Hx component
// Skipped when NX is 1. In updatecoeffsH, column 0 scales the old field
// value and columns 1, 2, 3 scale E differences taken along i, j, k.
// NOTE(review): reads E at j+1 and k+1 while j < NY and k < NZ, so this
// assumes the field arrays hold at least NY+1 and NZ+1 entries - confirm.
|
||||
if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) {
|
||||
int materialHx = ID[IDX4D_ID(3,i_ID,j_ID,k_ID)];
|
||||
Hx[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHx,0)] * Hx[IDX3D_FIELDS(i,j,k)] -
|
||||
updatecoeffsH[IDX2D_MAT(materialHx,2)] * (Ez[IDX3D_FIELDS(i,j+1,k)] - Ez[IDX3D_FIELDS(i,j,k)]) +
|
||||
updatecoeffsH[IDX2D_MAT(materialHx,3)] * (Ey[IDX3D_FIELDS(i,j,k+1)] - Ey[IDX3D_FIELDS(i,j,k)]);
|
||||
}
|
||||
|
||||
// Hy component
// Skipped when NY is 1. Reads E at k+1 and i+1.
|
||||
if (NY != 1 && i >= 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
|
||||
int materialHy = ID[IDX4D_ID(4,i_ID,j_ID,k_ID)];
|
||||
Hy[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHy,0)] * Hy[IDX3D_FIELDS(i,j,k)] -
|
||||
updatecoeffsH[IDX2D_MAT(materialHy,3)] * (Ex[IDX3D_FIELDS(i,j,k+1)] - Ex[IDX3D_FIELDS(i,j,k)]) +
|
||||
updatecoeffsH[IDX2D_MAT(materialHy,1)] * (Ez[IDX3D_FIELDS(i+1,j,k)] - Ez[IDX3D_FIELDS(i,j,k)]);
|
||||
}
|
||||
|
||||
// Hz component
// Skipped when NZ is 1. Reads E at i+1 and j+1.
|
||||
if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialHz = ID[IDX4D_ID(5,i_ID,j_ID,k_ID)];
|
||||
Hz[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHz,0)] * Hz[IDX3D_FIELDS(i,j,k)] -
|
||||
updatecoeffsH[IDX2D_MAT(materialHz,1)] * (Ey[IDX3D_FIELDS(i+1,j,k)] - Ey[IDX3D_FIELDS(i,j,k)]) +
|
||||
updatecoeffsH[IDX2D_MAT(materialHz,2)] * (Ex[IDX3D_FIELDS(i,j+1,k)] - Ex[IDX3D_FIELDS(i,j,k)]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// Electric field updates - dispersive materials //
|
||||
///////////////////////////////////////////////////
|
||||
|
||||
{{KERNEL}} void update_electric_dispersive_A(int NX,
|
||||
int NY,
|
||||
int NZ,
|
||||
int MAXPOLES,{% filter indent(width=43) %}{% block electric_dispersive_A_args %}{% endblock electric_dispersive_A_args %}{% endfilter %}{
|
||||
|
||||
// This function is part A of updates to electric field values when
|
||||
// dispersive materials (with multiple poles) are present.
// For each E component it first accumulates phi over the poles, advances the
// matching T entry using the E value from before this kernel's update, and
// then applies the same update as update_electric minus column 4 of
// updatecoeffsE times phi.
|
||||
//
|
||||
// Args:
|
||||
// NX, NY, NZ: Number of cells of the model domain
|
||||
// MAXPOLES: Maximum number of dispersive material poles present in model
|
||||
// updatecoeffsdispersive, T, ID, E, H: Access to dispersive update coefficients, dispersive, ID and field component arrays
// (declared by the backend-specific electric_dispersive_A_args template block)
|
||||
|
||||
// Obtain the linear index corresponding to the current thread
// (re-uses the threadidx template block, which has to declare int idx)
|
||||
{{self.threadidx()}}
|
||||
|
||||
// Convert the linear index to subscripts for 3D field arrays
|
||||
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
|
||||
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
|
||||
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
|
||||
|
||||
// Convert the linear index to subscripts for 4D material ID array
|
||||
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
|
||||
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
|
||||
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
|
||||
|
||||
// Convert the linear index to subscripts for 4D dispersive array
// (the leading subscript of T is the pole number, supplied in the loops below)
|
||||
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
|
||||
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
|
||||
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
|
||||
|
||||
// Ex component
// updatecoeffsdispersive holds three entries per pole and material, at
// columns pole*3, 1+pole*3 and 2+pole*3. phi sums the product of the first
// entry and T, each with the REALFUNC template suffix applied (presumably
// the real-part accessor of the backend complex type - confirm).
|
||||
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)];
|
||||
{{REAL}} phi = 0;
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEx,pole*3)]{{REALFUNC}} * Tx[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}};
|
||||
Tx[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEx,1+(pole*3))] * Tx[IDX4D_T(pole,i_T,j_T,k_T)] +
|
||||
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
Ex[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(i,j,k)] +
|
||||
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i,j-1,k)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i,j,k-1)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEx,4)] * phi;
|
||||
}
|
||||
|
||||
// Ey component
// Same sequence as for Ex, using Ty and the Ey range test.
|
||||
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)];
|
||||
{{REAL}} phi = 0;
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEy,pole*3)]{{REALFUNC}} * Ty[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}};
|
||||
Ty[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEy,1+(pole*3))] * Ty[IDX4D_T(pole,i_T,j_T,k_T)] +
|
||||
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
Ey[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(i,j,k)] +
|
||||
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j,k-1)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i-1,j,k)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEy,4)] * phi;
|
||||
}
|
||||
|
||||
// Ez component
// Same sequence as for Ex, using Tz and the Ez range test.
|
||||
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
|
||||
int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)];
|
||||
{{REAL}} phi = 0;
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEz,pole*3)]{{REALFUNC}} * Tz[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}};
|
||||
Tz[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEz,1+(pole*3))] * Tz[IDX4D_T(pole,i_T,j_T,k_T)] +
|
||||
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
Ez[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(i,j,k)] +
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i-1,j,k)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j-1,k)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,4)] * phi;
|
||||
}
|
||||
}
|
||||
|
||||
{{KERNEL}} void update_electric_dispersive_B(int NX,
|
||||
int NY,
|
||||
int NZ,
|
||||
int MAXPOLES,{% filter indent(width=43) %}{% block electric_dispersive_B_args %}{% endblock electric_dispersive_B_args %}{% endfilter %}{
|
||||
|
||||
// This function is part B which updates the dispersive field arrays when
|
||||
// dispersive materials (with multiple poles) are present.
// For every pole it subtracts the third per-pole coefficient times the
// current E value from T. E is only read here.
// NOTE(review): presumably launched after part A so that E already holds the
// updated field - confirm against the calling solver.
|
||||
//
|
||||
// Args:
|
||||
// NX, NY, NZ: Number of cells of the model domain
|
||||
// MAXPOLES: Maximum number of dispersive material poles present in model
|
||||
// updatecoeffsdispersive, T, ID, E: Access to dispersive update coefficients, dispersive, ID and electric field component arrays
// (declared by the backend-specific electric_dispersive_B_args template block)
|
||||
|
||||
// Obtain the linear index corresponding to the current thread
// (re-uses the threadidx template block, which has to declare int idx)
|
||||
{{self.threadidx()}}
|
||||
|
||||
// Convert the linear index to subscripts for 3D field arrays
|
||||
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
|
||||
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
|
||||
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
|
||||
|
||||
// Convert the linear index to subscripts for 4D material ID array
|
||||
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
|
||||
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
|
||||
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
|
||||
|
||||
// Convert the linear index to subscripts for 4D dispersive array
// (the leading subscript of T is the pole number, supplied in the loops below)
|
||||
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
|
||||
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
|
||||
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
|
||||
|
||||
// Ex component
// Same range test as the Ex update in part A; column 2+pole*3 is the same
// coefficient that part A added to Tx.
|
||||
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)];
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
Tx[IDX4D_T(pole,i_T,j_T,k_T)] = Tx[IDX4D_T(pole,i_T,j_T,k_T)] -
|
||||
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
}
|
||||
|
||||
// Ey component
// Same sequence as for Ex, using Ty and the Ey range test.
|
||||
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
|
||||
int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)];
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
Ty[IDX4D_T(pole,i_T,j_T,k_T)] = Ty[IDX4D_T(pole,i_T,j_T,k_T)] -
|
||||
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
}
|
||||
|
||||
// Ez component
// Same sequence as for Ex, using Tz and the Ez range test.
|
||||
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
|
||||
int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)];
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
Tz[IDX4D_T(pole,i_T,j_T,k_T)] = Tz[IDX4D_T(pole,i_T,j_T,k_T)] -
|
||||
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
}
|
||||
}
|
@@ -0,0 +1,65 @@
|
||||
{% extends "fields_updates_base.tmpl" %}
{# CUDA specialisation of the field-update kernels: it only fills the blocks that the base template leaves empty. #}
|
||||
|
||||
{# Header pulled in for the complex type used by the dispersive kernels. #}
{% block complex_header %}
|
||||
#include <pycuda-complex.hpp>
|
||||
{% endblock complex_header %}
|
||||
|
||||
|
||||
{# Coefficient arrays are declared in constant memory with sizes fixed at render time. No initialiser is emitted, so the host presumably fills them before launch (confirm against the CUDA solver). #}
{% block constmem %}
|
||||
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
|
||||
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
|
||||
{% endblock constmem %}
|
||||
|
||||
|
||||
{# Linear thread index built from the x components of the block and thread indices only. #}
{% block threadidx %}
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
{% endblock threadidx %}
|
||||
|
||||
|
||||
{# Remaining parameters of update_electric, including the closing parenthesis: E components are writable, ID and H components are read-only. #}
{% block electric_args %}
|
||||
const unsigned int* __restrict__ ID,
|
||||
{{REAL}} *Ex,
|
||||
{{REAL}} *Ey,
|
||||
{{REAL}} *Ez,
|
||||
const {{REAL}}* __restrict__ Hx,
|
||||
const {{REAL}}* __restrict__ Hy,
|
||||
const {{REAL}}* __restrict__ Hz)
|
||||
{% endblock electric_args %}
|
||||
|
||||
|
||||
{# Remaining parameters of update_magnetic: H components are writable, ID and E components are read-only. #}
{% block magnetic_args %}
|
||||
const unsigned int* __restrict__ ID,
|
||||
{{REAL}} *Hx,
|
||||
{{REAL}} *Hy,
|
||||
{{REAL}} *Hz,
|
||||
const {{REAL}}* __restrict__ Ex,
|
||||
const {{REAL}}* __restrict__ Ey,
|
||||
const {{REAL}}* __restrict__ Ez)
|
||||
{% endblock magnetic_args %}
|
||||
|
||||
|
||||
{# Remaining parameters of update_electric_dispersive_A: read-only dispersive coefficients and writable Tx, Ty, Tz, followed by the same parameters as update_electric. #}
{% block electric_dispersive_A_args %}
|
||||
const {{COMPLEX}}* __restrict__ updatecoeffsdispersive,
|
||||
{{COMPLEX}} *Tx,
|
||||
{{COMPLEX}} *Ty,
|
||||
{{COMPLEX}} *Tz,
|
||||
const unsigned int* __restrict__ ID,
|
||||
{{REAL}} *Ex,
|
||||
{{REAL}} *Ey,
|
||||
{{REAL}} *Ez,
|
||||
const {{REAL}}* __restrict__ Hx,
|
||||
const {{REAL}}* __restrict__ Hy,
|
||||
const {{REAL}}* __restrict__ Hz)
|
||||
{% endblock electric_dispersive_A_args %}
|
||||
|
||||
|
||||
{# Remaining parameters of update_electric_dispersive_B: Tx, Ty, Tz are writable, E components are read-only, and no H components are passed. #}
{% block electric_dispersive_B_args %}
|
||||
const {{COMPLEX}}* __restrict__ updatecoeffsdispersive,
|
||||
{{COMPLEX}} *Tx,
|
||||
{{COMPLEX}} *Ty,
|
||||
{{COMPLEX}} *Tz,
|
||||
const unsigned int* __restrict__ ID,
|
||||
const {{REAL}}* __restrict__ Ex,
|
||||
const {{REAL}}* __restrict__ Ey,
|
||||
const {{REAL}}* __restrict__ Ez)
|
||||
{% endblock electric_dispersive_B_args %}
|
@@ -0,0 +1,77 @@
|
||||
{% extends "fields_updates_base.tmpl" %}
{# OpenCL specialisation of the field-update kernels: it only fills the blocks that the base template leaves empty. #}
|
||||
|
||||
{# Header pulled in for the complex type used by the dispersive kernels. #}
{% block complex_header %}
|
||||
#include <pyopencl-complex.h>
|
||||
{% endblock complex_header %}
|
||||
|
||||
|
||||
{# Coefficient arrays live in the __constant address space and are initialised inline: each loop emits one literal per element of the updatecoeffsE / updatecoeffsH value given to the renderer. #}
{% block constmem %}
|
||||
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
|
||||
{
|
||||
{% for i in updatecoeffsE %}
|
||||
{{i}},
|
||||
{% endfor %}
|
||||
};
|
||||
|
||||
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
|
||||
{
|
||||
{% for i in updatecoeffsH %}
|
||||
{{i}},
|
||||
{% endfor %}
|
||||
};
|
||||
{% endblock constmem %}
|
||||
|
||||
|
||||
{# Linearises the 3D global work-item ID into one index, with dimension 0 varying fastest. #}
{% block threadidx %}
|
||||
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
|
||||
get_global_id(1) * get_global_size(0) + get_global_id(0);
|
||||
{% endblock threadidx %}
|
||||
|
||||
|
||||
{# Remaining parameters of update_electric, including the closing parenthesis: E components are writable, ID and H components are read-only; every pointer is in the __global address space. #}
{% block electric_args %}
|
||||
__global const unsigned int* restrict ID,
|
||||
__global {{REAL}} *Ex,
|
||||
__global {{REAL}} *Ey,
|
||||
__global {{REAL}} *Ez,
|
||||
__global const {{REAL}} * restrict Hx,
|
||||
__global const {{REAL}} * restrict Hy,
|
||||
__global const {{REAL}} * restrict Hz)
|
||||
{% endblock electric_args %}
|
||||
|
||||
|
||||
{# Remaining parameters of update_magnetic: H components are writable, ID and E components are read-only. #}
{% block magnetic_args %}
|
||||
__global const unsigned int* restrict ID,
|
||||
__global {{REAL}} *Hx,
|
||||
__global {{REAL}} *Hy,
|
||||
__global {{REAL}} *Hz,
|
||||
__global const {{REAL}}* restrict Ex,
|
||||
__global const {{REAL}}* restrict Ey,
|
||||
__global const {{REAL}}* restrict Ez)
|
||||
{% endblock magnetic_args %}
|
||||
|
||||
|
||||
{# Remaining parameters of update_electric_dispersive_A: read-only dispersive coefficients and writable Tx, Ty, Tz, followed by the same parameters as update_electric. #}
{% block electric_dispersive_A_args %}
|
||||
__global const {{COMPLEX}}* restrict updatecoeffsdispersive,
|
||||
__global {{COMPLEX}} *Tx,
|
||||
__global {{COMPLEX}} *Ty,
|
||||
__global {{COMPLEX}} *Tz,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global {{REAL}} *Ex,
|
||||
__global {{REAL}} *Ey,
|
||||
__global {{REAL}} *Ez,
|
||||
__global const {{REAL}}* restrict Hx,
|
||||
__global const {{REAL}}* restrict Hy,
|
||||
__global const {{REAL}}* restrict Hz)
|
||||
{% endblock electric_dispersive_A_args %}
|
||||
|
||||
|
||||
{# Remaining parameters of update_electric_dispersive_B: Tx, Ty, Tz are writable, E components are read-only, and no H components are passed. #}
{% block electric_dispersive_B_args %}
|
||||
__global const {{COMPLEX}}* restrict updatecoeffsdispersive,
|
||||
__global {{COMPLEX}} *Tx,
|
||||
__global {{COMPLEX}} *Ty,
|
||||
__global {{COMPLEX}} *Tz,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global const {{REAL}}* restrict Ex,
|
||||
__global const {{REAL}}* restrict Ey,
|
||||
__global const {{REAL}}* restrict Ez)
|
||||
{% endblock electric_dispersive_B_args %}
|
文件差异内容过多而无法显示
加载差异
@@ -0,0 +1,62 @@
|
||||
{% extends "pml_updates_electric_HORIPML_base.tmpl" %}
{# CUDA specialisation of the electric HORIPML update kernels: it fills the constmem, threadidx and x/y/z argument blocks of the base template. #}
|
||||
|
||||
|
||||
{# Electric coefficient array declared in constant memory with its size fixed at render time. No initialiser is emitted, so the host presumably fills it before launch (confirm against the CUDA solver). #}
{% block constmem %}
|
||||
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
|
||||
{% endblock constmem %}
|
||||
|
||||
|
||||
{# Linear thread index built from the x components of the block and thread indices only. #}
{% block threadidx %}
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
{% endblock threadidx %}
|
||||
|
||||
|
||||
{# x_args: Ex is read-only while Ey and Ez are writable; PHI1 and PHI2 are writable; H, RA, RB, RE and RF are read-only. The list ends with a trailing comma, so the base template presumably appends further parameters and the closing parenthesis (confirm there). #}
{% block x_args %}
|
||||
const unsigned int* __restrict__ ID,
|
||||
const {{REAL}}* __restrict__ Ex,
|
||||
{{REAL}} *Ey,
|
||||
{{REAL}} *Ez,
|
||||
const {{REAL}}* __restrict__ Hx,
|
||||
const {{REAL}}* __restrict__ Hy,
|
||||
const {{REAL}}* __restrict__ Hz,
|
||||
{{REAL}} *PHI1,
|
||||
{{REAL}} *PHI2,
|
||||
const {{REAL}}* __restrict__ RA,
|
||||
const {{REAL}}* __restrict__ RB,
|
||||
const {{REAL}}* __restrict__ RE,
|
||||
const {{REAL}}* __restrict__ RF,
|
||||
{% endblock x_args %}
|
||||
|
||||
|
||||
{# y_args: same layout as x_args, but here Ey is the read-only E component and Ex, Ez are writable. #}
{% block y_args %}
|
||||
const unsigned int* __restrict__ ID,
|
||||
{{REAL}} *Ex,
|
||||
const {{REAL}}* __restrict__ Ey,
|
||||
{{REAL}} *Ez,
|
||||
const {{REAL}}* __restrict__ Hx,
|
||||
const {{REAL}}* __restrict__ Hy,
|
||||
const {{REAL}}* __restrict__ Hz,
|
||||
{{REAL}} *PHI1,
|
||||
{{REAL}} *PHI2,
|
||||
const {{REAL}}* __restrict__ RA,
|
||||
const {{REAL}}* __restrict__ RB,
|
||||
const {{REAL}}* __restrict__ RE,
|
||||
const {{REAL}}* __restrict__ RF,
|
||||
{% endblock y_args %}
|
||||
|
||||
|
||||
{# z_args: same layout as x_args, but here Ez is the read-only E component and Ex, Ey are writable. #}
{% block z_args %}
|
||||
const unsigned int* __restrict__ ID,
|
||||
{{REAL}} *Ex,
|
||||
{{REAL}} *Ey,
|
||||
const {{REAL}}* __restrict__ Ez,
|
||||
const {{REAL}}* __restrict__ Hx,
|
||||
const {{REAL}}* __restrict__ Hy,
|
||||
const {{REAL}}* __restrict__ Hz,
|
||||
{{REAL}} *PHI1,
|
||||
{{REAL}} *PHI2,
|
||||
const {{REAL}}* __restrict__ RA,
|
||||
const {{REAL}}* __restrict__ RB,
|
||||
const {{REAL}}* __restrict__ RE,
|
||||
const {{REAL}}* __restrict__ RF,
|
||||
{% endblock z_args %}
|
@@ -0,0 +1,68 @@
|
||||
{% extends "pml_updates_electric_HORIPML_base.tmpl" %}
{# OpenCL specialisation of the electric HORIPML update kernels: it fills the constmem, threadidx and x/y/z argument blocks of the base template. #}
|
||||
|
||||
|
||||
{# Electric coefficient array in the __constant address space, initialised inline: the loop emits one literal per element of the updatecoeffsE value given to the renderer. #}
{% block constmem %}
|
||||
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
|
||||
{
|
||||
{% for i in updatecoeffsE %}
|
||||
{{i}},
|
||||
{% endfor %}
|
||||
};
|
||||
{% endblock constmem %}
|
||||
|
||||
|
||||
{# Linearises the 3D global work-item ID into one index, with dimension 0 varying fastest. #}
{% block threadidx %}
|
||||
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
|
||||
get_global_id(1) * get_global_size(0) + get_global_id(0);
|
||||
{% endblock threadidx %}
|
||||
|
||||
|
||||
{# x_args: Ex is read-only while Ey and Ez are writable; PHI1 and PHI2 are writable; H, RA, RB, RE and RF are read-only. The list ends with a trailing comma, so the base template presumably appends further parameters and the closing parenthesis (confirm there). #}
{% block x_args %}
|
||||
__global const unsigned int* restrict ID,
|
||||
__global const {{REAL}}* restrict Ex,
|
||||
__global {{REAL}} *Ey,
|
||||
__global {{REAL}} *Ez,
|
||||
__global const {{REAL}}* restrict Hx,
|
||||
__global const {{REAL}}* restrict Hy,
|
||||
__global const {{REAL}}* restrict Hz,
|
||||
__global {{REAL}} *PHI1,
|
||||
__global {{REAL}} *PHI2,
|
||||
__global const {{REAL}}* restrict RA,
|
||||
__global const {{REAL}}* restrict RB,
|
||||
__global const {{REAL}}* restrict RE,
|
||||
__global const {{REAL}}* restrict RF,
|
||||
{% endblock x_args %}
|
||||
|
||||
|
||||
{# y_args: same layout as x_args, but here Ey is the read-only E component and Ex, Ez are writable. #}
{% block y_args %}
|
||||
__global const unsigned int* restrict ID,
|
||||
__global {{REAL}} *Ex,
|
||||
__global const {{REAL}}* restrict Ey,
|
||||
__global {{REAL}} *Ez,
|
||||
__global const {{REAL}}* restrict Hx,
|
||||
__global const {{REAL}}* restrict Hy,
|
||||
__global const {{REAL}}* restrict Hz,
|
||||
__global {{REAL}} *PHI1,
|
||||
__global {{REAL}} *PHI2,
|
||||
__global const {{REAL}}* restrict RA,
|
||||
__global const {{REAL}}* restrict RB,
|
||||
__global const {{REAL}}* restrict RE,
|
||||
__global const {{REAL}}* restrict RF,
|
||||
{% endblock y_args %}
|
||||
|
||||
|
||||
{# z_args: same layout as x_args, but here Ez is the read-only E component and Ex, Ey are writable. #}
{% block z_args %}
|
||||
__global const unsigned int* restrict ID,
|
||||
__global {{REAL}} *Ex,
|
||||
__global {{REAL}} *Ey,
|
||||
__global const {{REAL}}* restrict Ez,
|
||||
__global const {{REAL}}* restrict Hx,
|
||||
__global const {{REAL}}* restrict Hy,
|
||||
__global const {{REAL}}* restrict Hz,
|
||||
__global {{REAL}} *PHI1,
|
||||
__global {{REAL}} *PHI2,
|
||||
__global const {{REAL}}* restrict RA,
|
||||
__global const {{REAL}}* restrict RB,
|
||||
__global const {{REAL}}* restrict RE,
|
||||
__global const {{REAL}}* restrict RF,
|
||||
{% endblock z_args %}
|
文件差异内容过多而无法显示
加载差异
@@ -0,0 +1,62 @@
|
||||
{% extends "pml_updates_magnetic_HORIPML_base.tmpl" %}
{# CUDA specialisation of the magnetic HORIPML update kernels: it fills the constmem, threadidx and x/y/z argument blocks of the base template. #}
|
||||
|
||||
|
||||
{# Magnetic coefficient array declared in constant memory with its size fixed at render time. No initialiser is emitted, so the host presumably fills it before launch (confirm against the CUDA solver). #}
{% block constmem %}
|
||||
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
|
||||
{% endblock constmem %}
|
||||
|
||||
|
||||
{# Linear thread index built from the x components of the block and thread indices only. #}
{% block threadidx %}
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
{% endblock threadidx %}
|
||||
|
||||
|
||||
{# x_args: all E components and Hx are read-only while Hy and Hz are writable; PHI1 and PHI2 are writable; RA, RB, RE and RF are read-only. The list ends with a trailing comma, so the base template presumably appends further parameters and the closing parenthesis (confirm there). #}
{% block x_args %}
|
||||
const unsigned int* __restrict__ ID,
|
||||
const {{REAL}}* __restrict__ Ex,
|
||||
const {{REAL}}* __restrict__ Ey,
|
||||
const {{REAL}}* __restrict__ Ez,
|
||||
const {{REAL}}* __restrict__ Hx,
|
||||
{{REAL}} *Hy,
|
||||
{{REAL}} *Hz,
|
||||
{{REAL}} *PHI1,
|
||||
{{REAL}} *PHI2,
|
||||
const {{REAL}}* __restrict__ RA,
|
||||
const {{REAL}}* __restrict__ RB,
|
||||
const {{REAL}}* __restrict__ RE,
|
||||
const {{REAL}}* __restrict__ RF,
|
||||
{% endblock x_args %}
|
||||
|
||||
|
||||
{# y_args: same layout as x_args, but here Hy is the read-only H component and Hx, Hz are writable. #}
{% block y_args %}
|
||||
const unsigned int* __restrict__ ID,
|
||||
const {{REAL}}* __restrict__ Ex,
|
||||
const {{REAL}}* __restrict__ Ey,
|
||||
const {{REAL}}* __restrict__ Ez,
|
||||
{{REAL}} *Hx,
|
||||
const {{REAL}}* __restrict__ Hy,
|
||||
{{REAL}} *Hz,
|
||||
{{REAL}} *PHI1,
|
||||
{{REAL}} *PHI2,
|
||||
const {{REAL}}* __restrict__ RA,
|
||||
const {{REAL}}* __restrict__ RB,
|
||||
const {{REAL}}* __restrict__ RE,
|
||||
const {{REAL}}* __restrict__ RF,
|
||||
{% endblock y_args %}
|
||||
|
||||
|
||||
{# z_args: same layout as x_args, but here Hz is the read-only H component and Hx, Hy are writable. #}
{% block z_args %}
|
||||
const unsigned int* __restrict__ ID,
|
||||
const {{REAL}}* __restrict__ Ex,
|
||||
const {{REAL}}* __restrict__ Ey,
|
||||
const {{REAL}}* __restrict__ Ez,
|
||||
{{REAL}} *Hx,
|
||||
{{REAL}} *Hy,
|
||||
const {{REAL}}* __restrict__ Hz,
|
||||
{{REAL}} *PHI1,
|
||||
{{REAL}} *PHI2,
|
||||
const {{REAL}}* __restrict__ RA,
|
||||
const {{REAL}}* __restrict__ RB,
|
||||
const {{REAL}}* __restrict__ RE,
|
||||
const {{REAL}}* __restrict__ RF,
|
||||
{% endblock z_args %}
|
@@ -0,0 +1,68 @@
|
||||
{% extends "pml_updates_magnetic_HORIPML_base.tmpl" %}
{# OpenCL specialisation of the magnetic HORIPML update kernels: it fills the constmem, threadidx and x/y/z argument blocks of the base template. #}
|
||||
|
||||
|
||||
{# Magnetic coefficient array in the __constant address space, initialised inline: the loop emits one literal per element of the updatecoeffsH value given to the renderer. #}
{% block constmem %}
|
||||
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
|
||||
{
|
||||
{% for i in updatecoeffsH %}
|
||||
{{i}},
|
||||
{% endfor %}
|
||||
};
|
||||
{% endblock constmem %}
|
||||
|
||||
|
||||
{# Linearises the 3D global work-item ID into one index, with dimension 0 varying fastest. #}
{% block threadidx %}
|
||||
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
|
||||
get_global_id(1) * get_global_size(0) + get_global_id(0);
|
||||
{% endblock threadidx %}
|
||||
|
||||
|
||||
{# x_args: all E components and Hx are read-only while Hy and Hz are writable; PHI1 and PHI2 are writable; RA, RB, RE and RF are read-only. The list ends with a trailing comma, so the base template presumably appends further parameters and the closing parenthesis (confirm there). #}
{% block x_args %}
|
||||
__global const unsigned int* restrict ID,
|
||||
__global const {{REAL}}* restrict Ex,
|
||||
__global const {{REAL}}* restrict Ey,
|
||||
__global const {{REAL}}* restrict Ez,
|
||||
__global const {{REAL}}* restrict Hx,
|
||||
__global {{REAL}} *Hy,
|
||||
__global {{REAL}} *Hz,
|
||||
__global {{REAL}} *PHI1,
|
||||
__global {{REAL}} *PHI2,
|
||||
__global const {{REAL}}* restrict RA,
|
||||
__global const {{REAL}}* restrict RB,
|
||||
__global const {{REAL}}* restrict RE,
|
||||
__global const {{REAL}}* restrict RF,
|
||||
{% endblock x_args %}
|
||||
|
||||
|
||||
{# y_args: same layout as x_args, but here Hy is the read-only H component and Hx, Hz are writable. #}
{% block y_args %}
|
||||
__global const unsigned int* restrict ID,
|
||||
__global const {{REAL}}* restrict Ex,
|
||||
__global const {{REAL}}* restrict Ey,
|
||||
__global const {{REAL}}* restrict Ez,
|
||||
__global {{REAL}} *Hx,
|
||||
__global const {{REAL}}* restrict Hy,
|
||||
__global {{REAL}} *Hz,
|
||||
__global {{REAL}} *PHI1,
|
||||
__global {{REAL}} *PHI2,
|
||||
__global const {{REAL}}* restrict RA,
|
||||
__global const {{REAL}}* restrict RB,
|
||||
__global const {{REAL}}* restrict RE,
|
||||
__global const {{REAL}}* restrict RF,
|
||||
{% endblock y_args %}
|
||||
|
||||
|
||||
{# z_args: same layout as x_args, but here Hz is the read-only H component and Hx, Hy are writable. #}
{% block z_args %}
|
||||
__global const unsigned int* restrict ID,
|
||||
__global const {{REAL}}* restrict Ex,
|
||||
__global const {{REAL}}* restrict Ey,
|
||||
__global const {{REAL}}* restrict Ez,
|
||||
__global {{REAL}} *Hx,
|
||||
__global {{REAL}} *Hy,
|
||||
__global const {{REAL}}* restrict Hz,
|
||||
__global {{REAL}} *PHI1,
|
||||
__global {{REAL}} *PHI2,
|
||||
__global const {{REAL}}* restrict RA,
|
||||
__global const {{REAL}}* restrict RB,
|
||||
__global const {{REAL}}* restrict RE,
|
||||
__global const {{REAL}}* restrict RF,
|
||||
{% endblock z_args %}
|
@@ -0,0 +1,90 @@
|
||||
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
|
||||
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
|
||||
//
|
||||
// This file is part of gprMax.
|
||||
//
|
||||
// gprMax is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// gprMax is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
// Macros for converting subscripts to linear index:
|
||||
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
|
||||
#define IDX4D_SNAPS(p, i, j, k) (p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k)
|
||||
|
||||
////////////////////
|
||||
// Store snapshot //
|
||||
////////////////////
|
||||
|
||||
{{KERNEL}} void store_snapshot(int p,
|
||||
int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int dx,
|
||||
int dy,
|
||||
int dz,{% filter indent(width=29) %}{% block snap_args %}{% endblock snap_args %}{% endfilter %}{
|
||||
|
||||
// This function stores field values for a snapshot.
|
||||
//
|
||||
// Args:
|
||||
// p: Snapshot number
|
||||
// xs, xf, ys, yf, zs, zf: Start and finish cell coordinates for snapshot
|
||||
// dx, dy, dz: Sampling interval in cell coordinates for snapshot
|
||||
// E, H: Access to field component arrays
|
||||
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots
|
||||
|
||||
// Obtain the linear index corresponding to the current thread
|
||||
{% block threadidx %}{% endblock threadidx %}
|
||||
|
||||
// Convert the linear index to subscripts for 4D SNAPS array
|
||||
int i = (idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) / ({{NY_SNAPS}} * {{NZ_SNAPS}});
|
||||
int j = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) / {{NZ_SNAPS}};
|
||||
int k = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) % {{NZ_SNAPS}};
|
||||
|
||||
// Subscripts for field arrays
|
||||
int ii, jj, kk;
|
||||
|
||||
if (i >= xs && i < xf && j >= ys && j < yf && k >= zs && k < zf) {
|
||||
|
||||
// Increment subscripts for field array to account for spatial sampling of snapshot
|
||||
ii = (xs + i) * dx;
|
||||
jj = (ys + j) * dy;
|
||||
kk = (zs + k) * dz;
|
||||
|
||||
// The electric field component value at a point comes from an average of
|
||||
// the 4 electric field component values in that cell
|
||||
snapEx[IDX4D_SNAPS(p,i,j,k)] = (Ex[IDX3D_FIELDS(ii,jj,kk)] +
|
||||
Ex[IDX3D_FIELDS(ii,jj+1,kk)] +
|
||||
Ex[IDX3D_FIELDS(ii,jj,kk+1)] +
|
||||
Ex[IDX3D_FIELDS(ii,jj+1,kk+1)]) / 4;
|
||||
snapEy[IDX4D_SNAPS(p,i,j,k)] = (Ey[IDX3D_FIELDS(ii,jj,kk)] +
|
||||
Ey[IDX3D_FIELDS(ii+1,jj,kk)] +
|
||||
Ey[IDX3D_FIELDS(ii,jj,kk+1)] +
|
||||
Ey[IDX3D_FIELDS(ii+1,jj,kk+1)]) / 4;
|
||||
snapEz[IDX4D_SNAPS(p,i,j,k)] = (Ez[IDX3D_FIELDS(ii,jj,kk)] +
|
||||
Ez[IDX3D_FIELDS(ii+1,jj,kk)] +
|
||||
Ez[IDX3D_FIELDS(ii,jj+1,kk)] +
|
||||
Ez[IDX3D_FIELDS(ii+1,jj+1,kk)]) / 4;
|
||||
|
||||
// The magnetic field component value at a point comes from average of
|
||||
// 2 magnetic field component values in that cell and the following cell
|
||||
snapHx[IDX4D_SNAPS(p,i,j,k)] = (Hx[IDX3D_FIELDS(ii,jj,kk)] +
|
||||
Hx[IDX3D_FIELDS(ii+1,jj,kk)]) / 2;
|
||||
snapHy[IDX4D_SNAPS(p,i,j,k)] = (Hy[IDX3D_FIELDS(ii,jj,kk)] +
|
||||
Hy[IDX3D_FIELDS(ii,jj+1,kk)]) / 2;
|
||||
snapHz[IDX4D_SNAPS(p,i,j,k)] = (Hz[IDX3D_FIELDS(ii,jj,kk)] +
|
||||
Hz[IDX3D_FIELDS(ii,jj,kk+1)]) / 2;
|
||||
}
|
||||
}
|
@@ -0,0 +1,22 @@
|
||||
{% extends "snapshots_base.tmpl" %}
|
||||
|
||||
|
||||
{% block threadidx %}
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
{% endblock threadidx %}
|
||||
|
||||
|
||||
{# CUDA argument list spliced into the store_snapshot signature. The base template emits the opening '{' of the kernel body immediately after this block, so the block itself must close the parameter list with ')'. #}
{% block snap_args %}
|
||||
const {{REAL}}* __restrict__ Ex,
|
||||
const {{REAL}}* __restrict__ Ey,
|
||||
const {{REAL}}* __restrict__ Ez,
|
||||
const {{REAL}}* __restrict__ Hx,
|
||||
const {{REAL}}* __restrict__ Hy,
|
||||
const {{REAL}}* __restrict__ Hz,
|
||||
{{REAL}} *snapEx,
|
||||
{{REAL}} *snapEy,
|
||||
{{REAL}} *snapEz,
|
||||
{{REAL}} *snapHx,
|
||||
{{REAL}} *snapHy,
|
||||
{{REAL}} *snapHz)
|
||||
{% endblock snap_args %}
|
@@ -0,0 +1,23 @@
|
||||
{% extends "snapshots_base.tmpl" %}
|
||||
|
||||
|
||||
{% block threadidx %}
|
||||
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
|
||||
get_global_id(1) * get_global_size(0) + get_global_id(0);
|
||||
{% endblock threadidx %}
|
||||
|
||||
|
||||
{% block snap_args %}
|
||||
__global const {{REAL}}* restrict Ex,
|
||||
__global const {{REAL}}* restrict Ey,
|
||||
__global const {{REAL}}* restrict Ez,
|
||||
__global const {{REAL}}* restrict Hx,
|
||||
__global const {{REAL}}* restrict Hy,
|
||||
__global const {{REAL}}* restrict Hz,
|
||||
__global {{REAL}} *snapEx,
|
||||
__global {{REAL}} *snapEy,
|
||||
__global {{REAL}} *snapEz,
|
||||
__global {{REAL}} *snapHx,
|
||||
__global {{REAL}} *snapHy,
|
||||
__global {{REAL}} *snapHz)
|
||||
{% endblock snap_args %}
|
@@ -0,0 +1,217 @@
|
||||
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
|
||||
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
|
||||
//
|
||||
// This file is part of gprMax.
|
||||
//
|
||||
// gprMax is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// gprMax is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
// Macros for converting subscripts to linear index:
|
||||
#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
|
||||
// Row stride is parenthesised like the sibling index macros so the expansion stays correct even if NY_SRCINFO is rendered as an expression rather than a plain literal.
#define IDX2D_SRCINFO(m, n) (m)*({{NY_SRCINFO}})+(n)
|
||||
#define IDX2D_SRCWAVES(m, n) (m)*({{NY_SRCWAVES}})+(n)
|
||||
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
|
||||
#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
|
||||
|
||||
// Material coefficients (read-only) stored in constant memory of compute device
|
||||
{% block constmem %}{% endblock constmem %}
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Hertzian dipole electric field update //
|
||||
///////////////////////////////////////////
|
||||
|
||||
{{KERNEL}} void update_hertzian_dipole(int NHERTZDIPOLE,
|
||||
int iteration,
|
||||
{{REAL}} dx,
|
||||
{{REAL}} dy,
|
||||
{{REAL}} dz,{% filter indent(width=37) %}{% block electric_source_args %}{% endblock electric_source_args %}{% endfilter %}{
|
||||
|
||||
// This function updates electric field values for Hertzian dipole sources.
|
||||
//
|
||||
// Args:
|
||||
// NHERTZDIPOLE: Total number of Hertzian dipoles in the model
|
||||
// iteration: Iteration number of simulation
|
||||
// dx, dy, dz: Spatial discretisations
|
||||
// srcinfo1: Source cell coordinates and polarisation information
|
||||
// srcinfo2: Other source information, e.g. length, resistance etc...
|
||||
// srcwaveforms: Source waveform values
|
||||
// ID, E: Access to ID and field component arrays
|
||||
|
||||
// Obtain the linear index corresponding to the current thread and use for each source
|
||||
{% block threadidx %}{% endblock threadidx %}
|
||||
|
||||
if (src < NHERTZDIPOLE) {
|
||||
|
||||
{{REAL}} dl;
|
||||
int i, j, k, polarisation;
|
||||
|
||||
i = srcinfo1[IDX2D_SRCINFO(src,0)];
|
||||
j = srcinfo1[IDX2D_SRCINFO(src,1)];
|
||||
k = srcinfo1[IDX2D_SRCINFO(src,2)];
|
||||
polarisation = srcinfo1[IDX2D_SRCINFO(src,3)];
|
||||
dl = srcinfo2[src];
|
||||
|
||||
// 'x' polarised source
|
||||
if (polarisation == 0) {
|
||||
int materialEx = ID[IDX4D_ID(0,i,j,k)];
|
||||
Ex[IDX3D_FIELDS(i,j,k)] = Ex[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
|
||||
}
|
||||
|
||||
// 'y' polarised source
|
||||
else if (polarisation == 1) {
|
||||
int materialEy = ID[IDX4D_ID(1,i,j,k)];
|
||||
Ey[IDX3D_FIELDS(i,j,k)] = Ey[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
|
||||
}
|
||||
|
||||
// 'z' polarised source
|
||||
else if (polarisation == 2) {
|
||||
int materialEz = ID[IDX4D_ID(2,i,j,k)];
|
||||
Ez[IDX3D_FIELDS(i,j,k)] = Ez[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Magnetic dipole magnetic field update //
|
||||
///////////////////////////////////////////
|
||||
|
||||
{{KERNEL}} void update_magnetic_dipole(int NMAGDIPOLE,
|
||||
int iteration,
|
||||
{{REAL}} dx,
|
||||
{{REAL}} dy,
|
||||
{{REAL}} dz,{% filter indent(width=37) %}{% block magnetic_source_args %}{% endblock magnetic_source_args %}{% endfilter %}{
|
||||
|
||||
// This function updates magnetic field values for magnetic dipole sources.
|
||||
//
|
||||
// Args:
|
||||
// NMAGDIPOLE: Total number of magnetic dipoles in the model
|
||||
// iteration: Iteration number of simulation
|
||||
// dx, dy, dz: Spatial discretisations
|
||||
// srcinfo1: Source cell coordinates and polarisation information
|
||||
// srcinfo2: Other source information, e.g. length, resistance etc...
|
||||
// srcwaveforms: Source waveform values
|
||||
// ID, H: Access to ID and field component arrays
|
||||
|
||||
// Obtain the linear index corresponding to the current thread and use for each source
|
||||
{{self.threadidx()}}
|
||||
|
||||
if (src < NMAGDIPOLE) {
|
||||
|
||||
int i, j, k, polarisation;
|
||||
|
||||
i = srcinfo1[IDX2D_SRCINFO(src,0)];
|
||||
j = srcinfo1[IDX2D_SRCINFO(src,1)];
|
||||
k = srcinfo1[IDX2D_SRCINFO(src,2)];
|
||||
polarisation = srcinfo1[IDX2D_SRCINFO(src,3)];
|
||||
|
||||
// 'x' polarised source
|
||||
if (polarisation == 0) {
|
||||
int materialHx = ID[IDX4D_ID(3,i,j,k)];
|
||||
Hx[IDX3D_FIELDS(i,j,k)] = Hx[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
|
||||
}
|
||||
|
||||
// 'y' polarised source
|
||||
else if (polarisation == 1) {
|
||||
int materialHy = ID[IDX4D_ID(4,i,j,k)];
|
||||
Hy[IDX3D_FIELDS(i,j,k)] = Hy[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
|
||||
}
|
||||
|
||||
// 'z' polarised source
|
||||
else if (polarisation == 2) {
|
||||
int materialHz = ID[IDX4D_ID(5,i,j,k)];
|
||||
Hz[IDX3D_FIELDS(i,j,k)] = Hz[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////
|
||||
// Voltage source electric field update //
|
||||
//////////////////////////////////////////
|
||||
|
||||
{{KERNEL}} void update_voltage_source(int NVOLTSRC,
|
||||
int iteration,
|
||||
{{REAL}} dx,
|
||||
{{REAL}} dy,
|
||||
{{REAL}} dz,{% filter indent(width=36) %}{{self.electric_source_args()}}{% endfilter %}{
|
||||
|
||||
// This function updates electric field values for voltage sources.
|
||||
//
|
||||
// Args:
|
||||
// NVOLTSRC: Total number of voltage sources in the model
|
||||
// iteration: Iteration number of simulation
|
||||
// dx, dy, dz: Spatial discretisations
|
||||
// srcinfo1: Source cell coordinates and polarisation information
|
||||
// srcinfo2: Other source information, e.g. length, resistance etc...
|
||||
// srcwaveforms: Source waveform values
|
||||
// ID, E: Access to ID and field component arrays
|
||||
|
||||
// Obtain the linear index corresponding to the current thread and use for each source
|
||||
{{self.threadidx()}}
|
||||
|
||||
if (src < NVOLTSRC) {
|
||||
|
||||
{{REAL}} resistance;
|
||||
int i, j, k, polarisation;
|
||||
|
||||
i = srcinfo1[IDX2D_SRCINFO(src,0)];
|
||||
j = srcinfo1[IDX2D_SRCINFO(src,1)];
|
||||
k = srcinfo1[IDX2D_SRCINFO(src,2)];
|
||||
polarisation = srcinfo1[IDX2D_SRCINFO(src,3)];
|
||||
resistance = srcinfo2[src];
|
||||
|
||||
// 'x' polarised source
|
||||
if (polarisation == 0) {
|
||||
if (resistance != 0) {
|
||||
int materialEx = ID[IDX4D_ID(0,i,j,k)];
|
||||
Ex[IDX3D_FIELDS(i,j,k)] = Ex[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dy * dz));
|
||||
}
|
||||
else {
|
||||
Ex[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dx;
|
||||
}
|
||||
}
|
||||
|
||||
// 'y' polarised source
|
||||
else if (polarisation == 1) {
|
||||
if (resistance != 0) {
|
||||
int materialEy = ID[IDX4D_ID(1,i,j,k)];
|
||||
Ey[IDX3D_FIELDS(i,j,k)] = Ey[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dz));
|
||||
}
|
||||
else {
|
||||
Ey[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dy;
|
||||
}
|
||||
}
|
||||
|
||||
// 'z' polarised source
|
||||
else if (polarisation == 2) {
|
||||
if (resistance != 0) {
|
||||
int materialEz = ID[IDX4D_ID(2,i,j,k)];
|
||||
Ez[IDX3D_FIELDS(i,j,k)] = Ez[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dy));
|
||||
}
|
||||
else {
|
||||
Ez[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dz;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@@ -0,0 +1,34 @@
|
||||
{% extends "source_updates_base.tmpl" %}
|
||||
|
||||
|
||||
{% block constmem %}
|
||||
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
|
||||
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
|
||||
{% endblock constmem %}
|
||||
|
||||
|
||||
{% block threadidx %}
|
||||
int src = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
{% endblock threadidx %}
|
||||
|
||||
|
||||
{% block electric_source_args %}
|
||||
const int* __restrict__ srcinfo1,
|
||||
const {{REAL}}* __restrict__ srcinfo2,
|
||||
const {{REAL}}* __restrict__ srcwaveforms,
|
||||
const unsigned int* __restrict__ ID,
|
||||
{{REAL}} *Ex,
|
||||
{{REAL}} *Ey,
|
||||
{{REAL}} *Ez)
|
||||
{% endblock electric_source_args %}
|
||||
|
||||
|
||||
{% block magnetic_source_args %}
|
||||
const int* __restrict__ srcinfo1,
|
||||
const {{REAL}}* __restrict__ srcinfo2,
|
||||
const {{REAL}}* __restrict__ srcwaveforms,
|
||||
const unsigned int* __restrict__ ID,
|
||||
{{REAL}} *Hx,
|
||||
{{REAL}} *Hy,
|
||||
{{REAL}} *Hz)
|
||||
{% endblock magnetic_source_args %}
|
@@ -0,0 +1,46 @@
|
||||
{% extends "source_updates_base.tmpl" %}
|
||||
|
||||
|
||||
{% block constmem %}
|
||||
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
|
||||
{
|
||||
{% for i in updatecoeffsE %}
|
||||
{{i}},
|
||||
{% endfor %}
|
||||
};
|
||||
|
||||
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
|
||||
{
|
||||
{% for i in updatecoeffsH %}
|
||||
{{i}},
|
||||
{% endfor %}
|
||||
};
|
||||
{% endblock constmem %}
|
||||
|
||||
|
||||
{% block threadidx %}
|
||||
int src = get_global_id(2) * get_global_size(0) * get_global_size(1) +
|
||||
get_global_id(1) * get_global_size(0) + get_global_id(0);
|
||||
{% endblock threadidx %}
|
||||
|
||||
|
||||
{% block electric_source_args %}
|
||||
__global const int* restrict srcinfo1,
|
||||
__global const {{REAL}}* restrict srcinfo2,
|
||||
__global const {{REAL}}* restrict srcwaveforms,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global {{REAL}} *Ex,
|
||||
__global {{REAL}} *Ey,
|
||||
__global {{REAL}} *Ez)
|
||||
{% endblock electric_source_args %}
|
||||
|
||||
|
||||
{% block magnetic_source_args %}
|
||||
__global const int* restrict srcinfo1,
|
||||
__global const {{REAL}}* restrict srcinfo2,
|
||||
__global const {{REAL}}* restrict srcwaveforms,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global {{REAL}} *Hx,
|
||||
__global {{REAL}} *Hy,
|
||||
__global {{REAL}} *Hz)
|
||||
{% endblock magnetic_source_args %}
|
@@ -0,0 +1,50 @@
|
||||
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
|
||||
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
|
||||
//
|
||||
// This file is part of gprMax.
|
||||
//
|
||||
// gprMax is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// gprMax is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
#define IDX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n)
|
||||
#define IDX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k)
|
||||
#define IDX3D_FIELDS(i,j,k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
|
||||
|
||||
{{KERNEL}} void store_outputs(int NRX,
|
||||
int iteration,{% filter indent(width=28) %}{% block rx_args %}{% endblock rx_args %}{% endfilter %}{
|
||||
|
||||
// This function stores field component values for every receiver in the model.
|
||||
//
|
||||
// Args:
|
||||
// NRX: total number of receivers in the model.
|
||||
// rxs: array to store field components for receivers - rows
|
||||
// are field components; columns are iterations; pages are receivers
|
||||
|
||||
// Obtain linear index corresponding to the current work item
|
||||
{% block threadidx %}{% endblock threadidx %}
|
||||
|
||||
int i,j,k;
|
||||
|
||||
if (rx < NRX) {
|
||||
i = rxcoords[IDX2D_RXCOORDS(rx,0)];
|
||||
j = rxcoords[IDX2D_RXCOORDS(rx,1)];
|
||||
k = rxcoords[IDX2D_RXCOORDS(rx,2)];
|
||||
rxs[IDX3D_RXS(0,iteration,rx)] = Ex[IDX3D_FIELDS(i,j,k)];
|
||||
rxs[IDX3D_RXS(1,iteration,rx)] = Ey[IDX3D_FIELDS(i,j,k)];
|
||||
rxs[IDX3D_RXS(2,iteration,rx)] = Ez[IDX3D_FIELDS(i,j,k)];
|
||||
rxs[IDX3D_RXS(3,iteration,rx)] = Hx[IDX3D_FIELDS(i,j,k)];
|
||||
rxs[IDX3D_RXS(4,iteration,rx)] = Hy[IDX3D_FIELDS(i,j,k)];
|
||||
rxs[IDX3D_RXS(5,iteration,rx)] = Hz[IDX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
}
|
@@ -0,0 +1,18 @@
|
||||
{% extends "store_outputs_base.tmpl" %}
|
||||
|
||||
|
||||
{% block threadidx %}
|
||||
int rx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
{% endblock threadidx %}
|
||||
|
||||
|
||||
{% block rx_args %}
|
||||
const int* __restrict__ rxcoords,
|
||||
{{REAL}} *rxs,
|
||||
const {{REAL}}* __restrict__ Ex,
|
||||
const {{REAL}}* __restrict__ Ey,
|
||||
const {{REAL}}* __restrict__ Ez,
|
||||
const {{REAL}}* __restrict__ Hx,
|
||||
const {{REAL}}* __restrict__ Hy,
|
||||
const {{REAL}}* __restrict__ Hz)
|
||||
{% endblock rx_args %}
|
@@ -0,0 +1,19 @@
|
||||
{% extends "store_outputs_base.tmpl" %}
|
||||
|
||||
|
||||
{% block threadidx %}
|
||||
int rx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
|
||||
get_global_id(1) * get_global_size(0) + get_global_id(0);
|
||||
{% endblock threadidx %}
|
||||
|
||||
|
||||
{% block rx_args %}
|
||||
__global const int* restrict rxcoords,
|
||||
__global {{REAL}} *rxs,
|
||||
__global const {{REAL}}* restrict Ex,
|
||||
__global const {{REAL}}* restrict Ey,
|
||||
__global const {{REAL}}* restrict Ez,
|
||||
__global const {{REAL}}* restrict Hx,
|
||||
__global const {{REAL}}* restrict Hy,
|
||||
__global const {{REAL}}* restrict Hz)
|
||||
{% endblock rx_args %}
|
@@ -0,0 +1,22 @@
|
||||
{% block complex_header %}{% endblock complex_header %}
|
||||
|
||||
// Macros for converting subscripts to linear index
|
||||
#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
|
||||
#define IDX2D_MATDISP(m, n) (m)*({{NY_MATDISPCOEFFS}})+(n)
|
||||
#define IDX2D_R(m, n) (m)*(NY_R)+(n)
|
||||
#define IDX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n)
|
||||
// Row stride is parenthesised like the sibling index macros so the expansion stays correct even if NY_SRCINFO is rendered as an expression rather than a plain literal.
#define IDX2D_SRCINFO(m, n) (m)*({{NY_SRCINFO}})+(n)
|
||||
#define IDX2D_SRCWAVES(m, n) (m)*({{NY_SRCWAVES}})+(n)
|
||||
|
||||
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
|
||||
#define IDX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k)
|
||||
|
||||
#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
|
||||
#define IDX4D_SNAPS(p, i, j, k) (p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k)
|
||||
#define IDX4D_T(p, i, j, k) (p)*({{NX_T}})*({{NY_T}})*({{NZ_T}})+(i)*({{NY_T}})*({{NZ_T}})+(j)*({{NZ_T}})+(k)
|
||||
#define IDX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k)
|
||||
#define IDX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k)
|
||||
|
||||
|
||||
// Material coefficients (read-only) stored in constant memory of compute device
|
||||
{% block constmem %}{% endblock constmem %}
|
@@ -0,0 +1,11 @@
|
||||
{% extends "knl_common_base.tmpl" %}
|
||||
|
||||
{% block complex_header %}
|
||||
#include <pycuda-complex.hpp>
|
||||
{% endblock complex_header %}
|
||||
|
||||
|
||||
{% block constmem %}
|
||||
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
|
||||
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
|
||||
{% endblock constmem %}
|
@@ -0,0 +1,22 @@
|
||||
{% extends "knl_common_base.tmpl" %}
|
||||
|
||||
{% block complex_header %}
|
||||
#include <pyopencl-complex.h>
|
||||
{% endblock complex_header %}
|
||||
|
||||
|
||||
{% block constmem %}
|
||||
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
|
||||
{
|
||||
{% for i in updatecoeffsE %}
|
||||
{{i}},
|
||||
{% endfor %}
|
||||
};
|
||||
|
||||
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
|
||||
{
|
||||
{% for i in updatecoeffsH %}
|
||||
{{i}},
|
||||
{% endfor %}
|
||||
};
|
||||
{% endblock constmem %}
|
@@ -0,0 +1,233 @@
|
||||
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
|
||||
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
|
||||
#
|
||||
# This file is part of gprMax.
|
||||
#
|
||||
# gprMax is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# gprMax is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from string import Template
|
||||
|
||||
update_electric = Template("""
|
||||
// Electric field updates - normal materials.
|
||||
//
|
||||
// Args:
|
||||
// NX, NY, NZ: Number of cells of the model domain.
|
||||
// ID, E, H: Access to ID and field component arrays.
|
||||
|
||||
// Convert the linear index to subscripts for 3D field arrays
|
||||
int x = i / ($NY_FIELDS * $NZ_FIELDS);
|
||||
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
|
||||
int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
|
||||
|
||||
// Convert the linear index to subscripts for 4D material ID array
|
||||
int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
|
||||
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
|
||||
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
|
||||
|
||||
// Ex component
|
||||
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
|
||||
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
|
||||
Ex[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(x,y,z)] +
|
||||
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x,y-1,z)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x,y,z-1)]);
|
||||
}
|
||||
|
||||
// Ey component
|
||||
if ((NX != 1 || NZ != 1) && x > 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) {
|
||||
int materialEy = ID[IDX4D_ID(1,x_ID,y_ID,z_ID)];
|
||||
Ey[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(x,y,z)] +
|
||||
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y,z-1)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x-1,y,z)]);
|
||||
}
|
||||
|
||||
// Ez component
|
||||
if ((NX != 1 || NY != 1) && x > 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) {
|
||||
int materialEz = ID[IDX4D_ID(2,x_ID,y_ID,z_ID)];
|
||||
Ez[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(x,y,z)] +
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x-1,y,z)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y-1,z)]);
|
||||
}
|
||||
""")
|
||||
|
||||
update_magnetic = Template("""
|
||||
// Magnetic field updates - normal materials.
|
||||
//
|
||||
// Args:
|
||||
// NX, NY, NZ: Number of cells of the model domain.
|
||||
// ID, E, H: Access to ID and field component arrays.
|
||||
|
||||
// Convert the linear index to subscripts for 3D field arrays
|
||||
int x = i / ($NY_FIELDS * $NZ_FIELDS);
|
||||
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
|
||||
int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
|
||||
|
||||
// Convert the linear index to subscripts for 4D material ID array
|
||||
int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
|
||||
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
|
||||
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
|
||||
|
||||
// Hx component
|
||||
if (NX != 1 && x > 0 && x < NX && y >= 0 && y < NY && z >= 0 && z < NZ) {
|
||||
int materialHx = ID[IDX4D_ID(3,x_ID,y_ID,z_ID)];
|
||||
Hx[IDX3D_FIELDS(x,y,z)] = updatecoeffsH[IDX2D_MAT(materialHx,0)] * Hx[IDX3D_FIELDS(x,y,z)] -
|
||||
updatecoeffsH[IDX2D_MAT(materialHx,2)] * (Ez[IDX3D_FIELDS(x,y+1,z)] - Ez[IDX3D_FIELDS(x,y,z)]) +
|
||||
updatecoeffsH[IDX2D_MAT(materialHx,3)] * (Ey[IDX3D_FIELDS(x,y,z+1)] - Ey[IDX3D_FIELDS(x,y,z)]);
|
||||
}
|
||||
|
||||
// Hy component
|
||||
if (NY != 1 && x >= 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) {
|
||||
int materialHy = ID[IDX4D_ID(4,x_ID,y_ID,z_ID)];
|
||||
Hy[IDX3D_FIELDS(x,y,z)] = updatecoeffsH[IDX2D_MAT(materialHy,0)] * Hy[IDX3D_FIELDS(x,y,z)] -
|
||||
updatecoeffsH[IDX2D_MAT(materialHy,3)] * (Ex[IDX3D_FIELDS(x,y,z+1)] - Ex[IDX3D_FIELDS(x,y,z)]) +
|
||||
updatecoeffsH[IDX2D_MAT(materialHy,1)] * (Ez[IDX3D_FIELDS(x+1,y,z)] - Ez[IDX3D_FIELDS(x,y,z)]);
|
||||
}
|
||||
|
||||
// Hz component
|
||||
if (NZ != 1 && x >= 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) {
|
||||
int materialHz = ID[IDX4D_ID(5,x_ID,y_ID,z_ID)];
|
||||
Hz[IDX3D_FIELDS(x,y,z)] = updatecoeffsH[IDX2D_MAT(materialHz,0)] * Hz[IDX3D_FIELDS(x,y,z)] -
|
||||
updatecoeffsH[IDX2D_MAT(materialHz,1)] * (Ey[IDX3D_FIELDS(x+1,y,z)] - Ey[IDX3D_FIELDS(x,y,z)]) +
|
||||
updatecoeffsH[IDX2D_MAT(materialHz,2)] * (Ex[IDX3D_FIELDS(x,y+1,z)] - Ex[IDX3D_FIELDS(x,y,z)]);
|
||||
}
|
||||
""")
|
||||
|
||||
update_electric_dispersive_A = Template("""
|
||||
// Electric field updates - dispersive materials - part A of updates to electric
|
||||
// field values when dispersive materials
|
||||
// (with multiple poles) are present.
|
||||
//
|
||||
// Args:
|
||||
// NX, NY, NZ: Number of cells of the model domain.
|
||||
// MAXPOLES: Maximum number of dispersive material poles present in model.
|
||||
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients,
|
||||
// dispersive, ID and field
|
||||
// component arrays.
|
||||
|
||||
|
||||
// Convert the linear index to subscripts for 3D field arrays
|
||||
int x = i / ($NY_FIELDS * $NZ_FIELDS);
|
||||
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
|
||||
int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
|
||||
|
||||
// Convert the linear index to subscripts for 4D material ID array
|
||||
int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
|
||||
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
|
||||
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
|
||||
|
||||
// Convert the linear index to subscripts for 4D dispersive array
|
||||
int x_T = (i % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T);
|
||||
int y_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
|
||||
int z_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
|
||||
|
||||
// Ex component
|
||||
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
|
||||
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
|
||||
$REAL phi = 0;
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEx,pole*3)]$REALFUNC * Tx[IDX4D_T(pole,x_T,y_T,z_T)]$REALFUNC;
|
||||
Tx[IDX4D_T(pole,x_T,y_T,z_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEx,1+(pole*3))] * Tx[IDX4D_T(pole,x_T,y_T,z_T)] +
|
||||
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(x,y,z)];
|
||||
}
|
||||
Ex[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(x,y,z)] +
|
||||
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x,y-1,z)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x,y,z-1)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEx,4)] * phi;
|
||||
}
|
||||
|
||||
// Ey component
|
||||
if ((NX != 1 || NZ != 1) && x > 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) {
|
||||
int materialEy = ID[IDX4D_ID(1,x_ID,y_ID,z_ID)];
|
||||
$REAL phi = 0;
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEy,pole*3)]$REALFUNC * Ty[IDX4D_T(pole,x_T,y_T,z_T)]$REALFUNC;
|
||||
Ty[IDX4D_T(pole,x_T,y_T,z_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEy,1+(pole*3))] * Ty[IDX4D_T(pole,x_T,y_T,z_T)] +
|
||||
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(x,y,z)];
|
||||
}
|
||||
Ey[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(x,y,z)] +
|
||||
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y,z-1)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(x,y,z)] - Hz[IDX3D_FIELDS(x-1,y,z)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEy,4)] * phi;
|
||||
}
|
||||
|
||||
// Ez component
|
||||
if ((NX != 1 || NY != 1) && x > 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) {
|
||||
int materialEz = ID[IDX4D_ID(2,x_ID,y_ID,z_ID)];
|
||||
$REAL phi = 0;
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEz,pole*3)]$REALFUNC * Tz[IDX4D_T(pole,x_T,y_T,z_T)]$REALFUNC;
|
||||
Tz[IDX4D_T(pole,x_T,y_T,z_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEz,1+(pole*3))] * Tz[IDX4D_T(pole,x_T,y_T,z_T)] +
|
||||
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(x,y,z)];
|
||||
}
|
||||
Ez[IDX3D_FIELDS(x,y,z)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(x,y,z)] +
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x-1,y,z)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y-1,z)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,4)] * phi;
|
||||
}
|
||||
""")
|
||||
|
||||
update_electric_dispersive_B = Template("""
|
||||
// Electric field updates - dispersive materials - part B of updates to electric
|
||||
// field values when dispersive materials
|
||||
// (with multiple poles) are present.
|
||||
//
|
||||
// Args:
|
||||
// NX, NY, NZ: Number of cells of the model domain.
|
||||
// MAXPOLES: Maximum number of dispersive material poles present in model.
|
||||
// updatecoeffsdispersive, T, ID, E, H: Access to update coefficients,
|
||||
// dispersive, ID and field
|
||||
// component arrays.
|
||||
|
||||
|
||||
// Convert the linear index to subscripts for 3D field arrays
|
||||
int x = i / ($NY_FIELDS * $NZ_FIELDS);
|
||||
int y = (i % ($NY_FIELDS * $NZ_FIELDS)) / $NZ_FIELDS;
|
||||
int z = (i % ($NY_FIELDS * $NZ_FIELDS)) % $NZ_FIELDS;
|
||||
|
||||
// Convert the linear index to subscripts for 4D material ID array
|
||||
int x_ID = (i % ($NX_ID * $NY_ID * $NZ_ID)) / ($NY_ID * $NZ_ID);
|
||||
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
|
||||
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
|
||||
|
||||
// Convert the linear index to subscripts for 4D dispersive array
|
||||
int x_T = (i % ($NX_T * $NY_T * $NZ_T)) / ($NY_T * $NZ_T);
|
||||
int y_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
|
||||
int z_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
|
||||
|
||||
// Ex component
|
||||
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
|
||||
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
Tx[IDX4D_T(pole,x_T,y_T,z_T)] = Tx[IDX4D_T(pole,x_T,y_T,z_T)] -
|
||||
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(x,y,z)];
|
||||
}
|
||||
}
|
||||
|
||||
// Ey component
|
||||
if ((NX != 1 || NZ != 1) && x > 0 && x < NX && y >= 0 && y < NY && z > 0 && z < NZ) {
|
||||
int materialEy = ID[IDX4D_ID(1,x_ID,y_ID,z_ID)];
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
Ty[IDX4D_T(pole,x_T,y_T,z_T)] = Ty[IDX4D_T(pole,x_T,y_T,z_T)] -
|
||||
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(x,y,z)];
|
||||
}
|
||||
}
|
||||
|
||||
// Ez component
|
||||
if ((NX != 1 || NY != 1) && x > 0 && x < NX && y > 0 && y < NY && z >= 0 && z < NZ) {
|
||||
int materialEz = ID[IDX4D_ID(2,x_ID,y_ID,z_ID)];
|
||||
for (int pole = 0; pole < MAXPOLES; pole++) {
|
||||
Tz[IDX4D_T(pole,x_T,y_T,z_T)] = Tz[IDX4D_T(pole,x_T,y_T,z_T)] -
|
||||
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(x,y,z)];
|
||||
}
|
||||
}
|
||||
""")
|
文件差异内容过多而无法显示
加载差异
文件差异内容过多而无法显示
加载差异
@@ -0,0 +1,72 @@
|
||||
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
|
||||
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
|
||||
#
|
||||
# This file is part of gprMax.
|
||||
#
|
||||
# gprMax is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# gprMax is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from string import Template
|
||||
|
||||
|
||||
store_snapshot = Template("""
|
||||
// Stores field values for a snapshot.
|
||||
//
|
||||
// Args:
|
||||
// p: Snapshot number.
|
||||
// xs, xf, ys, yf, zs, zf: Start and finish cell coordinates for snapshot.
|
||||
// dx, dy, dz: Sampling interval in cell coordinates for snapshot.
|
||||
// E, H: Access to field component arrays.
|
||||
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots.
|
||||
|
||||
|
||||
// Convert the linear index to subscripts for 4D SNAPS array
|
||||
int x = (i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) / ($NY_SNAPS * $NZ_SNAPS);
|
||||
int y = ((i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) / $NZ_SNAPS;
|
||||
int z = ((i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) % $NZ_SNAPS;
|
||||
|
||||
// Subscripts for field arrays
|
||||
int xx, yy, zz;
|
||||
|
||||
if (x >= xs && x < xf && y >= ys && y < yf && z >= zs && z < zf) {
|
||||
|
||||
// Increment subscripts for field array to account for spatial sampling of snapshot
|
||||
xx = (xs + x) * dx;
|
||||
yy = (ys + y) * dy;
|
||||
zz = (zs + z) * dz;
|
||||
|
||||
// The electric field component value at a point comes from an average of
|
||||
// the 4 electric field component values in that cell
|
||||
snapEx[IDX4D_SNAPS(p,x,y,z)] = (Ex[IDX3D_FIELDS(xx,yy,zz)] +
|
||||
Ex[IDX3D_FIELDS(xx,yy+1,zz)] +
|
||||
Ex[IDX3D_FIELDS(xx,yy,zz+1)] +
|
||||
Ex[IDX3D_FIELDS(xx,yy+1,zz+1)]) / 4;
|
||||
snapEy[IDX4D_SNAPS(p,x,y,z)] = (Ey[IDX3D_FIELDS(xx,yy,zz)] +
|
||||
Ey[IDX3D_FIELDS(xx+1,yy,zz)] +
|
||||
Ey[IDX3D_FIELDS(xx,yy,zz+1)] +
|
||||
Ey[IDX3D_FIELDS(xx+1,yy,zz+1)]) / 4;
|
||||
snapEz[IDX4D_SNAPS(p,x,y,z)] = (Ez[IDX3D_FIELDS(xx,yy,zz)] +
|
||||
Ez[IDX3D_FIELDS(xx+1,yy,zz)] +
|
||||
Ez[IDX3D_FIELDS(xx,yy+1,zz)] +
|
||||
Ez[IDX3D_FIELDS(xx+1,yy+1,zz)]) / 4;
|
||||
|
||||
// The magnetic field component value at a point comes from average of
|
||||
// 2 magnetic field component values in that cell and the following cell
|
||||
snapHx[IDX4D_SNAPS(p,x,y,z)] = (Hx[IDX3D_FIELDS(xx,yy,zz)] +
|
||||
Hx[IDX3D_FIELDS(xx+1,yy,zz)]) / 2;
|
||||
snapHy[IDX4D_SNAPS(p,x,y,z)] = (Hy[IDX3D_FIELDS(xx,yy,zz)] +
|
||||
Hy[IDX3D_FIELDS(xx,yy+1,zz)]) / 2;
|
||||
snapHz[IDX4D_SNAPS(p,x,y,z)] = (Hz[IDX3D_FIELDS(xx,yy,zz)] +
|
||||
Hz[IDX3D_FIELDS(xx,yy,zz+1)]) / 2;
|
||||
}
|
||||
""")
|
@@ -0,0 +1,173 @@
|
||||
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
|
||||
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
|
||||
#
|
||||
# This file is part of gprMax.
|
||||
#
|
||||
# gprMax is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# gprMax is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from string import Template
|
||||
|
||||
update_hertzian_dipole = Template("""
|
||||
// Updates electric field values for Hertzian dipole sources.
|
||||
//
|
||||
// Args:
|
||||
// NHERTZDIPOLE: Total number of Hertzian dipoles in the model.
|
||||
// iteration: Iteration number of simulation.
|
||||
// dx, dy, dz: Spatial discretisations.
|
||||
// srcinfo1: Source cell coordinates and polarisation information.
|
||||
// srcinfo2: Other source information, e.g. length, resistance etc...
|
||||
// srcwaveforms: Source waveform values.
|
||||
// ID, E: Access to ID and field component arrays.
|
||||
|
||||
|
||||
if (i < NHERTZDIPOLE) {
|
||||
|
||||
$REAL dl;
|
||||
int x, y, z, polarisation;
|
||||
|
||||
x = srcinfo1[IDX2D_SRCINFO(i,0)];
|
||||
y = srcinfo1[IDX2D_SRCINFO(i,1)];
|
||||
z = srcinfo1[IDX2D_SRCINFO(i,2)];
|
||||
polarisation = srcinfo1[IDX2D_SRCINFO(i,3)];
|
||||
dl = srcinfo2[i];
|
||||
|
||||
// 'x' polarised source
|
||||
if (polarisation == 0) {
|
||||
int materialEx = ID[IDX4D_ID(0,x,y,z)];
|
||||
Ex[IDX3D_FIELDS(x,y,z)] = Ex[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * dl * (1 / (dx * dy * dz));
|
||||
}
|
||||
|
||||
// 'y' polarised source
|
||||
else if (polarisation == 1) {
|
||||
int materialEy = ID[IDX4D_ID(1,x,y,z)];
|
||||
Ey[IDX3D_FIELDS(x,y,z)] = Ey[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * dl * (1 / (dx * dy * dz));
|
||||
}
|
||||
|
||||
// 'z' polarised source
|
||||
else if (polarisation == 2) {
|
||||
int materialEz = ID[IDX4D_ID(2,x,y,z)];
|
||||
Ez[IDX3D_FIELDS(x,y,z)] = Ez[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * dl * (1 / (dx * dy * dz));
|
||||
}
|
||||
}
|
||||
""")
|
||||
|
||||
update_magnetic_dipole = Template("""
|
||||
// Updates magnetic field values for magnetic dipole sources.
|
||||
//
|
||||
// Args:
|
||||
// NMAGDIPOLE: Total number of magnetic dipoles in the model.
|
||||
// iteration: Iteration number of simulation.
|
||||
// dx, dy, dz: Spatial discretisations.
|
||||
// srcinfo1: Source cell coordinates and polarisation information.
|
||||
// srcinfo2: Other source information, e.g. length, resistance etc...
|
||||
// srcwaveforms: Source waveform values.
|
||||
// ID, H: Access to ID and field component arrays.
|
||||
|
||||
|
||||
if (i < NMAGDIPOLE) {
|
||||
|
||||
int x, y, z, polarisation;
|
||||
|
||||
x = srcinfo1[IDX2D_SRCINFO(i,0)];
|
||||
y = srcinfo1[IDX2D_SRCINFO(i,1)];
|
||||
z = srcinfo1[IDX2D_SRCINFO(i,2)];
|
||||
polarisation = srcinfo1[IDX2D_SRCINFO(i,3)];
|
||||
|
||||
// 'x' polarised source
|
||||
if (polarisation == 0) {
|
||||
int materialHx = ID[IDX4D_ID(3,x,y,z)];
|
||||
Hx[IDX3D_FIELDS(x,y,z)] = Hx[IDX3D_FIELDS(x,y,z)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (dx * dy * dz));
|
||||
}
|
||||
|
||||
// 'y' polarised source
|
||||
else if (polarisation == 1) {
|
||||
int materialHy = ID[IDX4D_ID(4,x,y,z)];
|
||||
Hy[IDX3D_FIELDS(x,y,z)] = Hy[IDX3D_FIELDS(x,y,z)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (dx * dy * dz));
|
||||
}
|
||||
|
||||
// 'z' polarised source
|
||||
else if (polarisation == 2) {
|
||||
int materialHz = ID[IDX4D_ID(5,x,y,z)];
|
||||
Hz[IDX3D_FIELDS(x,y,z)] = Hz[IDX3D_FIELDS(x,y,z)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (dx * dy * dz));
|
||||
}
|
||||
}
|
||||
""")
|
||||
|
||||
update_voltage_source = Template("""
|
||||
// Updates electric field values for voltage sources.
|
||||
//
|
||||
// Args:
|
||||
// NVOLTSRC: Total number of voltage sources in the model.
|
||||
// iteration: Iteration number of simulation.
|
||||
// dx, dy, dz: Spatial discretisations.
|
||||
// srcinfo1: Source cell coordinates and polarisation information.
|
||||
// srcinfo2: Other source information, e.g. length, resistance etc...
|
||||
// srcwaveforms: Source waveform values.
|
||||
// ID, E: Access to ID and field component arrays.
|
||||
|
||||
|
||||
if (i < NVOLTSRC) {
|
||||
|
||||
$REAL resistance;
|
||||
int x, y, z, polarisation;
|
||||
|
||||
x = srcinfo1[IDX2D_SRCINFO(i,0)];
|
||||
y = srcinfo1[IDX2D_SRCINFO(i,1)];
|
||||
z = srcinfo1[IDX2D_SRCINFO(i,2)];
|
||||
polarisation = srcinfo1[IDX2D_SRCINFO(i,3)];
|
||||
resistance = srcinfo2[i];
|
||||
|
||||
// 'x' polarised source
|
||||
if (polarisation == 0) {
|
||||
if (resistance != 0) {
|
||||
int materialEx = ID[IDX4D_ID(0,x,y,z)];
|
||||
Ex[IDX3D_FIELDS(x,y,z)] = Ex[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (resistance * dy * dz));
|
||||
}
|
||||
else {
|
||||
Ex[IDX3D_FIELDS(x,y,z)] = -1 * srcwaveforms[IDX2D_SRCWAVES(i,iteration)] / dx;
|
||||
}
|
||||
}
|
||||
|
||||
// 'y' polarised source
|
||||
else if (polarisation == 1) {
|
||||
if (resistance != 0) {
|
||||
int materialEy = ID[IDX4D_ID(1,x,y,z)];
|
||||
Ey[IDX3D_FIELDS(x,y,z)] = Ey[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (resistance * dx * dz));
|
||||
}
|
||||
else {
|
||||
Ey[IDX3D_FIELDS(x,y,z)] = -1 * srcwaveforms[IDX2D_SRCWAVES(i,iteration)] / dy;
|
||||
}
|
||||
}
|
||||
|
||||
// 'z' polarised source
|
||||
else if (polarisation == 2) {
|
||||
if (resistance != 0) {
|
||||
int materialEz = ID[IDX4D_ID(2,x,y,z)];
|
||||
Ez[IDX3D_FIELDS(x,y,z)] = Ez[IDX3D_FIELDS(x,y,z)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
|
||||
srcwaveforms[IDX2D_SRCWAVES(i,iteration)] * (1 / (resistance * dx * dy));
|
||||
}
|
||||
else {
|
||||
Ez[IDX3D_FIELDS(x,y,z)] = -1 * srcwaveforms[IDX2D_SRCWAVES(i,iteration)] / dz;
|
||||
}
|
||||
}
|
||||
}
|
||||
""")
|
@@ -0,0 +1,42 @@
|
||||
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
|
||||
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
|
||||
#
|
||||
# This file is part of gprMax.
|
||||
#
|
||||
# gprMax is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# gprMax is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from string import Template
|
||||
|
||||
|
||||
store_outputs = Template("""
|
||||
// Stores field component values for every receiver in the model.
|
||||
//
|
||||
// Args:
|
||||
// NRX: total number of receivers in the model.
|
||||
// rxs: array to store field components for receivers - rows
|
||||
// are field components; columns are iterations; pages are receivers.
|
||||
|
||||
if (i < NRX) {
|
||||
int x, y, z;
|
||||
x = rxcoords[IDX2D_RXCOORDS(i,0)];
|
||||
y = rxcoords[IDX2D_RXCOORDS(i,1)];
|
||||
z = rxcoords[IDX2D_RXCOORDS(i,2)];
|
||||
rxs[IDX3D_RXS(0,iteration,i)] = Ex[IDX3D_FIELDS(x,y,z)];
|
||||
rxs[IDX3D_RXS(1,iteration,i)] = Ey[IDX3D_FIELDS(x,y,z)];
|
||||
rxs[IDX3D_RXS(2,iteration,i)] = Ez[IDX3D_FIELDS(x,y,z)];
|
||||
rxs[IDX3D_RXS(3,iteration,i)] = Hx[IDX3D_FIELDS(x,y,z)];
|
||||
rxs[IDX3D_RXS(4,iteration,i)] = Hy[IDX3D_FIELDS(x,y,z)];
|
||||
rxs[IDX3D_RXS(5,iteration,i)] = Hz[IDX3D_FIELDS(x,y,z)];
|
||||
}
|
||||
""")
|
@@ -56,7 +56,7 @@ def store_outputs(G):
|
||||
tl.Itotal[iteration] = tl.current[tl.antpos]
|
||||
|
||||
|
||||
kernel_template_store_outputs = Template("""
|
||||
knl_template_store_outputs = Template("""
|
||||
|
||||
// Macros for converting subscripts to linear index:
|
||||
#define INDEX2D_RXCOORDS(m, n) (m)*($NY_RXCOORDS)+(n)
|
||||
|
@@ -32,6 +32,7 @@ args_defaults = {'scenes': None,
|
||||
'restart': None,
|
||||
'mpi': False,
|
||||
'gpu': None,
|
||||
'opencl': None,
|
||||
'subgrid': False,
|
||||
'autotranslate': False,
|
||||
'geometry_only': False,
|
||||
@@ -67,6 +68,8 @@ help_msg = {'scenes': '(list, opt): List of the scenes to run the model. '
|
||||
'performance section of the User Guide.',
|
||||
'gpu': '(list/bool, opt): Flag to use NVIDIA GPU or list of NVIDIA '
|
||||
'GPU device ID(s) for specific GPU card(s).',
|
||||
'opencl': '(list/bool, opt): Flag to use OpenCL or list of OpenCL '
|
||||
'device ID(s) for specific compute device(s).',
|
||||
'subgrid': '(bool, opt): Flag to use sub-gridding.',
|
||||
'autotranslate': '(bool, opt): For sub-gridding - auto translate '
|
||||
'objects with main grid coordinates to their '
|
||||
@@ -92,6 +95,7 @@ def run(scenes=args_defaults['scenes'],
|
||||
restart=args_defaults['restart'],
|
||||
mpi=args_defaults['mpi'],
|
||||
gpu=args_defaults['gpu'],
|
||||
opencl=args_defaults['opencl'],
|
||||
subgrid=args_defaults['subgrid'],
|
||||
autotranslate=args_defaults['autotranslate'],
|
||||
geometry_only=args_defaults['geometry_only'],
|
||||
@@ -112,6 +116,7 @@ def run(scenes=args_defaults['scenes'],
|
||||
'restart': restart,
|
||||
'mpi': mpi,
|
||||
'gpu': gpu,
|
||||
'opencl': opencl,
|
||||
'subgrid': subgrid,
|
||||
'autotranslate': autotranslate,
|
||||
'geometry_only': geometry_only,
|
||||
@@ -139,6 +144,8 @@ def cli():
|
||||
help=help_msg['mpi'])
|
||||
parser.add_argument('-gpu', type=int, action='append', nargs='*',
|
||||
help=help_msg['gpu'])
|
||||
parser.add_argument('-opencl', type=int, action='append', nargs='*',
|
||||
help=help_msg['opencl'])
|
||||
parser.add_argument('--geometry-only', action='store_true',
|
||||
default=args_defaults['geometry_only'],
|
||||
help=help_msg['geometry_only'])
|
||||
@@ -176,11 +183,11 @@ def run_main(args):
|
||||
if args.spotpy:
|
||||
context = SPOTPYContext()
|
||||
context.run(args.i)
|
||||
# MPI running with (OpenMP/CUDA)
|
||||
# MPI running with (OpenMP/CUDA/OpenCL)
|
||||
elif config.sim_config.args.mpi:
|
||||
context = MPIContext()
|
||||
context.run()
|
||||
# Standard running (OpenMP/CUDA)
|
||||
# Standard running (OpenMP/CUDA/OpenCL)
|
||||
else:
|
||||
context = Context()
|
||||
context.run()
|
||||
|
@@ -306,33 +306,74 @@ class CUDAGrid(FDTDGrid):
|
||||
self.bpg = (int(np.ceil(((self.nx + 1) * (self.ny + 1) *
|
||||
(self.nz + 1)) / self.tpb[0])), 1, 1)
|
||||
|
||||
def htod_geometry_arrays(self):
|
||||
"""Initialise an array for cell edge IDs (ID) on GPU."""
|
||||
import pycuda.gpuarray as gpuarray
|
||||
def htod_geometry_arrays(self, queue=None):
|
||||
"""Initialise an array for cell edge IDs (ID) on compute device.
|
||||
|
||||
Args:
|
||||
queue: pyopencl queue.
|
||||
"""
|
||||
|
||||
self.ID_gpu = gpuarray.to_gpu(self.ID)
|
||||
if config.sim_config.general['solver'] == 'cuda':
|
||||
import pycuda.gpuarray as gpuarray
|
||||
self.ID_dev = gpuarray.to_gpu(self.ID)
|
||||
|
||||
def htod_field_arrays(self):
|
||||
"""Initialise geometry and field arrays on GPU."""
|
||||
elif config.sim_config.general['solver'] == 'opencl':
|
||||
import pyopencl.array as clarray
|
||||
self.ID_dev = clarray.to_device(queue, self.ID)
|
||||
|
||||
import pycuda.gpuarray as gpuarray
|
||||
def htod_field_arrays(self, queue=None):
|
||||
"""Initialise field arrays on compute device.
|
||||
|
||||
Args:
|
||||
queue: pyopencl queue.
|
||||
"""
|
||||
|
||||
self.Ex_gpu = gpuarray.to_gpu(self.Ex)
|
||||
self.Ey_gpu = gpuarray.to_gpu(self.Ey)
|
||||
self.Ez_gpu = gpuarray.to_gpu(self.Ez)
|
||||
self.Hx_gpu = gpuarray.to_gpu(self.Hx)
|
||||
self.Hy_gpu = gpuarray.to_gpu(self.Hy)
|
||||
self.Hz_gpu = gpuarray.to_gpu(self.Hz)
|
||||
if config.sim_config.general['solver'] == 'cuda':
|
||||
import pycuda.gpuarray as gpuarray
|
||||
self.Ex_dev = gpuarray.to_gpu(self.Ex)
|
||||
self.Ey_dev = gpuarray.to_gpu(self.Ey)
|
||||
self.Ez_dev = gpuarray.to_gpu(self.Ez)
|
||||
self.Hx_dev = gpuarray.to_gpu(self.Hx)
|
||||
self.Hy_dev = gpuarray.to_gpu(self.Hy)
|
||||
self.Hz_dev = gpuarray.to_gpu(self.Hz)
|
||||
elif config.sim_config.general['solver'] == 'opencl':
|
||||
import pyopencl.array as clarray
|
||||
self.Ex_dev = clarray.to_device(queue, self.Ex)
|
||||
self.Ey_dev = clarray.to_device(queue, self.Ey)
|
||||
self.Ez_dev = clarray.to_device(queue, self.Ez)
|
||||
self.Hx_dev = clarray.to_device(queue, self.Hx)
|
||||
self.Hy_dev = clarray.to_device(queue, self.Hy)
|
||||
self.Hz_dev = clarray.to_device(queue, self.Hz)
|
||||
|
||||
def htod_dispersive_arrays(self):
|
||||
"""Initialise dispersive material coefficient arrays on GPU."""
|
||||
def htod_dispersive_arrays(self, queue=None):
|
||||
"""Initialise dispersive material coefficient arrays on compute device.
|
||||
|
||||
Args:
|
||||
queue: pyopencl queue.
|
||||
"""
|
||||
|
||||
import pycuda.gpuarray as gpuarray
|
||||
if config.sim_config.general['solver'] == 'cuda':
|
||||
import pycuda.gpuarray as gpuarray
|
||||
self.Tx_dev = gpuarray.to_gpu(self.Tx)
|
||||
self.Ty_dev = gpuarray.to_gpu(self.Ty)
|
||||
self.Tz_dev = gpuarray.to_gpu(self.Tz)
|
||||
self.updatecoeffsdispersive_dev = gpuarray.to_gpu(self.updatecoeffsdispersive)
|
||||
elif config.sim_config.general['solver'] == 'opencl':
|
||||
import pyopencl.array as clarray
|
||||
self.Tx_dev = clarray.to_device(queue, self.Tx)
|
||||
self.Ty_dev = clarray.to_device(queue, self.Ty)
|
||||
self.Tz_dev = clarray.to_device(queue, self.Tz)
|
||||
self.updatecoeffsdispersive_dev = clarray.to_device(queue, self.updatecoeffsdispersive)
|
||||
|
||||
self.Tx_gpu = gpuarray.to_gpu(self.Tx)
|
||||
self.Ty_gpu = gpuarray.to_gpu(self.Ty)
|
||||
self.Tz_gpu = gpuarray.to_gpu(self.Tz)
|
||||
self.updatecoeffsdispersive_gpu = gpuarray.to_gpu(self.updatecoeffsdispersive)
|
||||
|
||||
class OpenCLGrid(CUDAGrid):
|
||||
"""Additional grid methods for solving on compute device using OpenCL."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
def set_blocks_per_grid(self):
|
||||
pass
|
||||
|
||||
|
||||
def dispersion_analysis(G):
|
||||
|
@@ -149,7 +149,9 @@ class ModelBuildRun:
|
||||
|
||||
# Check memory requirements
|
||||
total_mem, mem_strs = mem_check_all(grids)
|
||||
logger.info(f'\nMemory required: {" + ".join(mem_strs)} + ~{human_size(config.get_model_config().mem_overhead)} overhead = {human_size(total_mem)}')
|
||||
logger.info(f'\nMemory required: {" + ".join(mem_strs)} + '
|
||||
f'~{human_size(config.get_model_config().mem_overhead)} '
|
||||
f'overhead = {human_size(total_mem)}')
|
||||
|
||||
# Build grids
|
||||
gridbuilders = [GridBuilder(grid) for grid in grids]
|
||||
@@ -170,21 +172,41 @@ class ModelBuildRun:
|
||||
# Check to see if numerical dispersion might be a problem
|
||||
results = dispersion_analysis(gb.grid)
|
||||
if results['error']:
|
||||
logger.warning(f"\nNumerical dispersion analysis [{gb.grid.name}] not carried out as {results['error']}")
|
||||
logger.warning(f"\nNumerical dispersion analysis [{gb.grid.name}] "
|
||||
f"not carried out as {results['error']}")
|
||||
elif results['N'] < config.get_model_config().numdispersion['mingridsampling']:
|
||||
logger.exception(f"\nNon-physical wave propagation in [{gb.grid.name}] detected. Material '{results['material'].ID}' has wavelength sampled by {results['N']} cells, less than required minimum for physical wave propagation. Maximum significant frequency estimated as {results['maxfreq']:g}Hz")
|
||||
logger.exception(f"\nNon-physical wave propagation in [{gb.grid.name}] "
|
||||
f"detected. Material '{results['material'].ID}' "
|
||||
f"has wavelength sampled by {results['N']} cells, "
|
||||
f"less than required minimum for physical wave "
|
||||
f"propagation. Maximum significant frequency "
|
||||
f"estimated as {results['maxfreq']:g}Hz")
|
||||
raise ValueError
|
||||
elif (results['deltavp'] and np.abs(results['deltavp']) >
|
||||
config.get_model_config().numdispersion['maxnumericaldisp']):
|
||||
logger.warning(f"\n[{gb.grid.name}] has potentially significant numerical dispersion. Estimated largest physical phase-velocity error is {results['deltavp']:.2f}% in material '{results['material'].ID}' whose wavelength sampled by {results['N']} cells. Maximum significant frequency estimated as {results['maxfreq']:g}Hz")
|
||||
logger.warning(f"\n[{gb.grid.name}] has potentially significant "
|
||||
f"numerical dispersion. Estimated largest physical "
|
||||
f"phase-velocity error is {results['deltavp']:.2f}% "
|
||||
f"in material '{results['material'].ID}' whose "
|
||||
f"wavelength sampled by {results['N']} cells. "
|
||||
f"Maximum significant frequency estimated as "
|
||||
f"{results['maxfreq']:g}Hz")
|
||||
elif results['deltavp']:
|
||||
logger.info(f"\nNumerical dispersion analysis [{gb.grid.name}]: estimated largest physical phase-velocity error is {results['deltavp']:.2f}% in material '{results['material'].ID}' whose wavelength sampled by {results['N']} cells. Maximum significant frequency estimated as {results['maxfreq']:g}Hz")
|
||||
logger.info(f"\nNumerical dispersion analysis [{gb.grid.name}]: "
|
||||
f"estimated largest physical phase-velocity error is "
|
||||
f"{results['deltavp']:.2f}% in material '{results['material'].ID}' "
|
||||
f"whose wavelength sampled by {results['N']} cells. "
|
||||
f"Maximum significant frequency estimated as "
|
||||
f"{results['maxfreq']:g}Hz")
|
||||
|
||||
def reuse_geometry(self):
|
||||
# Reset iteration number
|
||||
self.G.iteration = 0
|
||||
s = f'\n--- Model {config.get_model_config().appendmodelnumber}/{config.sim_config.model_end}, input file (not re-processed, i.e. geometry fixed): {config.sim_config.input_file_path}'
|
||||
config.get_model_config().inputfilestr = Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" + Style.RESET_ALL
|
||||
s = (f'\n--- Model {config.get_model_config().appendmodelnumber}/{config.sim_config.model_end}, '
|
||||
f'input file (not re-processed, i.e. geometry fixed): '
|
||||
f'{config.sim_config.input_file_path}')
|
||||
config.get_model_config().inputfilestr = (Fore.GREEN + f"{s} {'-' * (get_terminal_width() - 1 - len(s))}\n" +
|
||||
Style.RESET_ALL)
|
||||
logger.basic(config.get_model_config().inputfilestr)
|
||||
for grid in [self.G] + self.G.subgrids:
|
||||
grid.reset_fields()
|
||||
@@ -224,7 +246,9 @@ class ModelBuildRun:
|
||||
fn = snapshotdir / Path(snap.filename)
|
||||
snap.filename = fn.with_suffix(snap.fileext)
|
||||
pbar = tqdm(total=snap.vtkdatawritesize, leave=True, unit='byte',
|
||||
unit_scale=True, desc=f'Writing snapshot file {i + 1} of {len(self.G.snapshots)}, {snap.filename.name}',
|
||||
unit_scale=True, desc=f'Writing snapshot file {i + 1} '
|
||||
f'of {len(self.G.snapshots)}, '
|
||||
f'{snap.filename.name}',
|
||||
ncols=get_terminal_width() - 1, file=sys.stdout,
|
||||
disable=not config.sim_config.general['progressbars'])
|
||||
snap.write_file(pbar, self.G)
|
||||
@@ -235,12 +259,12 @@ class ModelBuildRun:
|
||||
"""Print resource information on runtime and memory usage.
|
||||
|
||||
Args:
|
||||
tsolve (float): Time taken to execute solving (seconds).
|
||||
memsolve (float): Memory (RAM) used on GPU.
|
||||
tsolve: float of time taken to execute solving (seconds).
|
||||
memsolve: float of memory (RAM) used.
|
||||
"""
|
||||
|
||||
mem_str = ''
|
||||
if config.sim_config.general['cuda']:
|
||||
if config.sim_config.general['solver'] == 'cuda':
|
||||
mem_str = f' host + ~{human_size(memsolve)} GPU'
|
||||
|
||||
logger.info(f'\nMemory used: ~{human_size(self.p.memory_full_info().uss)}{mem_str}')
|
||||
@@ -250,24 +274,37 @@ class ModelBuildRun:
|
||||
"""Solve using FDTD method.
|
||||
|
||||
Args:
|
||||
solver (Solver): solver object.
|
||||
solver: solver object.
|
||||
|
||||
Returns:
|
||||
tsolve (float): time taken to execute solving (seconds).
|
||||
tsolve: float of time taken to execute solving (seconds).
|
||||
"""
|
||||
|
||||
# Check number of OpenMP threads
|
||||
if config.sim_config.general['cpu']:
|
||||
logger.basic(f"CPU solver using: {config.get_model_config().ompthreads} OpenMP thread(s) on {config.sim_config.hostinfo['hostname']}\n")
|
||||
# Print information about and check OpenMP threads
|
||||
if config.sim_config.general['solver'] == 'cpu':
|
||||
logger.basic(f"OPENMP solver with {config.get_model_config().ompthreads} "
|
||||
f"thread(s) on {config.sim_config.hostinfo['hostname']}\n")
|
||||
if config.get_model_config().ompthreads > config.sim_config.hostinfo['physicalcores']:
|
||||
logger.warning(f"You have specified more threads ({config.get_model_config().ompthreads}) than available physical CPU cores ({config.sim_config.hostinfo['physicalcores']}). This may lead to degraded performance.")
|
||||
# Print information about any GPU in use
|
||||
elif config.sim_config.general['cuda']:
|
||||
logger.basic(f"GPU solver using: {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} on {config.sim_config.hostinfo['hostname']}\n")
|
||||
logger.warning(f"You have specified more threads ({config.get_model_config().ompthreads}) "
|
||||
f"than available physical CPU cores ({config.sim_config.hostinfo['physicalcores']}). "
|
||||
f"This may lead to degraded performance.")
|
||||
# Print information about any compute device, e.g. GPU, in use
|
||||
elif config.sim_config.general['solver'] == 'cuda' or config.sim_config.general['solver'] == 'opencl':
|
||||
solvername = config.sim_config.general['solver'].upper()
|
||||
hostname = config.sim_config.hostinfo['hostname']
|
||||
if config.sim_config.general['solver'] == 'opencl':
|
||||
platformname = ' on ' + ' '.join(config.get_model_config().device['dev'].platform.name.split()) + ' platform'
|
||||
else:
|
||||
platformname = ''
|
||||
devicename = ' '.join(config.get_model_config().device['dev'].name.split())
|
||||
logger.basic(f"{solvername} solver using {devicename}{platformname} "
|
||||
f"on {hostname}\n")
|
||||
|
||||
# Prepare iterator
|
||||
if config.sim_config.general['progressbars']:
|
||||
iterator = tqdm(range(self.G.iterations), desc=f'Running model {config.model_num + 1}/{config.sim_config.model_end}', ncols=get_terminal_width() - 1, file=sys.stdout, disable=not config.sim_config.general['progressbars'])
|
||||
iterator = tqdm(range(self.G.iterations), desc=f'Running model {config.model_num + 1}/{config.sim_config.model_end}',
|
||||
ncols=get_terminal_width() - 1, file=sys.stdout,
|
||||
disable=not config.sim_config.general['progressbars'])
|
||||
else:
|
||||
iterator = range(self.G.iterations)
|
||||
|
||||
|
311
gprMax/opencl/fields_updates.cl
普通文件
311
gprMax/opencl/fields_updates.cl
普通文件
@@ -0,0 +1,311 @@
|
||||
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
|
||||
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
|
||||
#
|
||||
# This file is part of gprMax.
|
||||
#
|
||||
# gprMax is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# gprMax is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
#include <pyopencl-complex.h>

// Macros for converting array subscripts to a linear (row-major) index.
// Every expansion is wrapped in an outer pair of parentheses so that the
// macro still evaluates correctly when it is used as part of a larger
// arithmetic expression (e.g. multiplied or subtracted), not only when it
// appears alone inside an array subscript.
#define INDEX2D_MAT(m, n) ((m)*({{NY_MATCOEFFS}}) + (n))
#define INDEX2D_MATDISP(m, n) ((m)*({{NY_MATDISPCOEFFS}}) + (n))
#define INDEX3D_FIELDS(i, j, k) ((i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k))
#define INDEX4D_ID(p, i, j, k) ((p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}}) + (i)*({{NY_ID}})*({{NZ_ID}}) + (j)*({{NZ_ID}}) + (k))
#define INDEX4D_T(p, i, j, k) ((p)*({{NX_T}})*({{NY_T}})*({{NZ_T}}) + (i)*({{NY_T}})*({{NZ_T}}) + (j)*({{NZ_T}}) + (k))

// Material coefficients (read-only) in constant memory.
// The initialiser lists are generated by the template engine: one entry is
// emitted per element of updateEVal / updateHVal. The trailing comma after
// the last element is valid in a C initialiser list.
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updateEVal %}
{{i}},
{% endfor %}
};

__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updateHVal %}
{{i}},
{% endfor %}
};
|
||||
|
||||
|
||||
///////////////////////////////////////////////
|
||||
// Electric field updates - normal materials //
|
||||
///////////////////////////////////////////////
|
||||
|
||||
__kernel void update_electric(int NX, int NY, int NZ,
                              __global const unsigned int* restrict ID,
                              __global {{REAL}} *Ex,
                              __global {{REAL}} *Ey,
                              __global {{REAL}} *Ez,
                              __global const {{REAL}} * restrict Hx,
                              __global const {{REAL}} * restrict Hy,
                              __global const {{REAL}} * restrict Hz) {

    // Updates the Ex, Ey and Ez values of the single cell that is mapped to
    // the calling work-item.
    //
    // Args:
    //     NX, NY, NZ: Number of cells of the model domain.
    //     ID, E, H: Access to ID and field component arrays.

    // Flatten the (up to three-dimensional) global id into one linear index
    int idx = (get_global_id(2) * get_global_size(1) + get_global_id(1)) * get_global_size(0) +
              get_global_id(0);

    // Subscripts into the 3D field arrays
    const int in_plane = idx % ({{NY_FIELDS}} * {{NZ_FIELDS}});
    const int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
    const int j = in_plane / {{NZ_FIELDS}};
    const int k = in_plane % {{NZ_FIELDS}};

    // Subscripts into the 4D material ID array
    const int in_volume_ID = idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}});
    const int in_plane_ID = in_volume_ID % ({{NY_ID}} * {{NZ_ID}});
    const int i_ID = in_volume_ID / ({{NY_ID}} * {{NZ_ID}});
    const int j_ID = in_plane_ID / {{NZ_ID}};
    const int k_ID = in_plane_ID % {{NZ_ID}};

    // Range tests shared by the three components. The "from0" variants
    // accept subscript 0, the "from1" variants start at subscript 1.
    const bool x_from0 = i >= 0 && i < NX;
    const bool x_from1 = i > 0 && i < NX;
    const bool y_from0 = j >= 0 && j < NY;
    const bool y_from1 = j > 0 && j < NY;
    const bool z_from0 = k >= 0 && k < NZ;
    const bool z_from1 = k > 0 && k < NZ;

    // Linear index of this cell in every field array
    const int here = INDEX3D_FIELDS(i,j,k);

    // Ex component
    if ((NY != 1 || NZ != 1) && x_from0 && y_from1 && z_from1) {
        int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
        Ex[here] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[here] +
                   updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[here] - Hz[INDEX3D_FIELDS(i,j-1,k)]) -
                   updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[here] - Hy[INDEX3D_FIELDS(i,j,k-1)]);
    }

    // Ey component
    if ((NX != 1 || NZ != 1) && x_from1 && y_from0 && z_from1) {
        int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
        Ey[here] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[here] +
                   updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[here] - Hx[INDEX3D_FIELDS(i,j,k-1)]) -
                   updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[here] - Hz[INDEX3D_FIELDS(i-1,j,k)]);
    }

    // Ez component
    if ((NX != 1 || NY != 1) && x_from1 && y_from1 && z_from0) {
        int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
        Ez[here] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[here] +
                   updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[here] - Hy[INDEX3D_FIELDS(i-1,j,k)]) -
                   updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[here] - Hx[INDEX3D_FIELDS(i,j-1,k)]);
    }
}
|
||||
|
||||
|
||||
////////////////////////////
|
||||
// Magnetic field updates //
|
||||
////////////////////////////
|
||||
|
||||
__kernel void update_magnetic(int NX, int NY, int NZ,
                              __global const unsigned int* restrict ID,
                              __global {{REAL}} *Hx,
                              __global {{REAL}} *Hy,
                              __global {{REAL}} *Hz,
                              __global const {{REAL}}* restrict Ex,
                              __global const {{REAL}}* restrict Ey,
                              __global const {{REAL}}* restrict Ez) {

    // This function updates magnetic field values.
    //
    // Args:
    //     NX, NY, NZ: Number of cells of the model domain.
    //     ID, E, H: Access to ID and field component arrays.

    // Obtain the linear index corresponding to the current thread.
    // The full 3D global id is flattened, exactly as in the electric update
    // kernels, so the kernel is correct for 1D, 2D and 3D launches. For a
    // 1D launch get_global_id(1) and get_global_id(2) are 0 and this reduces
    // to get_global_id(0).
    int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
              get_global_id(1) * get_global_size(0) + get_global_id(0);

    // Convert the linear index to subscripts for 3D field arrays
    int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
    int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
    int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};

    // Convert the linear index to subscripts for 4D material ID arrays
    int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
    int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
    int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};

    // Hx component
    if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) {
        int materialHx = ID[INDEX4D_ID(3,i_ID,j_ID,k_ID)];
        Hx[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHx,0)] * Hx[INDEX3D_FIELDS(i,j,k)] -
                                    updatecoeffsH[INDEX2D_MAT(materialHx,2)] * (Ez[INDEX3D_FIELDS(i,j+1,k)] - Ez[INDEX3D_FIELDS(i,j,k)]) +
                                    updatecoeffsH[INDEX2D_MAT(materialHx,3)] * (Ey[INDEX3D_FIELDS(i,j,k+1)] - Ey[INDEX3D_FIELDS(i,j,k)]);
    }

    // Hy component
    if (NY != 1 && i >= 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
        int materialHy = ID[INDEX4D_ID(4,i_ID,j_ID,k_ID)];
        Hy[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHy,0)] * Hy[INDEX3D_FIELDS(i,j,k)] -
                                    updatecoeffsH[INDEX2D_MAT(materialHy,3)] * (Ex[INDEX3D_FIELDS(i,j,k+1)] - Ex[INDEX3D_FIELDS(i,j,k)]) +
                                    updatecoeffsH[INDEX2D_MAT(materialHy,1)] * (Ez[INDEX3D_FIELDS(i+1,j,k)] - Ez[INDEX3D_FIELDS(i,j,k)]);
    }

    // Hz component
    if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
        int materialHz = ID[INDEX4D_ID(5,i_ID,j_ID,k_ID)];
        Hz[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHz,0)] * Hz[INDEX3D_FIELDS(i,j,k)] -
                                    updatecoeffsH[INDEX2D_MAT(materialHz,1)] * (Ey[INDEX3D_FIELDS(i+1,j,k)] - Ey[INDEX3D_FIELDS(i,j,k)]) +
                                    updatecoeffsH[INDEX2D_MAT(materialHz,2)] * (Ex[INDEX3D_FIELDS(i,j+1,k)] - Ex[INDEX3D_FIELDS(i,j,k)]);
    }
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// Electric field updates - dispersive materials //
|
||||
///////////////////////////////////////////////////
|
||||
|
||||
__kernel void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES,
                                           __global const {{COMPLEX-}}_t* restrict updatecoeffsdispersive,
                                           __global {{COMPLEX-}}_t *Tx,
                                           __global {{COMPLEX-}}_t *Ty,
                                           __global {{COMPLEX-}}_t *Tz,
                                           __global const unsigned int* restrict ID,
                                           __global {{REAL}} *Ex,
                                           __global {{REAL}} *Ey,
                                           __global {{REAL}} *Ez,
                                           __global const {{REAL}}* restrict Hx,
                                           __global const {{REAL}}* restrict Hy,
                                           __global const {{REAL}}* restrict Hz) {

    // This function is part A of updates to electric field values when
    // dispersive materials (with multiple poles) are present.
    //
    // Args:
    //     NX, NY, NZ: Number of cells of the model domain
    //     MAXPOLES: Maximum number of dispersive material poles present in model
    //     updatecoeffsdispersive, T, ID, E, H: Access to update coefficients,
    //         dispersive, ID and field component arrays
    //
    // The complex arithmetic helpers are selected with the same COMPLEX
    // template value that selects the complex type, so that the helper
    // family always matches the argument type (a hard-coded cfloat_* helper
    // would not accept a different complex type).
    // NOTE(review): if COMPLEX can be cdouble, pyopencl-complex.h presumably
    // needs PYOPENCL_DEFINE_CDOUBLE defined before it is included - confirm.

    // Obtain the linear index corresponding to the current thread
    int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
              get_global_id(1) * get_global_size(0) + get_global_id(0);

    // Convert the linear index to subscripts for 3D field arrays
    int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
    int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
    int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};

    // Convert the linear index to subscripts for 4D material ID array
    int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
    int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
    int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};

    // Convert the linear index to subscripts for 4D dispersive array
    int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
    int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
    int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};

    // Ex component
    if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
        int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
        {{REAL}} phi = 0;
        for (int pole = 0; pole < MAXPOLES; pole++) {
            // phi accumulates with the value of Tx from before this pole's update
            phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,pole*3)].real * Tx[INDEX4D_T(pole,i_T,j_T,k_T)].real;
            // Tx is advanced with the value of Ex from before the field update below
            Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = {{COMPLEX-}}_add({{COMPLEX-}}_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,1+(pole*3))],
                                                                                  Tx[INDEX4D_T(pole,i_T,j_T,k_T)]),
                                                                 {{COMPLEX-}}_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))],
                                                                                   Ex[INDEX3D_FIELDS(i,j,k)]));
        }
        Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] +
                                    updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) -
                                    updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]) -
                                    updatecoeffsE[INDEX2D_MAT(materialEx,4)] * phi;
    }

    // Ey component
    if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
        int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
        {{REAL}} phi = 0;
        for (int pole = 0; pole < MAXPOLES; pole++) {
            phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,pole*3)].real * Ty[INDEX4D_T(pole,i_T,j_T,k_T)].real;
            Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = {{COMPLEX-}}_add({{COMPLEX-}}_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,1+(pole*3))],
                                                                                  Ty[INDEX4D_T(pole,i_T,j_T,k_T)]),
                                                                 {{COMPLEX-}}_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))],
                                                                                   Ey[INDEX3D_FIELDS(i,j,k)]));
        }
        Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] +
                                    updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) -
                                    updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]) -
                                    updatecoeffsE[INDEX2D_MAT(materialEy,4)] * phi;
    }

    // Ez component
    if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
        int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
        {{REAL}} phi = 0;
        for (int pole = 0; pole < MAXPOLES; pole++) {
            phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,pole*3)].real * Tz[INDEX4D_T(pole,i_T,j_T,k_T)].real;
            Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = {{COMPLEX-}}_add({{COMPLEX-}}_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,1+(pole*3))],
                                                                                  Tz[INDEX4D_T(pole,i_T,j_T,k_T)]),
                                                                 {{COMPLEX-}}_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))],
                                                                                   Ez[INDEX3D_FIELDS(i,j,k)]));
        }
        Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] +
                                    updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) -
                                    updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]) -
                                    updatecoeffsE[INDEX2D_MAT(materialEz,4)] * phi;
    }
}
|
||||
|
||||
|
||||
__kernel void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES,
                                           __global const {{COMPLEX-}}_t* restrict updatecoeffsdispersive,
                                           __global {{COMPLEX-}}_t *Tx,
                                           __global {{COMPLEX-}}_t *Ty,
                                           __global {{COMPLEX-}}_t *Tz,
                                           __global const unsigned int* restrict ID,
                                           __global const {{REAL}}* restrict Ex,
                                           __global const {{REAL}}* restrict Ey,
                                           __global const {{REAL}}* restrict Ez) {

    // This function is part B which updates the dispersive field arrays when
    // dispersive materials (with multiple poles) are present.
    //
    // Args:
    //     NX, NY, NZ: Number of cells of the model domain
    //     MAXPOLES: Maximum number of dispersive material poles present in model
    //     updatecoeffsdispersive, T, ID, E: Access to update coefficients,
    //         dispersive, ID and electric field component arrays
    //
    // The complex arithmetic helpers are selected with the same COMPLEX
    // template value that selects the complex type, so that the helper
    // family always matches the argument type (a hard-coded cfloat_* helper
    // would not accept a different complex type).
    // NOTE(review): if COMPLEX can be cdouble, pyopencl-complex.h presumably
    // needs PYOPENCL_DEFINE_CDOUBLE defined before it is included - confirm.

    // Obtain the linear index corresponding to the current thread
    int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
              get_global_id(1) * get_global_size(0) + get_global_id(0);

    // Convert the linear index to subscripts for 3D field arrays
    int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
    int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
    int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};

    // Convert the linear index to subscripts for 4D material ID array
    int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
    int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
    int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};

    // Convert the linear index to subscripts for 4D dispersive array
    int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
    int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
    int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};

    // Ex component
    if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
        int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
        for (int pole = 0; pole < MAXPOLES; pole++) {
            Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = {{COMPLEX-}}_sub(Tx[INDEX4D_T(pole,i_T,j_T,k_T)],
                                                                 {{COMPLEX-}}_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))],
                                                                                   Ex[INDEX3D_FIELDS(i,j,k)]));
        }
    }

    // Ey component
    if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
        int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
        for (int pole = 0; pole < MAXPOLES; pole++) {
            Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = {{COMPLEX-}}_sub(Ty[INDEX4D_T(pole,i_T,j_T,k_T)],
                                                                 {{COMPLEX-}}_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))],
                                                                                   Ey[INDEX3D_FIELDS(i,j,k)]));
        }
    }

    // Ez component
    if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
        int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
        for (int pole = 0; pole < MAXPOLES; pole++) {
            Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = {{COMPLEX-}}_sub(Tz[INDEX4D_T(pole,i_T,j_T,k_T)],
                                                                 {{COMPLEX-}}_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))],
                                                                                   Ez[INDEX3D_FIELDS(i,j,k)]));
        }
    }
}
|
@@ -0,0 +1,955 @@
|
||||
// Macros for converting subscripts to linear index.
// Every expansion is wrapped in an outer pair of parentheses so that the
// macro still evaluates correctly when it is used as part of a larger
// arithmetic expression, not only when it appears alone inside an array
// subscript.
// INDEX2D_R, INDEX4D_PHI1 and INDEX4D_PHI2 refer to NY_R, N*_PHI1 and
// N*_PHI2 by name; these are not template values, so they must be in scope
// (the kernels in this file receive them as arguments) wherever the macros
// are expanded.
#define INDEX2D_R(m, n) ((m)*(NY_R)+(n))
#define INDEX2D_MAT(m, n) ((m)*({{NY_MATCOEFFS}})+(n))
#define INDEX3D_FIELDS(i, j, k) ((i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k))
#define INDEX4D_ID(p, i, j, k) ((p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k))
#define INDEX4D_PHI1(p, i, j, k) ((p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k))
#define INDEX4D_PHI2(p, i, j, k) ((p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k))

// Electric material coefficients (read-only) in constant memory. The
// initialiser list is generated by the template engine: one entry is
// emitted per element of updateEVal.
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updateEVal %}
{{i}},
{% endfor %}
};
|
||||
|
||||
|
||||
__kernel void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf,
                            int NX_PHI1, int NY_PHI1, int NZ_PHI1,
                            int NX_PHI2, int NY_PHI2, int NZ_PHI2,
                            int NY_R,
                            __global const unsigned int* restrict ID,
                            __global const {{REAL}}* restrict Ex,
                            __global {{REAL}} *Ey,
                            __global {{REAL}} *Ez,
                            __global const {{REAL}}* restrict Hx,
                            __global const {{REAL}}* restrict Hy,
                            __global const {{REAL}}* restrict Hz,
                            __global {{REAL}} *PHI1,
                            __global {{REAL}} *PHI2,
                            __global const {{REAL}}* restrict RA,
                            __global const {{REAL}}* restrict RB,
                            __global const {{REAL}}* restrict RE,
                            __global const {{REAL}}* restrict RF,
                            {{REAL}} d) {

    // Applies the PML correction to the Ey and Ez field components for the
    // xminus slab, using a single set (row 0) of PML coefficients.
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //     NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
    //     ID, E, H: Access to ID and field component arrays
    //         (Ex and Hx are accepted but not accessed here)
    //     Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
    //     d: Spatial discretisation, e.g. dx, dy or dz

    // Flatten the (up to three-dimensional) global id into one linear index
    int idx = (get_global_id(2) * get_global_size(1) + get_global_id(1)) * get_global_size(0) + get_global_id(0);

    // Subscripts of this work-item in the PHI1 (4D) array
    const int rem1 = idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    const int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    const int i1 = rem1 / (NY_PHI1 * NZ_PHI1);
    const int j1 = (rem1 % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
    const int k1 = (rem1 % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;

    // Subscripts of this work-item in the PHI2 (4D) array
    const int rem2 = idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    const int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    const int i2 = rem2 / (NY_PHI2 * NZ_PHI2);
    const int j2 = (rem2 % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
    const int k2 = (rem2 % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;

    const {{REAL}} dx = d;

    // Extent of the slab in cells
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;

    // Ey
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Field subscripts: x is counted backwards from xf
        const int ii = xf - i1;
        const int jj = j1 + ys;
        const int kk = k1 + zs;
        const int cell = INDEX3D_FIELDS(ii,jj,kk);
        const int slot = INDEX4D_PHI1(0,i1,j1,k1);

        // PML coefficients
        const {{REAL}} RA01 = RA[INDEX2D_R(0,i1)] - 1;
        const {{REAL}} RB0 = RB[INDEX2D_R(0,i1)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0,i1)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0,i1)];

        const int materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
        const {{REAL}} dHz = (Hz[cell] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
        // The field is corrected with the stored PHI1 value before PHI1 is advanced
        Ey[cell] = Ey[cell] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[slot]);
        PHI1[slot] = RE0 * PHI1[slot] - RF0 * dHz;
    }

    // Ez
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Field subscripts: x is counted backwards from xf
        const int ii = xf - i2;
        const int jj = j2 + ys;
        const int kk = k2 + zs;
        const int cell = INDEX3D_FIELDS(ii,jj,kk);
        const int slot = INDEX4D_PHI2(0,i2,j2,k2);

        // PML coefficients
        const {{REAL}} RA01 = RA[INDEX2D_R(0,i2)] - 1;
        const {{REAL}} RB0 = RB[INDEX2D_R(0,i2)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0,i2)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0,i2)];

        const int materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
        const {{REAL}} dHy = (Hy[cell] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
        // The field is corrected with the stored PHI2 value before PHI2 is advanced
        Ez[cell] = Ez[cell] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[slot]);
        PHI2[slot] = RE0 * PHI2[slot] - RF0 * dHy;
    }
}
|
||||
|
||||
__kernel void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf,
                            int NX_PHI1, int NY_PHI1, int NZ_PHI1,
                            int NX_PHI2, int NY_PHI2, int NZ_PHI2,
                            int NY_R,
                            __global const unsigned int* restrict ID,
                            __global const {{REAL}}* restrict Ex,
                            __global {{REAL}} *Ey,
                            __global {{REAL}} *Ez,
                            __global const {{REAL}}* restrict Hx,
                            __global const {{REAL}}* restrict Hy,
                            __global const {{REAL}}* restrict Hz,
                            __global {{REAL}} *PHI1,
                            __global {{REAL}} *PHI2,
                            __global const {{REAL}}* restrict RA,
                            __global const {{REAL}}* restrict RB,
                            __global const {{REAL}}* restrict RE,
                            __global const {{REAL}}* restrict RF,
                            {{REAL}} d) {

    // Applies the PML correction to the Ey and Ez field components for the
    // xminus slab, using two sets (rows 0 and 1) of PML coefficients.
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //     NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
    //     ID, E, H: Access to ID and field component arrays
    //         (Ex and Hx are accepted but not accessed here)
    //     Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
    //     d: Spatial discretisation, e.g. dx, dy or dz

    // Flatten the (up to three-dimensional) global id into one linear index
    int idx = (get_global_id(2) * get_global_size(1) + get_global_id(1)) * get_global_size(0) + get_global_id(0);

    // Subscripts of this work-item in the PHI1 (4D) array
    const int rem1 = idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    const int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    const int i1 = rem1 / (NY_PHI1 * NZ_PHI1);
    const int j1 = (rem1 % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
    const int k1 = (rem1 % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;

    // Subscripts of this work-item in the PHI2 (4D) array
    const int rem2 = idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    const int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    const int i2 = rem2 / (NY_PHI2 * NZ_PHI2);
    const int j2 = (rem2 % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
    const int k2 = (rem2 % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;

    const {{REAL}} dx = d;

    // Extent of the slab in cells
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;

    // Ey
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Field subscripts: x is counted backwards from xf
        const int ii = xf - i1;
        const int jj = j1 + ys;
        const int kk = k1 + zs;
        const int cell = INDEX3D_FIELDS(ii,jj,kk);
        const int slot0 = INDEX4D_PHI1(0,i1,j1,k1);
        const int slot1 = INDEX4D_PHI1(1,i1,j1,k1);

        // PML coefficients
        const {{REAL}} RA0 = RA[INDEX2D_R(0,i1)];
        const {{REAL}} RB0 = RB[INDEX2D_R(0,i1)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0,i1)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0,i1)];
        const {{REAL}} RA1 = RA[INDEX2D_R(1,i1)];
        const {{REAL}} RB1 = RB[INDEX2D_R(1,i1)];
        const {{REAL}} RE1 = RE[INDEX2D_R(1,i1)];
        const {{REAL}} RF1 = RF[INDEX2D_R(1,i1)];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        const int materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
        const {{REAL}} dHz = (Hz[cell] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
        Ey[cell] = Ey[cell] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[slot0] + RB1 * PHI1[slot1]);
        // Slot 1 must be advanced first: it reads the not-yet-advanced slot 0
        PHI1[slot1] = RE1 * PHI1[slot1] - RF1 * (RA0 * dHz + RB0 * PHI1[slot0]);
        PHI1[slot0] = RE0 * PHI1[slot0] - RF0 * dHz;
    }

    // Ez
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Field subscripts: x is counted backwards from xf
        const int ii = xf - i2;
        const int jj = j2 + ys;
        const int kk = k2 + zs;
        const int cell = INDEX3D_FIELDS(ii,jj,kk);
        const int slot0 = INDEX4D_PHI2(0,i2,j2,k2);
        const int slot1 = INDEX4D_PHI2(1,i2,j2,k2);

        // PML coefficients
        const {{REAL}} RA0 = RA[INDEX2D_R(0,i2)];
        const {{REAL}} RB0 = RB[INDEX2D_R(0,i2)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0,i2)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0,i2)];
        const {{REAL}} RA1 = RA[INDEX2D_R(1,i2)];
        const {{REAL}} RB1 = RB[INDEX2D_R(1,i2)];
        const {{REAL}} RE1 = RE[INDEX2D_R(1,i2)];
        const {{REAL}} RF1 = RF[INDEX2D_R(1,i2)];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        const int materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
        const {{REAL}} dHy = (Hy[cell] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
        Ez[cell] = Ez[cell] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[slot0] + RB1 * PHI2[slot1]);
        // Slot 1 must be advanced first: it reads the not-yet-advanced slot 0
        PHI2[slot1] = RE1 * PHI2[slot1] - RF1 * (RA0 * dHy + RB0 * PHI2[slot0]);
        PHI2[slot0] = RE0 * PHI2[slot0] - RF0 * dHy;
    }
}
|
||||
|
||||
__kernel void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf,
                           int NX_PHI1, int NY_PHI1, int NZ_PHI1,
                           int NX_PHI2, int NY_PHI2, int NZ_PHI2,
                           int NY_R,
                           __global const unsigned int* restrict ID,
                           __global const {{REAL}}* restrict Ex,
                           __global {{REAL}} *Ey,
                           __global {{REAL}} *Ez,
                           __global const {{REAL}}* restrict Hx,
                           __global const {{REAL}}* restrict Hy,
                           __global const {{REAL}}* restrict Hz,
                           __global {{REAL}} *PHI1,
                           __global {{REAL}} *PHI2,
                           __global const {{REAL}}* restrict RA,
                           __global const {{REAL}}* restrict RB,
                           __global const {{REAL}}* restrict RE,
                           __global const {{REAL}}* restrict RF,
                           {{REAL}} d) {

    // Applies the PML correction to the Ey and Ez field components for the
    // xplus slab, using a single set (row 0) of PML coefficients.
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //     NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
    //     ID, E, H: Access to ID and field component arrays
    //         (Ex and Hx are accepted but not accessed here)
    //     Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
    //     d: Spatial discretisation, e.g. dx, dy or dz

    // Flatten the (up to three-dimensional) global id into one linear index
    int idx = (get_global_id(2) * get_global_size(1) + get_global_id(1)) * get_global_size(0) + get_global_id(0);

    // Subscripts of this work-item in the PHI1 (4D) array
    const int rem1 = idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    const int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    const int i1 = rem1 / (NY_PHI1 * NZ_PHI1);
    const int j1 = (rem1 % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
    const int k1 = (rem1 % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;

    // Subscripts of this work-item in the PHI2 (4D) array
    const int rem2 = idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    const int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    const int i2 = rem2 / (NY_PHI2 * NZ_PHI2);
    const int j2 = (rem2 % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
    const int k2 = (rem2 % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;

    const {{REAL}} dx = d;

    // Extent of the slab in cells
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;

    // Ey
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Field subscripts: x is counted forwards from xs
        const int ii = i1 + xs;
        const int jj = j1 + ys;
        const int kk = k1 + zs;
        const int cell = INDEX3D_FIELDS(ii,jj,kk);
        const int slot = INDEX4D_PHI1(0,i1,j1,k1);

        // PML coefficients
        const {{REAL}} RA01 = RA[INDEX2D_R(0,i1)] - 1;
        const {{REAL}} RB0 = RB[INDEX2D_R(0,i1)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0,i1)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0,i1)];

        const int materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
        const {{REAL}} dHz = (Hz[cell] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
        // The field is corrected with the stored PHI1 value before PHI1 is advanced
        Ey[cell] = Ey[cell] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[slot]);
        PHI1[slot] = RE0 * PHI1[slot] - RF0 * dHz;
    }

    // Ez
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Field subscripts: x is counted forwards from xs
        const int ii = i2 + xs;
        const int jj = j2 + ys;
        const int kk = k2 + zs;
        const int cell = INDEX3D_FIELDS(ii,jj,kk);
        const int slot = INDEX4D_PHI2(0,i2,j2,k2);

        // PML coefficients
        const {{REAL}} RA01 = RA[INDEX2D_R(0,i2)] - 1;
        const {{REAL}} RB0 = RB[INDEX2D_R(0,i2)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0,i2)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0,i2)];

        const int materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
        const {{REAL}} dHy = (Hy[cell] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
        // The field is corrected with the stored PHI2 value before PHI2 is advanced
        Ez[cell] = Ez[cell] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[slot]);
        PHI2[slot] = RE0 * PHI2[slot] - RF0 * dHy;
    }
}
|
||||
|
||||
__kernel void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf,
                           int NX_PHI1, int NY_PHI1, int NZ_PHI1,
                           int NX_PHI2, int NY_PHI2, int NZ_PHI2,
                           int NY_R,
                           __global const unsigned int* restrict ID,
                           __global const {{REAL}}* restrict Ex,
                           __global {{REAL}} *Ey,
                           __global {{REAL}} *Ez,
                           __global const {{REAL}}* restrict Hx,
                           __global const {{REAL}}* restrict Hy,
                           __global const {{REAL}}* restrict Hz,
                           __global {{REAL}} *PHI1,
                           __global {{REAL}} *PHI2,
                           __global const {{REAL}}* restrict RA,
                           __global const {{REAL}}* restrict RB,
                           __global const {{REAL}}* restrict RE,
                           __global const {{REAL}}* restrict RF,
                           {{REAL}} d) {

    // Applies the PML correction to the Ey and Ez field components for the
    // xplus slab, using two sets (rows 0 and 1) of PML coefficients.
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //     NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
    //     ID, E, H: Access to ID and field component arrays
    //         (Ex and Hx are accepted but not accessed here)
    //     Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
    //     d: Spatial discretisation, e.g. dx, dy or dz

    // Flatten the (up to three-dimensional) global id into one linear index
    int idx = (get_global_id(2) * get_global_size(1) + get_global_id(1)) * get_global_size(0) + get_global_id(0);

    // Subscripts of this work-item in the PHI1 (4D) array
    const int rem1 = idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    const int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
    const int i1 = rem1 / (NY_PHI1 * NZ_PHI1);
    const int j1 = (rem1 % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
    const int k1 = (rem1 % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;

    // Subscripts of this work-item in the PHI2 (4D) array
    const int rem2 = idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    const int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
    const int i2 = rem2 / (NY_PHI2 * NZ_PHI2);
    const int j2 = (rem2 % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
    const int k2 = (rem2 % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;

    const {{REAL}} dx = d;

    // Extent of the slab in cells
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;

    // Ey
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Field subscripts: x is counted forwards from xs
        const int ii = i1 + xs;
        const int jj = j1 + ys;
        const int kk = k1 + zs;
        const int cell = INDEX3D_FIELDS(ii,jj,kk);
        const int slot0 = INDEX4D_PHI1(0,i1,j1,k1);
        const int slot1 = INDEX4D_PHI1(1,i1,j1,k1);

        // PML coefficients
        const {{REAL}} RA0 = RA[INDEX2D_R(0,i1)];
        const {{REAL}} RB0 = RB[INDEX2D_R(0,i1)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0,i1)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0,i1)];
        const {{REAL}} RA1 = RA[INDEX2D_R(1,i1)];
        const {{REAL}} RB1 = RB[INDEX2D_R(1,i1)];
        const {{REAL}} RE1 = RE[INDEX2D_R(1,i1)];
        const {{REAL}} RF1 = RF[INDEX2D_R(1,i1)];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        const int materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
        const {{REAL}} dHz = (Hz[cell] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
        Ey[cell] = Ey[cell] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[slot0] + RB1 * PHI1[slot1]);
        // Slot 1 must be advanced first: it reads the not-yet-advanced slot 0
        PHI1[slot1] = RE1 * PHI1[slot1] - RF1 * (RA0 * dHz + RB0 * PHI1[slot0]);
        PHI1[slot0] = RE0 * PHI1[slot0] - RF0 * dHz;
    }

    // Ez
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Field subscripts: x is counted forwards from xs
        const int ii = i2 + xs;
        const int jj = j2 + ys;
        const int kk = k2 + zs;
        const int cell = INDEX3D_FIELDS(ii,jj,kk);
        const int slot0 = INDEX4D_PHI2(0,i2,j2,k2);
        const int slot1 = INDEX4D_PHI2(1,i2,j2,k2);

        // PML coefficients
        const {{REAL}} RA0 = RA[INDEX2D_R(0,i2)];
        const {{REAL}} RB0 = RB[INDEX2D_R(0,i2)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0,i2)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0,i2)];
        const {{REAL}} RA1 = RA[INDEX2D_R(1,i2)];
        const {{REAL}} RB1 = RB[INDEX2D_R(1,i2)];
        const {{REAL}} RE1 = RE[INDEX2D_R(1,i2)];
        const {{REAL}} RF1 = RF[INDEX2D_R(1,i2)];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        const int materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
        const {{REAL}} dHy = (Hy[cell] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
        Ez[cell] = Ez[cell] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[slot0] + RB1 * PHI2[slot1]);
        // Slot 1 must be advanced first: it reads the not-yet-advanced slot 0
        PHI2[slot1] = RE1 * PHI2[slot1] - RF1 * (RA0 * dHy + RB0 * PHI2[slot0]);
        PHI2[slot0] = RE0 * PHI2[slot0] - RF0 * dHy;
    }
}
|
||||
|
||||
|
||||
__kernel void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {

    // Corrects the Ex and Ez field components inside the yminus PML slab and
    // advances the row-0 entries of PHI1 (paired with Ex) and PHI2 (paired
    // with Ez).
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2, NY_R:
    //         Dimensions of PHI1, PHI2, and R PML arrays
    //     ID, Ex, Ey, Ez, Hx, Hy, Hz: ID and field component arrays
    //     PHI1, PHI2, RA, RB, RE, RF: PML electric coefficient arrays
    //     d: Spatial discretisation, used here as dy
    //
    // updatecoeffsE and the INDEX* helpers are not kernel arguments; they are
    // expected to be defined elsewhere in the program source.

    // Linear index of this work-item across the 3D global range
    const int tid = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);

    // Slab extent in cells and the step used for the finite difference
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;
    const {{REAL}} dy = d;

    // Unravel the linear index into (p1, i1, j1, k1) subscripts of PHI1
    const int vol1 = NX_PHI1 * NY_PHI1 * NZ_PHI1;
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int rem1 = tid % vol1;
    const int p1 = tid / vol1;
    const int i1 = rem1 / plane1;
    const int j1 = (rem1 % plane1) / NZ_PHI1;
    const int k1 = (rem1 % plane1) % NZ_PHI1;

    // Unravel the linear index into (p2, i2, j2, k2) subscripts of PHI2
    const int vol2 = NX_PHI2 * NY_PHI2 * NZ_PHI2;
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int rem2 = tid % vol2;
    const int p2 = tid / vol2;
    const int i2 = rem2 / plane2;
    const int j2 = (rem2 % plane2) / NZ_PHI2;
    const int k2 = (rem2 % plane2) % NZ_PHI2;

    // Ex update, paired with PHI1
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Field subscripts; the y subscript counts down from yf
        const int fi = i1 + xs;
        const int fj = yf - j1;
        const int fk = k1 + zs;

        // Row-0 PML coefficients, selected by the y position in the slab
        const int r0 = INDEX2D_R(0,j1);
        const {{REAL}} RA01 = RA[r0] - 1;
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI1(0,i1,j1,k1);
        const int matEx = ID[INDEX4D_ID(0,fi,fj,fk)];

        // Backward difference of Hz along y
        const {{REAL}} dHz = (Hz[cell] - Hz[INDEX3D_FIELDS(fi,fj-1,fk)]) / dy;

        // The field is corrected with the old PHI1 value before PHI1 advances
        Ex[cell] += updatecoeffsE[INDEX2D_MAT(matEx,4)] * (RA01 * dHz + RB0 * PHI1[phi0]);
        PHI1[phi0] = RE0 * PHI1[phi0] - RF0 * dHz;
    }

    // Ez update, paired with PHI2
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Field subscripts; the y subscript counts down from yf
        const int fi = i2 + xs;
        const int fj = yf - j2;
        const int fk = k2 + zs;

        // Row-0 PML coefficients, selected by the y position in the slab
        const int r0 = INDEX2D_R(0,j2);
        const {{REAL}} RA01 = RA[r0] - 1;
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI2(0,i2,j2,k2);
        const int matEz = ID[INDEX4D_ID(2,fi,fj,fk)];

        // Backward difference of Hx along y
        const {{REAL}} dHx = (Hx[cell] - Hx[INDEX3D_FIELDS(fi,fj-1,fk)]) / dy;

        // The field is corrected with the old PHI2 value before PHI2 advances
        Ez[cell] -= updatecoeffsE[INDEX2D_MAT(matEz,4)] * (RA01 * dHx + RB0 * PHI2[phi0]);
        PHI2[phi0] = RE0 * PHI2[phi0] - RF0 * dHx;
    }
}
|
||||
|
||||
__kernel void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {

    // Corrects the Ex and Ez field components inside the yminus PML slab using
    // two rows (0 and 1) of the R coefficient arrays, and advances the
    // matching two entries of PHI1 (paired with Ex) and PHI2 (paired with Ez).
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2, NY_R:
    //         Dimensions of PHI1, PHI2, and R PML arrays
    //     ID, Ex, Ey, Ez, Hx, Hy, Hz: ID and field component arrays
    //     PHI1, PHI2, RA, RB, RE, RF: PML electric coefficient arrays
    //     d: Spatial discretisation, used here as dy
    //
    // updatecoeffsE and the INDEX* helpers are not kernel arguments; they are
    // expected to be defined elsewhere in the program source.

    // Linear index of this work-item across the 3D global range
    const int tid = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);

    // Slab extent in cells and the step used for the finite difference
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;
    const {{REAL}} dy = d;

    // Unravel the linear index into (p1, i1, j1, k1) subscripts of PHI1
    const int vol1 = NX_PHI1 * NY_PHI1 * NZ_PHI1;
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int rem1 = tid % vol1;
    const int p1 = tid / vol1;
    const int i1 = rem1 / plane1;
    const int j1 = (rem1 % plane1) / NZ_PHI1;
    const int k1 = (rem1 % plane1) % NZ_PHI1;

    // Unravel the linear index into (p2, i2, j2, k2) subscripts of PHI2
    const int vol2 = NX_PHI2 * NY_PHI2 * NZ_PHI2;
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int rem2 = tid % vol2;
    const int p2 = tid / vol2;
    const int i2 = rem2 / plane2;
    const int j2 = (rem2 % plane2) / NZ_PHI2;
    const int k2 = (rem2 % plane2) % NZ_PHI2;

    // Ex update, paired with PHI1
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Field subscripts; the y subscript counts down from yf
        const int fi = i1 + xs;
        const int fj = yf - j1;
        const int fk = k1 + zs;

        // Row-0 and row-1 PML coefficients, selected by the y position
        const int r0 = INDEX2D_R(0,j1);
        const int r1 = INDEX2D_R(1,j1);
        const {{REAL}} RA0 = RA[r0];
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];
        const {{REAL}} RA1 = RA[r1];
        const {{REAL}} RB1 = RB[r1];
        const {{REAL}} RE1 = RE[r1];
        const {{REAL}} RF1 = RF[r1];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI1(0,i1,j1,k1);
        const int phi1 = INDEX4D_PHI1(1,i1,j1,k1);
        const int matEx = ID[INDEX4D_ID(0,fi,fj,fk)];

        // Backward difference of Hz along y
        const {{REAL}} dHz = (Hz[cell] - Hz[INDEX3D_FIELDS(fi,fj-1,fk)]) / dy;

        Ex[cell] += updatecoeffsE[INDEX2D_MAT(matEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[phi0] + RB1 * PHI1[phi1]);

        // Entry 1 must be advanced first: it reads the old value of entry 0
        PHI1[phi1] = RE1 * PHI1[phi1] - RF1 * (RA0 * dHz + RB0 * PHI1[phi0]);
        PHI1[phi0] = RE0 * PHI1[phi0] - RF0 * dHz;
    }

    // Ez update, paired with PHI2
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Field subscripts; the y subscript counts down from yf
        const int fi = i2 + xs;
        const int fj = yf - j2;
        const int fk = k2 + zs;

        // Row-0 and row-1 PML coefficients, selected by the y position
        const int r0 = INDEX2D_R(0,j2);
        const int r1 = INDEX2D_R(1,j2);
        const {{REAL}} RA0 = RA[r0];
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];
        const {{REAL}} RA1 = RA[r1];
        const {{REAL}} RB1 = RB[r1];
        const {{REAL}} RE1 = RE[r1];
        const {{REAL}} RF1 = RF[r1];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI2(0,i2,j2,k2);
        const int phi1 = INDEX4D_PHI2(1,i2,j2,k2);
        const int matEz = ID[INDEX4D_ID(2,fi,fj,fk)];

        // Backward difference of Hx along y
        const {{REAL}} dHx = (Hx[cell] - Hx[INDEX3D_FIELDS(fi,fj-1,fk)]) / dy;

        Ez[cell] -= updatecoeffsE[INDEX2D_MAT(matEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[phi0] + RB1 * PHI2[phi1]);

        // Entry 1 must be advanced first: it reads the old value of entry 0
        PHI2[phi1] = RE1 * PHI2[phi1] - RF1 * (RA0 * dHx + RB0 * PHI2[phi0]);
        PHI2[phi0] = RE0 * PHI2[phi0] - RF0 * dHx;
    }
}
|
||||
|
||||
|
||||
__kernel void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {

    // Corrects the Ex and Ez field components inside the yplus PML slab and
    // advances the row-0 entries of PHI1 (paired with Ex) and PHI2 (paired
    // with Ez).
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2, NY_R:
    //         Dimensions of PHI1, PHI2, and R PML arrays
    //     ID, Ex, Ey, Ez, Hx, Hy, Hz: ID and field component arrays
    //     PHI1, PHI2, RA, RB, RE, RF: PML electric coefficient arrays
    //     d: Spatial discretisation, used here as dy
    //
    // updatecoeffsE and the INDEX* helpers are not kernel arguments; they are
    // expected to be defined elsewhere in the program source.

    // Linear index of this work-item across the 3D global range
    const int tid = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);

    // Slab extent in cells and the step used for the finite difference
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;
    const {{REAL}} dy = d;

    // Unravel the linear index into (p1, i1, j1, k1) subscripts of PHI1
    const int vol1 = NX_PHI1 * NY_PHI1 * NZ_PHI1;
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int rem1 = tid % vol1;
    const int p1 = tid / vol1;
    const int i1 = rem1 / plane1;
    const int j1 = (rem1 % plane1) / NZ_PHI1;
    const int k1 = (rem1 % plane1) % NZ_PHI1;

    // Unravel the linear index into (p2, i2, j2, k2) subscripts of PHI2
    const int vol2 = NX_PHI2 * NY_PHI2 * NZ_PHI2;
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int rem2 = tid % vol2;
    const int p2 = tid / vol2;
    const int i2 = rem2 / plane2;
    const int j2 = (rem2 % plane2) / NZ_PHI2;
    const int k2 = (rem2 % plane2) % NZ_PHI2;

    // Ex update, paired with PHI1
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Field subscripts; every axis is offset from the slab start
        const int fi = i1 + xs;
        const int fj = j1 + ys;
        const int fk = k1 + zs;

        // Row-0 PML coefficients, selected by the y position in the slab
        const int r0 = INDEX2D_R(0,j1);
        const {{REAL}} RA01 = RA[r0] - 1;
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI1(0,i1,j1,k1);
        const int matEx = ID[INDEX4D_ID(0,fi,fj,fk)];

        // Backward difference of Hz along y
        const {{REAL}} dHz = (Hz[cell] - Hz[INDEX3D_FIELDS(fi,fj-1,fk)]) / dy;

        // The field is corrected with the old PHI1 value before PHI1 advances
        Ex[cell] += updatecoeffsE[INDEX2D_MAT(matEx,4)] * (RA01 * dHz + RB0 * PHI1[phi0]);
        PHI1[phi0] = RE0 * PHI1[phi0] - RF0 * dHz;
    }

    // Ez update, paired with PHI2
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Field subscripts; every axis is offset from the slab start
        const int fi = i2 + xs;
        const int fj = j2 + ys;
        const int fk = k2 + zs;

        // Row-0 PML coefficients, selected by the y position in the slab
        const int r0 = INDEX2D_R(0,j2);
        const {{REAL}} RA01 = RA[r0] - 1;
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI2(0,i2,j2,k2);
        const int matEz = ID[INDEX4D_ID(2,fi,fj,fk)];

        // Backward difference of Hx along y
        const {{REAL}} dHx = (Hx[cell] - Hx[INDEX3D_FIELDS(fi,fj-1,fk)]) / dy;

        // The field is corrected with the old PHI2 value before PHI2 advances
        Ez[cell] -= updatecoeffsE[INDEX2D_MAT(matEz,4)] * (RA01 * dHx + RB0 * PHI2[phi0]);
        PHI2[phi0] = RE0 * PHI2[phi0] - RF0 * dHx;
    }
}
|
||||
|
||||
|
||||
__kernel void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {

    // Corrects the Ex and Ez field components inside the yplus PML slab using
    // two rows (0 and 1) of the R coefficient arrays, and advances the
    // matching two entries of PHI1 (paired with Ex) and PHI2 (paired with Ez).
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2, NY_R:
    //         Dimensions of PHI1, PHI2, and R PML arrays
    //     ID, Ex, Ey, Ez, Hx, Hy, Hz: ID and field component arrays
    //     PHI1, PHI2, RA, RB, RE, RF: PML electric coefficient arrays
    //     d: Spatial discretisation, used here as dy
    //
    // updatecoeffsE and the INDEX* helpers are not kernel arguments; they are
    // expected to be defined elsewhere in the program source.

    // Linear index of this work-item across the 3D global range
    const int tid = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);

    // Slab extent in cells and the step used for the finite difference
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;
    const {{REAL}} dy = d;

    // Unravel the linear index into (p1, i1, j1, k1) subscripts of PHI1
    const int vol1 = NX_PHI1 * NY_PHI1 * NZ_PHI1;
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int rem1 = tid % vol1;
    const int p1 = tid / vol1;
    const int i1 = rem1 / plane1;
    const int j1 = (rem1 % plane1) / NZ_PHI1;
    const int k1 = (rem1 % plane1) % NZ_PHI1;

    // Unravel the linear index into (p2, i2, j2, k2) subscripts of PHI2
    const int vol2 = NX_PHI2 * NY_PHI2 * NZ_PHI2;
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int rem2 = tid % vol2;
    const int p2 = tid / vol2;
    const int i2 = rem2 / plane2;
    const int j2 = (rem2 % plane2) / NZ_PHI2;
    const int k2 = (rem2 % plane2) % NZ_PHI2;

    // Ex update, paired with PHI1
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Field subscripts; every axis is offset from the slab start
        const int fi = i1 + xs;
        const int fj = j1 + ys;
        const int fk = k1 + zs;

        // Row-0 and row-1 PML coefficients, selected by the y position
        const int r0 = INDEX2D_R(0,j1);
        const int r1 = INDEX2D_R(1,j1);
        const {{REAL}} RA0 = RA[r0];
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];
        const {{REAL}} RA1 = RA[r1];
        const {{REAL}} RB1 = RB[r1];
        const {{REAL}} RE1 = RE[r1];
        const {{REAL}} RF1 = RF[r1];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI1(0,i1,j1,k1);
        const int phi1 = INDEX4D_PHI1(1,i1,j1,k1);
        const int matEx = ID[INDEX4D_ID(0,fi,fj,fk)];

        // Backward difference of Hz along y
        const {{REAL}} dHz = (Hz[cell] - Hz[INDEX3D_FIELDS(fi,fj-1,fk)]) / dy;

        Ex[cell] += updatecoeffsE[INDEX2D_MAT(matEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[phi0] + RB1 * PHI1[phi1]);

        // Entry 1 must be advanced first: it reads the old value of entry 0
        PHI1[phi1] = RE1 * PHI1[phi1] - RF1 * (RA0 * dHz + RB0 * PHI1[phi0]);
        PHI1[phi0] = RE0 * PHI1[phi0] - RF0 * dHz;
    }

    // Ez update, paired with PHI2
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Field subscripts; every axis is offset from the slab start
        const int fi = i2 + xs;
        const int fj = j2 + ys;
        const int fk = k2 + zs;

        // Row-0 and row-1 PML coefficients, selected by the y position
        const int r0 = INDEX2D_R(0,j2);
        const int r1 = INDEX2D_R(1,j2);
        const {{REAL}} RA0 = RA[r0];
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];
        const {{REAL}} RA1 = RA[r1];
        const {{REAL}} RB1 = RB[r1];
        const {{REAL}} RE1 = RE[r1];
        const {{REAL}} RF1 = RF[r1];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI2(0,i2,j2,k2);
        const int phi1 = INDEX4D_PHI2(1,i2,j2,k2);
        const int matEz = ID[INDEX4D_ID(2,fi,fj,fk)];

        // Backward difference of Hx along y
        const {{REAL}} dHx = (Hx[cell] - Hx[INDEX3D_FIELDS(fi,fj-1,fk)]) / dy;

        Ez[cell] -= updatecoeffsE[INDEX2D_MAT(matEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[phi0] + RB1 * PHI2[phi1]);

        // Entry 1 must be advanced first: it reads the old value of entry 0
        PHI2[phi1] = RE1 * PHI2[phi1] - RF1 * (RA0 * dHx + RB0 * PHI2[phi0]);
        PHI2[phi0] = RE0 * PHI2[phi0] - RF0 * dHx;
    }
}
|
||||
|
||||
__kernel void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {

    // Corrects the Ex and Ey field components inside the zminus PML slab and
    // advances the row-0 entries of PHI1 (paired with Ex) and PHI2 (paired
    // with Ey).
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2, NY_R:
    //         Dimensions of PHI1, PHI2, and R PML arrays
    //     ID, Ex, Ey, Ez, Hx, Hy, Hz: ID and field component arrays
    //     PHI1, PHI2, RA, RB, RE, RF: PML electric coefficient arrays
    //     d: Spatial discretisation, used here as dz
    //
    // updatecoeffsE and the INDEX* helpers are not kernel arguments; they are
    // expected to be defined elsewhere in the program source.

    // Linear index of this work-item across the 3D global range
    const int tid = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);

    // Slab extent in cells and the step used for the finite difference
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;
    const {{REAL}} dz = d;

    // Unravel the linear index into (p1, i1, j1, k1) subscripts of PHI1
    const int vol1 = NX_PHI1 * NY_PHI1 * NZ_PHI1;
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int rem1 = tid % vol1;
    const int p1 = tid / vol1;
    const int i1 = rem1 / plane1;
    const int j1 = (rem1 % plane1) / NZ_PHI1;
    const int k1 = (rem1 % plane1) % NZ_PHI1;

    // Unravel the linear index into (p2, i2, j2, k2) subscripts of PHI2
    const int vol2 = NX_PHI2 * NY_PHI2 * NZ_PHI2;
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int rem2 = tid % vol2;
    const int p2 = tid / vol2;
    const int i2 = rem2 / plane2;
    const int j2 = (rem2 % plane2) / NZ_PHI2;
    const int k2 = (rem2 % plane2) % NZ_PHI2;

    // Ex update, paired with PHI1
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Field subscripts; the z subscript counts down from zf
        const int fi = i1 + xs;
        const int fj = j1 + ys;
        const int fk = zf - k1;

        // Row-0 PML coefficients, selected by the z position in the slab
        const int r0 = INDEX2D_R(0,k1);
        const {{REAL}} RA01 = RA[r0] - 1;
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI1(0,i1,j1,k1);
        const int matEx = ID[INDEX4D_ID(0,fi,fj,fk)];

        // Backward difference of Hy along z
        const {{REAL}} dHy = (Hy[cell] - Hy[INDEX3D_FIELDS(fi,fj,fk-1)]) / dz;

        // The field is corrected with the old PHI1 value before PHI1 advances
        Ex[cell] -= updatecoeffsE[INDEX2D_MAT(matEx,4)] * (RA01 * dHy + RB0 * PHI1[phi0]);
        PHI1[phi0] = RE0 * PHI1[phi0] - RF0 * dHy;
    }

    // Ey update, paired with PHI2
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Field subscripts; the z subscript counts down from zf
        const int fi = i2 + xs;
        const int fj = j2 + ys;
        const int fk = zf - k2;

        // Row-0 PML coefficients, selected by the z position in the slab
        const int r0 = INDEX2D_R(0,k2);
        const {{REAL}} RA01 = RA[r0] - 1;
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI2(0,i2,j2,k2);
        const int matEy = ID[INDEX4D_ID(1,fi,fj,fk)];

        // Backward difference of Hx along z
        const {{REAL}} dHx = (Hx[cell] - Hx[INDEX3D_FIELDS(fi,fj,fk-1)]) / dz;

        // The field is corrected with the old PHI2 value before PHI2 advances
        Ey[cell] += updatecoeffsE[INDEX2D_MAT(matEy,4)] * (RA01 * dHx + RB0 * PHI2[phi0]);
        PHI2[phi0] = RE0 * PHI2[phi0] - RF0 * dHx;
    }
}
|
||||
|
||||
__kernel void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {

    // Corrects the Ex and Ey field components inside the zminus PML slab using
    // two rows (0 and 1) of the R coefficient arrays, and advances the
    // matching two entries of PHI1 (paired with Ex) and PHI2 (paired with Ey).
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2, NY_R:
    //         Dimensions of PHI1, PHI2, and R PML arrays
    //     ID, Ex, Ey, Ez, Hx, Hy, Hz: ID and field component arrays
    //     PHI1, PHI2, RA, RB, RE, RF: PML electric coefficient arrays
    //     d: Spatial discretisation, used here as dz
    //
    // updatecoeffsE and the INDEX* helpers are not kernel arguments; they are
    // expected to be defined elsewhere in the program source.

    // Linear index of this work-item across the 3D global range
    const int tid = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);

    // Slab extent in cells and the step used for the finite difference
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;
    const {{REAL}} dz = d;

    // Unravel the linear index into (p1, i1, j1, k1) subscripts of PHI1
    const int vol1 = NX_PHI1 * NY_PHI1 * NZ_PHI1;
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int rem1 = tid % vol1;
    const int p1 = tid / vol1;
    const int i1 = rem1 / plane1;
    const int j1 = (rem1 % plane1) / NZ_PHI1;
    const int k1 = (rem1 % plane1) % NZ_PHI1;

    // Unravel the linear index into (p2, i2, j2, k2) subscripts of PHI2
    const int vol2 = NX_PHI2 * NY_PHI2 * NZ_PHI2;
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int rem2 = tid % vol2;
    const int p2 = tid / vol2;
    const int i2 = rem2 / plane2;
    const int j2 = (rem2 % plane2) / NZ_PHI2;
    const int k2 = (rem2 % plane2) % NZ_PHI2;

    // Ex update, paired with PHI1
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Field subscripts; the z subscript counts down from zf
        const int fi = i1 + xs;
        const int fj = j1 + ys;
        const int fk = zf - k1;

        // Row-0 and row-1 PML coefficients, selected by the z position
        const int r0 = INDEX2D_R(0,k1);
        const int r1 = INDEX2D_R(1,k1);
        const {{REAL}} RA0 = RA[r0];
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];
        const {{REAL}} RA1 = RA[r1];
        const {{REAL}} RB1 = RB[r1];
        const {{REAL}} RE1 = RE[r1];
        const {{REAL}} RF1 = RF[r1];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI1(0,i1,j1,k1);
        const int phi1 = INDEX4D_PHI1(1,i1,j1,k1);
        const int matEx = ID[INDEX4D_ID(0,fi,fj,fk)];

        // Backward difference of Hy along z
        const {{REAL}} dHy = (Hy[cell] - Hy[INDEX3D_FIELDS(fi,fj,fk-1)]) / dz;

        Ex[cell] -= updatecoeffsE[INDEX2D_MAT(matEx,4)] * (RA01 * dHy + RA1 * RB0 * PHI1[phi0] + RB1 * PHI1[phi1]);

        // Entry 1 must be advanced first: it reads the old value of entry 0
        PHI1[phi1] = RE1 * PHI1[phi1] - RF1 * (RA0 * dHy + RB0 * PHI1[phi0]);
        PHI1[phi0] = RE0 * PHI1[phi0] - RF0 * dHy;
    }

    // Ey update, paired with PHI2
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Field subscripts; the z subscript counts down from zf
        const int fi = i2 + xs;
        const int fj = j2 + ys;
        const int fk = zf - k2;

        // Row-0 and row-1 PML coefficients, selected by the z position
        const int r0 = INDEX2D_R(0,k2);
        const int r1 = INDEX2D_R(1,k2);
        const {{REAL}} RA0 = RA[r0];
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];
        const {{REAL}} RA1 = RA[r1];
        const {{REAL}} RB1 = RB[r1];
        const {{REAL}} RE1 = RE[r1];
        const {{REAL}} RF1 = RF[r1];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI2(0,i2,j2,k2);
        const int phi1 = INDEX4D_PHI2(1,i2,j2,k2);
        const int matEy = ID[INDEX4D_ID(1,fi,fj,fk)];

        // Backward difference of Hx along z
        const {{REAL}} dHx = (Hx[cell] - Hx[INDEX3D_FIELDS(fi,fj,fk-1)]) / dz;

        Ey[cell] += updatecoeffsE[INDEX2D_MAT(matEy,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[phi0] + RB1 * PHI2[phi1]);

        // Entry 1 must be advanced first: it reads the old value of entry 0
        PHI2[phi1] = RE1 * PHI2[phi1] - RF1 * (RA0 * dHx + RB0 * PHI2[phi0]);
        PHI2[phi0] = RE0 * PHI2[phi0] - RF0 * dHx;
    }
}
|
||||
|
||||
|
||||
__kernel void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {

    // Corrects the Ex and Ey field components inside the zplus PML slab and
    // advances the row-0 entries of PHI1 (paired with Ex) and PHI2 (paired
    // with Ey).
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1, NX_PHI2, NY_PHI2, NZ_PHI2, NY_R:
    //         Dimensions of PHI1, PHI2, and R PML arrays
    //     ID, Ex, Ey, Ez, Hx, Hy, Hz: ID and field component arrays
    //     PHI1, PHI2, RA, RB, RE, RF: PML electric coefficient arrays
    //     d: Spatial discretisation, used here as dz
    //
    // updatecoeffsE and the INDEX* helpers are not kernel arguments; they are
    // expected to be defined elsewhere in the program source.

    // Linear index of this work-item across the 3D global range
    const int tid = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);

    // Slab extent in cells and the step used for the finite difference
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;
    const {{REAL}} dz = d;

    // Unravel the linear index into (p1, i1, j1, k1) subscripts of PHI1
    const int vol1 = NX_PHI1 * NY_PHI1 * NZ_PHI1;
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int rem1 = tid % vol1;
    const int p1 = tid / vol1;
    const int i1 = rem1 / plane1;
    const int j1 = (rem1 % plane1) / NZ_PHI1;
    const int k1 = (rem1 % plane1) % NZ_PHI1;

    // Unravel the linear index into (p2, i2, j2, k2) subscripts of PHI2
    const int vol2 = NX_PHI2 * NY_PHI2 * NZ_PHI2;
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int rem2 = tid % vol2;
    const int p2 = tid / vol2;
    const int i2 = rem2 / plane2;
    const int j2 = (rem2 % plane2) / NZ_PHI2;
    const int k2 = (rem2 % plane2) % NZ_PHI2;

    // Ex update, paired with PHI1
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Field subscripts; every axis is offset from the slab start
        const int fi = i1 + xs;
        const int fj = j1 + ys;
        const int fk = k1 + zs;

        // Row-0 PML coefficients, selected by the z position in the slab
        const int r0 = INDEX2D_R(0,k1);
        const {{REAL}} RA01 = RA[r0] - 1;
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI1(0,i1,j1,k1);
        const int matEx = ID[INDEX4D_ID(0,fi,fj,fk)];

        // Backward difference of Hy along z
        const {{REAL}} dHy = (Hy[cell] - Hy[INDEX3D_FIELDS(fi,fj,fk-1)]) / dz;

        // The field is corrected with the old PHI1 value before PHI1 advances
        Ex[cell] -= updatecoeffsE[INDEX2D_MAT(matEx,4)] * (RA01 * dHy + RB0 * PHI1[phi0]);
        PHI1[phi0] = RE0 * PHI1[phi0] - RF0 * dHy;
    }

    // Ey update, paired with PHI2
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Field subscripts; every axis is offset from the slab start
        const int fi = i2 + xs;
        const int fj = j2 + ys;
        const int fk = k2 + zs;

        // Row-0 PML coefficients, selected by the z position in the slab
        const int r0 = INDEX2D_R(0,k2);
        const {{REAL}} RA01 = RA[r0] - 1;
        const {{REAL}} RB0 = RB[r0];
        const {{REAL}} RE0 = RE[r0];
        const {{REAL}} RF0 = RF[r0];

        const int cell = INDEX3D_FIELDS(fi,fj,fk);
        const int phi0 = INDEX4D_PHI2(0,i2,j2,k2);
        const int matEy = ID[INDEX4D_ID(1,fi,fj,fk)];

        // Backward difference of Hx along z
        const {{REAL}} dHx = (Hx[cell] - Hx[INDEX3D_FIELDS(fi,fj,fk-1)]) / dz;

        // The field is corrected with the old PHI2 value before PHI2 advances
        Ey[cell] += updatecoeffsE[INDEX2D_MAT(matEy,4)] * (RA01 * dHx + RB0 * PHI2[phi0]);
        PHI2[phi0] = RE0 * PHI2[phi0] - RF0 * dHx;
    }
}
|
||||
|
||||
|
||||
__kernel void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf,
        int NX_PHI1, int NY_PHI1, int NZ_PHI1,
        int NX_PHI2, int NY_PHI2, int NZ_PHI2,
        int NY_R,
        __global const unsigned int* restrict ID,
        __global {{REAL}} *Ex,
        __global {{REAL}} *Ey,
        __global const {{REAL}}* restrict Ez,
        __global const {{REAL}}* restrict Hx,
        __global const {{REAL}}* restrict Hy,
        __global const {{REAL}}* restrict Hz,
        __global {{REAL}} *PHI1,
        __global {{REAL}} *PHI2,
        __global const {{REAL}}* restrict RA,
        __global const {{REAL}}* restrict RB,
        __global const {{REAL}}* restrict RE,
        __global const {{REAL}}* restrict RF,
        {{REAL}} d) {

    // Order-2 PML update of the Ex and Ey field components for the zplus slab.
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of the PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1: Dimensions of the PHI1 array (paired with Ex)
    //     NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of the PHI2 array (paired with Ey)
    //     NY_R: Row length of the RA, RB, RE and RF arrays
    //     ID, E, H: ID and field component arrays
    //     PHI1, PHI2: PML auxiliary arrays, read and overwritten in place
    //     RA, RB, RE, RF: PML electric coefficient arrays
    //     d: Spatial discretisation (used here as dz)

    // Linear index of this work-item over the 3D global range (dimension 0 varies fastest)
    const int tid = get_global_id(0) + get_global_size(0) * (get_global_id(1) + get_global_size(1) * get_global_id(2));

    // Unravel the linear index into subscripts of the 4D PHI1 array
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int volume1 = NX_PHI1 * plane1;
    const int rest1 = tid % volume1;
    const int n1 = tid / volume1;
    const int ix1 = rest1 / plane1;
    const int iy1 = (rest1 % plane1) / NZ_PHI1;
    const int iz1 = (rest1 % plane1) % NZ_PHI1;

    // Unravel the linear index into subscripts of the 4D PHI2 array
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int volume2 = NX_PHI2 * plane2;
    const int rest2 = tid % volume2;
    const int n2 = tid / volume2;
    const int ix2 = rest2 / plane2;
    const int iy2 = (rest2 % plane2) / NZ_PHI2;
    const int iz2 = (rest2 % plane2) % NZ_PHI2;

    // Slab extent in cells along each axis
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;

    if (n1 == 0 && ix1 < nx && iy1 < ny && iz1 < nz) {
        // Subscripts for field arrays
        const int gi = ix1 + xs;
        const int gj = iy1 + ys;
        const int gk = iz1 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI1(0, ix1, iy1, iz1);
        const int slot1 = INDEX4D_PHI1(1, ix1, iy1, iz1);

        // PML coefficients (rows 0 and 1, selected by the z offset in the slab)
        const {{REAL}} RA0 = RA[INDEX2D_R(0, iz1)];
        const {{REAL}} RB0 = RB[INDEX2D_R(0, iz1)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, iz1)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, iz1)];
        const {{REAL}} RA1 = RA[INDEX2D_R(1, iz1)];
        const {{REAL}} RB1 = RB[INDEX2D_R(1, iz1)];
        const {{REAL}} RE1 = RE[INDEX2D_R(1, iz1)];
        const {{REAL}} RF1 = RF[INDEX2D_R(1, iz1)];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        // Ex
        const int materialEx = ID[INDEX4D_ID(0, gi, gj, gk)];
        const {{REAL}} dHy = (Hy[cell] - Hy[INDEX3D_FIELDS(gi, gj, gk-1)]) / d;
        Ex[cell] -= updatecoeffsE[INDEX2D_MAT(materialEx, 4)] * (RA01 * dHy + RA1 * RB0 * PHI1[slot0] + RB1 * PHI1[slot1]);

        // Slot 1 is advanced before slot 0 because it reads the old slot 0 value
        PHI1[slot1] = RE1 * PHI1[slot1] - RF1 * (RA0 * dHy + RB0 * PHI1[slot0]);
        PHI1[slot0] = RE0 * PHI1[slot0] - RF0 * dHy;
    }

    if (n2 == 0 && ix2 < nx && iy2 < ny && iz2 < nz) {
        // Subscripts for field arrays
        const int gi = ix2 + xs;
        const int gj = iy2 + ys;
        const int gk = iz2 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI2(0, ix2, iy2, iz2);
        const int slot1 = INDEX4D_PHI2(1, ix2, iy2, iz2);

        // PML coefficients (rows 0 and 1, selected by the z offset in the slab)
        const {{REAL}} RA0 = RA[INDEX2D_R(0, iz2)];
        const {{REAL}} RB0 = RB[INDEX2D_R(0, iz2)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, iz2)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, iz2)];
        const {{REAL}} RA1 = RA[INDEX2D_R(1, iz2)];
        const {{REAL}} RB1 = RB[INDEX2D_R(1, iz2)];
        const {{REAL}} RE1 = RE[INDEX2D_R(1, iz2)];
        const {{REAL}} RF1 = RF[INDEX2D_R(1, iz2)];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        // Ey
        const int materialEy = ID[INDEX4D_ID(1, gi, gj, gk)];
        const {{REAL}} dHx = (Hx[cell] - Hx[INDEX3D_FIELDS(gi, gj, gk-1)]) / d;
        Ey[cell] += updatecoeffsE[INDEX2D_MAT(materialEy, 4)] * (RA01 * dHx + RA1 * RB0 * PHI2[slot0] + RB1 * PHI2[slot1]);

        // Slot 1 is advanced before slot 0 because it reads the old slot 0 value
        PHI2[slot1] = RE1 * PHI2[slot1] - RF1 * (RA0 * dHx + RB0 * PHI2[slot0]);
        PHI2[slot0] = RE0 * PHI2[slot0] - RF0 * dHx;
    }
}
|
||||
|
@@ -0,0 +1,962 @@
|
||||
// Macros for converting subscripts to a linear (row-major) index.
// Every replacement list is wrapped in outer parentheses so that an expansion
// keeps its value when it appears inside a larger expression (for example
// when multiplied or subtracted), not only when used as a bare array subscript.
// INDEX2D_R and INDEX4D_PHI1/PHI2 rely on NY_R and N*_PHI* being in scope at
// the point of use (they are kernel arguments); the remaining dimensions are
// substituted when the template is rendered.
#define INDEX2D_R(m, n) ((m)*(NY_R)+(n))
#define INDEX2D_MAT(m, n) ((m)*({{NY_MATCOEFFS}})+(n))
#define INDEX3D_FIELDS(i, j, k) ((i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k))
#define INDEX4D_ID(p, i, j, k) ((p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k))
#define INDEX4D_PHI1(p, i, j, k) ((p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k))
#define INDEX4D_PHI2(p, i, j, k) ((p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k))
|
||||
|
||||
|
||||
// Table of update coefficients used by the H-field PML kernels in this file.
// It is declared in the __constant address space and its initialiser list is
// generated at template-render time, one entry per value of updateHVal.
// The kernels read it as a flattened 2D table through INDEX2D_MAT(material, column).
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
|
||||
{
|
||||
{% for i in updateHVal %}
|
||||
{{i}},
|
||||
{% endfor %}
|
||||
};
|
||||
|
||||
__kernel void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf,
        int NX_PHI1, int NY_PHI1, int NZ_PHI1,
        int NX_PHI2, int NY_PHI2, int NZ_PHI2,
        int NY_R,
        __global const unsigned int* restrict ID,
        __global const {{REAL}}* restrict Ex,
        __global const {{REAL}}* restrict Ey,
        __global const {{REAL}}* restrict Ez,
        __global const {{REAL}}* restrict Hx,
        __global {{REAL}} *Hy,
        __global {{REAL}} *Hz,
        __global {{REAL}} *PHI1,
        __global {{REAL}} *PHI2,
        __global const {{REAL}}* restrict RA,
        __global const {{REAL}}* restrict RB,
        __global const {{REAL}}* restrict RE,
        __global const {{REAL}}* restrict RF,
        {{REAL}} d) {

    // Order-1 PML update of the Hy and Hz field components for the xminus slab.
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of the PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1: Dimensions of the PHI1 array (paired with Hy)
    //     NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of the PHI2 array (paired with Hz)
    //     NY_R: Row length of the RA, RB, RE and RF arrays
    //     ID, E, H: ID and field component arrays
    //     PHI1, PHI2: PML auxiliary arrays, read and overwritten in place
    //     RA, RB, RE, RF: PML magnetic coefficient arrays
    //     d: Spatial discretisation (used here as dx)

    // Linear index of this work-item over the 3D global range (dimension 0 varies fastest)
    const int tid = get_global_id(0) + get_global_size(0) * (get_global_id(1) + get_global_size(1) * get_global_id(2));

    // Unravel the linear index into subscripts of the 4D PHI1 array
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int volume1 = NX_PHI1 * plane1;
    const int rest1 = tid % volume1;
    const int n1 = tid / volume1;
    const int ix1 = rest1 / plane1;
    const int iy1 = (rest1 % plane1) / NZ_PHI1;
    const int iz1 = (rest1 % plane1) % NZ_PHI1;

    // Unravel the linear index into subscripts of the 4D PHI2 array
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int volume2 = NX_PHI2 * plane2;
    const int rest2 = tid % volume2;
    const int n2 = tid / volume2;
    const int ix2 = rest2 / plane2;
    const int iy2 = (rest2 % plane2) / NZ_PHI2;
    const int iz2 = (rest2 % plane2) % NZ_PHI2;

    // Slab extent in cells along each axis
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;

    if (n1 == 0 && ix1 < nx && iy1 < ny && iz1 < nz) {
        // Subscripts for field arrays; the x subscript is mirrored so that
        // local index 0 maps to cell xf-1 and larger indices move towards xs
        const int gi = xf - (ix1 + 1);
        const int gj = iy1 + ys;
        const int gk = iz1 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI1(0, ix1, iy1, iz1);

        // PML coefficients (row 0, selected by the x offset in the slab)
        const {{REAL}} RA01 = RA[INDEX2D_R(0, ix1)] - 1;
        const {{REAL}} RB0 = RB[INDEX2D_R(0, ix1)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, ix1)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, ix1)];

        // Hy
        const int materialHy = ID[INDEX4D_ID(4, gi, gj, gk)];
        const {{REAL}} dEz = (Ez[INDEX3D_FIELDS(gi+1, gj, gk)] - Ez[cell]) / d;
        Hy[cell] += updatecoeffsH[INDEX2D_MAT(materialHy, 4)] * (RA01 * dEz + RB0 * PHI1[slot0]);
        PHI1[slot0] = RE0 * PHI1[slot0] - RF0 * dEz;
    }

    if (n2 == 0 && ix2 < nx && iy2 < ny && iz2 < nz) {
        // Subscripts for field arrays; the x subscript is mirrored so that
        // local index 0 maps to cell xf-1 and larger indices move towards xs
        const int gi = xf - (ix2 + 1);
        const int gj = iy2 + ys;
        const int gk = iz2 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI2(0, ix2, iy2, iz2);

        // PML coefficients (row 0, selected by the x offset in the slab)
        const {{REAL}} RA01 = RA[INDEX2D_R(0, ix2)] - 1;
        const {{REAL}} RB0 = RB[INDEX2D_R(0, ix2)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, ix2)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, ix2)];

        // Hz
        const int materialHz = ID[INDEX4D_ID(5, gi, gj, gk)];
        const {{REAL}} dEy = (Ey[INDEX3D_FIELDS(gi+1, gj, gk)] - Ey[cell]) / d;
        Hz[cell] -= updatecoeffsH[INDEX2D_MAT(materialHz, 4)] * (RA01 * dEy + RB0 * PHI2[slot0]);
        PHI2[slot0] = RE0 * PHI2[slot0] - RF0 * dEy;
    }
}
|
||||
|
||||
|
||||
__kernel void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf,
        int NX_PHI1, int NY_PHI1, int NZ_PHI1,
        int NX_PHI2, int NY_PHI2, int NZ_PHI2,
        int NY_R,
        __global const unsigned int* restrict ID,
        __global const {{REAL}}* restrict Ex,
        __global const {{REAL}}* restrict Ey,
        __global const {{REAL}}* restrict Ez,
        __global const {{REAL}}* restrict Hx,
        __global {{REAL}} *Hy,
        __global {{REAL}} *Hz,
        __global {{REAL}} *PHI1,
        __global {{REAL}} *PHI2,
        __global const {{REAL}}* restrict RA,
        __global const {{REAL}}* restrict RB,
        __global const {{REAL}}* restrict RE,
        __global const {{REAL}}* restrict RF,
        {{REAL}} d) {

    // Order-2 PML update of the Hy and Hz field components for the xminus slab.
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of the PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1: Dimensions of the PHI1 array (paired with Hy)
    //     NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of the PHI2 array (paired with Hz)
    //     NY_R: Row length of the RA, RB, RE and RF arrays
    //     ID, E, H: ID and field component arrays
    //     PHI1, PHI2: PML auxiliary arrays, read and overwritten in place
    //     RA, RB, RE, RF: PML magnetic coefficient arrays
    //     d: Spatial discretisation (used here as dx)

    // Linear index of this work-item over the 3D global range (dimension 0 varies fastest)
    const int tid = get_global_id(0) + get_global_size(0) * (get_global_id(1) + get_global_size(1) * get_global_id(2));

    // Unravel the linear index into subscripts of the 4D PHI1 array
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int volume1 = NX_PHI1 * plane1;
    const int rest1 = tid % volume1;
    const int n1 = tid / volume1;
    const int ix1 = rest1 / plane1;
    const int iy1 = (rest1 % plane1) / NZ_PHI1;
    const int iz1 = (rest1 % plane1) % NZ_PHI1;

    // Unravel the linear index into subscripts of the 4D PHI2 array
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int volume2 = NX_PHI2 * plane2;
    const int rest2 = tid % volume2;
    const int n2 = tid / volume2;
    const int ix2 = rest2 / plane2;
    const int iy2 = (rest2 % plane2) / NZ_PHI2;
    const int iz2 = (rest2 % plane2) % NZ_PHI2;

    // Slab extent in cells along each axis
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;

    if (n1 == 0 && ix1 < nx && iy1 < ny && iz1 < nz) {
        // Subscripts for field arrays; the x subscript is mirrored so that
        // local index 0 maps to cell xf-1 and larger indices move towards xs
        const int gi = xf - (ix1 + 1);
        const int gj = iy1 + ys;
        const int gk = iz1 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI1(0, ix1, iy1, iz1);
        const int slot1 = INDEX4D_PHI1(1, ix1, iy1, iz1);

        // PML coefficients (rows 0 and 1, selected by the x offset in the slab)
        const {{REAL}} RA0 = RA[INDEX2D_R(0, ix1)];
        const {{REAL}} RB0 = RB[INDEX2D_R(0, ix1)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, ix1)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, ix1)];
        const {{REAL}} RA1 = RA[INDEX2D_R(1, ix1)];
        const {{REAL}} RB1 = RB[INDEX2D_R(1, ix1)];
        const {{REAL}} RE1 = RE[INDEX2D_R(1, ix1)];
        const {{REAL}} RF1 = RF[INDEX2D_R(1, ix1)];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        // Hy
        const int materialHy = ID[INDEX4D_ID(4, gi, gj, gk)];
        const {{REAL}} dEz = (Ez[INDEX3D_FIELDS(gi+1, gj, gk)] - Ez[cell]) / d;
        Hy[cell] += updatecoeffsH[INDEX2D_MAT(materialHy, 4)] * (RA01 * dEz + RA1 * RB0 * PHI1[slot0] + RB1 * PHI1[slot1]);

        // Slot 1 is advanced before slot 0 because it reads the old slot 0 value
        PHI1[slot1] = RE1 * PHI1[slot1] - RF1 * (RA0 * dEz + RB0 * PHI1[slot0]);
        PHI1[slot0] = RE0 * PHI1[slot0] - RF0 * dEz;
    }

    if (n2 == 0 && ix2 < nx && iy2 < ny && iz2 < nz) {
        // Subscripts for field arrays; the x subscript is mirrored so that
        // local index 0 maps to cell xf-1 and larger indices move towards xs
        const int gi = xf - (ix2 + 1);
        const int gj = iy2 + ys;
        const int gk = iz2 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI2(0, ix2, iy2, iz2);
        const int slot1 = INDEX4D_PHI2(1, ix2, iy2, iz2);

        // PML coefficients (rows 0 and 1, selected by the x offset in the slab)
        const {{REAL}} RA0 = RA[INDEX2D_R(0, ix2)];
        const {{REAL}} RB0 = RB[INDEX2D_R(0, ix2)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, ix2)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, ix2)];
        const {{REAL}} RA1 = RA[INDEX2D_R(1, ix2)];
        const {{REAL}} RB1 = RB[INDEX2D_R(1, ix2)];
        const {{REAL}} RE1 = RE[INDEX2D_R(1, ix2)];
        const {{REAL}} RF1 = RF[INDEX2D_R(1, ix2)];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        // Hz
        const int materialHz = ID[INDEX4D_ID(5, gi, gj, gk)];
        const {{REAL}} dEy = (Ey[INDEX3D_FIELDS(gi+1, gj, gk)] - Ey[cell]) / d;
        Hz[cell] -= updatecoeffsH[INDEX2D_MAT(materialHz, 4)] * (RA01 * dEy + RA1 * RB0 * PHI2[slot0] + RB1 * PHI2[slot1]);

        // Slot 1 is advanced before slot 0 because it reads the old slot 0 value
        PHI2[slot1] = RE1 * PHI2[slot1] - RF1 * (RA0 * dEy + RB0 * PHI2[slot0]);
        PHI2[slot0] = RE0 * PHI2[slot0] - RF0 * dEy;
    }
}
|
||||
|
||||
|
||||
__kernel void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf,
        int NX_PHI1, int NY_PHI1, int NZ_PHI1,
        int NX_PHI2, int NY_PHI2, int NZ_PHI2,
        int NY_R,
        __global const unsigned int* restrict ID,
        __global const {{REAL}}* restrict Ex,
        __global const {{REAL}}* restrict Ey,
        __global const {{REAL}}* restrict Ez,
        __global const {{REAL}}* restrict Hx,
        __global {{REAL}} *Hy,
        __global {{REAL}} *Hz,
        __global {{REAL}} *PHI1,
        __global {{REAL}} *PHI2,
        __global const {{REAL}}* restrict RA,
        __global const {{REAL}}* restrict RB,
        __global const {{REAL}}* restrict RE,
        __global const {{REAL}}* restrict RF,
        {{REAL}} d) {

    // Order-1 PML update of the Hy and Hz field components for the xplus slab.
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of the PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1: Dimensions of the PHI1 array (paired with Hy)
    //     NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of the PHI2 array (paired with Hz)
    //     NY_R: Row length of the RA, RB, RE and RF arrays
    //     ID, E, H: ID and field component arrays
    //     PHI1, PHI2: PML auxiliary arrays, read and overwritten in place
    //     RA, RB, RE, RF: PML magnetic coefficient arrays
    //     d: Spatial discretisation (used here as dx)

    // Linear index of this work-item over the 3D global range (dimension 0 varies fastest)
    const int tid = get_global_id(0) + get_global_size(0) * (get_global_id(1) + get_global_size(1) * get_global_id(2));

    // Unravel the linear index into subscripts of the 4D PHI1 array
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int volume1 = NX_PHI1 * plane1;
    const int rest1 = tid % volume1;
    const int n1 = tid / volume1;
    const int ix1 = rest1 / plane1;
    const int iy1 = (rest1 % plane1) / NZ_PHI1;
    const int iz1 = (rest1 % plane1) % NZ_PHI1;

    // Unravel the linear index into subscripts of the 4D PHI2 array
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int volume2 = NX_PHI2 * plane2;
    const int rest2 = tid % volume2;
    const int n2 = tid / volume2;
    const int ix2 = rest2 / plane2;
    const int iy2 = (rest2 % plane2) / NZ_PHI2;
    const int iz2 = (rest2 % plane2) % NZ_PHI2;

    // Slab extent in cells along each axis
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;

    if (n1 == 0 && ix1 < nx && iy1 < ny && iz1 < nz) {
        // Subscripts for field arrays
        const int gi = ix1 + xs;
        const int gj = iy1 + ys;
        const int gk = iz1 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI1(0, ix1, iy1, iz1);

        // PML coefficients (row 0, selected by the x offset in the slab)
        const {{REAL}} RA01 = RA[INDEX2D_R(0, ix1)] - 1;
        const {{REAL}} RB0 = RB[INDEX2D_R(0, ix1)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, ix1)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, ix1)];

        // Hy
        const int materialHy = ID[INDEX4D_ID(4, gi, gj, gk)];
        const {{REAL}} dEz = (Ez[INDEX3D_FIELDS(gi+1, gj, gk)] - Ez[cell]) / d;
        Hy[cell] += updatecoeffsH[INDEX2D_MAT(materialHy, 4)] * (RA01 * dEz + RB0 * PHI1[slot0]);
        PHI1[slot0] = RE0 * PHI1[slot0] - RF0 * dEz;
    }

    if (n2 == 0 && ix2 < nx && iy2 < ny && iz2 < nz) {
        // Subscripts for field arrays
        const int gi = ix2 + xs;
        const int gj = iy2 + ys;
        const int gk = iz2 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI2(0, ix2, iy2, iz2);

        // PML coefficients (row 0, selected by the x offset in the slab)
        const {{REAL}} RA01 = RA[INDEX2D_R(0, ix2)] - 1;
        const {{REAL}} RB0 = RB[INDEX2D_R(0, ix2)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, ix2)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, ix2)];

        // Hz
        const int materialHz = ID[INDEX4D_ID(5, gi, gj, gk)];
        const {{REAL}} dEy = (Ey[INDEX3D_FIELDS(gi+1, gj, gk)] - Ey[cell]) / d;
        Hz[cell] -= updatecoeffsH[INDEX2D_MAT(materialHz, 4)] * (RA01 * dEy + RB0 * PHI2[slot0]);
        PHI2[slot0] = RE0 * PHI2[slot0] - RF0 * dEy;
    }
}
|
||||
|
||||
|
||||
__kernel void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf,
        int NX_PHI1, int NY_PHI1, int NZ_PHI1,
        int NX_PHI2, int NY_PHI2, int NZ_PHI2,
        int NY_R,
        __global const unsigned int* restrict ID,
        __global const {{REAL}}* restrict Ex,
        __global const {{REAL}}* restrict Ey,
        __global const {{REAL}}* restrict Ez,
        __global const {{REAL}}* restrict Hx,
        __global {{REAL}} *Hy,
        __global {{REAL}} *Hz,
        __global {{REAL}} *PHI1,
        __global {{REAL}} *PHI2,
        __global const {{REAL}}* restrict RA,
        __global const {{REAL}}* restrict RB,
        __global const {{REAL}}* restrict RE,
        __global const {{REAL}}* restrict RF,
        {{REAL}} d) {

    // Order-2 PML update of the Hy and Hz field components for the xplus slab.
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of the PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1: Dimensions of the PHI1 array (paired with Hy)
    //     NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of the PHI2 array (paired with Hz)
    //     NY_R: Row length of the RA, RB, RE and RF arrays
    //     ID, E, H: ID and field component arrays
    //     PHI1, PHI2: PML auxiliary arrays, read and overwritten in place
    //     RA, RB, RE, RF: PML magnetic coefficient arrays
    //     d: Spatial discretisation (used here as dx)

    // Linear index of this work-item over the 3D global range (dimension 0 varies fastest)
    const int tid = get_global_id(0) + get_global_size(0) * (get_global_id(1) + get_global_size(1) * get_global_id(2));

    // Unravel the linear index into subscripts of the 4D PHI1 array
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int volume1 = NX_PHI1 * plane1;
    const int rest1 = tid % volume1;
    const int n1 = tid / volume1;
    const int ix1 = rest1 / plane1;
    const int iy1 = (rest1 % plane1) / NZ_PHI1;
    const int iz1 = (rest1 % plane1) % NZ_PHI1;

    // Unravel the linear index into subscripts of the 4D PHI2 array
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int volume2 = NX_PHI2 * plane2;
    const int rest2 = tid % volume2;
    const int n2 = tid / volume2;
    const int ix2 = rest2 / plane2;
    const int iy2 = (rest2 % plane2) / NZ_PHI2;
    const int iz2 = (rest2 % plane2) % NZ_PHI2;

    // Slab extent in cells along each axis
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;

    if (n1 == 0 && ix1 < nx && iy1 < ny && iz1 < nz) {
        // Subscripts for field arrays
        const int gi = ix1 + xs;
        const int gj = iy1 + ys;
        const int gk = iz1 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI1(0, ix1, iy1, iz1);
        const int slot1 = INDEX4D_PHI1(1, ix1, iy1, iz1);

        // PML coefficients (rows 0 and 1, selected by the x offset in the slab)
        const {{REAL}} RA0 = RA[INDEX2D_R(0, ix1)];
        const {{REAL}} RB0 = RB[INDEX2D_R(0, ix1)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, ix1)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, ix1)];
        const {{REAL}} RA1 = RA[INDEX2D_R(1, ix1)];
        const {{REAL}} RB1 = RB[INDEX2D_R(1, ix1)];
        const {{REAL}} RE1 = RE[INDEX2D_R(1, ix1)];
        const {{REAL}} RF1 = RF[INDEX2D_R(1, ix1)];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        // Hy
        const int materialHy = ID[INDEX4D_ID(4, gi, gj, gk)];
        const {{REAL}} dEz = (Ez[INDEX3D_FIELDS(gi+1, gj, gk)] - Ez[cell]) / d;
        Hy[cell] += updatecoeffsH[INDEX2D_MAT(materialHy, 4)] * (RA01 * dEz + RA1 * RB0 * PHI1[slot0] + RB1 * PHI1[slot1]);

        // Slot 1 is advanced before slot 0 because it reads the old slot 0 value
        PHI1[slot1] = RE1 * PHI1[slot1] - RF1 * (RA0 * dEz + RB0 * PHI1[slot0]);
        PHI1[slot0] = RE0 * PHI1[slot0] - RF0 * dEz;
    }

    if (n2 == 0 && ix2 < nx && iy2 < ny && iz2 < nz) {
        // Subscripts for field arrays
        const int gi = ix2 + xs;
        const int gj = iy2 + ys;
        const int gk = iz2 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI2(0, ix2, iy2, iz2);
        const int slot1 = INDEX4D_PHI2(1, ix2, iy2, iz2);

        // PML coefficients (rows 0 and 1, selected by the x offset in the slab)
        const {{REAL}} RA0 = RA[INDEX2D_R(0, ix2)];
        const {{REAL}} RB0 = RB[INDEX2D_R(0, ix2)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, ix2)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, ix2)];
        const {{REAL}} RA1 = RA[INDEX2D_R(1, ix2)];
        const {{REAL}} RB1 = RB[INDEX2D_R(1, ix2)];
        const {{REAL}} RE1 = RE[INDEX2D_R(1, ix2)];
        const {{REAL}} RF1 = RF[INDEX2D_R(1, ix2)];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        // Hz
        const int materialHz = ID[INDEX4D_ID(5, gi, gj, gk)];
        const {{REAL}} dEy = (Ey[INDEX3D_FIELDS(gi+1, gj, gk)] - Ey[cell]) / d;
        Hz[cell] -= updatecoeffsH[INDEX2D_MAT(materialHz, 4)] * (RA01 * dEy + RA1 * RB0 * PHI2[slot0] + RB1 * PHI2[slot1]);

        // Slot 1 is advanced before slot 0 because it reads the old slot 0 value
        PHI2[slot1] = RE1 * PHI2[slot1] - RF1 * (RA0 * dEy + RB0 * PHI2[slot0]);
        PHI2[slot0] = RE0 * PHI2[slot0] - RF0 * dEy;
    }
}
|
||||
|
||||
|
||||
__kernel void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf,
        int NX_PHI1, int NY_PHI1, int NZ_PHI1,
        int NX_PHI2, int NY_PHI2, int NZ_PHI2,
        int NY_R,
        __global const unsigned int* restrict ID,
        __global const {{REAL}}* restrict Ex,
        __global const {{REAL}}* restrict Ey,
        __global const {{REAL}}* restrict Ez,
        __global {{REAL}} *Hx,
        __global const {{REAL}}* restrict Hy,
        __global {{REAL}} *Hz,
        __global {{REAL}} *PHI1,
        __global {{REAL}} *PHI2,
        __global const {{REAL}}* restrict RA,
        __global const {{REAL}}* restrict RB,
        __global const {{REAL}}* restrict RE,
        __global const {{REAL}}* restrict RF,
        {{REAL}} d) {

    // Order-1 PML update of the Hx and Hz field components for the yminus slab.
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of the PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1: Dimensions of the PHI1 array (paired with Hx)
    //     NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of the PHI2 array (paired with Hz)
    //     NY_R: Row length of the RA, RB, RE and RF arrays
    //     ID, E, H: ID and field component arrays
    //     PHI1, PHI2: PML auxiliary arrays, read and overwritten in place
    //     RA, RB, RE, RF: PML magnetic coefficient arrays
    //     d: Spatial discretisation (used here as dy)

    // Linear index of this work-item over the 3D global range (dimension 0 varies fastest)
    const int tid = get_global_id(0) + get_global_size(0) * (get_global_id(1) + get_global_size(1) * get_global_id(2));

    // Unravel the linear index into subscripts of the 4D PHI1 array
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int volume1 = NX_PHI1 * plane1;
    const int rest1 = tid % volume1;
    const int n1 = tid / volume1;
    const int ix1 = rest1 / plane1;
    const int iy1 = (rest1 % plane1) / NZ_PHI1;
    const int iz1 = (rest1 % plane1) % NZ_PHI1;

    // Unravel the linear index into subscripts of the 4D PHI2 array
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int volume2 = NX_PHI2 * plane2;
    const int rest2 = tid % volume2;
    const int n2 = tid / volume2;
    const int ix2 = rest2 / plane2;
    const int iy2 = (rest2 % plane2) / NZ_PHI2;
    const int iz2 = (rest2 % plane2) % NZ_PHI2;

    // Slab extent in cells along each axis
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;

    if (n1 == 0 && ix1 < nx && iy1 < ny && iz1 < nz) {
        // Subscripts for field arrays; the y subscript is mirrored so that
        // local index 0 maps to cell yf-1 and larger indices move towards ys
        const int gi = ix1 + xs;
        const int gj = yf - (iy1 + 1);
        const int gk = iz1 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI1(0, ix1, iy1, iz1);

        // PML coefficients (row 0, selected by the y offset in the slab)
        const {{REAL}} RA01 = RA[INDEX2D_R(0, iy1)] - 1;
        const {{REAL}} RB0 = RB[INDEX2D_R(0, iy1)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, iy1)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, iy1)];

        // Hx
        const int materialHx = ID[INDEX4D_ID(3, gi, gj, gk)];
        const {{REAL}} dEz = (Ez[INDEX3D_FIELDS(gi, gj+1, gk)] - Ez[cell]) / d;
        Hx[cell] -= updatecoeffsH[INDEX2D_MAT(materialHx, 4)] * (RA01 * dEz + RB0 * PHI1[slot0]);
        PHI1[slot0] = RE0 * PHI1[slot0] - RF0 * dEz;
    }

    if (n2 == 0 && ix2 < nx && iy2 < ny && iz2 < nz) {
        // Subscripts for field arrays; the y subscript is mirrored so that
        // local index 0 maps to cell yf-1 and larger indices move towards ys
        const int gi = ix2 + xs;
        const int gj = yf - (iy2 + 1);
        const int gk = iz2 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI2(0, ix2, iy2, iz2);

        // PML coefficients (row 0, selected by the y offset in the slab)
        const {{REAL}} RA01 = RA[INDEX2D_R(0, iy2)] - 1;
        const {{REAL}} RB0 = RB[INDEX2D_R(0, iy2)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, iy2)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, iy2)];

        // Hz
        const int materialHz = ID[INDEX4D_ID(5, gi, gj, gk)];
        const {{REAL}} dEx = (Ex[INDEX3D_FIELDS(gi, gj+1, gk)] - Ex[cell]) / d;
        Hz[cell] += updatecoeffsH[INDEX2D_MAT(materialHz, 4)] * (RA01 * dEx + RB0 * PHI2[slot0]);
        PHI2[slot0] = RE0 * PHI2[slot0] - RF0 * dEx;
    }
}
|
||||
|
||||
|
||||
__kernel void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf,
        int NX_PHI1, int NY_PHI1, int NZ_PHI1,
        int NX_PHI2, int NY_PHI2, int NZ_PHI2,
        int NY_R,
        __global const unsigned int* restrict ID,
        __global const {{REAL}}* restrict Ex,
        __global const {{REAL}}* restrict Ey,
        __global const {{REAL}}* restrict Ez,
        __global {{REAL}} *Hx,
        __global const {{REAL}}* restrict Hy,
        __global {{REAL}} *Hz,
        __global {{REAL}} *PHI1,
        __global {{REAL}} *PHI2,
        __global const {{REAL}}* restrict RA,
        __global const {{REAL}}* restrict RB,
        __global const {{REAL}}* restrict RE,
        __global const {{REAL}}* restrict RF,
        {{REAL}} d) {

    // Order-2 PML update of the Hx and Hz field components for the yminus slab.
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of the PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1: Dimensions of the PHI1 array (paired with Hx)
    //     NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of the PHI2 array (paired with Hz)
    //     NY_R: Row length of the RA, RB, RE and RF arrays
    //     ID, E, H: ID and field component arrays
    //     PHI1, PHI2: PML auxiliary arrays, read and overwritten in place
    //     RA, RB, RE, RF: PML magnetic coefficient arrays
    //     d: Spatial discretisation (used here as dy)

    // Linear index of this work-item over the 3D global range (dimension 0 varies fastest)
    const int tid = get_global_id(0) + get_global_size(0) * (get_global_id(1) + get_global_size(1) * get_global_id(2));

    // Unravel the linear index into subscripts of the 4D PHI1 array
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int volume1 = NX_PHI1 * plane1;
    const int rest1 = tid % volume1;
    const int n1 = tid / volume1;
    const int ix1 = rest1 / plane1;
    const int iy1 = (rest1 % plane1) / NZ_PHI1;
    const int iz1 = (rest1 % plane1) % NZ_PHI1;

    // Unravel the linear index into subscripts of the 4D PHI2 array
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int volume2 = NX_PHI2 * plane2;
    const int rest2 = tid % volume2;
    const int n2 = tid / volume2;
    const int ix2 = rest2 / plane2;
    const int iy2 = (rest2 % plane2) / NZ_PHI2;
    const int iz2 = (rest2 % plane2) % NZ_PHI2;

    // Slab extent in cells along each axis
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;

    if (n1 == 0 && ix1 < nx && iy1 < ny && iz1 < nz) {
        // Subscripts for field arrays; the y subscript is mirrored so that
        // local index 0 maps to cell yf-1 and larger indices move towards ys
        const int gi = ix1 + xs;
        const int gj = yf - (iy1 + 1);
        const int gk = iz1 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI1(0, ix1, iy1, iz1);
        const int slot1 = INDEX4D_PHI1(1, ix1, iy1, iz1);

        // PML coefficients (rows 0 and 1, selected by the y offset in the slab)
        const {{REAL}} RA0 = RA[INDEX2D_R(0, iy1)];
        const {{REAL}} RB0 = RB[INDEX2D_R(0, iy1)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, iy1)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, iy1)];
        const {{REAL}} RA1 = RA[INDEX2D_R(1, iy1)];
        const {{REAL}} RB1 = RB[INDEX2D_R(1, iy1)];
        const {{REAL}} RE1 = RE[INDEX2D_R(1, iy1)];
        const {{REAL}} RF1 = RF[INDEX2D_R(1, iy1)];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        // Hx
        const int materialHx = ID[INDEX4D_ID(3, gi, gj, gk)];
        const {{REAL}} dEz = (Ez[INDEX3D_FIELDS(gi, gj+1, gk)] - Ez[cell]) / d;
        Hx[cell] -= updatecoeffsH[INDEX2D_MAT(materialHx, 4)] * (RA01 * dEz + RA1 * RB0 * PHI1[slot0] + RB1 * PHI1[slot1]);

        // Slot 1 is advanced before slot 0 because it reads the old slot 0 value
        PHI1[slot1] = RE1 * PHI1[slot1] - RF1 * (RA0 * dEz + RB0 * PHI1[slot0]);
        PHI1[slot0] = RE0 * PHI1[slot0] - RF0 * dEz;
    }

    if (n2 == 0 && ix2 < nx && iy2 < ny && iz2 < nz) {
        // Subscripts for field arrays; the y subscript is mirrored so that
        // local index 0 maps to cell yf-1 and larger indices move towards ys
        const int gi = ix2 + xs;
        const int gj = yf - (iy2 + 1);
        const int gk = iz2 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI2(0, ix2, iy2, iz2);
        const int slot1 = INDEX4D_PHI2(1, ix2, iy2, iz2);

        // PML coefficients (rows 0 and 1, selected by the y offset in the slab)
        const {{REAL}} RA0 = RA[INDEX2D_R(0, iy2)];
        const {{REAL}} RB0 = RB[INDEX2D_R(0, iy2)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, iy2)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, iy2)];
        const {{REAL}} RA1 = RA[INDEX2D_R(1, iy2)];
        const {{REAL}} RB1 = RB[INDEX2D_R(1, iy2)];
        const {{REAL}} RE1 = RE[INDEX2D_R(1, iy2)];
        const {{REAL}} RF1 = RF[INDEX2D_R(1, iy2)];
        const {{REAL}} RA01 = RA0 * RA1 - 1;

        // Hz
        const int materialHz = ID[INDEX4D_ID(5, gi, gj, gk)];
        const {{REAL}} dEx = (Ex[INDEX3D_FIELDS(gi, gj+1, gk)] - Ex[cell]) / d;
        Hz[cell] += updatecoeffsH[INDEX2D_MAT(materialHz, 4)] * (RA01 * dEx + RA1 * RB0 * PHI2[slot0] + RB1 * PHI2[slot1]);

        // Slot 1 is advanced before slot 0 because it reads the old slot 0 value
        PHI2[slot1] = RE1 * PHI2[slot1] - RF1 * (RA0 * dEx + RB0 * PHI2[slot0]);
        PHI2[slot0] = RE0 * PHI2[slot0] - RF0 * dEx;
    }
}
|
||||
|
||||
|
||||
__kernel void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf,
        int NX_PHI1, int NY_PHI1, int NZ_PHI1,
        int NX_PHI2, int NY_PHI2, int NZ_PHI2,
        int NY_R,
        __global const unsigned int* restrict ID,
        __global const {{REAL}}* restrict Ex,
        __global const {{REAL}}* restrict Ey,
        __global const {{REAL}}* restrict Ez,
        __global {{REAL}} *Hx,
        __global const {{REAL}}* restrict Hy,
        __global {{REAL}} *Hz,
        __global {{REAL}} *PHI1,
        __global {{REAL}} *PHI2,
        __global const {{REAL}}* restrict RA,
        __global const {{REAL}}* restrict RB,
        __global const {{REAL}}* restrict RE,
        __global const {{REAL}}* restrict RF,
        {{REAL}} d) {

    // Order-1 PML update of the Hx and Hz field components for the yplus slab.
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of the PML slab
    //     NX_PHI1, NY_PHI1, NZ_PHI1: Dimensions of the PHI1 array (paired with Hx)
    //     NX_PHI2, NY_PHI2, NZ_PHI2: Dimensions of the PHI2 array (paired with Hz)
    //     NY_R: Row length of the RA, RB, RE and RF arrays
    //     ID, E, H: ID and field component arrays
    //     PHI1, PHI2: PML auxiliary arrays, read and overwritten in place
    //     RA, RB, RE, RF: PML magnetic coefficient arrays
    //     d: Spatial discretisation (used here as dy)

    // Linear index of this work-item over the 3D global range (dimension 0 varies fastest)
    const int tid = get_global_id(0) + get_global_size(0) * (get_global_id(1) + get_global_size(1) * get_global_id(2));

    // Unravel the linear index into subscripts of the 4D PHI1 array
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int volume1 = NX_PHI1 * plane1;
    const int rest1 = tid % volume1;
    const int n1 = tid / volume1;
    const int ix1 = rest1 / plane1;
    const int iy1 = (rest1 % plane1) / NZ_PHI1;
    const int iz1 = (rest1 % plane1) % NZ_PHI1;

    // Unravel the linear index into subscripts of the 4D PHI2 array
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int volume2 = NX_PHI2 * plane2;
    const int rest2 = tid % volume2;
    const int n2 = tid / volume2;
    const int ix2 = rest2 / plane2;
    const int iy2 = (rest2 % plane2) / NZ_PHI2;
    const int iz2 = (rest2 % plane2) % NZ_PHI2;

    // Slab extent in cells along each axis
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;

    if (n1 == 0 && ix1 < nx && iy1 < ny && iz1 < nz) {
        // Subscripts for field arrays
        const int gi = ix1 + xs;
        const int gj = iy1 + ys;
        const int gk = iz1 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI1(0, ix1, iy1, iz1);

        // PML coefficients (row 0, selected by the y offset in the slab)
        const {{REAL}} RA01 = RA[INDEX2D_R(0, iy1)] - 1;
        const {{REAL}} RB0 = RB[INDEX2D_R(0, iy1)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, iy1)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, iy1)];

        // Hx
        const int materialHx = ID[INDEX4D_ID(3, gi, gj, gk)];
        const {{REAL}} dEz = (Ez[INDEX3D_FIELDS(gi, gj+1, gk)] - Ez[cell]) / d;
        Hx[cell] -= updatecoeffsH[INDEX2D_MAT(materialHx, 4)] * (RA01 * dEz + RB0 * PHI1[slot0]);
        PHI1[slot0] = RE0 * PHI1[slot0] - RF0 * dEz;
    }

    if (n2 == 0 && ix2 < nx && iy2 < ny && iz2 < nz) {
        // Subscripts for field arrays
        const int gi = ix2 + xs;
        const int gj = iy2 + ys;
        const int gk = iz2 + zs;
        const int cell = INDEX3D_FIELDS(gi, gj, gk);
        const int slot0 = INDEX4D_PHI2(0, ix2, iy2, iz2);

        // PML coefficients (row 0, selected by the y offset in the slab)
        const {{REAL}} RA01 = RA[INDEX2D_R(0, iy2)] - 1;
        const {{REAL}} RB0 = RB[INDEX2D_R(0, iy2)];
        const {{REAL}} RE0 = RE[INDEX2D_R(0, iy2)];
        const {{REAL}} RF0 = RF[INDEX2D_R(0, iy2)];

        // Hz
        const int materialHz = ID[INDEX4D_ID(5, gi, gj, gk)];
        const {{REAL}} dEx = (Ex[INDEX3D_FIELDS(gi, gj+1, gk)] - Ex[cell]) / d;
        Hz[cell] += updatecoeffsH[INDEX2D_MAT(materialHz, 4)] * (RA01 * dEx + RB0 * PHI2[slot0]);
        PHI2[slot0] = RE0 * PHI2[slot0] - RF0 * dEx;
    }
}
|
||||
|
||||
|
||||
__kernel void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf,
                           int NX_PHI1, int NY_PHI1, int NZ_PHI1,
                           int NX_PHI2, int NY_PHI2, int NZ_PHI2,
                           int NY_R,
                           __global const unsigned int* restrict ID,
                           __global const {{REAL}}* restrict Ex,
                           __global const {{REAL}}* restrict Ey,
                           __global const {{REAL}}* restrict Ez,
                           __global {{REAL}} *Hx,
                           __global const {{REAL}}* restrict Hy,
                           __global {{REAL}} *Hz,
                           __global {{REAL}} *PHI1,
                           __global {{REAL}} *PHI2,
                           __global const {{REAL}}* restrict RA,
                           __global const {{REAL}}* restrict RB,
                           __global const {{REAL}}* restrict RE,
                           __global const {{REAL}}* restrict RF,
                           {{REAL}} d) {

    // Updates the Hx and Hz field components for the yplus PML slab
    // (two sets of PML coefficients, rows 0 and 1 of RA/RB/RE/RF).
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of the PML slab
    //     NX_PHI*, NY_PHI*, NZ_PHI*, NY_R: Dimensions of the PHI1, PHI2 and R PML arrays
    //                                      (NY_R is not used directly here; presumably it is
    //                                      read by the INDEX2D_R macro - confirm at its definition)
    //     ID, E, H: Access to ID and field component arrays (Ey and Hy are not touched)
    //     PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //     d: Spatial discretisation along the slab normal (used as dy)

    // Linear index of the current work-item (thread)
    const int idx = get_global_id(2) * get_global_size(0) * get_global_size(1)
                    + get_global_id(1) * get_global_size(0)
                    + get_global_id(0);

    // Linear index -> (p, i, j, k) subscripts of the 4D PHI1 array
    const int vol1 = NX_PHI1 * NY_PHI1 * NZ_PHI1;
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int rem1 = idx % vol1;
    const int p1 = idx / vol1;
    const int i1 = rem1 / plane1;
    const int j1 = (rem1 % plane1) / NZ_PHI1;
    const int k1 = (rem1 % plane1) % NZ_PHI1;

    // Linear index -> (p, i, j, k) subscripts of the 4D PHI2 array
    const int vol2 = NX_PHI2 * NY_PHI2 * NZ_PHI2;
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int rem2 = idx % vol2;
    const int p2 = idx / vol2;
    const int i2 = rem2 / plane2;
    const int j2 = (rem2 % plane2) / NZ_PHI2;
    const int k2 = (rem2 % plane2) % NZ_PHI2;

    // Slab extent in cells and the spacing used for the y-derivative
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;
    const {{REAL}} dy = d;

    // Hx: work-items that map onto the p == 0 block of PHI1 inside the slab
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays
        const int ii = i1 + xs;
        const int jj = j1 + ys;
        const int kk = k1 + zs;

        // PML coefficients, indexed by depth into the slab along y
        const {{REAL}} ra0 = RA[INDEX2D_R(0,j1)];
        const {{REAL}} rb0 = RB[INDEX2D_R(0,j1)];
        const {{REAL}} re0 = RE[INDEX2D_R(0,j1)];
        const {{REAL}} rf0 = RF[INDEX2D_R(0,j1)];
        const {{REAL}} ra1 = RA[INDEX2D_R(1,j1)];
        const {{REAL}} rb1 = RB[INDEX2D_R(1,j1)];
        const {{REAL}} re1 = RE[INDEX2D_R(1,j1)];
        const {{REAL}} rf1 = RF[INDEX2D_R(1,j1)];
        const {{REAL}} ra01 = ra0 * ra1 - 1;

        const int materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
        const {{REAL}} dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;

        // Both PHI entries are read before either is overwritten: the field
        // correction and the PHI(1) update need the pre-update PHI(0)
        const {{REAL}} phi0 = PHI1[INDEX4D_PHI1(0,i1,j1,k1)];
        const {{REAL}} phi1 = PHI1[INDEX4D_PHI1(1,i1,j1,k1)];
        Hx[INDEX3D_FIELDS(ii,jj,kk)] -= updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (ra01 * dEz + ra1 * rb0 * phi0 + rb1 * phi1);
        PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = re1 * phi1 - rf1 * (ra0 * dEz + rb0 * phi0);
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = re0 * phi0 - rf0 * dEz;
    }

    // Hz: work-items that map onto the p == 0 block of PHI2 inside the slab
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays
        const int ii = i2 + xs;
        const int jj = j2 + ys;
        const int kk = k2 + zs;

        // PML coefficients, indexed by depth into the slab along y
        const {{REAL}} ra0 = RA[INDEX2D_R(0,j2)];
        const {{REAL}} rb0 = RB[INDEX2D_R(0,j2)];
        const {{REAL}} re0 = RE[INDEX2D_R(0,j2)];
        const {{REAL}} rf0 = RF[INDEX2D_R(0,j2)];
        const {{REAL}} ra1 = RA[INDEX2D_R(1,j2)];
        const {{REAL}} rb1 = RB[INDEX2D_R(1,j2)];
        const {{REAL}} re1 = RE[INDEX2D_R(1,j2)];
        const {{REAL}} rf1 = RF[INDEX2D_R(1,j2)];
        const {{REAL}} ra01 = ra0 * ra1 - 1;

        const int materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
        const {{REAL}} dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;

        // Both PHI entries are read before either is overwritten: the field
        // correction and the PHI(1) update need the pre-update PHI(0)
        const {{REAL}} phi0 = PHI2[INDEX4D_PHI2(0,i2,j2,k2)];
        const {{REAL}} phi1 = PHI2[INDEX4D_PHI2(1,i2,j2,k2)];
        Hz[INDEX3D_FIELDS(ii,jj,kk)] += updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (ra01 * dEx + ra1 * rb0 * phi0 + rb1 * phi1);
        PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = re1 * phi1 - rf1 * (ra0 * dEx + rb0 * phi0);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = re0 * phi0 - rf0 * dEx;
    }
}
|
||||
|
||||
|
||||
__kernel void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf,
                            int NX_PHI1, int NY_PHI1, int NZ_PHI1,
                            int NX_PHI2, int NY_PHI2, int NZ_PHI2,
                            int NY_R,
                            __global const unsigned int* restrict ID,
                            __global const {{REAL}}* restrict Ex,
                            __global const {{REAL}}* restrict Ey,
                            __global const {{REAL}}* restrict Ez,
                            __global {{REAL}} *Hx,
                            __global {{REAL}} *Hy,
                            __global const {{REAL}}* restrict Hz,
                            __global {{REAL}} *PHI1,
                            __global {{REAL}} *PHI2,
                            __global const {{REAL}}* restrict RA,
                            __global const {{REAL}}* restrict RB,
                            __global const {{REAL}}* restrict RE,
                            __global const {{REAL}}* restrict RF,
                            {{REAL}} d) {

    // Updates the Hx and Hy field components for the zminus PML slab
    // (single set of PML coefficients, row 0 of RA/RB/RE/RF).
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of the PML slab
    //     NX_PHI*, NY_PHI*, NZ_PHI*, NY_R: Dimensions of the PHI1, PHI2 and R PML arrays
    //                                      (NY_R is not used directly here; presumably it is
    //                                      read by the INDEX2D_R macro - confirm at its definition)
    //     ID, E, H: Access to ID and field component arrays (Ez and Hz are not touched)
    //     PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //     d: Spatial discretisation along the slab normal (used as dz)

    // Linear index of the current work-item (thread)
    const int idx = get_global_id(2) * get_global_size(0) * get_global_size(1)
                    + get_global_id(1) * get_global_size(0)
                    + get_global_id(0);

    // Linear index -> (p, i, j, k) subscripts of the 4D PHI1 array
    const int vol1 = NX_PHI1 * NY_PHI1 * NZ_PHI1;
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int rem1 = idx % vol1;
    const int p1 = idx / vol1;
    const int i1 = rem1 / plane1;
    const int j1 = (rem1 % plane1) / NZ_PHI1;
    const int k1 = (rem1 % plane1) % NZ_PHI1;

    // Linear index -> (p, i, j, k) subscripts of the 4D PHI2 array
    const int vol2 = NX_PHI2 * NY_PHI2 * NZ_PHI2;
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int rem2 = idx % vol2;
    const int p2 = idx / vol2;
    const int i2 = rem2 / plane2;
    const int j2 = (rem2 % plane2) / NZ_PHI2;
    const int k2 = (rem2 % plane2) % NZ_PHI2;

    // Slab extent in cells and the spacing used for the z-derivative
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;
    const {{REAL}} dz = d;

    // Hx: work-items that map onto the p == 0 block of PHI1 inside the slab
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays; on the minus side k counts back from zf
        const int ii = i1 + xs;
        const int jj = j1 + ys;
        const int kk = zf - (k1 + 1);

        // PML coefficients, indexed by k
        const {{REAL}} ra01 = RA[INDEX2D_R(0,k1)] - 1;
        const {{REAL}} rb0 = RB[INDEX2D_R(0,k1)];
        const {{REAL}} re0 = RE[INDEX2D_R(0,k1)];
        const {{REAL}} rf0 = RF[INDEX2D_R(0,k1)];

        const int materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
        const {{REAL}} dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;

        // The field correction must use the PHI value from before this step's PHI update
        const {{REAL}} phi0 = PHI1[INDEX4D_PHI1(0,i1,j1,k1)];
        Hx[INDEX3D_FIELDS(ii,jj,kk)] += updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (ra01 * dEy + rb0 * phi0);
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = re0 * phi0 - rf0 * dEy;
    }

    // Hy: work-items that map onto the p == 0 block of PHI2 inside the slab
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays; on the minus side k counts back from zf
        const int ii = i2 + xs;
        const int jj = j2 + ys;
        const int kk = zf - (k2 + 1);

        // PML coefficients, indexed by k
        const {{REAL}} ra01 = RA[INDEX2D_R(0,k2)] - 1;
        const {{REAL}} rb0 = RB[INDEX2D_R(0,k2)];
        const {{REAL}} re0 = RE[INDEX2D_R(0,k2)];
        const {{REAL}} rf0 = RF[INDEX2D_R(0,k2)];

        const int materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
        const {{REAL}} dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;

        // The field correction must use the PHI value from before this step's PHI update
        const {{REAL}} phi0 = PHI2[INDEX4D_PHI2(0,i2,j2,k2)];
        Hy[INDEX3D_FIELDS(ii,jj,kk)] -= updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (ra01 * dEx + rb0 * phi0);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = re0 * phi0 - rf0 * dEx;
    }
}
|
||||
|
||||
|
||||
__kernel void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf,
                            int NX_PHI1, int NY_PHI1, int NZ_PHI1,
                            int NX_PHI2, int NY_PHI2, int NZ_PHI2,
                            int NY_R,
                            __global const unsigned int* restrict ID,
                            __global const {{REAL}}* restrict Ex,
                            __global const {{REAL}}* restrict Ey,
                            __global const {{REAL}}* restrict Ez,
                            __global {{REAL}} *Hx,
                            __global {{REAL}} *Hy,
                            __global const {{REAL}}* restrict Hz,
                            __global {{REAL}} *PHI1,
                            __global {{REAL}} *PHI2,
                            __global const {{REAL}}* restrict RA,
                            __global const {{REAL}}* restrict RB,
                            __global const {{REAL}}* restrict RE,
                            __global const {{REAL}}* restrict RF,
                            {{REAL}} d) {

    // Updates the Hx and Hy field components for the zminus PML slab
    // (two sets of PML coefficients, rows 0 and 1 of RA/RB/RE/RF).
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of the PML slab
    //     NX_PHI*, NY_PHI*, NZ_PHI*, NY_R: Dimensions of the PHI1, PHI2 and R PML arrays
    //                                      (NY_R is not used directly here; presumably it is
    //                                      read by the INDEX2D_R macro - confirm at its definition)
    //     ID, E, H: Access to ID and field component arrays (Ez and Hz are not touched)
    //     PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //     d: Spatial discretisation along the slab normal (used as dz)

    // Linear index of the current work-item (thread)
    const int idx = get_global_id(2) * get_global_size(0) * get_global_size(1)
                    + get_global_id(1) * get_global_size(0)
                    + get_global_id(0);

    // Linear index -> (p, i, j, k) subscripts of the 4D PHI1 array
    const int vol1 = NX_PHI1 * NY_PHI1 * NZ_PHI1;
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int rem1 = idx % vol1;
    const int p1 = idx / vol1;
    const int i1 = rem1 / plane1;
    const int j1 = (rem1 % plane1) / NZ_PHI1;
    const int k1 = (rem1 % plane1) % NZ_PHI1;

    // Linear index -> (p, i, j, k) subscripts of the 4D PHI2 array
    const int vol2 = NX_PHI2 * NY_PHI2 * NZ_PHI2;
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int rem2 = idx % vol2;
    const int p2 = idx / vol2;
    const int i2 = rem2 / plane2;
    const int j2 = (rem2 % plane2) / NZ_PHI2;
    const int k2 = (rem2 % plane2) % NZ_PHI2;

    // Slab extent in cells and the spacing used for the z-derivative
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;
    const {{REAL}} dz = d;

    // Hx: work-items that map onto the p == 0 block of PHI1 inside the slab
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays; on the minus side k counts back from zf
        const int ii = i1 + xs;
        const int jj = j1 + ys;
        const int kk = zf - (k1 + 1);

        // PML coefficients, indexed by k
        const {{REAL}} ra0 = RA[INDEX2D_R(0,k1)];
        const {{REAL}} rb0 = RB[INDEX2D_R(0,k1)];
        const {{REAL}} re0 = RE[INDEX2D_R(0,k1)];
        const {{REAL}} rf0 = RF[INDEX2D_R(0,k1)];
        const {{REAL}} ra1 = RA[INDEX2D_R(1,k1)];
        const {{REAL}} rb1 = RB[INDEX2D_R(1,k1)];
        const {{REAL}} re1 = RE[INDEX2D_R(1,k1)];
        const {{REAL}} rf1 = RF[INDEX2D_R(1,k1)];
        const {{REAL}} ra01 = ra0 * ra1 - 1;

        const int materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
        const {{REAL}} dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;

        // Both PHI entries are read before either is overwritten: the field
        // correction and the PHI(1) update need the pre-update PHI(0)
        const {{REAL}} phi0 = PHI1[INDEX4D_PHI1(0,i1,j1,k1)];
        const {{REAL}} phi1 = PHI1[INDEX4D_PHI1(1,i1,j1,k1)];
        Hx[INDEX3D_FIELDS(ii,jj,kk)] += updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (ra01 * dEy + ra1 * rb0 * phi0 + rb1 * phi1);
        PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = re1 * phi1 - rf1 * (ra0 * dEy + rb0 * phi0);
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = re0 * phi0 - rf0 * dEy;
    }

    // Hy: work-items that map onto the p == 0 block of PHI2 inside the slab
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays; on the minus side k counts back from zf
        const int ii = i2 + xs;
        const int jj = j2 + ys;
        const int kk = zf - (k2 + 1);

        // PML coefficients, indexed by k
        const {{REAL}} ra0 = RA[INDEX2D_R(0,k2)];
        const {{REAL}} rb0 = RB[INDEX2D_R(0,k2)];
        const {{REAL}} re0 = RE[INDEX2D_R(0,k2)];
        const {{REAL}} rf0 = RF[INDEX2D_R(0,k2)];
        const {{REAL}} ra1 = RA[INDEX2D_R(1,k2)];
        const {{REAL}} rb1 = RB[INDEX2D_R(1,k2)];
        const {{REAL}} re1 = RE[INDEX2D_R(1,k2)];
        const {{REAL}} rf1 = RF[INDEX2D_R(1,k2)];
        const {{REAL}} ra01 = ra0 * ra1 - 1;

        const int materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
        const {{REAL}} dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;

        // Both PHI entries are read before either is overwritten: the field
        // correction and the PHI(1) update need the pre-update PHI(0)
        const {{REAL}} phi0 = PHI2[INDEX4D_PHI2(0,i2,j2,k2)];
        const {{REAL}} phi1 = PHI2[INDEX4D_PHI2(1,i2,j2,k2)];
        Hy[INDEX3D_FIELDS(ii,jj,kk)] -= updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (ra01 * dEx + ra1 * rb0 * phi0 + rb1 * phi1);
        PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = re1 * phi1 - rf1 * (ra0 * dEx + rb0 * phi0);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = re0 * phi0 - rf0 * dEx;
    }
}
|
||||
|
||||
|
||||
__kernel void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf,
                           int NX_PHI1, int NY_PHI1, int NZ_PHI1,
                           int NX_PHI2, int NY_PHI2, int NZ_PHI2,
                           int NY_R,
                           __global const unsigned int* restrict ID,
                           __global const {{REAL}}* restrict Ex,
                           __global const {{REAL}}* restrict Ey,
                           __global const {{REAL}}* restrict Ez,
                           __global {{REAL}} *Hx,
                           __global {{REAL}} *Hy,
                           __global const {{REAL}}* restrict Hz,
                           __global {{REAL}} *PHI1,
                           __global {{REAL}} *PHI2,
                           __global const {{REAL}}* restrict RA,
                           __global const {{REAL}}* restrict RB,
                           __global const {{REAL}}* restrict RE,
                           __global const {{REAL}}* restrict RF,
                           {{REAL}} d) {

    // Updates the Hx and Hy field components for the zplus PML slab
    // (single set of PML coefficients, row 0 of RA/RB/RE/RF).
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of the PML slab
    //     NX_PHI*, NY_PHI*, NZ_PHI*, NY_R: Dimensions of the PHI1, PHI2 and R PML arrays
    //                                      (NY_R is not used directly here; presumably it is
    //                                      read by the INDEX2D_R macro - confirm at its definition)
    //     ID, E, H: Access to ID and field component arrays (Ez and Hz are not touched)
    //     PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //     d: Spatial discretisation along the slab normal (used as dz)

    // Linear index of the current work-item (thread)
    const int idx = get_global_id(2) * get_global_size(0) * get_global_size(1)
                    + get_global_id(1) * get_global_size(0)
                    + get_global_id(0);

    // Linear index -> (p, i, j, k) subscripts of the 4D PHI1 array
    const int vol1 = NX_PHI1 * NY_PHI1 * NZ_PHI1;
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int rem1 = idx % vol1;
    const int p1 = idx / vol1;
    const int i1 = rem1 / plane1;
    const int j1 = (rem1 % plane1) / NZ_PHI1;
    const int k1 = (rem1 % plane1) % NZ_PHI1;

    // Linear index -> (p, i, j, k) subscripts of the 4D PHI2 array
    const int vol2 = NX_PHI2 * NY_PHI2 * NZ_PHI2;
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int rem2 = idx % vol2;
    const int p2 = idx / vol2;
    const int i2 = rem2 / plane2;
    const int j2 = (rem2 % plane2) / NZ_PHI2;
    const int k2 = (rem2 % plane2) % NZ_PHI2;

    // Slab extent in cells and the spacing used for the z-derivative
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;
    const {{REAL}} dz = d;

    // Hx: work-items that map onto the p == 0 block of PHI1 inside the slab
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays
        const int ii = i1 + xs;
        const int jj = j1 + ys;
        const int kk = k1 + zs;

        // PML coefficients, indexed by depth into the slab along z
        const {{REAL}} ra01 = RA[INDEX2D_R(0,k1)] - 1;
        const {{REAL}} rb0 = RB[INDEX2D_R(0,k1)];
        const {{REAL}} re0 = RE[INDEX2D_R(0,k1)];
        const {{REAL}} rf0 = RF[INDEX2D_R(0,k1)];

        const int materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
        const {{REAL}} dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;

        // The field correction must use the PHI value from before this step's PHI update
        const {{REAL}} phi0 = PHI1[INDEX4D_PHI1(0,i1,j1,k1)];
        Hx[INDEX3D_FIELDS(ii,jj,kk)] += updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (ra01 * dEy + rb0 * phi0);
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = re0 * phi0 - rf0 * dEy;
    }

    // Hy: work-items that map onto the p == 0 block of PHI2 inside the slab
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays
        const int ii = i2 + xs;
        const int jj = j2 + ys;
        const int kk = k2 + zs;

        // PML coefficients, indexed by depth into the slab along z
        const {{REAL}} ra01 = RA[INDEX2D_R(0,k2)] - 1;
        const {{REAL}} rb0 = RB[INDEX2D_R(0,k2)];
        const {{REAL}} re0 = RE[INDEX2D_R(0,k2)];
        const {{REAL}} rf0 = RF[INDEX2D_R(0,k2)];

        const int materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
        const {{REAL}} dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;

        // The field correction must use the PHI value from before this step's PHI update
        const {{REAL}} phi0 = PHI2[INDEX4D_PHI2(0,i2,j2,k2)];
        Hy[INDEX3D_FIELDS(ii,jj,kk)] -= updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (ra01 * dEx + rb0 * phi0);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = re0 * phi0 - rf0 * dEx;
    }
}
|
||||
|
||||
|
||||
__kernel void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf,
                           int NX_PHI1, int NY_PHI1, int NZ_PHI1,
                           int NX_PHI2, int NY_PHI2, int NZ_PHI2,
                           int NY_R,
                           __global const unsigned int* restrict ID,
                           __global const {{REAL}}* restrict Ex,
                           __global const {{REAL}}* restrict Ey,
                           __global const {{REAL}}* restrict Ez,
                           __global {{REAL}} *Hx,
                           __global {{REAL}} *Hy,
                           __global const {{REAL}}* restrict Hz,
                           __global {{REAL}} *PHI1,
                           __global {{REAL}} *PHI2,
                           __global const {{REAL}}* restrict RA,
                           __global const {{REAL}}* restrict RB,
                           __global const {{REAL}}* restrict RE,
                           __global const {{REAL}}* restrict RF,
                           {{REAL}} d) {

    // Updates the Hx and Hy field components for the zplus PML slab
    // (two sets of PML coefficients, rows 0 and 1 of RA/RB/RE/RF).
    //
    // Args:
    //     xs, xf, ys, yf, zs, zf: Cell coordinates of the PML slab
    //     NX_PHI*, NY_PHI*, NZ_PHI*, NY_R: Dimensions of the PHI1, PHI2 and R PML arrays
    //                                      (NY_R is not used directly here; presumably it is
    //                                      read by the INDEX2D_R macro - confirm at its definition)
    //     ID, E, H: Access to ID and field component arrays (Ez and Hz are not touched)
    //     PHI1, PHI2, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
    //     d: Spatial discretisation along the slab normal (used as dz)

    // Linear index of the current work-item (thread)
    const int idx = get_global_id(2) * get_global_size(0) * get_global_size(1)
                    + get_global_id(1) * get_global_size(0)
                    + get_global_id(0);

    // Linear index -> (p, i, j, k) subscripts of the 4D PHI1 array
    const int vol1 = NX_PHI1 * NY_PHI1 * NZ_PHI1;
    const int plane1 = NY_PHI1 * NZ_PHI1;
    const int rem1 = idx % vol1;
    const int p1 = idx / vol1;
    const int i1 = rem1 / plane1;
    const int j1 = (rem1 % plane1) / NZ_PHI1;
    const int k1 = (rem1 % plane1) % NZ_PHI1;

    // Linear index -> (p, i, j, k) subscripts of the 4D PHI2 array
    const int vol2 = NX_PHI2 * NY_PHI2 * NZ_PHI2;
    const int plane2 = NY_PHI2 * NZ_PHI2;
    const int rem2 = idx % vol2;
    const int p2 = idx / vol2;
    const int i2 = rem2 / plane2;
    const int j2 = (rem2 % plane2) / NZ_PHI2;
    const int k2 = (rem2 % plane2) % NZ_PHI2;

    // Slab extent in cells and the spacing used for the z-derivative
    const int nx = xf - xs;
    const int ny = yf - ys;
    const int nz = zf - zs;
    const {{REAL}} dz = d;

    // Hx: work-items that map onto the p == 0 block of PHI1 inside the slab
    if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
        // Subscripts for field arrays
        const int ii = i1 + xs;
        const int jj = j1 + ys;
        const int kk = k1 + zs;

        // PML coefficients, indexed by depth into the slab along z
        const {{REAL}} ra0 = RA[INDEX2D_R(0,k1)];
        const {{REAL}} rb0 = RB[INDEX2D_R(0,k1)];
        const {{REAL}} re0 = RE[INDEX2D_R(0,k1)];
        const {{REAL}} rf0 = RF[INDEX2D_R(0,k1)];
        const {{REAL}} ra1 = RA[INDEX2D_R(1,k1)];
        const {{REAL}} rb1 = RB[INDEX2D_R(1,k1)];
        const {{REAL}} re1 = RE[INDEX2D_R(1,k1)];
        const {{REAL}} rf1 = RF[INDEX2D_R(1,k1)];
        const {{REAL}} ra01 = ra0 * ra1 - 1;

        const int materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
        const {{REAL}} dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;

        // Both PHI entries are read before either is overwritten: the field
        // correction and the PHI(1) update need the pre-update PHI(0)
        const {{REAL}} phi0 = PHI1[INDEX4D_PHI1(0,i1,j1,k1)];
        const {{REAL}} phi1 = PHI1[INDEX4D_PHI1(1,i1,j1,k1)];
        Hx[INDEX3D_FIELDS(ii,jj,kk)] += updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (ra01 * dEy + ra1 * rb0 * phi0 + rb1 * phi1);
        PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = re1 * phi1 - rf1 * (ra0 * dEy + rb0 * phi0);
        PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = re0 * phi0 - rf0 * dEy;
    }

    // Hy: work-items that map onto the p == 0 block of PHI2 inside the slab
    if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
        // Subscripts for field arrays
        const int ii = i2 + xs;
        const int jj = j2 + ys;
        const int kk = k2 + zs;

        // PML coefficients, indexed by depth into the slab along z
        const {{REAL}} ra0 = RA[INDEX2D_R(0,k2)];
        const {{REAL}} rb0 = RB[INDEX2D_R(0,k2)];
        const {{REAL}} re0 = RE[INDEX2D_R(0,k2)];
        const {{REAL}} rf0 = RF[INDEX2D_R(0,k2)];
        const {{REAL}} ra1 = RA[INDEX2D_R(1,k2)];
        const {{REAL}} rb1 = RB[INDEX2D_R(1,k2)];
        const {{REAL}} re1 = RE[INDEX2D_R(1,k2)];
        const {{REAL}} rf1 = RF[INDEX2D_R(1,k2)];
        const {{REAL}} ra01 = ra0 * ra1 - 1;

        const int materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
        const {{REAL}} dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;

        // Both PHI entries are read before either is overwritten: the field
        // correction and the PHI(1) update need the pre-update PHI(0)
        const {{REAL}} phi0 = PHI2[INDEX4D_PHI2(0,i2,j2,k2)];
        const {{REAL}} phi1 = PHI2[INDEX4D_PHI2(1,i2,j2,k2)];
        Hy[INDEX3D_FIELDS(ii,jj,kk)] -= updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (ra01 * dEx + ra1 * rb0 * phi0 + rb1 * phi1);
        PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = re1 * phi1 - rf1 * (ra0 * dEx + rb0 * phi0);
        PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = re0 * phi0 - rf0 * dEx;
    }
}
|
55
gprMax/opencl/snapshots.cl
普通文件
55
gprMax/opencl/snapshots.cl
普通文件
@@ -0,0 +1,55 @@
|
||||
// Macros for converting subscripts to a linear (row-major) index.
// Each replacement list is wrapped in parentheses so the macro expands
// correctly wherever it is used, not only directly inside [].
#define INDEX3D_FIELDS(i, j, k) ((i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k))
#define INDEX4D_SNAPS(p, i, j, k) ((p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k))
|
||||
|
||||
////////////////////
|
||||
// Store snapshot //
|
||||
////////////////////
|
||||
|
||||
__kernel void store_snapshot(int p, int xs, int xf, int ys, int yf, int zs, int zf, int dx, int dy, int dz,
                             __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey,
                             __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx,
                             __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz,
                             __global {{REAL}} *snapEx, __global {{REAL}} *snapEy, __global {{REAL}} *snapEz,
                             __global {{REAL}} *snapHx, __global {{REAL}} *snapHy, __global {{REAL}} *snapHz) {

    // This function stores field values for a snapshot.
    //
    // The read-only field pointers use the OpenCL C `restrict` qualifier
    // (the CUDA/GNU spelling `__restrict__` is not part of OpenCL C).
    //
    // Args:
    //     p: Snapshot number
    //     xs, xf, ys, yf, zs, zf: Start and finish cell coordinates for snapshot
    //     dx, dy, dz: Sampling interval in cell coordinates for snapshot
    //     E, H: Access to field component arrays
    //     snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots

    // Obtain the linear index corresponding to the current work-item (thread)
    int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);

    // Convert the linear index to subscripts for 4D SNAPS array
    int i = (idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) / ({{NY_SNAPS}} * {{NZ_SNAPS}});
    int j = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) / {{NZ_SNAPS}};
    int k = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) % {{NZ_SNAPS}};

    // Subscripts for field arrays
    int ii, jj, kk;

    // NOTE(review): i/j/k are tested against [xs, xf) etc. and then xs/ys/zs
    // are added again below; this looks like it relies on how the host sets
    // the start/finish values - confirm against the calling code.
    if (i >= xs && i < xf && j >= ys && j < yf && k >= zs && k < zf) {

        // Increment subscripts for field array to account for spatial sampling of snapshot
        ii = (xs + i) * dx;
        jj = (ys + j) * dy;
        kk = (zs + k) * dz;

        // The electric field component value at a point comes from an average of
        // the 4 electric field component values in that cell
        snapEx[INDEX4D_SNAPS(p,i,j,k)] = (Ex[INDEX3D_FIELDS(ii,jj,kk)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk)] + Ex[INDEX3D_FIELDS(ii,jj,kk+1)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk+1)]) / 4;
        snapEy[INDEX4D_SNAPS(p,i,j,k)] = (Ey[INDEX3D_FIELDS(ii,jj,kk)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk)] + Ey[INDEX3D_FIELDS(ii,jj,kk+1)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk+1)]) / 4;
        snapEz[INDEX4D_SNAPS(p,i,j,k)] = (Ez[INDEX3D_FIELDS(ii,jj,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj,kk)] + Ez[INDEX3D_FIELDS(ii,jj+1,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj+1,kk)]) / 4;

        // The magnetic field component value at a point comes from average of
        // 2 magnetic field component values in that cell and the following cell
        snapHx[INDEX4D_SNAPS(p,i,j,k)] = (Hx[INDEX3D_FIELDS(ii,jj,kk)] + Hx[INDEX3D_FIELDS(ii+1,jj,kk)]) / 2;
        snapHy[INDEX4D_SNAPS(p,i,j,k)] = (Hy[INDEX3D_FIELDS(ii,jj,kk)] + Hy[INDEX3D_FIELDS(ii,jj+1,kk)]) / 2;
        snapHz[INDEX4D_SNAPS(p,i,j,k)] = (Hz[INDEX3D_FIELDS(ii,jj,kk)] + Hz[INDEX3D_FIELDS(ii,jj,kk+1)]) / 2;
    }
}
|
206
gprMax/opencl/source_updates.cl
普通文件
206
gprMax/opencl/source_updates.cl
普通文件
@@ -0,0 +1,206 @@
|
||||
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
// Macros for converting subscripts to a linear (row-major) index.
// Each replacement list is wrapped in parentheses so the macro expands
// correctly wherever it is used, not only directly inside [].
#define INDEX2D_MAT(m, n) ((m)*({{NY_MATCOEFFS}}) + (n))
#define INDEX2D_SRCINFO(m, n) ((m)*({{NY_SRCINFO}}) + (n))
#define INDEX2D_SRCWAVES(m, n) ((m)*({{NY_SRCWAVES}}) + (n))
#define INDEX3D_FIELDS(i, j, k) ((i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k))
#define INDEX4D_ID(p, i, j, k) ((p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}}) + (i)*({{NY_ID}})*({{NZ_ID}}) + (j)*({{NZ_ID}}) + (k))

// Material coefficients (read-only) in constant memory.
// The initialiser values are written out by the template loops below, one
// value per line; each value is followed by a comma, including the last.
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
    {% for i in updateEVal %}
    {{i}},
    {% endfor %}
};

__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
    {% for i in updateHVal %}
    {{i}},
    {% endfor %}
};
|
||||
|
||||
|
||||
///////////////////////////////////////////
|
||||
// Hertzian dipole electric field update //
|
||||
///////////////////////////////////////////
|
||||
|
||||
__kernel void update_hertzian_dipole(int NHERTZDIPOLE, int iteration,
                                     {{REAL}} dx, {{REAL}} dy, {{REAL}} dz,
                                     __global const int* restrict srcinfo1,
                                     __global const {{REAL}}* restrict srcinfo2,
                                     __global const {{REAL}}* restrict srcwaveforms,
                                     __global const unsigned int* restrict ID,
                                     __global {{REAL}} *Ex,
                                     __global {{REAL}} *Ey,
                                     __global {{REAL}} *Ez) {

    // Updates electric field values for Hertzian dipole sources;
    // each work-item handles at most one source.
    //
    // Args:
    //     NHERTZDIPOLE: Total number of Hertzian dipoles in the model
    //     iteration: Iteration number of simulation
    //     dx, dy, dz: Spatial discretisations
    //     srcinfo1: Source cell coordinates and polarisation information
    //     srcinfo2: Other source information, e.g. length, resistance etc...
    //               (element [src] is used as the dipole length dl)
    //     srcwaveforms: Source waveform values
    //     ID, E: Access to ID and field component arrays

    // Linear work-item index, used as the source number
    const int src = get_global_id(2) * get_global_size(0) * get_global_size(1)
                    + get_global_id(1) * get_global_size(0)
                    + get_global_id(0);

    // Work-items beyond the number of sources have nothing to do
    if (src >= NHERTZDIPOLE) {
        return;
    }

    // Source cell coordinates, polarisation and length
    const int i = srcinfo1[INDEX2D_SRCINFO(src,0)];
    const int j = srcinfo1[INDEX2D_SRCINFO(src,1)];
    const int k = srcinfo1[INDEX2D_SRCINFO(src,2)];
    const int polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
    const {{REAL}} dl = srcinfo2[src];

    switch (polarisation) {
        // 'x' polarised source
        case 0: {
            const int materialEx = ID[INDEX4D_ID(0,i,j,k)];
            Ex[INDEX3D_FIELDS(i,j,k)] -= updatecoeffsE[INDEX2D_MAT(materialEx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
            break;
        }

        // 'y' polarised source
        case 1: {
            const int materialEy = ID[INDEX4D_ID(1,i,j,k)];
            Ey[INDEX3D_FIELDS(i,j,k)] -= updatecoeffsE[INDEX2D_MAT(materialEy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
            break;
        }

        // 'z' polarised source
        case 2: {
            const int materialEz = ID[INDEX4D_ID(2,i,j,k)];
            Ez[INDEX3D_FIELDS(i,j,k)] -= updatecoeffsE[INDEX2D_MAT(materialEz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
            break;
        }

        // Any other polarisation value leaves the fields untouched
        default:
            break;
    }
}
|
||||
|
||||
__kernel void update_magnetic_dipole(int NMAGDIPOLE, int iteration,
                                     {{REAL}} dx, {{REAL}} dy, {{REAL}} dz,
                                     __global const int* restrict srcinfo1,
                                     __global const {{REAL}}* restrict srcinfo2,
                                     __global const {{REAL}}* restrict srcwaveforms,
                                     __global const unsigned int* restrict ID,
                                     __global {{REAL}} *Hx,
                                     __global {{REAL}} *Hy,
                                     __global {{REAL}} *Hz) {

    // Updates magnetic field values for magnetic dipole sources;
    // each work-item handles at most one source.
    //
    // Args:
    //     NMAGDIPOLE: Total number of magnetic dipoles in the model
    //     iteration: Iteration number of simulation
    //     dx, dy, dz: Spatial discretisations
    //     srcinfo1: Source cell coordinates and polarisation information
    //     srcinfo2: Other source information, e.g. length, resistance etc...
    //               (not read by this kernel)
    //     srcwaveforms: Source waveform values
    //     ID, H: Access to ID and field component arrays

    // Linear work-item index, used as the source number
    const int src = get_global_id(2) * get_global_size(0) * get_global_size(1)
                    + get_global_id(1) * get_global_size(0)
                    + get_global_id(0);

    // Work-items beyond the number of sources have nothing to do
    if (src >= NMAGDIPOLE) {
        return;
    }

    // Source cell coordinates and polarisation
    const int i = srcinfo1[INDEX2D_SRCINFO(src,0)];
    const int j = srcinfo1[INDEX2D_SRCINFO(src,1)];
    const int k = srcinfo1[INDEX2D_SRCINFO(src,2)];
    const int polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];

    switch (polarisation) {
        // 'x' polarised source
        case 0: {
            const int materialHx = ID[INDEX4D_ID(3,i,j,k)];
            Hx[INDEX3D_FIELDS(i,j,k)] -= updatecoeffsH[INDEX2D_MAT(materialHx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
            break;
        }

        // 'y' polarised source
        case 1: {
            const int materialHy = ID[INDEX4D_ID(4,i,j,k)];
            Hy[INDEX3D_FIELDS(i,j,k)] -= updatecoeffsH[INDEX2D_MAT(materialHy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
            break;
        }

        // 'z' polarised source
        case 2: {
            const int materialHz = ID[INDEX4D_ID(5,i,j,k)];
            Hz[INDEX3D_FIELDS(i,j,k)] -= updatecoeffsH[INDEX2D_MAT(materialHz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
            break;
        }

        // Any other polarisation value leaves the fields untouched
        default:
            break;
    }
}
|
||||
|
||||
__kernel void update_voltage_source(int NVOLTSRC, int iteration, {{REAL}} dx, {{REAL}} dy, {{REAL}} dz, __global const int* restrict srcinfo1, __global const {{REAL}}* restrict srcinfo2, __global const {{REAL}}* restrict srcwaveforms, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez){
    // Applies each voltage source to the electric field component selected
    // by that source's polarisation. A source with non-zero resistance
    // subtracts a scaled waveform term from the existing field value; a
    // source with zero resistance overwrites the field value with the
    // negated waveform divided by the cell size along the polarisation axis.
    //
    // Args:
    //     NVOLTSRC: Total number of voltage sources in the model
    //     iteration: Iteration number of simulation
    //     dx, dy, dz: Spatial discretisations
    //     srcinfo1: Source cell coordinates (columns 0-2) and polarisation
    //               (column 3)
    //     srcinfo2: Other source information; read here as the resistance
    //               of each source
    //     srcwaveforms: Source waveform values
    //     ID, E: Access to ID and field component arrays
    //
    // NOTE(review): updatecoeffsE is not a kernel argument; presumably it is
    // declared at file scope earlier in this kernel file - confirm.

    // Flatten the 3D global work-item ID into one linear index; each work
    // item handles (at most) one source
    const size_t sizex = get_global_size(0);
    const size_t sizexy = sizex * get_global_size(1);
    int src = get_global_id(2) * sizexy + get_global_id(1) * sizex + get_global_id(0);

    // Work items beyond the number of sources have nothing to do
    if (src >= NVOLTSRC) {
        return;
    }

    const int i = srcinfo1[INDEX2D_SRCINFO(src,0)];
    const int j = srcinfo1[INDEX2D_SRCINFO(src,1)];
    const int k = srcinfo1[INDEX2D_SRCINFO(src,2)];
    const int polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
    const {{REAL}} resistance = srcinfo2[src];

    // Waveform value is shared by every branch below
    const {{REAL}} wave = srcwaveforms[INDEX2D_SRCWAVES(src,iteration)];

    switch (polarisation) {
        // 'x' polarised source
        case 0: {
            if (resistance == 0) {
                Ex[INDEX3D_FIELDS(i,j,k)] = -1 * wave / dx;
            }
            else {
                const int material = ID[INDEX4D_ID(0,i,j,k)];
                Ex[INDEX3D_FIELDS(i,j,k)] -= updatecoeffsE[INDEX2D_MAT(material,4)] * wave * (1 / (resistance * dy * dz));
            }
            break;
        }
        // 'y' polarised source
        case 1: {
            if (resistance == 0) {
                Ey[INDEX3D_FIELDS(i,j,k)] = -1 * wave / dy;
            }
            else {
                const int material = ID[INDEX4D_ID(1,i,j,k)];
                Ey[INDEX3D_FIELDS(i,j,k)] -= updatecoeffsE[INDEX2D_MAT(material,4)] * wave * (1 / (resistance * dx * dz));
            }
            break;
        }
        // 'z' polarised source
        case 2: {
            if (resistance == 0) {
                Ez[INDEX3D_FIELDS(i,j,k)] = -1 * wave / dz;
            }
            else {
                const int material = ID[INDEX4D_ID(2,i,j,k)];
                Ez[INDEX3D_FIELDS(i,j,k)] -= updatecoeffsE[INDEX2D_MAT(material,4)] * wave * (1 / (resistance * dx * dy));
            }
            break;
        }
    }
}
|
59
gprMax/opencl/store_outputs.cl
普通文件
59
gprMax/opencl/store_outputs.cl
普通文件
@@ -0,0 +1,59 @@
|
||||
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
|
||||
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
|
||||
#
|
||||
# This file is part of gprMax.
|
||||
#
|
||||
# gprMax is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# gprMax is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
#define INDEX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n)
|
||||
#define INDEX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k)
|
||||
#define INDEX3D_FIELDS(i,j,k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
|
||||
|
||||
__kernel void store_outputs(int NRX, int iteration,
|
||||
__global const int* restrict rxcoords,
|
||||
__global {{REAL}} *rxs,
|
||||
__global const {{REAL}}* restrict Ex,
|
||||
__global const {{REAL}}* restrict Ey,
|
||||
__global const {{REAL}}* restrict Ez,
|
||||
__global const {{REAL}}* restrict Hx,
|
||||
__global const {{REAL}}* restrict Hy,
|
||||
__global const {{REAL}}* restrict Hz) {
|
||||
|
||||
// This function stores field component values for every receiver in the model.
|
||||
//
|
||||
// Args:
|
||||
// NRX: total number of receivers in the model.
|
||||
// rxs: array to store field components for receivers - rows
|
||||
// are field components; columns are iterations; pages are receiver
|
||||
|
||||
// Obtain linear index corresponding to the current work item
|
||||
int rx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
|
||||
get_global_id(1) * get_global_size(0) + get_global_id(0);
|
||||
|
||||
int i,j,k;
|
||||
|
||||
if (rx < NRX) {
|
||||
i = rxcoords[INDEX2D_RXCOORDS(rx,0)];
|
||||
j = rxcoords[INDEX2D_RXCOORDS(rx,1)];
|
||||
k = rxcoords[INDEX2D_RXCOORDS(rx,2)];
|
||||
rxs[INDEX3D_RXS(0,iteration,rx)] = Ex[INDEX3D_FIELDS(i,j,k)];
|
||||
rxs[INDEX3D_RXS(1,iteration,rx)] = Ey[INDEX3D_FIELDS(i,j,k)];
|
||||
rxs[INDEX3D_RXS(2,iteration,rx)] = Ez[INDEX3D_FIELDS(i,j,k)];
|
||||
rxs[INDEX3D_RXS(3,iteration,rx)] = Hx[INDEX3D_FIELDS(i,j,k)];
|
||||
rxs[INDEX3D_RXS(4,iteration,rx)] = Hy[INDEX3D_FIELDS(i,j,k)];
|
||||
rxs[INDEX3D_RXS(5,iteration,rx)] = Hz[INDEX3D_FIELDS(i,j,k)];
|
||||
}
|
||||
}
|
||||
|
356
gprMax/pml.py
356
gprMax/pml.py
@@ -21,25 +21,31 @@ from importlib import import_module
|
||||
import gprMax.config as config
|
||||
import numpy as np
|
||||
|
||||
from .utilities.utilities import timer
|
||||
|
||||
|
||||
class CFSParameter:
|
||||
"""Individual CFS parameter (e.g. alpha, kappa, or sigma)."""
|
||||
|
||||
# Allowable scaling profiles and directions
|
||||
scalingprofiles = {'constant': 0, 'linear': 1, 'quadratic': 2, 'cubic': 3,
|
||||
'quartic': 4, 'quintic': 5, 'sextic': 6, 'septic': 7, 'octic': 8}
|
||||
'quartic': 4, 'quintic': 5, 'sextic': 6, 'septic': 7,
|
||||
'octic': 8}
|
||||
scalingdirections = ['forward', 'reverse']
|
||||
|
||||
def __init__(self, ID=None, scaling='polynomial', scalingprofile=None,
|
||||
scalingdirection='forward', min=0, max=0):
|
||||
"""
|
||||
Args:
|
||||
ID (str): Identifier for CFS parameter, can be: 'alpha', 'kappa' or 'sigma'.
|
||||
scaling (str): Type of scaling, can be: 'polynomial'.
|
||||
scalingprofile (str): Type of scaling profile from scalingprofiles.
|
||||
scalingdirection (str): Direction of scaling profile from scalingdirections.
|
||||
min (float): Minimum value for parameter.
|
||||
max (float): Maximum value for parameter.
|
||||
ID: string identifier for CFS parameter, can be: 'alpha', 'kappa' or
|
||||
'sigma'.
|
||||
scaling: string for type of scaling, can be: 'polynomial'.
|
||||
scalingprofile: string for type of scaling profile from
|
||||
scalingprofiles.
|
||||
scalingdirection: string for direction of scaling profile from
|
||||
scalingdirections.
|
||||
min: float for minimum value for parameter.
|
||||
max: float for maximum value for parameter.
|
||||
"""
|
||||
|
||||
self.ID = ID
|
||||
@@ -56,9 +62,9 @@ class CFS:
|
||||
def __init__(self):
|
||||
"""
|
||||
Args:
|
||||
alpha (CFSParameter): alpha parameter for CFS.
|
||||
kappa (CFSParameter): kappa parameter for CFS.
|
||||
sigma (CFSParameter): sigma parameter for CFS.
|
||||
alpha: CFSParameter alpha parameter for CFS.
|
||||
kappa: CFSParameter kappa parameter for CFS.
|
||||
sigma: CFSParameter sigma parameter for CFS.
|
||||
"""
|
||||
|
||||
self.alpha = CFSParameter(ID='alpha', scalingprofile='constant')
|
||||
@@ -70,11 +76,11 @@ class CFS:
|
||||
material properties.
|
||||
|
||||
Args:
|
||||
d (float): dx, dy, or dz in direction of PML.
|
||||
er (float): Average permittivity of underlying material.
|
||||
mr (float): Average permeability of underlying material.
|
||||
G (class): Grid class instance - holds essential parameters
|
||||
describing the model.
|
||||
d: float for dx, dy, or dz in direction of PML.
|
||||
er: float for average permittivity of underlying material.
|
||||
mr: float for average permeability of underlying material.
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
"""
|
||||
|
||||
# Calculation of the maximum value of sigma from http://dx.doi.org/10.1109/8.546249
|
||||
@@ -86,17 +92,17 @@ class CFS:
|
||||
electric and magnetic PML updates.
|
||||
|
||||
Args:
|
||||
order (int): Order of polynomial for scaling profile.
|
||||
Evalues (float): numpy array holding scaling profile values for
|
||||
electric PML update.
|
||||
Hvalues (float): numpy array holding scaling profile values for
|
||||
magnetic PML update.
|
||||
order: int of order of polynomial for scaling profile.
|
||||
Evalues: float array holding scaling profile values for
|
||||
electric PML update.
|
||||
Hvalues: float array holding scaling profile values for
|
||||
magnetic PML update.
|
||||
|
||||
Returns:
|
||||
Evalues (float): numpy array holding scaling profile values for
|
||||
electric PML update.
|
||||
Hvalues (float): numpy array holding scaling profile values for
|
||||
magnetic PML update.
|
||||
Evalues: float array holding scaling profile values for
|
||||
electric PML update.
|
||||
Hvalues: float array holding scaling profile values for
|
||||
magnetic PML update.
|
||||
"""
|
||||
|
||||
tmp = (np.linspace(0, (len(Evalues) - 1) + 0.5, num=2 * len(Evalues))
|
||||
@@ -111,17 +117,18 @@ class CFS:
|
||||
profile type and minimum and maximum values.
|
||||
|
||||
Args:
|
||||
thickness (int): Thickness of PML in cells.
|
||||
parameter (CFSParameter): Instance of CFSParameter
|
||||
thickness: int of thickness of PML in cells.
|
||||
parameter: instance of CFSParameter
|
||||
|
||||
Returns:
|
||||
Evalues (float): numpy array holding profile value for electric
|
||||
PML update.
|
||||
Hvalues (float): numpy array holding profile value for magnetic
|
||||
PML update.
|
||||
Evalues: float array holding profile value for electric
|
||||
PML update.
|
||||
Hvalues: float array holding profile value for magnetic
|
||||
PML update.
|
||||
"""
|
||||
|
||||
# Extra cell of thickness added to allow correct scaling of electric and magnetic values
|
||||
# Extra cell of thickness added to allow correct scaling of electric and
|
||||
# magnetic values
|
||||
Evalues = np.zeros(thickness + 1, dtype=config.sim_config.dtypes['float_or_double'])
|
||||
Hvalues = np.zeros(thickness + 1, dtype=config.sim_config.dtypes['float_or_double'])
|
||||
|
||||
@@ -146,10 +153,12 @@ class CFS:
|
||||
if parameter.scalingdirection == 'reverse':
|
||||
Evalues = Evalues[::-1]
|
||||
Hvalues = Hvalues[::-1]
|
||||
# Magnetic values must be shifted one element to the left after reversal
|
||||
# Magnetic values must be shifted one element to the left after
|
||||
# reversal
|
||||
Hvalues = np.roll(Hvalues, -1)
|
||||
|
||||
# Extra cell of thickness not required and therefore removed after scaling
|
||||
# Extra cell of thickness not required and therefore removed after
|
||||
# scaling
|
||||
Evalues = Evalues[:-1]
|
||||
Hvalues = Hvalues[:-1]
|
||||
|
||||
@@ -168,17 +177,20 @@ class PML:
|
||||
boundaryIDs = ['x0', 'y0', 'z0', 'xmax', 'ymax', 'zmax']
|
||||
|
||||
# Indicates direction of increasing absorption
|
||||
# xminus, yminus, zminus - absorption increases in negative direction of x-axis, y-axis, or z-axis
|
||||
# xplus, yplus, zplus - absorption increases in positive direction of x-axis, y-axis, or z-axis
|
||||
# xminus, yminus, zminus - absorption increases in negative direction of
|
||||
# x-axis, y-axis, or z-axis
|
||||
# xplus, yplus, zplus - absorption increases in positive direction of
|
||||
# x-axis, y-axis, or z-axis
|
||||
directions = ['xminus', 'yminus', 'zminus', 'xplus', 'yplus', 'zplus']
|
||||
|
||||
def __init__(self, G, ID=None, direction=None, xs=0, xf=0, ys=0, yf=0, zs=0, zf=0):
|
||||
"""
|
||||
Args:
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
ID (str): Identifier for PML slab.
|
||||
direction (str): Direction of increasing absorption.
|
||||
xs, xf, ys, yf, zs, zf (float): Extent of the PML slab.
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
ID: string identifier for PML slab.
|
||||
direction: string for direction of increasing absorption.
|
||||
xs, xf, ys, yf, zs, zf: floats of extent of the PML slab.
|
||||
"""
|
||||
|
||||
self.G = G
|
||||
@@ -244,8 +256,8 @@ class PML:
|
||||
"""Calculates electric and magnetic update coefficients for the PML.
|
||||
|
||||
Args:
|
||||
er (float): Average permittivity of underlying material
|
||||
mr (float): Average permeability of underlying material
|
||||
er: float of average permittivity of underlying material
|
||||
mr: float of average permeability of underlying material
|
||||
"""
|
||||
|
||||
self.ERA = np.zeros((len(self.CFS), self.thickness),
|
||||
@@ -275,19 +287,25 @@ class PML:
|
||||
# Define different parameters depending on PML formulation
|
||||
if self.G.pmlformulation == 'HORIPML':
|
||||
# HORIPML electric update coefficients
|
||||
tmp = (2 * config.sim_config.em_consts['e0'] * Ekappa) + self.G.dt * (Ealpha * Ekappa + Esigma)
|
||||
self.ERA[x, :] = (2 * config.sim_config.em_consts['e0'] + self.G.dt * Ealpha) / tmp
|
||||
self.ERB[x, :] = (2 * config.sim_config.em_consts['e0'] * Ekappa) / tmp
|
||||
self.ERE[x, :] = ((2 * config.sim_config.em_consts['e0'] * Ekappa) - self.G.dt
|
||||
* (Ealpha * Ekappa + Esigma)) / tmp
|
||||
tmp = ((2 * config.sim_config.em_consts['e0'] * Ekappa) +
|
||||
self.G.dt * (Ealpha * Ekappa + Esigma))
|
||||
self.ERA[x, :] = ((2 * config.sim_config.em_consts['e0'] +
|
||||
self.G.dt * Ealpha) / tmp)
|
||||
self.ERB[x, :] = ((2 * config.sim_config.em_consts['e0'] * Ekappa)
|
||||
/ tmp)
|
||||
self.ERE[x, :] = (((2 * config.sim_config.em_consts['e0'] * Ekappa) -
|
||||
self.G.dt * (Ealpha * Ekappa + Esigma)) / tmp)
|
||||
self.ERF[x, :] = (2 * Esigma * self.G.dt) / (Ekappa * tmp)
|
||||
|
||||
# HORIPML magnetic update coefficients
|
||||
tmp = (2 * config.sim_config.em_consts['e0'] * Hkappa) + self.G.dt * (Halpha * Hkappa + Hsigma)
|
||||
self.HRA[x, :] = (2 * config.sim_config.em_consts['e0'] + self.G.dt * Halpha) / tmp
|
||||
self.HRB[x, :] = (2 * config.sim_config.em_consts['e0'] * Hkappa) / tmp
|
||||
self.HRE[x, :] = ((2 * config.sim_config.em_consts['e0'] * Hkappa) - self.G.dt
|
||||
* (Halpha * Hkappa + Hsigma)) / tmp
|
||||
tmp = ((2 * config.sim_config.em_consts['e0'] * Hkappa) +
|
||||
self.G.dt * (Halpha * Hkappa + Hsigma))
|
||||
self.HRA[x, :] = ((2 * config.sim_config.em_consts['e0'] +
|
||||
self.G.dt * Halpha) / tmp)
|
||||
self.HRB[x, :] = ((2 * config.sim_config.em_consts['e0'] * Hkappa)
|
||||
/ tmp)
|
||||
self.HRE[x, :] = (((2 * config.sim_config.em_consts['e0'] * Hkappa) -
|
||||
self.G.dt * (Halpha * Hkappa + Hsigma)) / tmp)
|
||||
self.HRF[x, :] = (2 * Hsigma * self.G.dt) / (Hkappa * tmp)
|
||||
|
||||
elif self.G.pmlformulation == 'MRIPML':
|
||||
@@ -295,31 +313,39 @@ class PML:
|
||||
tmp = 2 * config.sim_config.em_consts['e0'] + self.G.dt * Ealpha
|
||||
self.ERA[x, :] = Ekappa + (self.G.dt * Esigma) / tmp
|
||||
self.ERB[x, :] = (2 * config.sim_config.em_consts['e0']) / tmp
|
||||
self.ERE[x, :] = ((2 * config.sim_config.em_consts['e0']) - self.G.dt * Ealpha) / tmp
|
||||
self.ERE[x, :] = (((2 * config.sim_config.em_consts['e0'])
|
||||
- self.G.dt * Ealpha) / tmp)
|
||||
self.ERF[x, :] = (2 * Esigma * self.G.dt) / tmp
|
||||
|
||||
# MRIPML magnetic update coefficients
|
||||
tmp = 2 * config.sim_config.em_consts['e0'] + self.G.dt * Halpha
|
||||
self.HRA[x, :] = Hkappa + (self.G.dt * Hsigma) / tmp
|
||||
self.HRB[x, :] = (2 * config.sim_config.em_consts['e0']) / tmp
|
||||
self.HRE[x, :] = ((2 * config.sim_config.sim_config.em_consts['e0']) - self.G.dt * Halpha) / tmp
|
||||
self.HRE[x, :] = (((2 * config.sim_config.sim_config.em_consts['e0'])
|
||||
- self.G.dt * Halpha) / tmp)
|
||||
self.HRF[x, :] = (2 * Hsigma * self.G.dt) / tmp
|
||||
|
||||
def update_electric(self):
|
||||
"""This functions updates electric field components with the PML correction."""
|
||||
"""This function updates electric field components with the PML
|
||||
correction.
|
||||
"""
|
||||
|
||||
pmlmodule = 'gprMax.cython.pml_updates_electric_' + self.G.pmlformulation
|
||||
func = getattr(import_module(pmlmodule), 'order' + str(len(self.CFS)) + '_' + self.direction)
|
||||
func = getattr(import_module(pmlmodule),
|
||||
'order' + str(len(self.CFS)) + '_' + self.direction)
|
||||
func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf,
|
||||
config.get_model_config().ompthreads, self.G.updatecoeffsE, self.G.ID,
|
||||
self.G.Ex, self.G.Ey, self.G.Ez, self.G.Hx, self.G.Hy, self.G.Hz,
|
||||
self.EPhi1, self.EPhi2, self.ERA, self.ERB, self.ERE, self.ERF, self.d)
|
||||
|
||||
def update_magnetic(self):
|
||||
"""This functions updates magnetic field components with the PML correction."""
|
||||
"""This function updates magnetic field components with the PML
|
||||
correction.
|
||||
"""
|
||||
|
||||
pmlmodule = 'gprMax.cython.pml_updates_magnetic_' + self.G.pmlformulation
|
||||
func = getattr(import_module(pmlmodule), 'order' + str(len(self.CFS)) + '_' + self.direction)
|
||||
func = getattr(import_module(pmlmodule),
|
||||
'order' + str(len(self.CFS)) + '_' + self.direction)
|
||||
func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf,
|
||||
config.get_model_config().ompthreads, self.G.updatecoeffsH, self.G.ID,
|
||||
self.G.Ex, self.G.Ey, self.G.Ez, self.G.Hx, self.G.Hy, self.G.Hz,
|
||||
@@ -331,6 +357,9 @@ class CUDAPML(PML):
|
||||
solving on GPU using CUDA.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(CUDAPML, self).__init__(*args, **kwargs)
|
||||
|
||||
def htod_field_arrays(self):
|
||||
"""Initialise PML field and coefficient arrays on GPU."""
|
||||
|
||||
@@ -360,9 +389,9 @@ class CUDAPML(PML):
|
||||
"""Get update functions from PML kernels.
|
||||
|
||||
Args:
|
||||
kernelselectric: PyCuda SourceModule containing PML kernels for
|
||||
kernelselectric: pycuda SourceModule containing PML kernels for
|
||||
electric updates.
|
||||
kernelsmagnetic: PyCuda SourceModule containing PML kernels for
|
||||
kernelsmagnetic: pycuda SourceModule containing PML kernels for
|
||||
magnetic updates.
|
||||
"""
|
||||
|
||||
@@ -373,9 +402,12 @@ class CUDAPML(PML):
|
||||
"""This functions updates electric field components with the PML
|
||||
correction on the GPU.
|
||||
"""
|
||||
self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf),
|
||||
np.int32(self.ys), np.int32(self.yf),
|
||||
np.int32(self.zs), np.int32(self.zf),
|
||||
self.update_electric_gpu(np.int32(self.xs),
|
||||
np.int32(self.xf),
|
||||
np.int32(self.ys),
|
||||
np.int32(self.yf),
|
||||
np.int32(self.zs),
|
||||
np.int32(self.zf),
|
||||
np.int32(self.EPhi1_gpu.shape[1]),
|
||||
np.int32(self.EPhi1_gpu.shape[2]),
|
||||
np.int32(self.EPhi1_gpu.shape[3]),
|
||||
@@ -384,11 +416,18 @@ class CUDAPML(PML):
|
||||
np.int32(self.EPhi2_gpu.shape[3]),
|
||||
np.int32(self.thickness),
|
||||
self.G.ID_gpu.gpudata,
|
||||
self.G.Ex_gpu.gpudata, self.G.Ey_gpu.gpudata, self.G.Ez_gpu.gpudata,
|
||||
self.G.Hx_gpu.gpudata, self.G.Hy_gpu.gpudata, self.G.Hz_gpu.gpudata,
|
||||
self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata,
|
||||
self.ERA_gpu.gpudata, self.ERB_gpu.gpudata,
|
||||
self.ERE_gpu.gpudata, self.ERF_gpu.gpudata,
|
||||
self.G.Ex_gpu.gpudata,
|
||||
self.G.Ey_gpu.gpudata,
|
||||
self.G.Ez_gpu.gpudata,
|
||||
self.G.Hx_gpu.gpudata,
|
||||
self.G.Hy_gpu.gpudata,
|
||||
self.G.Hz_gpu.gpudata,
|
||||
self.EPhi1_gpu.gpudata,
|
||||
self.EPhi2_gpu.gpudata,
|
||||
self.ERA_gpu.gpudata,
|
||||
self.ERB_gpu.gpudata,
|
||||
self.ERE_gpu.gpudata,
|
||||
self.ERF_gpu.gpudata,
|
||||
config.sim_config.dtypes['float_or_double'](self.d),
|
||||
block=self.G.tpb, grid=self.bpg)
|
||||
|
||||
@@ -396,9 +435,12 @@ class CUDAPML(PML):
|
||||
"""This functions updates magnetic field components with the PML
|
||||
correction on the GPU.
|
||||
"""
|
||||
self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf),
|
||||
np.int32(self.ys), np.int32(self.yf),
|
||||
np.int32(self.zs), np.int32(self.zf),
|
||||
self.update_magnetic_gpu(np.int32(self.xs),
|
||||
np.int32(self.xf),
|
||||
np.int32(self.ys),
|
||||
np.int32(self.yf),
|
||||
np.int32(self.zs),
|
||||
np.int32(self.zf),
|
||||
np.int32(self.HPhi1_gpu.shape[1]),
|
||||
np.int32(self.HPhi1_gpu.shape[2]),
|
||||
np.int32(self.HPhi1_gpu.shape[3]),
|
||||
@@ -407,19 +449,147 @@ class CUDAPML(PML):
|
||||
np.int32(self.HPhi2_gpu.shape[3]),
|
||||
np.int32(self.thickness),
|
||||
self.G.ID_gpu.gpudata,
|
||||
self.G.Ex_gpu.gpudata, self.G.Ey_gpu.gpudata, self.G.Ez_gpu.gpudata,
|
||||
self.G.Hx_gpu.gpudata, self.G.Hy_gpu.gpudata, self.G.Hz_gpu.gpudata,
|
||||
self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata,
|
||||
self.HRA_gpu.gpudata, self.HRB_gpu.gpudata,
|
||||
self.HRE_gpu.gpudata, self.HRF_gpu.gpudata,
|
||||
self.G.Ex_gpu.gpudata,
|
||||
self.G.Ey_gpu.gpudata,
|
||||
self.G.Ez_gpu.gpudata,
|
||||
self.G.Hx_gpu.gpudata,
|
||||
self.G.Hy_gpu.gpudata,
|
||||
self.G.Hz_gpu.gpudata,
|
||||
self.HPhi1_gpu.gpudata,
|
||||
self.HPhi2_gpu.gpudata,
|
||||
self.HRA_gpu.gpudata,
|
||||
self.HRB_gpu.gpudata,
|
||||
self.HRE_gpu.gpudata,
|
||||
self.HRF_gpu.gpudata,
|
||||
config.sim_config.dtypes['float_or_double'](self.d),
|
||||
block=self.G.tpb, grid=self.bpg)
|
||||
|
||||
|
||||
class OpenCLPML(PML):
    """Perfectly Matched Layer (PML) Absorbing Boundary Conditions (ABC) for
        solving on compute device using OpenCL.

    The kernel callables ``update_electric_dev`` and ``update_magnetic_dev``
    and the command queue are not created by this class; they are expected
    to be attached to the instance before the update methods are called.
    """

    def __init__(self, *args, **kwargs):
        super(OpenCLPML, self).__init__(*args, **kwargs)
        # Accumulated wall-clock time spent in PML update kernels
        self.compute_time = 0

    def set_queue(self, queue):
        """Passes in pyopencl queue.

        Args:
            queue: pyopencl queue.
        """
        self.queue = queue

    def htod_field_arrays(self):
        """Initialise PML field and coefficient arrays on compute device.

        set_queue() must have been called first, as every transfer uses
        self.queue.
        """

        import pyopencl.array as clarray

        # Electric and magnetic update coefficients
        self.ERA_dev = clarray.to_device(self.queue, self.ERA)
        self.ERB_dev = clarray.to_device(self.queue, self.ERB)
        self.ERE_dev = clarray.to_device(self.queue, self.ERE)
        self.ERF_dev = clarray.to_device(self.queue, self.ERF)
        self.HRA_dev = clarray.to_device(self.queue, self.HRA)
        self.HRB_dev = clarray.to_device(self.queue, self.HRB)
        self.HRE_dev = clarray.to_device(self.queue, self.HRE)
        self.HRF_dev = clarray.to_device(self.queue, self.HRF)

        # Auxiliary PML field arrays
        self.EPhi1_dev = clarray.to_device(self.queue, self.EPhi1)
        self.EPhi2_dev = clarray.to_device(self.queue, self.EPhi2)
        self.HPhi1_dev = clarray.to_device(self.queue, self.HPhi1)
        self.HPhi2_dev = clarray.to_device(self.queue, self.HPhi2)

    def set_blocks_per_grid(self):
        """Placeholder that does nothing for the OpenCL solver.

        Declared with ``self`` so that it can be called on an instance.
        """
        pass

    def set_wgs(self):
        """Set the workgroup size used for updating the PML field arrays
            on a compute device.

        htod_field_arrays() must have been called first, as the size is
        derived from the shape of EPhi1_dev.
        """
        # NOTE(review): the divisor is self.G.tpb[0] but the multiplier is a
        # hard-coded 256 - presumably these are meant to be the same value;
        # confirm before changing.
        self.wgs = (((int(np.ceil(((self.EPhi1_dev.shape[1] + 1) *
                                   (self.EPhi1_dev.shape[2] + 1) *
                                   (self.EPhi1_dev.shape[3] + 1)) / self.G.tpb[0]))) * 256), 1, 1)

    def get_update_funcs(self):
        """Placeholder that does nothing for the OpenCL solver.

        Declared with ``self`` so that it can be called on an instance.
        """
        pass

    def update_electric(self):
        """This function updates electric field components with the PML
            correction on the compute device.
        """
        start_time = timer()
        event = self.update_electric_dev(np.int32(self.xs),
                                         np.int32(self.xf),
                                         np.int32(self.ys),
                                         np.int32(self.yf),
                                         np.int32(self.zs),
                                         np.int32(self.zf),
                                         np.int32(self.EPhi1_dev.shape[1]),
                                         np.int32(self.EPhi1_dev.shape[2]),
                                         np.int32(self.EPhi1_dev.shape[3]),
                                         np.int32(self.EPhi2_dev.shape[1]),
                                         np.int32(self.EPhi2_dev.shape[2]),
                                         np.int32(self.EPhi2_dev.shape[3]),
                                         np.int32(self.thickness),
                                         self.G.ID_dev,
                                         self.G.Ex_dev,
                                         self.G.Ey_dev,
                                         self.G.Ez_dev,
                                         self.G.Hx_dev,
                                         self.G.Hy_dev,
                                         self.G.Hz_dev,
                                         self.EPhi1_dev,
                                         self.EPhi2_dev,
                                         self.ERA_dev,
                                         self.ERB_dev,
                                         self.ERE_dev,
                                         self.ERF_dev,
                                         config.sim_config.dtypes['float_or_double'](self.d))
        # Wait for the kernel to finish so the elapsed time includes execution
        event.wait()
        self.compute_time += (timer() - start_time)

    def update_magnetic(self):
        """This function updates magnetic field components with the PML
            correction on the compute device.
        """
        start_time = timer()
        event = self.update_magnetic_dev(np.int32(self.xs),
                                         np.int32(self.xf),
                                         np.int32(self.ys),
                                         np.int32(self.yf),
                                         np.int32(self.zs),
                                         np.int32(self.zf),
                                         np.int32(self.HPhi1_dev.shape[1]),
                                         np.int32(self.HPhi1_dev.shape[2]),
                                         np.int32(self.HPhi1_dev.shape[3]),
                                         np.int32(self.HPhi2_dev.shape[1]),
                                         np.int32(self.HPhi2_dev.shape[2]),
                                         np.int32(self.HPhi2_dev.shape[3]),
                                         np.int32(self.thickness),
                                         self.G.ID_dev,
                                         self.G.Ex_dev,
                                         self.G.Ey_dev,
                                         self.G.Ez_dev,
                                         self.G.Hx_dev,
                                         self.G.Hy_dev,
                                         self.G.Hz_dev,
                                         self.HPhi1_dev,
                                         self.HPhi2_dev,
                                         self.HRA_dev,
                                         self.HRB_dev,
                                         self.HRE_dev,
                                         self.HRF_dev,
                                         config.sim_config.dtypes['float_or_double'](self.d))
        # Wait for the kernel to finish so the elapsed time includes execution
        event.wait()
        self.compute_time += (timer() - start_time)
|
||||
|
||||
|
||||
def print_pml_info(G):
|
||||
"""Information about PMLs.
|
||||
|
||||
Args:
|
||||
G (FDTDGrid): Parameters describing a grid in a model.
|
||||
G: FDTDGrid object that holds parameters describing a grid in a model.
|
||||
"""
|
||||
# No PML
|
||||
if all(value == 0 for value in G.pmlthickness.values()):
|
||||
@@ -433,7 +603,8 @@ def print_pml_info(G):
|
||||
pmlinfo += f'{key}: {value}, '
|
||||
pmlinfo = pmlinfo[:-2]
|
||||
|
||||
return f'\nPML boundaries [{G.name}]: {{formulation: {G.pmlformulation}, order: {len(G.cfs)}, thickness (cells): {pmlinfo}}}'
|
||||
return (f'\nPML boundaries [{G.name}]: {{formulation: {G.pmlformulation}, '
|
||||
f'order: {len(G.cfs)}, thickness (cells): {pmlinfo}}}')
|
||||
|
||||
|
||||
def build_pml(G, key, value):
|
||||
@@ -442,21 +613,28 @@ def build_pml(G, key, value):
|
||||
(based on underlying material er and mr from solid array).
|
||||
|
||||
Args:
|
||||
G (FDTDGrid): Parameters describing a grid in a model.
|
||||
key (str): Identifier of PML slab.
|
||||
value (int): Thickness of PML slab in cells.
|
||||
G: FDTDGrid object that holds parameters describing a grid in a model.
|
||||
key: string identifier of PML slab.
|
||||
value: int with thickness of PML slab in cells.
|
||||
"""
|
||||
|
||||
pml_type = CUDAPML if config.sim_config.general['cuda'] else PML
|
||||
if config.sim_config.general['solver'] == 'cpu':
|
||||
pml_type = PML
|
||||
elif config.sim_config.general['solver'] == 'cuda':
|
||||
pml_type = CUDAPML
|
||||
elif config.sim_config.general['solver'] == 'opencl':
|
||||
pml_type = OpenCLPML
|
||||
|
||||
sumer = 0 # Sum of relative permittivities in PML slab
|
||||
summr = 0 # Sum of relative permeabilities in PML slab
|
||||
|
||||
if key[0] == 'x':
|
||||
if key == 'x0':
|
||||
pml = pml_type(G, ID=key, direction='xminus', xf=value, yf=G.ny, zf=G.nz)
|
||||
pml = pml_type(G, ID=key, direction='xminus',
|
||||
xf=value, yf=G.ny, zf=G.nz)
|
||||
elif key == 'xmax':
|
||||
pml = pml_type(G, ID=key, direction='xplus', xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
|
||||
pml = pml_type(G, ID=key, direction='xplus',
|
||||
xs=G.nx - value, xf=G.nx, yf=G.ny, zf=G.nz)
|
||||
G.pmls.append(pml)
|
||||
for j in range(G.ny):
|
||||
for k in range(G.nz):
|
||||
@@ -469,9 +647,11 @@ def build_pml(G, key, value):
|
||||
|
||||
elif key[0] == 'y':
|
||||
if key == 'y0':
|
||||
pml = pml_type(G, ID=key, direction='yminus', yf=value, xf=G.nx, zf=G.nz)
|
||||
pml = pml_type(G, ID=key, direction='yminus',
|
||||
yf=value, xf=G.nx, zf=G.nz)
|
||||
elif key == 'ymax':
|
||||
pml = pml_type(G, ID=key, direction='yplus', ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
|
||||
pml = pml_type(G, ID=key, direction='yplus',
|
||||
ys=G.ny - value, xf=G.nx, yf=G.ny, zf=G.nz)
|
||||
G.pmls.append(pml)
|
||||
for i in range(G.nx):
|
||||
for k in range(G.nz):
|
||||
@@ -484,9 +664,11 @@ def build_pml(G, key, value):
|
||||
|
||||
elif key[0] == 'z':
|
||||
if key == 'z0':
|
||||
pml = pml_type(G, ID=key, direction='zminus', zf=value, xf=G.nx, yf=G.ny)
|
||||
pml = pml_type(G, ID=key, direction='zminus',
|
||||
zf=value, xf=G.nx, yf=G.ny)
|
||||
elif key == 'zmax':
|
||||
pml = pml_type(G, ID=key, direction='zplus', zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
|
||||
pml = pml_type(G, ID=key, direction='zplus',
|
||||
zs=G.nz - value, xf=G.nx, yf=G.ny, zf=G.nz)
|
||||
G.pmls.append(pml)
|
||||
for i in range(G.nx):
|
||||
for j in range(G.ny):
|
||||
|
@@ -26,8 +26,8 @@ class Rx:
|
||||
allowableoutputs = ['Ex', 'Ey', 'Ez', 'Hx', 'Hy', 'Hz', 'Ix', 'Iy', 'Iz']
|
||||
defaultoutputs = allowableoutputs[:-3]
|
||||
|
||||
allowableoutputs_gpu = allowableoutputs[:-3]
|
||||
maxnumoutputs_gpu = 0
|
||||
allowableoutputs_dev = allowableoutputs[:-3]
|
||||
maxnumoutputs_dev = 0
|
||||
|
||||
def __init__(self):
|
||||
|
||||
@@ -41,57 +41,65 @@ class Rx:
|
||||
self.zcoordorigin = None
|
||||
|
||||
|
||||
def htod_rx_arrays(G):
|
||||
"""Initialise arrays on GPU for receiver coordinates and to store field
|
||||
def htod_rx_arrays(G, queue=None):
|
||||
"""Initialise arrays on compute device for receiver coordinates and to store field
|
||||
components for receivers.
|
||||
|
||||
Args:
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
G: FDTDGrid object that holds essential parameters describing the model.
|
||||
queue: pyopencl queue.
|
||||
|
||||
Returns:
|
||||
rxcoords_gpu (int): numpy array of receiver coordinates from GPU.
|
||||
rxs_gpu (float): numpy array of receiver data from GPU - rows are field
|
||||
components; columns are iterations; pages are receivers.
|
||||
rxcoords_dev: int array of receiver coordinates on compute device.
|
||||
rxs_dev: float array of receiver data on compute device - rows are field
|
||||
components; columns are iterations; pages are receivers.
|
||||
"""
|
||||
|
||||
import pycuda.gpuarray as gpuarray
|
||||
|
||||
# Array to store receiver coordinates on GPU
|
||||
# Array to store receiver coordinates on compute device
|
||||
rxcoords = np.zeros((len(G.rxs), 3), dtype=np.int32)
|
||||
for i, rx in enumerate(G.rxs):
|
||||
rxcoords[i, 0] = rx.xcoord
|
||||
rxcoords[i, 1] = rx.ycoord
|
||||
rxcoords[i, 2] = rx.zcoord
|
||||
# Store maximum number of output components
|
||||
if len(rx.outputs) > Rx.maxnumoutputs_gpu:
|
||||
Rx.maxnumoutputs_gpu = len(rx.outputs)
|
||||
if len(rx.outputs) > Rx.maxnumoutputs_dev:
|
||||
Rx.maxnumoutputs_dev = len(rx.outputs)
|
||||
|
||||
# Array to store field components for receivers on GPU - rows are field components;
|
||||
# columns are iterations; pages are receivers
|
||||
rxs = np.zeros((len(Rx.allowableoutputs_gpu), G.iterations, len(G.rxs)),
|
||||
# Array to store field components for receivers on compute device -
|
||||
# rows are field components; columns are iterations; pages are receivers
|
||||
rxs = np.zeros((len(Rx.allowableoutputs_dev), G.iterations, len(G.rxs)),
|
||||
dtype=config.sim_config.dtypes['float_or_double'])
|
||||
|
||||
# Copy arrays to GPU
|
||||
rxcoords_gpu = gpuarray.to_gpu(rxcoords)
|
||||
rxs_gpu = gpuarray.to_gpu(rxs)
|
||||
# Copy arrays to compute device
|
||||
if config.sim_config.general['solver'] == 'cuda':
|
||||
import pycuda.gpuarray as gpuarray
|
||||
rxcoords_dev = gpuarray.to_gpu(rxcoords)
|
||||
rxs_dev = gpuarray.to_gpu(rxs)
|
||||
|
||||
return rxcoords_gpu, rxs_gpu
|
||||
elif config.sim_config.general['solver'] == 'opencl':
|
||||
import pyopencl.array as clarray
|
||||
rxcoords_dev = clarray.to_device(queue, rxcoords)
|
||||
rxs_dev = clarray.to_device(queue, rxs)
|
||||
|
||||
return rxcoords_dev, rxs_dev
|
||||
|
||||
|
||||
def dtoh_rx_array(rxs_gpu, rxcoords_gpu, G):
|
||||
"""Copy output from receivers array used on GPU back to receiver objects.
|
||||
def dtoh_rx_array(rxs_dev, rxcoords_dev, G):
|
||||
"""Copy output from receivers array used on compute device back to receiver
|
||||
objects.
|
||||
|
||||
Args:
|
||||
rxs_gpu (float): numpy array of receiver data from GPU - rows are field
|
||||
components; columns are iterations; pages are receivers.
|
||||
rxcoords_gpu (int): numpy array of receiver coordinates from GPU.
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
rxcoords_dev: int array of receiver coordinates on compute device.
|
||||
rxs_dev: float array of receiver data on compute device - rows are field
|
||||
components; columns are iterations; pages are receivers.
|
||||
G: FDTDGrid object that holds essential parameters describing the model.
|
||||
|
||||
"""
|
||||
|
||||
for rx in G.rxs:
|
||||
for rxgpu in range(len(G.rxs)):
|
||||
if (rx.xcoord == rxcoords_gpu[rxgpu, 0] and
|
||||
rx.ycoord == rxcoords_gpu[rxgpu, 1] and
|
||||
rx.zcoord == rxcoords_gpu[rxgpu, 2]):
|
||||
for rxd in range(len(G.rxs)):
|
||||
if (rx.xcoord == rxcoords_dev[rxd, 0] and
|
||||
rx.ycoord == rxcoords_dev[rxd, 1] and
|
||||
rx.zcoord == rxcoords_dev[rxd, 2]):
|
||||
for output in rx.outputs.keys():
|
||||
rx.outputs[output] = rxs_gpu[Rx.allowableoutputs_gpu.index(output), :, rxgpu]
|
||||
rx.outputs[output] = rxs_dev[Rx.allowableoutputs_dev.index(output), :, rxd]
|
||||
|
@@ -223,18 +223,17 @@ class Snapshot:
|
||||
f.close()
|
||||
|
||||
|
||||
def htod_snapshot_array(G):
|
||||
"""Initialise array on GPU for to store field data for snapshots.
|
||||
def htod_snapshot_array(G, queue=None):
|
||||
"""Initialise array on compute device to store field data for snapshots.
|
||||
|
||||
Args:
|
||||
G (FDTDGrid): Parameters describing a grid in a model.
|
||||
G: FDTDGrid object with parameters describing a grid in a model.
|
||||
queue: pyopencl queue.
|
||||
|
||||
Returns:
|
||||
snapE_gpu, snapH_gpu (float): numpy arrays of snapshot data on GPU.
|
||||
snapE_dev, snapH_dev: float arrays of snapshot data on compute device.
|
||||
"""
|
||||
|
||||
import pycuda.gpuarray as gpuarray
|
||||
|
||||
# Get dimensions of largest requested snapshot
|
||||
for snap in G.snapshots:
|
||||
if snap.nx > Snapshot.nx_max:
|
||||
@@ -244,15 +243,21 @@ def htod_snapshot_array(G):
|
||||
if snap.nz > Snapshot.nz_max:
|
||||
Snapshot.nz_max = snap.nz
|
||||
|
||||
# GPU - blocks per grid - according to largest requested snapshot
|
||||
Snapshot.bpg = (int(np.ceil(((Snapshot.nx_max) *
|
||||
(Snapshot.ny_max) *
|
||||
(Snapshot.nz_max)) / Snapshot.tpb[0])), 1, 1)
|
||||
if config.sim_config.general['solver'] == 'cuda':
|
||||
# Blocks per grid - according to largest requested snapshot
|
||||
Snapshot.bpg = (int(np.ceil(((Snapshot.nx_max) *
|
||||
(Snapshot.ny_max) *
|
||||
(Snapshot.nz_max)) / Snapshot.tpb[0])), 1, 1)
|
||||
elif config.sim_config.general['solver'] == 'opencl':
|
||||
# Workgroup size - according to largest requested snapshot
|
||||
Snapshot.wgs = (int(np.ceil(((Snapshot.nx_max) *
|
||||
(Snapshot.ny_max) *
|
||||
(Snapshot.nz_max)))), 1, 1)
|
||||
|
||||
# 4D arrays to store snapshots on GPU, e.g. snapEx(time, x, y, z);
|
||||
# if snapshots are not being stored on the GPU during the simulation then
|
||||
# they are copied back to the host after each iteration, hence numsnaps = 1
|
||||
numsnaps = 1 if config.get_model_config().cuda['snapsgpu2cpu'] else len(G.snapshots)
|
||||
numsnaps = 1 if config.get_model_config().device['snapsgpu2cpu'] else len(G.snapshots)
|
||||
snapEx = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max),
|
||||
dtype=config.sim_config.dtypes['float_or_double'])
|
||||
snapEy = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max),
|
||||
@@ -266,29 +271,41 @@ def htod_snapshot_array(G):
|
||||
snapHz = np.zeros((numsnaps, Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max),
|
||||
dtype=config.sim_config.dtypes['float_or_double'])
|
||||
|
||||
# Copy arrays to GPU
|
||||
snapEx_gpu = gpuarray.to_gpu(snapEx)
|
||||
snapEy_gpu = gpuarray.to_gpu(snapEy)
|
||||
snapEz_gpu = gpuarray.to_gpu(snapEz)
|
||||
snapHx_gpu = gpuarray.to_gpu(snapHx)
|
||||
snapHy_gpu = gpuarray.to_gpu(snapHy)
|
||||
snapHz_gpu = gpuarray.to_gpu(snapHz)
|
||||
# Copy arrays to compute device
|
||||
if config.sim_config.general['solver'] == 'cuda':
|
||||
import pycuda.gpuarray as gpuarray
|
||||
snapEx_dev = gpuarray.to_gpu(snapEx)
|
||||
snapEy_dev = gpuarray.to_gpu(snapEy)
|
||||
snapEz_dev = gpuarray.to_gpu(snapEz)
|
||||
snapHx_dev = gpuarray.to_gpu(snapHx)
|
||||
snapHy_dev = gpuarray.to_gpu(snapHy)
|
||||
snapHz_dev = gpuarray.to_gpu(snapHz)
|
||||
|
||||
return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu
|
||||
elif config.sim_config.general['solver'] == 'opencl':
|
||||
import pyopencl.array as clarray
|
||||
snapEx_dev = clarray.to_device(queue, snapEx)
|
||||
snapEy_dev = clarray.to_device(queue, snapEy)
|
||||
snapEz_dev = clarray.to_device(queue, snapEz)
|
||||
snapHx_dev = clarray.to_device(queue, snapHx)
|
||||
snapHy_dev = clarray.to_device(queue, snapHy)
|
||||
snapHz_dev = clarray.to_device(queue, snapHz)
|
||||
|
||||
return snapEx_dev, snapEy_dev, snapEz_dev, snapHx_dev, snapHy_dev, snapHz_dev
|
||||
|
||||
|
||||
def dtoh_snapshot_array(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap):
|
||||
"""Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview.
|
||||
def dtoh_snapshot_array(snapEx_dev, snapEy_dev, snapEz_dev, snapHx_dev, snapHy_dev, snapHz_dev, i, snap):
|
||||
"""Copy snapshot array used on compute device back to snapshot objects and
|
||||
store in format for Paraview.
|
||||
|
||||
Args:
|
||||
snapE_gpu, snapH_gpu (float): numpy arrays of snapshot data from GPU.
|
||||
i (int): index for snapshot data on GPU array.
|
||||
snap (class): Snapshot class instance
|
||||
snapE_dev, snapH_dev: float arrays of snapshot data from compute device.
|
||||
i: int for index of snapshot data on compute device array.
|
||||
snap: Snapshot class instance
|
||||
"""
|
||||
|
||||
snap.Exsnap = snapEx_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
|
||||
snap.Eysnap = snapEy_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
|
||||
snap.Ezsnap = snapEz_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
|
||||
snap.Hxsnap = snapHx_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
|
||||
snap.Hysnap = snapHy_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
|
||||
snap.Hzsnap = snapHz_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
|
||||
snap.Exsnap = snapEx_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
|
||||
snap.Eysnap = snapEy_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
|
||||
snap.Ezsnap = snapEz_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
|
||||
snap.Hxsnap = snapHx_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
|
||||
snap.Hysnap = snapHy_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
|
||||
snap.Hzsnap = snapHz_dev[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf]
|
||||
|
@@ -18,22 +18,24 @@
|
||||
|
||||
import gprMax.config as config
|
||||
|
||||
from .grid import CUDAGrid, FDTDGrid
|
||||
from .grid import CUDAGrid, FDTDGrid, OpenCLGrid
|
||||
from .subgrids.updates import create_updates as create_subgrid_updates
|
||||
from .updates import CPUUpdates, CUDAUpdates
|
||||
from .updates import CPUUpdates, CUDAUpdates, OpenCLUpdates
|
||||
|
||||
|
||||
def create_G():
|
||||
"""Create grid object according to solver.
|
||||
|
||||
Returns:
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
G: FDTDGrid that holds essential parameters describing the model.
|
||||
"""
|
||||
|
||||
if config.sim_config.general['cpu']:
|
||||
if config.sim_config.general['solver'] == 'cpu':
|
||||
G = FDTDGrid()
|
||||
elif config.sim_config.general['cuda']:
|
||||
elif config.sim_config.general['solver'] == 'cuda':
|
||||
G = CUDAGrid()
|
||||
elif config.sim_config.general['solver'] == 'opencl':
|
||||
G = OpenCLGrid()
|
||||
|
||||
return G
|
||||
|
||||
@@ -42,10 +44,10 @@ def create_solver(G):
|
||||
"""Create configured solver object.
|
||||
|
||||
Args:
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
G: FDTDGrid that holds essential parameters describing the model.
|
||||
|
||||
Returns:
|
||||
solver (Solver): solver object.
|
||||
solver: Solver object.
|
||||
"""
|
||||
|
||||
if config.sim_config.general['subgrid']:
|
||||
@@ -56,14 +58,17 @@ def create_solver(G):
|
||||
# the required numerical precision and dispersive material type.
|
||||
props = updates.adapt_dispersive_config()
|
||||
updates.set_dispersive_updates(props)
|
||||
elif config.sim_config.general['cpu']:
|
||||
elif config.sim_config.general['solver'] == 'cpu':
|
||||
updates = CPUUpdates(G)
|
||||
solver = Solver(updates)
|
||||
props = updates.adapt_dispersive_config()
|
||||
updates.set_dispersive_updates(props)
|
||||
elif config.sim_config.general['cuda']:
|
||||
elif config.sim_config.general['solver'] == 'cuda':
|
||||
updates = CUDAUpdates(G)
|
||||
solver = Solver(updates)
|
||||
elif config.sim_config.general['solver'] == 'opencl':
|
||||
updates = OpenCLUpdates(G)
|
||||
solver = Solver(updates)
|
||||
|
||||
return solver
|
||||
|
||||
@@ -74,8 +79,8 @@ class Solver:
|
||||
def __init__(self, updates, hsg=False):
|
||||
"""
|
||||
Args:
|
||||
updates (Updates): Updates contains methods to run FDTD algorithm.
|
||||
hsg (bool): Use sub-gridding.
|
||||
updates: Updates contains methods to run FDTD algorithm.
|
||||
hsg: bool to use sub-gridding.
|
||||
"""
|
||||
|
||||
self.updates = updates
|
||||
@@ -85,13 +90,14 @@ class Solver:
|
||||
"""Time step the FDTD model.
|
||||
|
||||
Args:
|
||||
iterator (iterator): can be range() or tqdm()
|
||||
iterator: can be range() or tqdm()
|
||||
|
||||
Returns:
|
||||
tsolve (float): Time taken to execute solving (seconds).
|
||||
memsolve (float): Memory (RAM) used.
|
||||
tsolve: float for time taken to execute solving (seconds).
|
||||
memsolve: float for memory (RAM) used.
|
||||
"""
|
||||
|
||||
memsolve = 0
|
||||
self.updates.time_start()
|
||||
|
||||
for iteration in iterator:
|
||||
@@ -108,7 +114,8 @@ class Solver:
|
||||
if self.hsg:
|
||||
self.updates.hsg_1()
|
||||
self.updates.update_electric_b()
|
||||
memsolve = self.updates.calculate_memsolve(iteration) if config.sim_config.general['cuda'] else None
|
||||
if config.sim_config.general['solver'] == 'cuda':
|
||||
memsolve = self.updates.calculate_memsolve(iteration)
|
||||
|
||||
self.updates.finalise()
|
||||
tsolve = self.updates.calculate_tsolve()
|
||||
|
@@ -45,7 +45,8 @@ class Source:
|
||||
"""Calculates all waveform values for source for duration of simulation.
|
||||
|
||||
Args:
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
"""
|
||||
# Waveform values for electric sources - calculated half a timestep later
|
||||
self.waveformvaluesJ = np.zeros((G.iterations),
|
||||
@@ -82,13 +83,14 @@ class VoltageSource(Source):
|
||||
"""Updates electric field values for a voltage source.
|
||||
|
||||
Args:
|
||||
iteration (int): Current iteration (timestep).
|
||||
updatecoeffsE (memory view): numpy array of electric field update
|
||||
coefficients.
|
||||
ID (memory view): numpy array of numeric IDs corresponding to
|
||||
materials in the model.
|
||||
Ex, Ey, Ez (memory view): numpy array of electric field values.
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
iteration: int of current iteration (timestep).
|
||||
updatecoeffsE: memory view of array of electric field update
|
||||
coefficients.
|
||||
ID: memory view of array of numeric IDs corresponding to materials
|
||||
in the model.
|
||||
Ex, Ey, Ez: memory view of array of electric field values.
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
"""
|
||||
|
||||
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
|
||||
@@ -126,7 +128,8 @@ class VoltageSource(Source):
|
||||
voltage source conductivity to the underlying parameters.
|
||||
|
||||
Args:
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
"""
|
||||
|
||||
if self.resistance != 0:
|
||||
@@ -166,13 +169,14 @@ class HertzianDipole(Source):
|
||||
"""Updates electric field values for a Hertzian dipole.
|
||||
|
||||
Args:
|
||||
iteration (int): Current iteration (timestep).
|
||||
updatecoeffsE (memory view): numpy array of electric field update
|
||||
coefficients.
|
||||
ID (memory view): numpy array of numeric IDs corresponding to
|
||||
materials in the model.
|
||||
Ex, Ey, Ez (memory view): numpy array of electric field values.
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
iteration: int of current iteration (timestep).
|
||||
updatecoeffsE: memory view of array of electric field update
|
||||
coefficients.
|
||||
ID: memory view of array of numeric IDs corresponding to materials
|
||||
in the model.
|
||||
Ex, Ey, Ez: memory view of array of electric field values.
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
"""
|
||||
|
||||
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
|
||||
@@ -203,13 +207,14 @@ class MagneticDipole(Source):
|
||||
"""Updates magnetic field values for a magnetic dipole.
|
||||
|
||||
Args:
|
||||
iteration (int): Current iteration (timestep).
|
||||
updatecoeffsH (memory view): numpy array of magnetic field update
|
||||
coefficients.
|
||||
ID (memory view): numpy array of numeric IDs corresponding to
|
||||
materials in the model.
|
||||
Hx, Hy, Hz (memory view): numpy array of magnetic field values.
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
iteration: int of current iteration (timestep).
|
||||
updatecoeffsH: memory view of array of magnetic field update
|
||||
coefficients.
|
||||
ID: memory view of array of numeric IDs corresponding to materials
|
||||
in the model.
|
||||
Hx, Hy, Hz: memory view of array of magnetic field values.
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
"""
|
||||
|
||||
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
|
||||
@@ -234,24 +239,23 @@ class MagneticDipole(Source):
|
||||
(1 / (G.dx * G.dy * G.dz)))
|
||||
|
||||
|
||||
def htod_src_arrays(sources, G):
|
||||
"""Initialise arrays on GPU for source coordinates/polarisation, other
|
||||
source information, and source waveform values.
|
||||
def htod_src_arrays(sources, G, queue=None):
|
||||
"""Initialise arrays on compute device for source coordinates/polarisation,
|
||||
other source information, and source waveform values.
|
||||
|
||||
Args:
|
||||
sources (list): List of sources of one type, e.g. HertzianDipole
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
sources: list of sources of one type, e.g. HertzianDipole
|
||||
G: FDTDGrid object that holds essential parameters describing the model.
|
||||
queue: pyopencl queue.
|
||||
|
||||
Returns:
|
||||
srcinfo1_gpu (int): numpy array of source cell coordinates and
|
||||
polarisation information.
|
||||
srcinfo2_gpu (float): numpy array of other source information,
|
||||
e.g. length, resistance etc...
|
||||
srcwaves_gpu (float): numpy array of source waveform values.
|
||||
srcinfo1_dev: int array of source cell coordinates and polarisation
|
||||
information.
|
||||
srcinfo2_dev: float array of other source information, e.g. length,
|
||||
resistance etc...
|
||||
srcwaves_dev: float array of source waveform values.
|
||||
"""
|
||||
|
||||
import pycuda.gpuarray as gpuarray
|
||||
|
||||
srcinfo1 = np.zeros((len(sources), 4), dtype=np.int32)
|
||||
srcinfo2 = np.zeros((len(sources)), dtype=config.sim_config.dtypes['float_or_double'])
|
||||
srcwaves = np.zeros((len(sources), G.iterations), dtype=config.sim_config.dtypes['float_or_double'])
|
||||
@@ -276,11 +280,19 @@ def htod_src_arrays(sources, G):
|
||||
elif src.__class__.__name__ == 'MagneticDipole':
|
||||
srcwaves[i, :] = src.waveformvaluesM
|
||||
|
||||
srcinfo1_gpu = gpuarray.to_gpu(srcinfo1)
|
||||
srcinfo2_gpu = gpuarray.to_gpu(srcinfo2)
|
||||
srcwaves_gpu = gpuarray.to_gpu(srcwaves)
|
||||
# Copy arrays to compute device
|
||||
if config.sim_config.general['solver'] == 'cuda':
|
||||
import pycuda.gpuarray as gpuarray
|
||||
srcinfo1_dev = gpuarray.to_gpu(srcinfo1)
|
||||
srcinfo2_dev = gpuarray.to_gpu(srcinfo2)
|
||||
srcwaves_dev = gpuarray.to_gpu(srcwaves)
|
||||
elif config.sim_config.general['solver'] == 'opencl':
|
||||
import pyopencl.array as clarray
|
||||
srcinfo1_dev = clarray.to_device(queue, srcinfo1)
|
||||
srcinfo2_dev = clarray.to_device(queue, srcinfo2)
|
||||
srcwaves_dev = clarray.to_device(queue, srcwaves)
|
||||
|
||||
return srcinfo1_gpu, srcinfo2_gpu, srcwaves_gpu
|
||||
return srcinfo1_dev, srcinfo2_dev, srcwaves_dev
|
||||
|
||||
|
||||
class TransmissionLine(Source):
|
||||
@@ -291,7 +303,8 @@ class TransmissionLine(Source):
|
||||
def __init__(self, G):
|
||||
"""
|
||||
Args:
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
"""
|
||||
|
||||
super().__init__()
|
||||
@@ -328,7 +341,8 @@ class TransmissionLine(Source):
|
||||
from: http://dx.doi.org/10.1002/mop.10415
|
||||
|
||||
Args:
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
"""
|
||||
|
||||
for iteration in range(G.iterations):
|
||||
@@ -344,7 +358,8 @@ class TransmissionLine(Source):
|
||||
"""Updates absorbing boundary condition at end of the transmission line.
|
||||
|
||||
Args:
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
"""
|
||||
|
||||
h = (config.c * G.dt - self.dl) / (config.c * G.dt + self.dl)
|
||||
@@ -357,8 +372,9 @@ class TransmissionLine(Source):
|
||||
"""Updates voltage values along the transmission line.
|
||||
|
||||
Args:
|
||||
iteration (int): Current iteration (timestep).
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
iteration: int of current iteration (timestep).
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
"""
|
||||
|
||||
# Update all the voltage values along the line
|
||||
@@ -375,8 +391,9 @@ class TransmissionLine(Source):
|
||||
"""Updates current values along the transmission line.
|
||||
|
||||
Args:
|
||||
iteration (int): Current iteration (timestep).
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
iteration: int of current iteration (timestep).
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
"""
|
||||
|
||||
# Update all the current values along the line
|
||||
@@ -393,13 +410,14 @@ class TransmissionLine(Source):
|
||||
the transmission line.
|
||||
|
||||
Args:
|
||||
iteration (int): Current iteration (timestep).
|
||||
updatecoeffsE (memory view): numpy array of electric field update
|
||||
coefficients.
|
||||
ID (memory view): numpy array of numeric IDs corresponding to
|
||||
materials in the model.
|
||||
Ex, Ey, Ez (memory view): numpy array of electric field values.
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
iteration: int of current iteration (timestep).
|
||||
updatecoeffsE: memory view of array of electric field update
|
||||
coefficients.
|
||||
ID: memory view of array of numeric IDs corresponding to materials
|
||||
in the model.
|
||||
Ex, Ey, Ez: memory view of array of electric field values.
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
"""
|
||||
|
||||
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
|
||||
@@ -423,13 +441,14 @@ class TransmissionLine(Source):
|
||||
in the main grid.
|
||||
|
||||
Args:
|
||||
iteration (int): Current iteration (timestep).
|
||||
updatecoeffsH (memory view): numpy array of magnetic field update
|
||||
coefficients.
|
||||
ID (memory view): numpy array of numeric IDs corresponding to
|
||||
materials in the model.
|
||||
Hx, Hy, Hz (memory view): numpy array of magnetic field values.
|
||||
G (FDTDGrid): Holds essential parameters describing the model.
|
||||
iteration: int of current iteration (timestep).
|
||||
updatecoeffsH: memory view of array of magnetic field update
|
||||
coefficients.
|
||||
ID: memory view of array of numeric IDs corresponding to materials
|
||||
in the model.
|
||||
Hx, Hy, Hz: memory view of array of magnetic field values.
|
||||
G: FDTDGrid object that holds essential parameters describing the
|
||||
model.
|
||||
"""
|
||||
|
||||
if iteration * G.dt >= self.start and iteration * G.dt <= self.stop:
|
||||
|
@@ -18,19 +18,21 @@
|
||||
|
||||
import logging
|
||||
from importlib import import_module
|
||||
from string import Template
|
||||
|
||||
import numpy as np
|
||||
from jinja2 import Environment, PackageLoader
|
||||
|
||||
import gprMax.config as config
|
||||
|
||||
from .cuda.fields_updates import kernel_template_fields
|
||||
from .cuda.snapshots import kernel_template_store_snapshot
|
||||
from .cuda.source_updates import kernel_template_sources
|
||||
from .cuda.snapshots import knl_template_store_snapshot
|
||||
from .cuda_opencl_el import (knl_fields_updates, knl_snapshots,
|
||||
knl_source_updates, knl_store_outputs)
|
||||
from .cython.fields_updates_normal import \
|
||||
update_electric as update_electric_cpu
|
||||
from .cython.fields_updates_normal import \
|
||||
update_magnetic as update_magnetic_cpu
|
||||
from .fields_outputs import kernel_template_store_outputs
|
||||
from .fields_outputs import knl_template_store_outputs
|
||||
from .fields_outputs import store_outputs as store_outputs_cpu
|
||||
from .receivers import dtoh_rx_array, htod_rx_arrays
|
||||
from .snapshots import Snapshot, dtoh_snapshot_array, htod_snapshot_array
|
||||
@@ -256,27 +258,27 @@ class CUDAUpdates:
|
||||
self.source_module = getattr(import_module('pycuda.compiler'), 'SourceModule')
|
||||
self.drv.init()
|
||||
|
||||
# Create device handle and context on specifc GPU device (and make it current context)
|
||||
# Create device handle and context on specific GPU device (and make it current context)
|
||||
self.dev = self.drv.Device(config.get_model_config().cuda['gpu'].deviceID)
|
||||
self.ctx = self.dev.make_context()
|
||||
|
||||
# Initialise arrays on GPU, prepare kernels, and get kernel functions
|
||||
self._set_field_kernels()
|
||||
self._set_field_knls()
|
||||
if self.grid.pmls:
|
||||
self._set_pml_kernels()
|
||||
self._set_pml_knls()
|
||||
if self.grid.rxs:
|
||||
self._set_rx_kernel()
|
||||
self._set_rx_knl()
|
||||
if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles:
|
||||
self._set_src_kernels()
|
||||
self._set_src_knls()
|
||||
if self.grid.snapshots:
|
||||
self._set_snapshot_kernel()
|
||||
self._set_snapshot_knl()
|
||||
|
||||
def _set_field_kernels(self):
|
||||
def _set_field_knls(self):
|
||||
"""Electric and magnetic field updates - prepare kernels, and
|
||||
get kernel functions.
|
||||
"""
|
||||
if config.get_model_config().materials['maxpoles'] > 0:
|
||||
kernels_fields = self.source_module(kernel_template_fields.substitute(
|
||||
knls_fields = self.source_module(knl_template_fields.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
REALFUNC=config.get_model_config().materials['cudarealfunc'],
|
||||
COMPLEX=config.get_model_config().materials['dispersiveCdtype'],
|
||||
@@ -296,7 +298,7 @@ class CUDAUpdates:
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
else: # Set to one any substitutions for dispersive materials.
|
||||
# Value of COMPLEX is not relevant.
|
||||
kernels_fields = self.source_module(kernel_template_fields.substitute(
|
||||
knls_fields = self.source_module(knl_template_fields.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
REALFUNC=config.get_model_config().materials['cudarealfunc'],
|
||||
COMPLEX=config.sim_config.dtypes['C_float_or_double'],
|
||||
@@ -314,17 +316,17 @@ class CUDAUpdates:
|
||||
NY_T=1,
|
||||
NZ_T=1),
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
self.update_electric_gpu = kernels_fields.get_function("update_electric")
|
||||
self.update_magnetic_gpu = kernels_fields.get_function("update_magnetic")
|
||||
self._copy_mat_coeffs(kernels_fields, kernels_fields)
|
||||
self.update_electric_gpu = knls_fields.get_function("update_electric")
|
||||
self.update_magnetic_gpu = knls_fields.get_function("update_magnetic")
|
||||
self._copy_mat_coeffs(knls_fields, knls_fields)
|
||||
|
||||
# Electric and magnetic field updates - dispersive materials
|
||||
# - get kernel functions and initialise array on GPU
|
||||
# If there are any dispersive materials (updates are split into two
|
||||
# parts as they require present and updated electric field values).
|
||||
if config.get_model_config().materials['maxpoles'] > 0:
|
||||
self.dispersive_update_a = kernels_fields.get_function("update_electric_dispersive_A")
|
||||
self.dispersive_update_b = kernels_fields.get_function("update_electric_dispersive_B")
|
||||
self.dispersive_update_a = knls_fields.get_function("update_electric_dispersive_A")
|
||||
self.dispersive_update_b = knls_fields.get_function("update_electric_dispersive_B")
|
||||
|
||||
# Electric and magnetic field updates - set blocks per grid and
|
||||
# initialise field arrays on GPU
|
||||
@@ -334,17 +336,17 @@ class CUDAUpdates:
|
||||
if config.get_model_config().materials['maxpoles'] > 0:
|
||||
self.grid.htod_dispersive_arrays()
|
||||
|
||||
def _set_pml_kernels(self):
|
||||
def _set_pml_knls(self):
|
||||
"""PMLS - prepare kernels and get kernel functions."""
|
||||
pmlmodulelectric = 'gprMax.cuda.pml_updates_electric_' + self.grid.pmlformulation
|
||||
kernelelectricfunc = getattr(import_module(pmlmodulelectric),
|
||||
'kernels_template_pml_electric_' +
|
||||
knlelectricfunc = getattr(import_module(pmlmodulelectric),
|
||||
'knls_template_pml_electric_' +
|
||||
self.grid.pmlformulation)
|
||||
pmlmodulemagnetic = 'gprMax.cuda.pml_updates_magnetic_' + self.grid.pmlformulation
|
||||
kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic),
|
||||
'kernels_template_pml_magnetic_' +
|
||||
knlmagneticfunc = getattr(import_module(pmlmodulemagnetic),
|
||||
'knls_template_pml_magnetic_' +
|
||||
self.grid.pmlformulation)
|
||||
kernels_pml_electric = self.source_module(kernelelectricfunc.substitute(
|
||||
knls_pml_electric = self.source_module(knlelectricfunc.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
N_updatecoeffsE=self.grid.updatecoeffsE.size,
|
||||
NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1],
|
||||
@@ -355,7 +357,7 @@ class CUDAUpdates:
|
||||
NY_ID=self.grid.ID.shape[2],
|
||||
NZ_ID=self.grid.ID.shape[3]),
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
kernels_pml_magnetic = self.source_module(kernelmagneticfunc.substitute(
|
||||
knls_pml_magnetic = self.source_module(knlmagneticfunc.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
N_updatecoeffsH=self.grid.updatecoeffsH.size,
|
||||
NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1],
|
||||
@@ -366,19 +368,19 @@ class CUDAUpdates:
|
||||
NY_ID=self.grid.ID.shape[2],
|
||||
NZ_ID=self.grid.ID.shape[3]),
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
self._copy_mat_coeffs(kernels_pml_electric, kernels_pml_magnetic)
|
||||
self._copy_mat_coeffs(knls_pml_electric, knls_pml_magnetic)
|
||||
# Set block per grid, initialise arrays on GPU, and get kernel functions
|
||||
for pml in self.grid.pmls:
|
||||
pml.htod_field_arrays()
|
||||
pml.set_blocks_per_grid()
|
||||
pml.get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
|
||||
pml.get_update_funcs(knls_pml_electric, knls_pml_magnetic)
|
||||
|
||||
def _set_rx_kernel(self):
|
||||
def _set_rx_knl(self):
|
||||
"""Receivers - initialise arrays on GPU, prepare kernel and get kernel
|
||||
function.
|
||||
"""
|
||||
self.rxcoords_gpu, self.rxs_gpu = htod_rx_arrays(self.grid)
|
||||
kernel_store_outputs = self.source_module(kernel_template_store_outputs.substitute(
|
||||
knl_store_outputs = self.source_module(knl_template_store_outputs.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
NY_RXCOORDS=3,
|
||||
NX_RXS=6,
|
||||
@@ -388,13 +390,13 @@ class CUDAUpdates:
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1),
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
self.store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")
|
||||
self.store_outputs_gpu = knl_store_outputs.get_function("store_outputs")
|
||||
|
||||
def _set_src_kernels(self):
|
||||
def _set_src_knls(self):
|
||||
"""Sources - initialise arrays on GPU, prepare kernel and get kernel
|
||||
function.
|
||||
"""
|
||||
kernels_sources = self.source_module(kernel_template_sources.substitute(
|
||||
knls_sources = self.source_module(knl_template_sources.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
N_updatecoeffsE=self.grid.updatecoeffsE.size,
|
||||
N_updatecoeffsH=self.grid.updatecoeffsH.size,
|
||||
@@ -408,23 +410,23 @@ class CUDAUpdates:
|
||||
NY_ID=self.grid.ID.shape[2],
|
||||
NZ_ID=self.grid.ID.shape[3]),
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
self._copy_mat_coeffs(kernels_sources, kernels_sources)
|
||||
self._copy_mat_coeffs(knls_sources, knls_sources)
|
||||
if self.grid.hertziandipoles:
|
||||
self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = htod_src_arrays(self.grid.hertziandipoles, self.grid)
|
||||
self.update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole")
|
||||
self.update_hertzian_dipole_gpu = knls_sources.get_function("update_hertzian_dipole")
|
||||
if self.grid.magneticdipoles:
|
||||
self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = htod_src_arrays(self.grid.magneticdipoles, self.grid)
|
||||
self.update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole")
|
||||
self.update_magnetic_dipole_gpu = knls_sources.get_function("update_magnetic_dipole")
|
||||
if self.grid.voltagesources:
|
||||
self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = htod_src_arrays(self.grid.voltagesources, self.grid)
|
||||
self.update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source")
|
||||
self.update_voltage_source_gpu = knls_sources.get_function("update_voltage_source")
|
||||
|
||||
def _set_snapshot_kernel(self):
|
||||
def _set_snapshot_knl(self):
|
||||
"""Snapshots - initialise arrays on GPU, prepare kernel and get kernel
|
||||
function.
|
||||
"""
|
||||
self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = htod_snapshot_array(self.grid)
|
||||
kernel_store_snapshot = self.source_module(kernel_template_store_snapshot.substitute(
|
||||
knl_store_snapshot = self.source_module(knl_template_store_snapshot.substitute(
|
||||
REAL=config.sim_config.dtypes['C_float_or_double'],
|
||||
NX_SNAPS=Snapshot.nx_max,
|
||||
NY_SNAPS=Snapshot.ny_max,
|
||||
@@ -433,25 +435,25 @@ class CUDAUpdates:
|
||||
NY_FIELDS=self.grid.ny + 1,
|
||||
NZ_FIELDS=self.grid.nz + 1),
|
||||
options=config.sim_config.cuda['nvcc_opts'])
|
||||
self.store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
|
||||
self.store_snapshot_gpu = knl_store_snapshot.get_function("store_snapshot")
|
||||
|
||||
def _copy_mat_coeffs(self, kernelE, kernelH):
|
||||
def _copy_mat_coeffs(self, knlE, knlH):
|
||||
"""Copy material coefficient arrays to constant memory of GPU
|
||||
(must be <64KB).
|
||||
|
||||
Args:
|
||||
kernelE (kernel): electric field kernel.
|
||||
kernelH (kernel): magnetic field kernel.
|
||||
knlE (kernel): electric field kernel.
|
||||
knlH (kernel): magnetic field kernel.
|
||||
"""
|
||||
|
||||
# Check if coefficient arrays will fit on constant memory of GPU
|
||||
if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes
|
||||
> config.get_model_config().cuda['gpu'].constmem):
|
||||
logger.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.get_model_config().cuda['gpu'].constmem)} on {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU")
|
||||
> config.get_model_config().cuda['gpu'].total_constant_memory):
|
||||
logger.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.get_model_config().cuda['gpu'].total_constant_memory)} on {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU")
|
||||
raise ValueError
|
||||
|
||||
updatecoeffsE = kernelE.get_global('updatecoeffsE')[0]
|
||||
updatecoeffsH = kernelH.get_global('updatecoeffsH')[0]
|
||||
updatecoeffsE = knlE.get_global('updatecoeffsE')[0]
|
||||
updatecoeffsH = knlH.get_global('updatecoeffsH')[0]
|
||||
self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE)
|
||||
self.drv.memcpy_htod(updatecoeffsH, self.grid.updatecoeffsH)
|
||||
|
||||
@@ -707,3 +709,595 @@ class CUDAUpdates:
|
||||
# Remove context from top of stack and delete
|
||||
self.ctx.pop()
|
||||
del self.ctx
|
||||
|
||||
|
||||
class OpenCLUpdates:
|
||||
"""Defines update functions for OpenCL-based solver."""
|
||||
|
||||
def __init__(self, G):
    """Create the OpenCL context and queue, then build all update kernels.

    Args:
        G: FDTDObject of parameters describing a grid in a model.
    """

    self.grid = G
    self.compute_time = 0
    self.dispersive_update_a = None
    self.dispersive_update_b = None

    # pyopencl is imported here, at construction time, rather than at
    # module import time
    self.cl = import_module('pyopencl')
    self.elwise = import_module('pyopencl.elementwise').ElementwiseKernel

    # Select device, create context and command queue. Profiling is
    # enabled on the queue because the update methods read
    # event.profile.start/end to accumulate compute_time.
    self.dev = config.get_model_config().device['dev']
    self.ctx = self.cl.Context(devices=[self.dev])
    queue_props = self.cl.command_queue_properties.PROFILING_ENABLE
    self.queue = self.cl.CommandQueue(self.ctx, properties=queue_props)

    # Environment for templating kernels
    self.env = Environment(loader=PackageLoader('gprMax', 'cuda_opencl_el'))

    # Initialise arrays on device, prepare kernels, and get kernel functions
    self._set_field_knls()
    if self.grid.pmls:
        self._set_pml_knls()
    if self.grid.rxs:
        self._set_rx_knl()
    all_sources = (self.grid.voltagesources
                   + self.grid.hertziandipoles
                   + self.grid.magneticdipoles)
    if all_sources:
        self._set_src_knls()
    if self.grid.snapshots:
        self._set_snapshot_knl()
|
||||
|
||||
def _set_field_knls(self):
    """Electric and magnetic field updates - render the common kernel
    preamble, build the elementwise kernels, and initialise the geometry
    and field arrays on the compute device.
    """
    grid = self.grid
    real = config.sim_config.dtypes['C_float_or_double']
    dispersive = config.get_model_config().materials['maxpoles'] > 0

    # Sizes for the dispersive substitutions; all set to one when there
    # are no dispersive materials.
    if dispersive:
        NY_MATDISPCOEFFS = grid.updatecoeffsdispersive.shape[1]
        t_shape = grid.Tx.shape
        NX_T, NY_T, NZ_T = t_shape[1], t_shape[2], t_shape[3]
    else:
        NY_MATDISPCOEFFS = NX_T = NY_T = NZ_T = 1

    # Field/ID array sizes shared by every kernel substitution below
    field_subs = {'NX_FIELDS': grid.nx + 1,
                  'NY_FIELDS': grid.ny + 1,
                  'NZ_FIELDS': grid.nz + 1,
                  'NX_ID': grid.ID.shape[1],
                  'NY_ID': grid.ID.shape[2],
                  'NZ_ID': grid.ID.shape[3]}

    # Common preamble that is passed to every elementwise kernel
    common_tmpl = self.env.get_template('knl_common_opencl.tmpl')
    self.knl_common = common_tmpl.render(
        updatecoeffsE=grid.updatecoeffsE.ravel(),
        updatecoeffsH=grid.updatecoeffsH.ravel(),
        REAL=real,
        N_updatecoeffsE=grid.updatecoeffsE.size,
        N_updatecoeffsH=grid.updatecoeffsH.size,
        NY_MATCOEFFS=grid.updatecoeffsE.shape[1],
        NY_MATDISPCOEFFS=NY_MATDISPCOEFFS,
        NX_FIELDS=field_subs['NX_FIELDS'],
        NY_FIELDS=field_subs['NY_FIELDS'],
        NZ_FIELDS=field_subs['NZ_FIELDS'],
        NX_ID=field_subs['NX_ID'],
        NY_ID=field_subs['NY_ID'],
        NZ_ID=field_subs['NZ_ID'],
        NX_T=NX_T,
        NY_T=NY_T,
        NZ_T=NZ_T,
        NY_RXCOORDS=3,
        NX_RXS=6,
        NY_RXS=grid.iterations,
        NZ_RXS=len(grid.rxs),
        NY_SRCINFO=4,
        NY_SRCWAVES=grid.iterations,
        NX_SNAPS=Snapshot.nx_max,
        NY_SNAPS=Snapshot.ny_max,
        NZ_SNAPS=Snapshot.nz_max)

    # Standard electric field update
    electric_args = Template("int NX, "
                             "int NY, "
                             "int NZ, "
                             "__global const unsigned int* restrict ID, "
                             "__global $REAL *Ex, "
                             "__global $REAL *Ey, "
                             "__global $REAL *Ez, "
                             "__global const $REAL * restrict Hx, "
                             "__global const $REAL * restrict Hy, "
                             "__global const $REAL * restrict Hz").substitute({'REAL': real})
    electric_body = knl_fields_updates.update_electric.substitute(field_subs)
    self.update_electric_dev = self.elwise(self.ctx, electric_args,
                                           electric_body, 'update_electric',
                                           preamble=self.knl_common)

    # Standard magnetic field update
    magnetic_args = Template("int NX, "
                             "int NY, "
                             "int NZ, "
                             "__global const unsigned int* restrict ID, "
                             "__global $REAL *Hx, "
                             "__global $REAL *Hy, "
                             "__global $REAL *Hz, "
                             "__global const $REAL * restrict Ex, "
                             "__global const $REAL * restrict Ey, "
                             "__global const $REAL * restrict Ez").substitute({'REAL': real})
    magnetic_body = knl_fields_updates.update_magnetic.substitute(field_subs)
    self.update_magnetic_dev = self.elwise(self.ctx, magnetic_args,
                                           magnetic_body, 'update_magnetic',
                                           preamble=self.knl_common)

    # Dispersive materials: the electric update is split into two parts
    # (A and B) as it requires present and updated electric field values.
    # Both parts take the same argument list.
    if dispersive:
        materials = config.get_model_config().materials
        dispersive_args = Template("int NX, "
                                   "int NY, "
                                   "int NZ, "
                                   "int MAXPOLES, "
                                   "__global const $COMPLEX* restrict updatecoeffsdispersive, "
                                   "__global $COMPLEX *Tx, "
                                   "__global $COMPLEX *Ty, "
                                   "__global $COMPLEX *Tz, "
                                   "__global const unsigned int* restrict ID, "
                                   "__global $REAL *Ex, "
                                   "__global $REAL *Ey, "
                                   "__global $REAL *Ez, "
                                   "__global const $REAL* restrict Hx, "
                                   "__global const $REAL* restrict Hy, "
                                   "__global const $REAL* restrict Hz").substitute(
                                       {'REAL': real,
                                        'COMPLEX': materials['dispersiveCdtype']})
        dispersive_subs = {'REAL': real,
                           'REALFUNC': materials['crealfunc'],
                           **field_subs,
                           'NX_T': NX_T,
                           'NY_T': NY_T,
                           'NZ_T': NZ_T}

        body_a = knl_fields_updates.update_electric_dispersive_A.substitute(dispersive_subs)
        self.dispersive_update_a = self.elwise(self.ctx, dispersive_args,
                                               body_a,
                                               'update_electric_dispersive_A',
                                               preamble=self.knl_common)

        body_b = knl_fields_updates.update_electric_dispersive_B.substitute(dispersive_subs)
        self.dispersive_update_b = self.elwise(self.ctx, dispersive_args,
                                               body_b,
                                               'update_electric_dispersive_B',
                                               preamble=self.knl_common)

    # Initialise geometry and field arrays on the compute device
    grid.htod_geometry_arrays(self.queue)
    grid.htod_field_arrays(self.queue)
    if dispersive:
        grid.htod_dispersive_arrays(self.queue)
|
||||
|
||||
def _set_pml_knls(self):
    """PMLS - prepare kernels and get kernel functions."""
    formulation = self.grid.pmlformulation
    knl_pml_updates_electric = import_module('gprMax.cuda_opencl_el.knl_pml_updates_electric_' + formulation)
    knl_pml_updates_magnetic = import_module('gprMax.cuda_opencl_el.knl_pml_updates_magnetic_' + formulation)

    # Substitutions are identical for every PML slab, so build them once
    real = config.sim_config.dtypes['C_float_or_double']
    arg_subs = {'REAL': real}
    func_subs = {'REAL': real,
                 'NX_FIELDS': self.grid.nx + 1,
                 'NY_FIELDS': self.grid.ny + 1,
                 'NZ_FIELDS': self.grid.nz + 1,
                 'NX_ID': self.grid.ID.shape[1],
                 'NY_ID': self.grid.ID.shape[2],
                 'NZ_ID': self.grid.ID.shape[3]}

    # Set workgroup size, initialise arrays on compute device, and get
    # kernel functions
    for pml in self.grid.pmls:
        pml.set_queue(self.queue)
        pml.htod_field_arrays()
        pml.set_wgs()

        # Kernel templates are looked up by CFS order and PML direction
        knl_name = 'order' + str(len(pml.CFS)) + '_' + pml.direction
        electric_knl = getattr(knl_pml_updates_electric, knl_name)
        magnetic_knl = getattr(knl_pml_updates_magnetic, knl_name)

        pml.update_electric_dev = self.elwise(
            self.ctx,
            electric_knl['args'].substitute(arg_subs),
            electric_knl['func'].substitute(func_subs),
            'pml_updates_electric_' + knl_name,
            preamble=self.knl_common)

        pml.update_magnetic_dev = self.elwise(
            self.ctx,
            magnetic_knl['args'].substitute(arg_subs),
            magnetic_knl['func'].substitute(func_subs),
            'pml_updates_magnetic_' + knl_name,
            preamble=self.knl_common)
|
||||
|
||||
def _set_rx_knl(self):
    """Receivers - initialise arrays on compute device, prepare kernel and
    get kernel function.
    """
    self.rxcoords_dev, self.rxs_dev = htod_rx_arrays(self.grid, self.queue)

    real = config.sim_config.dtypes['C_float_or_double']
    rx_args = Template("int NRX, "
                       "int iteration, "
                       "__global const int* restrict rxcoords, "
                       "__global $REAL *rxs, "
                       "__global const $REAL* restrict Ex, "
                       "__global const $REAL* restrict Ey, "
                       "__global const $REAL* restrict Ez, "
                       "__global const $REAL* restrict Hx, "
                       "__global const $REAL* restrict Hy, "
                       "__global const $REAL* restrict Hz").substitute({'REAL': real})
    # The kernel body template is substituted with no mapping
    rx_body = knl_store_outputs.store_outputs.substitute()

    self.store_outputs_dev = self.elwise(self.ctx, rx_args, rx_body,
                                         'store_outputs',
                                         preamble=self.knl_common)
|
||||
|
||||
def _set_src_knls(self):
    """Sources - initialise arrays on compute device, prepare kernels and
    get kernel functions for each type of source present in the grid.
    """
    real_sub = {'REAL': config.sim_config.dtypes['C_float_or_double']}

    def src_args(count_name, field):
        # The three source kernels share one argument list; only the name
        # of the source-count argument and the updated field (E or H)
        # differ.
        return Template(f"int {count_name}, "
                        "int iteration, "
                        "$REAL dx, "
                        "$REAL dy, "
                        "$REAL dz, "
                        "__global const int* restrict srcinfo1, "
                        "__global const $REAL* restrict srcinfo2, "
                        "__global const $REAL* restrict srcwaveforms, "
                        "__global const unsigned int* restrict ID, "
                        f"__global $REAL *{field}x, "
                        f"__global $REAL *{field}y, "
                        f"__global $REAL *{field}z").substitute(real_sub)

    if self.grid.hertziandipoles:
        (self.srcinfo1_hertzian_dev,
         self.srcinfo2_hertzian_dev,
         self.srcwaves_hertzian_dev) = htod_src_arrays(self.grid.hertziandipoles,
                                                       self.grid, self.queue)
        self.update_hertzian_dipole_dev = self.elwise(
            self.ctx,
            src_args('NHERTZDIPOLE', 'E'),
            knl_source_updates.update_hertzian_dipole.substitute(real_sub),
            'update_hertzian_dipole', preamble=self.knl_common)

    if self.grid.magneticdipoles:
        (self.srcinfo1_magnetic_dev,
         self.srcinfo2_magnetic_dev,
         self.srcwaves_magnetic_dev) = htod_src_arrays(self.grid.magneticdipoles,
                                                       self.grid, self.queue)
        self.update_magnetic_dipole_dev = self.elwise(
            self.ctx,
            src_args('NMAGDIPOLE', 'H'),
            knl_source_updates.update_magnetic_dipole.substitute(real_sub),
            'update_magnetic_dipole', preamble=self.knl_common)

    if self.grid.voltagesources:
        (self.srcinfo1_voltage_dev,
         self.srcinfo2_voltage_dev,
         self.srcwaves_voltage_dev) = htod_src_arrays(self.grid.voltagesources,
                                                      self.grid, self.queue)
        self.update_voltage_source_dev = self.elwise(
            self.ctx,
            src_args('NVOLTSRC', 'E'),
            knl_source_updates.update_voltage_source.substitute(real_sub),
            'update_voltage_source', preamble=self.knl_common)
|
||||
|
||||
def _set_snapshot_knl(self):
    """Snapshots - initialise arrays on compute device, prepare kernel and
    get kernel function.
    """
    (self.snapEx_dev, self.snapEy_dev, self.snapEz_dev,
     self.snapHx_dev, self.snapHy_dev, self.snapHz_dev) = htod_snapshot_array(self.grid, self.queue)

    real = config.sim_config.dtypes['C_float_or_double']
    snap_args = Template("int p, "
                         "int xs, "
                         "int xf, "
                         "int ys, "
                         "int yf, "
                         "int zs, "
                         "int zf, "
                         "int dx, "
                         "int dy, "
                         "int dz, "
                         "__global const $REAL* restrict Ex, "
                         "__global const $REAL* restrict Ey, "
                         "__global const $REAL* restrict Ez, "
                         "__global const $REAL* restrict Hx, "
                         "__global const $REAL* restrict Hy, "
                         "__global const $REAL* restrict Hz, "
                         "__global $REAL *snapEx, "
                         "__global $REAL *snapEy, "
                         "__global $REAL *snapEz, "
                         "__global $REAL *snapHx, "
                         "__global $REAL *snapHy, "
                         "__global $REAL *snapHz").substitute({'REAL': real})

    # Kernel body is sized by the maximum snapshot dimensions
    snap_body = knl_snapshots.store_snapshot.substitute(
        {'NX_SNAPS': Snapshot.nx_max,
         'NY_SNAPS': Snapshot.ny_max,
         'NZ_SNAPS': Snapshot.nz_max})

    self.store_snapshot_dev = self.elwise(self.ctx, snap_args, snap_body,
                                          'store_snapshot',
                                          preamble=self.knl_common)
|
||||
|
||||
def store_outputs(self):
|
||||
"""Store field component values for every receiver."""
|
||||
if self.grid.rxs:
|
||||
event = self.store_outputs_dev(np.int32(len(self.grid.rxs)),
|
||||
np.int32(self.grid.iteration),
|
||||
self.rxcoords_dev,
|
||||
self.rxs_dev,
|
||||
self.grid.Ex_dev,
|
||||
self.grid.Ey_dev,
|
||||
self.grid.Ez_dev,
|
||||
self.grid.Hx_dev,
|
||||
self.grid.Hy_dev,
|
||||
self.grid.Hz_dev)
|
||||
event.wait()
|
||||
self.compute_time += (event.profile.end - event.profile.start)*1e-9
|
||||
|
||||
def store_snapshots(self, iteration):
    """Store any snapshots.

    Args:
        iteration: int for iteration number.
    """

    grid = self.grid
    for i, snap in enumerate(grid.snapshots):
        # Only snapshots scheduled for this iteration are stored
        if snap.time != iteration + 1:
            continue

        # When snapshots are copied to the host during the simulation,
        # slot 0 of the device arrays is used for every snapshot
        copy_to_host = config.get_model_config().device['snapsgpu2cpu']
        snapno = 0 if copy_to_host else i

        event = self.store_snapshot_dev(np.int32(snapno),
                                        np.int32(snap.xs),
                                        np.int32(snap.xf),
                                        np.int32(snap.ys),
                                        np.int32(snap.yf),
                                        np.int32(snap.zs),
                                        np.int32(snap.zf),
                                        np.int32(snap.dx),
                                        np.int32(snap.dy),
                                        np.int32(snap.dz),
                                        grid.Ex_dev,
                                        grid.Ey_dev,
                                        grid.Ez_dev,
                                        grid.Hx_dev,
                                        grid.Hy_dev,
                                        grid.Hz_dev,
                                        self.snapEx_dev,
                                        self.snapEy_dev,
                                        self.snapEz_dev,
                                        self.snapHx_dev,
                                        self.snapHy_dev,
                                        self.snapHz_dev)
        event.wait()

        if copy_to_host:
            dtoh_snapshot_array(self.snapEx_dev.get(),
                                self.snapEy_dev.get(),
                                self.snapEz_dev.get(),
                                self.snapHx_dev.get(),
                                self.snapHy_dev.get(),
                                self.snapHz_dev.get(),
                                0,
                                snap)

        # Accumulate the profiled kernel duration (scaled by 1e-9)
        self.compute_time += (event.profile.end - event.profile.start)*1e-9
|
||||
|
||||
def update_magnetic(self):
|
||||
"""Update magnetic field components."""
|
||||
event = self.update_magnetic_dev(np.int32(self.grid.nx),
|
||||
np.int32(self.grid.ny),
|
||||
np.int32(self.grid.nz),
|
||||
self.grid.ID_dev,
|
||||
self.grid.Hx_dev,
|
||||
self.grid.Hy_dev,
|
||||
self.grid.Hz_dev,
|
||||
self.grid.Ex_dev,
|
||||
self.grid.Ey_dev,
|
||||
self.grid.Ez_dev)
|
||||
event.wait()
|
||||
self.compute_time += (event.profile.end - event.profile.start)*1e-9
|
||||
|
||||
def update_magnetic_pml(self):
|
||||
"""Update magnetic field components with the PML correction."""
|
||||
for pml in self.grid.pmls:
|
||||
pml.update_magnetic()
|
||||
self.compute_time += pml.compute_time
|
||||
|
||||
def update_magnetic_sources(self):
|
||||
"""Update magnetic field components from sources."""
|
||||
if self.grid.magneticdipoles:
|
||||
event = self.update_magnetic_dipole_dev(np.int32(len(self.grid.magneticdipoles)),
|
||||
np.int32(self.grid.iteration),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dx),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dy),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dz),
|
||||
self.srcinfo1_magnetic_dev,
|
||||
self.srcinfo2_magnetic_dev,
|
||||
self.srcwaves_magnetic_dev,
|
||||
self.grid.ID_dev,
|
||||
self.grid.Hx_dev,
|
||||
self.grid.Hy_dev,
|
||||
self.grid.Hz_dev)
|
||||
event.wait()
|
||||
self.compute_time += (event.profile.end - event.profile.start)*1e-9
|
||||
|
||||
def update_electric_a(self):
    """Update electric field components.

    Runs the standard update when all materials are non-dispersive,
    otherwise the 1st part of the dispersive update.
    """
    grid = self.grid
    maxpoles = config.get_model_config().materials['maxpoles']
    dims = (np.int32(grid.nx), np.int32(grid.ny), np.int32(grid.nz))

    if maxpoles == 0:
        # All materials are non-dispersive so do standard update.
        event = self.update_electric_dev(*dims,
                                         grid.ID_dev,
                                         grid.Ex_dev,
                                         grid.Ey_dev,
                                         grid.Ez_dev,
                                         grid.Hx_dev,
                                         grid.Hy_dev,
                                         grid.Hz_dev)
    else:
        # If there are any dispersive materials do 1st part of dispersive
        # update (it is split into two parts as it requires present and
        # updated electric field values).
        event = self.dispersive_update_a(*dims,
                                         np.int32(maxpoles),
                                         grid.updatecoeffsdispersive_dev,
                                         grid.Tx_dev,
                                         grid.Ty_dev,
                                         grid.Tz_dev,
                                         grid.ID_dev,
                                         grid.Ex_dev,
                                         grid.Ey_dev,
                                         grid.Ez_dev,
                                         grid.Hx_dev,
                                         grid.Hy_dev,
                                         grid.Hz_dev)

    event.wait()
    # Accumulate the profiled kernel duration (scaled by 1e-9)
    self.compute_time += (event.profile.end - event.profile.start)*1e-9
|
||||
|
||||
def update_electric_pml(self):
|
||||
"""Update electric field components with the PML correction."""
|
||||
for pml in self.grid.pmls:
|
||||
pml.update_electric()
|
||||
self.compute_time += pml.compute_time
|
||||
|
||||
def update_electric_sources(self):
|
||||
"""Update electric field components from sources -
|
||||
update any Hertzian dipole sources last.
|
||||
"""
|
||||
if self.grid.voltagesources:
|
||||
event = self.update_voltage_source_dev(np.int32(len(self.grid.voltagesources)),
|
||||
np.int32(self.grid.iteration),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dx),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dy),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dz),
|
||||
self.srcinfo1_voltage_dev,
|
||||
self.srcinfo2_voltage_dev,
|
||||
self.srcwaves_voltage_dev,
|
||||
self.grid.ID_dev,
|
||||
self.grid.Ex_dev,
|
||||
self.grid.Ey_dev,
|
||||
self.grid.Ez_dev)
|
||||
event.wait()
|
||||
self.compute_time += (event.profile.end - event.profile.start)*1e-9
|
||||
|
||||
if self.grid.hertziandipoles:
|
||||
event = self.update_hertzian_dipole_dev(np.int32(len(self.grid.hertziandipoles)),
|
||||
np.int32(self.grid.iteration),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dx),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dy),
|
||||
config.sim_config.dtypes['float_or_double'](self.grid.dz),
|
||||
self.srcinfo1_hertzian_dev,
|
||||
self.srcinfo2_hertzian_dev,
|
||||
self.srcwaves_hertzian_dev,
|
||||
self.grid.ID_dev,
|
||||
self.grid.Ex_dev,
|
||||
self.grid.Ey_dev,
|
||||
self.grid.Ez_dev)
|
||||
event.wait()
|
||||
self.compute_time += (event.profile.end - event.profile.start)*1e-9
|
||||
|
||||
self.grid.iteration += 1
|
||||
|
||||
def update_electric_b(self):
    """If there are any dispersive materials do 2nd part of dispersive
    update - it is split into two parts as it requires present and
    updated electric field values. Therefore it can only be completely
    updated after the electric field has been updated by the PML and
    source updates.
    """
    maxpoles = config.get_model_config().materials['maxpoles']
    if not maxpoles > 0:
        # No dispersive materials, nothing to do
        return

    grid = self.grid
    event = self.dispersive_update_b(np.int32(grid.nx),
                                     np.int32(grid.ny),
                                     np.int32(grid.nz),
                                     np.int32(maxpoles),
                                     grid.updatecoeffsdispersive_dev,
                                     grid.Tx_dev,
                                     grid.Ty_dev,
                                     grid.Tz_dev,
                                     grid.ID_dev,
                                     grid.Ex_dev,
                                     grid.Ey_dev,
                                     grid.Ez_dev,
                                     grid.Hx_dev,
                                     grid.Hy_dev,
                                     grid.Hz_dev)
    event.wait()
    # Accumulate the profiled kernel duration (scaled by 1e-9)
    self.compute_time += (event.profile.end - event.profile.start)*1e-9
|
||||
|
||||
def time_start(self):
|
||||
pass
|
||||
|
||||
def calculate_memsolve(self, iteration):
    """Calculate memory used on last iteration.

    Not yet implemented for pyopencl: only a debug message is logged.

    Args:
        iteration: int of iteration number.

    Returns:
        None (no memory estimate for the compute device is produced yet).
    """
    # TODO: provide a pyopencl memory estimate on the last iteration, i.e.
    #       when iteration == self.grid.iterations - 1 (the equivalent of
    #       self.drv.mem_get_info()[1] - self.drv.mem_get_info()[0]).
    logger.debug("Look at memory estimate for pyopencl")
    return None
|
||||
|
||||
def calculate_tsolve(self):
|
||||
"""Calculate solving time for model."""
|
||||
return self.compute_time
|
||||
|
||||
def finalise(self):
    """Copy data from compute device back to CPU to save to file(s).

    Receiver outputs are always copied back. Snapshot data is copied back
    here only when snapshots were not already transferred to the host
    during the simulation (the 'snapsgpu2cpu' setting).
    """
    # Copy output from receivers array back to correct receiver objects
    if self.grid.rxs:
        dtoh_rx_array(self.rxs_dev.get(), self.rxcoords_dev.get(), self.grid)

    # Copy data from any snapshots back to correct snapshot objects
    if self.grid.snapshots and not config.get_model_config().device['snapsgpu2cpu']:
        # Transfer each device array to the host once: the arrays hold all
        # snapshots, so repeating the six .get() calls for every snapshot
        # would copy the same data again each time.
        # NOTE(review): assumes dtoh_snapshot_array only reads the host
        # arrays it is given - confirm against its definition.
        host_snaps = (self.snapEx_dev.get(),
                      self.snapEy_dev.get(),
                      self.snapEz_dev.get(),
                      self.snapHx_dev.get(),
                      self.snapHy_dev.get(),
                      self.snapHz_dev.get())
        for i, snap in enumerate(self.grid.snapshots):
            dtoh_snapshot_array(*host_snaps, i, snap)
|
||||
|
||||
def cleanup(self):
    """Cleanup compute device context.

    Currently only logs a debug message; no context is released.
    """
    # TODO: decide whether the context needs to be explicitly removed and
    #       deleted (the equivalent of self.ctx.pop() and del self.ctx).
    logger.debug("Check if pyopencl needs explicit cleanup.")
|
||||
|
@@ -26,7 +26,7 @@ import sys
|
||||
import gprMax.config as config
|
||||
import psutil
|
||||
|
||||
from .utilities import human_size
|
||||
from .utilities import get_terminal_width, human_size
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -35,9 +35,9 @@ def get_host_info():
|
||||
"""Get information about the machine, CPU, RAM, and OS.
|
||||
|
||||
Returns:
|
||||
hostinfo (dict): Manufacturer and model of machine; description of CPU
|
||||
type, speed, cores; RAM; name and
|
||||
version of operating system.
|
||||
hostinfo: dict containing manufacturer and model of machine;
|
||||
description of CPU type, speed, cores; RAM; name and
|
||||
version of operating system.
|
||||
"""
|
||||
|
||||
# Default to 'unknown' if any of the detection fails
|
||||
@@ -47,13 +47,17 @@ def get_host_info():
|
||||
if sys.platform == 'win32':
|
||||
# Manufacturer/model
|
||||
try:
|
||||
manufacturer = subprocess.check_output("wmic csproduct get vendor", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
manufacturer = subprocess.check_output("wmic csproduct get vendor",
|
||||
shell=True,
|
||||
stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
manufacturer = manufacturer.split('\n')
|
||||
if len(manufacturer) > 1:
|
||||
manufacturer = manufacturer[1]
|
||||
else:
|
||||
manufacturer = manufacturer[0]
|
||||
model = subprocess.check_output("wmic computersystem get model", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
model = subprocess.check_output("wmic computersystem get model",
|
||||
shell=True,
|
||||
stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
model = model.split('\n')
|
||||
if len(model) > 1:
|
||||
model = model[1]
|
||||
@@ -61,16 +65,19 @@ def get_host_info():
|
||||
model = model[0]
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
machineID = manufacturer + ' ' + model
|
||||
machineID = ' '.join(manufacturer.split()) + ' ' + ' '.join(model.split())
|
||||
|
||||
# CPU information
|
||||
try:
|
||||
allcpuinfo = subprocess.check_output("wmic cpu get Name", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
allcpuinfo = subprocess.check_output("wmic cpu get Name",
|
||||
shell=True,
|
||||
stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
allcpuinfo = allcpuinfo.split('\n')
|
||||
sockets = 0
|
||||
for line in allcpuinfo:
|
||||
if 'CPU' in line:
|
||||
cpuID = line.strip()
|
||||
cpuID = ' '.join(cpuID.split())
|
||||
sockets += 1
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
@@ -93,16 +100,21 @@ def get_host_info():
|
||||
# Manufacturer/model
|
||||
manufacturer = 'Apple'
|
||||
try:
|
||||
model = subprocess.check_output("sysctl -n hw.model", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
model = subprocess.check_output("sysctl -n hw.model", shell=True,
|
||||
stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
machineID = manufacturer + ' ' + model
|
||||
machineID = ' '.join(manufacturer.split()) + ' ' + ' '.join(model.split())
|
||||
|
||||
# CPU information
|
||||
try:
|
||||
sockets = subprocess.check_output("sysctl -n hw.packages", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
sockets = subprocess.check_output("sysctl -n hw.packages",
|
||||
shell=True,
|
||||
stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
sockets = int(sockets)
|
||||
cpuID = subprocess.check_output("sysctl -n machdep.cpu.brand_string", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
cpuID = subprocess.check_output("sysctl -n machdep.cpu.brand_string",
|
||||
shell=True,
|
||||
stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
cpuID = ' '.join(cpuID.split())
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
@@ -123,21 +135,30 @@ def get_host_info():
|
||||
elif sys.platform == 'linux':
|
||||
# Manufacturer/model
|
||||
try:
|
||||
manufacturer = subprocess.check_output("cat /sys/class/dmi/id/sys_vendor", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
model = subprocess.check_output("cat /sys/class/dmi/id/product_name", shell=True, stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
manufacturer = subprocess.check_output("cat /sys/class/dmi/id/sys_vendor",
|
||||
shell=True,
|
||||
stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
model = subprocess.check_output("cat /sys/class/dmi/id/product_name",
|
||||
shell=True,
|
||||
stderr=subprocess.STDOUT).decode('utf-8').strip()
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
machineID = manufacturer + ' ' + model
|
||||
machineID = ' '.join(manufacturer.split()) + ' ' + ' '.join(model.split())
|
||||
|
||||
# CPU information
|
||||
try:
|
||||
# Locale to ensure English
|
||||
myenv = {**os.environ, 'LANG': 'en_US.utf8'}
|
||||
cpuIDinfo = subprocess.check_output("cat /proc/cpuinfo", shell=True, stderr=subprocess.STDOUT, env=myenv).decode('utf-8').strip()
|
||||
cpuIDinfo = subprocess.check_output("cat /proc/cpuinfo", shell=True,
|
||||
stderr=subprocess.STDOUT,
|
||||
env=myenv).decode('utf-8').strip()
|
||||
for line in cpuIDinfo.split('\n'):
|
||||
if re.search('model name', line):
|
||||
cpuID = re.sub('.*model name.*:', '', line, 1).strip()
|
||||
allcpuinfo = subprocess.check_output("lscpu", shell=True, stderr=subprocess.STDOUT, env=myenv).decode('utf-8').strip()
|
||||
cpuID = ' '.join(cpuID.split())
|
||||
allcpuinfo = subprocess.check_output("lscpu", shell=True,
|
||||
stderr=subprocess.STDOUT,
|
||||
env=myenv).decode('utf-8').strip()
|
||||
for line in allcpuinfo.split('\n'):
|
||||
if 'Socket(s)' in line:
|
||||
sockets = int(line.strip()[-1])
|
||||
@@ -177,11 +198,31 @@ def get_host_info():
|
||||
return hostinfo
|
||||
|
||||
|
||||
def print_host_info(hostinfo):
|
||||
"""Print information about the machine, CPU, RAM, and OS.
|
||||
|
||||
Args:
|
||||
hostinfo: dict containing manufacturer and model of machine;
|
||||
description of CPU type, speed, cores; RAM; name and
|
||||
version of operating system.
|
||||
"""
|
||||
|
||||
hyperthreadingstr = (f", {config.sim_config.hostinfo['logicalcores']} "
|
||||
f"cores with Hyper-Threading" if config.sim_config.hostinfo['hyperthreading'] else '')
|
||||
logger.basic(f"\n{config.sim_config.hostinfo['hostname']} | "
|
||||
f"{config.sim_config.hostinfo['machineID']} "
|
||||
f"{hostinfo['sockets']} x {hostinfo['cpuID']} "
|
||||
f"({hostinfo['physicalcores']} cores{hyperthreadingstr}) | "
|
||||
f"{human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} | "
|
||||
f"{hostinfo['osversion']}")
|
||||
logger.basic(f"|--->OpenMP: {hostinfo['physicalcores']} threads")
|
||||
|
||||
|
||||
def set_omp_threads(nthreads=None):
|
||||
"""Sets the number of OpenMP CPU threads for parallelised parts of code.
|
||||
|
||||
Returns:
|
||||
nthreads (int): Number of OpenMP threads.
|
||||
nthreads: int for number of OpenMP threads.
|
||||
"""
|
||||
|
||||
if sys.platform == 'darwin':
|
||||
@@ -228,29 +269,39 @@ def mem_check_host(mem):
|
||||
"""Check if the required amount of memory (RAM) is available on host.
|
||||
|
||||
Args:
|
||||
mem (int): Memory required (bytes).
|
||||
mem: int for memory required (bytes).
|
||||
"""
|
||||
if mem > config.sim_config.hostinfo['ram']:
|
||||
logger.exception(f"Memory (RAM) required ~{human_size(mem)} exceeds {human_size(config.sim_config.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} detected!\n")
|
||||
logger.exception(f"Memory (RAM) required ~{human_size(mem)} exceeds "
|
||||
f"{human_size(config.sim_config.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} "
|
||||
"detected!\n")
|
||||
raise ValueError
|
||||
|
||||
|
||||
def mem_check_gpu_snaps(total_mem, snaps_mem):
|
||||
def mem_check_device_snaps(total_mem, snaps_mem):
|
||||
"""Check if the required amount of memory (RAM) for all snapshots can fit
|
||||
on specified GPU.
|
||||
on specified device.
|
||||
|
||||
Args:
|
||||
total_mem (int): Total memory required for model (bytes).
|
||||
snaps_mem (int): Memory required for all snapshots (bytes).
|
||||
total_mem: int for total memory required for model (bytes).
|
||||
snaps_mem: int for memory required for all snapshots (bytes).
|
||||
"""
|
||||
if total_mem - snaps_mem > config.get_model_config().cuda['gpu'].totalmem:
|
||||
logger.exception(f"Memory (RAM) required ~{human_size(total_mem)} exceeds {human_size(config.get_model_config().cuda['gpu'].totalmem, a_kilobyte_is_1024_bytes=True)} detected on specified {config.get_model_config().cuda['gpu'].deviceID} - {config.get_model_config().cuda['gpu'].name} GPU!\n")
|
||||
|
||||
if config.sim_config.general['solver'] == 'cuda':
|
||||
device_mem = config.get_model_config().device['dev'].total_memory()
|
||||
elif config.sim_config.general['solver'] == 'opencl':
|
||||
device_mem = config.get_model_config().device['dev'].global_mem_size
|
||||
|
||||
if total_mem - snaps_mem > device_mem:
|
||||
logger.exception(f"Memory (RAM) required ~{human_size(total_mem)} exceeds "
|
||||
f"{human_size(device_mem, a_kilobyte_is_1024_bytes=True)} "
|
||||
f"detected on specified {' '.join(config.get_model_config().device['dev'].name.split())} device!\n")
|
||||
raise ValueError
|
||||
|
||||
# If the required memory without the snapshots will fit on the GPU then
|
||||
# transfer and store snapshots on host
|
||||
if snaps_mem != 0 and total_mem - snaps_mem < config.get_model_config().cuda['gpu'].totalmem:
|
||||
config.get_model_config().cuda['snapsgpu2cpu'] = True
|
||||
if snaps_mem != 0 and total_mem - snaps_mem < device_mem:
|
||||
config.get_model_config().device['snapsgpu2cpu'] = True
|
||||
|
||||
|
||||
def mem_check_all(grids):
|
||||
@@ -259,12 +310,12 @@ def mem_check_all(grids):
|
||||
memory.
|
||||
|
||||
Args:
|
||||
grids (list): FDTDGrid objects.
|
||||
grids: list of FDTDGrid objects.
|
||||
|
||||
Returns:
|
||||
total_mem (int): Total memory required for all grids.
|
||||
mem_strs (list): Strings containing text of memory requirements for
|
||||
each grid.
|
||||
total_mem: int for total memory required for all grids.
|
||||
mem_str: list of strings containing text of memory requirements for
|
||||
each grid.
|
||||
"""
|
||||
|
||||
total_snaps_mem = 0
|
||||
@@ -297,59 +348,56 @@ def mem_check_all(grids):
|
||||
mem_check_host(total_mem)
|
||||
|
||||
# Check if there is sufficient memory for any snapshots on GPU
|
||||
if total_snaps_mem > 0 and config.sim_config.general['cuda']:
|
||||
mem_check_gpu_snaps(total_mem, total_snaps_mem)
|
||||
if (total_snaps_mem > 0 and config.sim_config.general['solver'] == 'cuda' or
|
||||
config.sim_config.general['solver'] == 'opencl'):
|
||||
mem_check_device_snaps(total_mem, total_snaps_mem)
|
||||
|
||||
return total_mem, mem_strs
|
||||
|
||||
|
||||
class GPU:
|
||||
"""GPU information."""
|
||||
def has_pycuda():
|
||||
"""Check if pycuda module is installed."""
|
||||
pycuda = True
|
||||
try:
|
||||
import pycuda
|
||||
except ImportError:
|
||||
pycuda = False
|
||||
return pycuda
|
||||
|
||||
def __init__(self):
|
||||
|
||||
self.deviceID = None
|
||||
self.name = None
|
||||
self.pcibusID = None
|
||||
self.constmem = None
|
||||
self.totalmem = None
|
||||
|
||||
def get_cuda_gpu_info(self, drv, deviceID):
|
||||
"""Set information about GPU.
|
||||
|
||||
Args:
|
||||
drv (object): pycuda driver.
|
||||
deviceID (int): Device ID for GPU.
|
||||
"""
|
||||
|
||||
self.deviceID = deviceID
|
||||
self.name = drv.Device(self.deviceID).name()
|
||||
self.pcibusID = drv.Device(self.deviceID).pci_bus_id()
|
||||
self.constmem = drv.Device(self.deviceID).total_constant_memory
|
||||
self.totalmem = drv.Device(self.deviceID).total_memory()
|
||||
def has_pyopencl():
|
||||
"""Check if pyopencl module is installed."""
|
||||
pyopencl = True
|
||||
try:
|
||||
import pyopencl
|
||||
except ImportError:
|
||||
pyopencl = False
|
||||
return pyopencl
|
||||
|
||||
|
||||
def detect_cuda_gpus():
|
||||
"""Get information about Nvidia GPU(s).
|
||||
"""Get information about CUDA-capable GPU(s).
|
||||
|
||||
Returns:
|
||||
gpus (list): Detected GPU(s) object(s).
|
||||
gpus: dict of detected pycuda device object(s) where device ID(s)
|
||||
are keys.
|
||||
"""
|
||||
|
||||
try:
|
||||
gpus = {}
|
||||
|
||||
cuda_reqs = ('To use gprMax with CUDA you must:'
|
||||
'\n 1) install pycuda'
|
||||
'\n 2) install NVIDIA CUDA Toolkit (https://developer.nvidia.com/cuda-toolkit)'
|
||||
'\n 3) have an NVIDIA CUDA-Enabled GPU (https://developer.nvidia.com/cuda-gpus)')
|
||||
|
||||
if has_pycuda():
|
||||
import pycuda.driver as drv
|
||||
has_pycuda = True
|
||||
except ImportError:
|
||||
logger.warning('pycuda not detected - to use gprMax in GPU mode the pycuda package must be installed, and you must have a NVIDIA CUDA-Enabled GPU (https://developer.nvidia.com/cuda-gpus).')
|
||||
has_pycuda = False
|
||||
|
||||
if has_pycuda:
|
||||
drv.init()
|
||||
|
||||
# Check and list any CUDA-Enabled GPUs
|
||||
deviceIDsavail = []
|
||||
if drv.Device.count() == 0:
|
||||
logger.exception('No NVIDIA CUDA-Enabled GPUs detected (https://developer.nvidia.com/cuda-gpus)')
|
||||
raise ValueError
|
||||
logger.warning('No NVIDIA CUDA-Enabled GPUs detected!\n' + cuda_reqs)
|
||||
elif 'CUDA_VISIBLE_DEVICES' in os.environ:
|
||||
deviceIDsavail = os.environ.get('CUDA_VISIBLE_DEVICES')
|
||||
deviceIDsavail = [int(s) for s in deviceIDsavail.split(',')]
|
||||
@@ -357,33 +405,88 @@ def detect_cuda_gpus():
|
||||
deviceIDsavail = range(drv.Device.count())
|
||||
|
||||
# Gather information about detected GPUs
|
||||
gpus = []
|
||||
for ID in deviceIDsavail:
|
||||
gpu = GPU()
|
||||
gpu.get_cuda_gpu_info(drv, ID)
|
||||
gpus.append(gpu)
|
||||
gpus[ID] = drv.Device(ID)
|
||||
|
||||
else:
|
||||
gpus = None
|
||||
logger.warning('pycuda not detected!\n' + cuda_reqs)
|
||||
|
||||
return gpus
|
||||
|
||||
|
||||
def print_cuda_info(devs):
|
||||
"""Print info about detected CUDA-capable GPU(s).
|
||||
|
||||
Args:
|
||||
devs: dict of detected pycuda device object(s) where device ID(s)
|
||||
are keys.
|
||||
"""
|
||||
|
||||
import pycuda
|
||||
|
||||
logger.basic('|--->CUDA:')
|
||||
logger.debug(f'PyCUDA: {pycuda.VERSION_TEXT}')
|
||||
|
||||
for ID, gpu in devs.items():
|
||||
logger.basic(f" |--->Device {ID}: {' '.join(gpu.name.split())} | "
|
||||
f"{human_size(gpu.total_memory(), a_kilobyte_is_1024_bytes=True)}")
|
||||
|
||||
|
||||
def detect_opencl():
|
||||
"""Get information about OpenCL platforms and devices.
|
||||
|
||||
Returns:
|
||||
gpus (list): Detected GPU(s) object(s).
|
||||
devs: dict of detected pyopencl device object(s) where device ID(s)
|
||||
are keys.
|
||||
"""
|
||||
|
||||
try:
|
||||
import pyopencl as cl
|
||||
has_pyopencl = True
|
||||
except ImportError:
|
||||
logger.warning('pyopencl not detected - to use gprMax with OpenCL, the pyopencl package must be installed, and you must have at least one OpenCL capable platform.')
|
||||
has_pyopencl = False
|
||||
devs = {}
|
||||
|
||||
if has_pyopencl:
|
||||
platforms = cl.get_platforms()
|
||||
platform_names = [p.name for p in platforms]
|
||||
logger.info(platform_names)
|
||||
ocl_reqs = ('To use gprMax with OpenCL you must:'
|
||||
'\n 1) install pyopencl'
|
||||
'\n 2) install appropriate OpenCL device driver(s)'
|
||||
'\n 3) have at least one OpenCL-capable platform.')
|
||||
|
||||
if has_pyopencl():
|
||||
import pyopencl as cl
|
||||
try:
|
||||
i = 0
|
||||
for platform in cl.get_platforms():
|
||||
for device in platform.get_devices():
|
||||
devs[i] = device
|
||||
i += 1
|
||||
except:
|
||||
logger.warning('No OpenCL-capable platforms detected!\n' + ocl_reqs)
|
||||
|
||||
else:
|
||||
logger.warning('pyopencl not detected!\n' + ocl_reqs)
|
||||
|
||||
return devs
|
||||
|
||||
|
||||
def print_opencl_info(devs):
|
||||
"""Print info about detected OpenCL-capable device(s).
|
||||
|
||||
Args:
|
||||
devs: dict of detected pyopencl device object(s) where device ID(s)
|
||||
are keys.
|
||||
"""
|
||||
|
||||
import pyopencl as cl
|
||||
|
||||
logger.basic('|--->OpenCL:')
|
||||
logger.debug(f'PyOpenCL: {cl.VERSION_TEXT}')
|
||||
|
||||
for i, (ID, dev) in enumerate(devs.items()):
|
||||
if i == 0:
|
||||
platform = dev.platform.name
|
||||
logger.basic(f' |--->Platform: {platform}')
|
||||
if not platform == dev.platform.name:
|
||||
logger.basic(f' |--->Platform: {dev.platform.name}')
|
||||
types = cl.device_type.to_string(dev.type)
|
||||
if 'CPU' in types:
|
||||
type = 'CPU'
|
||||
if 'GPU' in types:
|
||||
type = 'GPU'
|
||||
logger.basic(f" |--->Device {ID}: {type} | {' '.join(dev.name.split())} | "
|
||||
f"{human_size(dev.global_mem_size, a_kilobyte_is_1024_bytes=True)}")
|
||||
|
4
setup.py
4
setup.py
@@ -50,7 +50,7 @@ def build_dispersive_material_templates():
|
||||
|
||||
env = Environment(loader = FileSystemLoader(os.path.join('gprMax', 'cython')), )
|
||||
|
||||
template = env.get_template('fields_updates_dispersive_template')
|
||||
template = env.get_template('fields_updates_dispersive_template.jinja')
|
||||
|
||||
# Render dispersive template for different types
|
||||
r = template.render(
|
||||
@@ -159,7 +159,7 @@ if 'cleanall' in sys.argv:
|
||||
shutil.rmtree(p, ignore_errors=True)
|
||||
print(f'Removed: {p}')
|
||||
|
||||
# Remove 'gprMax/cython/fields_updates_dispersive.pyx' if its there
|
||||
# Remove 'gprMax/cython/fields_updates_dispersive.jinja' if it's there
|
||||
if os.path.isfile(cython_disp_file):
|
||||
os.remove(cython_disp_file)
|
||||
|
||||
|
50
tools/get_host_spec.py
普通文件
50
tools/get_host_spec.py
普通文件
@@ -0,0 +1,50 @@
|
||||
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
|
||||
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
|
||||
#
|
||||
# This file is part of gprMax.
|
||||
#
|
||||
# gprMax is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# gprMax is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from gprMax.utilities.host_info import (detect_cuda_gpus, detect_opencl,
|
||||
get_host_info, print_cuda_info,
|
||||
print_opencl_info)
|
||||
from gprMax.utilities.utilities import get_terminal_width, human_size
|
||||
|
||||
# Host machine info.
|
||||
hostinfo = get_host_info()
|
||||
hyperthreadingstr = f", {hostinfo['logicalcores']} cores with Hyper-Threading" if hostinfo['hyperthreading'] else ''
|
||||
hostname = (f"\n=== {hostinfo['hostname']}")
|
||||
print(f"{hostname} {'=' * (get_terminal_width() - len(hostname) - 1)}")
|
||||
print(f"\n{'Mfr/model:':<12} {hostinfo['machineID']}")
|
||||
print(f"{'CPU:':<12} {hostinfo['sockets']} x {hostinfo['cpuID']} ({hostinfo['physicalcores']} cores{hyperthreadingstr})")
|
||||
print(f"{'RAM:':<12} {human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True)}")
|
||||
print(f"{'OS/Version:':<12} {hostinfo['osversion']}")
|
||||
|
||||
# OpenMP
|
||||
print("\n\n=== OpenMP capabilities (gprMax will not use Hyper-Threading with OpenMP as there is no performance advantage)\n")
|
||||
print(f"{'OpenMP threads: '} {hostinfo['physicalcores']}")
|
||||
|
||||
# CUDA
|
||||
print("\n\n=== CUDA capabilities\n")
|
||||
gpus = detect_cuda_gpus()
|
||||
if gpus:
|
||||
print_cuda_info(gpus)
|
||||
|
||||
# OpenCL
|
||||
print("\n\n=== OpenCL capabilities\n")
|
||||
devs = detect_opencl()
|
||||
if devs:
|
||||
print_opencl_info(devs)
|
||||
|
||||
print(f"\n{'=' * (get_terminal_width() - 1)}\n")
|
在新工单中引用
屏蔽一个用户