Restored functionality of CUDA and OpenCL solvers

这个提交包含在:
Craig Warren
2022-11-02 21:16:39 +00:00
父节点 cf4fa037ec
当前提交 279f5ae59a
共有 42 个文件被更改,包括 2397 次插入10409 次删除

查看文件

@@ -1,982 +0,0 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU GenRAl Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU GenRAl Public License for more details.
#
# You should have received a copy of the GNU GenRAl Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
kernels_template_pml_electric_HORIPML = Template("""
// Macros for converting subscripts to linear index:
#define INDEX2D_R(m, n) (m)*(NY_R)+(n)
#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n)
#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k)
#define INDEX4D_ID(p, i, j, k) (p)*($NX_ID)*($NY_ID)*($NZ_ID)+(i)*($NY_ID)*($NZ_ID)+(j)*($NZ_ID)+(k)
#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k)
#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k)
// Material coefficients (read-only) in constant memory (64KB)
__device__ __constant__ $REAL updatecoeffsE[$N_updatecoeffsE];
__global__ void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Ey and Ez field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA01, RB0, RE0, RF0, dHy, dHz;
$REAL dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - i1;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - i2;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__global__ void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Ey and Ez field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz;
$REAL dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - i1;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - i2;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__global__ void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Ey and Ez field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA01, RB0, RE0, RF0, dHy, dHz;
$REAL dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__global__ void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, $REAL *Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Ey and Ez field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz;
$REAL dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__global__ void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Ex and Ez field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA01, RB0, RE0, RF0, dHx, dHz;
$REAL dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - j1;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - j2;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__global__ void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Ex and Ez field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz;
$REAL dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - j1;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - j2;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__global__ void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Ex and Ez field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA01, RB0, RE0, RF0, dHx, dHz;
$REAL dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__global__ void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, $REAL *Ex, const $REAL* __restrict__ Ey, $REAL *Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Ex and Ez field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz;
$REAL dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__global__ void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Ex and Ey field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA01, RB0, RE0, RF0, dHx, dHy;
$REAL dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - k1;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - k2;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__global__ void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Ex and Ey field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy;
$REAL dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - k1;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - k2;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__global__ void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Ex and Ey field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA01, RB0, RE0, RF0, dHx, dHy;
$REAL dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__global__ void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Ex and Ey field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy;
$REAL dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
""")

文件差异内容过多而无法显示 加载差异

查看文件

@@ -1,982 +0,0 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
kernels_template_pml_magnetic_HORIPML = Template("""
// Macros for converting subscripts to linear index:
#define INDEX2D_R(m, n) (m)*(NY_R)+(n)
#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n)
#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k)
#define INDEX4D_ID(p, i, j, k) (p)*($NX_ID)*($NY_ID)*($NZ_ID)+(i)*($NY_ID)*($NZ_ID)+(j)*($NZ_ID)+(k)
#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k)
#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k)
// Material coefficients (read-only) in constant memory (64KB)
__device__ __constant__ $REAL updatecoeffsH[$N_updatecoeffsH];
__global__ void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Hy and Hz field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA01, RB0, RE0, RF0, dEy, dEz;
$REAL dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - (i1 + 1);
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - (i2 + 1);
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__global__ void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Hy and Hz field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz;
$REAL dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - (i1 + 1);
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - (i2 + 1);
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__global__ void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Hy and Hz field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA01, RB0, RE0, RF0, dEy, dEz;
$REAL dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__global__ void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Hy and Hz field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz;
$REAL dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__global__ void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Hx and Hz field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA01, RB0, RE0, RF0, dEx, dEz;
$REAL dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - (j1 + 1);
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - (j2 + 1);
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__global__ void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Hx and Hz field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz;
$REAL dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - (j1 + 1);
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - (j2 + 1);
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__global__ void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Hx and Hz field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA01, RB0, RE0, RF0, dEx, dEz;
$REAL dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__global__ void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, const $REAL* __restrict__ Hy, $REAL *Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Hx and Hz field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz;
$REAL dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__global__ void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Hx and Hy field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA01, RB0, RE0, RF0, dEx, dEy;
$REAL dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - (k1 + 1);
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - (k2 + 1);
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__global__ void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Hx and Hy field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy;
$REAL dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - (k1 + 1);
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - (k2 + 1);
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__global__ void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Hx and Hy field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA01, RB0, RE0, RF0, dEx, dEy;
$REAL dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__global__ void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, const unsigned int* __restrict__ ID, const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, $REAL *Hx, $REAL *Hy, const $REAL* __restrict__ Hz, $REAL *PHI1, $REAL *PHI2, const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d) {
// This function updates the Hx and Hy field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
$REAL RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy;
$REAL dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
""")

文件差异内容过多而无法显示 加载差异

查看文件

@@ -1,79 +0,0 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
knl_template_store_snapshot = Template("""
// Macros for converting subscripts to linear index:
#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k)
#define INDEX4D_SNAPS(p, i, j, k) (p)*($NX_SNAPS)*($NY_SNAPS)*($NZ_SNAPS)+(i)*($NY_SNAPS)*($NZ_SNAPS)+(j)*($NZ_SNAPS)+(k)
////////////////////
// Store snapshot //
////////////////////
__global__ void store_snapshot(int p, int xs, int xf, int ys, int yf, int zs, int zf, int dx, int dy, int dz,
const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey,
const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx,
const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz,
$REAL *snapEx, $REAL *snapEy, $REAL *snapEz,
$REAL *snapHx, $REAL *snapHy, $REAL *snapHz) {
// This function stores field values for a snapshot.
//
// Args:
// p: Snapshot number
// xs, xf, ys, yf, xs, xf: Start and finish cell coordinates for snapshot
// dx, dy, dz: Sampling interval in cell coordinates for snapshot
// E, H: Access to field component arrays
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for 4D SNAPS array
int i = (idx % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) / ($NY_SNAPS * $NZ_SNAPS);
int j = ((idx % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) / $NZ_SNAPS;
int k = ((idx % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) % $NZ_SNAPS;
// Subscripts for field arrays
int ii, jj, kk;
if (i >= xs && i < xf && j >= ys && j < yf && k >= zs && k < zf) {
// Increment subscripts for field array to account for spatial sampling of snapshot
ii = (xs + i) * dx;
jj = (ys + j) * dy;
kk = (zs + k) * dz;
// The electric field component value at a point comes from an average of
// the 4 electric field component values in that cell
snapEx[INDEX4D_SNAPS(p,i,j,k)] = (Ex[INDEX3D_FIELDS(ii,jj,kk)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk)] + Ex[INDEX3D_FIELDS(ii,jj,kk+1)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk+1)]) / 4;
snapEy[INDEX4D_SNAPS(p,i,j,k)] = (Ey[INDEX3D_FIELDS(ii,jj,kk)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk)] + Ey[INDEX3D_FIELDS(ii,jj,kk+1)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk+1)]) / 4;
snapEz[INDEX4D_SNAPS(p,i,j,k)] = (Ez[INDEX3D_FIELDS(ii,jj,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj,kk)] + Ez[INDEX3D_FIELDS(ii,jj+1,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj+1,kk)]) / 4;
// The magnetic field component value at a point comes from average of
// 2 magnetic field component values in that cell and the following cell
snapHx[INDEX4D_SNAPS(p,i,j,k)] = (Hx[INDEX3D_FIELDS(ii,jj,kk)] + Hx[INDEX3D_FIELDS(ii+1,jj,kk)]) / 2;
snapHy[INDEX4D_SNAPS(p,i,j,k)] = (Hy[INDEX3D_FIELDS(ii,jj,kk)] + Hy[INDEX3D_FIELDS(ii,jj+1,kk)]) / 2;
snapHz[INDEX4D_SNAPS(p,i,j,k)] = (Hz[INDEX3D_FIELDS(ii,jj,kk)] + Hz[INDEX3D_FIELDS(ii,jj,kk+1)]) / 2;
}
}
""")

查看文件

@@ -1,203 +0,0 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
knl_template_sources = Template("""
// Macros for converting subscripts to linear index:
#define INDEX2D_MAT(m, n) (m)*($NY_MATCOEFFS)+(n)
#define INDEX2D_SRCINFO(m, n) (m)*$NY_SRCINFO+(n)
#define INDEX2D_SRCWAVES(m, n) (m)*($NY_SRCWAVES)+(n)
#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k)
#define INDEX4D_ID(p, i, j, k) (p)*($NX_ID)*($NY_ID)*($NZ_ID)+(i)*($NY_ID)*($NZ_ID)+(j)*($NZ_ID)+(k)
// Material coefficients (read-only) in constant memory (64KB)
__device__ __constant__ $REAL updatecoeffsE[$N_updatecoeffsE];
__device__ __constant__ $REAL updatecoeffsH[$N_updatecoeffsH];
///////////////////////////////////////////
// Hertzian dipole electric field update //
///////////////////////////////////////////
__global__ void update_hertzian_dipole(int NHERTZDIPOLE, int iteration, $REAL dx, $REAL dy, $REAL dz, const int* __restrict__ srcinfo1, const $REAL* __restrict__ srcinfo2, const $REAL* __restrict__ srcwaveforms, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez) {
// This function updates electric field values for Hertzian dipole sources.
//
// Args:
// NHERTZDIPOLE: Total number of Hertzian dipoles in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, E: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each receiver
int src = blockIdx.x * blockDim.x + threadIdx.x;
if (src < NHERTZDIPOLE) {
$REAL dl;
int i, j, k, polarisation;
i = srcinfo1[INDEX2D_SRCINFO(src,0)];
j = srcinfo1[INDEX2D_SRCINFO(src,1)];
k = srcinfo1[INDEX2D_SRCINFO(src,2)];
polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
dl = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
int materialEx = ID[INDEX4D_ID(0,i,j,k)];
Ex[INDEX3D_FIELDS(i,j,k)] = Ex[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialEy = ID[INDEX4D_ID(1,i,j,k)];
Ey[INDEX3D_FIELDS(i,j,k)] = Ey[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialEz = ID[INDEX4D_ID(2,i,j,k)];
Ez[INDEX3D_FIELDS(i,j,k)] = Ez[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
}
}
///////////////////////////////////////////
// Magnetic dipole magnetic field update //
///////////////////////////////////////////
__global__ void update_magnetic_dipole(int NMAGDIPOLE, int iteration, $REAL dx, $REAL dy, $REAL dz, const int* __restrict__ srcinfo1, const $REAL* __restrict__ srcinfo2, const $REAL* __restrict__ srcwaveforms, const unsigned int* __restrict__ ID, $REAL *Hx, $REAL *Hy, $REAL *Hz) {
// This function updates magnetic field values for magnetic dipole sources.
//
// Args:
// NMAGDIPOLE: Total number of magnetic dipoles in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each receiver
int src = blockIdx.x * blockDim.x + threadIdx.x;
if (src < NMAGDIPOLE) {
int i, j, k, polarisation;
i = srcinfo1[INDEX2D_SRCINFO(src,0)];
j = srcinfo1[INDEX2D_SRCINFO(src,1)];
k = srcinfo1[INDEX2D_SRCINFO(src,2)];
polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
// 'x' polarised source
if (polarisation == 0) {
int materialHx = ID[INDEX4D_ID(3,i,j,k)];
Hx[INDEX3D_FIELDS(i,j,k)] = Hx[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialHy = ID[INDEX4D_ID(4,i,j,k)];
Hy[INDEX3D_FIELDS(i,j,k)] = Hy[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialHz = ID[INDEX4D_ID(5,i,j,k)];
Hz[INDEX3D_FIELDS(i,j,k)] = Hz[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
}
}
//////////////////////////////////////////
// Voltage source electric field update //
//////////////////////////////////////////
__global__ void update_voltage_source(int NVOLTSRC, int iteration, $REAL dx, $REAL dy, $REAL dz, const int* __restrict__ srcinfo1, const $REAL* __restrict__ srcinfo2, const $REAL* __restrict__ srcwaveforms, const unsigned int* __restrict__ ID, $REAL *Ex, $REAL *Ey, $REAL *Ez) {
// This function updates electric field values for voltage sources.
//
// Args:
// NVOLTSRC: Total number of voltage sources in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, E: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each receiver
int src = blockIdx.x * blockDim.x + threadIdx.x;
if (src < NVOLTSRC) {
$REAL resistance;
int i, j, k, polarisation;
i = srcinfo1[INDEX2D_SRCINFO(src,0)];
j = srcinfo1[INDEX2D_SRCINFO(src,1)];
k = srcinfo1[INDEX2D_SRCINFO(src,2)];
polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
resistance = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
if (resistance != 0) {
int materialEx = ID[INDEX4D_ID(0,i,j,k)];
Ex[INDEX3D_FIELDS(i,j,k)] = Ex[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dy * dz));
}
else {
Ex[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dx;
}
}
// 'y' polarised source
else if (polarisation == 1) {
if (resistance != 0) {
int materialEy = ID[INDEX4D_ID(1,i,j,k)];
Ey[INDEX3D_FIELDS(i,j,k)] = Ey[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dz));
}
else {
Ey[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dy;
}
}
// 'z' polarised source
else if (polarisation == 2) {
if (resistance != 0) {
int materialEz = ID[INDEX4D_ID(2,i,j,k)];
Ez[INDEX3D_FIELDS(i,j,k)] = Ez[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dy));
}
else {
Ez[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dz;
}
}
}
}
""")

查看文件

@@ -1,276 +0,0 @@
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
{% block complex_header %}{% endblock complex_header %}
// Macros for converting subscripts to linear index:
#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define IDX2D_MATDISP(m, n) (m)*({{NY_MATDISPCOEFFS}})+(n)
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
#define IDX4D_T(p, i, j, k) (p)*({{NX_T}})*({{NY_T}})*({{NZ_T}})+(i)*({{NY_T}})*({{NZ_T}})+(j)*({{NZ_T}})+(k)
// Material coefficients (read-only) stored in constant memory of compute device
{% block constmem %}{% endblock constmem %}
///////////////////////////////////////////////
// Electric field updates - normal materials //
///////////////////////////////////////////////
{{KERNEL}} void update_electric(int NX,
int NY,
int NZ,{% filter indent(width=30) %}{% block electric_args %}{% endblock electric_args %}{% endfilter %}{
// This function updates electric field values.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// ID, E, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread
{% block threadidx %}{% endblock threadidx %}
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)];
Ex[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i,j,k-1)]);
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)];
Ey[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j,k-1)]) -
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i-1,j,k)]);
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)];
Ez[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i-1,j,k)]) -
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j-1,k)]);
}
}
////////////////////////////
// Magnetic field updates //
////////////////////////////
{{KERNEL}} void update_magnetic(int NX,
int NY,
int NZ,{% filter indent(width=30) %}{% block magnetic_args %}{% endblock magnetic_args %}{% endfilter %}{
// This function updates magnetic field values.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// ID, E, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread
{{self.threadidx()}}
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Hx component
if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) {
int materialHx = ID[IDX4D_ID(3,i_ID,j_ID,k_ID)];
Hx[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHx,0)] * Hx[IDX3D_FIELDS(i,j,k)] -
updatecoeffsH[IDX2D_MAT(materialHx,2)] * (Ez[IDX3D_FIELDS(i,j+1,k)] - Ez[IDX3D_FIELDS(i,j,k)]) +
updatecoeffsH[IDX2D_MAT(materialHx,3)] * (Ey[IDX3D_FIELDS(i,j,k+1)] - Ey[IDX3D_FIELDS(i,j,k)]);
}
// Hy component
if (NY != 1 && i >= 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialHy = ID[IDX4D_ID(4,i_ID,j_ID,k_ID)];
Hy[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHy,0)] * Hy[IDX3D_FIELDS(i,j,k)] -
updatecoeffsH[IDX2D_MAT(materialHy,3)] * (Ex[IDX3D_FIELDS(i,j,k+1)] - Ex[IDX3D_FIELDS(i,j,k)]) +
updatecoeffsH[IDX2D_MAT(materialHy,1)] * (Ez[IDX3D_FIELDS(i+1,j,k)] - Ez[IDX3D_FIELDS(i,j,k)]);
}
// Hz component
if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialHz = ID[IDX4D_ID(5,i_ID,j_ID,k_ID)];
Hz[IDX3D_FIELDS(i,j,k)] = updatecoeffsH[IDX2D_MAT(materialHz,0)] * Hz[IDX3D_FIELDS(i,j,k)] -
updatecoeffsH[IDX2D_MAT(materialHz,1)] * (Ey[IDX3D_FIELDS(i+1,j,k)] - Ey[IDX3D_FIELDS(i,j,k)]) +
updatecoeffsH[IDX2D_MAT(materialHz,2)] * (Ex[IDX3D_FIELDS(i,j+1,k)] - Ex[IDX3D_FIELDS(i,j,k)]);
}
}
///////////////////////////////////////////////////
// Electric field updates - dispersive materials //
///////////////////////////////////////////////////
{{KERNEL}} void update_electric_dispersive_A(int NX,
int NY,
int NZ,
int MAXPOLES,{% filter indent(width=43) %}{% block electric_dispersive_A_args %}{% endblock electric_dispersive_A_args %}{% endfilter %}{
// This function is part A of updates to electric field values when
// dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
{{self.threadidx()}}
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEx,pole*3)]{{REALFUNC}} * Tx[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}};
Tx[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEx,1+(pole*3))] * Tx[IDX4D_T(pole,i_T,j_T,k_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(i,j,k)];
}
Ex[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEx,0)] * Ex[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEx,2)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[IDX2D_MAT(materialEx,3)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i,j,k-1)]) -
updatecoeffsE[IDX2D_MAT(materialEx,4)] * phi;
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEy,pole*3)]{{REALFUNC}} * Ty[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}};
Ty[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEy,1+(pole*3))] * Ty[IDX4D_T(pole,i_T,j_T,k_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(i,j,k)];
}
Ey[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEy,0)] * Ey[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEy,3)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j,k-1)]) -
updatecoeffsE[IDX2D_MAT(materialEy,1)] * (Hz[IDX3D_FIELDS(i,j,k)] - Hz[IDX3D_FIELDS(i-1,j,k)]) -
updatecoeffsE[IDX2D_MAT(materialEy,4)] * phi;
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[IDX2D_MATDISP(materialEz,pole*3)]{{REALFUNC}} * Tz[IDX4D_T(pole,i_T,j_T,k_T)]{{REALFUNC}};
Tz[IDX4D_T(pole,i_T,j_T,k_T)] = updatecoeffsdispersive[IDX2D_MATDISP(materialEz,1+(pole*3))] * Tz[IDX4D_T(pole,i_T,j_T,k_T)] +
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(i,j,k)];
}
Ez[IDX3D_FIELDS(i,j,k)] = updatecoeffsE[IDX2D_MAT(materialEz,0)] * Ez[IDX3D_FIELDS(i,j,k)] +
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(i,j,k)] - Hy[IDX3D_FIELDS(i-1,j,k)]) -
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(i,j,k)] - Hx[IDX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[IDX2D_MAT(materialEz,4)] * phi;
}
}
{{KERNEL}} void update_electric_dispersive_B(int NX,
int NY,
int NZ,
int MAXPOLES,{% filter indent(width=43) %}{% block electric_dispersive_B_args %}{% endblock electric_dispersive_B_args %}{% endfilter %}{
// This function is part B which updates the dispersive field arrays when
// dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
{{self.threadidx()}}
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[IDX4D_ID(0,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tx[IDX4D_T(pole,i_T,j_T,k_T)] = Tx[IDX4D_T(pole,i_T,j_T,k_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEx,2+(pole*3))] * Ex[IDX3D_FIELDS(i,j,k)];
}
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[IDX4D_ID(1,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Ty[IDX4D_T(pole,i_T,j_T,k_T)] = Ty[IDX4D_T(pole,i_T,j_T,k_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEy,2+(pole*3))] * Ey[IDX3D_FIELDS(i,j,k)];
}
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[IDX4D_ID(2,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tz[IDX4D_T(pole,i_T,j_T,k_T)] = Tz[IDX4D_T(pole,i_T,j_T,k_T)] -
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(i,j,k)];
}
}
}

查看文件

@@ -1,65 +0,0 @@
{% extends "fields_updates_base.tmpl" %}
{% block complex_header %}
#include <pycuda-complex.hpp>
{% endblock complex_header %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
{% endblock constmem %}
{% block threadidx %}
int idx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block electric_args %}
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
{{REAL}} *Ey,
{{REAL}} *Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz)
{% endblock electric_args %}
{% block magnetic_args %}
const unsigned int* __restrict__ ID,
{{REAL}} *Hx,
{{REAL}} *Hy,
{{REAL}} *Hz,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez)
{% endblock magnetic_args %}
{% block electric_dispersive_A_args %}
const {{COMPLEX}}* __restrict__ updatecoeffsdispersive,
{{COMPLEX}} *Tx,
{{COMPLEX}} *Ty,
{{COMPLEX}} *Tz,
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
{{REAL}} *Ey,
{{REAL}} *Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz)
{% endblock electric_dispersive_A_args %}
{% block electric_dispersive_B_args %}
const {{COMPLEX}}* __restrict__ updatecoeffsdispersive,
{{COMPLEX}} *Tx,
{{COMPLEX}} *Ty,
{{COMPLEX}} *Tz,
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez)
{% endblock electric_dispersive_B_args %}

查看文件

@@ -1,77 +0,0 @@
{% extends "fields_updates_base.tmpl" %}
{% block complex_header %}
#include <pyopencl-complex.h>
{% endblock complex_header %}
{% block constmem %}
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updatecoeffsE %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updatecoeffsH %}
{{i}},
{% endfor %}
};
{% endblock constmem %}
{% block threadidx %}
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block electric_args %}
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}} * restrict Hx,
__global const {{REAL}} * restrict Hy,
__global const {{REAL}} * restrict Hz)
{% endblock electric_args %}
{% block magnetic_args %}
__global const unsigned int* restrict ID,
__global {{REAL}} *Hx,
__global {{REAL}} *Hy,
__global {{REAL}} *Hz,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez)
{% endblock magnetic_args %}
{% block electric_dispersive_A_args %}
__global const {{COMPLEX}}* restrict updatecoeffsdispersive,
__global {{COMPLEX}} *Tx,
__global {{COMPLEX}} *Ty,
__global {{COMPLEX}} *Tz,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz)
{% endblock electric_dispersive_A_args %}
{% block electric_dispersive_B_args %}
__global const {{COMPLEX}}* restrict updatecoeffsdispersive,
__global {{COMPLEX}} *Tx,
__global {{COMPLEX}} *Ty,
__global {{COMPLEX}} *Tz,
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez)
{% endblock electric_dispersive_B_args %}

文件差异内容过多而无法显示 加载差异

文件差异内容过多而无法显示 加载差异

查看文件

@@ -1,62 +0,0 @@
{% extends "pml_updates_electric_HORIPML_base.tmpl" %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
{% endblock constmem %}
{% block threadidx %}
int idx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block x_args %}
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
{{REAL}} *Ey,
{{REAL}} *Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock x_args %}
{% block y_args %}
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
const {{REAL}}* __restrict__ Ey,
{{REAL}} *Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock y_args %}
{% block z_args %}
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
{{REAL}} *Ey,
const {{REAL}}* __restrict__ Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock z_args %}

查看文件

@@ -1,68 +0,0 @@
{% extends "pml_updates_electric_HORIPML_base.tmpl" %}
{% block constmem %}
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updatecoeffsE %}
{{i}},
{% endfor %}
};
{% endblock constmem %}
{% block threadidx %}
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block x_args %}
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock x_args %}
{% block y_args %}
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global const {{REAL}}* restrict Ey,
__global {{REAL}} *Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock y_args %}
{% block z_args %}
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock z_args %}

查看文件

@@ -1,62 +0,0 @@
{% extends "pml_updates_magnetic_HORIPML_base.tmpl" %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
{% endblock constmem %}
{% block threadidx %}
int idx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block x_args %}
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
const {{REAL}}* __restrict__ Hx,
{{REAL}} *Hy,
{{REAL}} *Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock x_args %}
{% block y_args %}
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
{{REAL}} *Hx,
const {{REAL}}* __restrict__ Hy,
{{REAL}} *Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock y_args %}
{% block z_args %}
const unsigned int* __restrict__ ID,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
{{REAL}} *Hx,
{{REAL}} *Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *PHI1,
{{REAL}} *PHI2,
const {{REAL}}* __restrict__ RA,
const {{REAL}}* __restrict__ RB,
const {{REAL}}* __restrict__ RE,
const {{REAL}}* __restrict__ RF,
{% endblock z_args %}

查看文件

@@ -1,68 +0,0 @@
{% extends "pml_updates_magnetic_HORIPML_base.tmpl" %}
{% block constmem %}
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updatecoeffsH %}
{{i}},
{% endfor %}
};
{% endblock constmem %}
{% block threadidx %}
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block x_args %}
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global {{REAL}} *Hy,
__global {{REAL}} *Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock x_args %}
{% block y_args %}
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global {{REAL}} *Hx,
__global const {{REAL}}* restrict Hy,
__global {{REAL}} *Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock y_args %}
{% block z_args %}
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global {{REAL}} *Hx,
__global {{REAL}} *Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *PHI1,
__global {{REAL}} *PHI2,
__global const {{REAL}}* restrict RA,
__global const {{REAL}}* restrict RB,
__global const {{REAL}}* restrict RE,
__global const {{REAL}}* restrict RF,
{% endblock z_args %}

查看文件

@@ -1,90 +0,0 @@
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
// Macros for converting subscripts to linear index:
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define IDX4D_SNAPS(p, i, j, k) (p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k)
////////////////////
// Store snapshot //
////////////////////
{{KERNEL}} void store_snapshot(int p,
int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int dx,
int dy,
int dz,{% filter indent(width=29) %}{% block snap_args %}{% endblock snap_args %}{% endfilter %}{
// This function stores field values for a snapshot.
//
// Args:
// p: Snapshot number
// xs, xf, ys, yf, xs, xf: Start and finish cell coordinates for snapshot
// dx, dy, dz: Sampling interval in cell coordinates for snapshot
// E, H: Access to field component arrays
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots
// Obtain the linear index corresponding to the current thread
{% block threadidx %}{% endblock threadidx %}
// Convert the linear index to subscripts for 4D SNAPS array
int i = (idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) / ({{NY_SNAPS}} * {{NZ_SNAPS}});
int j = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) / {{NZ_SNAPS}};
int k = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) % {{NZ_SNAPS}};
// Subscripts for field arrays
int ii, jj, kk;
if (i >= xs && i < xf && j >= ys && j < yf && k >= zs && k < zf) {
// Increment subscripts for field array to account for spatial sampling of snapshot
ii = (xs + i) * dx;
jj = (ys + j) * dy;
kk = (zs + k) * dz;
// The electric field component value at a point comes from an average of
// the 4 electric field component values in that cell
snapEx[IDX4D_SNAPS(p,i,j,k)] = (Ex[IDX3D_FIELDS(ii,jj,kk)] +
Ex[IDX3D_FIELDS(ii,jj+1,kk)] +
Ex[IDX3D_FIELDS(ii,jj,kk+1)] +
Ex[IDX3D_FIELDS(ii,jj+1,kk+1)]) / 4;
snapEy[IDX4D_SNAPS(p,i,j,k)] = (Ey[IDX3D_FIELDS(ii,jj,kk)] +
Ey[IDX3D_FIELDS(ii+1,jj,kk)] +
Ey[IDX3D_FIELDS(ii,jj,kk+1)] +
Ey[IDX3D_FIELDS(ii+1,jj,kk+1)]) / 4;
snapEz[IDX4D_SNAPS(p,i,j,k)] = (Ez[IDX3D_FIELDS(ii,jj,kk)] +
Ez[IDX3D_FIELDS(ii+1,jj,kk)] +
Ez[IDX3D_FIELDS(ii,jj+1,kk)] +
Ez[IDX3D_FIELDS(ii+1,jj+1,kk)]) / 4;
// The magnetic field component value at a point comes from average of
// 2 magnetic field component values in that cell and the following cell
snapHx[IDX4D_SNAPS(p,i,j,k)] = (Hx[IDX3D_FIELDS(ii,jj,kk)] +
Hx[IDX3D_FIELDS(ii+1,jj,kk)]) / 2;
snapHy[IDX4D_SNAPS(p,i,j,k)] = (Hy[IDX3D_FIELDS(ii,jj,kk)] +
Hy[IDX3D_FIELDS(ii,jj+1,kk)]) / 2;
snapHz[IDX4D_SNAPS(p,i,j,k)] = (Hz[IDX3D_FIELDS(ii,jj,kk)] +
Hz[IDX3D_FIELDS(ii,jj,kk+1)]) / 2;
}
}

查看文件

@@ -1,22 +0,0 @@
{% extends "snapshots_base.tmpl" %}
{% block threadidx %}
int idx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block snap_args %}
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz,
{{REAL}} *snapEx,
{{REAL}} *snapEy,
{{REAL}} *snapEz,
{{REAL}} *snapHx,
{{REAL}} *snapHy,
{{REAL}} *snapHz
{% endblock snap_args %}

查看文件

@@ -1,23 +0,0 @@
{% extends "snapshots_base.tmpl" %}
{% block threadidx %}
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block snap_args %}
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz,
__global {{REAL}} *snapEx,
__global {{REAL}} *snapEy,
__global {{REAL}} *snapEz,
__global {{REAL}} *snapHx,
__global {{REAL}} *snapHy,
__global {{REAL}} *snapHz)
{% endblock snap_args %}

查看文件

@@ -1,217 +0,0 @@
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
// Macros for converting subscripts to linear index:
#define IDX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define IDX2D_SRCINFO(m, n) (m)*{{NY_SRCINFO}}+(n)
#define IDX2D_SRCWAVES(m, n) (m)*({{NY_SRCWAVES}})+(n)
#define IDX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define IDX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
// Material coefficients (read-only) stored in constant memory of compute device
{% block constmem %}{% endblock constmem %}
///////////////////////////////////////////
// Hertzian dipole electric field update //
///////////////////////////////////////////
{{KERNEL}} void update_hertzian_dipole(int NHERTZDIPOLE,
int iteration,
{{REAL}} dx,
{{REAL}} dy,
{{REAL}} dz,{% filter indent(width=37) %}{% block electric_source_args %}{% endblock electric_source_args %}{% endfilter %}{
// This function updates electric field values for Hertzian dipole sources.
//
// Args:
// NHERTZDIPOLE: Total number of Hertzian dipoles in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, E: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each receiver
{% block threadidx %}{% endblock threadidx %}
if (src < NHERTZDIPOLE) {
{{REAL}} dl;
int i, j, k, polarisation;
i = srcinfo1[IDX2D_SRCINFO(src,0)];
j = srcinfo1[IDX2D_SRCINFO(src,1)];
k = srcinfo1[IDX2D_SRCINFO(src,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(src,3)];
dl = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
int materialEx = ID[IDX4D_ID(0,i,j,k)];
Ex[IDX3D_FIELDS(i,j,k)] = Ex[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialEy = ID[IDX4D_ID(1,i,j,k)];
Ey[IDX3D_FIELDS(i,j,k)] = Ey[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialEz = ID[IDX4D_ID(2,i,j,k)];
Ez[IDX3D_FIELDS(i,j,k)] = Ez[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
}
}
///////////////////////////////////////////
// Magnetic dipole magnetic field update //
///////////////////////////////////////////
{{KERNEL}} void update_magnetic_dipole(int NMAGDIPOLE,
int iteration,
{{REAL}} dx,
{{REAL}} dy,
{{REAL}} dz,{% filter indent(width=37) %}{% block magnetic_source_args %}{% endblock magnetic_source_args %}{% endfilter %}{
// This function updates magnetic field values for magnetic dipole sources.
//
// Args:
// NMAGDIPOLE: Total number of magnetic dipoles in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each receiver
{{self.threadidx()}}
if (src < NMAGDIPOLE) {
int i, j, k, polarisation;
i = srcinfo1[IDX2D_SRCINFO(src,0)];
j = srcinfo1[IDX2D_SRCINFO(src,1)];
k = srcinfo1[IDX2D_SRCINFO(src,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(src,3)];
// 'x' polarised source
if (polarisation == 0) {
int materialHx = ID[IDX4D_ID(3,i,j,k)];
Hx[IDX3D_FIELDS(i,j,k)] = Hx[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHx,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialHy = ID[IDX4D_ID(4,i,j,k)];
Hy[IDX3D_FIELDS(i,j,k)] = Hy[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHy,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialHz = ID[IDX4D_ID(5,i,j,k)];
Hz[IDX3D_FIELDS(i,j,k)] = Hz[IDX3D_FIELDS(i,j,k)] - updatecoeffsH[IDX2D_MAT(materialHz,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
}
}
//////////////////////////////////////////
// Voltage source electric field update //
//////////////////////////////////////////
{{KERNEL}} void update_voltage_source(int NVOLTSRC,
int iteration,
{{REAL}} dx,
{{REAL}} dy,
{{REAL}} dz,{% filter indent(width=36) %}{{self.electric_source_args()}}{% endfilter %}{
// This function updates electric field values for voltage sources.
//
// Args:
// NVOLTSRC: Total number of voltage sources in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, E: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each receiver
{{self.threadidx()}}
if (src < NVOLTSRC) {
{{REAL}} resistance;
int i, j, k, polarisation;
i = srcinfo1[IDX2D_SRCINFO(src,0)];
j = srcinfo1[IDX2D_SRCINFO(src,1)];
k = srcinfo1[IDX2D_SRCINFO(src,2)];
polarisation = srcinfo1[IDX2D_SRCINFO(src,3)];
resistance = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
if (resistance != 0) {
int materialEx = ID[IDX4D_ID(0,i,j,k)];
Ex[IDX3D_FIELDS(i,j,k)] = Ex[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEx,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dy * dz));
}
else {
Ex[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dx;
}
}
// 'y' polarised source
else if (polarisation == 1) {
if (resistance != 0) {
int materialEy = ID[IDX4D_ID(1,i,j,k)];
Ey[IDX3D_FIELDS(i,j,k)] = Ey[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEy,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dz));
}
else {
Ey[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dy;
}
}
// 'z' polarised source
else if (polarisation == 2) {
if (resistance != 0) {
int materialEz = ID[IDX4D_ID(2,i,j,k)];
Ez[IDX3D_FIELDS(i,j,k)] = Ez[IDX3D_FIELDS(i,j,k)] - updatecoeffsE[IDX2D_MAT(materialEz,4)] *
srcwaveforms[IDX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dy));
}
else {
Ez[IDX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[IDX2D_SRCWAVES(src,iteration)] / dz;
}
}
}
}

查看文件

@@ -1,34 +0,0 @@
{% extends "source_updates_base.tmpl" %}
{% block constmem %}
__device__ __constant__ {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}];
__device__ __constant__ {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}];
{% endblock constmem %}
{% block threadidx %}
int src = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block electric_source_args %}
const int* __restrict__ srcinfo1,
const {{REAL}}* __restrict__ srcinfo2,
const {{REAL}}* __restrict__ srcwaveforms,
const unsigned int* __restrict__ ID,
{{REAL}} *Ex,
{{REAL}} *Ey,
{{REAL}} *Ez)
{% endblock electric_source_args %}
{% block magnetic_source_args %}
const int* __restrict__ srcinfo1,
const {{REAL}}* __restrict__ srcinfo2,
const {{REAL}}* __restrict__ srcwaveforms,
const unsigned int* __restrict__ ID,
{{REAL}} *Hx,
{{REAL}} *Hy,
{{REAL}} *Hz)
{% endblock magnetic_source_args %}

查看文件

@@ -1,46 +0,0 @@
{% extends "source_updates_base.tmpl" %}
{% block constmem %}
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updatecoeffsE %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updatecoeffsH %}
{{i}},
{% endfor %}
};
{% endblock constmem %}
{% block threadidx %}
int src = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block electric_source_args %}
__global const int* restrict srcinfo1,
__global const {{REAL}}* restrict srcinfo2,
__global const {{REAL}}* restrict srcwaveforms,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez)
{% endblock electric_source_args %}
{% block magnetic_source_args %}
__global const int* restrict srcinfo1,
__global const {{REAL}}* restrict srcinfo2,
__global const {{REAL}}* restrict srcwaveforms,
__global const unsigned int* restrict ID,
__global {{REAL}} *Hx,
__global {{REAL}} *Hy,
__global {{REAL}} *Hz)
{% endblock magnetic_source_args %}

查看文件

@@ -1,50 +0,0 @@
// Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
// Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
//
// This file is part of gprMax.
//
// gprMax is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// gprMax is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with gprMax. If not, see <http://www.gnu.org/licenses/>.
#define IDX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n)
#define IDX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k)
#define IDX3D_FIELDS(i,j,k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
{{KERNEL}} void store_outputs(int NRX,
int iteration,{% filter indent(width=28) %}{% block rx_args %}{% endblock rx_args %}{% endfilter %}{
// This function stores field component values for every receiver in the model.
//
// Args:
// NRX: total number of receivers in the model.
// rxs: array to store field components for receivers - rows
// are field components; columns are iterations; pages are receiver
// Obtain linear index corresponding to the current work item
{% block threadidx %}{% endblock threadidx %}
int i,j,k;
if (rx < NRX) {
i = rxcoords[IDX2D_RXCOORDS(rx,0)];
j = rxcoords[IDX2D_RXCOORDS(rx,1)];
k = rxcoords[IDX2D_RXCOORDS(rx,2)];
rxs[IDX3D_RXS(0,iteration,rx)] = Ex[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(1,iteration,rx)] = Ey[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(2,iteration,rx)] = Ez[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(3,iteration,rx)] = Hx[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(4,iteration,rx)] = Hy[IDX3D_FIELDS(i,j,k)];
rxs[IDX3D_RXS(5,iteration,rx)] = Hz[IDX3D_FIELDS(i,j,k)];
}
}

查看文件

@@ -1,18 +0,0 @@
{% extends "store_outputs_base.tmpl" %}
{% block threadidx %}
int rx = blockIdx.x * blockDim.x + threadIdx.x;
{% endblock threadidx %}
{% block rx_args %}
const int* __restrict__ rxcoords,
{{REAL}} *rxs,
const {{REAL}}* __restrict__ Ex,
const {{REAL}}* __restrict__ Ey,
const {{REAL}}* __restrict__ Ez,
const {{REAL}}* __restrict__ Hx,
const {{REAL}}* __restrict__ Hy,
const {{REAL}}* __restrict__ Hz)
{% endblock rx_args %}

查看文件

@@ -1,19 +0,0 @@
{% extends "store_outputs_base.tmpl" %}
{% block threadidx %}
int rx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
{% endblock threadidx %}
{% block rx_args %}
__global const int* restrict rxcoords,
__global {{REAL}} *rxs,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz)
{% endblock rx_args %}

查看文件

@@ -1,311 +0,0 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
#include <pyopencl-complex.h>
#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}}) + (n)
#define INDEX2D_MATDISP(m, n) (m)*({{NY_MATDISPCOEFFS}}) + (n)
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}}) + (i)*({{NY_ID}})*({{NZ_ID}}) + (j)*({{NZ_ID}}) + (k)
#define INDEX4D_T(p, i, j, k) (p)*({{NX_T}})*({{NY_T}})*({{NZ_T}}) + (i)*({{NY_T}})*({{NZ_T}}) + (j)*({{NZ_T}}) + (k)
// Material coefficients (read-only) in constant memory
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updateEVal %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updateHVal %}
{{i}},
{% endfor %}
};
///////////////////////////////////////////////
// Electric field updates - normal materials //
///////////////////////////////////////////////
__kernel void update_electric(int NX, int NY, int NZ,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}} * restrict Hx,
__global const {{REAL}} * restrict Hy,
__global const {{REAL}} * restrict Hz) {
// This function updates electric field values.
//
// Args:
// NX, NY, NZ: Number of cells of the models domain.
// ID, E, H: Access to ID and field component arrays.
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}}*{{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}}*{{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID arrays
int i_ID = (idx%({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx%({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx%({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) -
updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]);
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]);
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]);
}
}
////////////////////////////
// Magnetic field updates //
////////////////////////////
__kernel void update_magnetic(int NX, int NY, int NZ,
__global const unsigned int* restrict ID,
__global {{REAL}} *Hx,
__global {{REAL}} *Hy,
__global {{REAL}} *Hz,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez) {
// This function updates magnetic field values.
//
// Args:
// NX, NY, NZ: number of cells of the model domain.
// ID, E, H: access to ID and field component arrays.
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(0);
// convert the linear index to subscripts to 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx%({{NY_FIELDS}}*{{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx%({{NY_FIELDS}}*{{NZ_FIELDS}})) % {{NZ_FIELDS}};
// convert the linear index to subscripts to 4D material ID arrays
int i_ID = ( idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = (( idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = (( idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Hx component
if (NX != 1 && i > 0 && i < NX && j >= 0 && j < NY && k >= 0 && k < NZ) {
int materialHx = ID[INDEX4D_ID(3,i_ID,j_ID,k_ID)];
Hx[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHx,0)] * Hx[INDEX3D_FIELDS(i,j,k)] -
updatecoeffsH[INDEX2D_MAT(materialHx,2)] * (Ez[INDEX3D_FIELDS(i,j+1,k)] - Ez[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHx,3)] * (Ey[INDEX3D_FIELDS(i,j,k+1)] - Ey[INDEX3D_FIELDS(i,j,k)]);
}
// Hy component
if (NY != 1 && i >= 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialHy = ID[INDEX4D_ID(4,i_ID,j_ID,k_ID)];
Hy[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHy,0)] * Hy[INDEX3D_FIELDS(i,j,k)] -
updatecoeffsH[INDEX2D_MAT(materialHy,3)] * (Ex[INDEX3D_FIELDS(i,j,k+1)] - Ex[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHy,1)] * (Ez[INDEX3D_FIELDS(i+1,j,k)] - Ez[INDEX3D_FIELDS(i,j,k)]);
}
// Hz component
if (NZ != 1 && i >= 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialHz = ID[INDEX4D_ID(5,i_ID,j_ID,k_ID)];
Hz[INDEX3D_FIELDS(i,j,k)] = updatecoeffsH[INDEX2D_MAT(materialHz,0)] * Hz[INDEX3D_FIELDS(i,j,k)] -
updatecoeffsH[INDEX2D_MAT(materialHz,1)] * (Ey[INDEX3D_FIELDS(i+1,j,k)] - Ey[INDEX3D_FIELDS(i,j,k)]) + updatecoeffsH[INDEX2D_MAT(materialHz,2)] * (Ex[INDEX3D_FIELDS(i,j+1,k)] - Ex[INDEX3D_FIELDS(i,j,k)]);
}
}
///////////////////////////////////////////////////
// Electric field updates - dispersive materials //
///////////////////////////////////////////////////
__kernel void update_electric_dispersive_A(int NX, int NY, int NZ, int MAXPOLES,
__global const {{COMPLEX-}}_t* restrict updatecoeffsdispersive,
__global {{COMPLEX-}}_t *Tx,
__global {{COMPLEX-}}_t *Ty,
__global {{COMPLEX-}}_t *Tz,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz) {
// This function is part A of updates to electric field values when dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,pole*3)].real * Tx[INDEX4D_T(pole,i_T,j_T,k_T)].real;
Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_add(cfloat_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,1+(pole*3))],
Tx[INDEX4D_T(pole,i_T,j_T,k_T)]),
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))],
Ex[INDEX3D_FIELDS(i,j,k)]));
}
Ex[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEx,0)] * Ex[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEx,2)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEx,3)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * phi;
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,pole*3)].real * Ty[INDEX4D_T(pole,i_T,j_T,k_T)].real;
Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_add(cfloat_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,1+(pole*3))],
Ty[INDEX4D_T(pole,i_T,j_T,k_T)]),
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))],
Ey[INDEX3D_FIELDS(i,j,k)]));
}
Ey[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEy,0)] * Ey[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEy,3)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j,k-1)]) - updatecoeffsE[INDEX2D_MAT(materialEy,1)] * (Hz[INDEX3D_FIELDS(i,j,k)] - Hz[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * phi;
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
{{REAL}} phi = 0;
for (int pole = 0; pole < MAXPOLES; pole++) {
phi = phi + updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,pole*3)].real * Tz[INDEX4D_T(pole,i_T,j_T,k_T)].real;
Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_add(cfloat_mul(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,1+(pole*3))],
Tz[INDEX4D_T(pole,i_T,j_T,k_T)]),
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))],
Ez[INDEX3D_FIELDS(i,j,k)]));
}
Ez[INDEX3D_FIELDS(i,j,k)] = updatecoeffsE[INDEX2D_MAT(materialEz,0)] * Ez[INDEX3D_FIELDS(i,j,k)] +
updatecoeffsE[INDEX2D_MAT(materialEz,1)] * (Hy[INDEX3D_FIELDS(i,j,k)] - Hy[INDEX3D_FIELDS(i-1,j,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,2)] * (Hx[INDEX3D_FIELDS(i,j,k)] - Hx[INDEX3D_FIELDS(i,j-1,k)]) - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * phi;
}
}
__kernel void update_electric_dispersive_B(int NX, int NY, int NZ, int MAXPOLES,
__global const {{COMPLEX-}}_t* restrict updatecoeffsdispersive,
__global {{COMPLEX-}}_t *Tx,
__global {{COMPLEX-}}_t *Ty,
__global {{COMPLEX-}}_t *Tz,
__global const unsigned int* restrict ID,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez) {
// This function is part B which updates the dispersive field arrays when dispersive materials (with multiple poles) are present.
//
// Args:
// NX, NY, NZ: Number of cells of the model domain
// MAXPOLES: Maximum number of dispersive material poles present in model
// updatedispersivecoeffs, T, ID, E, H: Access to update coefficients, dispersive, ID and field component arrays
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for 3D field arrays
int i = idx / ({{NY_FIELDS}} * {{NZ_FIELDS}});
int j = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) / {{NZ_FIELDS}};
int k = (idx % ({{NY_FIELDS}} * {{NZ_FIELDS}})) % {{NZ_FIELDS}};
// Convert the linear index to subscripts for 4D material ID array
int i_ID = (idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) / ({{NY_ID}} * {{NZ_ID}});
int j_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) / {{NZ_ID}};
int k_ID = ((idx % ({{NX_ID}} * {{NY_ID}} * {{NZ_ID}})) % ({{NY_ID}} * {{NZ_ID}})) % {{NZ_ID}};
// Convert the linear index to subscripts for 4D dispersive array
int i_T = (idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) / ({{NY_T}} * {{NZ_T}});
int j_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) / {{NZ_T}};
int k_T = ((idx % ({{NX_T}} * {{NY_T}} * {{NZ_T}})) % ({{NY_T}} * {{NZ_T}})) % {{NZ_T}};
// Ex component
if ((NY != 1 || NZ != 1) && i >= 0 && i < NX && j > 0 && j < NY && k > 0 && k < NZ) {
int materialEx = ID[INDEX4D_ID(0,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tx[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_sub(Tx[INDEX4D_T(pole,i_T,j_T,k_T)],
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEx,2+(pole*3))],
Ex[INDEX3D_FIELDS(i,j,k)]));
}
}
// Ey component
if ((NX != 1 || NZ != 1) && i > 0 && i < NX && j >= 0 && j < NY && k > 0 && k < NZ) {
int materialEy = ID[INDEX4D_ID(1,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Ty[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_sub(Ty[INDEX4D_T(pole,i_T,j_T,k_T)],
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEy,2+(pole*3))],
Ey[INDEX3D_FIELDS(i,j,k)]));
}
}
// Ez component
if ((NX != 1 || NY != 1) && i > 0 && i < NX && j > 0 && j < NY && k >= 0 && k < NZ) {
int materialEz = ID[INDEX4D_ID(2,i_ID,j_ID,k_ID)];
for (int pole = 0; pole < MAXPOLES; pole++) {
Tz[INDEX4D_T(pole,i_T,j_T,k_T)] = cfloat_sub(Tz[INDEX4D_T(pole,i_T,j_T,k_T)],
cfloat_mulr(updatecoeffsdispersive[INDEX2D_MATDISP(materialEz,2+(pole*3))],
Ez[INDEX3D_FIELDS(i,j,k)]));
}
}
}

查看文件

@@ -1,955 +0,0 @@
// Macros for converting subscripts to linear index:
#define INDEX2D_R(m, n) (m)*(NY_R)+(n)
#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k)
#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k)
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updateEVal %}
{{i}},
{% endfor %}
};
__kernel void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d){
// This function updates the Ey and Ez field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHy, dHz;
{{REAL}} dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - i1;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - i2;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__kernel void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d){
// This function updates the Ey and Ez field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz;
{{REAL}} dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - i1;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - i2;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__kernel void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d){
// This function updates the Ey and Ez field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHy, dHz;
{{REAL}} dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__kernel void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ey and Ez field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHy, dHz;
{{REAL}} dx = d;
int ii, jj, kk, materialEy, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii-1,jj,kk)]) / dx;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHy;
}
}
__kernel void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ez field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHx, dHz;
{{REAL}} dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - j1;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - j2;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ez field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz;
{{REAL}} dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - j1;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - j2;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ez field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHx, dHz;
{{REAL}} dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global const {{REAL}}* restrict Ey, __global {{REAL}} *Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ez field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHz;
{{REAL}} dy = d;
int ii, jj, kk, materialEx, materialEz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHz = (Hz[INDEX3D_FIELDS(ii,jj,kk)] - Hz[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Ez
materialEz = ID[INDEX4D_ID(2,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj-1,kk)]) / dy;
Ez[INDEX3D_FIELDS(ii,jj,kk)] = Ez[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ey field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHx, dHy;
{{REAL}} dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - k1;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - k2;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ey field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy;
{{REAL}} dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - k1;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - k2;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ey field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dHx, dHy;
{{REAL}} dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}
__kernel void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global const {{REAL}}* restrict Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Ex and Ey field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dHx, dHy;
{{REAL}} dz = d;
int ii, jj, kk, materialEx, materialEy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Ex
materialEx = ID[INDEX4D_ID(0,ii,jj,kk)];
dHy = (Hy[INDEX3D_FIELDS(ii,jj,kk)] - Hy[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ex[INDEX3D_FIELDS(ii,jj,kk)] = Ex[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * (RA01 * dHy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dHy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dHy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Ey
materialEy = ID[INDEX4D_ID(1,ii,jj,kk)];
dHx = (Hx[INDEX3D_FIELDS(ii,jj,kk)] - Hx[INDEX3D_FIELDS(ii,jj,kk-1)]) / dz;
Ey[INDEX3D_FIELDS(ii,jj,kk)] = Ey[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsE[INDEX2D_MAT(materialEy,4)] * (RA01 * dHx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dHx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
}

查看文件

@@ -1,962 +0,0 @@
// Macros for converting subscripts to linear index:
#define INDEX2D_R(m, n) (m)*(NY_R)+(n)
#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}})+(n)
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}})+(i)*({{NY_ID}})*({{NZ_ID}})+(j)*({{NZ_ID}})+(k)
#define INDEX4D_PHI1(p, i, j, k) (p)*(NX_PHI1)*(NY_PHI1)*(NZ_PHI1)+(i)*(NY_PHI1)*(NZ_PHI1)+(j)*(NZ_PHI1)+(k)
#define INDEX4D_PHI2(p, i, j, k) (p)*(NX_PHI2)*(NY_PHI2)*(NZ_PHI2)+(i)*(NY_PHI2)*(NZ_PHI2)+(j)*(NZ_PHI2)+(k)
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updateHVal %}
{{i}},
{% endfor %}
};
__kernel void order1_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hy and Hz field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEy, dEz;
{{REAL}} dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - (i1 + 1);
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - (i2 + 1);
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__kernel void order2_xminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hy and Hz field components for the xminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz;
{{REAL}} dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = xf - (i1 + 1);
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = xf - (i2 + 1);
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__kernel void order1_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hy and Hz field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEy, dEz;
{{REAL}} dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i1)] - 1;
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,i2)] - 1;
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__kernel void order2_xplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global const {{REAL}}* restrict Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hy and Hz field components for the xplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEy, dEz;
{{REAL}} dx = d;
int ii, jj, kk, materialHy, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i1)];
RB0 = RB[INDEX2D_R(0,i1)];
RE0 = RE[INDEX2D_R(0,i1)];
RF0 = RF[INDEX2D_R(0,i1)];
RA1 = RA[INDEX2D_R(1,i1)];
RB1 = RB[INDEX2D_R(1,i1)];
RE1 = RE[INDEX2D_R(1,i1)];
RF1 = RF[INDEX2D_R(1,i1)];
RA01 = RA[INDEX2D_R(0,i1)] * RA[INDEX2D_R(1,i1)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii+1,jj,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,i2)];
RB0 = RB[INDEX2D_R(0,i2)];
RE0 = RE[INDEX2D_R(0,i2)];
RF0 = RF[INDEX2D_R(0,i2)];
RA1 = RA[INDEX2D_R(1,i2)];
RB1 = RB[INDEX2D_R(1,i2)];
RE1 = RE[INDEX2D_R(1,i2)];
RF1 = RF[INDEX2D_R(1,i2)];
RA01 = RA[INDEX2D_R(0,i2)] * RA[INDEX2D_R(1,i2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii+1,jj,kk)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dx;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEy + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEy + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEy;
}
}
__kernel void order1_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hz field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEx, dEz;
{{REAL}} dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - (j1 + 1);
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - (j2 + 1);
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order2_yminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hz field components for the yminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz;
{{REAL}} dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = yf - (j1 + 1);
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = yf - (j2 + 1);
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order1_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hz field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEx, dEz;
{{REAL}} dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j1)] - 1;
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,j2)] - 1;
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order2_yplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global const {{REAL}}* restrict Hy, __global {{REAL}} *Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hz field components for the yplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEz;
{{REAL}} dy = d;
int ii, jj, kk, materialHx, materialHz;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j1)];
RB0 = RB[INDEX2D_R(0,j1)];
RE0 = RE[INDEX2D_R(0,j1)];
RF0 = RF[INDEX2D_R(0,j1)];
RA1 = RA[INDEX2D_R(1,j1)];
RB1 = RB[INDEX2D_R(1,j1)];
RE1 = RE[INDEX2D_R(1,j1)];
RF1 = RF[INDEX2D_R(1,j1)];
RA01 = RA[INDEX2D_R(0,j1)] * RA[INDEX2D_R(1,j1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEz = (Ez[INDEX3D_FIELDS(ii,jj+1,kk)] - Ez[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEz + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEz + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEz;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,j2)];
RB0 = RB[INDEX2D_R(0,j2)];
RE0 = RE[INDEX2D_R(0,j2)];
RF0 = RF[INDEX2D_R(0,j2)];
RA1 = RA[INDEX2D_R(1,j2)];
RB1 = RB[INDEX2D_R(1,j2)];
RE1 = RE[INDEX2D_R(1,j2)];
RF1 = RF[INDEX2D_R(1,j2)];
RA01 = RA[INDEX2D_R(0,j2)] * RA[INDEX2D_R(1,j2)] - 1;
// Hz
materialHz = ID[INDEX4D_ID(5,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj+1,kk)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dy;
Hz[INDEX3D_FIELDS(ii,jj,kk)] = Hz[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHz,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order1_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hy field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEx, dEy;
{{REAL}} dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - (k1 + 1);
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - (k2 + 1);
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order2_zminus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hy field components for the zminus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy;
{{REAL}} dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = zf - (k1 + 1);
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = zf - (k2 + 1);
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order1_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hy field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA01, RB0, RE0, RF0, dEx, dEy;
{{REAL}} dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k1)] - 1;
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA01 = RA[INDEX2D_R(0,k2)] - 1;
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}
__kernel void order2_zplus(int xs, int xf, int ys, int yf, int zs, int zf, int NX_PHI1, int NY_PHI1, int NZ_PHI1, int NX_PHI2, int NY_PHI2, int NZ_PHI2, int NY_R, __global const unsigned int* restrict ID, __global const {{REAL}}* restrict Ex, __global const {{REAL}}* restrict Ey, __global const {{REAL}}* restrict Ez, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global const {{REAL}}* restrict Hz, __global {{REAL}} *PHI1, __global {{REAL}} *PHI2, __global const {{REAL}}* restrict RA, __global const {{REAL}}* restrict RB, __global const {{REAL}}* restrict RE, __global const {{REAL}}* restrict RF, {{REAL}} d) {
// This function updates the Hx and Hy field components for the zplus slab.
//
// Args:
// xs, xf, ys, yf, zs, zf: Cell coordinates of PML slab
// NX_PHI, NY_PHI, NZ_PHI, NY_R: Dimensions of PHI1, PHI2, and R PML arrays
// ID, E, H: Access to ID and field component arrays
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
// Obtain the linear index corresponding to the current tREad
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = idx / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
int j1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) / NZ_PHI1;
int k1 = ((idx % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) % (NY_PHI1 * NZ_PHI1)) % NZ_PHI1;
// Convert the linear index to subscripts for PML PHI2 (4D) arrays
int p2 = idx / (NX_PHI2 * NY_PHI2 * NZ_PHI2);
int i2 = (idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) / (NY_PHI2 * NZ_PHI2);
int j2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) / NZ_PHI2;
int k2 = ((idx % (NX_PHI2 * NY_PHI2 * NZ_PHI2)) % (NY_PHI2 * NZ_PHI2)) % NZ_PHI2;
{{REAL}} RA0, RB0, RE0, RF0, RA1, RB1, RE1, RF1, RA01, dEx, dEy;
{{REAL}} dz = d;
int ii, jj, kk, materialHx, materialHy;
int nx = xf - xs;
int ny = yf - ys;
int nz = zf - zs;
if (p1 == 0 && i1 < nx && j1 < ny && k1 < nz) {
// Subscripts for field arrays
ii = i1 + xs;
jj = j1 + ys;
kk = k1 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k1)];
RB0 = RB[INDEX2D_R(0,k1)];
RE0 = RE[INDEX2D_R(0,k1)];
RF0 = RF[INDEX2D_R(0,k1)];
RA1 = RA[INDEX2D_R(1,k1)];
RB1 = RB[INDEX2D_R(1,k1)];
RE1 = RE[INDEX2D_R(1,k1)];
RF1 = RF[INDEX2D_R(1,k1)];
RA01 = RA[INDEX2D_R(0,k1)] * RA[INDEX2D_R(1,k1)] - 1;
// Hx
materialHx = ID[INDEX4D_ID(3,ii,jj,kk)];
dEy = (Ey[INDEX3D_FIELDS(ii,jj,kk+1)] - Ey[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hx[INDEX3D_FIELDS(ii,jj,kk)] = Hx[INDEX3D_FIELDS(ii,jj,kk)] + updatecoeffsH[INDEX2D_MAT(materialHx,4)] * (RA01 * dEy + RA1 * RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] + RB1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(1,i1,j1,k1)] = RE1 * PHI1[INDEX4D_PHI1(1,i1,j1,k1)] - RF1 * (RA0 * dEy + RB0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)]);
PHI1[INDEX4D_PHI1(0,i1,j1,k1)] = RE0 * PHI1[INDEX4D_PHI1(0,i1,j1,k1)] - RF0 * dEy;
}
if (p2 == 0 && i2 < nx && j2 < ny && k2 < nz) {
// Subscripts for field arrays
ii = i2 + xs;
jj = j2 + ys;
kk = k2 + zs;
// PML coefficients
RA0 = RA[INDEX2D_R(0,k2)];
RB0 = RB[INDEX2D_R(0,k2)];
RE0 = RE[INDEX2D_R(0,k2)];
RF0 = RF[INDEX2D_R(0,k2)];
RA1 = RA[INDEX2D_R(1,k2)];
RB1 = RB[INDEX2D_R(1,k2)];
RE1 = RE[INDEX2D_R(1,k2)];
RF1 = RF[INDEX2D_R(1,k2)];
RA01 = RA[INDEX2D_R(0,k2)] * RA[INDEX2D_R(1,k2)] - 1;
// Hy
materialHy = ID[INDEX4D_ID(4,ii,jj,kk)];
dEx = (Ex[INDEX3D_FIELDS(ii,jj,kk+1)] - Ex[INDEX3D_FIELDS(ii,jj,kk)]) / dz;
Hy[INDEX3D_FIELDS(ii,jj,kk)] = Hy[INDEX3D_FIELDS(ii,jj,kk)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * (RA01 * dEx + RA1 * RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] + RB1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(1,i2,j2,k2)] = RE1 * PHI2[INDEX4D_PHI2(1,i2,j2,k2)] - RF1 * (RA0 * dEx + RB0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)]);
PHI2[INDEX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[INDEX4D_PHI2(0,i2,j2,k2)] - RF0 * dEx;
}
}

查看文件

@@ -1,55 +0,0 @@
// Macros for converting subscripts to linear index:
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}})+(j)*({{NZ_FIELDS}})+(k)
#define INDEX4D_SNAPS(p, i, j, k) (p)*({{NX_SNAPS}})*({{NY_SNAPS}})*({{NZ_SNAPS}})+(i)*({{NY_SNAPS}})*({{NZ_SNAPS}})+(j)*({{NZ_SNAPS}})+(k)
////////////////////
// Store snapshot //
////////////////////
__kernel void store_snapshot(int p, int xs, int xf, int ys, int yf, int zs, int zf, int dx, int dy, int dz,
__global const {{REAL}}* __restrict__ Ex, __global const {{REAL}}* __restrict__ Ey,
__global const {{REAL}}* __restrict__ Ez, __global const {{REAL}}* __restrict__ Hx,
__global const {{REAL}}* __restrict__ Hy, __global const {{REAL}}* __restrict__ Hz,
__global {{REAL}} *snapEx, __global {{REAL}} *snapEy, __global {{REAL}} *snapEz,
__global {{REAL}} *snapHx, __global {{REAL}} *snapHy, __global {{REAL}} *snapHz) {
// This function stores field values for a snapshot.
//
// Args:
// p: Snapshot number
// xs, xf, ys, yf, xs, xf: Start and finish cell coordinates for snapshot
// dx, dy, dz: Sampling interval in cell coordinates for snapshot
// E, H: Access to field component arrays
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots
// Obtain the linear index corresponding to the current thread
int idx = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
// Convert the linear index to subscripts for 4D SNAPS array
int i = (idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) / ({{NY_SNAPS}} * {{NZ_SNAPS}});
int j = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) / {{NZ_SNAPS}};
int k = ((idx % ({{NX_SNAPS}} * {{NY_SNAPS}} * {{NZ_SNAPS}})) % ({{NY_SNAPS}} * {{NZ_SNAPS}})) % {{NZ_SNAPS}};
// Subscripts for field arrays
int ii, jj, kk;
if (i >= xs && i < xf && j >= ys && j < yf && k >= zs && k < zf) {
// Increment subscripts for field array to account for spatial sampling of snapshot
ii = (xs + i) * dx;
jj = (ys + j) * dy;
kk = (zs + k) * dz;
// The electric field component value at a point comes from an average of
// the 4 electric field component values in that cell
snapEx[INDEX4D_SNAPS(p,i,j,k)] = (Ex[INDEX3D_FIELDS(ii,jj,kk)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk)] + Ex[INDEX3D_FIELDS(ii,jj,kk+1)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk+1)]) / 4;
snapEy[INDEX4D_SNAPS(p,i,j,k)] = (Ey[INDEX3D_FIELDS(ii,jj,kk)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk)] + Ey[INDEX3D_FIELDS(ii,jj,kk+1)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk+1)]) / 4;
snapEz[INDEX4D_SNAPS(p,i,j,k)] = (Ez[INDEX3D_FIELDS(ii,jj,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj,kk)] + Ez[INDEX3D_FIELDS(ii,jj+1,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj+1,kk)]) / 4;
// The magnetic field component value at a point comes from average of
// 2 magnetic field component values in that cell and the following cell
snapHx[INDEX4D_SNAPS(p,i,j,k)] = (Hx[INDEX3D_FIELDS(ii,jj,kk)] + Hx[INDEX3D_FIELDS(ii+1,jj,kk)]) / 2;
snapHy[INDEX4D_SNAPS(p,i,j,k)] = (Hy[INDEX3D_FIELDS(ii,jj,kk)] + Hy[INDEX3D_FIELDS(ii,jj+1,kk)]) / 2;
snapHz[INDEX4D_SNAPS(p,i,j,k)] = (Hz[INDEX3D_FIELDS(ii,jj,kk)] + Hz[INDEX3D_FIELDS(ii,jj,kk+1)]) / 2;
}
}

查看文件

@@ -1,206 +0,0 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
// Macros for converting subscripts to linear index:
#define INDEX2D_MAT(m, n) (m)*({{NY_MATCOEFFS}}) + (n)
#define INDEX2D_SRCINFO(m, n) (m)*({{NY_SRCINFO}}) + (n)
#define INDEX2D_SRCWAVES(m, n) (m)*({{NY_SRCWAVES}}) + (n)
#define INDEX3D_FIELDS(i, j, k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
#define INDEX4D_ID(p, i, j, k) (p)*({{NX_ID}})*({{NY_ID}})*({{NZ_ID}}) + (i)*({{NY_ID}})*({{NZ_ID}}) + (j)*({{NZ_ID}}) + (k)
// Material coefficients (read-only) in constant memory
__constant {{REAL}} updatecoeffsE[{{N_updatecoeffsE}}] =
{
{% for i in updateEVal %}
{{i}},
{% endfor %}
};
__constant {{REAL}} updatecoeffsH[{{N_updatecoeffsH}}] =
{
{% for i in updateHVal %}
{{i}},
{% endfor %}
};
///////////////////////////////////////////
// Hertzian dipole electric field update //
///////////////////////////////////////////
__kernel void update_hertzian_dipole(int NHERTZDIPOLE, int iteration,
{{REAL}} dx, {{REAL}} dy, {{REAL}} dz,
__global const int* restrict srcinfo1,
__global const {{REAL}}* restrict srcinfo2,
__global const {{REAL}}* restrict srcwaveforms,
__global const unsigned int* restrict ID,
__global {{REAL}} *Ex,
__global {{REAL}} *Ey,
__global {{REAL}} *Ez) {
// This function updates electric field values for Hertzian dipole sources.
//
// Args:
// NHERTZDIPOLE: total number of hertizan dipole in the model
// iteration
// dx, dy, dz: spatial discretization
// srcinfo1: source cell coordinates and polarisation information
// srcinfo2: other source info, length, resistance, etc
// srcwaveforms : source waveforms values
// ID, E: access to ID and field component values
// get linear index
int src = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
if (src < NHERTZDIPOLE) {
{{REAL}} dl;
int i, j, k, polarisation;
i = srcinfo1[INDEX2D_SRCINFO(src,0)];
j = srcinfo1[INDEX2D_SRCINFO(src,1)];
k = srcinfo1[INDEX2D_SRCINFO(src,2)];
polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
dl = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
int materialEx = ID[INDEX4D_ID(0,i,j,k)];
Ex[INDEX3D_FIELDS(i,j,k)] = Ex[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialEy = ID[INDEX4D_ID(1,i,j,k)];
Ey[INDEX3D_FIELDS(i,j,k)] = Ey[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialEz = ID[INDEX4D_ID(2,i,j,k)];
Ez[INDEX3D_FIELDS(i,j,k)] = Ez[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * dl * (1 / (dx * dy * dz));
}
}
}
__kernel void update_magnetic_dipole(int NMAGDIPOLE, int iteration, {{REAL}} dx, {{REAL}} dy, {{REAL}} dz, __global const int* restrict srcinfo1, __global const {{REAL}}* restrict srcinfo2, __global const {{REAL}}* restrict srcwaveforms, __global const unsigned int* restrict ID, __global {{REAL}} *Hx, __global {{REAL}} *Hy, __global {{REAL}} *Hz){
// This function updates magnetic field values for magnetic dipole sources.
//
// Args:
// NMAGDIPOLE: Total number of magnetic dipoles in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, H: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each receiver
int src = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
if (src < NMAGDIPOLE) {
int i, j, k, polarisation;
i = srcinfo1[INDEX2D_SRCINFO(src,0)];
j = srcinfo1[INDEX2D_SRCINFO(src,1)];
k = srcinfo1[INDEX2D_SRCINFO(src,2)];
polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
// 'x' polarised source
if (polarisation == 0) {
int materialHx = ID[INDEX4D_ID(3,i,j,k)];
Hx[INDEX3D_FIELDS(i,j,k)] = Hx[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'y' polarised source
else if (polarisation == 1) {
int materialHy = ID[INDEX4D_ID(4,i,j,k)];
Hy[INDEX3D_FIELDS(i,j,k)] = Hy[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
// 'z' polarised source
else if (polarisation == 2) {
int materialHz = ID[INDEX4D_ID(5,i,j,k)];
Hz[INDEX3D_FIELDS(i,j,k)] = Hz[INDEX3D_FIELDS(i,j,k)] - updatecoeffsH[INDEX2D_MAT(materialHz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (dx * dy * dz));
}
}
}
__kernel void update_voltage_source(int NVOLTSRC, int iteration, {{REAL}} dx, {{REAL}} dy, {{REAL}} dz, __global const int* restrict srcinfo1, __global const {{REAL}}* restrict srcinfo2, __global const {{REAL}}* restrict srcwaveforms, __global const unsigned int* restrict ID, __global {{REAL}} *Ex, __global {{REAL}} *Ey, __global {{REAL}} *Ez){
// This function updates electric field values for voltage sources.
//
// Args:
// NVOLTSRC: Total number of voltage sources in the model
// iteration: Iteration number of simulation
// dx, dy, dz: Spatial discretisations
// srcinfo1: Source cell coordinates and polarisation information
// srcinfo2: Other source information, e.g. length, resistance etc...
// srcwaveforms: Source waveform values
// ID, E: Access to ID and field component arrays
// Obtain the linear index corresponding to the current thread and use for each receiver
int src = get_global_id(2) * get_global_size(0) * get_global_size(1) + get_global_id(1) * get_global_size(0) + get_global_id(0);
if (src < NVOLTSRC) {
{{REAL}} resistance;
int i, j, k, polarisation;
i = srcinfo1[INDEX2D_SRCINFO(src,0)];
j = srcinfo1[INDEX2D_SRCINFO(src,1)];
k = srcinfo1[INDEX2D_SRCINFO(src,2)];
polarisation = srcinfo1[INDEX2D_SRCINFO(src,3)];
resistance = srcinfo2[src];
// 'x' polarised source
if (polarisation == 0) {
if (resistance != 0) {
int materialEx = ID[INDEX4D_ID(0,i,j,k)];
Ex[INDEX3D_FIELDS(i,j,k)] = Ex[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEx,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dy * dz));
}
else {
Ex[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dx;
}
}
// 'y' polarised source
else if (polarisation == 1) {
if (resistance != 0) {
int materialEy = ID[INDEX4D_ID(1,i,j,k)];
Ey[INDEX3D_FIELDS(i,j,k)] = Ey[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEy,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dz));
}
else {
Ey[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dy;
}
}
// 'z' polarised source
else if (polarisation == 2) {
if (resistance != 0) {
int materialEz = ID[INDEX4D_ID(2,i,j,k)];
Ez[INDEX3D_FIELDS(i,j,k)] = Ez[INDEX3D_FIELDS(i,j,k)] - updatecoeffsE[INDEX2D_MAT(materialEz,4)] * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] * (1 / (resistance * dx * dy));
}
else {
Ez[INDEX3D_FIELDS(i,j,k)] = -1 * srcwaveforms[INDEX2D_SRCWAVES(src,iteration)] / dz;
}
}
}
}

查看文件

@@ -1,59 +0,0 @@
# Copyright (C) 2015-2022: The University of Edinburgh, United Kingdom
# Authors: Craig Warren, Antonis Giannopoulos, and John Hartley
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
#define INDEX2D_RXCOORDS(m,n) (m)*({{NY_RXCOORDS}}) + (n)
#define INDEX3D_RXS(i,j,k) (i)*({{NY_RXS}})*({{NZ_RXS}}) + (j)*({{NZ_RXS}}) + (k)
#define INDEX3D_FIELDS(i,j,k) (i)*({{NY_FIELDS}})*({{NZ_FIELDS}}) + (j)*({{NZ_FIELDS}}) + (k)
__kernel void store_outputs(int NRX, int iteration,
__global const int* restrict rxcoords,
__global {{REAL}} *rxs,
__global const {{REAL}}* restrict Ex,
__global const {{REAL}}* restrict Ey,
__global const {{REAL}}* restrict Ez,
__global const {{REAL}}* restrict Hx,
__global const {{REAL}}* restrict Hy,
__global const {{REAL}}* restrict Hz) {
// This function stores field component values for every receiver in the model.
//
// Args:
// NRX: total number of receivers in the model.
// rxs: array to store field components for receivers - rows
// are field components; columns are iterations; pages are receiver
// Obtain linear index corresponding to the current work item
int rx = get_global_id(2) * get_global_size(0) * get_global_size(1) +
get_global_id(1) * get_global_size(0) + get_global_id(0);
int i,j,k;
if (rx < NRX) {
i = rxcoords[INDEX2D_RXCOORDS(rx,0)];
j = rxcoords[INDEX2D_RXCOORDS(rx,1)];
k = rxcoords[INDEX2D_RXCOORDS(rx,2)];
rxs[INDEX3D_RXS(0,iteration,rx)] = Ex[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(1,iteration,rx)] = Ey[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(2,iteration,rx)] = Ez[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(3,iteration,rx)] = Hx[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(4,iteration,rx)] = Hy[INDEX3D_FIELDS(i,j,k)];
rxs[INDEX3D_RXS(5,iteration,rx)] = Hz[INDEX3D_FIELDS(i,j,k)];
}
}