你已经派生过 gprMax
镜像自地址
https://gitee.com/sunhf/gprMax.git
已同步 2025-08-07 23:14:03 +08:00
Work on templating for CUDA/OpenCL
这个提交包含在:
@@ -18,7 +18,32 @@
|
||||
|
||||
from string import Template
|
||||
|
||||
update_electric = Template("""
|
||||
update_electric = {
|
||||
'args_cuda': Template("""
|
||||
__global__ void update_electric(int NX,
|
||||
int NY,
|
||||
int NZ,
|
||||
const unsigned int* __restrict__ ID,
|
||||
$REAL *Ex,
|
||||
$REAL *Ey,
|
||||
$REAL *Ez,
|
||||
const $REAL* __restrict__ Hx,
|
||||
const $REAL* __restrict__ Hy,
|
||||
const $REAL* __restrict__ Hz)
|
||||
"""),
|
||||
'args_opencl': Template("""
|
||||
int NX,
|
||||
int NY,
|
||||
int NZ,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global $REAL *Ex,
|
||||
__global $REAL *Ey,
|
||||
__global $REAL *Ez,
|
||||
__global const $REAL * restrict Hx,
|
||||
__global const $REAL * restrict Hy,
|
||||
__global const $REAL * restrict Hz
|
||||
"""),
|
||||
'func': Template("""
|
||||
// Electric field updates - normal materials.
|
||||
//
|
||||
// Args:
|
||||
@@ -35,6 +60,8 @@ update_electric = Template("""
|
||||
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
|
||||
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Ex component
|
||||
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
|
||||
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
|
||||
@@ -58,9 +85,34 @@ update_electric = Template("""
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x-1,y,z)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y-1,z)]);
|
||||
}
|
||||
""")
|
||||
""")}
|
||||
|
||||
update_magnetic = Template("""
|
||||
update_magnetic = {
|
||||
'args_cuda': Template("""
|
||||
__global__ void update_magnetic(int NX,
|
||||
int NY,
|
||||
int NZ,
|
||||
const unsigned int* __restrict__ ID,
|
||||
$REAL *Hx,
|
||||
$REAL *Hy,
|
||||
$REAL *Hz,
|
||||
const $REAL* __restrict__ Ex,
|
||||
const $REAL* __restrict__ Ey,
|
||||
const $REAL* __restrict__ Ez)
|
||||
"""),
|
||||
'args_opencl': Template("""
|
||||
int NX,
|
||||
int NY,
|
||||
int NZ,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global $REAL *Hx,
|
||||
__global $REAL *Hy,
|
||||
__global $REAL *Hz,
|
||||
__global const $REAL * restrict Ex,
|
||||
__global const $REAL * restrict Ey,
|
||||
__global const $REAL * restrict Ez
|
||||
"""),
|
||||
'func': Template("""
|
||||
// Magnetic field updates - normal materials.
|
||||
//
|
||||
// Args:
|
||||
@@ -77,6 +129,8 @@ update_magnetic = Template("""
|
||||
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
|
||||
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Hx component
|
||||
if (NX != 1 && x > 0 && x < NX && y >= 0 && y < NY && z >= 0 && z < NZ) {
|
||||
int materialHx = ID[IDX4D_ID(3,x_ID,y_ID,z_ID)];
|
||||
@@ -100,9 +154,44 @@ update_magnetic = Template("""
|
||||
updatecoeffsH[IDX2D_MAT(materialHz,1)] * (Ey[IDX3D_FIELDS(x+1,y,z)] - Ey[IDX3D_FIELDS(x,y,z)]) +
|
||||
updatecoeffsH[IDX2D_MAT(materialHz,2)] * (Ex[IDX3D_FIELDS(x,y+1,z)] - Ex[IDX3D_FIELDS(x,y,z)]);
|
||||
}
|
||||
""")
|
||||
""")}
|
||||
|
||||
update_electric_dispersive_A = Template("""
|
||||
update_electric_dispersive_A = {
|
||||
'args_cuda': Template("""
|
||||
__global__ void update_electric_dispersive_A(int NX,
|
||||
int NY,
|
||||
int NZ,
|
||||
int MAXPOLES,
|
||||
const $COMPLEX* __restrict__ updatecoeffsdispersive,
|
||||
$COMPLEX *Tx,
|
||||
$COMPLEX *Ty,
|
||||
$COMPLEX *Tz,
|
||||
const unsigned int* __restrict__ ID,
|
||||
$REAL *Ex,
|
||||
$REAL *Ey,
|
||||
$REAL *Ez,
|
||||
const $REAL* __restrict__ Hx,
|
||||
const $REAL* __restrict__ Hy,
|
||||
const $REAL* __restrict__ Hz)
|
||||
"""),
|
||||
'args_opencl': Template("""
|
||||
int NX,
|
||||
int NY,
|
||||
int NZ,
|
||||
int MAXPOLES,
|
||||
__global const $COMPLEX* restrict updatecoeffsdispersive,
|
||||
__global $COMPLEX *Tx,
|
||||
__global $COMPLEX *Ty,
|
||||
__global $COMPLEX *Tz,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global $REAL *Ex,
|
||||
__global $REAL *Ey,
|
||||
__global $REAL *Ez,
|
||||
__global const $REAL* restrict Hx,
|
||||
__global const $REAL* restrict Hy,
|
||||
__global const $REAL* restrict Hz
|
||||
"""),
|
||||
'func': Template("""
|
||||
// Electric field updates - dispersive materials - part A of updates to electric
|
||||
// field values when dispersive materials
|
||||
// (with multiple poles) are present.
|
||||
@@ -130,6 +219,8 @@ update_electric_dispersive_A = Template("""
|
||||
int y_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
|
||||
int z_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Ex component
|
||||
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
|
||||
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
|
||||
@@ -174,9 +265,38 @@ update_electric_dispersive_A = Template("""
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y-1,z)]) -
|
||||
updatecoeffsE[IDX2D_MAT(materialEz,4)] * phi;
|
||||
}
|
||||
""")
|
||||
""")}
|
||||
|
||||
update_electric_dispersive_B = Template("""
|
||||
update_electric_dispersive_B = {
|
||||
'args_cuda': Template("""
|
||||
__global__ void update_electric_dispersive_B(int NX,
|
||||
int NY,
|
||||
int NZ,
|
||||
int MAXPOLES,
|
||||
const $COMPLEX* __restrict__ updatecoeffsdispersive,
|
||||
$COMPLEX *Tx,
|
||||
$COMPLEX *Ty,
|
||||
$COMPLEX *Tz,
|
||||
const unsigned int* __restrict__ ID,
|
||||
const $REAL* __restrict__ Ex,
|
||||
const $REAL* __restrict__ Ey,
|
||||
const $REAL* __restrict__ Ez)
|
||||
"""),
|
||||
'args_opencl': Template("""
|
||||
int NX,
|
||||
int NY,
|
||||
int NZ,
|
||||
int MAXPOLES,
|
||||
__global const $COMPLEX* restrict updatecoeffsdispersive,
|
||||
__global $COMPLEX *Tx,
|
||||
__global $COMPLEX *Ty,
|
||||
__global $COMPLEX *Tz,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global const $REAL* restrict Ex,
|
||||
__global const $REAL* restrict Ey,
|
||||
__global const $REAL* restrict Ez
|
||||
"""),
|
||||
'func': Template("""
|
||||
// Electric field updates - dispersive materials - part B of updates to electric
|
||||
// field values when dispersive materials
|
||||
// (with multiple poles) are present.
|
||||
@@ -204,6 +324,8 @@ update_electric_dispersive_B = Template("""
|
||||
int y_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
|
||||
int z_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Ex component
|
||||
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
|
||||
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
|
||||
@@ -230,4 +352,4 @@ update_electric_dispersive_B = Template("""
|
||||
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(x,y,z)];
|
||||
}
|
||||
}
|
||||
""")
|
||||
""")}
|
@@ -18,91 +18,166 @@
|
||||
|
||||
from string import Template
|
||||
|
||||
x_args = Template("int xs, "
|
||||
"int xf, "
|
||||
"int ys, "
|
||||
"int yf, "
|
||||
"int zs, "
|
||||
"int zf, "
|
||||
"int NX_PHI1, "
|
||||
"int NY_PHI1, "
|
||||
"int NZ_PHI1, "
|
||||
"int NX_PHI2, "
|
||||
"int NY_PHI2, "
|
||||
"int NZ_PHI2, "
|
||||
"int NY_R, "
|
||||
"__global const unsigned int* restrict ID, "
|
||||
"__global const $REAL* restrict Ex, "
|
||||
"__global $REAL *Ey, "
|
||||
"__global $REAL *Ez, "
|
||||
"__global const $REAL* restrict Hx, "
|
||||
"__global const $REAL* restrict Hy, "
|
||||
"__global const $REAL* restrict Hz, "
|
||||
"__global $REAL *PHI1, "
|
||||
"__global $REAL *PHI2, "
|
||||
"__global const $REAL* restrict RA, "
|
||||
"__global const $REAL* restrict RB, "
|
||||
"__global const $REAL* restrict RE, "
|
||||
"__global const $REAL* restrict RF, "
|
||||
"$REAL d")
|
||||
x_args = {'cuda': Template("""
|
||||
__global__ void $FUNC(int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int NX_PHI1,
|
||||
int NY_PHI1,
|
||||
int NZ_PHI1,
|
||||
int NX_PHI2,
|
||||
int NY_PHI2,
|
||||
int NZ_PHI2,
|
||||
int NY_R,
|
||||
const unsigned int* __restrict__ ID,
|
||||
const $REAL* __restrict__ Ex, $REAL *Ey,
|
||||
$REAL *Ez,
|
||||
const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1,
|
||||
$REAL *PHI2,
|
||||
const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d)
|
||||
"""),
|
||||
'opencl': Template("""
|
||||
int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int NX_PHI1,
|
||||
int NY_PHI1,
|
||||
int NZ_PHI1,
|
||||
int NX_PHI2,
|
||||
int NY_PHI2,
|
||||
int NZ_PHI2,
|
||||
int NY_R,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global const $REAL* restrict Ex,
|
||||
__global $REAL *Ey,
|
||||
__global $REAL *Ez,
|
||||
__global const $REAL* restrict Hx,
|
||||
__global const $REAL* restrict Hy,
|
||||
__global const $REAL* restrict Hz,
|
||||
__global $REAL *PHI1,
|
||||
__global $REAL *PHI2,
|
||||
__global const $REAL* restrict RA,
|
||||
__global const $REAL* restrict RB,
|
||||
__global const $REAL* restrict RE,
|
||||
__global const $REAL* restrict RF,
|
||||
$REAL d
|
||||
""")
|
||||
}
|
||||
|
||||
y_args = Template("int xs, "
|
||||
"int xf, "
|
||||
"int ys, "
|
||||
"int yf, "
|
||||
"int zs, "
|
||||
"int zf, "
|
||||
"int NX_PHI1, "
|
||||
"int NY_PHI1, "
|
||||
"int NZ_PHI1, "
|
||||
"int NX_PHI2, "
|
||||
"int NY_PHI2, "
|
||||
"int NZ_PHI2, "
|
||||
"int NY_R, "
|
||||
"__global const unsigned int* restrict ID, "
|
||||
"__global $REAL *Ex, "
|
||||
"__global const $REAL* restrict Ey, "
|
||||
"__global $REAL *Ez, "
|
||||
"__global const $REAL* restrict Hx, "
|
||||
"__global const $REAL* restrict Hy, "
|
||||
"__global const $REAL* restrict Hz, "
|
||||
"__global $REAL *PHI1, "
|
||||
"__global $REAL *PHI2, "
|
||||
"__global const $REAL* restrict RA, "
|
||||
"__global const $REAL* restrict RB, "
|
||||
"__global const $REAL* restrict RE, "
|
||||
"__global const $REAL* restrict RF, "
|
||||
"$REAL d")
|
||||
y_args = {'cuda': Template("""
|
||||
__global__ void $FUNC(int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int NX_PHI1,
|
||||
int NY_PHI1,
|
||||
int NZ_PHI1,
|
||||
int NX_PHI2,
|
||||
int NY_PHI2,
|
||||
int NZ_PHI2,
|
||||
int NY_R,
|
||||
const unsigned int* __restrict__ ID,
|
||||
$REAL *Ex,
|
||||
const $REAL* __restrict__ Ey,
|
||||
$REAL *Ez,
|
||||
const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1,
|
||||
$REAL *PHI2,
|
||||
const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d)
|
||||
"""),
|
||||
'opencl': Template("""
|
||||
int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int NX_PHI1,
|
||||
int NY_PHI1,
|
||||
int NZ_PHI1,
|
||||
int NX_PHI2,
|
||||
int NY_PHI2,
|
||||
int NZ_PHI2,
|
||||
int NY_R,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global $REAL *Ex,
|
||||
__global const $REAL* restrict Ey,
|
||||
__global $REAL *Ez,
|
||||
__global const $REAL* restrict Hx,
|
||||
__global const $REAL* restrict Hy,
|
||||
__global const $REAL* restrict Hz,
|
||||
__global $REAL *PHI1,
|
||||
__global $REAL *PHI2,
|
||||
__global const $REAL* restrict RA,
|
||||
__global const $REAL* restrict RB,
|
||||
__global const $REAL* restrict RE,
|
||||
__global const $REAL* restrict RF,
|
||||
$REAL d
|
||||
""")
|
||||
}
|
||||
|
||||
z_args = Template("int xs, "
|
||||
"int xf, "
|
||||
"int ys, "
|
||||
"int yf, "
|
||||
"int zs, "
|
||||
"int zf, "
|
||||
"int NX_PHI1, "
|
||||
"int NY_PHI1, "
|
||||
"int NZ_PHI1, "
|
||||
"int NX_PHI2, "
|
||||
"int NY_PHI2, "
|
||||
"int NZ_PHI2, "
|
||||
"int NY_R, "
|
||||
"__global const unsigned int* restrict ID, "
|
||||
"__global $REAL *Ex, "
|
||||
"__global $REAL *Ey, "
|
||||
"__global const $REAL* restrict Ez, "
|
||||
"__global const $REAL* restrict Hx, "
|
||||
"__global const $REAL* restrict Hy, "
|
||||
"__global const $REAL* restrict Hz, "
|
||||
"__global $REAL *PHI1, "
|
||||
"__global $REAL *PHI2, "
|
||||
"__global const $REAL* restrict RA, "
|
||||
"__global const $REAL* restrict RB, "
|
||||
"__global const $REAL* restrict RE, "
|
||||
"__global const $REAL* restrict RF, "
|
||||
"$REAL d")
|
||||
z_args = {'cuda': Template("""
|
||||
__global__ void $FUNC(int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int NX_PHI1,
|
||||
int NY_PHI1,
|
||||
int NZ_PHI1,
|
||||
int NX_PHI2,
|
||||
int NY_PHI2,
|
||||
int NZ_PHI2,
|
||||
int NY_R,
|
||||
const unsigned int* __restrict__ ID,
|
||||
$REAL *Ex,
|
||||
$REAL *Ey,
|
||||
const $REAL* __restrict__ Ez,
|
||||
const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1,
|
||||
$REAL *PHI2,
|
||||
const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d)
|
||||
"""),
|
||||
'opencl': Template("""
|
||||
int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int NX_PHI1,
|
||||
int NY_PHI1,
|
||||
int NZ_PHI1,
|
||||
int NX_PHI2,
|
||||
int NY_PHI2,
|
||||
int NZ_PHI2,
|
||||
int NY_R,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global $REAL *Ex,
|
||||
__global $REAL *Ey,
|
||||
__global const $REAL* restrict Ez,
|
||||
__global const $REAL* restrict Hx,
|
||||
__global const $REAL* restrict Hy,
|
||||
__global const $REAL* restrict Hz,
|
||||
__global $REAL *PHI1,
|
||||
__global $REAL *PHI2,
|
||||
__global const $REAL* restrict RA,
|
||||
__global const $REAL* restrict RB,
|
||||
__global const $REAL* restrict RE,
|
||||
__global const $REAL* restrict RF,
|
||||
$REAL d
|
||||
""")
|
||||
}
|
||||
|
||||
order1_xminus = {'args': x_args,
|
||||
order1_xminus = {'args_cuda': x_args['cuda'],
|
||||
'args_opencl': x_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Ey and Ez field components for the xminus slab.
|
||||
//
|
||||
@@ -113,6 +188,8 @@ order1_xminus = {'args': x_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -173,7 +250,8 @@ order1_xminus = {'args': x_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order2_xminus = {'args': x_args,
|
||||
order2_xminus = {'args_cuda': x_args['cuda'],
|
||||
'args_opencl': x_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Ey and Ez field components for the xminus slab.
|
||||
//
|
||||
@@ -184,6 +262,8 @@ order2_xminus = {'args': x_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -260,7 +340,8 @@ order2_xminus = {'args': x_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order1_xplus = {'args': x_args,
|
||||
order1_xplus = {'args_cuda': x_args['cuda'],
|
||||
'args_opencl': x_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Ey and Ez field components for the xplus slab.
|
||||
//
|
||||
@@ -271,6 +352,8 @@ order1_xplus = {'args': x_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -331,7 +414,8 @@ order1_xplus = {'args': x_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order2_xplus = {'args': x_args,
|
||||
order2_xplus = {'args_cuda': x_args['cuda'],
|
||||
'args_opencl': x_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Ey and Ez field components for the xplus slab.
|
||||
//
|
||||
@@ -342,6 +426,8 @@ order2_xplus = {'args': x_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -417,7 +503,8 @@ order2_xplus = {'args': x_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order1_yminus = {'args': y_args,
|
||||
order1_yminus = {'args_cuda': y_args['cuda'],
|
||||
'args_opencl': y_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Ex and Ez field components for the yminus slab.
|
||||
//
|
||||
@@ -428,6 +515,8 @@ order1_yminus = {'args': y_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -488,7 +577,8 @@ order1_yminus = {'args': y_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order2_yminus = {'args': y_args,
|
||||
order2_yminus = {'args_cuda': y_args['cuda'],
|
||||
'args_opencl': y_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Ex and Ez field components for the yminus slab.
|
||||
//
|
||||
@@ -499,6 +589,8 @@ order2_yminus = {'args': y_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -575,7 +667,8 @@ order2_yminus = {'args': y_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order1_yplus = {'args': y_args,
|
||||
order1_yplus = {'args_cuda': y_args['cuda'],
|
||||
'args_opencl': y_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Ex and Ez field components for the yplus slab.
|
||||
//
|
||||
@@ -586,6 +679,8 @@ order1_yplus = {'args': y_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -646,7 +741,8 @@ order1_yplus = {'args': y_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order2_yplus = {'args': y_args,
|
||||
order2_yplus = {'args_cuda': y_args['cuda'],
|
||||
'args_opencl': y_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Ex and Ez field components for the yplus slab.
|
||||
//
|
||||
@@ -657,6 +753,8 @@ order2_yplus = {'args': y_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -733,7 +831,8 @@ order2_yplus = {'args': y_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order1_zminus = {'args': z_args,
|
||||
order1_zminus = {'args_cuda': z_args['cuda'],
|
||||
'args_opencl': z_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Ex and Ey field components for the zminus slab.
|
||||
//
|
||||
@@ -744,6 +843,8 @@ order1_zminus = {'args': z_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -804,7 +905,8 @@ order1_zminus = {'args': z_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order2_zminus = {'args': z_args,
|
||||
order2_zminus = {'args_cuda': z_args['cuda'],
|
||||
'args_opencl': z_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Ex and Ey field components for the zminus slab.
|
||||
//
|
||||
@@ -815,6 +917,8 @@ order2_zminus = {'args': z_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -891,7 +995,8 @@ order2_zminus = {'args': z_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order1_zplus = {'args': z_args,
|
||||
order1_zplus = {'args_cuda': z_args['cuda'],
|
||||
'args_opencl': z_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Ex and Ey field components for the zplus slab.
|
||||
//
|
||||
@@ -902,6 +1007,8 @@ order1_zplus = {'args': z_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -962,7 +1069,8 @@ order1_zplus = {'args': z_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order2_zplus = {'args': z_args,
|
||||
order2_zplus = {'args_cuda': z_args['cuda'],
|
||||
'args_opencl': z_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Ex and Ey field components for the zplus slab.
|
||||
//
|
||||
@@ -973,6 +1081,8 @@ order2_zplus = {'args': z_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -1047,5 +1157,4 @@ order2_zplus = {'args': z_args,
|
||||
(RA0 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]);
|
||||
PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
|
||||
}
|
||||
""")}
|
||||
|
||||
""")}
|
@@ -18,91 +18,168 @@
|
||||
|
||||
from string import Template
|
||||
|
||||
x_args = Template("int xs, "
|
||||
"int xf, "
|
||||
"int ys, "
|
||||
"int yf, "
|
||||
"int zs, "
|
||||
"int zf, "
|
||||
"int NX_PHI1, "
|
||||
"int NY_PHI1, "
|
||||
"int NZ_PHI1, "
|
||||
"int NX_PHI2, "
|
||||
"int NY_PHI2, "
|
||||
"int NZ_PHI2, "
|
||||
"int NY_R, "
|
||||
"__global const unsigned int* restrict ID, "
|
||||
"__global const $REAL* restrict Ex, "
|
||||
"__global const $REAL* restrict Ey, "
|
||||
"__global const $REAL* restrict Ez, "
|
||||
"__global const $REAL* restrict Hx, "
|
||||
"__global $REAL *Hy, "
|
||||
"__global $REAL *Hz, "
|
||||
"__global $REAL *PHI1, "
|
||||
"__global $REAL *PHI2, "
|
||||
"__global const $REAL* restrict RA, "
|
||||
"__global const $REAL* restrict RB, "
|
||||
"__global const $REAL* restrict RE, "
|
||||
"__global const $REAL* restrict RF, "
|
||||
"$REAL d")
|
||||
x_args = {'cuda': Template("""
|
||||
__global__ void $FUNC(int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int NX_PHI1,
|
||||
int NY_PHI1,
|
||||
int NZ_PHI1,
|
||||
int NX_PHI2,
|
||||
int NY_PHI2,
|
||||
int NZ_PHI2,
|
||||
int NY_R,
|
||||
const unsigned int* __restrict__ ID,
|
||||
const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy,
|
||||
$REAL *Hz,
|
||||
$REAL *PHI1,
|
||||
$REAL *PHI2,
|
||||
const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d)
|
||||
"""),
|
||||
'opencl': Template("""
|
||||
int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int NX_PHI1,
|
||||
int NY_PHI1,
|
||||
int NZ_PHI1,
|
||||
int NX_PHI2,
|
||||
int NY_PHI2,
|
||||
int NZ_PHI2,
|
||||
int NY_R,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global const $REAL* restrict Ex,
|
||||
__global const $REAL* restrict Ey,
|
||||
__global const $REAL* restrict Ez,
|
||||
__global const $REAL* restrict Hx,
|
||||
__global $REAL *Hy,
|
||||
__global $REAL *Hz,
|
||||
__global $REAL *PHI1,
|
||||
__global $REAL *PHI2,
|
||||
__global const $REAL* restrict RA,
|
||||
__global const $REAL* restrict RB,
|
||||
__global const $REAL* restrict RE,
|
||||
__global const $REAL* restrict RF,
|
||||
$REAL d
|
||||
""")
|
||||
}
|
||||
|
||||
y_args = Template("int xs, "
|
||||
"int xf, "
|
||||
"int ys, "
|
||||
"int yf, "
|
||||
"int zs, "
|
||||
"int zf, "
|
||||
"int NX_PHI1, "
|
||||
"int NY_PHI1, "
|
||||
"int NZ_PHI1, "
|
||||
"int NX_PHI2, "
|
||||
"int NY_PHI2, "
|
||||
"int NZ_PHI2, "
|
||||
"int NY_R, "
|
||||
"__global const unsigned int* restrict ID, "
|
||||
"__global const $REAL* restrict Ex, "
|
||||
"__global const $REAL* restrict Ey, "
|
||||
"__global const $REAL* restrict Ez, "
|
||||
"__global $REAL *Hx, "
|
||||
"__global const $REAL* restrict Hy, "
|
||||
"__global $REAL *Hz, "
|
||||
"__global $REAL *PHI1, "
|
||||
"__global $REAL *PHI2, "
|
||||
"__global const $REAL* restrict RA, "
|
||||
"__global const $REAL* restrict RB, "
|
||||
"__global const $REAL* restrict RE, "
|
||||
"__global const $REAL* restrict RF, "
|
||||
"$REAL d")
|
||||
y_args = {'cuda': Template("""
|
||||
__global__ void $FUNC(int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int NX_PHI1,
|
||||
int NY_PHI1,
|
||||
int NZ_PHI1,
|
||||
int NX_PHI2,
|
||||
int NY_PHI2,
|
||||
int NZ_PHI2,
|
||||
int NY_R,
|
||||
const unsigned int* __restrict__ ID,
|
||||
const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez,
|
||||
$REAL *Hx,
|
||||
const $REAL* __restrict__ Hy,
|
||||
$REAL *Hz,
|
||||
$REAL *PHI1,
|
||||
$REAL *PHI2,
|
||||
const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d)
|
||||
"""),
|
||||
'opencl': Template("""
|
||||
int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int NX_PHI1,
|
||||
int NY_PHI1,
|
||||
int NZ_PHI1,
|
||||
int NX_PHI2,
|
||||
int NY_PHI2,
|
||||
int NZ_PHI2,
|
||||
int NY_R,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global const $REAL* restrict Ex,
|
||||
__global const $REAL* restrict Ey,
|
||||
__global const $REAL* restrict Ez,
|
||||
__global $REAL *Hx,
|
||||
__global const $REAL* restrict Hy,
|
||||
__global $REAL *Hz,
|
||||
__global $REAL *PHI1,
|
||||
__global $REAL *PHI2,
|
||||
__global const $REAL* restrict RA,
|
||||
__global const $REAL* restrict RB,
|
||||
__global const $REAL* restrict RE,
|
||||
__global const $REAL* restrict RF,
|
||||
$REAL d
|
||||
""")
|
||||
}
|
||||
|
||||
z_args = Template("int xs, "
|
||||
"int xf, "
|
||||
"int ys, "
|
||||
"int yf, "
|
||||
"int zs, "
|
||||
"int zf, "
|
||||
"int NX_PHI1, "
|
||||
"int NY_PHI1, "
|
||||
"int NZ_PHI1, "
|
||||
"int NX_PHI2, "
|
||||
"int NY_PHI2, "
|
||||
"int NZ_PHI2, "
|
||||
"int NY_R, "
|
||||
"__global const unsigned int* restrict ID, "
|
||||
"__global const $REAL* restrict Ex, "
|
||||
"__global const $REAL* restrict Ey, "
|
||||
"__global const $REAL* restrict Ez, "
|
||||
"__global $REAL *Hx, "
|
||||
"__global $REAL *Hy, "
|
||||
"__global const $REAL* restrict Hz, "
|
||||
"__global $REAL *PHI1, "
|
||||
"__global $REAL *PHI2, "
|
||||
"__global const $REAL* restrict RA, "
|
||||
"__global const $REAL* restrict RB, "
|
||||
"__global const $REAL* restrict RE, "
|
||||
"__global const $REAL* restrict RF, "
|
||||
"$REAL d")
|
||||
z_args = {'cuda': Template("""
|
||||
__global__ void $FUNC(int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int NX_PHI1,
|
||||
int NY_PHI1,
|
||||
int NZ_PHI1,
|
||||
int NX_PHI2,
|
||||
int NY_PHI2,
|
||||
int NZ_PHI2,
|
||||
int NY_R,
|
||||
const unsigned int* __restrict__ ID,
|
||||
const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez,
|
||||
$REAL *Hx,
|
||||
$REAL *Hy,
|
||||
const $REAL* __restrict__ Hz,
|
||||
$REAL *PHI1,
|
||||
$REAL *PHI2,
|
||||
const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d)
|
||||
"""),
|
||||
'opencl': Template("""
|
||||
int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int NX_PHI1,
|
||||
int NY_PHI1,
|
||||
int NZ_PHI1,
|
||||
int NX_PHI2,
|
||||
int NY_PHI2,
|
||||
int NZ_PHI2,
|
||||
int NY_R,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global const $REAL* restrict Ex,
|
||||
__global const $REAL* restrict Ey,
|
||||
__global const $REAL* restrict Ez,
|
||||
__global $REAL *Hx,
|
||||
__global $REAL *Hy,
|
||||
__global const $REAL* restrict Hz,
|
||||
__global $REAL *PHI1,
|
||||
__global $REAL *PHI2,
|
||||
__global const $REAL* restrict RA,
|
||||
__global const $REAL* restrict RB,
|
||||
__global const $REAL* restrict RE,
|
||||
__global const $REAL* restrict RF,
|
||||
$REAL d
|
||||
""")
|
||||
}
|
||||
|
||||
order1_xminus = {'args': x_args,
|
||||
order1_xminus = {'args_cuda': x_args['cuda'],
|
||||
'args_opencl': x_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Hy and Hz field components for the xminus slab.
|
||||
//
|
||||
@@ -113,6 +190,8 @@ order1_xminus = {'args': x_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -173,7 +252,8 @@ order1_xminus = {'args': x_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order2_xminus = {'args': x_args,
|
||||
order2_xminus = {'args_cuda': x_args['cuda'],
|
||||
'args_opencl': x_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Hy and Hz field components for the xminus slab.
|
||||
//
|
||||
@@ -184,6 +264,8 @@ order2_xminus = {'args': x_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -260,7 +342,8 @@ order2_xminus = {'args': x_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order1_xplus = {'args': x_args,
|
||||
order1_xplus = {'args_cuda': x_args['cuda'],
|
||||
'args_opencl': x_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Hy and Hz field components for the xplus slab.
|
||||
//
|
||||
@@ -271,6 +354,8 @@ order1_xplus = {'args': x_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -331,7 +416,8 @@ order1_xplus = {'args': x_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order2_xplus = {'args': x_args,
|
||||
order2_xplus = {'args_cuda': x_args['cuda'],
|
||||
'args_opencl': x_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Hy and Hz field components for the xplus slab.
|
||||
//
|
||||
@@ -342,6 +428,8 @@ order2_xplus = {'args': x_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -418,7 +506,8 @@ order2_xplus = {'args': x_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order1_yminus = {'args': y_args,
|
||||
order1_yminus = {'args_cuda': y_args['cuda'],
|
||||
'args_opencl': y_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Hx and Hz field components for the yminus slab.
|
||||
//
|
||||
@@ -429,6 +518,8 @@ order1_yminus = {'args': y_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -489,7 +580,8 @@ order1_yminus = {'args': y_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order2_yminus = {'args': y_args,
|
||||
order2_yminus = {'args_cuda': y_args['cuda'],
|
||||
'args_opencl': y_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Hx and Hz field components for the yminus slab.
|
||||
//
|
||||
@@ -500,6 +592,8 @@ order2_yminus = {'args': y_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -575,7 +669,8 @@ order2_yminus = {'args': y_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order1_yplus = {'args': y_args,
|
||||
order1_yplus = {'args_cuda': y_args['cuda'],
|
||||
'args_opencl': y_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Hx and Hz field components for the yplus slab.
|
||||
//
|
||||
@@ -586,6 +681,8 @@ order1_yplus = {'args': y_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -646,7 +743,8 @@ order1_yplus = {'args': y_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order2_yplus = {'args': y_args,
|
||||
order2_yplus = {'args_cuda': y_args['cuda'],
|
||||
'args_opencl': y_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Hx and Hz field components for the yplus slab.
|
||||
//
|
||||
@@ -657,6 +755,8 @@ order2_yplus = {'args': y_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -731,7 +831,8 @@ order2_yplus = {'args': y_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order1_zminus = {'args': z_args,
|
||||
order1_zminus = {'args_cuda': z_args['cuda'],
|
||||
'args_opencl': z_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Hx and Hy field components for the zminus slab.
|
||||
//
|
||||
@@ -742,6 +843,8 @@ order1_zminus = {'args': z_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -802,7 +905,8 @@ order1_zminus = {'args': z_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order2_zminus = {'args': z_args,
|
||||
order2_zminus = {'args_cuda': z_args['cuda'],
|
||||
'args_opencl': z_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Hx and Hy field components for the zminus slab.
|
||||
//
|
||||
@@ -813,6 +917,8 @@ order2_zminus = {'args': z_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -889,7 +995,8 @@ order2_zminus = {'args': z_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order1_zplus = {'args': z_args,
|
||||
order1_zplus = {'args_cuda': z_args['cuda'],
|
||||
'args_opencl': z_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Hx and Hy field components for the zplus slab.
|
||||
//
|
||||
@@ -900,6 +1007,8 @@ order1_zplus = {'args': z_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
@@ -960,7 +1069,8 @@ order1_zplus = {'args': z_args,
|
||||
}
|
||||
""")}
|
||||
|
||||
order2_zplus = {'args': z_args,
|
||||
order2_zplus = {'args_cuda': z_args['cuda'],
|
||||
'args_opencl': z_args['opencl'],
|
||||
'func': Template("""
|
||||
// This function updates the Hx and Hy field components for the zplus slab.
|
||||
//
|
||||
@@ -971,6 +1081,8 @@ order2_zplus = {'args': z_args,
|
||||
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
|
||||
// d: Spatial discretisation, e.g. dx, dy or dz
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
|
||||
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
|
||||
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
|
||||
|
@@ -19,7 +19,55 @@
|
||||
from string import Template
|
||||
|
||||
|
||||
store_snapshot = Template("""
|
||||
store_snapshot = {'args_cuda': Template("""
|
||||
__global__ void store_snapshot(int p,
|
||||
int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int dx,
|
||||
int dy,
|
||||
int dz,
|
||||
const $REAL* __restrict__ Ex,
|
||||
const $REAL* __restrict__ Ey,
|
||||
const $REAL* __restrict__ Ez,
|
||||
const $REAL* __restrict__ Hx,
|
||||
const $REAL* __restrict__ Hy,
|
||||
const $REAL* __restrict__ Hz,
|
||||
$REAL *snapEx,
|
||||
$REAL *snapEy,
|
||||
$REAL *snapEz,
|
||||
$REAL *snapHx,
|
||||
$REAL *snapHy,
|
||||
$REAL *snapHz)
|
||||
"""),
|
||||
'args_opencl': Template("""
|
||||
int p,
|
||||
int xs,
|
||||
int xf,
|
||||
int ys,
|
||||
int yf,
|
||||
int zs,
|
||||
int zf,
|
||||
int dx,
|
||||
int dy,
|
||||
int dz,
|
||||
__global const $REAL* restrict Ex,
|
||||
__global const $REAL* restrict Ey,
|
||||
__global const $REAL* restrict Ez,
|
||||
__global const $REAL* restrict Hx,
|
||||
__global const $REAL* restrict Hy,
|
||||
__global const $REAL* restrict Hz,
|
||||
__global $REAL *snapEx,
|
||||
__global $REAL *snapEy,
|
||||
__global $REAL *snapEz,
|
||||
__global $REAL *snapHx,
|
||||
__global $REAL *snapHy,
|
||||
__global $REAL *snapHz
|
||||
"""),
|
||||
'func': Template("""
|
||||
// Stores field values for a snapshot.
|
||||
//
|
||||
// Args:
|
||||
@@ -29,6 +77,7 @@ store_snapshot = Template("""
|
||||
// E, H: Access to field component arrays.
|
||||
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots.
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
// Convert the linear index to subscripts for 4D SNAPS array
|
||||
int x = (i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) / ($NY_SNAPS * $NZ_SNAPS);
|
||||
@@ -69,4 +118,5 @@ store_snapshot = Template("""
|
||||
snapHz[IDX4D_SNAPS(p,x,y,z)] = (Hz[IDX3D_FIELDS(xx,yy,zz)] +
|
||||
Hz[IDX3D_FIELDS(xx,yy,zz+1)]) / 2;
|
||||
}
|
||||
""")
|
||||
""")
|
||||
}
|
@@ -18,7 +18,35 @@
|
||||
|
||||
from string import Template
|
||||
|
||||
update_hertzian_dipole = Template("""
|
||||
update_hertzian_dipole = {'args_cuda': Template("""
|
||||
__global__ void update_hertzian_dipole(int NHERTZDIPOLE,
|
||||
int iteration,
|
||||
$REAL dx,
|
||||
$REAL dy,
|
||||
$REAL dz,
|
||||
const int* __restrict__ srcinfo1,
|
||||
const $REAL* __restrict__ srcinfo2,
|
||||
const $REAL* __restrict__ srcwaveforms,
|
||||
const unsigned int* __restrict__ ID,
|
||||
$REAL *Ex,
|
||||
$REAL *Ey,
|
||||
$REAL *Ez)
|
||||
"""),
|
||||
'args_opencl': Template("""
|
||||
int NHERTZDIPOLE,
|
||||
int iteration,
|
||||
$REAL dx,
|
||||
$REAL dy,
|
||||
$REAL dz,
|
||||
__global const int* restrict srcinfo1,
|
||||
__global const $REAL* restrict srcinfo2,
|
||||
__global const $REAL* restrict srcwaveforms,
|
||||
__global const unsigned int* restrict ID,
|
||||
__global $REAL *Ex,
|
||||
__global $REAL *Ey,
|
||||
__global $REAL *Ez
|
||||
"""),
|
||||
'func': Template("""
|
||||
// Updates electric field values for Hertzian dipole sources.
|
||||
//
|
||||
// Args:
|
||||
@@ -30,6 +58,7 @@ update_hertzian_dipole = Template("""
|
||||
// srcwaveforms: Source waveform values.
|
||||
// ID, E: Access to ID and field component arrays.
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
if (i < NHERTZDIPOLE) {
|
||||
|
||||
@@ -64,8 +93,37 @@ update_hertzian_dipole = Template("""
|
||||
}
|
||||
}
|
||||
""")
|
||||
}
|
||||
|
||||
update_magnetic_dipole = Template("""
|
||||
update_magnetic_dipole = {'args_cuda': Template("""
|
||||
__global__ void update_magnetic_dipole(int NMAGDIPOLE,
|
||||
int iteration,
|
||||
$REAL dx,
|
||||
$REAL dy,
|
||||
$REAL dz,
|
||||
const int* __restrict__ srcinfo1,
|
||||
const $REAL* __restrict__ srcinfo2,
|
||||
const $REAL* __restrict__ srcwaveforms,
|
||||
const unsigned int* __restrict__ ID,
|
||||
$REAL *Hx,
|
||||
$REAL *Hy,
|
||||
$REAL *Hz)
|
||||
"""),
|
||||
'args_opencl': Template("""
|
||||
int NMAGDIPOLE,
|
||||
int iteration,
|
||||
$REAL dx,
|
||||
$REAL dy,
|
||||
$REAL dz,
|
||||
__global const int* restrict rcinfo1,
|
||||
__global const $REAL* restrict rcinfo2,
|
||||
__global const $REAL* restrict rcwaveforms,
|
||||
__global const unsigned int* estrict ID,
|
||||
__global $REAL *Hx,
|
||||
__global $REAL *Hy,
|
||||
__global $REAL *Hz
|
||||
"""),
|
||||
'func': Template("""
|
||||
// Updates electric field values for Hertzian dipole sources.
|
||||
//
|
||||
// Args:
|
||||
@@ -77,6 +135,7 @@ update_magnetic_dipole = Template("""
|
||||
// srcwaveforms: Source waveform values.
|
||||
// ID, H: Access to ID and field component arrays.
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
if (i < NMAGDIPOLE) {
|
||||
|
||||
@@ -109,8 +168,37 @@ update_magnetic_dipole = Template("""
|
||||
}
|
||||
}
|
||||
""")
|
||||
}
|
||||
|
||||
update_voltage_source = Template("""
|
||||
update_voltage_source = {'args_cuda': Template("""
|
||||
__global__ void update_voltage_source(int NVOLTSRC,
|
||||
int iteration,
|
||||
$REAL dx,
|
||||
$REAL dy,
|
||||
$REAL dz,
|
||||
const int* __restrict__ srcinfo1,
|
||||
const $REAL* __restrict__ srcinfo2,
|
||||
const $REAL* __restrict__ srcwaveforms,
|
||||
const unsigned int* __restrict__ ID,
|
||||
$REAL *Ex,
|
||||
$REAL *Ey,
|
||||
$REAL *Ez)
|
||||
"""),
|
||||
'args_opencl': Template("""
|
||||
int NVOLTSRC,
|
||||
int iteration,
|
||||
$REAL dx,
|
||||
$REAL dy,
|
||||
$REAL dz,
|
||||
__global const int* restrict rcinfo1,
|
||||
__global const $REAL* restrict rcinfo2,
|
||||
__global const $REAL* restrict rcwaveforms,
|
||||
__global const unsigned int* estrict ID,
|
||||
__global $REAL *Ex,
|
||||
__global $REAL *Ey,
|
||||
__global $REAL *Ez
|
||||
"""),
|
||||
'func': Template("""
|
||||
// Updates electric field values for voltage sources.
|
||||
//
|
||||
// Args:
|
||||
@@ -122,6 +210,7 @@ update_voltage_source = Template("""
|
||||
// srcwaveforms: Source waveform values.
|
||||
// ID, E: Access to ID and field component arrays.
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
if (i < NVOLTSRC) {
|
||||
|
||||
@@ -170,4 +259,5 @@ update_voltage_source = Template("""
|
||||
}
|
||||
}
|
||||
}
|
||||
""")
|
||||
""")
|
||||
}
|
@@ -19,7 +19,31 @@
|
||||
from string import Template
|
||||
|
||||
|
||||
store_outputs = Template("""
|
||||
store_outputs = {'args_cuda': Template("""
|
||||
__global__ void store_outputs(int NRX,
|
||||
int iteration,
|
||||
const int* __restrict__ rxcoords,
|
||||
$REAL *rxs,
|
||||
const $REAL* __restrict__ Ex,
|
||||
const $REAL* __restrict__ Ey,
|
||||
const $REAL* __restrict__ Ez,
|
||||
const $REAL* __restrict__ Hx,
|
||||
const $REAL* __restrict__ Hy,
|
||||
const $REAL* __restrict__ Hz)
|
||||
"""),
|
||||
'args_opencl': Template("""
|
||||
int NRX,
|
||||
int iteration,
|
||||
__global const int* restrict rxcoords,
|
||||
__global $REAL *rxs,
|
||||
__global const $REAL* restrict Ex,
|
||||
__global const $REAL* restrict Ey,
|
||||
__global const $REAL* restrict Ez,
|
||||
__global const $REAL* restrict Hx,
|
||||
__global const $REAL* restrict Hy,
|
||||
__global const $REAL* restrict Hz
|
||||
"""),
|
||||
'func': Template("""
|
||||
// Stores field component values for every receiver in the model.
|
||||
//
|
||||
// Args:
|
||||
@@ -27,6 +51,8 @@ store_outputs = Template("""
|
||||
// rxs: array to store field components for receivers - rows
|
||||
// are field components; columns are iterations; pages are receiver.
|
||||
|
||||
$CUDA_IDX
|
||||
|
||||
if (i < NRX) {
|
||||
int x, y, z;
|
||||
x = rxcoords[IDX2D_RXCOORDS(i,0)];
|
||||
@@ -39,4 +65,5 @@ store_outputs = Template("""
|
||||
rxs[IDX3D_RXS(4,iteration,i)] = Hy[IDX3D_FIELDS(x,y,z)];
|
||||
rxs[IDX3D_RXS(5,iteration,i)] = Hz[IDX3D_FIELDS(x,y,z)];
|
||||
}
|
||||
""")
|
||||
""")
|
||||
}
|
@@ -791,45 +791,23 @@ class OpenCLUpdates:
|
||||
NY_SNAPS=Snapshot.ny_max,
|
||||
NZ_SNAPS=Snapshot.nz_max)
|
||||
|
||||
subs = {'CUDA_IDX': '',
|
||||
'NX_FIELDS': self.grid.nx + 1,
|
||||
'NY_FIELDS': self.grid.ny + 1,
|
||||
'NZ_FIELDS': self.grid.nz + 1,
|
||||
'NX_ID': self.grid.ID.shape[1],
|
||||
'NY_ID': self.grid.ID.shape[2],
|
||||
'NZ_ID': self.grid.ID.shape[3]}
|
||||
|
||||
self.update_electric_dev = self.elwise(self.ctx,
|
||||
Template("int NX, "
|
||||
"int NY, "
|
||||
"int NZ, "
|
||||
"__global const unsigned int* restrict ID, "
|
||||
"__global $REAL *Ex, "
|
||||
"__global $REAL *Ey, "
|
||||
"__global $REAL *Ez, "
|
||||
"__global const $REAL * restrict Hx, "
|
||||
"__global const $REAL * restrict Hy, "
|
||||
"__global const $REAL * restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_fields_updates.update_electric.substitute({
|
||||
'NX_FIELDS': self.grid.nx + 1,
|
||||
'NY_FIELDS': self.grid.ny + 1,
|
||||
'NZ_FIELDS': self.grid.nz + 1,
|
||||
'NX_ID': self.grid.ID.shape[1],
|
||||
'NY_ID': self.grid.ID.shape[2],
|
||||
'NZ_ID': self.grid.ID.shape[3]}),
|
||||
knl_fields_updates.update_electric['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_fields_updates.update_electric['func'].substitute(subs),
|
||||
'update_electric', preamble=self.knl_common,
|
||||
options=config.sim_config.devices['compiler_opts'])
|
||||
|
||||
self.update_magnetic_dev = self.elwise(self.ctx,
|
||||
Template("int NX, "
|
||||
"int NY, "
|
||||
"int NZ, "
|
||||
"__global const unsigned int* restrict ID, "
|
||||
"__global $REAL *Hx, "
|
||||
"__global $REAL *Hy, "
|
||||
"__global $REAL *Hz, "
|
||||
"__global const $REAL * restrict Ex, "
|
||||
"__global const $REAL * restrict Ey, "
|
||||
"__global const $REAL * restrict Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_fields_updates.update_magnetic.substitute({
|
||||
'NX_FIELDS': self.grid.nx + 1,
|
||||
'NY_FIELDS': self.grid.ny + 1,
|
||||
'NZ_FIELDS': self.grid.nz + 1,
|
||||
'NX_ID': self.grid.ID.shape[1],
|
||||
'NY_ID': self.grid.ID.shape[2],
|
||||
'NZ_ID': self.grid.ID.shape[3]}),
|
||||
knl_fields_updates.update_magnetic['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_fields_updates.update_magnetic['func'].substitute(subs),
|
||||
'update_magnetic', preamble=self.knl_common,
|
||||
options=config.sim_config.devices['compiler_opts'])
|
||||
|
||||
@@ -838,64 +816,26 @@ class OpenCLUpdates:
|
||||
# If there are any dispersive materials (updates are split into two
|
||||
# parts as they require present and updated electric field values).
|
||||
if config.get_model_config().materials['maxpoles'] > 0:
|
||||
subs = {'CUDA_IDX': '',
|
||||
'REAL': config.sim_config.dtypes['C_float_or_double'],
|
||||
'REALFUNC': config.get_model_config().materials['crealfunc'],
|
||||
'NX_FIELDS': self.grid.nx + 1,
|
||||
'NY_FIELDS': self.grid.ny + 1,
|
||||
'NZ_FIELDS': self.grid.nz + 1,
|
||||
'NX_ID': self.grid.ID.shape[1],
|
||||
'NY_ID': self.grid.ID.shape[2],
|
||||
'NZ_ID': self.grid.ID.shape[3],
|
||||
'NX_T': NX_T,
|
||||
'NY_T': NY_T,
|
||||
'NZ_T': NZ_T}
|
||||
self.dispersive_update_a = self.elwise(self.ctx,
|
||||
Template("int NX, "
|
||||
"int NY, "
|
||||
"int NZ, "
|
||||
"int MAXPOLES, "
|
||||
"__global const $COMPLEX* restrict updatecoeffsdispersive, "
|
||||
"__global $COMPLEX *Tx, "
|
||||
"__global $COMPLEX *Ty, "
|
||||
"__global $COMPLEX *Tz, "
|
||||
"__global const unsigned int* restrict ID, "
|
||||
"__global $REAL *Ex, "
|
||||
"__global $REAL *Ey, "
|
||||
"__global $REAL *Ez, "
|
||||
"__global const $REAL* restrict Hx, "
|
||||
"__global const $REAL* restrict Hy, "
|
||||
"__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double'], 'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}),
|
||||
knl_fields_updates.update_electric_dispersive_A.substitute({
|
||||
'REAL': config.sim_config.dtypes['C_float_or_double'],
|
||||
'REALFUNC': config.get_model_config().materials['crealfunc'],
|
||||
'NX_FIELDS': self.grid.nx + 1,
|
||||
'NY_FIELDS': self.grid.ny + 1,
|
||||
'NZ_FIELDS': self.grid.nz + 1,
|
||||
'NX_ID': self.grid.ID.shape[1],
|
||||
'NY_ID': self.grid.ID.shape[2],
|
||||
'NZ_ID': self.grid.ID.shape[3],
|
||||
'NX_T': NX_T,
|
||||
'NY_T': NY_T,
|
||||
'NZ_T': NZ_T}),
|
||||
knl_fields_updates.update_electric_dispersive_A['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double'], 'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}),
|
||||
knl_fields_updates.update_electric_dispersive_A['func'].substitute(subs),
|
||||
'update_electric_dispersive_A', preamble=self.knl_common,
|
||||
options=config.sim_config.devices['compiler_opts'])
|
||||
self.dispersive_update_b = self.elwise(self.ctx,
|
||||
Template("int NX, "
|
||||
"int NY, "
|
||||
"int NZ, "
|
||||
"int MAXPOLES, "
|
||||
"__global const $COMPLEX* restrict updatecoeffsdispersive, "
|
||||
"__global $COMPLEX *Tx, "
|
||||
"__global $COMPLEX *Ty, "
|
||||
"__global $COMPLEX *Tz, "
|
||||
"__global const unsigned int* restrict ID, "
|
||||
"__global $REAL *Ex, "
|
||||
"__global $REAL *Ey, "
|
||||
"__global $REAL *Ez, "
|
||||
"__global const $REAL* restrict Hx, "
|
||||
"__global const $REAL* restrict Hy, "
|
||||
"__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double'] ,'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}),
|
||||
knl_fields_updates.update_electric_dispersive_B.substitute({
|
||||
'REAL': config.sim_config.dtypes['C_float_or_double'],
|
||||
'REALFUNC': config.get_model_config().materials['crealfunc'],
|
||||
'NX_FIELDS': self.grid.nx + 1,
|
||||
'NY_FIELDS': self.grid.ny + 1,
|
||||
'NZ_FIELDS': self.grid.nz + 1,
|
||||
'NX_ID': self.grid.ID.shape[1],
|
||||
'NY_ID': self.grid.ID.shape[2],
|
||||
'NZ_ID': self.grid.ID.shape[3],
|
||||
'NX_T': NX_T,
|
||||
'NY_T': NY_T,
|
||||
'NZ_T': NZ_T}),
|
||||
knl_fields_updates.update_electric_dispersive_B['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double'] ,'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}),
|
||||
knl_fields_updates.update_electric_dispersive_B['func'].substitute(subs),
|
||||
'update_electric_dispersive_B', preamble=self.knl_common,
|
||||
options=config.sim_config.devices['compiler_opts'])
|
||||
|
||||
@@ -911,6 +851,15 @@ class OpenCLUpdates:
|
||||
knl_pml_updates_electric = import_module('gprMax.cuda_opencl_el.knl_pml_updates_electric_' + self.grid.pmlformulation)
|
||||
knl_pml_updates_magnetic = import_module('gprMax.cuda_opencl_el.knl_pml_updates_magnetic_' + self.grid.pmlformulation)
|
||||
|
||||
subs = {'CUDA_IDX': '',
|
||||
'REAL': config.sim_config.dtypes['C_float_or_double'],
|
||||
'NX_FIELDS': self.grid.nx + 1,
|
||||
'NY_FIELDS': self.grid.ny + 1,
|
||||
'NZ_FIELDS': self.grid.nz + 1,
|
||||
'NX_ID': self.grid.ID.shape[1],
|
||||
'NY_ID': self.grid.ID.shape[2],
|
||||
'NZ_ID': self.grid.ID.shape[3]}
|
||||
|
||||
# Set workgroup size, initialise arrays on compute device, and get
|
||||
# kernel functions
|
||||
for pml in self.grid.pmls:
|
||||
@@ -921,29 +870,15 @@ class OpenCLUpdates:
|
||||
knl_magnetic_name = getattr(knl_pml_updates_magnetic, knl_name)
|
||||
|
||||
pml.update_electric_dev = self.elwise(self.ctx,
|
||||
knl_electric_name['args'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_electric_name['func'].substitute({
|
||||
'REAL': config.sim_config.dtypes['C_float_or_double'],
|
||||
'NX_FIELDS': self.grid.nx + 1,
|
||||
'NY_FIELDS': self.grid.ny + 1,
|
||||
'NZ_FIELDS': self.grid.nz + 1,
|
||||
'NX_ID': self.grid.ID.shape[1],
|
||||
'NY_ID': self.grid.ID.shape[2],
|
||||
'NZ_ID': self.grid.ID.shape[3]}),
|
||||
knl_electric_name['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_electric_name['func'].substitute(subs),
|
||||
'pml_updates_electric_' + knl_name,
|
||||
preamble=self.knl_common,
|
||||
options=config.sim_config.devices['compiler_opts'])
|
||||
|
||||
pml.update_magnetic_dev = self.elwise(self.ctx,
|
||||
knl_magnetic_name['args'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_magnetic_name['func'].substitute({
|
||||
'REAL': config.sim_config.dtypes['C_float_or_double'],
|
||||
'NX_FIELDS': self.grid.nx + 1,
|
||||
'NY_FIELDS': self.grid.ny + 1,
|
||||
'NZ_FIELDS': self.grid.nz + 1,
|
||||
'NX_ID': self.grid.ID.shape[1],
|
||||
'NY_ID': self.grid.ID.shape[2],
|
||||
'NZ_ID': self.grid.ID.shape[3]}),
|
||||
knl_magnetic_name['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_magnetic_name['func'].substitute(subs),
|
||||
'pml_updates_magnetic_' + knl_name,
|
||||
preamble=self.knl_common,
|
||||
options=config.sim_config.devices['compiler_opts'])
|
||||
@@ -954,17 +889,8 @@ class OpenCLUpdates:
|
||||
"""
|
||||
self.rxcoords_dev, self.rxs_dev = htod_rx_arrays(self.grid, self.queue)
|
||||
self.store_outputs_dev = self.elwise(self.ctx,
|
||||
Template("int NRX, "
|
||||
"int iteration, "
|
||||
"__global const int* restrict rxcoords, "
|
||||
"__global $REAL *rxs, "
|
||||
"__global const $REAL* restrict Ex, "
|
||||
"__global const $REAL* restrict Ey, "
|
||||
"__global const $REAL* restrict Ez, "
|
||||
"__global const $REAL* restrict Hx, "
|
||||
"__global const $REAL* restrict Hy, "
|
||||
"__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_store_outputs.store_outputs.substitute(),
|
||||
knl_store_outputs.store_outputs['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_store_outputs.store_outputs['func'].substitute({'CUDA_IDX': ''}),
|
||||
'store_outputs', preamble=self.knl_common,
|
||||
options=config.sim_config.devices['compiler_opts'])
|
||||
|
||||
@@ -975,55 +901,22 @@ class OpenCLUpdates:
|
||||
if self.grid.hertziandipoles:
|
||||
self.srcinfo1_hertzian_dev, self.srcinfo2_hertzian_dev, self.srcwaves_hertzian_dev = htod_src_arrays(self.grid.hertziandipoles, self.grid, self.queue)
|
||||
self.update_hertzian_dipole_dev = self.elwise(self.ctx,
|
||||
Template("int NHERTZDIPOLE, "
|
||||
"int iteration, "
|
||||
"$REAL dx, "
|
||||
"$REAL dy, "
|
||||
"$REAL dz, "
|
||||
"__global const int* restrict srcinfo1, "
|
||||
"__global const $REAL* restrict srcinfo2, "
|
||||
"__global const $REAL* restrict srcwaveforms, "
|
||||
"__global const unsigned int* restrict ID, "
|
||||
"__global $REAL *Ex, "
|
||||
"__global $REAL *Ey, "
|
||||
"__global $REAL *Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_source_updates.update_hertzian_dipole.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_source_updates.update_hertzian_dipole['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_source_updates.update_hertzian_dipole['func'].substitute({'CUDA_IDX': '', 'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
'update_hertzian_dipole', preamble=self.knl_common,
|
||||
options=config.sim_config.devices['compiler_opts'])
|
||||
if self.grid.magneticdipoles:
|
||||
self.srcinfo1_magnetic_dev, self.srcinfo2_magnetic_dev, self.srcwaves_magnetic_dev = htod_src_arrays(self.grid.magneticdipoles, self.grid, self.queue)
|
||||
self.update_magnetic_dipole_dev = self.elwise(self.ctx,
|
||||
Template("int NMAGDIPOLE, "
|
||||
"int iteration, "
|
||||
"$REAL dx, "
|
||||
"$REAL dy, "
|
||||
"$REAL dz, "
|
||||
"__global const int* restrict srcinfo1, "
|
||||
"__global const $REAL* restrict srcinfo2, "
|
||||
"__global const $REAL* restrict srcwaveforms, "
|
||||
"__global const unsigned int* restrict ID, "
|
||||
"__global $REAL *Hx, "
|
||||
"__global $REAL *Hy, "
|
||||
"__global $REAL *Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_source_updates.update_magnetic_dipole.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_source_updates.update_magnetic_dipole['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_source_updates.update_magnetic_dipole['func'].substitute({'CUDA_IDX': '', 'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
'update_magnetic_dipole', preamble=self.knl_common,
|
||||
options=config.sim_config.devices['compiler_opts'])
|
||||
if self.grid.voltagesources:
|
||||
self.srcinfo1_voltage_dev, self.srcinfo2_voltage_dev,self.srcwaves_voltage_dev = htod_src_arrays(self.grid.voltagesources, self.grid, self.queue)
|
||||
self.update_voltage_source_dev = self.elwise(self.ctx,
|
||||
Template("int NVOLTSRC, "
|
||||
"int iteration, "
|
||||
"$REAL dx, "
|
||||
"$REAL dy, "
|
||||
"$REAL dz, "
|
||||
"__global const int* restrict srcinfo1, "
|
||||
"__global const $REAL* restrict srcinfo2, "
|
||||
"__global const $REAL* restrict srcwaveforms, "
|
||||
"__global const unsigned int* restrict ID, "
|
||||
"__global $REAL *Ex, "
|
||||
"__global $REAL *Ey, "
|
||||
"__global $REAL *Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_source_updates.update_voltage_source.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), 'update_voltage_source', preamble=self.knl_common,
|
||||
knl_source_updates.update_voltage_source['args_opencl'].substitute({'CUDA_IDX': '', 'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_source_updates.update_voltage_source['func'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), 'update_voltage_source', preamble=self.knl_common,
|
||||
options=config.sim_config.devices['compiler_opts'])
|
||||
|
||||
def _set_snapshot_knl(self):
|
||||
@@ -1032,31 +925,11 @@ class OpenCLUpdates:
|
||||
"""
|
||||
self.snapEx_dev, self.snapEy_dev, self.snapEz_dev, self.snapHx_dev, self.snapHy_dev, self.snapHz_dev = htod_snapshot_array(self.grid, self.queue)
|
||||
self.store_snapshot_dev = self.elwise(self.ctx,
|
||||
Template("int p, "
|
||||
"int xs, "
|
||||
"int xf, "
|
||||
"int ys, "
|
||||
"int yf, "
|
||||
"int zs, "
|
||||
"int zf, "
|
||||
"int dx, "
|
||||
"int dy, "
|
||||
"int dz, "
|
||||
"__global const $REAL* restrict Ex, "
|
||||
"__global const $REAL* restrict Ey, "
|
||||
"__global const $REAL* restrict Ez, "
|
||||
"__global const $REAL* restrict Hx, "
|
||||
"__global const $REAL* restrict Hy, "
|
||||
"__global const $REAL* restrict Hz, "
|
||||
"__global $REAL *snapEx, "
|
||||
"__global $REAL *snapEy, "
|
||||
"__global $REAL *snapEz, "
|
||||
"__global $REAL *snapHx, "
|
||||
"__global $REAL *snapHy, "
|
||||
"__global $REAL *snapHz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_snapshots.store_snapshot.substitute({'NX_SNAPS': Snapshot.nx_max,
|
||||
'NY_SNAPS': Snapshot.ny_max,
|
||||
'NZ_SNAPS': Snapshot.nz_max}),
|
||||
knl_snapshots.store_snapshot['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
|
||||
knl_snapshots.store_snapshot['func'].substitute( {'CUDA_IDX': '',
|
||||
'NX_SNAPS': Snapshot.nx_max,
|
||||
'NY_SNAPS': Snapshot.ny_max,
|
||||
'NZ_SNAPS': Snapshot.nz_max}),
|
||||
'store_snapshot', preamble=self.knl_common,
|
||||
options=config.sim_config.devices['compiler_opts'])
|
||||
|
||||
@@ -1260,10 +1133,7 @@ class OpenCLUpdates:
|
||||
self.grid.ID_dev,
|
||||
self.grid.Ex_dev,
|
||||
self.grid.Ey_dev,
|
||||
self.grid.Ez_dev,
|
||||
self.grid.Hx_dev,
|
||||
self.grid.Hy_dev,
|
||||
self.grid.Hz_dev)
|
||||
self.grid.Ez_dev)
|
||||
event.wait()
|
||||
self.compute_time += (event.profile.end - event.profile.start)*1e-9
|
||||
|
||||
|
在新工单中引用
屏蔽一个用户