Work on templating for CUDA/OpenCL

这个提交包含在:
Craig Warren
2022-03-03 17:32:43 +00:00
父节点 ec99871e2d
当前提交 0b6d255e77
共有 7 个文件被更改,包括 769 次插入389 次删除

查看文件

@@ -18,7 +18,32 @@
from string import Template
update_electric = Template("""
update_electric = {
'args_cuda': Template("""
__global__ void update_electric(int NX,
int NY,
int NZ,
const unsigned int* __restrict__ ID,
$REAL *Ex,
$REAL *Ey,
$REAL *Ez,
const $REAL* __restrict__ Hx,
const $REAL* __restrict__ Hy,
const $REAL* __restrict__ Hz)
"""),
'args_opencl': Template("""
int NX,
int NY,
int NZ,
__global const unsigned int* restrict ID,
__global $REAL *Ex,
__global $REAL *Ey,
__global $REAL *Ez,
__global const $REAL * restrict Hx,
__global const $REAL * restrict Hy,
__global const $REAL * restrict Hz
"""),
'func': Template("""
// Electric field updates - normal materials.
//
// Args:
@@ -35,6 +60,8 @@ update_electric = Template("""
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
$CUDA_IDX
// Ex component
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
@@ -58,9 +85,34 @@ update_electric = Template("""
updatecoeffsE[IDX2D_MAT(materialEz,1)] * (Hy[IDX3D_FIELDS(x,y,z)] - Hy[IDX3D_FIELDS(x-1,y,z)]) -
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y-1,z)]);
}
""")
""")}
update_magnetic = Template("""
update_magnetic = {
'args_cuda': Template("""
__global__ void update_magnetic(int NX,
int NY,
int NZ,
const unsigned int* __restrict__ ID,
$REAL *Hx,
$REAL *Hy,
$REAL *Hz,
const $REAL* __restrict__ Ex,
const $REAL* __restrict__ Ey,
const $REAL* __restrict__ Ez)
"""),
'args_opencl': Template("""
int NX,
int NY,
int NZ,
__global const unsigned int* restrict ID,
__global $REAL *Hx,
__global $REAL *Hy,
__global $REAL *Hz,
__global const $REAL * restrict Ex,
__global const $REAL * restrict Ey,
__global const $REAL * restrict Ez
"""),
'func': Template("""
// Magnetic field updates - normal materials.
//
// Args:
@@ -77,6 +129,8 @@ update_magnetic = Template("""
int y_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) / $NZ_ID;
int z_ID = ((i % ($NX_ID * $NY_ID * $NZ_ID)) % ($NY_ID * $NZ_ID)) % $NZ_ID;
$CUDA_IDX
// Hx component
if (NX != 1 && x > 0 && x < NX && y >= 0 && y < NY && z >= 0 && z < NZ) {
int materialHx = ID[IDX4D_ID(3,x_ID,y_ID,z_ID)];
@@ -100,9 +154,44 @@ update_magnetic = Template("""
updatecoeffsH[IDX2D_MAT(materialHz,1)] * (Ey[IDX3D_FIELDS(x+1,y,z)] - Ey[IDX3D_FIELDS(x,y,z)]) +
updatecoeffsH[IDX2D_MAT(materialHz,2)] * (Ex[IDX3D_FIELDS(x,y+1,z)] - Ex[IDX3D_FIELDS(x,y,z)]);
}
""")
""")}
update_electric_dispersive_A = Template("""
update_electric_dispersive_A = {
'args_cuda': Template("""
__global__ void update_electric_dispersive_A(int NX,
int NY,
int NZ,
int MAXPOLES,
const $COMPLEX* __restrict__ updatecoeffsdispersive,
$COMPLEX *Tx,
$COMPLEX *Ty,
$COMPLEX *Tz,
const unsigned int* __restrict__ ID,
$REAL *Ex,
$REAL *Ey,
$REAL *Ez,
const $REAL* __restrict__ Hx,
const $REAL* __restrict__ Hy,
const $REAL* __restrict__ Hz)
"""),
'args_opencl': Template("""
int NX,
int NY,
int NZ,
int MAXPOLES,
__global const $COMPLEX* restrict updatecoeffsdispersive,
__global $COMPLEX *Tx,
__global $COMPLEX *Ty,
__global $COMPLEX *Tz,
__global const unsigned int* restrict ID,
__global $REAL *Ex,
__global $REAL *Ey,
__global $REAL *Ez,
__global const $REAL* restrict Hx,
__global const $REAL* restrict Hy,
__global const $REAL* restrict Hz
"""),
'func': Template("""
// Electric field updates - dispersive materials - part A of updates to electric
// field values when dispersive materials
// (with multiple poles) are present.
@@ -130,6 +219,8 @@ update_electric_dispersive_A = Template("""
int y_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
int z_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
$CUDA_IDX
// Ex component
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
@@ -174,9 +265,38 @@ update_electric_dispersive_A = Template("""
updatecoeffsE[IDX2D_MAT(materialEz,2)] * (Hx[IDX3D_FIELDS(x,y,z)] - Hx[IDX3D_FIELDS(x,y-1,z)]) -
updatecoeffsE[IDX2D_MAT(materialEz,4)] * phi;
}
""")
""")}
update_electric_dispersive_B = Template("""
update_electric_dispersive_B = {
'args_cuda': Template("""
__global__ void update_electric_dispersive_B(int NX,
int NY,
int NZ,
int MAXPOLES,
const $COMPLEX* __restrict__ updatecoeffsdispersive,
$COMPLEX *Tx,
$COMPLEX *Ty,
$COMPLEX *Tz,
const unsigned int* __restrict__ ID,
const $REAL* __restrict__ Ex,
const $REAL* __restrict__ Ey,
const $REAL* __restrict__ Ez)
"""),
'args_opencl': Template("""
int NX,
int NY,
int NZ,
int MAXPOLES,
__global const $COMPLEX* restrict updatecoeffsdispersive,
__global $COMPLEX *Tx,
__global $COMPLEX *Ty,
__global $COMPLEX *Tz,
__global const unsigned int* restrict ID,
__global const $REAL* restrict Ex,
__global const $REAL* restrict Ey,
__global const $REAL* restrict Ez
"""),
'func': Template("""
// Electric field updates - dispersive materials - part B of updates to electric
// field values when dispersive materials
// (with multiple poles) are present.
@@ -204,6 +324,8 @@ update_electric_dispersive_B = Template("""
int y_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) / $NZ_T;
int z_T = ((i % ($NX_T * $NY_T * $NZ_T)) % ($NY_T * $NZ_T)) % $NZ_T;
$CUDA_IDX
// Ex component
if ((NY != 1 || NZ != 1) && x >= 0 && x < NX && y > 0 && y < NY && z > 0 && z < NZ) {
int materialEx = ID[IDX4D_ID(0,x_ID,y_ID,z_ID)];
@@ -230,4 +352,4 @@ update_electric_dispersive_B = Template("""
updatecoeffsdispersive[IDX2D_MATDISP(materialEz,2+(pole*3))] * Ez[IDX3D_FIELDS(x,y,z)];
}
}
""")
""")}

查看文件

@@ -18,91 +18,166 @@
from string import Template
x_args = Template("int xs, "
"int xf, "
"int ys, "
"int yf, "
"int zs, "
"int zf, "
"int NX_PHI1, "
"int NY_PHI1, "
"int NZ_PHI1, "
"int NX_PHI2, "
"int NY_PHI2, "
"int NZ_PHI2, "
"int NY_R, "
"__global const unsigned int* restrict ID, "
"__global const $REAL* restrict Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz, "
"__global $REAL *PHI1, "
"__global $REAL *PHI2, "
"__global const $REAL* restrict RA, "
"__global const $REAL* restrict RB, "
"__global const $REAL* restrict RE, "
"__global const $REAL* restrict RF, "
"$REAL d")
x_args = {'cuda': Template("""
__global__ void $FUNC(int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int NX_PHI1,
int NY_PHI1,
int NZ_PHI1,
int NX_PHI2,
int NY_PHI2,
int NZ_PHI2,
int NY_R,
const unsigned int* __restrict__ ID,
const $REAL* __restrict__ Ex, $REAL *Ey,
$REAL *Ez,
const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1,
$REAL *PHI2,
const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d)
"""),
'opencl': Template("""
int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int NX_PHI1,
int NY_PHI1,
int NZ_PHI1,
int NX_PHI2,
int NY_PHI2,
int NZ_PHI2,
int NY_R,
__global const unsigned int* restrict ID,
__global const $REAL* restrict Ex,
__global $REAL *Ey,
__global $REAL *Ez,
__global const $REAL* restrict Hx,
__global const $REAL* restrict Hy,
__global const $REAL* restrict Hz,
__global $REAL *PHI1,
__global $REAL *PHI2,
__global const $REAL* restrict RA,
__global const $REAL* restrict RB,
__global const $REAL* restrict RE,
__global const $REAL* restrict RF,
$REAL d
""")
}
y_args = Template("int xs, "
"int xf, "
"int ys, "
"int yf, "
"int zs, "
"int zf, "
"int NX_PHI1, "
"int NY_PHI1, "
"int NZ_PHI1, "
"int NX_PHI2, "
"int NY_PHI2, "
"int NZ_PHI2, "
"int NY_R, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global const $REAL* restrict Ey, "
"__global $REAL *Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz, "
"__global $REAL *PHI1, "
"__global $REAL *PHI2, "
"__global const $REAL* restrict RA, "
"__global const $REAL* restrict RB, "
"__global const $REAL* restrict RE, "
"__global const $REAL* restrict RF, "
"$REAL d")
y_args = {'cuda': Template("""
__global__ void $FUNC(int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int NX_PHI1,
int NY_PHI1,
int NZ_PHI1,
int NX_PHI2,
int NY_PHI2,
int NZ_PHI2,
int NY_R,
const unsigned int* __restrict__ ID,
$REAL *Ex,
const $REAL* __restrict__ Ey,
$REAL *Ez,
const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1,
$REAL *PHI2,
const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d)
"""),
'opencl': Template("""
int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int NX_PHI1,
int NY_PHI1,
int NZ_PHI1,
int NX_PHI2,
int NY_PHI2,
int NZ_PHI2,
int NY_R,
__global const unsigned int* restrict ID,
__global $REAL *Ex,
__global const $REAL* restrict Ey,
__global $REAL *Ez,
__global const $REAL* restrict Hx,
__global const $REAL* restrict Hy,
__global const $REAL* restrict Hz,
__global $REAL *PHI1,
__global $REAL *PHI2,
__global const $REAL* restrict RA,
__global const $REAL* restrict RB,
__global const $REAL* restrict RE,
__global const $REAL* restrict RF,
$REAL d
""")
}
z_args = Template("int xs, "
"int xf, "
"int ys, "
"int yf, "
"int zs, "
"int zf, "
"int NX_PHI1, "
"int NY_PHI1, "
"int NZ_PHI1, "
"int NX_PHI2, "
"int NY_PHI2, "
"int NZ_PHI2, "
"int NY_R, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global const $REAL* restrict Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz, "
"__global $REAL *PHI1, "
"__global $REAL *PHI2, "
"__global const $REAL* restrict RA, "
"__global const $REAL* restrict RB, "
"__global const $REAL* restrict RE, "
"__global const $REAL* restrict RF, "
"$REAL d")
z_args = {'cuda': Template("""
__global__ void $FUNC(int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int NX_PHI1,
int NY_PHI1,
int NZ_PHI1,
int NX_PHI2,
int NY_PHI2,
int NZ_PHI2,
int NY_R,
const unsigned int* __restrict__ ID,
$REAL *Ex,
$REAL *Ey,
const $REAL* __restrict__ Ez,
const $REAL* __restrict__ Hx, const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz, $REAL *PHI1,
$REAL *PHI2,
const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d)
"""),
'opencl': Template("""
int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int NX_PHI1,
int NY_PHI1,
int NZ_PHI1,
int NX_PHI2,
int NY_PHI2,
int NZ_PHI2,
int NY_R,
__global const unsigned int* restrict ID,
__global $REAL *Ex,
__global $REAL *Ey,
__global const $REAL* restrict Ez,
__global const $REAL* restrict Hx,
__global const $REAL* restrict Hy,
__global const $REAL* restrict Hz,
__global $REAL *PHI1,
__global $REAL *PHI2,
__global const $REAL* restrict RA,
__global const $REAL* restrict RB,
__global const $REAL* restrict RE,
__global const $REAL* restrict RF,
$REAL d
""")
}
order1_xminus = {'args': x_args,
order1_xminus = {'args_cuda': x_args['cuda'],
'args_opencl': x_args['opencl'],
'func': Template("""
// This function updates the Ey and Ez field components for the xminus slab.
//
@@ -113,6 +188,8 @@ order1_xminus = {'args': x_args,
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -173,7 +250,8 @@ order1_xminus = {'args': x_args,
}
""")}
order2_xminus = {'args': x_args,
order2_xminus = {'args_cuda': x_args['cuda'],
'args_opencl': x_args['opencl'],
'func': Template("""
// This function updates the Ey and Ez field components for the xminus slab.
//
@@ -184,6 +262,8 @@ order2_xminus = {'args': x_args,
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -260,7 +340,8 @@ order2_xminus = {'args': x_args,
}
""")}
order1_xplus = {'args': x_args,
order1_xplus = {'args_cuda': x_args['cuda'],
'args_opencl': x_args['opencl'],
'func': Template("""
// This function updates the Ey and Ez field components for the xplus slab.
//
@@ -271,6 +352,8 @@ order1_xplus = {'args': x_args,
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -331,7 +414,8 @@ order1_xplus = {'args': x_args,
}
""")}
order2_xplus = {'args': x_args,
order2_xplus = {'args_cuda': x_args['cuda'],
'args_opencl': x_args['opencl'],
'func': Template("""
// This function updates the Ey and Ez field components for the xplus slab.
//
@@ -342,6 +426,8 @@ order2_xplus = {'args': x_args,
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -417,7 +503,8 @@ order2_xplus = {'args': x_args,
}
""")}
order1_yminus = {'args': y_args,
order1_yminus = {'args_cuda': y_args['cuda'],
'args_opencl': y_args['opencl'],
'func': Template("""
// This function updates the Ex and Ez field components for the yminus slab.
//
@@ -428,6 +515,8 @@ order1_yminus = {'args': y_args,
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -488,7 +577,8 @@ order1_yminus = {'args': y_args,
}
""")}
order2_yminus = {'args': y_args,
order2_yminus = {'args_cuda': y_args['cuda'],
'args_opencl': y_args['opencl'],
'func': Template("""
// This function updates the Ex and Ez field components for the yminus slab.
//
@@ -499,6 +589,8 @@ order2_yminus = {'args': y_args,
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -575,7 +667,8 @@ order2_yminus = {'args': y_args,
}
""")}
order1_yplus = {'args': y_args,
order1_yplus = {'args_cuda': y_args['cuda'],
'args_opencl': y_args['opencl'],
'func': Template("""
// This function updates the Ex and Ez field components for the yplus slab.
//
@@ -586,6 +679,8 @@ order1_yplus = {'args': y_args,
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -646,7 +741,8 @@ order1_yplus = {'args': y_args,
}
""")}
order2_yplus = {'args': y_args,
order2_yplus = {'args_cuda': y_args['cuda'],
'args_opencl': y_args['opencl'],
'func': Template("""
// This function updates the Ex and Ez field components for the yplus slab.
//
@@ -657,6 +753,8 @@ order2_yplus = {'args': y_args,
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -733,7 +831,8 @@ order2_yplus = {'args': y_args,
}
""")}
order1_zminus = {'args': z_args,
order1_zminus = {'args_cuda': z_args['cuda'],
'args_opencl': z_args['opencl'],
'func': Template("""
// This function updates the Ex and Ey field components for the zminus slab.
//
@@ -744,6 +843,8 @@ order1_zminus = {'args': z_args,
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -804,7 +905,8 @@ order1_zminus = {'args': z_args,
}
""")}
order2_zminus = {'args': z_args,
order2_zminus = {'args_cuda': z_args['cuda'],
'args_opencl': z_args['opencl'],
'func': Template("""
// This function updates the Ex and Ey field components for the zminus slab.
//
@@ -815,6 +917,8 @@ order2_zminus = {'args': z_args,
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -891,7 +995,8 @@ order2_zminus = {'args': z_args,
}
""")}
order1_zplus = {'args': z_args,
order1_zplus = {'args_cuda': z_args['cuda'],
'args_opencl': z_args['opencl'],
'func': Template("""
// This function updates the Ex and Ey field components for the zplus slab.
//
@@ -902,6 +1007,8 @@ order1_zplus = {'args': z_args,
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -962,7 +1069,8 @@ order1_zplus = {'args': z_args,
}
""")}
order2_zplus = {'args': z_args,
order2_zplus = {'args_cuda': z_args['cuda'],
'args_opencl': z_args['opencl'],
'func': Template("""
// This function updates the Ex and Ey field components for the zplus slab.
//
@@ -973,6 +1081,8 @@ order2_zplus = {'args': z_args,
// Phi, RA, RB, RE, RF: Access to PML electric coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -1047,5 +1157,4 @@ order2_zplus = {'args': z_args,
(RA0 * dHx + RB0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)]);
PHI2[IDX4D_PHI2(0,i2,j2,k2)] = RE0 * PHI2[IDX4D_PHI2(0,i2,j2,k2)] - RF0 * dHx;
}
""")}
""")}

查看文件

@@ -18,91 +18,168 @@
from string import Template
x_args = Template("int xs, "
"int xf, "
"int ys, "
"int yf, "
"int zs, "
"int zf, "
"int NX_PHI1, "
"int NY_PHI1, "
"int NZ_PHI1, "
"int NX_PHI2, "
"int NY_PHI2, "
"int NZ_PHI2, "
"int NY_R, "
"__global const unsigned int* restrict ID, "
"__global const $REAL* restrict Ex, "
"__global const $REAL* restrict Ey, "
"__global const $REAL* restrict Ez, "
"__global const $REAL* restrict Hx, "
"__global $REAL *Hy, "
"__global $REAL *Hz, "
"__global $REAL *PHI1, "
"__global $REAL *PHI2, "
"__global const $REAL* restrict RA, "
"__global const $REAL* restrict RB, "
"__global const $REAL* restrict RE, "
"__global const $REAL* restrict RF, "
"$REAL d")
x_args = {'cuda': Template("""
__global__ void $FUNC(int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int NX_PHI1,
int NY_PHI1,
int NZ_PHI1,
int NX_PHI2,
int NY_PHI2,
int NZ_PHI2,
int NY_R,
const unsigned int* __restrict__ ID,
const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx, $REAL *Hy,
$REAL *Hz,
$REAL *PHI1,
$REAL *PHI2,
const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d)
"""),
'opencl': Template("""
int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int NX_PHI1,
int NY_PHI1,
int NZ_PHI1,
int NX_PHI2,
int NY_PHI2,
int NZ_PHI2,
int NY_R,
__global const unsigned int* restrict ID,
__global const $REAL* restrict Ex,
__global const $REAL* restrict Ey,
__global const $REAL* restrict Ez,
__global const $REAL* restrict Hx,
__global $REAL *Hy,
__global $REAL *Hz,
__global $REAL *PHI1,
__global $REAL *PHI2,
__global const $REAL* restrict RA,
__global const $REAL* restrict RB,
__global const $REAL* restrict RE,
__global const $REAL* restrict RF,
$REAL d
""")
}
y_args = Template("int xs, "
"int xf, "
"int ys, "
"int yf, "
"int zs, "
"int zf, "
"int NX_PHI1, "
"int NY_PHI1, "
"int NZ_PHI1, "
"int NX_PHI2, "
"int NY_PHI2, "
"int NZ_PHI2, "
"int NY_R, "
"__global const unsigned int* restrict ID, "
"__global const $REAL* restrict Ex, "
"__global const $REAL* restrict Ey, "
"__global const $REAL* restrict Ez, "
"__global $REAL *Hx, "
"__global const $REAL* restrict Hy, "
"__global $REAL *Hz, "
"__global $REAL *PHI1, "
"__global $REAL *PHI2, "
"__global const $REAL* restrict RA, "
"__global const $REAL* restrict RB, "
"__global const $REAL* restrict RE, "
"__global const $REAL* restrict RF, "
"$REAL d")
y_args = {'cuda': Template("""
__global__ void $FUNC(int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int NX_PHI1,
int NY_PHI1,
int NZ_PHI1,
int NX_PHI2,
int NY_PHI2,
int NZ_PHI2,
int NY_R,
const unsigned int* __restrict__ ID,
const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez,
$REAL *Hx,
const $REAL* __restrict__ Hy,
$REAL *Hz,
$REAL *PHI1,
$REAL *PHI2,
const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d)
"""),
'opencl': Template("""
int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int NX_PHI1,
int NY_PHI1,
int NZ_PHI1,
int NX_PHI2,
int NY_PHI2,
int NZ_PHI2,
int NY_R,
__global const unsigned int* restrict ID,
__global const $REAL* restrict Ex,
__global const $REAL* restrict Ey,
__global const $REAL* restrict Ez,
__global $REAL *Hx,
__global const $REAL* restrict Hy,
__global $REAL *Hz,
__global $REAL *PHI1,
__global $REAL *PHI2,
__global const $REAL* restrict RA,
__global const $REAL* restrict RB,
__global const $REAL* restrict RE,
__global const $REAL* restrict RF,
$REAL d
""")
}
z_args = Template("int xs, "
"int xf, "
"int ys, "
"int yf, "
"int zs, "
"int zf, "
"int NX_PHI1, "
"int NY_PHI1, "
"int NZ_PHI1, "
"int NX_PHI2, "
"int NY_PHI2, "
"int NZ_PHI2, "
"int NY_R, "
"__global const unsigned int* restrict ID, "
"__global const $REAL* restrict Ex, "
"__global const $REAL* restrict Ey, "
"__global const $REAL* restrict Ez, "
"__global $REAL *Hx, "
"__global $REAL *Hy, "
"__global const $REAL* restrict Hz, "
"__global $REAL *PHI1, "
"__global $REAL *PHI2, "
"__global const $REAL* restrict RA, "
"__global const $REAL* restrict RB, "
"__global const $REAL* restrict RE, "
"__global const $REAL* restrict RF, "
"$REAL d")
z_args = {'cuda': Template("""
__global__ void $FUNC(int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int NX_PHI1,
int NY_PHI1,
int NZ_PHI1,
int NX_PHI2,
int NY_PHI2,
int NZ_PHI2,
int NY_R,
const unsigned int* __restrict__ ID,
const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey, const $REAL* __restrict__ Ez,
$REAL *Hx,
$REAL *Hy,
const $REAL* __restrict__ Hz,
$REAL *PHI1,
$REAL *PHI2,
const $REAL* __restrict__ RA, const $REAL* __restrict__ RB, const $REAL* __restrict__ RE, const $REAL* __restrict__ RF, $REAL d)
"""),
'opencl': Template("""
int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int NX_PHI1,
int NY_PHI1,
int NZ_PHI1,
int NX_PHI2,
int NY_PHI2,
int NZ_PHI2,
int NY_R,
__global const unsigned int* restrict ID,
__global const $REAL* restrict Ex,
__global const $REAL* restrict Ey,
__global const $REAL* restrict Ez,
__global $REAL *Hx,
__global $REAL *Hy,
__global const $REAL* restrict Hz,
__global $REAL *PHI1,
__global $REAL *PHI2,
__global const $REAL* restrict RA,
__global const $REAL* restrict RB,
__global const $REAL* restrict RE,
__global const $REAL* restrict RF,
$REAL d
""")
}
order1_xminus = {'args': x_args,
order1_xminus = {'args_cuda': x_args['cuda'],
'args_opencl': x_args['opencl'],
'func': Template("""
// This function updates the Hy and Hz field components for the xminus slab.
//
@@ -113,6 +190,8 @@ order1_xminus = {'args': x_args,
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -173,7 +252,8 @@ order1_xminus = {'args': x_args,
}
""")}
order2_xminus = {'args': x_args,
order2_xminus = {'args_cuda': x_args['cuda'],
'args_opencl': x_args['opencl'],
'func': Template("""
// This function updates the Hy and Hz field components for the xminus slab.
//
@@ -184,6 +264,8 @@ order2_xminus = {'args': x_args,
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -260,7 +342,8 @@ order2_xminus = {'args': x_args,
}
""")}
order1_xplus = {'args': x_args,
order1_xplus = {'args_cuda': x_args['cuda'],
'args_opencl': x_args['opencl'],
'func': Template("""
// This function updates the Hy and Hz field components for the xplus slab.
//
@@ -271,6 +354,8 @@ order1_xplus = {'args': x_args,
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -331,7 +416,8 @@ order1_xplus = {'args': x_args,
}
""")}
order2_xplus = {'args': x_args,
order2_xplus = {'args_cuda': x_args['cuda'],
'args_opencl': x_args['opencl'],
'func': Template("""
// This function updates the Hy and Hz field components for the xplus slab.
//
@@ -342,6 +428,8 @@ order2_xplus = {'args': x_args,
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -418,7 +506,8 @@ order2_xplus = {'args': x_args,
}
""")}
order1_yminus = {'args': y_args,
order1_yminus = {'args_cuda': y_args['cuda'],
'args_opencl': y_args['opencl'],
'func': Template("""
// This function updates the Hx and Hz field components for the yminus slab.
//
@@ -429,6 +518,8 @@ order1_yminus = {'args': y_args,
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -489,7 +580,8 @@ order1_yminus = {'args': y_args,
}
""")}
order2_yminus = {'args': y_args,
order2_yminus = {'args_cuda': y_args['cuda'],
'args_opencl': y_args['opencl'],
'func': Template("""
// This function updates the Hx and Hz field components for the yminus slab.
//
@@ -500,6 +592,8 @@ order2_yminus = {'args': y_args,
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -575,7 +669,8 @@ order2_yminus = {'args': y_args,
}
""")}
order1_yplus = {'args': y_args,
order1_yplus = {'args_cuda': y_args['cuda'],
'args_opencl': y_args['opencl'],
'func': Template("""
// This function updates the Hx and Hz field components for the yplus slab.
//
@@ -586,6 +681,8 @@ order1_yplus = {'args': y_args,
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -646,7 +743,8 @@ order1_yplus = {'args': y_args,
}
""")}
order2_yplus = {'args': y_args,
order2_yplus = {'args_cuda': y_args['cuda'],
'args_opencl': y_args['opencl'],
'func': Template("""
// This function updates the Hx and Hz field components for the yplus slab.
//
@@ -657,6 +755,8 @@ order2_yplus = {'args': y_args,
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -731,7 +831,8 @@ order2_yplus = {'args': y_args,
}
""")}
order1_zminus = {'args': z_args,
order1_zminus = {'args_cuda': z_args['cuda'],
'args_opencl': z_args['opencl'],
'func': Template("""
// This function updates the Hx and Hy field components for the zminus slab.
//
@@ -742,6 +843,8 @@ order1_zminus = {'args': z_args,
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -802,7 +905,8 @@ order1_zminus = {'args': z_args,
}
""")}
order2_zminus = {'args': z_args,
order2_zminus = {'args_cuda': z_args['cuda'],
'args_opencl': z_args['opencl'],
'func': Template("""
// This function updates the Hx and Hy field components for the zminus slab.
//
@@ -813,6 +917,8 @@ order2_zminus = {'args': z_args,
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -889,7 +995,8 @@ order2_zminus = {'args': z_args,
}
""")}
order1_zplus = {'args': z_args,
order1_zplus = {'args_cuda': z_args['cuda'],
'args_opencl': z_args['opencl'],
'func': Template("""
// This function updates the Hx and Hy field components for the zplus slab.
//
@@ -900,6 +1007,8 @@ order1_zplus = {'args': z_args,
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);
@@ -960,7 +1069,8 @@ order1_zplus = {'args': z_args,
}
""")}
order2_zplus = {'args': z_args,
order2_zplus = {'args_cuda': z_args['cuda'],
'args_opencl': z_args['opencl'],
'func': Template("""
// This function updates the Hx and Hy field components for the zplus slab.
//
@@ -971,6 +1081,8 @@ order2_zplus = {'args': z_args,
// Phi, RA, RB, RE, RF: Access to PML magnetic coefficient arrays
// d: Spatial discretisation, e.g. dx, dy or dz
$CUDA_IDX
// Convert the linear index to subscripts for PML PHI1 (4D) arrays
int p1 = i / (NX_PHI1 * NY_PHI1 * NZ_PHI1);
int i1 = (i % (NX_PHI1 * NY_PHI1 * NZ_PHI1)) / (NY_PHI1 * NZ_PHI1);

查看文件

@@ -19,7 +19,55 @@
from string import Template
store_snapshot = Template("""
store_snapshot = {'args_cuda': Template("""
__global__ void store_snapshot(int p,
int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int dx,
int dy,
int dz,
const $REAL* __restrict__ Ex,
const $REAL* __restrict__ Ey,
const $REAL* __restrict__ Ez,
const $REAL* __restrict__ Hx,
const $REAL* __restrict__ Hy,
const $REAL* __restrict__ Hz,
$REAL *snapEx,
$REAL *snapEy,
$REAL *snapEz,
$REAL *snapHx,
$REAL *snapHy,
$REAL *snapHz)
"""),
'args_opencl': Template("""
int p,
int xs,
int xf,
int ys,
int yf,
int zs,
int zf,
int dx,
int dy,
int dz,
__global const $REAL* restrict Ex,
__global const $REAL* restrict Ey,
__global const $REAL* restrict Ez,
__global const $REAL* restrict Hx,
__global const $REAL* restrict Hy,
__global const $REAL* restrict Hz,
__global $REAL *snapEx,
__global $REAL *snapEy,
__global $REAL *snapEz,
__global $REAL *snapHx,
__global $REAL *snapHy,
__global $REAL *snapHz
"""),
'func': Template("""
// Stores field values for a snapshot.
//
// Args:
@@ -29,6 +77,7 @@ store_snapshot = Template("""
// E, H: Access to field component arrays.
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots.
$CUDA_IDX
// Convert the linear index to subscripts for 4D SNAPS array
int x = (i % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) / ($NY_SNAPS * $NZ_SNAPS);
@@ -69,4 +118,5 @@ store_snapshot = Template("""
snapHz[IDX4D_SNAPS(p,x,y,z)] = (Hz[IDX3D_FIELDS(xx,yy,zz)] +
Hz[IDX3D_FIELDS(xx,yy,zz+1)]) / 2;
}
""")
""")
}

查看文件

@@ -18,7 +18,35 @@
from string import Template
update_hertzian_dipole = Template("""
update_hertzian_dipole = {'args_cuda': Template("""
__global__ void update_hertzian_dipole(int NHERTZDIPOLE,
int iteration,
$REAL dx,
$REAL dy,
$REAL dz,
const int* __restrict__ srcinfo1,
const $REAL* __restrict__ srcinfo2,
const $REAL* __restrict__ srcwaveforms,
const unsigned int* __restrict__ ID,
$REAL *Ex,
$REAL *Ey,
$REAL *Ez)
"""),
'args_opencl': Template("""
int NHERTZDIPOLE,
int iteration,
$REAL dx,
$REAL dy,
$REAL dz,
__global const int* restrict srcinfo1,
__global const $REAL* restrict srcinfo2,
__global const $REAL* restrict srcwaveforms,
__global const unsigned int* restrict ID,
__global $REAL *Ex,
__global $REAL *Ey,
__global $REAL *Ez
"""),
'func': Template("""
// Updates electric field values for Hertzian dipole sources.
//
// Args:
@@ -30,6 +58,7 @@ update_hertzian_dipole = Template("""
// srcwaveforms: Source waveform values.
// ID, E: Access to ID and field component arrays.
$CUDA_IDX
if (i < NHERTZDIPOLE) {
@@ -64,8 +93,37 @@ update_hertzian_dipole = Template("""
}
}
""")
}
update_magnetic_dipole = Template("""
update_magnetic_dipole = {'args_cuda': Template("""
__global__ void update_magnetic_dipole(int NMAGDIPOLE,
int iteration,
$REAL dx,
$REAL dy,
$REAL dz,
const int* __restrict__ srcinfo1,
const $REAL* __restrict__ srcinfo2,
const $REAL* __restrict__ srcwaveforms,
const unsigned int* __restrict__ ID,
$REAL *Hx,
$REAL *Hy,
$REAL *Hz)
"""),
'args_opencl': Template("""
int NMAGDIPOLE,
int iteration,
$REAL dx,
$REAL dy,
$REAL dz,
__global const int* restrict rcinfo1,
__global const $REAL* restrict rcinfo2,
__global const $REAL* restrict rcwaveforms,
__global const unsigned int* estrict ID,
__global $REAL *Hx,
__global $REAL *Hy,
__global $REAL *Hz
"""),
'func': Template("""
// Updates electric field values for Hertzian dipole sources.
//
// Args:
@@ -77,6 +135,7 @@ update_magnetic_dipole = Template("""
// srcwaveforms: Source waveform values.
// ID, H: Access to ID and field component arrays.
$CUDA_IDX
if (i < NMAGDIPOLE) {
@@ -109,8 +168,37 @@ update_magnetic_dipole = Template("""
}
}
""")
}
update_voltage_source = Template("""
update_voltage_source = {'args_cuda': Template("""
__global__ void update_voltage_source(int NVOLTSRC,
int iteration,
$REAL dx,
$REAL dy,
$REAL dz,
const int* __restrict__ srcinfo1,
const $REAL* __restrict__ srcinfo2,
const $REAL* __restrict__ srcwaveforms,
const unsigned int* __restrict__ ID,
$REAL *Ex,
$REAL *Ey,
$REAL *Ez)
"""),
'args_opencl': Template("""
int NVOLTSRC,
int iteration,
$REAL dx,
$REAL dy,
$REAL dz,
__global const int* restrict rcinfo1,
__global const $REAL* restrict rcinfo2,
__global const $REAL* restrict rcwaveforms,
__global const unsigned int* estrict ID,
__global $REAL *Ex,
__global $REAL *Ey,
__global $REAL *Ez
"""),
'func': Template("""
// Updates electric field values for voltage sources.
//
// Args:
@@ -122,6 +210,7 @@ update_voltage_source = Template("""
// srcwaveforms: Source waveform values.
// ID, E: Access to ID and field component arrays.
$CUDA_IDX
if (i < NVOLTSRC) {
@@ -170,4 +259,5 @@ update_voltage_source = Template("""
}
}
}
""")
""")
}

查看文件

@@ -19,7 +19,31 @@
from string import Template
store_outputs = Template("""
store_outputs = {'args_cuda': Template("""
__global__ void store_outputs(int NRX,
int iteration,
const int* __restrict__ rxcoords,
$REAL *rxs,
const $REAL* __restrict__ Ex,
const $REAL* __restrict__ Ey,
const $REAL* __restrict__ Ez,
const $REAL* __restrict__ Hx,
const $REAL* __restrict__ Hy,
const $REAL* __restrict__ Hz)
"""),
'args_opencl': Template("""
int NRX,
int iteration,
__global const int* restrict rxcoords,
__global $REAL *rxs,
__global const $REAL* restrict Ex,
__global const $REAL* restrict Ey,
__global const $REAL* restrict Ez,
__global const $REAL* restrict Hx,
__global const $REAL* restrict Hy,
__global const $REAL* restrict Hz
"""),
'func': Template("""
// Stores field component values for every receiver in the model.
//
// Args:
@@ -27,6 +51,8 @@ store_outputs = Template("""
// rxs: array to store field components for receivers - rows
// are field components; columns are iterations; pages are receiver.
$CUDA_IDX
if (i < NRX) {
int x, y, z;
x = rxcoords[IDX2D_RXCOORDS(i,0)];
@@ -39,4 +65,5 @@ store_outputs = Template("""
rxs[IDX3D_RXS(4,iteration,i)] = Hy[IDX3D_FIELDS(x,y,z)];
rxs[IDX3D_RXS(5,iteration,i)] = Hz[IDX3D_FIELDS(x,y,z)];
}
""")
""")
}

查看文件

@@ -791,45 +791,23 @@ class OpenCLUpdates:
NY_SNAPS=Snapshot.ny_max,
NZ_SNAPS=Snapshot.nz_max)
subs = {'CUDA_IDX': '',
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}
self.update_electric_dev = self.elwise(self.ctx,
Template("int NX, "
"int NY, "
"int NZ, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez, "
"__global const $REAL * restrict Hx, "
"__global const $REAL * restrict Hy, "
"__global const $REAL * restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_fields_updates.update_electric.substitute({
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}),
knl_fields_updates.update_electric['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_fields_updates.update_electric['func'].substitute(subs),
'update_electric', preamble=self.knl_common,
options=config.sim_config.devices['compiler_opts'])
self.update_magnetic_dev = self.elwise(self.ctx,
Template("int NX, "
"int NY, "
"int NZ, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Hx, "
"__global $REAL *Hy, "
"__global $REAL *Hz, "
"__global const $REAL * restrict Ex, "
"__global const $REAL * restrict Ey, "
"__global const $REAL * restrict Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_fields_updates.update_magnetic.substitute({
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}),
knl_fields_updates.update_magnetic['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_fields_updates.update_magnetic['func'].substitute(subs),
'update_magnetic', preamble=self.knl_common,
options=config.sim_config.devices['compiler_opts'])
@@ -838,64 +816,26 @@ class OpenCLUpdates:
# If there are any dispersive materials (updates are split into two
# parts as they require present and updated electric field values).
if config.get_model_config().materials['maxpoles'] > 0:
subs = {'CUDA_IDX': '',
'REAL': config.sim_config.dtypes['C_float_or_double'],
'REALFUNC': config.get_model_config().materials['crealfunc'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3],
'NX_T': NX_T,
'NY_T': NY_T,
'NZ_T': NZ_T}
self.dispersive_update_a = self.elwise(self.ctx,
Template("int NX, "
"int NY, "
"int NZ, "
"int MAXPOLES, "
"__global const $COMPLEX* restrict updatecoeffsdispersive, "
"__global $COMPLEX *Tx, "
"__global $COMPLEX *Ty, "
"__global $COMPLEX *Tz, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double'], 'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}),
knl_fields_updates.update_electric_dispersive_A.substitute({
'REAL': config.sim_config.dtypes['C_float_or_double'],
'REALFUNC': config.get_model_config().materials['crealfunc'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3],
'NX_T': NX_T,
'NY_T': NY_T,
'NZ_T': NZ_T}),
knl_fields_updates.update_electric_dispersive_A['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double'], 'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}),
knl_fields_updates.update_electric_dispersive_A['func'].substitute(subs),
'update_electric_dispersive_A', preamble=self.knl_common,
options=config.sim_config.devices['compiler_opts'])
self.dispersive_update_b = self.elwise(self.ctx,
Template("int NX, "
"int NY, "
"int NZ, "
"int MAXPOLES, "
"__global const $COMPLEX* restrict updatecoeffsdispersive, "
"__global $COMPLEX *Tx, "
"__global $COMPLEX *Ty, "
"__global $COMPLEX *Tz, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double'] ,'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}),
knl_fields_updates.update_electric_dispersive_B.substitute({
'REAL': config.sim_config.dtypes['C_float_or_double'],
'REALFUNC': config.get_model_config().materials['crealfunc'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3],
'NX_T': NX_T,
'NY_T': NY_T,
'NZ_T': NZ_T}),
knl_fields_updates.update_electric_dispersive_B['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double'] ,'COMPLEX': config.get_model_config().materials['dispersiveCdtype']}),
knl_fields_updates.update_electric_dispersive_B['func'].substitute(subs),
'update_electric_dispersive_B', preamble=self.knl_common,
options=config.sim_config.devices['compiler_opts'])
@@ -911,6 +851,15 @@ class OpenCLUpdates:
knl_pml_updates_electric = import_module('gprMax.cuda_opencl_el.knl_pml_updates_electric_' + self.grid.pmlformulation)
knl_pml_updates_magnetic = import_module('gprMax.cuda_opencl_el.knl_pml_updates_magnetic_' + self.grid.pmlformulation)
subs = {'CUDA_IDX': '',
'REAL': config.sim_config.dtypes['C_float_or_double'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}
# Set workgroup size, initialise arrays on compute device, and get
# kernel functions
for pml in self.grid.pmls:
@@ -921,29 +870,15 @@ class OpenCLUpdates:
knl_magnetic_name = getattr(knl_pml_updates_magnetic, knl_name)
pml.update_electric_dev = self.elwise(self.ctx,
knl_electric_name['args'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_electric_name['func'].substitute({
'REAL': config.sim_config.dtypes['C_float_or_double'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}),
knl_electric_name['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_electric_name['func'].substitute(subs),
'pml_updates_electric_' + knl_name,
preamble=self.knl_common,
options=config.sim_config.devices['compiler_opts'])
pml.update_magnetic_dev = self.elwise(self.ctx,
knl_magnetic_name['args'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_magnetic_name['func'].substitute({
'REAL': config.sim_config.dtypes['C_float_or_double'],
'NX_FIELDS': self.grid.nx + 1,
'NY_FIELDS': self.grid.ny + 1,
'NZ_FIELDS': self.grid.nz + 1,
'NX_ID': self.grid.ID.shape[1],
'NY_ID': self.grid.ID.shape[2],
'NZ_ID': self.grid.ID.shape[3]}),
knl_magnetic_name['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_magnetic_name['func'].substitute(subs),
'pml_updates_magnetic_' + knl_name,
preamble=self.knl_common,
options=config.sim_config.devices['compiler_opts'])
@@ -954,17 +889,8 @@ class OpenCLUpdates:
"""
self.rxcoords_dev, self.rxs_dev = htod_rx_arrays(self.grid, self.queue)
self.store_outputs_dev = self.elwise(self.ctx,
Template("int NRX, "
"int iteration, "
"__global const int* restrict rxcoords, "
"__global $REAL *rxs, "
"__global const $REAL* restrict Ex, "
"__global const $REAL* restrict Ey, "
"__global const $REAL* restrict Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_store_outputs.store_outputs.substitute(),
knl_store_outputs.store_outputs['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_store_outputs.store_outputs['func'].substitute({'CUDA_IDX': ''}),
'store_outputs', preamble=self.knl_common,
options=config.sim_config.devices['compiler_opts'])
@@ -975,55 +901,22 @@ class OpenCLUpdates:
if self.grid.hertziandipoles:
self.srcinfo1_hertzian_dev, self.srcinfo2_hertzian_dev, self.srcwaves_hertzian_dev = htod_src_arrays(self.grid.hertziandipoles, self.grid, self.queue)
self.update_hertzian_dipole_dev = self.elwise(self.ctx,
Template("int NHERTZDIPOLE, "
"int iteration, "
"$REAL dx, "
"$REAL dy, "
"$REAL dz, "
"__global const int* restrict srcinfo1, "
"__global const $REAL* restrict srcinfo2, "
"__global const $REAL* restrict srcwaveforms, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_hertzian_dipole.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_hertzian_dipole['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_hertzian_dipole['func'].substitute({'CUDA_IDX': '', 'REAL': config.sim_config.dtypes['C_float_or_double']}),
'update_hertzian_dipole', preamble=self.knl_common,
options=config.sim_config.devices['compiler_opts'])
if self.grid.magneticdipoles:
self.srcinfo1_magnetic_dev, self.srcinfo2_magnetic_dev, self.srcwaves_magnetic_dev = htod_src_arrays(self.grid.magneticdipoles, self.grid, self.queue)
self.update_magnetic_dipole_dev = self.elwise(self.ctx,
Template("int NMAGDIPOLE, "
"int iteration, "
"$REAL dx, "
"$REAL dy, "
"$REAL dz, "
"__global const int* restrict srcinfo1, "
"__global const $REAL* restrict srcinfo2, "
"__global const $REAL* restrict srcwaveforms, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Hx, "
"__global $REAL *Hy, "
"__global $REAL *Hz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_magnetic_dipole.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_magnetic_dipole['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_magnetic_dipole['func'].substitute({'CUDA_IDX': '', 'REAL': config.sim_config.dtypes['C_float_or_double']}),
'update_magnetic_dipole', preamble=self.knl_common,
options=config.sim_config.devices['compiler_opts'])
if self.grid.voltagesources:
self.srcinfo1_voltage_dev, self.srcinfo2_voltage_dev,self.srcwaves_voltage_dev = htod_src_arrays(self.grid.voltagesources, self.grid, self.queue)
self.update_voltage_source_dev = self.elwise(self.ctx,
Template("int NVOLTSRC, "
"int iteration, "
"$REAL dx, "
"$REAL dy, "
"$REAL dz, "
"__global const int* restrict srcinfo1, "
"__global const $REAL* restrict srcinfo2, "
"__global const $REAL* restrict srcwaveforms, "
"__global const unsigned int* restrict ID, "
"__global $REAL *Ex, "
"__global $REAL *Ey, "
"__global $REAL *Ez").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_voltage_source.substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), 'update_voltage_source', preamble=self.knl_common,
knl_source_updates.update_voltage_source['args_opencl'].substitute({'CUDA_IDX': '', 'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_source_updates.update_voltage_source['func'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}), 'update_voltage_source', preamble=self.knl_common,
options=config.sim_config.devices['compiler_opts'])
def _set_snapshot_knl(self):
@@ -1032,31 +925,11 @@ class OpenCLUpdates:
"""
self.snapEx_dev, self.snapEy_dev, self.snapEz_dev, self.snapHx_dev, self.snapHy_dev, self.snapHz_dev = htod_snapshot_array(self.grid, self.queue)
self.store_snapshot_dev = self.elwise(self.ctx,
Template("int p, "
"int xs, "
"int xf, "
"int ys, "
"int yf, "
"int zs, "
"int zf, "
"int dx, "
"int dy, "
"int dz, "
"__global const $REAL* restrict Ex, "
"__global const $REAL* restrict Ey, "
"__global const $REAL* restrict Ez, "
"__global const $REAL* restrict Hx, "
"__global const $REAL* restrict Hy, "
"__global const $REAL* restrict Hz, "
"__global $REAL *snapEx, "
"__global $REAL *snapEy, "
"__global $REAL *snapEz, "
"__global $REAL *snapHx, "
"__global $REAL *snapHy, "
"__global $REAL *snapHz").substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_snapshots.store_snapshot.substitute({'NX_SNAPS': Snapshot.nx_max,
'NY_SNAPS': Snapshot.ny_max,
'NZ_SNAPS': Snapshot.nz_max}),
knl_snapshots.store_snapshot['args_opencl'].substitute({'REAL': config.sim_config.dtypes['C_float_or_double']}),
knl_snapshots.store_snapshot['func'].substitute( {'CUDA_IDX': '',
'NX_SNAPS': Snapshot.nx_max,
'NY_SNAPS': Snapshot.ny_max,
'NZ_SNAPS': Snapshot.nz_max}),
'store_snapshot', preamble=self.knl_common,
options=config.sim_config.devices['compiler_opts'])
@@ -1260,10 +1133,7 @@ class OpenCLUpdates:
self.grid.ID_dev,
self.grid.Ex_dev,
self.grid.Ey_dev,
self.grid.Ez_dev,
self.grid.Hx_dev,
self.grid.Hy_dev,
self.grid.Hz_dev)
self.grid.Ez_dev)
event.wait()
self.compute_time += (event.profile.end - event.profile.start)*1e-9