Added ability to write snapshots with GPU solver. Implmented speedup of snapshot writing for CPU solver.

这个提交包含在:
Craig Warren
2018-07-09 11:15:08 +01:00
父节点 ae243b1d03
当前提交 0ea15aaf1a
共有 4 个文件被更改,包括 433 次插入118 次删除

查看文件

@@ -31,7 +31,10 @@ import numpy as np
from terminaltables import AsciiTable
from tqdm import tqdm
from gprMax.constants import floattype, cudafloattype, cudacomplextype
from gprMax.constants import floattype
from gprMax.constants import complextype
from gprMax.constants import cudafloattype
from gprMax.constants import cudacomplextype
from gprMax.exceptions import GeneralError
from gprMax.fields_outputs import store_outputs
@@ -48,6 +51,7 @@ from gprMax.fields_updates_gpu import kernels_template_fields
from gprMax.grid import FDTDGrid
from gprMax.grid import dispersion_analysis
from gprMax.input_cmds_geometry import process_geometrycmds
from gprMax.input_cmds_file import process_python_include_code
from gprMax.input_cmds_file import write_processed_file
@@ -60,12 +64,15 @@ from gprMax.pml import build_pmls
from gprMax.pml_updates_gpu import kernels_template_pml
from gprMax.receivers import gpu_initialise_rx_arrays
from gprMax.receivers import gpu_get_rx_array
from gprMax.snapshots import Snapshot
from gprMax.snapshots import gpu_initialise_snapshot_array
from gprMax.snapshots import gpu_get_snapshot_array
from gprMax.snapshots_gpu import kernel_template_store_snapshot
from gprMax.sources import gpu_initialise_src_arrays
from gprMax.source_updates_gpu import kernels_template_sources
from gprMax.utilities import get_host_info
from gprMax.utilities import get_terminal_width
from gprMax.utilities import human_size
from gprMax.utilities import memory_usage
from gprMax.utilities import open_path_file
from gprMax.utilities import round32
from gprMax.yee_cell_build_ext import build_electric_components
@@ -153,6 +160,12 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern
print()
process_multicmds(multicmds, G)
# Estimate and check memory (RAM) usage
G.memory_estimate_basic()
G.memory_check()
if G.messages:
print('\nTotal memory (RAM) required: ~{}\n'.format(human_size(G.memoryusage)))
# Initialise an array for volumetric material IDs (solid), boolean
# arrays for specifying materials not to be averaged (rigid),
# an array for cell edge IDs (ID)
@@ -198,22 +211,22 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern
# N.B. 2D modes are a single cell slice of 3D grid
if '2D TMx' in G.mode:
# Ey & Ez components
G.ID[1,0,:,:] = 0
G.ID[1,1,:,:] = 0
G.ID[2,0,:,:] = 0
G.ID[2,1,:,:] = 0
G.ID[1, 0, :, :] = 0
G.ID[1, 1, :, :] = 0
G.ID[2, 0, :, :] = 0
G.ID[2, 1, :, :] = 0
elif '2D TMy' in G.mode:
# Ex & Ez components
G.ID[0,:,0,:] = 0
G.ID[0,:,1,:] = 0
G.ID[2,:,0,:] = 0
G.ID[2,:,1,:] = 0
G.ID[0, :, 0, :] = 0
G.ID[0, :, 1, :] = 0
G.ID[2, :, 0, :] = 0
G.ID[2, :, 1, :] = 0
elif '2D TMz' in G.mode:
# Ex & Ey components
G.ID[0,:,:,0] = 0
G.ID[0,:,:,1] = 0
G.ID[1,:,:,0] = 0
G.ID[1,:,:,1] = 0
G.ID[0, :, :, 0] = 0
G.ID[0, :, :, 1] = 0
G.ID[1, :, :, 0] = 0
G.ID[1, :, :, 1] = 0
# Process any voltage sources (that have resistance) to create a new
# material at the source location
@@ -227,20 +240,24 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern
# there are any dispersive materials
if Material.maxpoles != 0:
# Update estimated memory (RAM) usage
memestimate = memory_usage(G)
# Check if model can be built and/or run on host
if memestimate > G.hostinfo['ram']:
raise GeneralError('Estimated memory (RAM) required ~{} exceeds {} detected!\n'.format(human_size(memestimate), human_size(G.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)))
# Check if model can be run on specified GPU if required
if G.gpu is not None:
if memestimate > G.gpu.totalmem:
raise GeneralError('Estimated memory (RAM) required ~{} exceeds {} detected on specified {} - {} GPU!\n'.format(human_size(memestimate), human_size(G.gpu.totalmem, a_kilobyte_is_1024_bytes=True), G.gpu.deviceID, G.gpu.name))
G.memoryusage += int(3 * Material.maxpoles * (G.nx + 1) * (G.ny + 1) * (G.nz + 1) * np.dtype(complextype).itemsize)
G.memory_check()
if G.messages:
print('Estimated memory (RAM) required: ~{}'.format(human_size(memestimate)))
print('\nTotal memory (RAM) required, updated (dispersive): ~{}\n'.format(human_size(G.memoryusage)))
G.initialise_dispersive_arrays()
# Check there is sufficient memory to store any snapshots
if G.snapshots:
snapsmemsize = 0
for snap in G.snapshots:
# 2 x required to account for electric and magnetic fields
snapsmemsize += (2 * snap.datasizefield)
G.memoryusage += int(snapsmemsize)
G.memory_check(snapsmemsize=int(snapsmemsize))
if G.messages:
print('\nTotal memory (RAM) required, updated (snapshots): ~{}\n'.format(human_size(G.memoryusage)))
# Process complete list of materials - calculate update coefficients,
# store in arrays, and build text list of materials/properties
materialsdata = process_materials(G)
@@ -314,10 +331,6 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern
# Run simulation
else:
# Prepare any snapshot files
for snapshot in G.snapshots:
snapshot.prepare_vtk_imagedata(appendmodelnumber, G)
# Output filename
inputfileparts = os.path.splitext(os.path.join(G.inputdirectory, G.inputfilename))
outputfile = inputfileparts[0] + appendmodelnumber + '.out'
@@ -332,8 +345,23 @@ def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usern
# Write an output file in HDF5 format
write_hdf5_outputfile(outputfile, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G)
# Write any snapshots to file
if G.snapshots:
# Create directory and construct filename from user-supplied name and model run number
snapshotdir = os.path.join(G.inputdirectory, os.path.splitext(G.inputfilename)[0] + '_snaps' + appendmodelnumber)
if not os.path.exists(snapshotdir):
os.mkdir(snapshotdir)
print()
for i, snap in enumerate(G.snapshots):
snap.filename = os.path.abspath(os.path.join(snapshotdir, snap.basefilename + '.vti'))
pbar = tqdm(total=snap.vtkdatawritesize, leave=True, unit='byte', unit_scale=True, desc='Writing snapshot file {} of {}, {}'.format(i + 1, len(G.snapshots), os.path.split(snap.filename)[1]), ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable)
snap.write_vtk_imagedata(pbar, G)
pbar.close()
print()
if G.messages:
print('Memory (RAM) used: ~{}'.format(human_size(p.memory_info().rss)))
print('Total memory (RAM) used: ~{}'.format(human_size(p.memory_info().rss)))
print('Solving time [HH:MM:SS]: {}'.format(datetime.timedelta(seconds=tsolve)))
# If geometry information to be reused between model runs then FDTDGrid
@@ -364,13 +392,10 @@ def solve_cpu(currentmodelrun, modelend, G):
# Store field component values for every receiver and transmission line
store_outputs(iteration, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G)
# Write any snapshots to file
for i, snap in enumerate(G.snapshots):
# Store any snapshots
for snap in G.snapshots:
if snap.time == iteration + 1:
snapiters = 36 * (((snap.xf - snap.xs) / snap.dx) * ((snap.yf - snap.ys) / snap.dy) * ((snap.zf - snap.zs) / snap.dz))
pbar = tqdm(total=snapiters, leave=False, unit='byte', unit_scale=True, desc=' Writing snapshot file {} of {}, {}'.format(i + 1, len(G.snapshots), os.path.split(snap.filename)[1]), ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable)
snap.write_vtk_imagedata(G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G, pbar)
pbar.close()
snap.store(G)
# Update magnetic field components
update_magnetic(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsH, G.ID, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz)
@@ -453,7 +478,7 @@ def solve_gpu(currentmodelrun, modelend, G):
drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)
# Electric and magnetic field updates - dispersive materials - get kernel functions
# Electric and magnetic field updates - dispersive materials - get kernel functions and initialise array on GPU
if Material.maxpoles > 0: # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values).
update_e_dispersive_A_gpu = kernels_fields.get_function("update_e_dispersive_A")
update_e_dispersive_B_gpu = kernels_fields.get_function("update_e_dispersive_B")
@@ -504,6 +529,14 @@ def solve_gpu(currentmodelrun, modelend, G):
srcinfo1_voltage_gpu, srcinfo2_voltage_gpu, srcwaves_voltage_gpu = gpu_initialise_src_arrays(G.voltagesources, G)
update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source")
# Snapshots - initialise arrays on GPU, prepare kernel and get kernel functions
if G.snapshots:
# Initialise arrays on GPU
snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu = gpu_initialise_snapshot_array(G)
# Prepare kernel and get kernel function
kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2]))
store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")
# Iteration loop timer
iterstart = drv.Event()
iterend = drv.Event()
@@ -513,10 +546,34 @@ def solve_gpu(currentmodelrun, modelend, G):
# Store field component values for every receiver
if G.rxs:
store_outputs_gpu(np.int32(len(G.rxs)), np.int32(iteration), rxcoords_gpu.gpudata, rxs_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(G.rxs)), 1, 1))
store_outputs_gpu(np.int32(len(G.rxs)), np.int32(iteration),
rxcoords_gpu.gpudata, rxs_gpu.gpudata,
G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
block=(1, 1, 1), grid=(round32(len(G.rxs)), 1, 1))
# Store any snapshots
for i, snap in enumerate(G.snapshots):
if snap.time == iteration + 1:
store_snapshot_gpu(np.int32(i), np.int32(snap.xs),
np.int32(snap.xf), np.int32(snap.ys),
np.int32(snap.yf), np.int32(snap.zs),
np.int32(snap.zf), np.int32(snap.dx),
np.int32(snap.dy), np.int32(snap.dz),
G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
snapEx_gpu.gpudata, snapEy_gpu.gpudata, snapEz_gpu.gpudata,
snapHx_gpu.gpudata, snapHy_gpu.gpudata, snapHz_gpu.gpudata,
block=Snapshot.tpb, grid=Snapshot.bpg)
if G.snapsgpu2cpu:
gpu_get_snapshot_array(snapEx_gpu.get(), snapEy_gpu.get(), snapEz_gpu.get(),
snapHx_gpu.get(), snapHy_gpu.get(), snapHz_gpu.get(), i, snap)
# Update magnetic field components
update_h_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), G.ID_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=G.tpb, grid=G.bpg)
update_h_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
G.ID_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata,
G.Hz_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata,
G.Ez_gpu.gpudata, block=G.tpb, grid=G.bpg)
# Update magnetic field components with the PML correction
for pml in G.pmls:
@@ -524,13 +581,29 @@ def solve_gpu(currentmodelrun, modelend, G):
# Update magnetic field components for magetic dipole sources
if G.magneticdipoles:
update_magnetic_dipole_gpu(np.int32(len(G.magneticdipoles)), np.int32(iteration), floattype(G.dx), floattype(G.dy), floattype(G.dz), srcinfo1_magnetic_gpu.gpudata, srcinfo2_magnetic_gpu.gpudata, srcwaves_magnetic_gpu.gpudata, G.ID_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(G.magneticdipoles)), 1, 1))
update_magnetic_dipole_gpu(np.int32(len(G.magneticdipoles)), np.int32(iteration),
floattype(G.dx), floattype(G.dy), floattype(G.dz),
srcinfo1_magnetic_gpu.gpudata, srcinfo2_magnetic_gpu.gpudata,
srcwaves_magnetic_gpu.gpudata, G.ID_gpu.gpudata,
G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
block=(1, 1, 1), grid=(round32(len(G.magneticdipoles)), 1, 1))
# Update electric field components
if Material.maxpoles == 0: # If all materials are non-dispersive do standard update
update_e_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=G.tpb, grid=G.bpg)
else: # If there are any dispersive materials do 1st part of dispersive update (it is split into two parts as it requires present and updated electric field values).
update_e_dispersive_A_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), np.int32(Material.maxpoles), G.updatecoeffsdispersive_gpu.gpudata, G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=G.tpb, grid=G.bpg)
# If all materials are non-dispersive do standard update
if Material.maxpoles == 0:
update_e_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), G.ID_gpu.gpudata,
G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
block=G.tpb, grid=G.bpg)
# If there are any dispersive materials do 1st part of dispersive update
# (it is split into two parts as it requires present and updated electric field values).
else:
update_e_dispersive_A_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
np.int32(Material.maxpoles), G.updatecoeffsdispersive_gpu.gpudata,
G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata,
G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
block=G.tpb, grid=G.bpg)
# Update electric field components with the PML correction
for pml in G.pmls:
@@ -538,20 +611,40 @@ def solve_gpu(currentmodelrun, modelend, G):
# Update electric field components for voltage sources
if G.voltagesources:
update_voltage_source_gpu(np.int32(len(G.voltagesources)), np.int32(iteration), floattype(G.dx), floattype(G.dy), floattype(G.dz), srcinfo1_voltage_gpu.gpudata, srcinfo2_voltage_gpu.gpudata, srcwaves_voltage_gpu.gpudata, G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(G.voltagesources)), 1, 1))
update_voltage_source_gpu(np.int32(len(G.voltagesources)), np.int32(iteration),
floattype(G.dx), floattype(G.dy), floattype(G.dz),
srcinfo1_voltage_gpu.gpudata, srcinfo2_voltage_gpu.gpudata,
srcwaves_voltage_gpu.gpudata, G.ID_gpu.gpudata,
G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
block=(1, 1, 1), grid=(round32(len(G.voltagesources)), 1, 1))
# Update electric field components for Hertzian dipole sources (update any Hertzian dipole sources last)
if G.hertziandipoles:
update_hertzian_dipole_gpu(np.int32(len(G.hertziandipoles)), np.int32(iteration), floattype(G.dx), floattype(G.dy), floattype(G.dz), srcinfo1_hertzian_gpu.gpudata, srcinfo2_hertzian_gpu.gpudata, srcwaves_hertzian_gpu.gpudata, G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(G.hertziandipoles)), 1, 1))
update_hertzian_dipole_gpu(np.int32(len(G.hertziandipoles)), np.int32(iteration),
floattype(G.dx), floattype(G.dy), floattype(G.dz),
srcinfo1_hertzian_gpu.gpudata, srcinfo2_hertzian_gpu.gpudata,
srcwaves_hertzian_gpu.gpudata, G.ID_gpu.gpudata,
G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
block=(1, 1, 1), grid=(round32(len(G.hertziandipoles)), 1, 1))
# If there are any dispersive materials do 2nd part of dispersive update (it is split into two parts as it requires present and updated electric field values). Therefore it can only be completely updated after the electric field has been updated by the PML and source updates.
if Material.maxpoles > 0:
update_e_dispersive_B_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), np.int32(Material.maxpoles), G.updatecoeffsdispersive_gpu.gpudata, G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=G.tpb, grid=G.bpg)
update_e_dispersive_B_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz),
np.int32(Material.maxpoles), G.updatecoeffsdispersive_gpu.gpudata,
G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata,
G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
block=G.tpb, grid=G.bpg)
# Copy output from receivers array back to correct receiver objects
if G.rxs:
gpu_get_rx_array(rxs_gpu.get(), rxcoords_gpu.get(), G)
# Copy data from any snapshots back to correct snapshot objects
if G.snapshots and not G.snapsgpu2cpu:
for i, snap in enumerate(G.snapshots):
gpu_get_snapshot_array(snapEx_gpu.get(), snapEy_gpu.get(), snapEz_gpu.get(),
snapHx_gpu.get(), snapHy_gpu.get(), snapHz_gpu.get(), i, snap)
iterend.record()
iterend.synchronize()
tsolve = iterstart.time_till(iterend) * 1e-3

查看文件

@@ -23,15 +23,23 @@ from struct import pack
import numpy as np
from gprMax.constants import floattype
from gprMax.grid import Ix
from gprMax.grid import Iy
from gprMax.grid import Iz
from gprMax.snapshots_ext import calculate_snapshot_fields
from gprMax.utilities import round_value
class Snapshot(object):
"""Snapshots of the electric and magnetic field values."""
# Dimensions of largest requested snapshot
nx_max = 0
ny_max = 0
nz_max = 0
# GPU - threads per block
tpb = (1, 1, 1)
# GPU - blocks per grid - set according to largest requested snapshot
bpg = None
# Set string for byte order
if sys.byteorder == 'little':
byteorder = 'LittleEndian'
@@ -49,8 +57,8 @@ class Snapshot(object):
def __init__(self, xs=None, ys=None, zs=None, xf=None, yf=None, zf=None, dx=None, dy=None, dz=None, time=None, filename=None):
"""
Args:
xs, xf, ys, yf, zs, zf (float): Extent of the volume.
dx, dy, dz (float): Spatial discretisation.
xs, xf, ys, yf, zs, zf (int): Extent of the volume in cells.
dx, dy, dz (int): Spatial discretisation in cells.
time (int): Iteration number to take the snapshot on.
filename (str): Filename to save to.
"""
@@ -64,92 +72,156 @@ class Snapshot(object):
self.dx = dx
self.dy = dy
self.dz = dz
self.nx = round_value((self.xf - self.xs) / self.dx)
self.ny = round_value((self.yf - self.ys) / self.dy)
self.nz = round_value((self.zf - self.zs) / self.dz)
self.sx = slice(self.xs, self.xf + self.dx, self.dx)
self.sy = slice(self.ys, self.yf + self.dy, self.dy)
self.sz = slice(self.zs, self.zf + self.dz, self.dz)
self.ncells = self.nx * self.ny * self.nz
self.datasizefield = 3 * np.dtype(floattype).itemsize * self.ncells
self.vtkdatawritesize = 2 * self.datasizefield + 2 * np.dtype(np.uint32).itemsize
self.time = time
self.basefilename = filename
def prepare_vtk_imagedata(self, appendmodelnumber, G):
"""Prepares a VTK ImageData (.vti) file for a snapshot.
def store(self, G):
"""Store (in memory) electric and magnetic field values for snapshot.
Args:
appendmodelnumber (str): Text to append to filename.
G (class): Grid class instance - holds essential parameters describing the model.
"""
# No Python 3 support for VTK at time of writing (03/2015)
self.vtk_nx = self.xf - self.xs
self.vtk_ny = self.yf - self.ys
self.vtk_nz = self.zf - self.zs
# Memory views of field arrays to dimensions required for the snapshot
Exslice = np.ascontiguousarray(G.Ex[self.sx, self.sy, self.sz])
Eyslice = np.ascontiguousarray(G.Ey[self.sx, self.sy, self.sz])
Ezslice = np.ascontiguousarray(G.Ez[self.sx, self.sy, self.sz])
Hxslice = np.ascontiguousarray(G.Hx[self.sx, self.sy, self.sz])
Hyslice = np.ascontiguousarray(G.Hy[self.sx, self.sy, self.sz])
Hzslice = np.ascontiguousarray(G.Hz[self.sx, self.sy, self.sz])
# Create directory and construct filename from user-supplied name and model run number
snapshotdir = os.path.join(G.inputdirectory, os.path.splitext(G.inputfilename)[0] + '_snaps' + appendmodelnumber)
if not os.path.exists(snapshotdir):
os.mkdir(snapshotdir)
self.filename = os.path.abspath(os.path.join(snapshotdir, self.basefilename + '.vti'))
# Create arrays to hold the field data for snapshot
Exsnap = np.zeros((self.nx, self.ny, self.nz), dtype=floattype)
Eysnap = np.zeros((self.nx, self.ny, self.nz), dtype=floattype)
Ezsnap = np.zeros((self.nx, self.ny, self.nz), dtype=floattype)
Hxsnap = np.zeros((self.nx, self.ny, self.nz), dtype=floattype)
Hysnap = np.zeros((self.nx, self.ny, self.nz), dtype=floattype)
Hzsnap = np.zeros((self.nx, self.ny, self.nz), dtype=floattype)
# Calculate number of cells according to requested sampling
self.vtk_xscells = round_value(self.xs / self.dx)
self.vtk_xfcells = round_value(self.xf / self.dx)
self.vtk_yscells = round_value(self.ys / self.dy)
self.vtk_yfcells = round_value(self.yf / self.dy)
self.vtk_zscells = round_value(self.zs / self.dz)
self.vtk_zfcells = round_value(self.zf / self.dz)
vtk_hfield_offset = 3 * np.dtype(floattype).itemsize * (self.vtk_xfcells - self.vtk_xscells) * (self.vtk_yfcells - self.vtk_yscells) * (self.vtk_zfcells - self.vtk_zscells) + np.dtype(np.uint32).itemsize
vtk_current_offset = 2 * vtk_hfield_offset
# Calculate field values at points (comes from averaging field components in cells)
calculate_snapshot_fields(
self.nx,
self.ny,
self.nz,
Exslice,
Eyslice,
Ezslice,
Hxslice,
Hyslice,
Hzslice,
Exsnap,
Eysnap,
Ezsnap,
Hxsnap,
Hysnap,
Hzsnap)
# Convert to format for Paraview
self.electric = np.stack((Exsnap, Eysnap, Ezsnap)).reshape(-1, order='F')
self.magnetic = np.stack((Hxsnap, Hysnap, Hzsnap)).reshape(-1, order='F')
def write_vtk_imagedata(self, pbar, G):
"""Write snapshot data to a VTK ImageData (.vti) file.
N.B. No Python 3 support for VTK at time of writing (03/2015)
Args:
pbar (class): Progress bar class instance.
G (class): Grid class instance - holds essential parameters describing the model.
"""
hfield_offset = 3 * np.dtype(floattype).itemsize * self.ncells + np.dtype(np.uint32).itemsize
self.filehandle = open(self.filename, 'wb')
self.filehandle.write('<?xml version="1.0"?>\n'.encode('utf-8'))
self.filehandle.write('<VTKFile type="ImageData" version="1.0" byte_order="{}">\n'.format(Snapshot.byteorder).encode('utf-8'))
self.filehandle.write('<ImageData WholeExtent="{} {} {} {} {} {}" Origin="0 0 0" Spacing="{:.3} {:.3} {:.3}">\n'.format(self.vtk_xscells, self.vtk_xfcells, self.vtk_yscells, self.vtk_yfcells, self.vtk_zscells, self.vtk_zfcells, self.dx * G.dx, self.dy * G.dy, self.dz * G.dz).encode('utf-8'))
self.filehandle.write('<Piece Extent="{} {} {} {} {} {}">\n'.format(self.vtk_xscells, self.vtk_xfcells, self.vtk_yscells, self.vtk_yfcells, self.vtk_zscells, self.vtk_zfcells).encode('utf-8'))
self.filehandle.write('<CellData Vectors="E-field H-field Current">\n'.encode('utf-8'))
self.filehandle.write('<ImageData WholeExtent="{} {} {} {} {} {}" Origin="0 0 0" Spacing="{:.3} {:.3} {:.3}">\n'.format(self.xs, round_value(self.xf / self.dx), self.ys, round_value(self.yf / self.dy), self.zs, round_value(self.zf / self.dz), self.dx * G.dx, self.dy * G.dy, self.dz * G.dz).encode('utf-8'))
self.filehandle.write('<Piece Extent="{} {} {} {} {} {}">\n'.format(self.xs, round_value(self.xf / self.dx), self.ys, round_value(self.yf / self.dy), self.zs, round_value(self.zf / self.dz)).encode('utf-8'))
self.filehandle.write('<CellData Vectors="E-field H-field">\n'.encode('utf-8'))
self.filehandle.write('<DataArray type="{}" Name="E-field" NumberOfComponents="3" format="appended" offset="0" />\n'.format(Snapshot.floatname).encode('utf-8'))
self.filehandle.write('<DataArray type="{}" Name="H-field" NumberOfComponents="3" format="appended" offset="{}" />\n'.format(Snapshot.floatname, vtk_hfield_offset).encode('utf-8'))
self.filehandle.write('<DataArray type="{}" Name="Current" NumberOfComponents="3" format="appended" offset="{}" />\n'.format(Snapshot.floatname, vtk_current_offset).encode('utf-8'))
self.filehandle.write('<DataArray type="{}" Name="H-field" NumberOfComponents="3" format="appended" offset="{}" />\n'.format(Snapshot.floatname, hfield_offset).encode('utf-8'))
self.filehandle.write('</CellData>\n</Piece>\n</ImageData>\n<AppendedData encoding="raw">\n_'.encode('utf-8'))
self.filehandle.close()
def write_vtk_imagedata(self, Ex, Ey, Ez, Hx, Hy, Hz, G, pbar):
"""Writes electric and magnetic field values to VTK ImageData (.vti) file.
Args:
Ex, Ey, Ez, Hx, Hy, Hz (memory view): Electric and magnetic field values.
G (class): Grid class instance - holds essential parameters describing the model.
pbar (class): Progress bar class instance.
"""
self.filehandle = open(self.filename, 'ab')
datasize = 3 * np.dtype(floattype).itemsize * (self.vtk_xfcells - self.vtk_xscells) * (self.vtk_yfcells - self.vtk_yscells) * (self.vtk_zfcells - self.vtk_zscells)
# Write number of bytes of appended data as UInt32
self.filehandle.write(pack('I', datasize))
for k in range(self.zs, self.zf, self.dz):
for j in range(self.ys, self.yf, self.dy):
for i in range(self.xs, self.xf, self.dx):
pbar.update(n=12)
# The electric field component value at a point comes from average of the 4 electric field component values in that cell
self.filehandle.write(pack(Snapshot.floatstring, (Ex[i, j, k] + Ex[i, j + 1, k] + Ex[i, j, k + 1] + Ex[i, j + 1, k + 1]) / 4))
self.filehandle.write(pack(Snapshot.floatstring, (Ey[i, j, k] + Ey[i + 1, j, k] + Ey[i, j, k + 1] + Ey[i + 1, j, k + 1]) / 4))
self.filehandle.write(pack(Snapshot.floatstring, (Ez[i, j, k] + Ez[i + 1, j, k] + Ez[i, j + 1, k] + Ez[i + 1, j + 1, k]) / 4))
self.filehandle.write(pack('I', self.datasizefield))
pbar.update(n=4)
self.electric.tofile(self.filehandle)
pbar.update(n=self.datasizefield)
self.filehandle.write(pack('I', datasize))
for k in range(self.zs, self.zf, self.dz):
for j in range(self.ys, self.yf, self.dy):
for i in range(self.xs, self.xf, self.dx):
pbar.update(n=12)
# The magnetic field component value at a point comes from average
# of 2 magnetic field component values in that cell and the following cell
self.filehandle.write(pack(Snapshot.floatstring, (Hx[i, j, k] + Hx[i + 1, j, k]) / 2))
self.filehandle.write(pack(Snapshot.floatstring, (Hy[i, j, k] + Hy[i, j + 1, k]) / 2))
self.filehandle.write(pack(Snapshot.floatstring, (Hz[i, j, k] + Hz[i, j, k + 1]) / 2))
self.filehandle.write(pack('I', datasize))
for k in range(self.zs, self.zf, self.dz):
for j in range(self.ys, self.yf, self.dy):
for i in range(self.xs, self.xf, self.dx):
pbar.update(n=12)
self.filehandle.write(pack(Snapshot.floatstring, Ix(i, j, k, Hx, Hy, Hz, G)))
self.filehandle.write(pack(Snapshot.floatstring, Iy(i, j, k, Hx, Hy, Hz, G)))
self.filehandle.write(pack(Snapshot.floatstring, Iz(i, j, k, Hx, Hy, Hz, G)))
# Write number of bytes of appended data as UInt32
self.filehandle.write(pack('I', self.datasizefield))
pbar.update(n=4)
self.magnetic.tofile(self.filehandle)
pbar.update(n=self.datasizefield)
self.filehandle.write('\n</AppendedData>\n</VTKFile>'.encode('utf-8'))
self.filehandle.close()
def gpu_initialise_snapshot_array(G):
"""Initialise array on GPU for to store field data for snapshots.
Args:
G (class): Grid class instance - holds essential parameters describing the model.
Returns:
snapE*_gpu, snapH*_gpu (float): numpy arrays of snapshot data on GPU.
"""
import pycuda.gpuarray as gpuarray
# Get dimensions of largest requested snapshot
for snap in G.snapshots:
if snap.nx > Snapshot.nx_max:
Snapshot.nx_max = snap.nx
if snap.ny > Snapshot.ny_max:
Snapshot.ny_max = snap.ny
if snap.nz > Snapshot.nz_max:
Snapshot.nz_max = snap.nz
# GPU - blocks per grid - according to largest requested snapshot
Snapshot.bpg = (int(np.ceil(((Snapshot.nx_max) * (Snapshot.ny_max) * (Snapshot.nz_max)) / Snapshot.tpb[0])), 1, 1)
# 4D arrays to store snapshots on GPU, e.g. snapEx(time, x, y, z)
snapEx = np.zeros((len(G.snapshots), Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=floattype)
snapEy = np.zeros((len(G.snapshots), Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=floattype)
snapEz = np.zeros((len(G.snapshots), Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=floattype)
snapHx = np.zeros((len(G.snapshots), Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=floattype)
snapHy = np.zeros((len(G.snapshots), Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=floattype)
snapHz = np.zeros((len(G.snapshots), Snapshot.nx_max, Snapshot.ny_max, Snapshot.nz_max), dtype=floattype)
# Copy arrays to GPU
snapEx_gpu = gpuarray.to_gpu(snapEx)
snapEy_gpu = gpuarray.to_gpu(snapEy)
snapEz_gpu = gpuarray.to_gpu(snapEz)
snapHx_gpu = gpuarray.to_gpu(snapHx)
snapHy_gpu = gpuarray.to_gpu(snapHy)
snapHz_gpu = gpuarray.to_gpu(snapHz)
return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu
def gpu_get_snapshot_array(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap):
"""Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview.
Args:
snapE_gpu, snapH_gpu (float): numpy arrays of snapshot data from GPU.
i (int): index for snapshot data on GPU array.
snap (class): Snapshot class instance
"""
snap.electric = np.stack((snapEx_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf],
snapEy_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf],
snapEz_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf])).reshape(-1, order='F')
snap.magnetic = np.stack((snapHx_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf],
snapHy_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf],
snapHz_gpu[i, snap.xs:snap.xf, snap.ys:snap.yf, snap.zs:snap.zf])).reshape(-1, order='F')

71
gprMax/snapshots_ext.pyx 普通文件
查看文件

@@ -0,0 +1,71 @@
# Copyright (C) 2015-2018: The University of Edinburgh
# Authors: Craig Warren and Antonis Giannopoulos
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
cimport numpy as np
from gprMax.constants cimport floattype_t
cpdef void calculate_snapshot_fields(
int nx,
int ny,
int nz,
floattype_t[:, :, ::1] sliceEx,
floattype_t[:, :, ::1] sliceEy,
floattype_t[:, :, ::1] sliceEz,
floattype_t[:, :, ::1] sliceHx,
floattype_t[:, :, ::1] sliceHy,
floattype_t[:, :, ::1] sliceHz,
floattype_t[:, :, ::1] snapEx,
floattype_t[:, :, ::1] snapEy,
floattype_t[:, :, ::1] snapEz,
floattype_t[:, :, ::1] snapHx,
floattype_t[:, :, ::1] snapHy,
floattype_t[:, :, ::1] snapHz
):
"""This function calculates electric and magnetic values at points from
averaging values in cells
Args:
nx, ny, nz (int): Size of snapshot array
sliceEx, sliceEy, sliceEz,
sliceHx, sliceHy, sliceHz (memoryview): Access to slices of field arrays
snapEx, snapEy, snapEz, snapHx,
snapHy, snapHz (memoryview): Access to snapshot arrays
"""
cdef Py_ssize_t i, j, k
for i in range(nx):
for j in range(ny):
for k in range(nz):
# The electric field component value at a point comes from
# average of the 4 electric field component values in that cell
snapEx[i, j, k] = (sliceEx[i, j, k] + sliceEx[i, j + 1, k] +
sliceEx[i, j, k + 1] + sliceEx[i, j + 1, k + 1]) / 4
snapEy[i, j, k] = (sliceEy[i, j, k] + sliceEy[i + 1, j, k] +
sliceEy[i, j, k + 1] + sliceEy[i + 1, j, k + 1]) / 4
snapEz[i, j, k] = (sliceEz[i, j, k] + sliceEz[i + 1, j, k] +
sliceEz[i, j + 1, k] + sliceEz[i + 1, j + 1, k]) / 4
# The magnetic field component value at a point comes from average
# of 2 magnetic field component values in that cell and the following cell
snapHx[i, j, k] = (sliceHx[i, j, k] + sliceHx[i + 1, j, k]) / 2
snapHy[i, j, k] = (sliceHy[i, j, k] + sliceHy[i, j + 1, k]) / 2
snapHz[i, j, k] = (sliceHz[i, j, k] + sliceHz[i, j, k + 1]) / 2

79
gprMax/snapshots_gpu.py 普通文件
查看文件

@@ -0,0 +1,79 @@
# Copyright (C) 2015-2018: The University of Edinburgh
# Authors: Craig Warren and Antonis Giannopoulos
#
# This file is part of gprMax.
#
# gprMax is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gprMax is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from string import Template
kernel_template_store_snapshot = Template("""
// Macros for converting subscripts to linear index:
#define INDEX3D_FIELDS(i, j, k) (i)*($NY_FIELDS)*($NZ_FIELDS)+(j)*($NZ_FIELDS)+(k)
#define INDEX4D_SNAPS(p, i, j, k) (p)*($NX_SNAPS)*($NY_SNAPS)*($NZ_SNAPS)+(i)*($NY_SNAPS)*($NZ_SNAPS)+(j)*($NZ_SNAPS)+(k)
////////////////////
// Store snapshot //
////////////////////
__global__ void store_snapshot(int p, int xs, int xf, int ys, int yf, int zs, int zf, int dx, int dy, int dz,
const $REAL* __restrict__ Ex, const $REAL* __restrict__ Ey,
const $REAL* __restrict__ Ez, const $REAL* __restrict__ Hx,
const $REAL* __restrict__ Hy, const $REAL* __restrict__ Hz,
$REAL *snapEx, $REAL *snapEy, $REAL *snapEz,
$REAL *snapHx, $REAL *snapHy, $REAL *snapHz) {
// This function stores field values for a snapshot.
//
// Args:
// p: Snapshot number
// xs, xf, ys, yf, xs, xf: Start and finish cell coordinates for snapshot
// dx, dy, dz: Sampling interval in cell coordinates for snapshot
// E, H: Access to field component arrays
// snapEx, snapEy, snapEz, snapHx, snapHy, snapHz: Access to arrays to store snapshots
// Obtain the linear index corresponding to the current thread
int idx = blockIdx.x * blockDim.x + threadIdx.x;
// Convert the linear index to subscripts for 4D SNAPS array
int i = (idx % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) / ($NY_SNAPS * $NZ_SNAPS);
int j = ((idx % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) / $NZ_SNAPS;
int k = ((idx % ($NX_SNAPS * $NY_SNAPS * $NZ_SNAPS)) % ($NY_SNAPS * $NZ_SNAPS)) % $NZ_SNAPS;
// Subscripts for field arrays
int ii, jj, kk;
if (i >= xs && i < xf && j >= ys && j < yf && k >= zs && k < zf) {
// Increment subscripts for field array to account for spatial sampling of snapshot
ii = (xs + i) * dx;
jj = (ys + j) * dy;
kk = (zs + k) * dz;
// The electric field component value at a point comes from an average of
// the 4 electric field component values in that cell
snapEx[INDEX4D_SNAPS(p,i,j,k)] = (Ex[INDEX3D_FIELDS(ii,jj,kk)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk)] + Ex[INDEX3D_FIELDS(ii,jj,kk+1)] + Ex[INDEX3D_FIELDS(ii,jj+1,kk+1)]) / 4;
snapEy[INDEX4D_SNAPS(p,i,j,k)] = (Ey[INDEX3D_FIELDS(ii,jj,kk)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk)] + Ey[INDEX3D_FIELDS(ii,jj,kk+1)] + Ey[INDEX3D_FIELDS(ii+1,jj,kk+1)]) / 4;
snapEz[INDEX4D_SNAPS(p,i,j,k)] = (Ez[INDEX3D_FIELDS(ii,jj,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj,kk)] + Ez[INDEX3D_FIELDS(ii,jj+1,kk)] + Ez[INDEX3D_FIELDS(ii+1,jj+1,kk)]) / 4;
// The magnetic field component value at a point comes from average of
// 2 magnetic field component values in that cell and the following cell
snapHx[INDEX4D_SNAPS(p,i,j,k)] = (Hx[INDEX3D_FIELDS(ii,jj,kk)] + Hx[INDEX3D_FIELDS(ii+1,jj,kk)]) / 2;
snapHy[INDEX4D_SNAPS(p,i,j,k)] = (Hy[INDEX3D_FIELDS(ii,jj,kk)] + Hy[INDEX3D_FIELDS(ii,jj+1,kk)]) / 2;
snapHz[INDEX4D_SNAPS(p,i,j,k)] = (Hz[INDEX3D_FIELDS(ii,jj,kk)] + Hz[INDEX3D_FIELDS(ii,jj,kk+1)]) / 2;
}
}
""")