From df0c3cf20576ecc40ff6fa47638bb6c8555f315f Mon Sep 17 00:00:00 2001 From: craig-warren Date: Fri, 8 Nov 2019 16:18:29 +0000 Subject: [PATCH] More work to get dispersive materials and GPU working. --- gprMax/grid.py | 80 +++----- gprMax/materials.py | 8 +- gprMax/model_build_run.py | 88 ++------- gprMax/pml.py | 128 ++++++------- gprMax/receivers.py | 4 +- gprMax/scene.py | 25 ++- gprMax/snapshots.py | 4 +- gprMax/sources.py | 4 +- gprMax/updates.py | 381 ++++++++++++++++++++------------------ gprMax/utilities.py | 72 ++++++- 10 files changed, 380 insertions(+), 414 deletions(-) diff --git a/gprMax/grid.py b/gprMax/grid.py index 3ff7fb4c..474ae3d4 100644 --- a/gprMax/grid.py +++ b/gprMax/grid.py @@ -133,7 +133,7 @@ class FDTDGrid: else: return False - def _initialise_geometry_arrays(self): + def initialise_geometry_arrays(self): """Initialise an array for volumetric material IDs (solid); boolean arrays for specifying whether materials can have dielectric smoothing (rigid); and an array for cell edge IDs (ID). @@ -146,7 +146,7 @@ class FDTDGrid: self.ID = np.ones((6, self.nx + 1, self.ny + 1, self.nz + 1), dtype=np.uint32) self.IDlookup = {'Ex': 0, 'Ey': 1, 'Ez': 2, 'Hx': 3, 'Hy': 4, 'Hz': 5} - def _initialise_field_arrays(self): + def initialise_field_arrays(self): """Initialise arrays for the electric and magnetic field components.""" self.Ex = np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=config.sim_config.dtypes['float_or_double']) @@ -161,12 +161,6 @@ class FDTDGrid: self.Hz = np.zeros((self.nx + 1, self.ny + 1, self.nz + 1), dtype=config.sim_config.dtypes['float_or_double']) - def initialise_grids(self): - """Initialise all grids.""" - for g in [self] + self.subgrids: - g._initialise_geometry_arrays() - g._initialise_field_arrays() - def initialise_std_update_coeff_arrays(self): """Initialise arrays for storing update coefficients.""" self.updatecoeffsE = np.zeros((len(self.materials), 5), @@ -175,19 +169,33 @@ class FDTDGrid: dtype=config.sim_config.dtypes['float_or_double']) def initialise_dispersive_arrays(self): - """Initialise arrays for storing coefficients when there are dispersive - materials present. - """ + """Initialise field arrays when there are dispersive materials present.""" self.Tx = np.zeros((config.model_configs[self.model_num].materials['maxpoles'], self.nx + 1, self.ny + 1, self.nz + 1), dtype=config.model_configs[self.model_num].materials['dispersivedtype']) self.Ty = np.zeros((config.model_configs[self.model_num].materials['maxpoles'], self.nx + 1, self.ny + 1, self.nz + 1), dtype=config.model_configs[self.model_num].materials['dispersivedtype']) self.Tz = np.zeros((config.model_configs[self.model_num].materials['maxpoles'], self.nx + 1, self.ny + 1, self.nz + 1), dtype=config.model_configs[self.model_num].materials['dispersivedtype']) + + def initialise_dispersive_update_coeff_array(self): + """Initialise array for storing update coefficients when there are dispersive + materials present. + """ self.updatecoeffsdispersive = np.zeros((len(self.materials), 3 * config.model_configs[self.model_num].materials['maxpoles']), dtype=config.model_configs[self.model_num].materials['dispersivedtype']) + def reset_fields(self): + """Clear arrays for field components and PMLs.""" + # Clear arrays for field components + self.initialise_field_arrays() + if config.model_configs[self.model_num].materials['maxpoles'] > 0: + self.initialise_dispersive_arrays() + + # Clear arrays for fields in PML + for pml in self.pmls: + pml.initialise_field_arrays() + def mem_est_basic(self): """Estimate the amount of memory (RAM) required for grid arrays. @@ -269,17 +277,6 @@ class FDTDGrid: self.ID[1, :, :, 0] = 0 self.ID[1, :, :, 1] = 0 - def reset_fields(self): - """Clear arrays for field components and PMLs.""" - # Clear arrays for field components - self.initialise_field_arrays() - if config.model_configs[self.model_num].materials['maxpoles'] != 0: - self.initialise_dispersive_arrays() - - # Clear arrays for fields in PML - for pml in self.pmls: - pml.initialise_field_arrays() - def calculate_dt(self): """Calculate time step at the CFL limit.""" self.dt = (1 / (config.sim_config.em_consts['c'] * np.sqrt( @@ -304,7 +301,6 @@ class CUDAGrid(FDTDGrid): # Blocks per grid - used for main electric/magnetic field updates self.bpg = None - def set_blocks_per_grid(self): """Set the blocks per grid size used for updating the electric and magnetic field arrays on a GPU. @@ -313,13 +309,13 @@ class CUDAGrid(FDTDGrid): self.bpg = (int(np.ceil(((self.nx + 1) * (self.ny + 1) * (self.nz + 1)) / self.tpb[0])), 1, 1) - def _initialise_geometry_arrays_gpu(self): + def htod_geometry_arrays(self): """Initialise an array for cell edge IDs (ID) on GPU.""" import pycuda.gpuarray as gpuarray self.ID_gpu = gpuarray.to_gpu(self.ID) - def _initialise_field_arrays_gpu(self): + def htod_field_arrays(self): """Initialise geometry and field arrays on GPU.""" import pycuda.gpuarray as gpuarray @@ -331,15 +327,7 @@ class CUDAGrid(FDTDGrid): self.Hy_gpu = gpuarray.to_gpu(self.Hy) self.Hz_gpu = gpuarray.to_gpu(self.Hz) - def initialise_grids_gpu(self): - """Initialise all grids. Called when CUDAUpdates is initialised.""" - for g in [self] + self.subgrids: - g._initialise_geometry_arrays_gpu() - g._initialise_field_arrays_gpu() - if config.model_configs[g.model_num].materials['maxpoles'] > 0: - g._initialise_dispersive_arrays_gpu() - - def _initialise_dispersive_arrays_gpu(self): + def htod_dispersive_arrays(self): """Initialise dispersive material coefficient arrays on GPU.""" import pycuda.gpuarray as gpuarray @@ -354,28 +342,16 @@ class CUDAGrid(FDTDGrid): super().reset_fields() - # Clear arrays for field components - self.initialise_grids_gpu() + # Copy arrays geometry and field component arrays + self.htod_geometry_arrays() + self.htod_field_arrays() + if config.model_configs[self.model_num].materials['maxpoles'] > 0: + self.htod_dispersive_arrays() - # Clear arrays for fields in PML + # Copy arrays for fields in PML for pml in self.pmls: pml.initialise_field_arrays_gpu() - def memory_check(self, snapsmemsize=0): - """Check if model can be run on specified GPU.""" - - memuse = super().mem_est_basic() - - if config.sim_config.general['cuda']: - if memuse - snapsmemsize > config.model_configs[self.model_num].cuda['gpu'].totalmem: - raise GeneralError(f"Memory (RAM) required ~{human_size(memuse)} exceeds {human_size(config.model_configs[self.model_num].cuda['gpu'].totalmem, a_kilobyte_is_1024_bytes=True)} detected on specified {config.model_configs[self.model_num].cuda['gpu'].deviceID} - {config.model_configs[self.model_num].cuda['gpu'].name} GPU!\n") - - # If the required memory for the model without the snapshots will - # fit on the GPU then transfer and store snaphots on host - if (snapsmemsize != 0 and memuse - snapsmemsize < - config.model_configs[self.model_num].cuda['gpu'].totalmem): - config.model_configs[self.model_num].cuda['snapsgpu2cpu'] = True - def dispersion_analysis(G): """Analysis of numerical dispersion (Taflove et al, 2005, p112) - diff --git a/gprMax/materials.py b/gprMax/materials.py index 4fafc21f..7dc8ac84 100644 --- a/gprMax/materials.py +++ b/gprMax/materials.py @@ -253,8 +253,8 @@ def process_materials(G): materialtext.append(str(material.numID)) materialtext.append(material.ID[:50] if len(material.ID) > 50 else material.ID) materialtext.append(material.type) - materialtext.append('{:g}'.format(material.er)) - materialtext.append('{:g}'.format(material.se)) + materialtext.append(f'{material.er:g}') + materialtext.append(f'{material.se:g}') if config.model_configs[G.model_num].materials['maxpoles'] > 0: if 'debye' in material.type: materialtext.append('\n'.join('{:g}'.format(deltaer) for deltaer in material.deltaer)) @@ -274,8 +274,8 @@ def process_materials(G): else: materialtext.extend(['', '', '', '', '']) - materialtext.append('{:g}'.format(material.mr)) - materialtext.append('{:g}'.format(material.sm)) + materialtext.append(f'{material.mr:g}') + materialtext.append(f'{material.sm:g}') materialtext.append(material.averagable) materialsdata.append(materialtext) diff --git a/gprMax/model_build_run.py b/gprMax/model_build_run.py index 39ea6025..44e69982 100644 --- a/gprMax/model_build_run.py +++ b/gprMax/model_build_run.py @@ -17,10 +17,9 @@ # along with gprMax. If not, see . import datetime -from importlib import import_module import itertools import logging -import os +from pathlib import Path import psutil import sys @@ -28,51 +27,28 @@ from colorama import init from colorama import Fore from colorama import Style init() -import cython import numpy as np from terminaltables import SingleTable from tqdm import tqdm -from pathlib import Path import gprMax.config as config -from .cuda.fields_updates import kernel_template_fields -from .cuda.snapshots import kernel_template_store_snapshot -from .cuda.source_updates import kernel_template_sources from .cython.yee_cell_build import build_electric_components from .cython.yee_cell_build import build_magnetic_components from .exceptions import GeneralError -from .fields_outputs import kernel_template_store_outputs from .fields_outputs import write_hdf5_outputfile -from .grid import FDTDGrid from .grid import dispersion_analysis -from .hash_cmds_geometry import process_geometrycmds -from .hash_cmds_file import process_python_include_code -from .hash_cmds_file import write_processed_file -from .hash_cmds_file import check_cmd_names from .hash_cmds_file import parse_hash_commands -from .hash_cmds_singleuse import process_singlecmds -from .hash_cmds_multiuse import process_multicmds from .materials import Material from .materials import process_materials -from .pml import CFS from .pml import build_pml -from .pml import pml_info -from .receivers import initialise_rx_arrays_gpu -from .receivers import get_rx_array_gpu -from .receivers import Rx +from .pml import print_pml_info from .scene import Scene from .snapshots import Snapshot -from .snapshots import initialise_snapshot_array_gpu -from .snapshots import get_snapshot_array_gpu from .solvers import create_solver -from .sources import initialise_src_arrays_gpu from .utilities import get_terminal_width from .utilities import human_size -from .utilities import mem_check -from .utilities import open_path_file -from .utilities import round32 +from .utilities import mem_check_all from .utilities import set_omp_threads -from .utilities import timer log = logging.getLogger(__name__) @@ -160,66 +136,38 @@ class ModelBuildRun: scene = self.build_scene() - # Combine available grids and check basic memory requirements + # Combine available grids and check memory requirements grids = [G] + G.subgrids - for grid in grids: - config.model_configs[G.model_num].mem_use += grid.mem_est_basic() - mem_check(config.model_configs[G.model_num].mem_use) - - # Set datatype for dispersive arrays if there are any dispersive materials. - if config.model_configs[G.model_num].materials['maxpoles'] != 0: - drudelorentz = any([m for m in G.materials if 'drude' in m.type or 'lorentz' in m.type]) - if drudelorentz: - config.model_configs[G.model_num].materials['dispersivedtype'] = config.sim_config.dtypes['complex'] - config.model_configs[G.model_num].materials['dispersiveCdtype'] = config.sim_config.dtypes['C_complex'] - else: - config.model_configs[G.model_num].materials['dispersivedtype'] = config.sim_config.dtypes['float_or_double'] - config.model_configs[G.model_num].materials['dispersiveCdtype'] = config.sim_config.dtypes['C_float_or_double'] - - # Update estimated memory (RAM) usage - config.model_configs[G.model_num].mem_use += G.mem_est_dispersive() - mem_check(config.model_configs[G.model_num].mem_use) - - # Check there is sufficient memory to store any snapshots - if G.snapshots: - snaps_mem = 0 - for snap in G.snapshots: - # 2 x required to account for electric and magnetic fields - snaps_mem += int(2 * snap.datasizefield) - config.model_configs[G.model_num].mem_use += snaps_mem - # Check if there is sufficient memory on host - mem_check(config.model_configs[G.model_num].mem_use) - if config.sim_config.general['cuda']: - mem_check_gpu_snaps(G.model_num, snaps_mem) - - log.info(f'\nMemory (RAM) required: ~{human_size(config.model_configs[G.model_num].mem_use)}') + total_mem = mem_check_all(grids) + log.info(f'\nMemory (RAM) required: ~{human_size(total_mem)}') # Build grids gridbuilders = [GridBuilder(grid) for grid in grids] for gb in gridbuilders: - log.info(pml_info(gb.grid)) + log.info(print_pml_info(gb.grid)) if not all(value == 0 for value in gb.grid.pmlthickness.values()): gb.build_pmls() gb.build_components() gb.tm_grid_update() gb.update_voltage_source_materials() + gb.grid.initialise_field_arrays() gb.grid.initialise_std_update_coeff_arrays() - if config.model_configs[G.model_num].materials['maxpoles'] != 0: + if config.model_configs[gb.grid.model_num].materials['maxpoles'] > 0: gb.grid.initialise_dispersive_arrays() + gb.grid.initialise_dispersive_update_coeff_array() gb.build_materials() - # Check to see if numerical dispersion might be a problem - for grid in grids: - results = dispersion_analysis(grid) + # Check to see if numerical dispersion might be a problem + results = dispersion_analysis(gb.grid) if results['error']: - log.warning(Fore.RED + f"\nNumerical dispersion analysis ({grid.name}) not carried out as {results['error']}" + Style.RESET_ALL) - elif results['N'] < config.model_configs[G.model_num].numdispersion['mingridsampling']: - raise GeneralError(f"\nNon-physical wave propagation in {grid.name} detected. Material '{results['material'].ID}' has wavelength sampled by {results['N']} cells, less than required minimum for physical wave propagation. Maximum significant frequency estimated as {results['maxfreq']:g}Hz") + log.warning(Fore.RED + f"\nNumerical dispersion analysis ({gb.grid.name}) not carried out as {results['error']}" + Style.RESET_ALL) + elif results['N'] < config.model_configs[gb.grid.model_num].numdispersion['mingridsampling']: + raise GeneralError(f"\nNon-physical wave propagation in {gb.grid.name} detected. Material '{results['material'].ID}' has wavelength sampled by {results['N']} cells, less than required minimum for physical wave propagation. Maximum significant frequency estimated as {results['maxfreq']:g}Hz") elif (results['deltavp'] and np.abs(results['deltavp']) > - config.model_configs[G.model_num].numdispersion['maxnumericaldisp']): - log.warning(Fore.RED + f"\n{grid.name} has potentially significant numerical dispersion. Estimated largest physical phase-velocity error is {results['deltavp']:.2f}% in material '{results['material'].ID}' whose wavelength sampled by {results['N']} cells. Maximum significant frequency estimated as {results['maxfreq']:g}Hz" + Style.RESET_ALL) + config.model_configs[gb.grid.model_num].numdispersion['maxnumericaldisp']): + log.warning(Fore.RED + f"\n{gb.grid.name} has potentially significant numerical dispersion. Estimated largest physical phase-velocity error is {results['deltavp']:.2f}% in material '{results['material'].ID}' whose wavelength sampled by {results['N']} cells. Maximum significant frequency estimated as {results['maxfreq']:g}Hz" + Style.RESET_ALL) elif results['deltavp']: - log.info(f"\nNumerical dispersion analysis ({grid.name}): estimated largest physical phase-velocity error is {results['deltavp']:.2f}% in material '{results['material'].ID}' whose wavelength sampled by {results['N']} cells. Maximum significant frequency estimated as {results['maxfreq']:g}Hz") + log.info(f"\nNumerical dispersion analysis ({gb.grid.name}): estimated largest physical phase-velocity error is {results['deltavp']:.2f}% in material '{results['material'].ID}' whose wavelength sampled by {results['N']} cells. Maximum significant frequency estimated as {results['maxfreq']:g}Hz") def reuse_geometry(self): # Reset iteration number diff --git a/gprMax/pml.py b/gprMax/pml.py index 4970f7e6..39058a45 100644 --- a/gprMax/pml.py +++ b/gprMax/pml.py @@ -182,6 +182,7 @@ class PML: xs, xf, ys, yf, zs, zf (float): Extent of the PML slab. """ + self.G = G self.ID = ID self.direction = direction self.xs = xs @@ -196,16 +197,16 @@ class PML: # Spatial discretisation and thickness if self.direction[0] == 'x': - self.d = G.dx + self.d = self.G.dx self.thickness = self.nx elif self.direction[0] == 'y': - self.d = G.dy + self.d = self.G.dy self.thickness = self.ny elif self.direction[0] == 'z': - self.d = G.dz + self.d = self.G.dz self.thickness = self.nz - self.CFS = G.cfs + self.CFS = self.G.cfs self.initialise_field_arrays() @@ -240,13 +241,12 @@ class PML: self.HPhi2 = np.zeros((len(self.CFS), self.nx, self.ny + 1, self.nz), dtype=config.sim_config.dtypes['float_or_double']) - def calculate_update_coeffs(self, er, mr, G): + def calculate_update_coeffs(self, er, mr): """Calculates electric and magnetic update coefficients for the PML. Args: er (float): Average permittivity of underlying material mr (float): Average permeability of underlying material - G (FDTDGrid): Holds essential parameters describing the model. """ self.ERA = np.zeros((len(self.CFS), self.thickness), @@ -268,70 +268,63 @@ class PML: for x, cfs in enumerate(self.CFS): if not cfs.sigma.max: - cfs.calculate_sigmamax(self.d, er, mr, G) + cfs.calculate_sigmamax(self.d, er, mr, self.G) Ealpha, Halpha = cfs.calculate_values(self.thickness, cfs.alpha) Ekappa, Hkappa = cfs.calculate_values(self.thickness, cfs.kappa) Esigma, Hsigma = cfs.calculate_values(self.thickness, cfs.sigma) # Define different parameters depending on PML formulation - if G.pmlformulation == 'HORIPML': + if self.G.pmlformulation == 'HORIPML': # HORIPML electric update coefficients - tmp = (2 * config.sim_config.em_consts['e0'] * Ekappa) + G.dt * (Ealpha * Ekappa + Esigma) - self.ERA[x, :] = (2 * config.sim_config.em_consts['e0'] + G.dt * Ealpha) / tmp + tmp = (2 * config.sim_config.em_consts['e0'] * Ekappa) + self.G.dt * (Ealpha * Ekappa + Esigma) + self.ERA[x, :] = (2 * config.sim_config.em_consts['e0'] + self.G.dt * Ealpha) / tmp self.ERB[x, :] = (2 * config.sim_config.em_consts['e0'] * Ekappa) / tmp - self.ERE[x, :] = ((2 * config.sim_config.em_consts['e0'] * Ekappa) - G.dt + self.ERE[x, :] = ((2 * config.sim_config.em_consts['e0'] * Ekappa) - self.G.dt * (Ealpha * Ekappa + Esigma)) / tmp - self.ERF[x, :] = (2 * Esigma * G.dt) / (Ekappa * tmp) + self.ERF[x, :] = (2 * Esigma * self.G.dt) / (Ekappa * tmp) # HORIPML magnetic update coefficients - tmp = (2 * config.sim_config.em_consts['e0'] * Hkappa) + G.dt * (Halpha * Hkappa + Hsigma) - self.HRA[x, :] = (2 * config.sim_config.em_consts['e0'] + G.dt * Halpha) / tmp + tmp = (2 * config.sim_config.em_consts['e0'] * Hkappa) + self.G.dt * (Halpha * Hkappa + Hsigma) + self.HRA[x, :] = (2 * config.sim_config.em_consts['e0'] + self.G.dt * Halpha) / tmp self.HRB[x, :] = (2 * config.sim_config.em_consts['e0'] * Hkappa) / tmp - self.HRE[x, :] = ((2 * config.sim_config.em_consts['e0'] * Hkappa) - G.dt + self.HRE[x, :] = ((2 * config.sim_config.em_consts['e0'] * Hkappa) - self.G.dt * (Halpha * Hkappa + Hsigma)) / tmp - self.HRF[x, :] = (2 * Hsigma * G.dt) / (Hkappa * tmp) + self.HRF[x, :] = (2 * Hsigma * self.G.dt) / (Hkappa * tmp) - elif G.pmlformulation == 'MRIPML': - tmp = 2 * config.sim_config.em_consts['e0'] + G.dt * Ealpha - self.ERA[x, :] = Ekappa + (G.dt * Esigma) / tmp + elif self.G.pmlformulation == 'MRIPML': + # MRIPML electric update coefficients + tmp = 2 * config.sim_config.em_consts['e0'] + self.G.dt * Ealpha + self.ERA[x, :] = Ekappa + (self.G.dt * Esigma) / tmp self.ERB[x, :] = (2 * config.sim_config.em_consts['e0']) / tmp - self.ERE[x, :] = ((2 * config.sim_config.em_consts['e0']) - G.dt * Ealpha) / tmp - self.ERF[x, :] = (2 * Esigma * G.dt) / tmp + self.ERE[x, :] = ((2 * config.sim_config.em_consts['e0']) - self.G.dt * Ealpha) / tmp + self.ERF[x, :] = (2 * Esigma * self.G.dt) / tmp # MRIPML magnetic update coefficients - tmp = 2 * config.sim_config.em_consts['e0'] + G.dt * Halpha - self.HRA[x, :] = Hkappa + (G.dt * Hsigma) / tmp + tmp = 2 * config.sim_config.em_consts['e0'] + self.G.dt * Halpha + self.HRA[x, :] = Hkappa + (self.G.dt * Hsigma) / tmp self.HRB[x, :] = (2 * config.sim_config.em_consts['e0']) / tmp - self.HRE[x, :] = ((2 * config.sim_config.sim_config.em_consts['e0']) - G.dt * Halpha) / tmp - self.HRF[x, :] = (2 * Hsigma * G.dt) / tmp + self.HRE[x, :] = ((2 * config.sim_config.sim_config.em_consts['e0']) - self.G.dt * Halpha) / tmp + self.HRF[x, :] = (2 * Hsigma * self.G.dt) / tmp - def update_electric(self, G): - """This functions updates electric field components with the PML correction. + def update_electric(self): + """This functions updates electric field components with the PML correction.""" - Args: - G (FDTDGrid): Holds essential parameters describing the model. - """ - - pmlmodule = 'gprMax.cython.pml_updates_electric_' + G.pmlformulation + pmlmodule = 'gprMax.cython.pml_updates_electric_' + self.G.pmlformulation func = getattr(import_module(pmlmodule), 'order' + str(len(self.CFS)) + '_' + self.direction) func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf, - config.sim_config.hostinfo['ompthreads'], G.updatecoeffsE, G.ID, - G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, self.EPhi1, self.EPhi2, - self.ERA, self.ERB, self.ERE, self.ERF, self.d) + config.sim_config.hostinfo['ompthreads'], self.G.updatecoeffsE, self.G.ID, + self.G.Ex, self.G.Ey, self.G.Ez, self.G.Hx, self.G.Hy, self.G.Hz, + self.EPhi1, self.EPhi2, self.ERA, self.ERB, self.ERE, self.ERF, self.d) - def update_magnetic(self, G): - """This functions updates magnetic field components with the PML correction. + def update_magnetic(self): + """This functions updates magnetic field components with the PML correction.""" - Args: - G (FDTDGrid): Holds essential parameters describing the model. - """ - - pmlmodule = 'gprMax.cython.pml_updates_magnetic_' + G.pmlformulation + pmlmodule = 'gprMax.cython.pml_updates_magnetic_' + self.G.pmlformulation func = getattr(import_module(pmlmodule), 'order' + str(len(self.CFS)) + '_' + self.direction) func(self.xs, self.xf, self.ys, self.yf, self.zs, self.zf, - config.sim_config.hostinfo['ompthreads'], G.updatecoeffsH, G.ID, - G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, self.HPhi1, self.HPhi2, - self.HRA, self.HRB, self.HRE, self.HRF, self.d) + config.sim_config.hostinfo['ompthreads'], self.G.updatecoeffsH, self.G.ID, + self.G.Ex, self.G.Ey, self.G.Ez, self.G.Hx, self.G.Hy, self.G.Hz, + self.HPhi1, self.HPhi2, self.HRA, self.HRB, self.HRE, self.HRF, self.d) class CUDAPML(PML): @@ -339,7 +332,7 @@ class CUDAPML(PML): solving on GPU using CUDA. """ - def initialise_field_arrays_gpu(self): + def htod_field_arrays(self): """Initialise PML field and coefficient arrays on GPU.""" import pycuda.gpuarray as gpuarray @@ -357,16 +350,12 @@ class CUDAPML(PML): self.HPhi1_gpu = gpuarray.to_gpu(self.HPhi1) self.HPhi2_gpu = gpuarray.to_gpu(self.HPhi2) - def set_blocks_per_grid(self, G): - """Set the blocks per grid size used for updating the PML field arrays on a GPU. - - Args: - G (FDTDGrid): Parameters describing a grid in a model. - """ - + def set_blocks_per_grid(self): + """Set the blocks per grid size used for updating the PML field arrays + on a GPU.""" self.bpg = (int(np.ceil(((self.EPhi1_gpu.shape[1] + 1) * (self.EPhi1_gpu.shape[2] + 1) * - (self.EPhi1_gpu.shape[3] + 1)) / G.tpb[0])), 1, 1) + (self.EPhi1_gpu.shape[3] + 1)) / self.G.tpb[0])), 1, 1) def get_update_funcs(self, kernelselectric, kernelsmagnetic): """Get update functions from PML kernels. @@ -381,14 +370,10 @@ class CUDAPML(PML): self.update_electric_gpu = kernelselectric.get_function('order' + str(len(self.CFS)) + '_' + self.direction) self.update_magnetic_gpu = kernelsmagnetic.get_function('order' + str(len(self.CFS)) + '_' + self.direction) - def update_electric(self, G): + def update_electric(self): """This functions updates electric field components with the PML correction on the GPU. - - Args: - G (FDTDGrid): Parameters describing a grid in a model. """ - self.update_electric_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), np.int32(self.zs), np.int32(self.zf), @@ -399,21 +384,18 @@ class CUDAPML(PML): np.int32(self.EPhi2_gpu.shape[2]), np.int32(self.EPhi2_gpu.shape[3]), np.int32(self.thickness), - G.ID_gpu.gpudata, - G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, - G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, + self.G.ID_gpu.gpudata, + self.G.Ex_gpu.gpudata, self.G.Ey_gpu.gpudata, self.G.Ez_gpu.gpudata, + self.G.Hx_gpu.gpudata, self.G.Hy_gpu.gpudata, self.G.Hz_gpu.gpudata, self.EPhi1_gpu.gpudata, self.EPhi2_gpu.gpudata, self.ERA_gpu.gpudata, self.ERB_gpu.gpudata, self.ERE_gpu.gpudata, self.ERF_gpu.gpudata, config.sim_config.dtypes['float_or_double'](self.d), - block=G.tpb, grid=self.bpg) + block=self.G.tpb, grid=self.bpg) - def update_magnetic(self, G): + def update_magnetic(self): """This functions updates magnetic field components with the PML correction on the GPU. - - Args: - G (FDTDGrid): Parameters describing a grid in a model. """ self.update_magnetic_gpu(np.int32(self.xs), np.int32(self.xf), np.int32(self.ys), np.int32(self.yf), @@ -425,16 +407,16 @@ class CUDAPML(PML): np.int32(self.HPhi2_gpu.shape[2]), np.int32(self.HPhi2_gpu.shape[3]), np.int32(self.thickness), - G.ID_gpu.gpudata, - G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, - G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, + self.G.ID_gpu.gpudata, + self.G.Ex_gpu.gpudata, self.G.Ey_gpu.gpudata, self.G.Ez_gpu.gpudata, + self.G.Hx_gpu.gpudata, self.G.Hy_gpu.gpudata, self.G.Hz_gpu.gpudata, self.HPhi1_gpu.gpudata, self.HPhi2_gpu.gpudata, self.HRA_gpu.gpudata, self.HRB_gpu.gpudata, self.HRE_gpu.gpudata, self.HRF_gpu.gpudata, config.sim_config.dtypes['float_or_double'](self.d), - block=G.tpb, grid=self.bpg) + block=self.G.tpb, grid=self.bpg) -def pml_info(G): +def print_pml_info(G): """Information about PMLs. Args: @@ -516,4 +498,4 @@ def build_pml(G, key, value): averageer = sumer / (G.nx * G.ny) averagemr = summr / (G.nx * G.ny) - pml.calculate_update_coeffs(averageer, averagemr, G) + pml.calculate_update_coeffs(averageer, averagemr) diff --git a/gprMax/receivers.py b/gprMax/receivers.py index 399a8959..cfb9a0db 100644 --- a/gprMax/receivers.py +++ b/gprMax/receivers.py @@ -42,7 +42,7 @@ class Rx: self.zcoordorigin = None -def initialise_rx_arrays_gpu(G): +def htod_rx_arrays(G): """Initialise arrays on GPU for receiver coordinates and to store field components for receivers. @@ -79,7 +79,7 @@ def initialise_rx_arrays_gpu(G): return rxcoords_gpu, rxs_gpu -def get_rx_array_gpu(rxs_gpu, rxcoords_gpu, G): +def dtoh_rx_array(rxs_gpu, rxcoords_gpu, G): """Copy output from receivers array used on GPU back to receiver objects. Args: diff --git a/gprMax/scene.py b/gprMax/scene.py index f8438847..2debddd2 100644 --- a/gprMax/scene.py +++ b/gprMax/scene.py @@ -59,7 +59,6 @@ class Scene: raise GeneralError('This object is unknown to gprMax') def process_subgrid_commands(self, subgrids): - # Check for subgrid user objects def func(obj): if isinstance(obj, SubGridUserBase): @@ -113,30 +112,28 @@ class Scene: self.process_cmds(cmds_unique, G) def create_internal_objects(self, G): + """API presents the user with UserObjects in order to build the internal + Rx(), Cylinder() etc... objects. This function essentially calls the + UserObject.create() function in the correct way. + """ - # fractal box commands have an additional nonuser object which - # process modifications + # Fractal box commands have an additional nonuser object which + # processes modifications fbb = FractalBoxBuilder() self.add(fbb) - # gprMax API presents the user with UserObjects in order to build - # the internal Rx(), Cylinder() etc... objects. This function - # essentially calls the UserObject.create() function in the correct - # way - - # Traverse all the user objects in the correct order and create them. + # Create pre-defined (built-in) materials create_built_in_materials(G) - # process commands that can onlyhave a single instance + # Process commands that can only have a single instance self.process_singlecmds(G) # Process main grid multiple commands self.process_cmds(self.multiple_cmds, G) - # Initialise an array for volumetric material IDs (solid), boolean - # arrays for specifying materials not to be averaged (rigid), - # an array for cell edge IDs (ID) - G.initialise_grids() + # Initialise geometry arrays for main and subgrids + for grid in [G] + G.subgrids: + grid.initialise_geometry_arrays() # Process the main grid geometry commands self.process_cmds(self.geometry_cmds, G, sort=False) diff --git a/gprMax/snapshots.py b/gprMax/snapshots.py index 0a968a3e..1aa410cd 100644 --- a/gprMax/snapshots.py +++ b/gprMax/snapshots.py @@ -170,7 +170,7 @@ class Snapshot: self.filehandle.close() -def initialise_snapshot_array_gpu(G): +def htod_snapshot_array(G): """Initialise array on GPU for to store field data for snapshots. Args: @@ -224,7 +224,7 @@ def initialise_snapshot_array_gpu(G): return snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu -def get_snapshot_array_gpu(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap): +def dtoh_snapshot_array(snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu, i, snap): """Copy snapshot array used on GPU back to snapshot objects and store in format for Paraview. Args: diff --git a/gprMax/sources.py b/gprMax/sources.py index 0c54a582..46147660 100644 --- a/gprMax/sources.py +++ b/gprMax/sources.py @@ -236,12 +236,12 @@ class MagneticDipole(Source): (1 / (G.dx * G.dy * G.dz))) -def initialise_src_arrays_gpu(sources, G): +def htod_src_arrays(sources, G): """Initialise arrays on GPU for source coordinates/polarisation, other source information, and source waveform values. Args: - sources (list): List of sources of one class, e.g. HertzianDipoles. + sources (list): List of sources of one type, e.g. HertzianDipole G (FDTDGrid): Holds essential parameters describing the model. Returns: diff --git a/gprMax/updates.py b/gprMax/updates.py index 16cf6052..726c4783 100644 --- a/gprMax/updates.py +++ b/gprMax/updates.py @@ -29,12 +29,12 @@ from .cython.fields_updates_normal import update_electric as update_electric_cpu from .cython.fields_updates_normal import update_magnetic as update_magnetic_cpu from .fields_outputs import store_outputs as store_outputs_cpu from .fields_outputs import kernel_template_store_outputs -from .receivers import initialise_rx_arrays_gpu -from .receivers import get_rx_array_gpu +from .receivers import htod_rx_arrays +from .receivers import dtoh_rx_array from .snapshots import Snapshot -from .snapshots import initialise_snapshot_array_gpu -from .snapshots import get_snapshot_array_gpu -from .sources import initialise_src_arrays_gpu +from .snapshots import htod_snapshot_array +from .snapshots import dtoh_snapshot_array +from .sources import htod_src_arrays from .utilities import round32 from .utilities import timer @@ -85,7 +85,7 @@ class CPUUpdates: def update_magnetic_pml(self): """Update magnetic field components with the PML correction.""" for pml in self.grid.pmls: - pml.update_magnetic(self.grid) + pml.update_magnetic() def update_magnetic_sources(self): """Update magnetic field components from sources.""" @@ -139,7 +139,7 @@ class CPUUpdates: def update_electric_pml(self): """Update electric field components with the PML correction.""" for pml in self.grid.pmls: - pml.update_electric(self.grid) + pml.update_electric() def update_electric_sources(self): """Update electric field components from sources - @@ -262,13 +262,17 @@ class CUDAUpdates: self.ctx = self.dev.make_context() # Initialise arrays on GPU, prepare kernels, and get kernel functions - self.set_field_kernels() - self.set_pml_kernels() - self.set_rx_kernel() - self.set_src_kernels() - self.set_snapshot_kernel() + self._set_field_kernels() + if self.grid.pmls: + self._set_pml_kernels() + if self.grid.rxs: + self._set_rx_kernel() + if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles: + self._set_src_kernels() + if self.grid.snapshots: + self._set_snapshot_kernel() - def set_field_kernels(self): + def _set_field_kernels(self): """Electric and magnetic field updates - prepare kernels, and get kernel functions. """ @@ -310,125 +314,127 @@ class CUDAUpdates: options=config.sim_config.cuda['nvcc_opts']) self.update_electric_gpu = kernels_fields.get_function("update_electric") self.update_magnetic_gpu = kernels_fields.get_function("update_magnetic") - if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes > config.model_configs[self.grid.model_num].cuda['gpu'].constmem): - raise GeneralError(log.exception(f"Too many materials in the model to fit onto constant memory of size {human_size(config.model_configs[self.grid.model_num].cuda['gpu'].constmem)} on {config.model_configs[self.grid.model_num].cuda['gpu'].deviceID} - {config.model_configs[self.grid.model_num].cuda['gpu'].name} GPU")) - self.copy_mat_coeffs(kernels_fields, kernels_fields) + self._copy_mat_coeffs(kernels_fields, kernels_fields) - # Electric and magnetic field updates - dispersive materials - get kernel functions and initialise array on GPU - if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0: # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values). + # Electric and magnetic field updates - dispersive materials + # - get kernel functions and initialise array on GPU + # If there are any dispersive materials (updates are split into two + # parts as they require present and updated electric field values). + if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0: self.dispersive_update_a = kernels_fields.get_function("update_electric_dispersive_A") self.dispersive_update_b = kernels_fields.get_function("update_electric_dispersive_B") - # Electric and magnetic field updates - set blocks per grid and initialise field arrays on GPU + # Electric and magnetic field updates - set blocks per grid and + # initialise field arrays on GPU self.grid.set_blocks_per_grid() - self.grid.initialise_grids_gpu() + self.grid.htod_geometry_arrays() + self.grid.htod_field_arrays() + if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0: + self.grid.htod_dispersive_arrays() - def set_pml_kernels(self): + def _set_pml_kernels(self): """PMLS - prepare kernels and get kernel functions.""" - if self.grid.pmls: - pmlmodulelectric = 'gprMax.cuda.pml_updates_electric_' + self.grid.pmlformulation - kernelelectricfunc = getattr(import_module(pmlmodulelectric), - 'kernels_template_pml_electric_' + - self.grid.pmlformulation) - pmlmodulemagnetic = 'gprMax.cuda.pml_updates_magnetic_' + self.grid.pmlformulation - kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic), - 'kernels_template_pml_magnetic_' + - self.grid.pmlformulation) - kernels_pml_electric = self.source_module(kernelelectricfunc.substitute( - REAL=config.sim_config.dtypes['C_float_or_double'], - N_updatecoeffsE=self.grid.updatecoeffsE.size, - NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], - NX_FIELDS=self.grid.nx + 1, - NY_FIELDS=self.grid.ny + 1, - NZ_FIELDS=self.grid.nz + 1, - NX_ID=self.grid.ID.shape[1], - NY_ID=self.grid.ID.shape[2], - NZ_ID=self.grid.ID.shape[3]), - options=config.sim_config.cuda['nvcc_opts']) - kernels_pml_magnetic = self.source_module(kernelmagneticfunc.substitute( - REAL=config.sim_config.dtypes['C_float_or_double'], - N_updatecoeffsH=self.grid.updatecoeffsH.size, - NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1], - NX_FIELDS=self.grid.nx + 1, - NY_FIELDS=self.grid.ny + 1, - NZ_FIELDS=self.grid.nz + 1, - NX_ID=self.grid.ID.shape[1], - NY_ID=self.grid.ID.shape[2], - NZ_ID=self.grid.ID.shape[3]), - options=config.sim_config.cuda['nvcc_opts']) - self.copy_mat_coeffs(kernels_pml_electric, kernels_pml_magnetic) - # Set block per grid, initialise arrays on GPU, and get kernel functions - for pml in self.grid.pmls: - pml.initialise_field_arrays_gpu() - pml.get_update_funcs(kernels_pml_electric, kernels_pml_magnetic) - pml.set_blocks_per_grid(self.grid) + pmlmodulelectric = 'gprMax.cuda.pml_updates_electric_' + self.grid.pmlformulation + kernelelectricfunc = getattr(import_module(pmlmodulelectric), + 'kernels_template_pml_electric_' + + self.grid.pmlformulation) + pmlmodulemagnetic = 'gprMax.cuda.pml_updates_magnetic_' + self.grid.pmlformulation + kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic), + 'kernels_template_pml_magnetic_' + + self.grid.pmlformulation) + kernels_pml_electric = self.source_module(kernelelectricfunc.substitute( + REAL=config.sim_config.dtypes['C_float_or_double'], + N_updatecoeffsE=self.grid.updatecoeffsE.size, + NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], + NX_FIELDS=self.grid.nx + 1, + NY_FIELDS=self.grid.ny + 1, + NZ_FIELDS=self.grid.nz + 1, + NX_ID=self.grid.ID.shape[1], + NY_ID=self.grid.ID.shape[2], + NZ_ID=self.grid.ID.shape[3]), + options=config.sim_config.cuda['nvcc_opts']) + kernels_pml_magnetic = self.source_module(kernelmagneticfunc.substitute( + REAL=config.sim_config.dtypes['C_float_or_double'], + N_updatecoeffsH=self.grid.updatecoeffsH.size, + NY_MATCOEFFS=self.grid.updatecoeffsH.shape[1], + NX_FIELDS=self.grid.nx + 1, + NY_FIELDS=self.grid.ny + 1, + NZ_FIELDS=self.grid.nz + 1, + NX_ID=self.grid.ID.shape[1], + NY_ID=self.grid.ID.shape[2], + NZ_ID=self.grid.ID.shape[3]), + options=config.sim_config.cuda['nvcc_opts']) + self._copy_mat_coeffs(kernels_pml_electric, kernels_pml_magnetic) + # Set block per grid, initialise arrays on GPU, and get kernel functions + for pml in self.grid.pmls: + pml.htod_field_arrays() + pml.set_blocks_per_grid() + pml.get_update_funcs(kernels_pml_electric, kernels_pml_magnetic) - def set_rx_kernel(self): + def _set_rx_kernel(self): """Receivers - initialise arrays on GPU, prepare kernel and get kernel function. """ - if self.grid.rxs: - self.rxcoords_gpu, self.rxs_gpu = initialise_rx_arrays_gpu(self.grid) - kernel_store_outputs = self.source_module(kernel_template_store_outputs.substitute( - REAL=config.sim_config.dtypes['C_float_or_double'], - NY_RXCOORDS=3, - NX_RXS=6, - NY_RXS=self.grid.iterations, - NZ_RXS=len(self.grid.rxs), - NX_FIELDS=self.grid.nx + 1, - NY_FIELDS=self.grid.ny + 1, - NZ_FIELDS=self.grid.nz + 1), - options=config.sim_config.cuda['nvcc_opts']) - self.store_outputs_gpu = kernel_store_outputs.get_function("store_outputs") + self.rxcoords_gpu, self.rxs_gpu = htod_rx_arrays(self.grid) + kernel_store_outputs = self.source_module(kernel_template_store_outputs.substitute( + REAL=config.sim_config.dtypes['C_float_or_double'], + NY_RXCOORDS=3, + NX_RXS=6, + NY_RXS=self.grid.iterations, + NZ_RXS=len(self.grid.rxs), + NX_FIELDS=self.grid.nx + 1, + NY_FIELDS=self.grid.ny + 1, + NZ_FIELDS=self.grid.nz + 1), + options=config.sim_config.cuda['nvcc_opts']) + self.store_outputs_gpu = kernel_store_outputs.get_function("store_outputs") - def set_src_kernels(self): + def _set_src_kernels(self): """Sources - initialise arrays on GPU, prepare kernel and get kernel function. """ - if self.grid.voltagesources + self.grid.hertziandipoles + self.grid.magneticdipoles: - kernels_sources = self.source_module(kernel_template_sources.substitute( - REAL=config.sim_config.dtypes['C_float_or_double'], - N_updatecoeffsE=self.grid.updatecoeffsE.size, - N_updatecoeffsH=self.grid.updatecoeffsH.size, - NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], - NY_SRCINFO=4, - NY_SRCWAVES=self.grid.iterations, - NX_FIELDS=self.grid.nx + 1, - NY_FIELDS=self.grid.ny + 1, - NZ_FIELDS=self.grid.nz + 1, - NX_ID=self.grid.ID.shape[1], - NY_ID=self.grid.ID.shape[2], - NZ_ID=self.grid.ID.shape[3]), - options=config.sim_config.cuda['nvcc_opts']) - self.copy_mat_coeffs(kernels_sources, kernels_sources) - if self.grid.hertziandipoles: - self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = initialise_src_arrays_gpu(self.grid.hertziandipoles, self.grid) - self.update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole") - if self.grid.magneticdipoles: - self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = initialise_src_arrays_gpu(self.grid.magneticdipoles, self.grid) - self.update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole") - if self.grid.voltagesources: - self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = initialise_src_arrays_gpu(self.grid.voltagesources, self.grid) - self.update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source") + kernels_sources = self.source_module(kernel_template_sources.substitute( + REAL=config.sim_config.dtypes['C_float_or_double'], + N_updatecoeffsE=self.grid.updatecoeffsE.size, + N_updatecoeffsH=self.grid.updatecoeffsH.size, + NY_MATCOEFFS=self.grid.updatecoeffsE.shape[1], + NY_SRCINFO=4, + NY_SRCWAVES=self.grid.iterations, + NX_FIELDS=self.grid.nx + 1, + NY_FIELDS=self.grid.ny + 1, + NZ_FIELDS=self.grid.nz + 1, + NX_ID=self.grid.ID.shape[1], + NY_ID=self.grid.ID.shape[2], + NZ_ID=self.grid.ID.shape[3]), + options=config.sim_config.cuda['nvcc_opts']) + self._copy_mat_coeffs(kernels_sources, kernels_sources) + if self.grid.hertziandipoles: + self.srcinfo1_hertzian_gpu, self.srcinfo2_hertzian_gpu, self.srcwaves_hertzian_gpu = htod_src_arrays(self.grid.hertziandipoles, self.grid) + self.update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole") + if self.grid.magneticdipoles: + self.srcinfo1_magnetic_gpu, self.srcinfo2_magnetic_gpu, self.srcwaves_magnetic_gpu = htod_src_arrays(self.grid.magneticdipoles, self.grid) + self.update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole") + if self.grid.voltagesources: + self.srcinfo1_voltage_gpu, self.srcinfo2_voltage_gpu, self.srcwaves_voltage_gpu = htod_src_arrays(self.grid.voltagesources, self.grid) + self.update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source") - def set_snapshot_kernel(self): + def _set_snapshot_kernel(self): """Snapshots - initialise arrays on GPU, prepare kernel and get kernel function. """ - if self.grid.snapshots: - self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = initialise_snapshot_array_gpu(self.grid) - kernel_store_snapshot = self.source_module(kernel_template_store_snapshot.substitute( - REAL=config.sim_config.dtypes['C_float_or_double'], - NX_SNAPS=Snapshot.nx_max, - NY_SNAPS=Snapshot.ny_max, - NZ_SNAPS=Snapshot.nz_max, - NX_FIELDS=self.grid.nx + 1, - NY_FIELDS=self.grid.ny + 1, - NZ_FIELDS=self.grid.nz + 1), - options=config.sim_config.cuda['nvcc_opts']) - self.store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot") + self.snapEx_gpu, self.snapEy_gpu, self.snapEz_gpu, + self.snapHx_gpu, self.snapHy_gpu, self.snapHz_gpu = htod_snapshot_array(self.grid) + kernel_store_snapshot = self.source_module(kernel_template_store_snapshot.substitute( + REAL=config.sim_config.dtypes['C_float_or_double'], + NX_SNAPS=Snapshot.nx_max, + NY_SNAPS=Snapshot.ny_max, + NZ_SNAPS=Snapshot.nz_max, + NX_FIELDS=self.grid.nx + 1, + NY_FIELDS=self.grid.ny + 1, + NZ_FIELDS=self.grid.nz + 1), + options=config.sim_config.cuda['nvcc_opts']) + self.store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot") - def copy_mat_coeffs(self, kernelE, kernelH): + def _copy_mat_coeffs(self, kernelE, kernelH): """Copy material coefficient arrays to constant memory of GPU (must be <64KB). @@ -436,6 +442,12 @@ class CUDAUpdates: kernelE (kernel): electric field kernel. kernelH (kernel): magnetic field kernel. """ + + # Check if coefficient arrays will fit on constant memory of GPU + if (self.grid.updatecoeffsE.nbytes + self.grid.updatecoeffsH.nbytes + > config.model_configs[self.grid.model_num].cuda['gpu'].constmem): + raise GeneralError(f"Too many materials in the model to fit onto constant memory of size {human_size(config.model_configs[self.grid.model_num].cuda['gpu'].constmem)} on {config.model_configs[self.grid.model_num].cuda['gpu'].deviceID} - {config.model_configs[self.grid.model_num].cuda['gpu'].name} GPU") + updatecoeffsE = kernelE.get_global('updatecoeffsE')[0] updatecoeffsH = kernelH.get_global('updatecoeffsH')[0] self.drv.memcpy_htod(updatecoeffsE, self.grid.updatecoeffsE) @@ -492,52 +504,51 @@ class CUDAUpdates: block=Snapshot.tpb, grid=Snapshot.bpg) if config.model_configs[self.grid.model_num].cuda['snapsgpu2cpu']: - gpu_get_snapshot_array(self.grid.snapEx_gpu.get(), - self.grid.snapEy_gpu.get(), - self.grid.snapEz_gpu.get(), - self.grid.snapHx_gpu.get(), - self.grid.snapHy_gpu.get(), - self.grid.snapHz_gpu.get(), - 0, - snap) + dtoh_snapshot_array(self.grid.snapEx_gpu.get(), + self.grid.snapEy_gpu.get(), + self.grid.snapEz_gpu.get(), + self.grid.snapHx_gpu.get(), + self.grid.snapHy_gpu.get(), + self.grid.snapHz_gpu.get(), + 0, snap) def update_magnetic(self): """Update magnetic field components.""" self.update_magnetic_gpu(np.int32(self.grid.nx), np.int32(self.grid.ny), np.int32(self.grid.nz), - self.grid.ID_gpu, - self.grid.Hx_gpu, - self.grid.Hy_gpu, - self.grid.Hz_gpu, - self.grid.Ex_gpu, - self.grid.Ey_gpu, - self.grid.Ez_gpu, + self.grid.ID_gpu.gpudata, + self.grid.Hx_gpu.gpudata, + self.grid.Hy_gpu.gpudata, + self.grid.Hz_gpu.gpudata, + self.grid.Ex_gpu.gpudata, + self.grid.Ey_gpu.gpudata, + self.grid.Ez_gpu.gpudata, block=self.grid.tpb, grid=self.grid.bpg) def update_magnetic_pml(self): """Update magnetic field components with the PML correction.""" for pml in self.grid.pmls: - pml.update_magnetic(self.grid) + pml.update_magnetic() def update_magnetic_sources(self): """Update magnetic field components from sources.""" if self.grid.magneticdipoles: self.update_magnetic_dipole_gpu(np.int32(len(self.grid.magneticdipoles)), - np.int32(self.grid.iteration), - config.sim_config.dtypes['float_or_double'](self.grid.dx), - config.sim_config.dtypes['float_or_double'](self.grid.dy), - config.sim_config.dtypes['float_or_double'](self.grid.dz), - self.srcinfo1_magnetic_gpu.gpudata, - self.srcinfo2_magnetic_gpu.gpudata, - self.srcwaves_magnetic_gpu.gpudata, - self.grid.ID_gpu, - self.grid.Hx_gpu, - self.grid.Hy_gpu, - self.grid.Hz_gpu, - block=(1, 1, 1), - grid=(round32(len(self.grid.magneticdipoles)), 1, 1)) + np.int32(self.grid.iteration), + config.sim_config.dtypes['float_or_double'](self.grid.dx), + config.sim_config.dtypes['float_or_double'](self.grid.dy), + config.sim_config.dtypes['float_or_double'](self.grid.dz), + self.srcinfo1_magnetic_gpu.gpudata, + self.srcinfo2_magnetic_gpu.gpudata, + self.srcwaves_magnetic_gpu.gpudata, + self.grid.ID_gpu.gpudata, + self.grid.Hx_gpu.gpudata, + self.grid.Hy_gpu.gpudata, + self.grid.Hz_gpu.gpudata, + block=(1, 1, 1), + grid=(round32(len(self.grid.magneticdipoles)), 1, 1)) def update_electric_a(self): """Update electric field components.""" @@ -546,13 +557,13 @@ class CUDAUpdates: self.update_electric_gpu(np.int32(self.grid.nx), np.int32(self.grid.ny), np.int32(self.grid.nz), - self.grid.ID_gpu, - self.grid.Ex_gpu, - self.grid.Ey_gpu, - self.grid.Ez_gpu, - self.grid.Hx_gpu, - self.grid.Hy_gpu, - self.grid.Hz_gpu, + self.grid.ID_gpu.gpudata, + self.grid.Ex_gpu.gpudata, + self.grid.Ey_gpu.gpudata, + self.grid.Ez_gpu.gpudata, + self.grid.Hx_gpu.gpudata, + self.grid.Hy_gpu.gpudata, + self.grid.Hz_gpu.gpudata, block=self.grid.tpb, grid=self.grid.bpg) @@ -564,23 +575,23 @@ class CUDAUpdates: np.int32(self.grid.nz), np.int32(config.model_configs[self.grid.model_num].materials['maxpoles']), self.grid.updatecoeffsdispersive_gpu.gpudata, - self.grid.Tx_gpu, - self.grid.Ty_gpu, - self.grid.Tz_gpu, - self.grid.ID_gpu, - self.grid.Ex_gpu, - self.grid.Ey_gpu, - self.grid.Ez_gpu, - self.grid.Hx_gpu, - self.grid.Hy_gpu, - self.grid.Hz_gpu, + self.grid.Tx_gpu.gpudata, + self.grid.Ty_gpu.gpudata, + self.grid.Tz_gpu.gpudata, + self.grid.ID_gpu.gpudata, + self.grid.Ex_gpu.gpudata, + self.grid.Ey_gpu.gpudata, + self.grid.Ez_gpu.gpudata, + self.grid.Hx_gpu.gpudata, + self.grid.Hy_gpu.gpudata, + self.grid.Hz_gpu.gpudata, block=self.grid.tpb, grid=self.grid.bpg) def update_electric_pml(self): """Update electric field components with the PML correction.""" for pml in self.grid.pmls: - pml.update_electric(self.grid) + pml.update_electric() def update_electric_sources(self): """Update electric field components from sources - @@ -595,10 +606,10 @@ class CUDAUpdates: self.srcinfo1_voltage_gpu.gpudata, self.srcinfo2_voltage_gpu.gpudata, self.srcwaves_voltage_gpu.gpudata, - self.grid.ID_gpu, - self.grid.Ex_gpu, - self.grid.Ey_gpu, - self.grid.Ez_gpu, + self.grid.ID_gpu.gpudata, + self.grid.Ex_gpu.gpudata, + self.grid.Ey_gpu.gpudata, + self.grid.Ez_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(self.grid.voltagesources)), 1, 1)) @@ -611,10 +622,10 @@ class CUDAUpdates: self.srcinfo1_hertzian_gpu.gpudata, self.srcinfo2_hertzian_gpu.gpudata, self.srcwaves_hertzian_gpu.gpudata, - self.grid.ID_gpu, - self.grid.Ex_gpu, - self.grid.Ey_gpu, - self.grid.Ez_gpu, + self.grid.ID_gpu.gpudata, + self.grid.Ex_gpu.gpudata, + self.grid.Ey_gpu.gpudata, + self.grid.Ez_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(self.grid.hertziandipoles)), 1, 1)) @@ -627,19 +638,19 @@ class CUDAUpdates: updated after the electric field has been updated by the PML and source updates. """ - if config.model_configs[self.grid.model_num].materials['maxpoles'] != 0: + if config.model_configs[self.grid.model_num].materials['maxpoles'] > 0: self.dispersive_update_b(np.int32(self.grid.nx), np.int32(self.grid.ny), np.int32(self.grid.nz), np.int32(config.model_configs[self.grid.model_num].materials['maxpoles']), self.grid.updatecoeffsdispersive_gpu.gpudata, - self.grid.Tx_gpu, - self.grid.Ty_gpu, - self.grid.Tz_gpu, - self.grid.ID_gpu, - self.grid.Ex_gpu, - self.grid.Ey_gpu, - self.grid.Ez_gpu, + self.grid.Tx_gpu.gpudata, + self.grid.Ty_gpu.gpudata, + self.grid.Tz_gpu.gpudata, + self.grid.ID_gpu.gpudata, + self.grid.Ex_gpu.gpudata, + self.grid.Ey_gpu.gpudata, + self.grid.Ez_gpu.gpudata, block=self.grid.tpb, grid=self.grid.bpg) @@ -674,20 +685,20 @@ class CUDAUpdates: """Copy data from GPU back to CPU to save to file(s).""" # Copy output from receivers array back to correct receiver objects if self.grid.rxs: - get_rx_array_gpu(self.rxs_gpu.get(), - self.rxcoords_gpu.get(), - self.grid) + dtoh_rx_array(self.rxs_gpu.get(), + self.rxcoords_gpu.get(), + self.grid) # Copy data from any snapshots back to correct snapshot objects if self.grid.snapshots and not config.model_configs[self.grid.model_num].cuda['snapsgpu2cpu']: for i, snap in enumerate(self.grid.snapshots): - get_snapshot_array_gpu(self.snapEx_gpu.get(), - self.snapEy_gpu.get(), - self.snapEz_gpu.get(), - self.snapHx_gpu.get(), - self.snapHy_gpu.get(), - self.snapHz_gpu.get(), - i, snap) + dtoh_snapshot_arra(self.snapEx_gpu.get(), + self.snapEy_gpu.get(), + self.snapEz_gpu.get(), + self.snapHx_gpu.get(), + self.snapHy_gpu.get(), + self.snapHz_gpu.get(), + i, snap) def cleanup(self): """Cleanup GPU context.""" diff --git a/gprMax/utilities.py b/gprMax/utilities.py index d049ac24..0cdf7485 100644 --- a/gprMax/utilities.py +++ b/gprMax/utilities.py @@ -118,7 +118,8 @@ def round_value(value, decimalplaces=0): Args: value (float): Number to round. - decimalplaces (int): Number of decimal places of float to represent rounded value. + decimalplaces (int): Number of decimal places of float to represent + rounded value. Returns: rounded (int/float): Rounded value. @@ -143,7 +144,7 @@ def round32(value): def fft_power(waveform, dt): """Calculate a FFT of the given waveform of amplitude values; - converted to decibels and shifted so that maximum power is 0dB + converted to decibels and shifted so that maximum power is 0dB Args: waveform (ndarray): time domain waveform @@ -200,7 +201,8 @@ def get_host_info(): Returns: hostinfo (dict): Manufacturer and model of machine; description of CPU - type, speed, cores; RAM; name and version of operating system. + type, speed, cores; RAM; name and + version of operating system. """ # Default to 'unknown' if any of the detection fails @@ -381,7 +383,7 @@ def set_omp_threads(nthreads=None): return nthreads -def mem_check(mem): +def mem_check_host(mem): """Check if the required amount of memory (RAM) is available on host. Args: @@ -391,22 +393,72 @@ def mem_check(mem): raise GeneralError(f"Memory (RAM) required ~{human_size(mem)} exceeds {human_size(config.sim_config.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)} detected!\n") -def mem_check_gpu_snaps(model_num, snaps_mem): +def mem_check_gpu_snaps(model_num, total_mem, snaps_mem): """Check if the required amount of memory (RAM) for all snapshots can fit on specified GPU. Args: model_num (int): Model number. + total_mem (int): Total memory required for model (bytes). snaps_mem (int): Memory required for all snapshots (bytes). """ - if config.model_configs[G.model_num].mem_use - snaps_mem > gpu.totalmem: - raise GeneralError(f'Memory (RAM) required ~{human_size(config.model_configs[G.model_num].mem_use)} exceeds {human_size(config.model_configs[G.model_num].gpu.totalmem, a_kilobyte_is_1024_bytes=True)} detected on specified {config.model_configs[G.model_num].gpu.deviceID} - {config.model_configs[G.model_num].gpu.name} GPU!\n') + if total_mem - snaps_mem > config.model_configs[model_num].cuda['gpu'].totalmem: + raise GeneralError(f"Memory (RAM) required ~{human_size(total_mem)} exceeds {human_size(config.model_configs[model_num].cuda['gpu'].totalmem, a_kilobyte_is_1024_bytes=True)} detected on specified {config.model_configs[model_num].cuda['gpu'].deviceID} - {config.model_configs[model_num].cuda['gpu'].name} GPU!\n") # If the required memory without the snapshots will fit on the GPU then # transfer and store snaphots on host - if (snaps_mem != 0 and config.model_configs[G.model_num].mem_use - - snaps_mem < config.model_configs[G.model_num].gpu.totalmem): - config.model_configs[G.model_num].cuda['snapsgpu2cpu'] = True + if snaps_mem != 0 and total_mem - snaps_mem < config.model_configs[model_num].cuda['gpu'].totalmem: + config.model_configs[model_num].cuda['snapsgpu2cpu'] = True + + +def mem_check_all(grids): + """Check memory for all grids, including for any dispersive materials, + snapshots, and if solver with GPU, whether snapshots will fit on GPU + memory. + + Args: + grids (list): FDTDGrid objects. + + Returns: + total_mem (int): Total memory required for all grids. + """ + + total_mem = 0 + total_snaps_mem = 0 + + for grid in grids: + config.model_configs[grid.model_num].mem_use += grid.mem_est_basic() + + # Set datatype for dispersive arrays if there are any dispersive materials. + if config.model_configs[grid.model_num].materials['maxpoles'] != 0: + drudelorentz = any([m for m in grid.materials if 'drude' in m.type or 'lorentz' in m.type]) + if drudelorentz: + config.model_configs[grid.model_num].materials['dispersivedtype'] = config.sim_config.dtypes['complex'] + config.model_configs[grid.model_num].materials['dispersiveCdtype'] = config.sim_config.dtypes['C_complex'] + else: + config.model_configs[grid.model_num].materials['dispersivedtype'] = config.sim_config.dtypes['float_or_double'] + config.model_configs[grid.model_num].materials['dispersiveCdtype'] = config.sim_config.dtypes['C_float_or_double'] + + # Update estimated memory (RAM) usage + config.model_configs[grid.model_num].mem_use += grid.mem_est_dispersive() + + # Calculate snapshot memory + if grid.snapshots: + for snap in grid.snapshots: + # 2 x required to account for electric and magnetic fields + config.model_configs[grid.model_num].mem_use += int(2 * snap.datasizefield) + total_snaps_mem += int(2 * snap.datasizefield) + + total_mem += config.model_configs[grid.model_num].mem_use + + # Check if there is sufficient memory on host + mem_check_host(total_mem) + + # Check if there is sufficient memory for any snapshots on GPU + if config.sim_config.general['cuda']: + mem_check_gpu_snaps(grid.model_num, total_mem, total_snaps_mem) + + return total_mem class GPU: