From 7078bbebf33cf080da088c0d94995e67a3a40baa Mon Sep 17 00:00:00 2001
From: nmannall
Date: Tue, 2 Jul 2024 17:11:25 +0100
Subject: [PATCH] Fix incorrect PML material property averages (MPI)

---
 gprMax/cython/pml_build.pyx |  36 +++++++++++
 gprMax/grid/fdtd_grid.py    |   6 +-
 gprMax/grid/mpi_grid.py     |  10 ++-
 gprMax/pml.py               | 125 +++++++++++++++++++++++++++++++-----
 4 files changed, 157 insertions(+), 20 deletions(-)

diff --git a/gprMax/cython/pml_build.pyx b/gprMax/cython/pml_build.pyx
index 10edac00..c23b6f7b 100644
--- a/gprMax/cython/pml_build.pyx
+++ b/gprMax/cython/pml_build.pyx
@@ -60,3 +60,39 @@ cpdef pml_average_er_mr(
     averagemr = summr / (n1 * n2)
 
     return averageer, averagemr
+
+cpdef pml_sum_er_mr(
+    int n1,
+    int n2,
+    int nthreads,
+    np.uint32_t[:, :] solid,
+    float_or_double[::1] ers,
+    float_or_double[::1] mrs
+):
+    """Sums permittivity and permeability in PML slab (based on underlying
+    material er and mr from solid array). Used to build PML.
+
+    Args:
+        n1, n2: ints for PML size in cells perpendicular to thickness direction.
+        nthreads: int for number of threads to use.
+        solid: memoryview to access solid array.
+        ers, mrs: memoryviews to access arrays containing permittivity and
+            permeability.
+
+    Returns:
+        sumer, summr: floats for summed permittivity and permeability in
+            PML slab.
+    """
+
+    cdef Py_ssize_t m, n
+    cdef int numID
+    # Sums of relative permittivities and permeabilities in PML slab
+    cdef float sumer = 0, summr = 0
+
+    for m in prange(n1, nogil=True, schedule='static', num_threads=nthreads):
+        for n in range(n2):
+            numID = solid[m, n]
+            sumer += ers[numID]
+            summr += mrs[numID]
+
+    return sumer, summr
diff --git a/gprMax/grid/fdtd_grid.py b/gprMax/grid/fdtd_grid.py
index 19d9e056..bd5105a2 100644
--- a/gprMax/grid/fdtd_grid.py
+++ b/gprMax/grid/fdtd_grid.py
@@ -152,7 +152,7 @@ class FDTDGrid:
         self.initialise_dispersive_update_coeff_array()
         self._build_materials()
 
-    def _build_pmls(self) -> None:
+    def _build_pmls(self, build_pml_func=build_pml) -> None:
         pbar = tqdm(
             total=sum(1 for value in self.pmls["thickness"].values() if value > 0),
             desc=f"Building PML boundaries [{self.name}]",
@@ -162,7 +162,9 @@
         )
         for pml_id, thickness in self.pmls["thickness"].items():
             if thickness > 0:
-                build_pml(self, pml_id, thickness)
+                # TODO: Consider making this a method of FDTDGrid (that
+                # can be overridden in MPIGrid)
+                build_pml_func(self, pml_id, thickness)
                 pbar.update()
         pbar.close()
 
diff --git a/gprMax/grid/mpi_grid.py b/gprMax/grid/mpi_grid.py
index 3d69f6ed..8ac54f4b 100644
--- a/gprMax/grid/mpi_grid.py
+++ b/gprMax/grid/mpi_grid.py
@@ -27,6 +27,7 @@ from mpi4py import MPI
 from numpy import ndarray
 
 from gprMax.grid.fdtd_grid import FDTDGrid
+from gprMax.pml import build_pml_mpi
 from gprMax.receivers import Rx
 from gprMax.sources import Source
 
@@ -66,6 +67,7 @@ class MPIGrid(FDTDGrid):
 
         self.lower_extent: npt.NDArray[np.intc] = np.zeros(3, dtype=int)
         self.upper_extent: npt.NDArray[np.intc] = np.zeros(3, dtype=int)
+        self.negative_halo_offset: npt.NDArray[np.intc] = np.zeros(3, dtype=int)
         self.global_size: npt.NDArray[np.intc] = np.zeros(3, dtype=int)
 
         self.neighbours = np.full((3, 2), -1, dtype=int)
@@ -260,6 +262,9 @@ class MPIGrid(FDTDGrid):
         self._halo_swap_array(self.Hy)
         self._halo_swap_array(self.Hz)
 
+    def _build_pmls(self):
+        super()._build_pmls(build_pml_func=build_pml_mpi)
+
     def build(self):
         if any(self.global_size + 1 < self.mpi_tasks):
             logger.error(
@@ -329,6 +334,7 @@
         # a positive halo will always exist. Grids not needing a
         # positive halo, still need the extra size as that makes the
        # global grid on the whole one larger than the user dimensions.
-        self.size += self.neighbours[:, 0] >= 0
-        self.lower_extent -= self.neighbours[:, 0] >= 0
+        self.negative_halo_offset = self.neighbours[:, 0] >= 0
+        self.size += self.negative_halo_offset
+        self.lower_extent -= self.negative_halo_offset
         self.upper_extent = self.lower_extent + self.size
diff --git a/gprMax/pml.py b/gprMax/pml.py
index 781e4584..f6e641b5 100644
--- a/gprMax/pml.py
+++ b/gprMax/pml.py
@@ -24,7 +24,7 @@ from mpi4py import MPI
 
 import gprMax.config as config
 
-from .cython.pml_build import pml_average_er_mr
+from .cython.pml_build import pml_average_er_mr, pml_sum_er_mr
 
 logger = logging.getLogger(__name__)
 
@@ -795,22 +795,115 @@
             G.nx, G.ny, config.get_model_config().ompthreads, G.solid[:, :, pml.zs], ers, mrs
         )
 
-    # TODO: Use Grid type (currently will cause a circular dependency)
-    if config.sim_config.mpi:
-        if pml_ID[0] == "x":
-            comm = G.x_comm
-        elif pml_ID[0] == "y":
-            comm = G.y_comm
-        elif pml_ID[0] == "z":
-            comm = G.z_comm
+    pml.calculate_update_coeffs(averageer, averagemr)
+    G.pmls["slabs"].append(pml)
 
-        comm.barrier()
-        print(f"[Rank {G.rank}] pml_ID: {pml_ID}, er: {averageer}, mr: {averagemr}")
-        averageer = comm.allreduce(averageer, MPI.SUM) / comm.size
-        averagemr = comm.allreduce(averagemr, MPI.SUM) / comm.size
-        print(f"[Rank {G.rank}] NEW pml_ID: {pml_ID}, er: {averageer}, mr {averagemr}")
-    else:
-        print(f"pml_ID: {pml_ID}, er: {averageer}, mr {averagemr}")
-
+
+
+def build_pml_mpi(G, pml_ID, thickness):
+    """Builds an instance of the PML and calculates the initial parameters
+    and coefficients, averaging the underlying material er and mr (from the
+    solid array) across all MPI ranks that share the PML slab.
+
+    Args:
+        G: FDTDGrid class describing a grid in a model.
+        pml_ID: string identifier of PML slab.
+        thickness: int with thickness of PML slab in cells.
+    """
+
+    # Arrays to hold values of permittivity and permeability (avoids accessing
+    # Material class in Cython.)
+    ers = np.zeros(len(G.materials))
+    mrs = np.zeros(len(G.materials))
+
+    for i, m in enumerate(G.materials):
+        ers[i] = m.er
+        mrs[i] = m.mr
+
+    if config.sim_config.general["solver"] == "cpu":
+        pml_type = PML
+    elif config.sim_config.general["solver"] == "cuda":
+        pml_type = CUDAPML
+    elif config.sim_config.general["solver"] == "opencl":
+        pml_type = OpenCLPML
+
+    if pml_ID == "x0":
+        pml = pml_type(
+            G, ID=pml_ID, direction="xminus", xs=0, xf=thickness, ys=0, yf=G.ny, zs=0, zf=G.nz
+        )
+    elif pml_ID == "xmax":
+        pml = pml_type(
+            G,
+            ID=pml_ID,
+            direction="xplus",
+            xs=G.nx - thickness,
+            xf=G.nx,
+            ys=0,
+            yf=G.ny,
+            zs=0,
+            zf=G.nz,
+        )
+    elif pml_ID == "y0":
+        pml = pml_type(
+            G, ID=pml_ID, direction="yminus", xs=0, xf=G.nx, ys=0, yf=thickness, zs=0, zf=G.nz
+        )
+    elif pml_ID == "ymax":
+        pml = pml_type(
+            G,
+            ID=pml_ID,
+            direction="yplus",
+            xs=0,
+            xf=G.nx,
+            ys=G.ny - thickness,
+            yf=G.ny,
+            zs=0,
+            zf=G.nz,
+        )
+    elif pml_ID == "z0":
+        pml = pml_type(
+            G, ID=pml_ID, direction="zminus", xs=0, xf=G.nx, ys=0, yf=G.ny, zs=0, zf=thickness
+        )
+    elif pml_ID == "zmax":
+        pml = pml_type(
+            G,
+            ID=pml_ID,
+            direction="zplus",
+            xs=0,
+            xf=G.nx,
+            ys=0,
+            yf=G.ny,
+            zs=G.nz - thickness,
+            zf=G.nz,
+        )
+
+    # Need to account for the negative halo (remove it) to avoid double
+    # counting. The solid array does not have a positive halo so we
+    # don't need to consider that.
+    if pml_ID[0] == "x":
+        o1 = G.negative_halo_offset[1]
+        o2 = G.negative_halo_offset[2]
+        n1 = G.ny - o1
+        n2 = G.nz - o2
+        solid = G.solid[pml.xs, o1:, o2:]
+        comm = G.x_comm
+    elif pml_ID[0] == "y":
+        o1 = G.negative_halo_offset[0]
+        o2 = G.negative_halo_offset[2]
+        n1 = G.nx - o1
+        n2 = G.nz - o2
+        solid = G.solid[o1:, pml.ys, o2:]
+        comm = G.y_comm
+    elif pml_ID[0] == "z":
+        o1 = G.negative_halo_offset[0]
+        o2 = G.negative_halo_offset[1]
+        n1 = G.nx - o1
+        n2 = G.ny - o2
+        solid = G.solid[o1:, o2:, pml.zs]
+        comm = G.z_comm
+
+    sumer, summr = pml_sum_er_mr(n1, n2, config.get_model_config().ompthreads, solid, ers, mrs)
+    n = comm.allreduce(n1 * n2, MPI.SUM)
+    averageer = comm.allreduce(sumer, MPI.SUM) / n
+    averagemr = comm.allreduce(summr, MPI.SUM) / n
     pml.calculate_update_coeffs(averageer, averagemr)
     G.pmls["slabs"].append(pml)
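
Note on the fix (not part of the diff): each rank now contributes a raw sum of er/mr values
together with the number of cells it owns in the slab, the negative halo is sliced off before
summing, and the average is formed once after the allreduce. The old code averaged each rank's
local average with equal weight per rank, which is wrong whenever ranks hold different numbers
of PML cells or include halo cells. A plain-Python sketch of the difference (illustrative only;
the slab shapes, material IDs and er values below are invented, not gprMax data):

    import numpy as np

    ers = np.array([1.0, 81.0])                  # hypothetical er per material ID

    rank0 = np.zeros((6, 4), dtype=np.uint32)    # rank 0 owns 6x4 cells of material 0
    rank1 = np.ones((3, 4), dtype=np.uint32)     # rank 1: 2x4 owned cells + 1-row negative halo
    rank1[0, :] = rank0[-1, :]                   # the halo duplicates the neighbour's cells

    # Serial reference: the full face is 8x4 cells (6 rows from rank 0, 2 from rank 1)
    serial = np.vstack([rank0, rank1[1:]])
    reference = ers[serial].mean()

    # Old approach: average the per-rank averages (halo included, equal weight per rank)
    old = (ers[rank0].mean() + ers[rank1].mean()) / 2

    # New approach: drop the negative halo, reduce sums and cell counts across ranks,
    # then divide once (the loop stands in for the MPI allreduce calls)
    offsets = [0, 1]                             # negative_halo_offset per rank
    sums = counts = 0
    for solid, offset in zip([rank0, rank1], offsets):
        owned = solid[offset:, :]
        sums += ers[owned].sum()
        counts += owned.size
    new = sums / counts

    print(f"serial: {reference}  old: {old}  new: {new}")
    # serial and new are both 21.0; old is ~27.7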
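The per-axis halo bookkeeping added in mpi_grid.py works because `self.neighbours[:, 0] >= 0`
yields a boolean array that numpy treats as 0/1 when added to or subtracted from the integer
size and extent arrays, and build_pml_mpi reuses the same array to slice the halo off the solid
view. A quick illustration with invented neighbour ranks and sizes:

    import numpy as np

    neighbours = np.array([[3, -1],   # x axis: negative neighbour present, no positive one
                           [-1, 5],   # y axis: no negative neighbour
                           [0, 2]])   # z axis: both neighbours present
    negative_halo_offset = neighbours[:, 0] >= 0   # array([ True, False,  True])

    size = np.array([10, 10, 10])
    lower_extent = np.array([20, 0, 30])

    size = size + negative_halo_offset                    # [11, 10, 11] - room for the halo cells
    lower_extent = lower_extent - negative_halo_offset    # [19,  0, 29]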