Merge branch '27-mpi-geometry-views' into mpi

这个提交包含在:
Nathan Mannall
2025-06-02 12:55:02 +01:00
当前提交 45222e62f4
共有 7 个文件被更改,包括 224 次插入102 次删除

查看文件

@@ -29,7 +29,7 @@ from gprMax.output_controllers.geometry_views import GeometryView, Metadata, MPI
from gprMax.output_controllers.grid_view import GridType, MPIGridView
from gprMax.subgrids.grid import SubGridBaseGrid
from gprMax.vtkhdf_filehandlers.vtk_unstructured_grid import VtkUnstructuredGrid
from gprMax.vtkhdf_filehandlers.vtkhdf import VtkCellType
from gprMax.vtkhdf_filehandlers.vtkhdf import VtkCellType, VtkHdfFile
logger = logging.getLogger(__name__)
@@ -131,18 +131,18 @@ class MPIGeometryViewLines(GeometryViewLines[MPIGrid]):
ID = self.grid_view.get_ID()
x = np.arange(self.grid_view.gx + 1, dtype=np.float64)
y = np.arange(self.grid_view.gy + 1, dtype=np.float64)
z = np.arange(self.grid_view.gz + 1, dtype=np.float64)
x = np.arange(self.grid_view.nx + 1, dtype=np.float64)
y = np.arange(self.grid_view.ny + 1, dtype=np.float64)
z = np.arange(self.grid_view.nz + 1, dtype=np.float64)
coords = np.meshgrid(x, y, z, indexing="ij")
self.points = np.vstack(list(map(np.ravel, coords))).T
self.points += self.grid_view.global_start
self.points += self.grid_view.global_start + self.grid_view.offset
self.points *= self.grid_view.step * self.grid.dl
# Each point is the 'source' for 3 lines.
# NB: Excluding points at the far edge of the geometry as those
# are the 'source' for no lines
n_lines = 3 * np.prod(self.grid_view.global_size)
n_lines = 3 * np.prod(self.grid_view.size)
self.cell_types = np.full(n_lines, VtkCellType.LINE)
self.cell_offsets = np.arange(0, 2 * n_lines + 2, 2, dtype=np.intc)
@@ -182,18 +182,11 @@ class MPIGeometryViewLines(GeometryViewLines[MPIGrid]):
self.cell_offsets,
comm=self.grid_view.comm,
) as f:
f.add_cell_data("Material", self.material_data, self.grid_view.offset)
f.add_cell_data("Material", self.material_data)
# Write metadata in serial as it contains variable length
# strings which currently cannot be written by HDF5 using
# parallel I/O
if self.grid_view.comm.rank == 0:
with VtkUnstructuredGrid(
self.filename,
self.points,
self.cell_types,
self.connectivity,
self.cell_offsets,
mode="r+",
) as f:
with VtkHdfFile(self.filename, VtkUnstructuredGrid.TYPE, mode="r+") as f:
self.metadata.write_to_vtkhdf(f)

查看文件

@@ -27,6 +27,7 @@ from gprMax.output_controllers.geometry_views import GeometryView, Metadata, MPI
from gprMax.output_controllers.grid_view import GridType, MPIGridView
from gprMax.subgrids.grid import SubGridBaseGrid
from gprMax.vtkhdf_filehandlers.vtk_image_data import VtkImageData
from gprMax.vtkhdf_filehandlers.vtkhdf import VtkHdfFile
logger = logging.getLogger(__name__)
@@ -105,7 +106,5 @@ class MPIGeometryViewVoxels(GeometryViewVoxels[MPIGrid]):
# strings which currently cannot be written by HDF5 using
# parallel I/O
if self.grid_view.comm.rank == 0:
with VtkImageData(
self.filename, self.grid_view.global_size, self.origin, self.spacing, mode="r+"
) as f:
with VtkHdfFile(self.filename, VtkImageData.TYPE, mode="r+") as f:
self.metadata.write_to_vtkhdf(f)

查看文件

@@ -589,7 +589,9 @@ class MPIGridView(GridView[MPIGrid]):
)
# Calculate offset for the local grid view
self.offset = self.grid.local_to_global_coordinate(self.start) - self.global_start
self.offset = (
self.grid.local_to_global_coordinate(self.start) - self.global_start
) // self.step
# Update local size
self.size = np.ceil((self.stop - self.start) / self.step).astype(np.int32)
@@ -704,7 +706,7 @@ class MPIGridView(GridView[MPIGrid]):
# have a positive neighbour
size = self.size[dimension] + 1
offset = self.offset[dimension] // self.step[dimension]
offset = self.offset[dimension]
return slice(offset, offset + size)
@@ -743,7 +745,7 @@ class MPIGridView(GridView[MPIGrid]):
else:
size = self.size[dimension] + 1
offset = self.offset[dimension] // self.step[dimension]
offset = self.offset[dimension]
if self.has_negative_neighbour[dimension]:
offset -= 1

查看文件

@@ -18,13 +18,13 @@
# along with gprMax. If not, see <http://www.gnu.org/licenses/>.
from os import PathLike
from typing import Literal, Optional, Union
from typing import Optional, Union
import numpy as np
import numpy.typing as npt
from mpi4py import MPI
from mpi4py.MPI import Intracomm
from gprMax.vtkhdf_filehandlers.vtkhdf import VtkHdfFile
from gprMax.vtkhdf_filehandlers.vtkhdf import VtkFileType, VtkHdfFile
class VtkImageData(VtkHdfFile):
@@ -34,6 +34,8 @@ class VtkImageData(VtkHdfFile):
https://docs.vtk.org/en/latest/design_documents/VTKFileFormats.html#image-data
"""
TYPE = VtkFileType.IMAGE_DATA
DIRECTION_ATTR = "Direction"
ORIGIN_ATTR = "Origin"
SPACING_ATTR = "Spacing"
@@ -41,10 +43,6 @@ class VtkImageData(VtkHdfFile):
DIMENSIONS = 3
@property
def TYPE(self) -> Literal["ImageData"]:
return "ImageData"
def __init__(
self,
filename: Union[str, PathLike],
@@ -52,8 +50,7 @@ class VtkImageData(VtkHdfFile):
origin: Optional[npt.NDArray[np.float32]] = None,
spacing: Optional[npt.NDArray[np.float32]] = None,
direction: Optional[npt.NDArray[np.float32]] = None,
mode: str = "w",
comm: Optional[MPI.Comm] = None,
comm: Optional[Intracomm] = None,
):
"""Create a new VtkImageData file.
@@ -78,16 +75,10 @@ class VtkImageData(VtkHdfFile):
I.e. [[1, 0, 0], [0, 1, 0], [0, 0, 1]] and
[1, 0, 0, 0, 1, 0, 0, 0, 1] are equivalent. Default
[[1, 0, 0], [0, 1, 0], [0, 0, 1]].
mode (optional): Mode to open the file. Valid modes are
- r Readonly, file must exist
- r+ Read/write, file must exist
- w Create file, truncate if exists (default)
- w- or x Create file, fail if exists
- a Read/write if exists, create otherwise
comm (optional): MPI communicator containing all ranks that
want to write to the file.
"""
super().__init__(filename, mode, comm)
super().__init__(filename, self.TYPE, "w", comm)
if len(shape) == 0:
raise ValueError(f"Shape must not be empty.")
@@ -186,7 +177,7 @@ class VtkImageData(VtkHdfFile):
"If no offset is specified, data.shape must be one larger in each dimension than"
f" this vtkImageData object. {data.shape} != {points_shape}"
)
return super().add_point_data(name, data, points_shape, offset)
return super()._add_point_data(name, data, points_shape, offset)
def add_cell_data(
self, name: str, data: npt.NDArray, offset: Optional[npt.NDArray[np.int32]] = None
@@ -207,4 +198,4 @@ class VtkImageData(VtkHdfFile):
"If no offset is specified, data.shape must match the dimensions of this"
f" VtkImageData object. {data.shape} != {self.shape}"
)
return super().add_cell_data(name, data, self.shape, offset)
return super()._add_cell_data(name, data, self.shape, offset)

查看文件

@@ -19,13 +19,13 @@
import logging
from os import PathLike
from typing import Literal, Optional, Union
from typing import Optional, Union
import numpy as np
import numpy.typing as npt
from mpi4py import MPI
from gprMax.vtkhdf_filehandlers.vtkhdf import VtkCellType, VtkHdfFile
from gprMax.vtkhdf_filehandlers.vtkhdf import VtkCellType, VtkFileType, VtkHdfFile
logger = logging.getLogger(__name__)
@@ -37,6 +37,8 @@ class VtkUnstructuredGrid(VtkHdfFile):
https://docs.vtk.org/en/latest/design_documents/VTKFileFormats.html#unstructured-grid
"""
TYPE = VtkFileType.UNSTRUCTURED_GRID
class Dataset(VtkHdfFile.Dataset):
CONNECTIVITY = "Connectivity"
NUMBER_OF_CELLS = "NumberOfCells"
@@ -46,10 +48,6 @@ class VtkUnstructuredGrid(VtkHdfFile):
POINTS = "Points"
TYPES = "Types"
@property
def TYPE(self) -> Literal["UnstructuredGrid"]:
return "UnstructuredGrid"
def __init__(
self,
filename: Union[str, PathLike],
@@ -57,8 +55,7 @@ class VtkUnstructuredGrid(VtkHdfFile):
cell_types: npt.NDArray[VtkCellType],
connectivity: npt.NDArray,
cell_offsets: npt.NDArray,
mode: str = "w",
comm: Optional[MPI.Comm] = None,
comm: Optional[MPI.Intracomm] = None,
) -> None:
"""Create a new VtkUnstructuredGrid file.
@@ -83,19 +80,13 @@ class VtkUnstructuredGrid(VtkHdfFile):
and corresponds to a point in the points array.
cell_offsets: Array listing where each cell starts and ends
in the connectivity array. It has shape (C + 1,).
mode (optional): Mode to open the file. Valid modes are
- r Readonly, file must exist
- r+ Read/write, file must exist
- w Create file, truncate if exists (default)
- w- or x Create file, fail if exists
- a Read/write if exists, create otherwise
comm (optional): MPI communicator containing all ranks that
want to write to the file.
Raises:
Value Error: Raised if argument dimensions are invalid.
"""
super().__init__(filename, mode, comm)
super().__init__(filename, self.TYPE, "w", comm)
if len(cell_offsets) != len(cell_types) + 1:
raise ValueError(
@@ -116,39 +107,132 @@ class VtkUnstructuredGrid(VtkHdfFile):
" Some connectivity data will be ignored"
)
self._write_root_dataset(self.Dataset.CONNECTIVITY, connectivity)
self._write_root_dataset(self.Dataset.NUMBER_OF_CELLS, len(cell_types))
self._write_root_dataset(self.Dataset.NUMBER_OF_CONNECTIVITY_IDS, len(connectivity))
self._write_root_dataset(self.Dataset.NUMBER_OF_POINTS, len(points))
self._write_root_dataset(self.Dataset.OFFSETS, cell_offsets)
self._write_root_dataset(self.Dataset.POINTS, points, xyz_data_ordering=False)
self._write_root_dataset(self.Dataset.TYPES, cell_types)
number_of_cells = len(cell_types)
number_of_connectivity_ids = len(connectivity)
number_of_points = len(points)
if self.comm is None:
self.partition = 0
self._write_root_dataset(self.Dataset.NUMBER_OF_CELLS, number_of_cells)
self._write_root_dataset(
self.Dataset.NUMBER_OF_CONNECTIVITY_IDS, number_of_connectivity_ids
)
self._write_root_dataset(self.Dataset.NUMBER_OF_POINTS, number_of_points)
self._write_root_dataset(self.Dataset.TYPES, cell_types)
self._write_root_dataset(self.Dataset.POINTS, points, xyz_data_ordering=False)
self._write_root_dataset(self.Dataset.CONNECTIVITY, connectivity)
self._write_root_dataset(self.Dataset.OFFSETS, cell_offsets)
else:
# Write partition information for each rank
self.partition = self.comm.rank
partition_offset = np.array([self.partition], dtype=np.int32)
partition_shape = np.array([self.comm.size], dtype=np.int32)
self._write_root_dataset(
self.Dataset.NUMBER_OF_CELLS,
number_of_cells,
shape=partition_shape,
offset=partition_offset,
)
self._write_root_dataset(
self.Dataset.NUMBER_OF_CONNECTIVITY_IDS,
number_of_connectivity_ids,
shape=partition_shape,
offset=partition_offset,
)
self._write_root_dataset(
self.Dataset.NUMBER_OF_POINTS,
number_of_points,
shape=partition_shape,
offset=partition_offset,
)
cells_shape = np.array([self.global_number_of_cells], dtype=np.int32)
self._write_root_dataset(
self.Dataset.TYPES, cell_types, shape=cells_shape, offset=self.cells_offset
)
points_shape = np.array([self.global_number_of_points, 3], dtype=np.int32)
self._write_root_dataset(
self.Dataset.POINTS,
points,
shape=points_shape,
offset=self.points_offset,
xyz_data_ordering=False,
)
connectivity_shape = np.array([number_of_connectivity_ids], dtype=np.int32)
self.comm.Allreduce(MPI.IN_PLACE, connectivity_shape)
connectivity_offset = np.array([number_of_connectivity_ids], dtype=np.int32)
self.comm.Exscan(MPI.IN_PLACE, connectivity_offset)
# Exscan leaves this undefined for rank 0
if self.comm.rank == 0:
connectivity_offset = np.zeros(1, dtype=np.int32)
self._write_root_dataset(
self.Dataset.CONNECTIVITY,
connectivity,
shape=connectivity_shape,
offset=connectivity_offset,
)
# The OFFSETS dataset has size C + 1 for each partition.
# Account for the +1 by addding the partition index to the
# offset and MPI communicator size to the shape
cell_offsets_offset = self.cells_offset + self.partition
cell_offsets_shape = cells_shape + self.comm.size
self._write_root_dataset(
self.Dataset.OFFSETS,
cell_offsets,
shape=cell_offsets_shape,
offset=cell_offsets_offset,
)
@property
def number_of_cells(self) -> int:
number_of_cells = self._get_root_dataset(self.Dataset.NUMBER_OF_CELLS)
return np.sum(number_of_cells, dtype=np.int32)
return number_of_cells[self.partition]
@property
def global_number_of_cells(self) -> int:
number_of_cells = self._get_root_dataset(self.Dataset.NUMBER_OF_CELLS)
return np.sum(number_of_cells, dtype=int)
@property
def cells_offset(self) -> npt.NDArray[np.int32]:
number_of_cells = self._get_root_dataset(self.Dataset.NUMBER_OF_CELLS)
return np.sum(number_of_cells[: self.partition], dtype=np.int32, keepdims=True)
@property
def number_of_connectivity_ids(self) -> int:
number_of_connectivity_ids = self._get_root_dataset(self.Dataset.NUMBER_OF_CONNECTIVITY_IDS)
return np.sum(number_of_connectivity_ids, dtype=np.int32)
return number_of_connectivity_ids[self.partition]
@property
def number_of_points(self) -> int:
number_of_points = self._get_root_dataset(self.Dataset.NUMBER_OF_POINTS)
return np.sum(number_of_points, dtype=np.int32)
return number_of_points[self.partition]
def add_point_data(
self, name: str, data: npt.NDArray, offset: Optional[npt.NDArray[np.int32]] = None
):
@property
def global_number_of_points(self) -> int:
number_of_points = self._get_root_dataset(self.Dataset.NUMBER_OF_POINTS)
return np.sum(number_of_points, dtype=int)
@property
def points_offset(self) -> npt.NDArray[np.int32]:
number_of_points = self._get_root_dataset(self.Dataset.NUMBER_OF_POINTS)
offset = np.sum(number_of_points[: self.partition], dtype=np.int32)
return np.array([offset, 0], dtype=np.int32)
def add_point_data(self, name: str, data: npt.NDArray):
"""Add point data to the VTKHDF file.
Args:
name: Name of the dataset.
data: Data to be saved.
offset (optional): Offset to store the provided data at. Can
be omitted if data provides the full dataset.
data: Data to be saved. The length of the date must match
the number of points in the partition owned by this
rank.
Raises:
ValueError: Raised if data has invalid dimensions.
@@ -166,18 +250,17 @@ class VtkUnstructuredGrid(VtkHdfFile):
elif number_of_dimensions == 2 and shape[1] != 1 and shape[1] != 3:
raise ValueError(f"The second dimension should have shape 1 or 3, not {shape[1]}")
return super().add_point_data(name, data, shape, offset)
shape[0] = self.global_number_of_points
def add_cell_data(
self, name: str, data: npt.NDArray, offset: Optional[npt.NDArray[np.int32]] = None
):
return super()._add_point_data(name, data, shape, self.points_offset)
def add_cell_data(self, name: str, data: npt.NDArray):
"""Add cell data to the VTKHDF file.
Args:
name: Name of the dataset.
data: Data to be saved.
offset (optional): Offset to store the provided data at. Can
be omitted if data provides the full dataset.
data: Data to be saved. The length of the date must match
the number of cells in the partition owned by this rank.
Raises:
ValueError: Raised if data has invalid dimensions.
@@ -189,10 +272,12 @@ class VtkUnstructuredGrid(VtkHdfFile):
raise ValueError(f"Data must have 1 or 2 dimensions, not {number_of_dimensions}.")
elif len(data) != self.number_of_cells:
raise ValueError(
"Length of data must match the number of cells in the vtkUnstructuredGrid."
f" {len(data)} != {self.number_of_cells}"
f"Length of data must match the number of cells in partition {self.partition} of the"
f" vtkUnstructuredGrid. {len(data)} != {self.number_of_cells}"
)
elif number_of_dimensions == 2 and shape[1] != 1 and shape[1] != 3:
raise ValueError(f"The second dimension should have shape 1 or 3, not {shape[1]}")
return super().add_cell_data(name, data, shape, offset)
shape[0] = self.global_number_of_cells
return super()._add_cell_data(name, data, shape, self.cells_offset)

查看文件

@@ -29,11 +29,16 @@ from typing import Optional, Tuple, Union
import h5py
import numpy as np
import numpy.typing as npt
from mpi4py import MPI
from mpi4py.MPI import Intracomm
logger = logging.getLogger(__name__)
class VtkFileType(str, Enum):
IMAGE_DATA = "ImageData"
UNSTRUCTURED_GRID = "UnstructuredGrid"
class VtkHdfFile(AbstractContextManager):
VERSION = [2, 2]
FILE_EXTENSION = ".vtkhdf"
@@ -48,19 +53,15 @@ class VtkHdfFile(AbstractContextManager):
class Dataset(str, Enum):
pass
@property
@abstractmethod
def TYPE(self) -> str:
pass
def __enter__(self):
return self
def __init__(
self,
filename: Union[str, PathLike],
mode: str = "w",
comm: Optional[MPI.Comm] = None,
vtk_file_type: VtkFileType,
mode: str = "r",
comm: Optional[Intracomm] = None,
) -> None:
"""Create a new VtkHdfFile.
@@ -74,9 +75,9 @@ class VtkHdfFile(AbstractContextManager):
filename: Name of the file (can be a file path). The file
extension will be set to '.vtkhdf'.
mode (optional): Mode to open the file. Valid modes are
- r Readonly, file must exist
- r Readonly, file must exist (default)
- r+ Read/write, file must exist
- w Create file, truncate if exists (default)
- w Create file, truncate if exists
- w- or x Create file, fail if exists
- a Read/write if exists, create otherwise
comm (optional): MPI communicator containing all ranks that
@@ -100,14 +101,16 @@ class VtkHdfFile(AbstractContextManager):
else:
self.file_handler = h5py.File(self.filename, mode, driver="mpio", comm=self.comm)
logger.debug(f"Opened file '{self.filename}'")
self.root_group = self.file_handler.require_group(self.ROOT_GROUP)
# Set required Version and Type root attributes
self._set_root_attribute(self.VERSION_ATTR, self.VERSION)
self._check_root_attribute(self.VERSION_ATTR, self.VERSION)
type_as_ascii = self.TYPE.encode("ascii")
self._set_root_attribute(
self.TYPE_ATTR, type_as_ascii, h5py.string_dtype("ascii", len(type_as_ascii))
type_as_ascii = vtk_file_type.encode("ascii")
self._check_root_attribute(
self.TYPE_ATTR, type_as_ascii, dtype=h5py.string_dtype("ascii", len(type_as_ascii))
)
def __exit__(
@@ -135,6 +138,55 @@ class VtkHdfFile(AbstractContextManager):
"""Close the file handler"""
self.file_handler.close()
def _check_root_attribute(
self,
attribute: str,
expected_value: npt.ArrayLike,
dtype: npt.DTypeLike = None,
):
"""Check root attribute is present and warn if not valid.
If the attribute exists in the file that has been opened, the
value will be checked against the expected value with a warning
produced if they differ. If the attribute has not been set, it
will be set to the expected value unless the file has been
opened in readonly mode.
Args:
attribute: Name of the attribute.
expected_value: Expected value of the attribute.
dtype (optional): Data type of the attribute. Overrides
expected_value.dtype if the attribute is set by
this function.
Returns:
value: True if the attribute is present in the root VTKHDF
group. False otherwise.
"""
if self._has_root_attribute(attribute):
value = self._get_root_attribute(attribute)
if np.any(value != expected_value):
logger.warning(
f"VTKHDF version mismatch. Expected '{expected_value}', but found '{value}'."
)
elif self.file_handler.mode == "r+":
self._set_root_attribute(attribute, expected_value, dtype=dtype)
else:
logger.warning(f"Required VTKHDF attribute '{attribute}' not found.")
def _has_root_attribute(self, attribute: str) -> bool:
"""Check if attribute is present in the root VTKHDF group.
Args:
attribute: Name of the attribute.
Returns:
value: True if the attribute is present in the root VTKHDF
group. False otherwise.
"""
value = self.root_group.attrs.get(attribute)
return value is not None
def _get_root_attribute(self, attribute: str) -> npt.NDArray:
"""Get attribute from the root VTKHDF group if it exists.
@@ -147,8 +199,8 @@ class VtkHdfFile(AbstractContextManager):
Raises:
KeyError: Raised if the attribute is not present as a key.
"""
value = self.root_group.attrs[attribute]
if isinstance(value, h5py.Empty):
value = self.root_group.attrs.get(attribute)
if value is None:
raise KeyError(f"Attribute '{attribute}' not present in /{self.ROOT_GROUP} group")
return value
@@ -329,11 +381,11 @@ class VtkHdfFile(AbstractContextManager):
if xyz_data_ordering:
data = data.transpose()
if shape is not None:
shape = np.flip(shape)
if shape is not None:
shape = np.flip(shape)
if offset is not None:
offset = np.flip(offset)
if offset is not None:
offset = np.flip(offset)
logger.debug(
f"Writing dataset '{path}', shape: {shape}, data.shape: {data.shape}, dtype: {dtype}"
@@ -430,7 +482,7 @@ class VtkHdfFile(AbstractContextManager):
self.file_handler.create_dataset(path, shape=shape, dtype=dtype)
def add_point_data(
def _add_point_data(
self,
name: str,
data: npt.NDArray,
@@ -450,7 +502,7 @@ class VtkHdfFile(AbstractContextManager):
dataset_path = self._build_dataset_path("PointData", name)
self._write_dataset(dataset_path, data, shape=shape, offset=offset)
def add_cell_data(
def _add_cell_data(
self,
name: str,
data: npt.NDArray,