Call MPI.Abort if an MPI rank raises an error

这个提交包含在:
nmannall
2025-01-17 16:38:07 +00:00
父节点 abddc5fd71
当前提交 266f84edf3

查看文件

@@ -166,13 +166,12 @@ class MPIContext(Context):
self.comm = MPI.COMM_WORLD self.comm = MPI.COMM_WORLD
self.rank = self.comm.rank self.rank = self.comm.rank
requested_mpi_size = np.product(config.sim_config.mpi) requested_mpi_size = np.prod(config.sim_config.mpi)
if self.comm.size < requested_mpi_size: if self.comm.size < requested_mpi_size:
logger.error( raise ValueError(
f"MPI_COMM_WORLD size of {self.comm.size} is too small for requested dimensions of" f"MPI_COMM_WORLD size of {self.comm.size} is too small for requested dimensions of"
f" {config.sim_config.mpi}. {requested_mpi_size} ranks are required." f" {config.sim_config.mpi}. {requested_mpi_size} ranks are required."
) )
raise ValueError
if self.rank >= requested_mpi_size: if self.rank >= requested_mpi_size:
logger.warning( logger.warning(
@@ -186,7 +185,10 @@ class MPIContext(Context):
return MPIModel() return MPIModel()
def run(self) -> Dict: def run(self) -> Dict:
return super().run() try:
return super().run()
except:
self.comm.Abort()
def _run_model(self, model_num: int) -> None: def _run_model(self, model_num: int) -> None:
"""Process for running a single model. """Process for running a single model.