From 5669002757bd6f0f8a8d58bdc0a30f92f8ca2262 Mon Sep 17 00:00:00 2001
From: Nathan Mannall
Date: Wed, 5 Mar 2025 18:29:41 +0000
Subject: [PATCH] Add memory usage to performance report

- Update calculation of runtime to query SLURM database
- Improve regex to extract gprMax reported simulation time
---
 reframe_tests/tests/base_tests.py | 165 +++++++++++++++++++++++-------
 1 file changed, 126 insertions(+), 39 deletions(-)

diff --git a/reframe_tests/tests/base_tests.py b/reframe_tests/tests/base_tests.py
index fac4d2de..0d2be1b6 100644
--- a/reframe_tests/tests/base_tests.py
+++ b/reframe_tests/tests/base_tests.py
@@ -9,6 +9,8 @@ import os
 from pathlib import Path
 from typing import Literal, Optional, Union
 
+import numpy as np
+import numpy.typing as npt
 import reframe.utility.sanity as sn
 import reframe.utility.typecheck as typ
 from reframe import RunOnlyRegressionTest, simple_test
@@ -23,7 +25,7 @@ from reframe.core.builtins import (
     variable,
 )
 from reframe.core.exceptions import DependencyError
-from reframe.utility import udeps
+from reframe.utility import osext, udeps
 
 from reframe_tests.tests.regression_checks import RegressionCheck
 from reframe_tests.utilities.deferrable import path_join
@@ -130,7 +132,7 @@ class GprMaxBaseTest(RunOnlyRegressionTest):
 
     model = parameter()
     sourcesdir = required
-    executable = "time -p python -m gprMax"
+    executable = "python -m gprMax"
 
     regression_checks = variable(typ.List[RegressionCheck], value=[])
 
@@ -391,49 +393,134 @@ class GprMaxBaseTest(RunOnlyRegressionTest):
 
     @performance_function("s", perf_key="run_time")
     def extract_run_time(self):
-        """Extract total runtime from the last task to complete."""
-        return sn.extractsingle(
-            r"real\s+(?P<run_time>\S+)", self.stderr, "run_time", float, self.num_tasks - 1
+        """Extract total runtime from SLURM."""
+        sacct_command = osext.run_command(
+            [
+                "sacct",
+                "--format=JobID,JobName,State,Elapsed",
+                "-j",
+                self.job.jobid,
+            ]
         )
+        hours, minutes, seconds = sn.extractsingle_s(
+            self.job.jobid
+            + r"\.0\s+python\s+COMPLETED\s+(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)",
+            sacct_command.stdout,
+            ["hours", "minutes", "seconds"],
+            int,
+        )
+
+        return hours * 3600 + minutes * 60 + seconds
 
     @performance_function("s", perf_key="simulation_time")
     def extract_simulation_time(self):
-        """Extract simulation time reported by gprMax."""
+        """Extract average simulation time reported by gprMax."""
+        return sn.round(self.extract_simulation_time_per_rank().sum() / self.num_tasks, 2)
 
-        # sn.extractall throws an error if a group has value None.
-        # Therefore have to handle the < 1 min, >= 1 min and >= 1 hour cases separately.
-        timeframe = sn.extractsingle(
-            r"=== Simulation completed in \S+ (?P<timeframe>hour|minute|second)",
+    # @performance_function("s", perf_key="max_simulation_time")
+    # def extract_max_simulation_time(self):
+    #     """Extract maximum simulation time reported by gprMax."""
+    #     return sn.round(self.extract_simulation_time_per_rank().max(), 2)
+
+    # @performance_function("s", perf_key="min_simulation_time")
+    # def extract_min_simulation_time(self):
+    #     """Extract minimum simulation time reported by gprMax."""
+    #     return sn.round(self.extract_simulation_time_per_rank().min(), 2)
+
+    # @performance_function("s", perf_key="wall_time")
+    # def extract_wall_time(self):
+    #     """Extract total simulation time reported by gprMax."""
+    #     return sn.round(self.extract_simulation_time_per_rank().sum(), 2)
+
+    def extract_simulation_time_per_rank(self) -> npt.NDArray[np.float64]:
+        """Extract simulation time reported by gprMax from each rank.
+
+        Raises:
+            ValueError: Raised if not all ranks report the simulation
+                time.
+
+        Returns:
+            simulation_times: Simulation time for each rank in seconds.
+        """
+        simulation_time = sn.extractall(
+            r"=== Simulation completed in "
+            r"((?<= )(?P<hours>\d+) hours?)?\D*"
+            r"((?<= )(?P<minutes>\d+) minutes?)?\D*"
+            r"((?<= )(?P<seconds>[\d\.]+) seconds?)?\D*=+",
             self.stdout,
-            "timeframe",
+            ["hours", "minutes", "seconds"],
+            lambda x: 0.0 if x is None else float(x),
         )
-        if timeframe == "hour":
-            simulation_time = sn.extractall(
-                r"=== Simulation completed in (?P<hours>\S+) hours?, (?P<minutes>\S+) minutes? and (?P<seconds>\S+) seconds? =*",
-                self.stdout,
-                ["hours", "minutes", "seconds"],
-                float,
+
+        # Check simulation time was reported by all ranks
+        if sn.len(simulation_time) != self.num_tasks:
+            raise ValueError(
+                f"Simulation time not reported for all ranks. Found {sn.len(simulation_time)}, expected {self.num_tasks}"
             )
-            hours = simulation_time[0][0]
-            minutes = simulation_time[0][1]
-            seconds = simulation_time[0][2]
-        elif timeframe == "minute":
-            hours = 0
-            simulation_time = sn.extractall(
-                r"=== Simulation completed in (?P<minutes>\S+) minutes? and (?P<seconds>\S+) seconds? =*",
-                self.stdout,
-                ["minutes", "seconds"],
-                float,
+
+        # Convert hour and minute values to seconds
+        simulation_time = np.array(simulation_time.evaluate())
+
+        simulation_time[:, 0] *= 3600
+        simulation_time[:, 1] *= 60
+
+        # Return simulation time in seconds for each rank
+        return simulation_time.sum(axis=1)
+
+    @performance_function("GB", perf_key="total_memory_use")
+    def extract_total_memory_use(self):
+        """Extract total memory use across all ranks."""
+        return sn.round(self.extract_memory_use_per_rank().sum(), 2)
+
+    @performance_function("GB", perf_key="average_memory_use")
+    def extract_average_memory_use(self):
+        """Extract average memory use for each rank."""
+        return sn.round(self.extract_memory_use_per_rank().sum() / self.num_tasks, 2)
+
+    # @performance_function("GB", perf_key="min_memory_use")
+    # def extract_min_memory_use(self):
+    #     """Extract minimum memory use by a single rank."""
+    #     return sn.round(self.extract_memory_use_per_rank().min(), 2)
+
+    # @performance_function("GB", perf_key="max_memory_use")
+    # def extract_max_memory_use(self):
+    #     """Extract maximum memory use by a single rank."""
+    #     return sn.round(self.extract_memory_use_per_rank().max(), 2)
+
+    def extract_memory_use_per_rank(self) -> npt.NDArray[np.float64]:
+        """Extract gprMax report of the estimated memory use per rank.
+
+        Raises:
+            ValueError: Raised if not all ranks report their estimated
+                memory usage.
+
+        Returns:
+            usages: Estimated memory usage for each rank in GB.
+        """
+        memory_report = sn.extractall(
+            r"Memory used \(estimated\): ~(?P<memory_usage>\S+) (?P<units>\S+)",
+            self.stdout,
+            ["memory_usage", "units"],
+            [float, str],
+        )
+
+        # Check all ranks reported their estimated memory usage
+        if sn.len(memory_report) != self.num_tasks:
+            raise ValueError(
+                f"Memory usage not reported for all ranks. Found {sn.len(memory_report)}, expected {self.num_tasks}"
             )
-            minutes = simulation_time[0][0]
-            seconds = simulation_time[0][1]
-        else:
-            hours = 0
-            minutes = 0
-            seconds = sn.extractsingle(
-                r"=== Simulation completed in (?P<seconds>\S+) seconds? =*",
-                self.stdout,
-                "seconds",
-                float,
-            )
-        return hours * 3600 + minutes * 60 + seconds
+
+        usages = np.zeros(self.num_tasks)
+
+        # Convert all values into GB
+        for index, (value, unit) in enumerate(memory_report):
+            if unit == "MB":
+                value /= 1024
+            elif unit == "KB":
+                value /= 1048576
+            elif unit != "GB":
+                raise ValueError(f"Unknown unit of memory '{unit}'")
+
+            usages[index] = value
+
+        return usages