Add memory usage to performance report

- Update calculation of runtime to query SLURM database
- Improve regex to extract gprMax reported simulation time
This commit is contained in:
Nathan Mannall
2025-03-05 18:29:41 +00:00
Parent e6262339f5
Commit 5669002757


@@ -9,6 +9,8 @@ import os
 from pathlib import Path
 from typing import Literal, Optional, Union
+import numpy as np
+import numpy.typing as npt
 import reframe.utility.sanity as sn
 import reframe.utility.typecheck as typ
 from reframe import RunOnlyRegressionTest, simple_test
@@ -23,7 +25,7 @@ from reframe.core.builtins import (
     variable,
 )
 from reframe.core.exceptions import DependencyError
-from reframe.utility import udeps
+from reframe.utility import osext, udeps
 from reframe_tests.tests.regression_checks import RegressionCheck
 from reframe_tests.utilities.deferrable import path_join
@@ -130,7 +132,7 @@ class GprMaxBaseTest(RunOnlyRegressionTest):
     model = parameter()
     sourcesdir = required
-    executable = "time -p python -m gprMax"
+    executable = "python -m gprMax"
     regression_checks = variable(typ.List[RegressionCheck], value=[])
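For context on the runtime change in the hunk below (illustrative only, not part of the commit): `sacct` reports a job step's `Elapsed` field as `[DD-]HH:MM:SS`, and the new `extract_run_time` converts the matched hours, minutes and seconds into a total in seconds. A minimal standalone sketch of that conversion, assuming the usual `Elapsed` format:

# Illustrative only: convert an `Elapsed` value as reported by `sacct`
# (HH:MM:SS, with an optional leading "DD-" for jobs longer than a day)
# into seconds.
import re

def elapsed_to_seconds(elapsed: str) -> int:
    match = re.fullmatch(r"(?:(?P<days>\d+)-)?(?P<h>\d+):(?P<m>\d+):(?P<s>\d+)", elapsed)
    if match is None:
        raise ValueError(f"Unrecognised Elapsed value: {elapsed!r}")
    days = int(match.group("days") or 0)
    hours, minutes, seconds = (int(match.group(g)) for g in ("h", "m", "s"))
    return (days * 24 + hours) * 3600 + minutes * 60 + seconds

assert elapsed_to_seconds("01:02:03") == 3723
assert elapsed_to_seconds("2-00:00:30") == 172830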
@@ -391,49 +393,134 @@ class GprMaxBaseTest(RunOnlyRegressionTest):
     @performance_function("s", perf_key="run_time")
     def extract_run_time(self):
-        """Extract total runtime from the last task to complete."""
-        return sn.extractsingle(
-            r"real\s+(?P<run_time>\S+)", self.stderr, "run_time", float, self.num_tasks - 1
+        """Extract total runtime from SLURM."""
+        sactt_command = osext.run_command(
+            [
+                "sacct",
+                "--format=JobID,JobName,State,Elapsed",
+                "-j",
+                self.job.jobid,
+            ]
         )
+        hours, minutes, seconds = sn.extractsingle_s(
+            self.job.jobid
+            + r"\.0\s+python\s+COMPLETED\s+(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)",
+            sactt_command.stdout,
+            ["hours", "minutes", "seconds"],
+            int,
+        )
+        return hours * 3600 + minutes * 60 + seconds

     @performance_function("s", perf_key="simulation_time")
     def extract_simulation_time(self):
-        """Extract simulation time reported by gprMax."""
-        # sn.extractall throws an error if a group has value None.
-        # Therefore have to handle the < 1 min, >= 1 min and >= 1 hour cases separately.
-        timeframe = sn.extractsingle(
-            r"=== Simulation completed in \S+ (?P<timeframe>hour|minute|second)",
-            self.stdout,
-            "timeframe",
-        )
-        if timeframe == "hour":
-            simulation_time = sn.extractall(
-                r"=== Simulation completed in (?P<hours>\S+) hours?, (?P<minutes>\S+) minutes? and (?P<seconds>\S+) seconds? =*",
-                self.stdout,
-                ["hours", "minutes", "seconds"],
-                float,
-            )
-            hours = simulation_time[0][0]
-            minutes = simulation_time[0][1]
-            seconds = simulation_time[0][2]
-        elif timeframe == "minute":
-            simulation_time = sn.extractall(
-                r"=== Simulation completed in (?P<minutes>\S+) minutes? and (?P<seconds>\S+) seconds? =*",
-                self.stdout,
-                ["minutes", "seconds"],
-                float,
-            )
-            minutes = simulation_time[0][0]
-            seconds = simulation_time[0][1]
-        else:
-            hours = 0
-            minutes = 0
-            seconds = sn.extractsingle(
-                r"=== Simulation completed in (?P<seconds>\S+) seconds? =*",
-                self.stdout,
-                "seconds",
-                float,
-            )
-        return hours * 3600 + minutes * 60 + seconds
+        """Extract average simulation time reported by gprMax."""
+        return sn.round(self.extract_simulation_time_per_rank().sum() / self.num_tasks, 2)
+
+    # @performance_function("s", perf_key="max_simulation_time")
+    # def extract_max_simulation_time(self):
+    #     """Extract maximum simulation time reported by gprMax."""
+    #     return sn.round(self.extract_simulation_time_per_rank().max(), 2)
+
+    # @performance_function("s", perf_key="min_simulation_time")
+    # def extract_min_simulation_time(self):
+    #     """Extract minimum simulation time reported by gprMax."""
+    #     return sn.round(self.extract_simulation_time_per_rank().min(), 2)
+
+    # @performance_function("s", perf_key="wall_time")
+    # def extract_wall_time(self):
+    #     """Extract total simulation time reported by gprMax."""
+    #     return sn.round(self.extract_simulation_time_per_rank().sum(), 2)
+
+    def extract_simulation_time_per_rank(self) -> npt.NDArray[np.float64]:
+        """Extract simulation time reported by gprMax from each rank.
+
+        Raises:
+            ValueError: Raised if not all ranks report the simulation
+                time.
+
+        Returns:
+            simulation_times: Simulation time for each rank in seconds.
+        """
+        simulation_time = sn.extractall(
+            r"=== Simulation completed in "
+            r"((?<= )(?P<hours>\d+) hours?)?\D*"
+            r"((?<= )(?P<minutes>\d+) minutes?)?\D*"
+            r"((?<= )(?P<seconds>[\d\.]+) seconds?)?\D*=+",
+            self.stdout,
+            ["hours", "minutes", "seconds"],
+            lambda x: 0.0 if x is None else float(x),
+        )
+
+        # Check simulation time was reported by all ranks
+        if sn.len(simulation_time) != self.num_tasks:
+            raise ValueError(
+                f"Simulation time not reported for all ranks. Found {sn.len(simulation_time)}, expected {self.num_tasks}"
+            )
+
+        # Convert hour and minute values to seconds
+        simulation_time = np.array(simulation_time.evaluate())
+        simulation_time[:, 0] *= 3600
+        simulation_time[:, 1] *= 60
+
+        # Return simulation time in seconds for each rank
+        return simulation_time.sum(axis=1)
+
+    @performance_function("GB", perf_key="total_memory_use")
+    def extract_total_memory_use(self):
+        """Extract total memory use across all ranks."""
+        return sn.round(self.extract_memory_use_per_rank().sum(), 2)
+
+    @performance_function("GB", perf_key="average_memory_use")
+    def extract_average_memory_use(self):
+        """Extract average memory use for each rank."""
+        return sn.round(self.extract_memory_use_per_rank().sum() / self.num_tasks, 2)
+
+    # @performance_function("GB", perf_key="min_memory_use")
+    # def extract_min_memory_use(self):
+    #     """Extract minimum memory use by a single rank."""
+    #     return sn.round(self.extract_memory_use_per_rank().min(), 2)
+
+    # @performance_function("GB", perf_key="max_memory_use")
+    # def extract_max_memory_use(self):
+    #     """Extract maximum memory use by a single rank."""
+    #     return sn.round(self.extract_memory_use_per_rank().max(), 2)
+
+    def extract_memory_use_per_rank(self) -> npt.NDArray[np.float64]:
+        """Extract gprMax report of the estimated memory use per rank.
+
+        Raises:
+            ValueError: Raised if not all ranks report their estimated
+                memory usage.
+
+        Returns:
+            usages: Estimated memory usage for each rank in GB.
+        """
+        memory_report = sn.extractall(
+            r"Memory used \(estimated\): ~(?P<memory_usage>\S+) (?P<units>\S+)",
+            self.stdout,
+            ["memory_usage", "units"],
+            [float, str],
+        )
+
+        # Check all ranks reported their estimated memory usage
+        if sn.len(memory_report) != self.num_tasks:
+            raise ValueError(
+                f"Memory usage not reported for all ranks. Found {len(memory_report)}, expected {self.num_tasks}"
+            )
+
+        usages = np.zeros(self.num_tasks)
+        # Convert all values into GB
+        for index, (value, unit) in enumerate(memory_report):
+            if unit == "MB":
+                value /= 1024
+            elif unit == "KB":
+                value /= 1048576
+            elif unit != "GB":
+                raise ValueError(f"Unknown unit of memory '{unit}'")
+            usages[index] = value
+
+        return usages
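As a quick aside (not part of the commit), the reworked simulation-time pattern above makes the hour and minute groups optional, so one regex covers the three message shapes that the old code handled in separate hour, minute and second branches, with missing groups returned as None and converted to 0.0. A standalone check, using illustrative gprMax-style output lines:

# Illustrative check of the new simulation-time pattern against the
# three shapes of the gprMax completion message.
import re

PATTERN = (
    r"=== Simulation completed in "
    r"((?<= )(?P<hours>\d+) hours?)?\D*"
    r"((?<= )(?P<minutes>\d+) minutes?)?\D*"
    r"((?<= )(?P<seconds>[\d\.]+) seconds?)?\D*=+"
)

samples = [
    "=== Simulation completed in 2 hours, 3 minutes and 4.56 seconds ===",
    "=== Simulation completed in 3 minutes and 4.56 seconds ===",
    "=== Simulation completed in 4.56 seconds ===",
]

for line in samples:
    groups = re.search(PATTERN, line).groupdict()
    # Units absent from the message come back as None; the test's
    # conversion function maps them to 0.0 before the per-rank
    # hours * 3600 + minutes * 60 + seconds sum.
    print({k: (0.0 if v is None else float(v)) for k, v in groups.items()})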