From a31cb29bc2c82cc23e1eb89daff5fa02a1e7fb08 Mon Sep 17 00:00:00 2001
From: nmannall
Date: Wed, 24 Jan 2024 12:02:50 +0000
Subject: [PATCH] Add script to save benchmarking data

---
 requirements.txt                   |  1 +
 tests/utilities/process_perflog.py | 71 ++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 tests/utilities/process_perflog.py

diff --git a/requirements.txt b/requirements.txt
index 026af7a2..c1284096 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,6 +11,7 @@ matplotlib
 # mpi4py
 numpy
 numpy-stl
+pandas
 pip
 pre-commit
 psutil
diff --git a/tests/utilities/process_perflog.py b/tests/utilities/process_perflog.py
new file mode 100644
index 00000000..ccd9cdab
--- /dev/null
+++ b/tests/utilities/process_perflog.py
@@ -0,0 +1,71 @@
+import argparse
+import re
+from datetime import datetime
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import pandas as pd
+
+
+def get_parameter(row, parameter):
+    """Return the value recorded for ``parameter`` in a perflog row.
+
+    Args:
+        row: Mapping (e.g. a dataframe row) whose "info" entry holds
+            whitespace-separated ``%name=value`` tokens.
+        parameter: Name of the parameter to look up.
+
+    Returns:
+        The value as a string, or None if the row does not record it.
+    """
+    # The value runs up to the next whitespace or the end of the string, so
+    # a parameter that is the last token in "info" is still found.
+    match = re.search(rf"\s%{re.escape(parameter)}=(?P<value>\S+)", row["info"])
+    return match["value"] if match else None
+
+
+def get_parameter_names(item):
+    """Return the names of all ``%name=value`` tokens found in ``item``."""
+    return re.findall(r"\s%(?P<name>\S+)=\S+", item)
+
+
+columns_to_keep = ["num_tasks", "num_cpus_per_task", "num_tasks_per_node", "run_time_value", "simulation_time_value"]
+
+if __name__ == "__main__":
+    # Parse command line arguments
+    parser = argparse.ArgumentParser(
+        usage="cd gprMax/tests; python -m utilities.process_perflog inputfile [-o OUTPUT]",
+        description="Extract perfvars from reframe perflog file.",
+    )
+    parser.add_argument("inputfile", help="name of input file including path")
+    parser.add_argument("--output", "-o", help="name of output file including path", required=False)
+
+    args = parser.parse_args()
+
+    perflog = pd.read_csv(args.inputfile, index_col=False)
+
+    # Extract recorded parameters and create a new column for them in the dataframe.
+    # Rows without parameters explode to NaN, which is dropped rather than used as a name.
+    parameters = perflog["info"].map(get_parameter_names).explode().dropna().unique()
+    for parameter in parameters:
+        perflog[parameter] = perflog.apply(get_parameter, args=(parameter,), axis=1)
+
+    # Organise dataframe
+    columns_to_keep += parameters.tolist()
+    columns_to_keep.sort()
+    perflog = perflog[columns_to_keep].sort_values(columns_to_keep)
+    perflog["simulation_time_value"] = perflog["simulation_time_value"].apply(round, args=(2,))
+    perflog = perflog.rename(columns={"simulation_time_value": "simulation_time", "run_time_value": "run_time"})
+
+    # Save output to file
+    if args.output:
+        outputfile = args.output
+    else:
+        stem = f"{Path(args.inputfile).stem}_{datetime.today().strftime('%Y-%m-%d_%H-%M-%S')}"
+        # Append ".csv" by hand: with_suffix() replaces whatever follows the last "."
+        # in the name, so a dotted input stem would lose its timestamp.
+        outputfile = Path("benchmarks", f"{stem}.csv")
+    # to_csv does not create missing directories
+    Path(outputfile).parent.mkdir(parents=True, exist_ok=True)
+    perflog.to_csv(outputfile, index=False)
+    print(f"Saved benchmark: '{outputfile}'")