你已经派生过 gprMax
镜像自地址
https://gitee.com/sunhf/gprMax.git
已同步 2025-08-06 12:36:51 +08:00
Updated benchmarking mode to enable benchmarking with either the CPU or the GPU solver.
Added benchmarking metric of 'cells per second'.
这个提交包含在:
@@ -251,11 +251,18 @@ def run_benchmark_sim(args, inputfile, usernamespace):
|
||||
hyperthreading = ', {} cores with Hyper-Threading'.format(hostinfo['logicalcores']) if hostinfo['hyperthreading'] else ''
|
||||
machineIDlong = '{}; {} x {} ({} cores{}); {} RAM; {}'.format(hostinfo['machineID'], hostinfo['sockets'], hostinfo['cpuID'], hostinfo['physicalcores'], hyperthreading, human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True), hostinfo['osversion'])
|
||||
|
||||
# Initialise arrays to hold CPU thread info and times, and GPU info and times
|
||||
cputhreads = np.array([], dtype=np.int32)
|
||||
cputimes = np.array([])
|
||||
gpuIDs = []
|
||||
gputimes = np.array([])
|
||||
|
||||
# CPU only benchmarking
|
||||
if args.gpu is None:
|
||||
# Number of CPU threads to benchmark - start from single thread and double threads until maximum number of physical cores
|
||||
threads = 1
|
||||
maxthreads = hostinfo['physicalcores']
|
||||
maxthreadspersocket = hostinfo['physicalcores'] / hostinfo['sockets']
|
||||
cputhreads = np.array([], dtype=np.int32)
|
||||
while threads < maxthreadspersocket:
|
||||
cputhreads = np.append(cputhreads, int(threads))
|
||||
threads *= 2
|
||||
@@ -272,40 +279,37 @@ def run_benchmark_sim(args, inputfile, usernamespace):
|
||||
|
||||
numbermodelruns = len(cputhreads)
|
||||
|
||||
# Both CPU and GPU benchmarking
|
||||
gpus = None
|
||||
gpuIDs = []
|
||||
gputimes = np.array([])
|
||||
if args.gpu is not None:
|
||||
# GPU only benchmarking
|
||||
else:
|
||||
# Set size of array to store GPU runtimes and number of runs of model required
|
||||
if isinstance(args.gpu, list):
|
||||
for gpu in args.gpu:
|
||||
gpuIDs.append(gpu.name)
|
||||
gputimes = np.zeros(len(args.gpu))
|
||||
numbermodelruns += len(args.gpu)
|
||||
numbermodelruns = len(args.gpu)
|
||||
else:
|
||||
gpuIDs.append(args.gpu.name)
|
||||
gputimes = np.zeros(1)
|
||||
numbermodelruns += 1
|
||||
# Store GPU information in a temp variable and set args.gpu to None to do CPU benchmarking first
|
||||
numbermodelruns = 1
|
||||
# Store GPU information in a temp variable
|
||||
gpus = args.gpu
|
||||
args.gpu = None
|
||||
|
||||
usernamespace['number_model_runs'] = numbermodelruns
|
||||
modelend = numbermodelruns + 1
|
||||
|
||||
for currentmodelrun in range(1, modelend):
|
||||
# Set args.gpu if doing GPU benchmark
|
||||
if currentmodelrun > len(cputhreads):
|
||||
if isinstance(gpus, list):
|
||||
args.gpu = gpus[(currentmodelrun - 1) - len(cputhreads)]
|
||||
else:
|
||||
args.gpu = gpus
|
||||
# del os.environ['OMP_NUM_THREADS']
|
||||
gputimes[(currentmodelrun - 1) - len(cputhreads)] = run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, usernamespace)
|
||||
else:
|
||||
# Run CPU benchmark
|
||||
if args.gpu is None:
|
||||
os.environ['OMP_NUM_THREADS'] = str(cputhreads[currentmodelrun - 1])
|
||||
cputimes[currentmodelrun - 1] = run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, usernamespace)
|
||||
# Run GPU benchmark
|
||||
else:
|
||||
if isinstance(gpus, list):
|
||||
args.gpu = gpus[(currentmodelrun - 1)]
|
||||
else:
|
||||
args.gpu = gpus
|
||||
os.environ['OMP_NUM_THREADS'] = str(hostinfo['physicalcores'])
|
||||
gputimes[(currentmodelrun - 1)] = run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, usernamespace)
|
||||
|
||||
# Get model size (in cells) and number of iterations
|
||||
if currentmodelrun == 1:
|
||||
@@ -318,7 +322,7 @@ def run_benchmark_sim(args, inputfile, usernamespace):
|
||||
numcells = f.attrs['nx, ny, nz']
|
||||
|
||||
# Save number of threads and benchmarking times to NumPy archive
|
||||
np.savez(os.path.splitext(inputfile.name)[0], machineID=machineIDlong, gpuIDs=[], cputhreads=cputhreads, cputimes=cputimes, gputimes=[], iterations=iterations, numcells=numcells, version=__version__)
|
||||
np.savez(os.path.splitext(inputfile.name)[0], machineID=machineIDlong, gpuIDs=gpuIDs, cputhreads=cputhreads, cputimes=cputimes, gputimes=gputimes, iterations=iterations, numcells=numcells, version=__version__)
|
||||
|
||||
simcompletestr = '\n=== Simulation completed'
|
||||
print('{} {}\n'.format(simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr))))
|
||||
|
@@ -27,7 +27,8 @@ import matplotlib.gridspec as gridspec
|
||||
import numpy as np
|
||||
|
||||
from gprMax._version import __version__
|
||||
from gprMax.utilities import get_host_info, human_size
|
||||
from gprMax.utilities import get_host_info
|
||||
from gprMax.utilities import human_size
|
||||
|
||||
|
||||
"""Plots execution times and speedup factors from benchmarking models run with different numbers of CPU (OpenMP) threads. Can also benchmark GPU(s) if required. Results are read from a NumPy archive."""
|
||||
@@ -54,17 +55,18 @@ except KeyError:
|
||||
machineIDlong = '{}; {} x {} ({} cores{}); {} RAM; {}'.format(hostinfo['machineID'], hostinfo['sockets'], hostinfo['cpuID'], hostinfo['physicalcores'], hyperthreading, human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True), hostinfo['osversion'])
|
||||
print('Host: {}'.format(machineIDlong))
|
||||
|
||||
# Base result - threads and times info from Numpy archive
|
||||
# Base result - general info
|
||||
print('Model: {}'.format(args.baseresult))
|
||||
for i in range(len(baseresult['cputhreads'])):
|
||||
print('{} CPU (OpenMP) thread(s): {:g} s'.format(baseresult['cputhreads'][i], baseresult['cputimes'][i]))
|
||||
cells = np.array([baseresult['numcells'][0]]) # Length of cubic model side for cells per second metric
|
||||
baseplotlabel = os.path.splitext(os.path.split(args.baseresult)[1])[0] + '.in'
|
||||
|
||||
# Base result - arrays for length of cubic model side and cells per second metric
|
||||
cells = np.array([baseresult['numcells'][0]])
|
||||
# Base result - CPU threads and times info from Numpy archive
|
||||
if baseresult['cputhreads'].size != 0:
|
||||
for i in range(len(baseresult['cputhreads'])):
|
||||
print('{} CPU (OpenMP) thread(s): {:g} s'.format(baseresult['cputhreads'][i], baseresult['cputimes'][i]))
|
||||
cpucellspersec = np.array([(baseresult['numcells'][0] * baseresult['numcells'][1] * baseresult['numcells'][2] * baseresult['iterations']) / baseresult['cputimes'][0]])
|
||||
|
||||
# Base result for GPU if required - time info
|
||||
# Base result - GPU time info
|
||||
gpuIDs = baseresult['gpuIDs'].tolist()
|
||||
if gpuIDs:
|
||||
gpucellspersec = np.zeros((len(gpuIDs), 1))
|
||||
@@ -79,15 +81,16 @@ if args.otherresults is not None:
|
||||
for i, result in enumerate(args.otherresults):
|
||||
otherresults.append(dict(np.load(result)))
|
||||
print('\nModel: {}'.format(result))
|
||||
for thread in range(len(otherresults[i]['cputhreads'])):
|
||||
print('{} CPU (OpenMP) thread(s): {:g} s'.format(otherresults[i]['cputhreads'][thread], otherresults[i]['cputimes'][thread]))
|
||||
cells = np.append(cells, otherresults[i]['numcells'][0]) # Length of cubic model side for cells per second metric
|
||||
otherplotlabels.append(os.path.splitext(os.path.split(result)[1])[0] + '.in')
|
||||
|
||||
# Arrays for length of cubic model side and cells per second metric
|
||||
cells = np.append(cells, otherresults[i]['numcells'][0])
|
||||
# CPU
|
||||
if otherresults[i]['cputhreads'].size != 0:
|
||||
for thread in range(len(otherresults[i]['cputhreads'])):
|
||||
print('{} CPU (OpenMP) thread(s): {:g} s'.format(otherresults[i]['cputhreads'][thread], otherresults[i]['cputimes'][thread]))
|
||||
cpucellspersec = np.append(cpucellspersec, (otherresults[i]['numcells'][0] * otherresults[i]['numcells'][1] * otherresults[i]['numcells'][2] * otherresults[i]['iterations']) / otherresults[i]['cputimes'][0])
|
||||
|
||||
# Other results GPU
|
||||
# GPU
|
||||
othergpuIDs = otherresults[i]['gpuIDs'].tolist()
|
||||
if othergpuIDs:
|
||||
# Array for cells per second metric
|
||||
@@ -113,11 +116,13 @@ markers = ['o', 'd', '^', 's', '*']
|
||||
fig, ax = plt.subplots(num=machineID, figsize=(30, 10), facecolor='w', edgecolor='w')
|
||||
fig.suptitle(machineIDlong + '\ngprMax v' + version)
|
||||
gs = gridspec.GridSpec(1, 3, hspace=0.5)
|
||||
plotcount = 0
|
||||
|
||||
###########################################
|
||||
# Subplot of CPU (OpenMP) threads vs time #
|
||||
###########################################
|
||||
ax = plt.subplot(gs[0, 0])
|
||||
if baseresult['cputhreads'].size != 0:
|
||||
ax = plt.subplot(gs[0, plotcount])
|
||||
ax.plot(baseresult['cputhreads'], baseresult['cputimes'], color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, label=baseplotlabel)
|
||||
|
||||
if args.otherresults is not None:
|
||||
@@ -133,12 +138,14 @@ frame.set_edgecolor('white')
|
||||
ax.set_xlim([0, baseresult['cputhreads'][0] * 1.1])
|
||||
ax.set_xticks(np.append(baseresult['cputhreads'], 0))
|
||||
ax.set_ylim(0, top=ax.get_ylim()[1] * 1.1)
|
||||
plotcount += 1
|
||||
|
||||
######################################################
|
||||
# Subplot of CPU (OpenMP) threads vs speed-up factor #
|
||||
######################################################
|
||||
colors = itertools.cycle(colorIDs) # Reset color iterator
|
||||
ax = plt.subplot(gs[0, 1])
|
||||
if baseresult['cputhreads'].size != 0:
|
||||
ax = plt.subplot(gs[0, plotcount])
|
||||
ax.plot(baseresult['cputhreads'], baseresult['cputimes'][-1] / baseresult['cputimes'], color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, label=baseplotlabel)
|
||||
|
||||
if args.otherresults is not None:
|
||||
@@ -154,17 +161,37 @@ frame.set_edgecolor('white')
|
||||
ax.set_xlim([0, baseresult['cputhreads'][0] * 1.1])
|
||||
ax.set_xticks(np.append(baseresult['cputhreads'], 0))
|
||||
ax.set_ylim(bottom=1, top=ax.get_ylim()[1] * 1.1)
|
||||
plotcount += 1
|
||||
|
||||
###########################################
|
||||
# Subplot of simulation size vs cells/sec #
|
||||
###########################################
|
||||
|
||||
def autolabel(rects, axes=None):
    """Attach a text label above each bar on a matplotlib bar chart displaying its height.

    Args:
        rects: Handle to bar chart, i.e. an iterable of bars that each
            provide get_x(), get_width() and get_height().
        axes: Object whose text() method is used to draw the labels.
            Defaults to None, in which case the module-level ``ax`` that is
            current at call time is used (the original behaviour).
    """

    # Resolve the target at call time so that existing callers, which rely on
    # the module-level ``ax`` being rebound before each subplot, are unaffected.
    target = ax if axes is None else axes

    for rect in rects:
        height = rect.get_height()
        # Centre the label horizontally on the bar; it is anchored at the bar top
        xcentre = rect.get_x() + rect.get_width() / 2
        # The height is truncated to an integer before being used as label text
        target.text(xcentre, height, '{:d}'.format(int(height)),
                    ha='center', va='bottom', fontsize=10, rotation=90)
|
||||
|
||||
colors = itertools.cycle(colorIDs) # Reset color iterator
|
||||
ax = plt.subplot(gs[0, 2])
|
||||
ax.plot(cells, cpucellspersec / 1e6, color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, label=cpuID)
|
||||
ax = plt.subplot(gs[0, plotcount])
|
||||
barwidth = 8 # the width of the bars
|
||||
|
||||
if baseresult['cputhreads'].size != 0:
|
||||
cpu = ax.bar(cells - (1/2) * barwidth, cpucellspersec / 1e6, barwidth, color=next(colors), edgecolor='none', label=cpuID)
|
||||
autolabel(cpu)
|
||||
|
||||
if gpuIDs:
|
||||
positions = np.arange(-gpucellspersec.shape[0] / 2, gpucellspersec.shape[0] / 2, 1)
|
||||
for i in range(gpucellspersec.shape[0]):
|
||||
ax.plot(cells, gpucellspersec[i,:] / 1e6, color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, label='NVIDIA ' + gpuIDs[i])
|
||||
gpu = ax.bar(cells + positions[i] * barwidth, gpucellspersec[i,:] / 1e6, barwidth, color=next(colors), edgecolor='none', label='NVIDIA ' + gpuIDs[i])
|
||||
autolabel(gpu)
|
||||
|
||||
ax.set_xlabel('Side length of cubic domain [cells]')
|
||||
ax.set_ylabel('Performance [Mcells/s]')
|
||||
@@ -172,6 +199,8 @@ ax.grid()
|
||||
legend = ax.legend(loc=2)
|
||||
frame = legend.get_frame()
|
||||
frame.set_edgecolor('white')
|
||||
ax.set_xticks(cells)
|
||||
ax.set_xticklabels(cells)
|
||||
ax.set_xlim([0, cells[-1] * 1.1])
|
||||
ax.set_ylim(bottom=0, top=ax.get_ylim()[1] * 1.1)
|
||||
|
||||
|
在新工单中引用
屏蔽一个用户