diff --git a/gprMax/gprMax.py b/gprMax/gprMax.py index 1c889f44..ee53b251 100644 --- a/gprMax/gprMax.py +++ b/gprMax/gprMax.py @@ -251,61 +251,65 @@ def run_benchmark_sim(args, inputfile, usernamespace): hyperthreading = ', {} cores with Hyper-Threading'.format(hostinfo['logicalcores']) if hostinfo['hyperthreading'] else '' machineIDlong = '{}; {} x {} ({} cores{}); {} RAM; {}'.format(hostinfo['machineID'], hostinfo['sockets'], hostinfo['cpuID'], hostinfo['physicalcores'], hyperthreading, human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True), hostinfo['osversion']) - # Number of CPU threads to benchmark - start from single thread and double threads until maximum number of physical cores - threads = 1 - maxthreads = hostinfo['physicalcores'] - maxthreadspersocket = hostinfo['physicalcores'] / hostinfo['sockets'] + # Initialise arrays to hold CPU thread info and times, and GPU info and times cputhreads = np.array([], dtype=np.int32) - while threads < maxthreadspersocket: - cputhreads = np.append(cputhreads, int(threads)) - threads *= 2 - # Check for system with only single thread - if cputhreads.size == 0: - cputhreads = np.append(cputhreads, threads) - # Add maxthreadspersocket and maxthreads if necessary - if cputhreads[-1] != maxthreadspersocket: - cputhreads = np.append(cputhreads, int(maxthreadspersocket)) - if cputhreads[-1] != maxthreads: - cputhreads = np.append(cputhreads, int(maxthreads)) - cputhreads = cputhreads[::-1] - cputimes = np.zeros(len(cputhreads)) - - numbermodelruns = len(cputhreads) - - # Both CPU and GPU benchmarking - gpus = None + cputimes = np.array([]) gpuIDs = [] gputimes = np.array([]) - if args.gpu is not None: + + # CPU only benchmarking + if args.gpu is None: + # Number of CPU threads to benchmark - start from single thread and double threads until maximum number of physical cores + threads = 1 + maxthreads = hostinfo['physicalcores'] + maxthreadspersocket = hostinfo['physicalcores'] / hostinfo['sockets'] + while threads < maxthreadspersocket: + cputhreads = np.append(cputhreads, int(threads)) + threads *= 2 + # Check for system with only single thread + if cputhreads.size == 0: + cputhreads = np.append(cputhreads, threads) + # Add maxthreadspersocket and maxthreads if necessary + if cputhreads[-1] != maxthreadspersocket: + cputhreads = np.append(cputhreads, int(maxthreadspersocket)) + if cputhreads[-1] != maxthreads: + cputhreads = np.append(cputhreads, int(maxthreads)) + cputhreads = cputhreads[::-1] + cputimes = np.zeros(len(cputhreads)) + + numbermodelruns = len(cputhreads) + + # GPU only benchmarking + else: # Set size of array to store GPU runtimes and number of runs of model required if isinstance(args.gpu, list): for gpu in args.gpu: gpuIDs.append(gpu.name) gputimes = np.zeros(len(args.gpu)) - numbermodelruns += len(args.gpu) + numbermodelruns = len(args.gpu) else: gpuIDs.append(args.gpu.name) gputimes = np.zeros(1) - numbermodelruns += 1 - # Store GPU information in a temp variable and set args.gpu to None to do CPU benchmarking first + numbermodelruns = 1 + # Store GPU information in a temp variable gpus = args.gpu - args.gpu = None usernamespace['number_model_runs'] = numbermodelruns modelend = numbermodelruns + 1 for currentmodelrun in range(1, modelend): - # Set args.gpu if doing GPU benchmark - if currentmodelrun > len(cputhreads): - if isinstance(gpus, list): - args.gpu = gpus[(currentmodelrun - 1) - len(cputhreads)] - else: - args.gpu = gpus - # del os.environ['OMP_NUM_THREADS'] - gputimes[(currentmodelrun - 1) - len(cputhreads)] = run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, usernamespace) - else: + # Run CPU benchmark + if args.gpu is None: os.environ['OMP_NUM_THREADS'] = str(cputhreads[currentmodelrun - 1]) cputimes[currentmodelrun - 1] = run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, usernamespace) + # Run GPU benchmark + else: + if isinstance(gpus, list): + args.gpu = gpus[(currentmodelrun - 1)] + else: + args.gpu = gpus + os.environ['OMP_NUM_THREADS'] = str(hostinfo['physicalcores']) + gputimes[(currentmodelrun - 1)] = run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, usernamespace) # Get model size (in cells) and number of iterations if currentmodelrun == 1: @@ -318,7 +322,7 @@ def run_benchmark_sim(args, inputfile, usernamespace): numcells = f.attrs['nx, ny, nz'] # Save number of threads and benchmarking times to NumPy archive - np.savez(os.path.splitext(inputfile.name)[0], machineID=machineIDlong, gpuIDs=[], cputhreads=cputhreads, cputimes=cputimes, gputimes=[], iterations=iterations, numcells=numcells, version=__version__) + np.savez(os.path.splitext(inputfile.name)[0], machineID=machineIDlong, gpuIDs=gpuIDs, cputhreads=cputhreads, cputimes=cputimes, gputimes=gputimes, iterations=iterations, numcells=numcells, version=__version__) simcompletestr = '\n=== Simulation completed' print('{} {}\n'.format(simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr)))) diff --git a/tests/benchmarking/plot_benchmark.py b/tests/benchmarking/plot_benchmark.py index a5dc6a62..90e6a0b4 100644 --- a/tests/benchmarking/plot_benchmark.py +++ b/tests/benchmarking/plot_benchmark.py @@ -27,7 +27,8 @@ import matplotlib.gridspec as gridspec import numpy as np from gprMax._version import __version__ -from gprMax.utilities import get_host_info, human_size +from gprMax.utilities import get_host_info +from gprMax.utilities import human_size """Plots execution times and speedup factors from benchmarking models run with different numbers of CPU (OpenMP) threads. Can also benchmark GPU(s) if required. Results are read from a NumPy archive.""" @@ -54,17 +55,18 @@ except KeyError: machineIDlong = '{}; {} x {} ({} cores{}); {} RAM; {}'.format(hostinfo['machineID'], hostinfo['sockets'], hostinfo['cpuID'], hostinfo['physicalcores'], hyperthreading, human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True), hostinfo['osversion']) print('Host: {}'.format(machineIDlong)) -# Base result - threads and times info from Numpy archive +# Base result - general info print('Model: {}'.format(args.baseresult)) -for i in range(len(baseresult['cputhreads'])): - print('{} CPU (OpenMP) thread(s): {:g} s'.format(baseresult['cputhreads'][i], baseresult['cputimes'][i])) +cells = np.array([baseresult['numcells'][0]]) # Length of cubic model side for cells per second metric baseplotlabel = os.path.splitext(os.path.split(args.baseresult)[1])[0] + '.in' -# Base result - arrays for length of cubic model side and cells per second metric -cells = np.array([baseresult['numcells'][0]]) -cpucellspersec = np.array([(baseresult['numcells'][0] * baseresult['numcells'][1] * baseresult['numcells'][2] * baseresult['iterations']) / baseresult['cputimes'][0]]) +# Base result - CPU threads and times info from Numpy archive +if baseresult['cputhreads'].size != 0: + for i in range(len(baseresult['cputhreads'])): + print('{} CPU (OpenMP) thread(s): {:g} s'.format(baseresult['cputhreads'][i], baseresult['cputimes'][i])) + cpucellspersec = np.array([(baseresult['numcells'][0] * baseresult['numcells'][1] * baseresult['numcells'][2] * baseresult['iterations']) / baseresult['cputimes'][0]]) -# Base result for GPU if required - time info +# Base result - GPU time info gpuIDs = baseresult['gpuIDs'].tolist() if gpuIDs: gpucellspersec = np.zeros((len(gpuIDs), 1)) @@ -79,15 +81,16 @@ if args.otherresults is not None: for i, result in enumerate(args.otherresults): otherresults.append(dict(np.load(result))) print('\nModel: {}'.format(result)) - for thread in range(len(otherresults[i]['cputhreads'])): - print('{} CPU (OpenMP) thread(s): {:g} s'.format(otherresults[i]['cputhreads'][thread], otherresults[i]['cputimes'][thread])) + cells = np.append(cells, otherresults[i]['numcells'][0]) # Length of cubic model side for cells per second metric otherplotlabels.append(os.path.splitext(os.path.split(result)[1])[0] + '.in') - # Arrays for length of cubic model side and cells per second metric - cells = np.append(cells, otherresults[i]['numcells'][0]) - cpucellspersec = np.append(cpucellspersec, (otherresults[i]['numcells'][0] * otherresults[i]['numcells'][1] * otherresults[i]['numcells'][2] * otherresults[i]['iterations']) / otherresults[i]['cputimes'][0]) + # CPU + if otherresults[i]['cputhreads'].size != 0: + for thread in range(len(otherresults[i]['cputhreads'])): + print('{} CPU (OpenMP) thread(s): {:g} s'.format(otherresults[i]['cputhreads'][thread], otherresults[i]['cputimes'][thread])) + cpucellspersec = np.append(cpucellspersec, (otherresults[i]['numcells'][0] * otherresults[i]['numcells'][1] * otherresults[i]['numcells'][2] * otherresults[i]['iterations']) / otherresults[i]['cputimes'][0]) - # Other results GPU + # GPU othergpuIDs = otherresults[i]['gpuIDs'].tolist() if othergpuIDs: # Array for cells per second metric @@ -113,58 +116,82 @@ markers = ['o', 'd', '^', 's', '*'] fig, ax = plt.subplots(num=machineID, figsize=(30, 10), facecolor='w', edgecolor='w') fig.suptitle(machineIDlong + '\ngprMax v' + version) gs = gridspec.GridSpec(1, 3, hspace=0.5) +plotcount = 0 ########################################### # Subplot of CPU (OpenMP) threads vs time # ########################################### -ax = plt.subplot(gs[0, 0]) -ax.plot(baseresult['cputhreads'], baseresult['cputimes'], color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, label=baseplotlabel) +if baseresult['cputhreads'].size != 0: + ax = plt.subplot(gs[0, plotcount]) + ax.plot(baseresult['cputhreads'], baseresult['cputimes'], color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, label=baseplotlabel) -if args.otherresults is not None: - for i, result in enumerate(otherresults): - ax.plot(result['cputhreads'], result['cputimes'], color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, ls=next(lines), label=otherplotlabels[i]) + if args.otherresults is not None: + for i, result in enumerate(otherresults): + ax.plot(result['cputhreads'], result['cputimes'], color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, ls=next(lines), label=otherplotlabels[i]) -ax.set_xlabel('Number of CPU (OpenMP) threads') -ax.set_ylabel('Time [s]') -ax.grid() -legend = ax.legend(loc=1) -frame = legend.get_frame() -frame.set_edgecolor('white') -ax.set_xlim([0, baseresult['cputhreads'][0] * 1.1]) -ax.set_xticks(np.append(baseresult['cputhreads'], 0)) -ax.set_ylim(0, top=ax.get_ylim()[1] * 1.1) + ax.set_xlabel('Number of CPU (OpenMP) threads') + ax.set_ylabel('Time [s]') + ax.grid() + legend = ax.legend(loc=1) + frame = legend.get_frame() + frame.set_edgecolor('white') + ax.set_xlim([0, baseresult['cputhreads'][0] * 1.1]) + ax.set_xticks(np.append(baseresult['cputhreads'], 0)) + ax.set_ylim(0, top=ax.get_ylim()[1] * 1.1) + plotcount += 1 ###################################################### # Subplot of CPU (OpenMP) threads vs speed-up factor # ###################################################### colors = itertools.cycle(colorIDs) # Reset color iterator -ax = plt.subplot(gs[0, 1]) -ax.plot(baseresult['cputhreads'], baseresult['cputimes'][-1] / baseresult['cputimes'], color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, label=baseplotlabel) +if baseresult['cputhreads'].size != 0: + ax = plt.subplot(gs[0, plotcount]) + ax.plot(baseresult['cputhreads'], baseresult['cputimes'][-1] / baseresult['cputimes'], color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, label=baseplotlabel) -if args.otherresults is not None: - for i, result in enumerate(otherresults): - ax.plot(result['cputhreads'], result['cputimes'][-1] / result['cputimes'], color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, ls=next(lines), label=otherplotlabels[i]) + if args.otherresults is not None: + for i, result in enumerate(otherresults): + ax.plot(result['cputhreads'], result['cputimes'][-1] / result['cputimes'], color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, ls=next(lines), label=otherplotlabels[i]) -ax.set_xlabel('Number of CPU (OpenMP) threads') -ax.set_ylabel('Speed-up factor') -ax.grid() -legend = ax.legend(loc=2) -frame = legend.get_frame() -frame.set_edgecolor('white') -ax.set_xlim([0, baseresult['cputhreads'][0] * 1.1]) -ax.set_xticks(np.append(baseresult['cputhreads'], 0)) -ax.set_ylim(bottom=1, top=ax.get_ylim()[1] * 1.1) + ax.set_xlabel('Number of CPU (OpenMP) threads') + ax.set_ylabel('Speed-up factor') + ax.grid() + legend = ax.legend(loc=2) + frame = legend.get_frame() + frame.set_edgecolor('white') + ax.set_xlim([0, baseresult['cputhreads'][0] * 1.1]) + ax.set_xticks(np.append(baseresult['cputhreads'], 0)) + ax.set_ylim(bottom=1, top=ax.get_ylim()[1] * 1.1) + plotcount += 1 ########################################### # Subplot of simulation size vs cells/sec # ########################################### + +def autolabel(rects): + """Attach a text label above each bar on a matplotlib bar chart displaying its height. + + Args: + rects: Handle to bar chart + """ + for rect in rects: + height = rect.get_height() + ax.text(rect.get_x() + rect.get_width()/2, height, + '%d' % int(height), + ha='center', va='bottom', fontsize=10, rotation=90) + colors = itertools.cycle(colorIDs) # Reset color iterator -ax = plt.subplot(gs[0, 2]) -ax.plot(cells, cpucellspersec / 1e6, color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, label=cpuID) +ax = plt.subplot(gs[0, plotcount]) +barwidth = 8 # the width of the bars + +if baseresult['cputhreads'].size != 0: + cpu = ax.bar(cells - (1/2) * barwidth, cpucellspersec / 1e6, barwidth, color=next(colors), edgecolor='none', label=cpuID) + autolabel(cpu) if gpuIDs: + positions = np.arange(-gpucellspersec.shape[0] / 2, gpucellspersec.shape[0] / 2, 1) for i in range(gpucellspersec.shape[0]): - ax.plot(cells, gpucellspersec[i,:] / 1e6, color=next(colors), marker=markers[0], markeredgecolor='none', ms=8, lw=2, label='NVIDIA ' + gpuIDs[i]) + gpu = ax.bar(cells + positions[i] * barwidth, gpucellspersec[i,:] / 1e6, barwidth, color=next(colors), edgecolor='none', label='NVIDIA ' + gpuIDs[i]) + autolabel(gpu) ax.set_xlabel('Side length of cubic domain [cells]') ax.set_ylabel('Performance [Mcells/s]') @@ -172,6 +199,8 @@ ax.grid() legend = ax.legend(loc=2) frame = legend.get_frame() frame.set_edgecolor('white') +ax.set_xticks(cells) +ax.set_xticklabels(cells) ax.set_xlim([0, cells[-1] * 1.1]) ax.set_ylim(bottom=0, top=ax.get_ylim()[1] * 1.1)