Fix deviceID selection for GPUs.

This commit is contained in:
Craig Warren
2020-03-10 17:28:58 +00:00
parent 842fd3bc12
commit 65c463e7a9
3 changed files with 53 additions and 28 deletions

View File

@@ -31,7 +31,7 @@ from scipy.constants import epsilon_0 as e0
from scipy.constants import mu_0 as m0
from .exceptions import GeneralError
from .utilities import detect_check_gpus
from .utilities import detect_gpus
from .utilities import get_host_info
from .utilities import get_terminal_width
@@ -71,8 +71,15 @@ class ModelConfig:
# N.B. This will happen if the requested snapshots are too large to fit
# on the memory of the GPU. If True this will slow performance significantly
if sim_config.general['cuda']:
gpu = sim_config.set_model_gpu()
self.cuda = {'gpu': gpu,
# If a list of lists of GPU deviceIDs is found, flatten it
if any(isinstance(element, list) for element in sim_config.args.gpu):
deviceID = [val for sublist in sim_config.args.gpu for val in sublist]
# If no deviceID is given, default to deviceID 0. Otherwise, whether a single
# deviceID or a list of deviceIDs is given, use the first one.
deviceID = 0 if not deviceID else deviceID[0]
self.cuda = {'gpu': sim_config.set_model_gpu(deviceID),
'snapsgpu2cpu': False}
# Total memory usage for all grids in the model. Starts with 50MB overhead.
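For illustration, a minimal standalone sketch of the flatten-and-pick-first behaviour above (the select_device_id helper and args_gpu name are hypothetical stand-ins for sim_config.args.gpu; the hunk as shown only assigns deviceID inside the nested-list branch, so the sketch assumes a flat list is used directly otherwise):

# Sketch only: mirrors the deviceID selection logic in ModelConfig above.
def select_device_id(args_gpu):
    device_ids = args_gpu
    # If a list of lists of GPU deviceIDs is found, flatten it
    if any(isinstance(element, list) for element in args_gpu):
        device_ids = [val for sublist in args_gpu for val in sublist]
    # If no deviceID is given default to deviceID 0, else use the first one
    return 0 if not device_ids else device_ids[0]

assert select_device_id([]) == 0             # no deviceID given
assert select_device_id([2]) == 2            # single deviceID
assert select_device_id([[1, 3], [0]]) == 1  # list of lists, flattened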
@@ -221,20 +228,12 @@ class SimulationConfig:
# provides best performance.
self.general['precision'] = 'single'
self.cuda = {'gpus': [], # gpus: list of GPU objects
'gpus_str': [], # gpus_str: list of strings describing GPU(s)
'nvcc_opts': None} # nvcc_opts: nvcc compiler options
# Suppress nvcc warnings on Microsoft Windows
if sys.platform == 'win32': self.cuda['nvcc_opts'] = ['-w']
# Flatten a list of lists
if any(isinstance(element, list) for element in self.args.gpu):
self.args.gpu = [val for sublist in self.args.gpu for val in sublist]
# If no deviceID is given default to 0
if not self.args.gpu:
self.args.gpu = [0]
self.cuda['gpus'] = detect_check_gpus(self.args.gpu)
# List of GPU objects of available GPUs
self.cuda['gpus'] = detect_gpus()
# Subgrid parameter may not exist if user enters via CLI
try:
@@ -259,14 +258,25 @@ class SimulationConfig:
self._set_model_start_end()
self._set_single_model()
def set_model_gpu(self, deviceID=0):
"""Specify GPU object for model. Defaults to first GPU deviceID in
list of deviceID given.
def set_model_gpu(self, deviceID):
"""Specify GPU object for model.
Args:
deviceID (int): Requested deviceID of GPU
Returns:
gpu (GPU object): Requested GPU object.
"""
found = False
for gpu in self.cuda['gpus']:
if gpu.deviceID == deviceID:
found = True
return gpu
if not found:
raise GeneralError(f'GPU with device ID {deviceID} does not exist')
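As an aside, a minimal standalone sketch of the lookup set_model_gpu now performs (class and function names here are illustrative stand-ins, not gprMax API; note the found flag in the hunk is never consulted before the early return, so an equivalent lookup can drop it):

# Sketch of an equivalent deviceID lookup; FakeGPU stands in for the GPU class.
class FakeGPU:
    def __init__(self, deviceID):
        self.deviceID = deviceID

def find_gpu(gpus, deviceID):
    for gpu in gpus:
        if gpu.deviceID == deviceID:
            return gpu
    # Reaching here means no detected GPU matched the requested deviceID
    raise ValueError(f'GPU with device ID {deviceID} does not exist')

gpus = [FakeGPU(0), FakeGPU(1)]
assert find_gpu(gpus, deviceID=1).deviceID == 1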
def _set_precision(self):
"""Data type (precision) for electromagnetic field output.

View File

@@ -18,10 +18,11 @@
import datetime
import logging
import time
import sys
import gprMax.config as config
from ._version import __version__, codename
from .exceptions import GeneralError
from .model_build_run import ModelBuildRun
from .solvers import create_solver
from .solvers import create_G
@@ -144,19 +145,22 @@ class MPIContext(Context):
self.print_host_info()
if config.sim_config.general['cuda']:
self.print_gpu_info()
time.sleep(0.1)
sys.stdout.flush()
# Construct MPIExecutor
executor = self.MPIExecutor(self._run_model, comm=self.comm)
# Check GPU resources versus number of MPI tasks
if executor.is_master():
if config.sim_config.general['cuda']:
if executor.size - 1 > len(config.sim_config.cuda['gpus']):
raise GeneralError(f'Not enough GPU resources for number of MPI tasks requested. Number of MPI tasks should be equal to number of GPUs + 1.')
# Create job list
jobs = []
for i in self.model_range:
jobs.append({'i': i})
# Send the workers to their work loop
executor.start()
if executor.is_master():
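For context, a small standalone sketch of the master-rank resource check added above (the function name and plain RuntimeError are illustrative; in gprMax the check uses executor.size, the detected GPU list, and raises GeneralError):

# Sketch: one master rank plus one worker per model, each worker needing a GPU,
# so the number of workers (size - 1) must not exceed the number of GPUs.
def check_mpi_gpu_resources(size, num_gpus):
    if size - 1 > num_gpus:
        raise RuntimeError('Not enough GPU resources for the number of MPI tasks '
                           'requested. MPI tasks should equal number of GPUs + 1.')

check_mpi_gpu_resources(size=3, num_gpus=2)    # 2 workers, 2 GPUs: fine
# check_mpi_gpu_resources(size=4, num_gpus=2)  # 3 workers, 2 GPUs: would raise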

View File

@@ -503,15 +503,11 @@ class GPU:
self.totalmem = drv.Device(self.deviceID).total_memory()
def detect_check_gpus(deviceIDs):
def detect_gpus():
"""Get information about Nvidia GPU(s).
Args:
deviceIDs (list): List of integers of device IDs.
Returns:
gpus (list): Detected GPU(s) object(s).
gpus_str (list): Printable strings of information on GPU(s).
"""
try:
@@ -529,6 +525,23 @@ def detect_check_gpus(deviceIDs):
else:
deviceIDsavail = range(drv.Device.count())
# Gather information about selected/detected GPUs
gpus = []
for ID in deviceIDsavail:
gpu = GPU(deviceID=ID)
gpu.get_gpu_info(drv)
gpus.append(gpu)
return gpus
def check_gpus(gpus):
"""Check if requested Nvidia GPU(s) deviceID(s) exist.
Args:
gpus (list): List of GPU object(s).
"""
# Check if requested device ID(s) exist
for ID in deviceIDs:
if ID not in deviceIDsavail:
@@ -541,8 +554,6 @@ def detect_check_gpus(deviceIDs):
gpu.get_gpu_info(drv)
gpus.append(gpu)
return gpus
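To round off, a hedged sketch of how the new detect/check split could be used together (the PyCUDA-backed GPU class is replaced by a minimal stand-in, and the exact body of check_gpus is not fully shown in the hunk, so this only illustrates the intent):

# Sketch of the detect-then-check pattern: detection returns every GPU on the
# host, and a separate check verifies the requested deviceIDs are among them.
class GPUStub:
    def __init__(self, deviceID):
        self.deviceID = deviceID

def detect_gpus_sketch():
    # In gprMax this queries pycuda.driver for each device on the host
    return [GPUStub(deviceID=i) for i in range(2)]  # pretend two GPUs exist

def check_gpus_sketch(gpus, requested_ids):
    available = {gpu.deviceID for gpu in gpus}
    for ID in requested_ids:
        if ID not in available:
            raise ValueError(f'GPU with device ID {ID} does not exist')

check_gpus_sketch(detect_gpus_sketch(), requested_ids=[0, 1])  # passes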
def timer():
"""Function to return time in fractional seconds."""