Fix deviceID selection for GPUs.

This commit is contained in:
Craig Warren
2020-03-10 17:28:58 +00:00
Parent 842fd3bc12
Commit 65c463e7a9
3 changed files with 53 additions and 28 deletions

View file

@@ -31,7 +31,7 @@ from scipy.constants import epsilon_0 as e0
 from scipy.constants import mu_0 as m0
 from .exceptions import GeneralError
-from .utilities import detect_check_gpus
+from .utilities import detect_gpus
 from .utilities import get_host_info
 from .utilities import get_terminal_width
@@ -71,8 +71,15 @@ class ModelConfig:
         # N.B. This will happen if the requested snapshots are too large to fit
         # on the memory of the GPU. If True this will slow performance significantly
         if sim_config.general['cuda']:
-            gpu = sim_config.set_model_gpu()
-            self.cuda = {'gpu': gpu,
+            # If a list of lists of GPU deviceIDs is found, flatten it
+            if any(isinstance(element, list) for element in sim_config.args.gpu):
+                deviceID = [val for sublist in sim_config.args.gpu for val in sublist]
+            # If no deviceID is given default to using deviceID 0. Else if either
+            # a single deviceID or list of deviceIDs is given use first one.
+            deviceID = 0 if not deviceID else deviceID[0]
+            self.cuda = {'gpu': sim_config.set_model_gpu(deviceID),
                          'snapsgpu2cpu': False}
         # Total memory usage for all grids in the model. Starts with 50MB overhead.
@@ -221,20 +228,12 @@ class SimulationConfig:
         # provides best performance.
         self.general['precision'] = 'single'
         self.cuda = {'gpus': [], # gpus: list of GPU objects
-                     'gpus_str': [], # gpus_str: list of strings describing GPU(s)
                      'nvcc_opts': None} # nvcc_opts: nvcc compiler options
         # Suppress nvcc warnings on Microsoft Windows
         if sys.platform == 'win32': self.cuda['nvcc_opts'] = ['-w']
-        # Flatten a list of lists
-        if any(isinstance(element, list) for element in self.args.gpu):
-            self.args.gpu = [val for sublist in self.args.gpu for val in sublist]
-        # If no deviceID is given default to 0
-        if not self.args.gpu:
-            self.args.gpu = [0]
-        self.cuda['gpus'] = detect_check_gpus(self.args.gpu)
+        # List of GPU objects of available GPUs
+        self.cuda['gpus'] = detect_gpus()
         # Subgrid parameter may not exist if user enters via CLI
         try:
@@ -259,14 +258,25 @@ class SimulationConfig:
         self._set_model_start_end()
         self._set_single_model()

-    def set_model_gpu(self, deviceID=0):
-        """Specify GPU object for model. Defaults to first GPU deviceID in
-            list of deviceID given.
+    def set_model_gpu(self, deviceID):
+        """Specify GPU object for model.
+
+        Args:
+            deviceID (int): Requested deviceID of GPU
+
+        Returns:
+            gpu (GPU object): Requested GPU object.
         """
+        found = False
         for gpu in self.cuda['gpus']:
             if gpu.deviceID == deviceID:
+                found = True
                 return gpu
+        if not found:
+            raise GeneralError(f'GPU with device ID {deviceID} does not exist')

     def _set_precision(self):
         """Data type (precision) for electromagnetic field output.

View file

@@ -18,10 +18,11 @@
 import datetime
 import logging
-import time
+import sys
 import gprMax.config as config
 from ._version import __version__, codename
+from .exceptions import GeneralError
 from .model_build_run import ModelBuildRun
 from .solvers import create_solver
 from .solvers import create_G
@@ -144,19 +145,22 @@ class MPIContext(Context):
             self.print_host_info()
             if config.sim_config.general['cuda']:
                 self.print_gpu_info()
+            sys.stdout.flush()
+            time.sleep(0.1)

         # Construct MPIExecutor
         executor = self.MPIExecutor(self._run_model, comm=self.comm)

+        # Check GPU resources versus number of MPI tasks
+        if executor.is_master():
+            if config.sim_config.general['cuda']:
+                if executor.size - 1 > len(config.sim_config.cuda['gpus']):
+                    raise GeneralError(f'Not enough GPU resources for number of MPI tasks requested. Number of MPI tasks should be equal to number of GPUs + 1.')
+
         # Create job list
         jobs = []
         for i in self.model_range:
             jobs.append({'i': i})

         # Send the workers to their work loop
         executor.start()

         if executor.is_master():
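A toy check mirroring the MPI/GPU resource test added above: one master rank plus one worker rank per GPU, so the number of MPI tasks should equal the number of GPUs plus one. The helper name and values below are hypothetical.

    # Hypothetical helper mirroring the check added to MPIContext above.
    def enough_gpus(mpi_tasks, num_gpus):
        # One master rank plus (mpi_tasks - 1) workers; each worker needs its own GPU.
        return mpi_tasks - 1 <= num_gpus

    print(enough_gpus(mpi_tasks=3, num_gpus=2))  # True: 2 workers, 2 GPUs
    print(enough_gpus(mpi_tasks=5, num_gpus=2))  # False: would raise GeneralError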

View file

@@ -503,15 +503,11 @@ class GPU:
         self.totalmem = drv.Device(self.deviceID).total_memory()


-def detect_check_gpus(deviceIDs):
+def detect_gpus():
     """Get information about Nvidia GPU(s).

-    Args:
-        deviceIDs (list): List of integers of device IDs.
-
     Returns:
         gpus (list): Detected GPU(s) object(s).
-        gpus_str (list): Printable strings of information on GPU(s).
     """

     try:
@@ -529,6 +525,23 @@ def detect_check_gpus(deviceIDs):
     else:
         deviceIDsavail = range(drv.Device.count())

+    # Gather information about selected/detected GPUs
+    gpus = []
+    for ID in deviceIDsavail:
+        gpu = GPU(deviceID=ID)
+        gpu.get_gpu_info(drv)
+        gpus.append(gpu)
+
+    return gpus
+
+
+def check_gpus(gpus):
+    """Check if requested Nvidia GPU(s) deviceID(s) exist.
+
+    Args:
+        gpus (list): List of GPU object(s).
+    """
+
     # Check if requested device ID(s) exist
     for ID in deviceIDs:
         if ID not in deviceIDsavail:
@@ -541,8 +554,6 @@ def detect_check_gpus(deviceIDs):
             gpu.get_gpu_info(drv)
             gpus.append(gpu)
-
-    return gpus


 def timer():
     """Function to return time in fractional seconds."""