Skip to content

NVIDIA Docker

宿主机需要安装好显卡驱动

官网安装教程

CentOS 7

shell
# Register the NVIDIA container toolkit yum repository for this distribution.
# ID and VERSION_ID are read from /etc/os-release, sourced inside the subshell.
# curl -f turns an HTTP error (e.g. no repo published for this distribution)
# into a failure instead of saving the server's error page as the .repo file.
distribution=$(. /etc/os-release; echo "${ID}${VERSION_ID}") \
    && curl -fsSL "https://nvidia.github.io/libnvidia-container/${distribution}/libnvidia-container.repo" \
    | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
shell
# Enable the experimental package repository as well. This edits the system
# yum repo configuration, so it runs under sudo like the other steps.
sudo yum-config-manager --enable libnvidia-container-experimental
shell
# Make yum revalidate its cached repository metadata on the next run.
sudo yum clean expire-cache
shell
# Install the nvidia-docker2 package; -y answers yes to yum's prompts.
sudo yum install -y nvidia-docker2
shell
# Restart the Docker daemon (presumably so it picks up the NVIDIA runtime
# installed in the previous step).
sudo systemctl restart docker
shell
# Smoke test: run nvidia-smi in a throwaway container with all GPUs attached.
# The nvidia/cuda:11.0-base tag is no longer published on Docker Hub, so use
# the plain ubuntu image as NVIDIA's sample-workload docs do; the NVIDIA
# runtime makes nvidia-smi available inside the container.
sudo docker run --rm --gpus all ubuntu nvidia-smi
shell
Sat Apr  2 19:18:23 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.106.00   Driver Version: 460.106.00   CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  Tesla T4            Off  | 00000000:18:00.0 Off |                    0 |
| N/A   58C    P0    30W /  70W |   4110MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|    0   N/A  N/A     17084      C   /opt/conda/bin/python3           1369MiB |
|    0   N/A  N/A     17085      C   /opt/conda/bin/python3           1369MiB |
|    0   N/A  N/A     17086      C   /opt/conda/bin/python3           1369MiB |
+-----------------------------------------------------------------------------+

Ubuntu

shell
# List the detected GPU and the driver packages available for it.
ubuntu-drivers devices
# Example output line; the entry marked "recommended" is the version to install:
#   nvidia-driver-570 - distro non-free recommended
sudo apt install nvidia-driver-570 -y

# Only needed on systems that use NVLink.
# The nvidia-fabricmanager version must match the driver version installed above.
sudo apt install nvidia-fabricmanager-570

# Start the fabric manager now and on every boot, then check that it is running.
sudo systemctl enable nvidia-fabricmanager --now
systemctl status nvidia-fabricmanager
shell
# Import NVIDIA's repository signing key, then write the apt source list with
# every "deb" entry pinned to that keyring via signed-by.
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
  && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
    sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
# Uncomment the experimental entries in the list. The file was written by
# "sudo tee" above, so editing it in place needs sudo as well.
sudo sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-container-toolkit.list

sudo apt-get update

# Install all four toolkit packages at the same pinned version.
export NVIDIA_CONTAINER_TOOLKIT_VERSION=1.17.8-1
sudo apt-get install -y \
    "nvidia-container-toolkit=${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \
    "nvidia-container-toolkit-base=${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \
    "libnvidia-container-tools=${NVIDIA_CONTAINER_TOOLKIT_VERSION}" \
    "libnvidia-container1=${NVIDIA_CONTAINER_TOOLKIT_VERSION}"

# Configure Docker to use the NVIDIA runtime, then restart the daemon.
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
shell
Sat Apr  2 19:18:23 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.106.00   Driver Version: 460.106.00   CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  Tesla T4            Off  | 00000000:18:00.0 Off |                    0 |
| N/A   58C    P0    30W /  70W |   4110MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|    0   N/A  N/A     17084      C   /opt/conda/bin/python3           1369MiB |
|    0   N/A  N/A     17085      C   /opt/conda/bin/python3           1369MiB |
|    0   N/A  N/A     17086      C   /opt/conda/bin/python3           1369MiB |
+-----------------------------------------------------------------------------+