NVIDIA Docker
前提:宿主机需要先安装好 NVIDIA 显卡驱动。
官网安装教程:https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
CentOS 7
shell
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
&& curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.repo \
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
shell
yum-config-manager --enable libnvidia-container-experimental
shell
sudo yum clean expire-cache
shell
sudo yum install -y nvidia-docker2
shell
sudo systemctl restart docker
shell
sudo docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
shell
Sat Apr 2 19:18:23 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.106.00 Driver Version: 460.106.00 CUDA Version: 11.2 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 Tesla T4 Off | 00000000:18:00.0 Off | 0 |
| N/A 58C P0 30W / 70W | 4110MiB / 15109MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 17084 C /opt/conda/bin/python3 1369MiB |
| 0 N/A N/A 17085 C /opt/conda/bin/python3 1369MiB |
| 0 N/A N/A 17086 C /opt/conda/bin/python3 1369MiB |
+-----------------------------------------------------------------------------+
Ubuntu
shell
# 查看本机可用的显卡驱动版本
ubuntu-drivers devices
# 推荐版本:上一条命令输出中带 recommended 的一行,例如:
# nvidia-driver-570 - distro non-free recommended
apt install nvidia-driver-570 -y
# 以下 nvidia-fabricmanager 仅在使用 NVLink 的机器上才需要安装
# 注意:nvidia-fabricmanager 的版本要和上面安装的驱动版本对应
sudo apt install nvidia-fabricmanager-570
sudo systemctl enable nvidia-fabricmanager --now
systemctl status nvidia-fabricmanager
shell
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
sed -i -e '/experimental/ s/^#//g' /etc/apt/sources.list.d/nvidia-container-toolkit.list
sudo apt-get update
export NVIDIA_CONTAINER_TOOLKIT_VERSION=1.17.8-1
sudo apt-get install -y \
nvidia-container-toolkit=${NVIDIA_CONTAINER_TOOLKIT_VERSION} \
nvidia-container-toolkit-base=${NVIDIA_CONTAINER_TOOLKIT_VERSION} \
libnvidia-container-tools=${NVIDIA_CONTAINER_TOOLKIT_VERSION} \
libnvidia-container1=${NVIDIA_CONTAINER_TOOLKIT_VERSION}
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
shell
sudo docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
shell
Sat Apr 2 19:18:23 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.106.00 Driver Version: 460.106.00 CUDA Version: 11.2 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 Tesla T4 Off | 00000000:18:00.0 Off | 0 |
| N/A 58C P0 30W / 70W | 4110MiB / 15109MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| 0 N/A N/A 17084 C /opt/conda/bin/python3 1369MiB |
| 0 N/A N/A 17085 C /opt/conda/bin/python3 1369MiB |
| 0 N/A N/A 17086 C /opt/conda/bin/python3 1369MiB |
+-----------------------------------------------------------------------------+