# Ubuntu 22.04 (jammy) image with network debugging tools and the NVIDIA
# userspace driver/monitoring utilities installed.
FROM ubuntu:jammy

# Replace the stock APT sources with the sources.list from the build context.
COPY sources.list /etc/apt/

# Build-time only: keeps every apt-get/dpkg call below from prompting, without
# leaking DEBIAN_FRONTEND into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# One layer for the whole package step:
#   * the debconf selection persists the non-interactive frontend in the image,
#     so package configuration inside a running container stays prompt-free;
#   * `apt-get update` and `apt-get install` share a layer, so a cached, stale
#     package index can never be paired with a newer install list;
#   * the package index is removed in the same layer so it does not bloat the
#     image (run `apt-get update` again before installing more packages).
# The installed package set is unchanged from the previous multi-RUN version.
#
# NOTE: installing the driver packages in the image does not by itself give a
# container access to the GPU. Device access is granted at run time by the
# NVIDIA Container Toolkit on the host (`docker run --gpus all ...`), which
# also injects the host's matching driver libraries and nvidia-smi.
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections \
    && apt-get update \
    && apt-get install -y \
        httpie \
        netcat \
        nvidia-driver-525-server \
        nvtop \
        telnet \
        ubuntu-drivers-common \
        vim \
    && rm -rf /var/lib/apt/lists/*
在镜像中安装了与宿主机相同的 NVIDIA GPU 驱动之后,在 container 里面还是调用不了显卡:
─➤ docker run --rm -it ponponon/cuda_env bash
root@ea828c62b649:/# nvtop
No GPU to monitor.
root@ea828c62b649:/# nvidia-smi
Failed to initialize NVML: Unknown Error
root@ea828c62b649:/# nvidia-smi
Failed to initialize NVML: Unknown Error
root@ea828c62b649:/# nvidia-smi
Failed to initialize NVML: Unknown Error
root@ea828c62b649:/# nvidia-smi
Failed to initialize NVML: Unknown Error
root@ea828c62b649:/# nvidia-smi
Failed to initialize NVML: Unknown Error
root@ea828c62b649:/# nvidia-smi
Failed to initialize NVML: Unknown Error
root@ea828c62b649:/#
这种情况应该怎么办?
加上 `--gpus all` 参数运行容器也会报错:
─➤ docker run --gpus all --rm -it ponponon/cuda_env nvidia-smi
docker: Error response from daemon: could not select device driver "" with capabilities: [[gpu]].
但是在宿主机上直接运行 nvidia-smi 是正常的。
参考:Docker 调用 NVIDIA GPU 的教程