宿主机 安装 driver


# 禁用宿主机 自带 显卡 驱动

lsmod | grep nouveau
  nouveau              1949696  0
  mxm_wmi                16384  1 nouveau
  wmi                    32768  2 mxm_wmi,nouveau
  video                  49152  1 nouveau
  i2c_algo_bit           16384  1 nouveau
  ttm                   106496  2 qxl,nouveau
  drm_kms_helper        184320  4 qxl,nouveau
  drm                   491520  5 drm_kms_helper,qxl,ttm,nouveau

# 首先禁用 nouveau
vi /etc/modprobe.d/blacklist.conf

# 在最后一行添加:blacklist nouveau

# Blacklist the nouveau kernel module in the modprobe configuration.
# The entry is appended only when an identical line is not already present,
# so re-running this step neither duplicates the entry nor leaves blank
# lines behind, and lines that merely mention the phrase (e.g. comments)
# are left untouched.
modprobe_path='/etc/modprobe.d/blacklist.conf'
# grep's stderr is silenced on purpose: a missing file simply means the
# entry is absent, and the append below creates the file.
if ! grep -qxF 'blacklist nouveau' "${modprobe_path}" 2>/dev/null; then
  printf '\nblacklist nouveau\n' >> "${modprobe_path}"
fi

sudo update-initramfs -u

# 关闭图形界面
systemctl set-default multi-user.target

reboot

lsmod | grep nouveau
# 无输出 代表成功

echo "nameserver 114.114.114.114" > /etc/resolv.conf


sudo sed -i "s@http.*archive.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list
sudo sed -i "s@http.*security.ubuntu.com@http://repo.huaweicloud.com@g" /etc/apt/sources.list

apt update -y


# Remove any previously installed NVIDIA packages before installing the new
# driver. The pattern is quoted so the shell hands it to apt-get verbatim
# instead of expanding it against files in the current directory.
sudo apt-get remove -y 'nvidia*'

# Drop dependencies that are no longer needed after the removal above.
sudo apt-get autoremove -y


apt install nvidia-driver-460-server -y

apt install nvidia-cuda-toolkit -y


# 关闭图形界面
systemctl set-default multi-user.target


# Register the nvidia-docker apt repository for this distribution.
# `distribution` is ID followed by VERSION_ID as read from /etc/os-release.
# curl runs with -f (fail on HTTP errors) and -S (still show the error while
# silent) so that a failed download, such as a 404, is reported instead of
# piping an error page into apt-key or into the sources list file.
distribution=$(. /etc/os-release; echo "${ID}${VERSION_ID}") \
   && curl -fsSL https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - \
   && curl -fsSL "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list" | sudo tee /etc/apt/sources.list.d/nvidia-docker.list


sudo apt-get update -y

sudo apt-get install -y nvidia-docker2

sudo systemctl restart docker

sudo systemctl enable docker


测试 nvidia-smi 驱动


# 国内
sudo docker run --rm --gpus all registry.cn-hangzhou.aliyuncs.com/mkmk/all:nvidia-cuda-11-base nvidia-smi

Thu Apr  8 16:52:50 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  GeForce GT 730      Off  | 00000000:00:03.0 N/A |                  N/A |
| 30%   30C    P0    N/A /  N/A |      0MiB /  2002MiB |     N/A      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GT 730      Off  | 00000000:00:04.0 N/A |                  N/A |
| 30%   27C    P0    N/A /  N/A |      0MiB /  2002MiB |     N/A      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
root@free_cicd:~# 


# 可以先跳过这个步骤，直接使用 gpu-burn 测试 GPU
# 如果提示驱动不兼容，再回到这里给驱动降级
echo "nameserver 114.114.114.114" > /etc/resolv.conf

wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget https://developer.download.nvidia.com/compute/cuda/11.1.1/local_installers/cuda-repo-ubuntu2004-11-1-local_11.1.1-455.32.00-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu2004-11-1-local_11.1.1-455.32.00-1_amd64.deb
sudo apt-key add /var/cuda-repo-ubuntu2004-11-1-local/7fa2af80.pub
sudo apt-get update
sudo apt-get -y install cuda

# 关闭图形界面
systemctl set-default multi-user.target
reboot

使用 gpu-burn 测试 gpu


docker run -it  --gpus=all  registry.cn-hangzhou.aliyuncs.com/mkmk/all:gpu-burn-cuda11.1


docker run -it  --gpus=all  registry.cn-hangzhou.aliyuncs.com/mkmk/all:gpu-burn-cuda11.1  "/app/gpu_burn"  "10"

rm compare.ptx
COMPUTE=60 make

gpu-burn 的 Dockerfile

root@free_cicd:~/gpu-burn# 

cat Dockerfile 

# Build stage: compile gpu_burn inside the CUDA development image.
FROM nvidia/cuda:11.1.1-devel AS builder

WORKDIR /build

COPY . /build/

RUN make

# Runtime stage: copy only the build results into the smaller runtime image.
FROM nvidia/cuda:11.1.1-runtime

COPY --from=builder /build/gpu_burn /app/
COPY --from=builder /build/compare.cu /app/
# NOTE(review): the build also produces compare.ptx, and gpu_burn is expected
# to load that compiled kernel from its working directory at runtime — confirm
# against the gpu-burn sources. Without it the container would start but the
# burn could not load its compare kernel.
COPY --from=builder /build/compare.ptx /app/

WORKDIR /app

# Default command; "60" is the argument passed to gpu_burn (presumably the
# run time in seconds). It can be overridden on the `docker run` command line.
CMD ["./gpu_burn", "60"]


docker build . -t gpu-burn:cuda11.1 

#  推送到 远程
docker tag  gpu-burn:cuda11.1 registry.cn-hangzhou.aliyuncs.com/mkmk/all:gpu-burn-cuda11.1

docker push registry.cn-hangzhou.aliyuncs.com/mkmk/all:gpu-burn-cuda11.1

需要 合适的 驱动


echo "nameserver 114.114.114.114" > /etc/resolv.conf

ubuntu-drivers devices
  Command 'ubuntu-drivers' not found, but can be installed with:

apt install ubuntu-drivers-common -y

ubuntu-drivers devices

  == /sys/devices/pci0000:00/0000:00:03.0 ==
  modalias : pci:v000010DEd00001287sv00000000sd00000000bc03sc00i00
  vendor   : NVIDIA Corporation
  model    : GK208B [GeForce GT 730]
  driver   : nvidia-driver-390 - distro non-free
  driver   : nvidia-driver-460 - distro non-free recommended

sudo apt-get install -y nvidia-driver-460 libnvidia-gl-460 libnvidia-compute-460 libnvidia-extra-460 nvidia-compute-utils-460 libnvidia-decode-460 libnvidia-encode-460 nvidia-utils-460 xserver-xorg-video-nvidia-460 libnvidia-cfg1-460 libnvidia-ifr1-460

systemctl set-default multi-user.target


reboot

nvidia-smi

make clean
make CUDAPATH=/usr/local/cuda

来聊聊啊
gzh.png


乐码客
1 声望1 粉丝