12.9 PyTorch version of ChatGLM2-6B deployment
I. Prerequisites
- The operating system has been installed (Ubuntu, CTyunOS, openEuler, Kylin, or a similar distribution).
- The server has external network access (optional; it makes downloading packages easier, otherwise upload them manually).
- Recommended disk space: more than 1 TB.
- Prepare the Docker installation package, Docker image, code, pre-trained weights, and datasets (download links are provided in the steps below).
II. Application deployment
1. Docker deployment
#Download the installation package
wget https://download.docker.com/linux/static/stable/aarch64/docker-18.09.8.tgz --no-check-certificate
#Unzip and install
tar xvpf docker-18.09.8.tgz
cp -p -f docker/* /usr/bin
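As an optional quick check that the static binaries were copied into place, both the client and the daemon should report their versions:
docker --version
dockerd --version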
#Prepare the environment
#For Docker to work properly, disable SELinux and turn off the firewall.
setenforce 0
systemctl stop firewalld
systemctl disable firewalld
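Optionally, confirm that SELinux is no longer enforcing and that the firewall service is stopped:
getenforce
#expected output: Permissive or Disabled
systemctl is-active firewalld
#expected output: inactive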
#Configure the docker.service service
vim /usr/lib/systemd/system/docker.service
#Press i to enter insert mode and paste the following
[Unit]
Description=Docker Application Container Engine
Documentation=http://docs.docker.com
After=network.target docker.socket
[Service]
Type=notify
EnvironmentFile=-/run/flannel/docker
WorkingDirectory=/usr/local/bin
ExecStart=/usr/bin/dockerd -H tcp://0.0.0.0:4243 -H unix:///var/run/docker.sock --selinux-enabled=false --log-opt max-size=1g
ExecReload=/bin/kill -s HUP $MAINPID
#Having non-zero Limit*s causes performance problems due to accounting overhead
#in the kernel. We recommend using cgroups to do container-local accounting.
LimitNOFILE=infinity
LimitNPROC=infinity
LimitCORE=infinity
#Uncomment TasksMax if your systemd version supports it.
#Only systemd 226 and above support this version.
#TasksMax=infinity
TimeoutStartSec=0
#set delegate yes so that systemd does not reset the cgroups of docker containers
Delegate=yes
#kill only the docker process, not all processes in the cgroup
KillMode=process
Restart=on-failure
[Install]
WantedBy=multi-user.target
#Start up related services
systemctl daemon-reload
systemctl status docker
systemctl restart docker
systemctl status docker
systemctl enable docker
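As an optional check, confirm that the daemon is running and see which directory it stores its data in (the "Docker Root Dir" field of docker info):
docker info | grep "Docker Root Dir"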
#Note! After startup, make sure the directory Docker stores its data in has enough free space (by default /var/lib/docker). If there is not enough space there, move it to another path as follows.
vi /etc/docker/daemon.json
#Add the following; the path /home/docker is only an example, adjust it to your environment
{
"data-root":"/home/docker"
}
#Save and exit with :wq
sudo systemctl daemon-reload
sudo systemctl restart docker
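After the restart, you can optionally verify that the new data directory has taken effect:
docker info | grep "Docker Root Dir"
#should now show /home/docker (or whatever path you configured)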
2. Resource preparation
cd /home/work
#docker image download
wget https://czy.obs.cn-east-324.fjaicc.com/chatglm2-6b-pytorch/images/chatglm2-6b-pytorch.tar
#Load the image
docker load -i chatglm2-6b-pytorch.tar
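Optionally confirm that the image was loaded; the repository and tag should match the image referenced in the docker run command below:
docker images | grep chatglm2-6b-pytorch
#expected: chatglm2-6b-pytorch   v1.0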
#Code (with dataset and weights) Download
wget https://czy.obs.cn-east-324.fjaicc.com/chatglm2-6b-pytorch/code/ModelZoo-PyTorch.tar.gz
#code decompression
tar -zxvf ModelZoo-PyTorch.tar.gz
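As a sanity check, assuming the archive was extracted under /home/work as above, the fine-tuning scripts used in step 4 should now be present:
ls /home/work/ModelZoo-PyTorch/PyTorch/built-in/foundation/ChatGLM2-6B/ptuning/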
3. Start the container
#Start the container; replace the contents of every <> placeholder with your own values
docker run -itd -u root --ipc=host \
--device=/dev/davinci0 \
--device=/dev/davinci1 \
--device=/dev/davinci2 \
--device=/dev/davinci3 \
--device=/dev/davinci4 \
--device=/dev/davinci5 \
--device=/dev/davinci6 \
--device=/dev/davinci7 \
--device=/dev/davinci_manager \
--device=/dev/devmm_svm \
--device=/dev/hisi_hdc \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \
-v /usr/local/sbin/npu-smi:/usr/local/sbin/npu-smi \
-v /usr/local/sbin/:/usr/local/sbin/ \
-v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \
-v /var/log/npu/slog/:/var/log/npu/slog \
-v /var/log/npu/profiling/:/var/log/npu/profiling \
-v /var/log/npu/dump/:/var/log/npu/dump \
-v /var/log/npu/:/usr/slog \
-v <host work path containing the code, weights, and datasets, e.g. /home/work/>:/home/work \
--name <Name of the container> \
chatglm2-6b-pytorch:v1.0 \
/bin/bash
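Optionally confirm that the container is up and that the Ascend NPUs are visible inside it (replace <Name of the container> with the name chosen above):
docker ps | grep <Name of the container>
docker exec -it <Name of the container> npu-smi info
#npu-smi info should list the eight davinci devices mapped above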
4. Enter the container and start fine-tuning
docker exec -it <container ID> bash
cd /home/work/ModelZoo-PyTorch/PyTorch/built-in/foundation/ChatGLM2-6B/ptuning/
#P-Tuning v2 single-card fine-tuning
bash train.sh
#8-card full-parameter fine-tuning
bash ds_train_finetune.sh
#LoRA fine-tuning
bash ds_train_lora.sh
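While a fine-tuning job is running, NPU utilization and HBM usage can be checked from the host or inside the container to confirm the devices are actually being used:
npu-smi info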