[ENH] Restructure the project.

2023-05-22 02:39:04 +08:00
parent f329433cad
commit 46a9cbc21e
22 changed files with 755 additions and 36 deletions
--- a/scripts/finetune.sh
+++ b/scripts/finetune.sh
@ -0,0 +1,56 @@
+#!/bin/bash
+export WANDB_MODE=disabled # 禁用wandb
+
+# 使用chinese-alpaca-plus-7b-merged模型在law_data.json数据集上finetune
+experiment_name="chinese-alpaca-plus-7b-law-e1"
+
+# 单卡或者模型并行
+python finetune.py \
+    --base_model "minlik/chinese-alpaca-plus-7b-merged" \
+    --data_path "./data/finetune_law_data.json" \
+    --output_dir "./outputs/"${experiment_name} \
+    --batch_size 64 \
+    --micro_batch_size 8 \
+    --num_epochs 20 \
+    --learning_rate 3e-4 \
+    --cutoff_len 256 \
+    --val_set_size 0 \
+    --lora_r 8 \
+    --lora_alpha 16 \
+    --lora_dropout 0.05 \
+    --lora_target_modules "[q_proj,v_proj]" \
+    --train_on_inputs True \
+    --add_eos_token True \
+    --group_by_length False \
+    --wandb_project \
+    --wandb_run_name \
+    --wandb_watch \
+    --wandb_log_model \
+    --resume_from_checkpoint "./outputs/"${experiment_name} \
+    --prompt_template_name "alpaca" \
+
+
+# 多卡数据并行
+# WORLD_SIZE=8 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 --master_port=1234 finetune.py \
+#     --base_model "minlik/chinese-alpaca-plus-7b-merged" \
+#     --data_path "./data/finetune_law_data.json" \
+#     --output_dir "./outputs/"${experiment_name} \
+#     --batch_size 64 \
+#     --micro_batch_size 8 \
+#     --num_epochs 20 \
+#     --learning_rate 3e-4 \
+#     --cutoff_len 256 \
+#     --val_set_size 0 \
+#     --lora_r 8 \
+#     --lora_alpha 16 \
+#     --lora_dropout 0.05 \
+#     --lora_target_modules "[q_proj,v_proj]" \
+#     --train_on_inputs True \
+#     --add_eos_token True \
+#     --group_by_length False \
+#     --wandb_project \
+#     --wandb_run_name \
+#     --wandb_watch \
+#     --wandb_log_model \
+#     --resume_from_checkpoint "./outputs/"${experiment_name} \
+#     --prompt_template_name "alpaca" \
--- a/scripts/generate.sh
+++ b/scripts/generate.sh
@ -0,0 +1,7 @@
+#!/bin/bash
+
+# Run generate.py on GPU 1 with the base model loaded in 8-bit, the published
+# LoRA weights applied, and Gradio sharing enabled.
+CUDA_VISIBLE_DEVICES=1 python generate.py \
+    --load_8bit \
+    --base_model 'minlik/chinese-llama-7b-merged' \
+    --lora_weights 'entity303/lawgpt-lora-7b' \
+    --prompt_template 'law_template' \
+    --share_gradio
--- a/scripts/infer-law.sh
+++ b/scripts/infer-law.sh
@ -0,0 +1,16 @@
+
+# LawGPT
+python infer.py \
+    --base_model 'minlik/chinese-alpaca-plus-7b-merged' \
+    --lora_weights './outputs/chinese-alpaca-plus-7b-law-e1' \
+    --instruct_dir './data/infer_law_data.json' \
+    --prompt_template 'alpaca'
+
+
+# Chinese-Alpaca-plus-7B
+python infer.py \
+    --base_model 'minlik/chinese-alpaca-plus-7b-merged' \
+    --lora_weights '' \
+    --instruct_dir './data/infer_law_data.json' \
+    --prompt_template 'alpaca'
+
--- a/scripts/train.sh
+++ b/scripts/train.sh
@ -0,0 +1,20 @@
+#!/bin/bash
+
+WORLD_SIZE=8 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun --nproc_per_node=8 --master_port=1235 train_lora.py \
+    --base_model '../models/base_models/chinese_llama_7b' \
+    --data_path '' \
+    --output_dir '../models/lora_weights' \
+    --batch_size 128 \
+    --micro_batch_size 8 \
+    --num_epochs 1 \
+    --learning_rate 0.0003 \
+    --cutoff_len 1024 \
+    --val_set_size 0 \
+    --lora_r 16 \
+    --lora_alpha 32 \
+    --lora_dropout 0.05 \
+    --lora_target_modules '[q_proj, v_proj, k_proj, o_proj]' \
+    --train_on_inputs True \
+    --add_eos_token True \
+    --group_by_length True \
+    --resume_from_checkpoint '../models/lora_weights'
--- a/scripts/webui.sh
+++ b/scripts/webui.sh
@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Launch webui.py, listening on 0.0.0.0 with Gradio sharing turned on.
+
+# Use the already-trained model published on Hugging Face
+python webui.py \
+    --load_8bit True \
+    --base_model 'minlik/chinese-alpaca-plus-7b-merged' \
+    --lora_weights 'entity303/lawgpt-lora-7b' \
+    --prompt_template "law_template" \
+    --server_name "0.0.0.0" \
+    --share_gradio True
+
+
+# Use your own fine-tuned LoRA: put your weights in the directory given below
+# python webui.py \
+#     --load_8bit True \
+#     --base_model 'minlik/chinese-alpaca-plus-7b-merged' \
+#     --lora_weights './outputs/chinese-alpaca-plus-7b-law-e1' \
+#     --prompt_template "law_template" \
+#     --server_name "0.0.0.0" \
+#     --share_gradio True