# CHT-6_20240715-1255 / run_dpo.sh
# nike00811's picture
# Training in progress, step 27
# dc9ca7d verified
# raw
# history blame contribute delete
# No virus
# 915 Bytes
# export NCCL_P2P_DISABLE="1"
# export NCCL_IB_DISABLE="1"
export OMP_NUM_THREADS=1
export ACCELERATE_LOG_LEVEL=info
export WANDB_PROJECT='Mistral-DPO'
export WANDB_ENTITY="ckip"
output_dir="DPO/CHT-6_20240715-1255"
# Check if the directory does not exist
if [ -d ${output_dir} ]; then
echo "The directory already exists."
exit
fi
mkdir -p ${output_dir}
current_script=$0
cp ${current_script} ${output_dir}
accelerate launch \
--config_file alignment-handbook/recipes/accelerate_configs/deepspeed_zero3.yaml \
--num_processes 4 \
test_dpo.py \
"dpo_config_full.yaml" \
--load_in_4bit=false \
--logging_steps=1 \
--model_name_or_path="/share/home/models/CHT-Mistral-7B/CHT-6/" \
--train_file="data/training_ptt_dpo-include_id-converted.jsonl" \
--validation_file="data/testing_ptt_dpo_include_id-converted.jsonl" \
--cache_dir="cache_dir" \
--output_dir=${output_dir} \
| tee ${output_dir}/trainin.log