#! /usr/bin/bash
#
# This script runs speech recognition training using DeepSpeed
#
# CHANGE THESE AS PER YOUR REQUIREMENTS
# LANG as it is referred to in the dataset
LANG=ml # 2 letter ISO code for the language
LANG_ISO_3=mal # 3 letter ISO code for the language
LANGUAGE=Malayalam # Full language name as per Whisper convention
# For Mozilla Common Voice datasets (enabled by default)
DATASET="mozilla-foundation/common_voice_16_0"
TEXT_COLUMN="sentence"
# For Google FLEURS datasets, uncomment the following instead
# DATASET="google/fleurs"
# TEXT_COLUMN="transcription"
# For custom datasets, uncomment and adjust the following
# DATASET="parambharat/kannada_asr_corpus"
# TEXT_COLUMN=${TEXT_COLUMN:-"sentence"}
# Returns a suggested fine-tuning learning rate for a given model size
get_fine_tuning_lr() {
    local model_size=$1
    local lr
    case "$model_size" in
        "tiny")
            lr="3.75e-5"
            ;;
        "base")
            lr="2.5e-5"
            ;;
        "small")
            lr="1.25e-5"
            ;;
        "medium")
            lr="6.25e-6"
            ;;
        "large")
            lr="4.375e-6"
            ;;
        "large-v2")
            lr="5e-6"
            ;;
        *)
            echo "Invalid model size: ${model_size}" >&2
            exit 1
            ;;
    esac
    echo "$lr"
}
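# Example usage (not invoked below; the fixed LEARNING_RATE further down is used instead):
#   LEARNING_RATE=$(get_fine_tuning_lr "base")   # -> 2.5e-5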
SCRIPT_PATH=$(realpath "${BASH_SOURCE[0]}")
SCRIPT_DIR=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
# Port to use
export MASTER_PORT="${MASTER_PORT:-29500}"
echo "Using master_port for deepspeech: ${MASTER_PORT}"
export "MASTER_ADDR"="localhost"
export "RANK"="0"
export "LOCAL_RANK"="0"
export "WORLD_SIZE"="1"
# Base model variant
MODEL=w2v2
# Base checkpoint, fine-tuned model name, and output directory
BASE_MODEL="facebook/mms-1b-all"
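# facebook/mms-1b-all is the multilingual MMS (wav2vec 2.0) checkpoint; the adapter
# training script below fine-tunes a language adapter for the target language.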
JUST_LANG=${LANG%%_*}
MY_MODEL="breeze-listen-${MODEL}-${JUST_LANG}"
OUTDIR="/cosmos/home/sp-operator/ai/training/models/simpragma/${MY_MODEL}"
echo "OUTDIR: ${OUTDIR}"
# Training parameters. Feel free to change any of the values below.
MAX_EPOCHS=4
TRAIN_BATCH_SIZE=4
EVAL_BATCH_SIZE=4
LEARNING_RATE="1e-3"
EVAL_STEPS="200"
SAVE_STEPS="200"
# Create the output directory
mkdir -p "${OUTDIR}"
# Optional arguments for the training command below:
#   --overwrite_output_dir \
# To resume from an existing checkpoint, also include the following argument (adjust the checkpoint directory):
#   --resume_from_checkpoint="${MY_MODEL}/checkpoint-400" \
echo "================ TRAINING: START ================"
python "${SCRIPT_DIR}/run_speech_recognition_ctc_adapter.py" \
    --dataset_name="${DATASET}" \
    --model_name_or_path="${BASE_MODEL}" \
    --dataset_config_name="${LANG}" \
    --target_language="${LANG_ISO_3}" \
    --output_dir="${OUTDIR}" \
    --num_train_epochs="${MAX_EPOCHS}" \
    --per_device_train_batch_size="${TRAIN_BATCH_SIZE}" \
    --per_device_eval_batch_size="${EVAL_BATCH_SIZE}" \
    --learning_rate="${LEARNING_RATE}" \
    --warmup_steps="100" \
    --evaluation_strategy="steps" \
    --text_column_name="${TEXT_COLUMN}" \
    --length_column_name="input_length" \
    --save_steps="${SAVE_STEPS}" \
    --eval_steps="${EVAL_STEPS}" \
    --save_total_limit="3" \
    --optim="adamw_bnb_8bit" \
    --hub_model_id="simpragma/${MY_MODEL}" \
    --gradient_checkpointing \
    --chars_to_ignore , ? . ! - \; \: \" “ % ‘ ” � \
    --fp16 \
    --group_by_length \
    --do_train \
    --do_eval \
    --push_to_hub \
    | tee "${OUTDIR}/${MY_MODEL}.log"
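# NOTE: the above launches a plain single-process python run. To go through the DeepSpeed
# launcher instead (assumes a DeepSpeed config file, hypothetically named ds_config.json),
# the invocation would look something like:
#   deepspeed --num_gpus=1 "${SCRIPT_DIR}/run_speech_recognition_ctc_adapter.py" \
#       --deepspeed="ds_config.json" ... (same arguments as above)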
# Copy the script to the output directory so that we can recreate the model
cp "${SCRIPT_PATH}" "${OUTDIR}"
echo "================ TRAINING: DONE ================"
exit 0