|
#!/usr/bin/bash |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
EXPERIMENT="ebranchformer_english_small_normalized" |
|
PROJECT="regularizations_english_corpus" |
|
WORK_DIR="/mnt/proj1/open-28-58/lakoc/huggingface_asr" |
|
RECIPE_DIR="${WORK_DIR}/recipes/ebranchformer_english" |
|
EXPERIMENT_PATH="${WORK_DIR}/experiments/${EXPERIMENT}" |
|
HF_HOME="/scratch/project/open-28-57/lakoc/huggingface_cache" |
|
|
|
args=( |
|
|
|
--output_dir=$EXPERIMENT_PATH |
|
--per_device_train_batch_size="64" |
|
--per_device_eval_batch_size="8" |
|
--dataloader_num_workers="24" |
|
--num_train_epochs="400" |
|
--group_by_length="True" |
|
--bf16 |
|
--do_train |
|
--do_evaluate |
|
--joint_decoding_during_training |
|
--load_best_model_at_end |
|
--metric_for_best_model="eval_wer" |
|
|
|
|
|
--optim="adamw_torch" |
|
--learning_rate="1e-3" |
|
--warmup_steps="40000" |
|
--early_stopping_patience="10" |
|
--weight_decay="1e-6" |
|
--max_grad_norm="0.5" |
|
--lsm_factor="0.1" |
|
--mask_unks |
|
--gradient_accumulation_steps="1" |
|
|
|
|
|
--report_to="wandb" |
|
--logging_steps="10" |
|
--save_strategy="epoch" |
|
--evaluation_strategy="epoch" |
|
--wandb_predictions_to_save=500 |
|
--greater_is_better="False" |
|
--save_total_limit="5" |
|
--track_ctc_loss |
|
|
|
|
|
--max_duration_in_seconds="20.0" |
|
--min_duration_in_seconds="0.2" |
|
--length_column_name="input_len" |
|
--remove_unused_columns="False" |
|
--preprocessing_num_workers="32" |
|
--dataset_name="/scratch/project/open-28-57/lakoc/processed_dataset_full" |
|
--writer_batch_size="500" |
|
--test_splits wsj_test fisher_swbd_dev voxpopuli_test tedlium3_test librispeech_test.clean librispeech_test.other commonvoice_en_test fleurs_test |
|
|
|
|
|
--data_preprocessing_config="${RECIPE_DIR}/data_preprocessing.json" |
|
|
|
|
|
--from_encoder_decoder_config |
|
--tokenizer_name="Lakoc/english_corpus_uni5000_normalized" |
|
--feature_extractor_name="Lakoc/log_80mel_extractor_16k" |
|
--base_encoder_model="Lakoc/fisher_ebranchformer_enc_12_layers_fixed" |
|
--base_decoder_model="Lakoc/gpt2_tiny_decoder_6_layers" |
|
--ctc_weight="0.3" |
|
--decoder_pos_emb_fixed |
|
--expect_2d_input |
|
|
|
|
|
--num_beams="1" |
|
--max_length="512" |
|
--predict_with_generate |
|
--decoding_ctc_weight="0" |
|
) |
|
|
|
export PARENT=`/bin/hostname -s` |
|
export MPORT=13000 |
|
export CHILDREN=`scontrol show hostnames $SLURM_JOB_NODELIST | grep -v $PARENT` |
|
export HOSTLIST="$PARENT $CHILDREN" |
|
export WORLD_SIZE=$SLURM_NTASKS |
|
|
|
conda deactivate |
|
source activate loco_asr |
|
|
|
mkdir -p $EXPERIMENT_PATH |
|
|
|
srun --cpus-per-task $SLURM_CPUS_ON_NODE --gpus-per-task $SLURM_GPUS_ON_NODE \ |
|
/mnt/proj1/open-28-58/lakoc/huggingface_asr/recipes/multinode_training/start_single_node_job.sh \ |
|
"${EXPERIMENT}" $PROJECT $WORK_DIR $RECIPE_DIR $HF_HOME "${args[@]}" |
|
|