#!/usr/bin/env bash
# Launch Flax/JAX MLM pretraining of a T5-base model for Bengali on the
# mC4 "bn" corpus. Expects run_t5_mlm_flax.py (HuggingFace transformers
# Flax example) in the current directory and a pre-trained tokenizer
# under "$TOKENIZER_DIR".
set -euo pipefail

# All experiment artifacts live under a single folder.
EXP_FOLDER="dumped/bengali_t5_base"
# NOTE(review): CACHE_DIR is defined but never passed to the python
# command — confirm whether a --cache_dir flag was intended.
CACHE_DIR="${EXP_FOLDER}/"
MODEL_CKPT="${EXP_FOLDER}/"   # checkpoints go to the experiment root

# CACHE_DIR and MODEL_CKPT resolve to the same directory as EXP_FOLDER;
# a single mkdir covers all three.
mkdir -p -- "$EXP_FOLDER" "$CACHE_DIR" "$MODEL_CKPT"

TOKENIZER_DIR="${EXP_FOLDER}/tokenizer"   # derive from EXP_FOLDER instead of repeating the literal path
MODEL_CONFIG="t5-base"   # architecture config only; weights are trained from scratch
MAX_SEQ_LEN=512

NUM_THREAD=50            # dataset preprocessing worker count
DATASET_NAME="mc4"
DATASET_CONFIG_NAME="bn" # Bengali subset of mC4

# -u: unbuffered stdout so training logs stream immediately.
python -u run_t5_mlm_flax.py \
  --output_dir "$MODEL_CKPT" \
  --model_type "t5" \
  --config_name "$MODEL_CONFIG" \
  --tokenizer_name "$TOKENIZER_DIR" \
  --dataset_name "$DATASET_NAME" \
  --dataset_config_name "$DATASET_CONFIG_NAME" \
  --max_seq_length "$MAX_SEQ_LEN" \
  --per_device_train_batch_size 8 \
  --per_device_eval_batch_size 8 \
  --adafactor \
  --learning_rate 1e-3 \
  --weight_decay 0.001 \
  --warmup_steps 5000 \
  --overwrite_output_dir \
  --num_train_epochs 10 \
  --logging_steps 500 \
  --save_steps 2500 \
  --eval_steps 7500 \
  --preprocessing_num_workers "$NUM_THREAD" \
  --dtype bfloat16