# Training configuration for one transformer experiment.
#
# NOTE(review): this file was reconstructed from a copy in which every line
# break had been lost. Keys and values are unchanged; the nesting below is
# inferred from key order and the two bare mapping headers (`dataset:` and
# `model:`). Confirm against the code that loads this file.

experiment_name: "runs/transformer_big"

# Source/target language settings and data files.
dataset:
  # 'lo' / 'vi' look like language codes for the source and target sides
  # -- TODO confirm how the loader interprets them.
  src_lang: 'lo'
  src_tokenizer: 'BPE'
  src_max_seq_len: 400
  tgt_lang: 'vi'
  tgt_tokenizer: 'WordLevel'
  tgt_max_seq_len: 350
  train_dataset: 'train_clean.dat'
  validate_dataset: 'dev_clean.dat'
  # '{0}' is a positional placeholder; presumably filled per language by
  # the loader -- verify.
  # NOTE(review): could also be a top-level key -- confirm.
  tokenizer_file: "tokenizer_{0}.json"

# Model hyperparameters. The original inline comment read
# "42688527 parameters".
model:
  d_model: 512
  num_heads: 8
  d_ff: 2048
  dropout_p: 0.3
  num_encoder_layers: 4
  num_decoder_layers: 2
  # NOTE(review): the three keys below followed the model hyperparameters in
  # the source, so they are kept under `model`; they may instead belong at
  # the top level -- confirm.
  model_folder: "weights"
  model_basename: "transformer_"
  preload: "big"