---
# Training hyperparameters (Adam optimizer + schedule).
# Restored from a whitespace-mangled single line: multiple `key: value`
# pairs on one line are not valid YAML (`: ` inside a plain scalar).
# All keys and values are unchanged; only layout/comments were added.

# Adam optimizer settings
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1.0e-08
adam_weight_decay: 0.01

# Training schedule
gradient_accumulation_steps: 1
learning_rate: 1.0e-06
lr_warmup_steps: 0
# NOTE(review): both max_train_steps and num_train_epochs are set — confirm
# which one the consuming trainer honors (many trainers let max_train_steps
# override the epoch count).
max_train_steps: 1050
num_train_epochs: 3
train_batch_size: 1