|
--- |
|
datasets: |
|
- multi_news |
|
metrics: |
|
- bleu |
|
- rouge |
|
pipeline_tag: summarization |
|
--- |
|
|
|
# Hyperparameters |
|
learning_rate=2e-5 |
|
per_device_train_batch_size=14 |
|
per_device_eval_batch_size=14 |
|
weight_decay=0.01 |
|
save_total_limit=3 |
|
num_train_epochs=3 |
|
predict_with_generate=True |
|
fp16=True |
|
|
|
# Training Output |
|
```
global_step=7710,
training_loss=2.436398018566087,
metrics={'train_runtime': 30287.1254,
         'train_samples_per_second': 3.564,
         'train_steps_per_second': 0.255,
         'total_flos': 3.1186278368988365e+17,
         'train_loss': 2.436398018566087,
         'epoch': 3.0}
```
|
|
|
# Training Results |
|
|
|
| Epoch | Training Loss | Validation Loss | Rouge1 | Rouge2 | RougeL | RougeLsum | Bleu | Gen Len |
|:------|:--------------|:----------------|:-------|:-------|:-------|:----------|:-----|:--------|
| 1 | 2.451200 | 2.291708 | 0.322800 | 0.110100 | 0.194600 | 0.194700 | 0.368400 | 150.224300 |
| 2 | 2.527300 | nan | 0.296400 | 0.100100 | 0.181800 | 0.181900 | 0.317300 | 137.569200 |
| 3 | 2.523800 | nan | 0.296600 | 0.100000 | 0.181800 | 0.181900 | 0.317200 | 137.254000 |