Training in progress, step 22000, checkpoint
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1856040378
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4531ecca714f467930734a738ae96aa6d8c7dde0ae2f856e01bfd7c2718c30e2
|
3 |
size 1856040378
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 928000378
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d73be928861bba842ddafd6c57448be532897ea8cdcffe6ba29b9511de401e3
|
3 |
size 928000378
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:206e7b0fb5a1dee1afc9d0786fbf979a9b9a48953b1e20b8dd66c2319f838ad0
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e2e1da63578bda22917ec19daaed13e356a28b0f07b3611af0830560ee695f8
|
3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "model/chessformer-3/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -73843,6 +73843,3522 @@
|
|
73843 |
"eval_samples_per_second": 556.179,
|
73844 |
"eval_steps_per_second": 69.543,
|
73845 |
"step": 21000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73846 |
}
|
73847 |
],
|
73848 |
"logging_steps": 2,
|
@@ -73862,7 +77378,7 @@
|
|
73862 |
"attributes": {}
|
73863 |
}
|
73864 |
},
|
73865 |
-
"total_flos": 4.
|
73866 |
"train_batch_size": 768,
|
73867 |
"trial_name": null,
|
73868 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.4320533275604248,
|
3 |
+
"best_model_checkpoint": "model/chessformer-3/checkpoint-22000",
|
4 |
+
"epoch": 0.9781690453959362,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 22000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
73843 |
"eval_samples_per_second": 556.179,
|
73844 |
"eval_steps_per_second": 69.543,
|
73845 |
"step": 21000
|
73846 |
+
},
|
73847 |
+
{
|
73848 |
+
"epoch": 0.9337957405184296,
|
73849 |
+
"grad_norm": 0.05756537243723869,
|
73850 |
+
"learning_rate": 1.1783100692341508e-05,
|
73851 |
+
"loss": 1.4479,
|
73852 |
+
"step": 21002
|
73853 |
+
},
|
73854 |
+
{
|
73855 |
+
"epoch": 0.933884664977102,
|
73856 |
+
"grad_norm": 0.0578317753970623,
|
73857 |
+
"learning_rate": 1.17515715586613e-05,
|
73858 |
+
"loss": 1.4463,
|
73859 |
+
"step": 21004
|
73860 |
+
},
|
73861 |
+
{
|
73862 |
+
"epoch": 0.9339735894357744,
|
73863 |
+
"grad_norm": 0.056406132876873016,
|
73864 |
+
"learning_rate": 1.172008416260839e-05,
|
73865 |
+
"loss": 1.4472,
|
73866 |
+
"step": 21006
|
73867 |
+
},
|
73868 |
+
{
|
73869 |
+
"epoch": 0.9340625138944467,
|
73870 |
+
"grad_norm": 0.0582900270819664,
|
73871 |
+
"learning_rate": 1.1688638506874405e-05,
|
73872 |
+
"loss": 1.452,
|
73873 |
+
"step": 21008
|
73874 |
+
},
|
73875 |
+
{
|
73876 |
+
"epoch": 0.934151438353119,
|
73877 |
+
"grad_norm": 0.05787406116724014,
|
73878 |
+
"learning_rate": 1.1657234594147636e-05,
|
73879 |
+
"loss": 1.4507,
|
73880 |
+
"step": 21010
|
73881 |
+
},
|
73882 |
+
{
|
73883 |
+
"epoch": 0.9342403628117913,
|
73884 |
+
"grad_norm": 0.05701528117060661,
|
73885 |
+
"learning_rate": 1.1625872427112439e-05,
|
73886 |
+
"loss": 1.4506,
|
73887 |
+
"step": 21012
|
73888 |
+
},
|
73889 |
+
{
|
73890 |
+
"epoch": 0.9343292872704637,
|
73891 |
+
"grad_norm": 0.05795227363705635,
|
73892 |
+
"learning_rate": 1.1594552008449888e-05,
|
73893 |
+
"loss": 1.4454,
|
73894 |
+
"step": 21014
|
73895 |
+
},
|
73896 |
+
{
|
73897 |
+
"epoch": 0.9344182117291361,
|
73898 |
+
"grad_norm": 0.05739319324493408,
|
73899 |
+
"learning_rate": 1.1563273340837289e-05,
|
73900 |
+
"loss": 1.451,
|
73901 |
+
"step": 21016
|
73902 |
+
},
|
73903 |
+
{
|
73904 |
+
"epoch": 0.9345071361878085,
|
73905 |
+
"grad_norm": 0.05824971944093704,
|
73906 |
+
"learning_rate": 1.1532036426948667e-05,
|
73907 |
+
"loss": 1.4479,
|
73908 |
+
"step": 21018
|
73909 |
+
},
|
73910 |
+
{
|
73911 |
+
"epoch": 0.9345960606464808,
|
73912 |
+
"grad_norm": 0.05758555978536606,
|
73913 |
+
"learning_rate": 1.1500841269454166e-05,
|
73914 |
+
"loss": 1.4407,
|
73915 |
+
"step": 21020
|
73916 |
+
},
|
73917 |
+
{
|
73918 |
+
"epoch": 0.9346849851051532,
|
73919 |
+
"grad_norm": 0.05768615007400513,
|
73920 |
+
"learning_rate": 1.1469687871020428e-05,
|
73921 |
+
"loss": 1.4472,
|
73922 |
+
"step": 21022
|
73923 |
+
},
|
73924 |
+
{
|
73925 |
+
"epoch": 0.9347739095638256,
|
73926 |
+
"grad_norm": 0.057626668363809586,
|
73927 |
+
"learning_rate": 1.143857623431066e-05,
|
73928 |
+
"loss": 1.4431,
|
73929 |
+
"step": 21024
|
73930 |
+
},
|
73931 |
+
{
|
73932 |
+
"epoch": 0.9348628340224979,
|
73933 |
+
"grad_norm": 0.057193391025066376,
|
73934 |
+
"learning_rate": 1.1407506361984509e-05,
|
73935 |
+
"loss": 1.4451,
|
73936 |
+
"step": 21026
|
73937 |
+
},
|
73938 |
+
{
|
73939 |
+
"epoch": 0.9349517584811703,
|
73940 |
+
"grad_norm": 0.05753851309418678,
|
73941 |
+
"learning_rate": 1.1376478256697797e-05,
|
73942 |
+
"loss": 1.4413,
|
73943 |
+
"step": 21028
|
73944 |
+
},
|
73945 |
+
{
|
73946 |
+
"epoch": 0.9350406829398425,
|
73947 |
+
"grad_norm": 0.05880241096019745,
|
73948 |
+
"learning_rate": 1.1345491921102957e-05,
|
73949 |
+
"loss": 1.4487,
|
73950 |
+
"step": 21030
|
73951 |
+
},
|
73952 |
+
{
|
73953 |
+
"epoch": 0.9351296073985149,
|
73954 |
+
"grad_norm": 0.057462744414806366,
|
73955 |
+
"learning_rate": 1.1314547357849037e-05,
|
73956 |
+
"loss": 1.4432,
|
73957 |
+
"step": 21032
|
73958 |
+
},
|
73959 |
+
{
|
73960 |
+
"epoch": 0.9352185318571873,
|
73961 |
+
"grad_norm": 0.05817759782075882,
|
73962 |
+
"learning_rate": 1.1283644569581087e-05,
|
73963 |
+
"loss": 1.4497,
|
73964 |
+
"step": 21034
|
73965 |
+
},
|
73966 |
+
{
|
73967 |
+
"epoch": 0.9353074563158597,
|
73968 |
+
"grad_norm": 0.058256879448890686,
|
73969 |
+
"learning_rate": 1.1252783558940882e-05,
|
73970 |
+
"loss": 1.4495,
|
73971 |
+
"step": 21036
|
73972 |
+
},
|
73973 |
+
{
|
73974 |
+
"epoch": 0.935396380774532,
|
73975 |
+
"grad_norm": 0.05921512097120285,
|
73976 |
+
"learning_rate": 1.1221964328566647e-05,
|
73977 |
+
"loss": 1.444,
|
73978 |
+
"step": 21038
|
73979 |
+
},
|
73980 |
+
{
|
73981 |
+
"epoch": 0.9354853052332044,
|
73982 |
+
"grad_norm": 0.05717106908559799,
|
73983 |
+
"learning_rate": 1.119118688109283e-05,
|
73984 |
+
"loss": 1.4431,
|
73985 |
+
"step": 21040
|
73986 |
+
},
|
73987 |
+
{
|
73988 |
+
"epoch": 0.9355742296918768,
|
73989 |
+
"grad_norm": 0.058333754539489746,
|
73990 |
+
"learning_rate": 1.1160451219150548e-05,
|
73991 |
+
"loss": 1.4481,
|
73992 |
+
"step": 21042
|
73993 |
+
},
|
73994 |
+
{
|
73995 |
+
"epoch": 0.9356631541505491,
|
73996 |
+
"grad_norm": 0.05773552879691124,
|
73997 |
+
"learning_rate": 1.112975734536703e-05,
|
73998 |
+
"loss": 1.4508,
|
73999 |
+
"step": 21044
|
74000 |
+
},
|
74001 |
+
{
|
74002 |
+
"epoch": 0.9357520786092215,
|
74003 |
+
"grad_norm": 0.058473654091358185,
|
74004 |
+
"learning_rate": 1.1099105262366294e-05,
|
74005 |
+
"loss": 1.452,
|
74006 |
+
"step": 21046
|
74007 |
+
},
|
74008 |
+
{
|
74009 |
+
"epoch": 0.9358410030678939,
|
74010 |
+
"grad_norm": 0.057561490684747696,
|
74011 |
+
"learning_rate": 1.1068494972768572e-05,
|
74012 |
+
"loss": 1.4452,
|
74013 |
+
"step": 21048
|
74014 |
+
},
|
74015 |
+
{
|
74016 |
+
"epoch": 0.9359299275265662,
|
74017 |
+
"grad_norm": 0.05912725254893303,
|
74018 |
+
"learning_rate": 1.1037926479190497e-05,
|
74019 |
+
"loss": 1.4493,
|
74020 |
+
"step": 21050
|
74021 |
+
},
|
74022 |
+
{
|
74023 |
+
"epoch": 0.9360188519852385,
|
74024 |
+
"grad_norm": 0.05738065019249916,
|
74025 |
+
"learning_rate": 1.1007399784245308e-05,
|
74026 |
+
"loss": 1.4438,
|
74027 |
+
"step": 21052
|
74028 |
+
},
|
74029 |
+
{
|
74030 |
+
"epoch": 0.9361077764439109,
|
74031 |
+
"grad_norm": 0.05802926421165466,
|
74032 |
+
"learning_rate": 1.0976914890542422e-05,
|
74033 |
+
"loss": 1.4481,
|
74034 |
+
"step": 21054
|
74035 |
+
},
|
74036 |
+
{
|
74037 |
+
"epoch": 0.9361967009025832,
|
74038 |
+
"grad_norm": 0.05792355164885521,
|
74039 |
+
"learning_rate": 1.0946471800688029e-05,
|
74040 |
+
"loss": 1.4466,
|
74041 |
+
"step": 21056
|
74042 |
+
},
|
74043 |
+
{
|
74044 |
+
"epoch": 0.9362856253612556,
|
74045 |
+
"grad_norm": 0.059775542467832565,
|
74046 |
+
"learning_rate": 1.0916070517284327e-05,
|
74047 |
+
"loss": 1.4437,
|
74048 |
+
"step": 21058
|
74049 |
+
},
|
74050 |
+
{
|
74051 |
+
"epoch": 0.936374549819928,
|
74052 |
+
"grad_norm": 0.058133359998464584,
|
74053 |
+
"learning_rate": 1.0885711042930235e-05,
|
74054 |
+
"loss": 1.4453,
|
74055 |
+
"step": 21060
|
74056 |
+
},
|
74057 |
+
{
|
74058 |
+
"epoch": 0.9364634742786003,
|
74059 |
+
"grad_norm": 0.05763556808233261,
|
74060 |
+
"learning_rate": 1.0855393380221068e-05,
|
74061 |
+
"loss": 1.4554,
|
74062 |
+
"step": 21062
|
74063 |
+
},
|
74064 |
+
{
|
74065 |
+
"epoch": 0.9365523987372727,
|
74066 |
+
"grad_norm": 0.058549582958221436,
|
74067 |
+
"learning_rate": 1.0825117531748474e-05,
|
74068 |
+
"loss": 1.447,
|
74069 |
+
"step": 21064
|
74070 |
+
},
|
74071 |
+
{
|
74072 |
+
"epoch": 0.9366413231959451,
|
74073 |
+
"grad_norm": 0.057294197380542755,
|
74074 |
+
"learning_rate": 1.0794883500100494e-05,
|
74075 |
+
"loss": 1.4436,
|
74076 |
+
"step": 21066
|
74077 |
+
},
|
74078 |
+
{
|
74079 |
+
"epoch": 0.9367302476546174,
|
74080 |
+
"grad_norm": 0.05782110616564751,
|
74081 |
+
"learning_rate": 1.0764691287861783e-05,
|
74082 |
+
"loss": 1.4539,
|
74083 |
+
"step": 21068
|
74084 |
+
},
|
74085 |
+
{
|
74086 |
+
"epoch": 0.9368191721132898,
|
74087 |
+
"grad_norm": 0.05737898498773575,
|
74088 |
+
"learning_rate": 1.0734540897613221e-05,
|
74089 |
+
"loss": 1.4546,
|
74090 |
+
"step": 21070
|
74091 |
+
},
|
74092 |
+
{
|
74093 |
+
"epoch": 0.9369080965719622,
|
74094 |
+
"grad_norm": 0.05745162069797516,
|
74095 |
+
"learning_rate": 1.0704432331932245e-05,
|
74096 |
+
"loss": 1.4459,
|
74097 |
+
"step": 21072
|
74098 |
+
},
|
74099 |
+
{
|
74100 |
+
"epoch": 0.9369970210306344,
|
74101 |
+
"grad_norm": 0.05851050093770027,
|
74102 |
+
"learning_rate": 1.0674365593392633e-05,
|
74103 |
+
"loss": 1.4427,
|
74104 |
+
"step": 21074
|
74105 |
+
},
|
74106 |
+
{
|
74107 |
+
"epoch": 0.9370859454893068,
|
74108 |
+
"grad_norm": 0.058115605264902115,
|
74109 |
+
"learning_rate": 1.0644340684564657e-05,
|
74110 |
+
"loss": 1.442,
|
74111 |
+
"step": 21076
|
74112 |
+
},
|
74113 |
+
{
|
74114 |
+
"epoch": 0.9371748699479792,
|
74115 |
+
"grad_norm": 0.05693921446800232,
|
74116 |
+
"learning_rate": 1.061435760801499e-05,
|
74117 |
+
"loss": 1.4476,
|
74118 |
+
"step": 21078
|
74119 |
+
},
|
74120 |
+
{
|
74121 |
+
"epoch": 0.9372637944066515,
|
74122 |
+
"grad_norm": 0.05739055946469307,
|
74123 |
+
"learning_rate": 1.0584416366306581e-05,
|
74124 |
+
"loss": 1.4492,
|
74125 |
+
"step": 21080
|
74126 |
+
},
|
74127 |
+
{
|
74128 |
+
"epoch": 0.9373527188653239,
|
74129 |
+
"grad_norm": 0.05716922506690025,
|
74130 |
+
"learning_rate": 1.0554516961999105e-05,
|
74131 |
+
"loss": 1.4438,
|
74132 |
+
"step": 21082
|
74133 |
+
},
|
74134 |
+
{
|
74135 |
+
"epoch": 0.9374416433239963,
|
74136 |
+
"grad_norm": 0.05674619972705841,
|
74137 |
+
"learning_rate": 1.0524659397648462e-05,
|
74138 |
+
"loss": 1.4482,
|
74139 |
+
"step": 21084
|
74140 |
+
},
|
74141 |
+
{
|
74142 |
+
"epoch": 0.9375305677826686,
|
74143 |
+
"grad_norm": 0.058578867465257645,
|
74144 |
+
"learning_rate": 1.0494843675806942e-05,
|
74145 |
+
"loss": 1.4419,
|
74146 |
+
"step": 21086
|
74147 |
+
},
|
74148 |
+
{
|
74149 |
+
"epoch": 0.937619492241341,
|
74150 |
+
"grad_norm": 0.05818881839513779,
|
74151 |
+
"learning_rate": 1.0465069799023397e-05,
|
74152 |
+
"loss": 1.4496,
|
74153 |
+
"step": 21088
|
74154 |
+
},
|
74155 |
+
{
|
74156 |
+
"epoch": 0.9377084167000134,
|
74157 |
+
"grad_norm": 0.058019597083330154,
|
74158 |
+
"learning_rate": 1.0435337769843012e-05,
|
74159 |
+
"loss": 1.4423,
|
74160 |
+
"step": 21090
|
74161 |
+
},
|
74162 |
+
{
|
74163 |
+
"epoch": 0.9377973411586857,
|
74164 |
+
"grad_norm": 0.05781329795718193,
|
74165 |
+
"learning_rate": 1.0405647590807366e-05,
|
74166 |
+
"loss": 1.4467,
|
74167 |
+
"step": 21092
|
74168 |
+
},
|
74169 |
+
{
|
74170 |
+
"epoch": 0.9378862656173581,
|
74171 |
+
"grad_norm": 0.05810556560754776,
|
74172 |
+
"learning_rate": 1.0375999264454483e-05,
|
74173 |
+
"loss": 1.4453,
|
74174 |
+
"step": 21094
|
74175 |
+
},
|
74176 |
+
{
|
74177 |
+
"epoch": 0.9379751900760304,
|
74178 |
+
"grad_norm": 0.05800723284482956,
|
74179 |
+
"learning_rate": 1.0346392793319005e-05,
|
74180 |
+
"loss": 1.442,
|
74181 |
+
"step": 21096
|
74182 |
+
},
|
74183 |
+
{
|
74184 |
+
"epoch": 0.9380641145347027,
|
74185 |
+
"grad_norm": 0.0583004355430603,
|
74186 |
+
"learning_rate": 1.031682817993157e-05,
|
74187 |
+
"loss": 1.447,
|
74188 |
+
"step": 21098
|
74189 |
+
},
|
74190 |
+
{
|
74191 |
+
"epoch": 0.9381530389933751,
|
74192 |
+
"grad_norm": 0.05934945121407509,
|
74193 |
+
"learning_rate": 1.0287305426819771e-05,
|
74194 |
+
"loss": 1.4457,
|
74195 |
+
"step": 21100
|
74196 |
+
},
|
74197 |
+
{
|
74198 |
+
"epoch": 0.9382419634520475,
|
74199 |
+
"grad_norm": 0.05703364312648773,
|
74200 |
+
"learning_rate": 1.0257824536507088e-05,
|
74201 |
+
"loss": 1.4435,
|
74202 |
+
"step": 21102
|
74203 |
+
},
|
74204 |
+
{
|
74205 |
+
"epoch": 0.9383308879107198,
|
74206 |
+
"grad_norm": 0.057916540652513504,
|
74207 |
+
"learning_rate": 1.0228385511513782e-05,
|
74208 |
+
"loss": 1.4447,
|
74209 |
+
"step": 21104
|
74210 |
+
},
|
74211 |
+
{
|
74212 |
+
"epoch": 0.9384198123693922,
|
74213 |
+
"grad_norm": 0.05742233991622925,
|
74214 |
+
"learning_rate": 1.0198988354356509e-05,
|
74215 |
+
"loss": 1.442,
|
74216 |
+
"step": 21106
|
74217 |
+
},
|
74218 |
+
{
|
74219 |
+
"epoch": 0.9385087368280646,
|
74220 |
+
"grad_norm": 0.05896897241473198,
|
74221 |
+
"learning_rate": 1.0169633067548145e-05,
|
74222 |
+
"loss": 1.4476,
|
74223 |
+
"step": 21108
|
74224 |
+
},
|
74225 |
+
{
|
74226 |
+
"epoch": 0.9385976612867369,
|
74227 |
+
"grad_norm": 0.05869178846478462,
|
74228 |
+
"learning_rate": 1.0140319653598185e-05,
|
74229 |
+
"loss": 1.4472,
|
74230 |
+
"step": 21110
|
74231 |
+
},
|
74232 |
+
{
|
74233 |
+
"epoch": 0.9386865857454093,
|
74234 |
+
"grad_norm": 0.05712299793958664,
|
74235 |
+
"learning_rate": 1.0111048115012456e-05,
|
74236 |
+
"loss": 1.4457,
|
74237 |
+
"step": 21112
|
74238 |
+
},
|
74239 |
+
{
|
74240 |
+
"epoch": 0.9387755102040817,
|
74241 |
+
"grad_norm": 0.05764850974082947,
|
74242 |
+
"learning_rate": 1.0081818454293124e-05,
|
74243 |
+
"loss": 1.4435,
|
74244 |
+
"step": 21114
|
74245 |
+
},
|
74246 |
+
{
|
74247 |
+
"epoch": 0.938864434662754,
|
74248 |
+
"grad_norm": 0.05720280855894089,
|
74249 |
+
"learning_rate": 1.0052630673939023e-05,
|
74250 |
+
"loss": 1.4491,
|
74251 |
+
"step": 21116
|
74252 |
+
},
|
74253 |
+
{
|
74254 |
+
"epoch": 0.9389533591214263,
|
74255 |
+
"grad_norm": 0.058444440364837646,
|
74256 |
+
"learning_rate": 1.0023484776445158e-05,
|
74257 |
+
"loss": 1.4496,
|
74258 |
+
"step": 21118
|
74259 |
+
},
|
74260 |
+
{
|
74261 |
+
"epoch": 0.9390422835800987,
|
74262 |
+
"grad_norm": 0.0576569139957428,
|
74263 |
+
"learning_rate": 9.994380764303091e-06,
|
74264 |
+
"loss": 1.4523,
|
74265 |
+
"step": 21120
|
74266 |
+
},
|
74267 |
+
{
|
74268 |
+
"epoch": 0.939131208038771,
|
74269 |
+
"grad_norm": 0.05701880529522896,
|
74270 |
+
"learning_rate": 9.96531864000072e-06,
|
74271 |
+
"loss": 1.4496,
|
74272 |
+
"step": 21122
|
74273 |
+
},
|
74274 |
+
{
|
74275 |
+
"epoch": 0.9392201324974434,
|
74276 |
+
"grad_norm": 0.05704936385154724,
|
74277 |
+
"learning_rate": 9.936298406022393e-06,
|
74278 |
+
"loss": 1.4445,
|
74279 |
+
"step": 21124
|
74280 |
+
},
|
74281 |
+
{
|
74282 |
+
"epoch": 0.9393090569561158,
|
74283 |
+
"grad_norm": 0.05782592296600342,
|
74284 |
+
"learning_rate": 9.907320064848956e-06,
|
74285 |
+
"loss": 1.4462,
|
74286 |
+
"step": 21126
|
74287 |
+
},
|
74288 |
+
{
|
74289 |
+
"epoch": 0.9393979814147881,
|
74290 |
+
"grad_norm": 0.05813451111316681,
|
74291 |
+
"learning_rate": 9.878383618957599e-06,
|
74292 |
+
"loss": 1.4475,
|
74293 |
+
"step": 21128
|
74294 |
+
},
|
74295 |
+
{
|
74296 |
+
"epoch": 0.9394869058734605,
|
74297 |
+
"grad_norm": 0.05756333842873573,
|
74298 |
+
"learning_rate": 9.849489070821893e-06,
|
74299 |
+
"loss": 1.4443,
|
74300 |
+
"step": 21130
|
74301 |
+
},
|
74302 |
+
{
|
74303 |
+
"epoch": 0.9395758303321329,
|
74304 |
+
"grad_norm": 0.05767510086297989,
|
74305 |
+
"learning_rate": 9.820636422911811e-06,
|
74306 |
+
"loss": 1.4527,
|
74307 |
+
"step": 21132
|
74308 |
+
},
|
74309 |
+
{
|
74310 |
+
"epoch": 0.9396647547908052,
|
74311 |
+
"grad_norm": 0.057892054319381714,
|
74312 |
+
"learning_rate": 9.791825677693988e-06,
|
74313 |
+
"loss": 1.4458,
|
74314 |
+
"step": 21134
|
74315 |
+
},
|
74316 |
+
{
|
74317 |
+
"epoch": 0.9397536792494776,
|
74318 |
+
"grad_norm": 0.057954948395490646,
|
74319 |
+
"learning_rate": 9.763056837631123e-06,
|
74320 |
+
"loss": 1.4412,
|
74321 |
+
"step": 21136
|
74322 |
+
},
|
74323 |
+
{
|
74324 |
+
"epoch": 0.93984260370815,
|
74325 |
+
"grad_norm": 0.05756092816591263,
|
74326 |
+
"learning_rate": 9.734329905182527e-06,
|
74327 |
+
"loss": 1.4492,
|
74328 |
+
"step": 21138
|
74329 |
+
},
|
74330 |
+
{
|
74331 |
+
"epoch": 0.9399315281668222,
|
74332 |
+
"grad_norm": 0.05734538286924362,
|
74333 |
+
"learning_rate": 9.705644882803954e-06,
|
74334 |
+
"loss": 1.4451,
|
74335 |
+
"step": 21140
|
74336 |
+
},
|
74337 |
+
{
|
74338 |
+
"epoch": 0.9400204526254946,
|
74339 |
+
"grad_norm": 0.057080693542957306,
|
74340 |
+
"learning_rate": 9.677001772947614e-06,
|
74341 |
+
"loss": 1.4529,
|
74342 |
+
"step": 21142
|
74343 |
+
},
|
74344 |
+
{
|
74345 |
+
"epoch": 0.940109377084167,
|
74346 |
+
"grad_norm": 0.057391393929719925,
|
74347 |
+
"learning_rate": 9.648400578061822e-06,
|
74348 |
+
"loss": 1.4459,
|
74349 |
+
"step": 21144
|
74350 |
+
},
|
74351 |
+
{
|
74352 |
+
"epoch": 0.9401983015428393,
|
74353 |
+
"grad_norm": 0.0573163703083992,
|
74354 |
+
"learning_rate": 9.619841300591736e-06,
|
74355 |
+
"loss": 1.4446,
|
74356 |
+
"step": 21146
|
74357 |
+
},
|
74358 |
+
{
|
74359 |
+
"epoch": 0.9402872260015117,
|
74360 |
+
"grad_norm": 0.058304496109485626,
|
74361 |
+
"learning_rate": 9.591323942978624e-06,
|
74362 |
+
"loss": 1.4483,
|
74363 |
+
"step": 21148
|
74364 |
+
},
|
74365 |
+
{
|
74366 |
+
"epoch": 0.9403761504601841,
|
74367 |
+
"grad_norm": 0.059254664927721024,
|
74368 |
+
"learning_rate": 9.562848507660316e-06,
|
74369 |
+
"loss": 1.4455,
|
74370 |
+
"step": 21150
|
74371 |
+
},
|
74372 |
+
{
|
74373 |
+
"epoch": 0.9404650749188564,
|
74374 |
+
"grad_norm": 0.05732493847608566,
|
74375 |
+
"learning_rate": 9.534414997070973e-06,
|
74376 |
+
"loss": 1.4498,
|
74377 |
+
"step": 21152
|
74378 |
+
},
|
74379 |
+
{
|
74380 |
+
"epoch": 0.9405539993775288,
|
74381 |
+
"grad_norm": 0.05808182805776596,
|
74382 |
+
"learning_rate": 9.506023413641263e-06,
|
74383 |
+
"loss": 1.4464,
|
74384 |
+
"step": 21154
|
74385 |
+
},
|
74386 |
+
{
|
74387 |
+
"epoch": 0.9406429238362012,
|
74388 |
+
"grad_norm": 0.05795228108763695,
|
74389 |
+
"learning_rate": 9.477673759798188e-06,
|
74390 |
+
"loss": 1.444,
|
74391 |
+
"step": 21156
|
74392 |
+
},
|
74393 |
+
{
|
74394 |
+
"epoch": 0.9407318482948736,
|
74395 |
+
"grad_norm": 0.0579368956387043,
|
74396 |
+
"learning_rate": 9.449366037965313e-06,
|
74397 |
+
"loss": 1.4475,
|
74398 |
+
"step": 21158
|
74399 |
+
},
|
74400 |
+
{
|
74401 |
+
"epoch": 0.9408207727535458,
|
74402 |
+
"grad_norm": 0.05821996554732323,
|
74403 |
+
"learning_rate": 9.421100250562309e-06,
|
74404 |
+
"loss": 1.4519,
|
74405 |
+
"step": 21160
|
74406 |
+
},
|
74407 |
+
{
|
74408 |
+
"epoch": 0.9409096972122182,
|
74409 |
+
"grad_norm": 0.058779843151569366,
|
74410 |
+
"learning_rate": 9.392876400005579e-06,
|
74411 |
+
"loss": 1.4436,
|
74412 |
+
"step": 21162
|
74413 |
+
},
|
74414 |
+
{
|
74415 |
+
"epoch": 0.9409986216708905,
|
74416 |
+
"grad_norm": 0.05720444768667221,
|
74417 |
+
"learning_rate": 9.364694488707858e-06,
|
74418 |
+
"loss": 1.4545,
|
74419 |
+
"step": 21164
|
74420 |
+
},
|
74421 |
+
{
|
74422 |
+
"epoch": 0.9410875461295629,
|
74423 |
+
"grad_norm": 0.057297226041555405,
|
74424 |
+
"learning_rate": 9.336554519078221e-06,
|
74425 |
+
"loss": 1.4425,
|
74426 |
+
"step": 21166
|
74427 |
+
},
|
74428 |
+
{
|
74429 |
+
"epoch": 0.9411764705882353,
|
74430 |
+
"grad_norm": 0.057402316480875015,
|
74431 |
+
"learning_rate": 9.308456493522133e-06,
|
74432 |
+
"loss": 1.4458,
|
74433 |
+
"step": 21168
|
74434 |
+
},
|
74435 |
+
{
|
74436 |
+
"epoch": 0.9412653950469076,
|
74437 |
+
"grad_norm": 0.05686284974217415,
|
74438 |
+
"learning_rate": 9.280400414441613e-06,
|
74439 |
+
"loss": 1.4437,
|
74440 |
+
"step": 21170
|
74441 |
+
},
|
74442 |
+
{
|
74443 |
+
"epoch": 0.94135431950558,
|
74444 |
+
"grad_norm": 0.05828374624252319,
|
74445 |
+
"learning_rate": 9.252386284235026e-06,
|
74446 |
+
"loss": 1.4463,
|
74447 |
+
"step": 21172
|
74448 |
+
},
|
74449 |
+
{
|
74450 |
+
"epoch": 0.9414432439642524,
|
74451 |
+
"grad_norm": 0.05804366618394852,
|
74452 |
+
"learning_rate": 9.224414105297064e-06,
|
74453 |
+
"loss": 1.4455,
|
74454 |
+
"step": 21174
|
74455 |
+
},
|
74456 |
+
{
|
74457 |
+
"epoch": 0.9415321684229248,
|
74458 |
+
"grad_norm": 0.058524325489997864,
|
74459 |
+
"learning_rate": 9.19648388001898e-06,
|
74460 |
+
"loss": 1.4406,
|
74461 |
+
"step": 21176
|
74462 |
+
},
|
74463 |
+
{
|
74464 |
+
"epoch": 0.9416210928815971,
|
74465 |
+
"grad_norm": 0.05712493881583214,
|
74466 |
+
"learning_rate": 9.168595610788365e-06,
|
74467 |
+
"loss": 1.445,
|
74468 |
+
"step": 21178
|
74469 |
+
},
|
74470 |
+
{
|
74471 |
+
"epoch": 0.9417100173402695,
|
74472 |
+
"grad_norm": 0.05792125314474106,
|
74473 |
+
"learning_rate": 9.140749299989205e-06,
|
74474 |
+
"loss": 1.4381,
|
74475 |
+
"step": 21180
|
74476 |
+
},
|
74477 |
+
{
|
74478 |
+
"epoch": 0.9417989417989417,
|
74479 |
+
"grad_norm": 0.05810127779841423,
|
74480 |
+
"learning_rate": 9.112944950001978e-06,
|
74481 |
+
"loss": 1.4472,
|
74482 |
+
"step": 21182
|
74483 |
+
},
|
74484 |
+
{
|
74485 |
+
"epoch": 0.9418878662576141,
|
74486 |
+
"grad_norm": 0.05826450139284134,
|
74487 |
+
"learning_rate": 9.085182563203453e-06,
|
74488 |
+
"loss": 1.4468,
|
74489 |
+
"step": 21184
|
74490 |
+
},
|
74491 |
+
{
|
74492 |
+
"epoch": 0.9419767907162865,
|
74493 |
+
"grad_norm": 0.0577828474342823,
|
74494 |
+
"learning_rate": 9.057462141966898e-06,
|
74495 |
+
"loss": 1.4437,
|
74496 |
+
"step": 21186
|
74497 |
+
},
|
74498 |
+
{
|
74499 |
+
"epoch": 0.9420657151749589,
|
74500 |
+
"grad_norm": 0.05752667412161827,
|
74501 |
+
"learning_rate": 9.02978368866203e-06,
|
74502 |
+
"loss": 1.4479,
|
74503 |
+
"step": 21188
|
74504 |
+
},
|
74505 |
+
{
|
74506 |
+
"epoch": 0.9421546396336312,
|
74507 |
+
"grad_norm": 0.05732182040810585,
|
74508 |
+
"learning_rate": 9.002147205654843e-06,
|
74509 |
+
"loss": 1.4476,
|
74510 |
+
"step": 21190
|
74511 |
+
},
|
74512 |
+
{
|
74513 |
+
"epoch": 0.9422435640923036,
|
74514 |
+
"grad_norm": 0.057963550090789795,
|
74515 |
+
"learning_rate": 8.974552695307948e-06,
|
74516 |
+
"loss": 1.4481,
|
74517 |
+
"step": 21192
|
74518 |
+
},
|
74519 |
+
{
|
74520 |
+
"epoch": 0.942332488550976,
|
74521 |
+
"grad_norm": 0.05983150750398636,
|
74522 |
+
"learning_rate": 8.947000159980067e-06,
|
74523 |
+
"loss": 1.4485,
|
74524 |
+
"step": 21194
|
74525 |
+
},
|
74526 |
+
{
|
74527 |
+
"epoch": 0.9424214130096483,
|
74528 |
+
"grad_norm": 0.05860733240842819,
|
74529 |
+
"learning_rate": 8.919489602026653e-06,
|
74530 |
+
"loss": 1.4393,
|
74531 |
+
"step": 21196
|
74532 |
+
},
|
74533 |
+
{
|
74534 |
+
"epoch": 0.9425103374683207,
|
74535 |
+
"grad_norm": 0.05784778296947479,
|
74536 |
+
"learning_rate": 8.892021023799435e-06,
|
74537 |
+
"loss": 1.4464,
|
74538 |
+
"step": 21198
|
74539 |
+
},
|
74540 |
+
{
|
74541 |
+
"epoch": 0.9425992619269931,
|
74542 |
+
"grad_norm": 0.05762090906500816,
|
74543 |
+
"learning_rate": 8.864594427646478e-06,
|
74544 |
+
"loss": 1.4431,
|
74545 |
+
"step": 21200
|
74546 |
+
},
|
74547 |
+
{
|
74548 |
+
"epoch": 0.9426881863856654,
|
74549 |
+
"grad_norm": 0.0579993836581707,
|
74550 |
+
"learning_rate": 8.837209815912295e-06,
|
74551 |
+
"loss": 1.4431,
|
74552 |
+
"step": 21202
|
74553 |
+
},
|
74554 |
+
{
|
74555 |
+
"epoch": 0.9427771108443377,
|
74556 |
+
"grad_norm": 0.0585738830268383,
|
74557 |
+
"learning_rate": 8.809867190938014e-06,
|
74558 |
+
"loss": 1.4496,
|
74559 |
+
"step": 21204
|
74560 |
+
},
|
74561 |
+
{
|
74562 |
+
"epoch": 0.94286603530301,
|
74563 |
+
"grad_norm": 0.05800570920109749,
|
74564 |
+
"learning_rate": 8.782566555060822e-06,
|
74565 |
+
"loss": 1.4428,
|
74566 |
+
"step": 21206
|
74567 |
+
},
|
74568 |
+
{
|
74569 |
+
"epoch": 0.9429549597616824,
|
74570 |
+
"grad_norm": 0.058509331196546555,
|
74571 |
+
"learning_rate": 8.755307910614574e-06,
|
74572 |
+
"loss": 1.4455,
|
74573 |
+
"step": 21208
|
74574 |
+
},
|
74575 |
+
{
|
74576 |
+
"epoch": 0.9430438842203548,
|
74577 |
+
"grad_norm": 0.05728688836097717,
|
74578 |
+
"learning_rate": 8.728091259929404e-06,
|
74579 |
+
"loss": 1.4459,
|
74580 |
+
"step": 21210
|
74581 |
+
},
|
74582 |
+
{
|
74583 |
+
"epoch": 0.9431328086790272,
|
74584 |
+
"grad_norm": 0.057143598794937134,
|
74585 |
+
"learning_rate": 8.70091660533201e-06,
|
74586 |
+
"loss": 1.4493,
|
74587 |
+
"step": 21212
|
74588 |
+
},
|
74589 |
+
{
|
74590 |
+
"epoch": 0.9432217331376995,
|
74591 |
+
"grad_norm": 0.05791422724723816,
|
74592 |
+
"learning_rate": 8.673783949145364e-06,
|
74593 |
+
"loss": 1.4482,
|
74594 |
+
"step": 21214
|
74595 |
+
},
|
74596 |
+
{
|
74597 |
+
"epoch": 0.9433106575963719,
|
74598 |
+
"grad_norm": 0.05727590247988701,
|
74599 |
+
"learning_rate": 8.646693293688834e-06,
|
74600 |
+
"loss": 1.4514,
|
74601 |
+
"step": 21216
|
74602 |
+
},
|
74603 |
+
{
|
74604 |
+
"epoch": 0.9433995820550443,
|
74605 |
+
"grad_norm": 0.05757603794336319,
|
74606 |
+
"learning_rate": 8.61964464127829e-06,
|
74607 |
+
"loss": 1.4526,
|
74608 |
+
"step": 21218
|
74609 |
+
},
|
74610 |
+
{
|
74611 |
+
"epoch": 0.9434885065137166,
|
74612 |
+
"grad_norm": 0.057514291256666183,
|
74613 |
+
"learning_rate": 8.592637994225994e-06,
|
74614 |
+
"loss": 1.4421,
|
74615 |
+
"step": 21220
|
74616 |
+
},
|
74617 |
+
{
|
74618 |
+
"epoch": 0.943577430972389,
|
74619 |
+
"grad_norm": 0.05774332210421562,
|
74620 |
+
"learning_rate": 8.565673354840543e-06,
|
74621 |
+
"loss": 1.4437,
|
74622 |
+
"step": 21222
|
74623 |
+
},
|
74624 |
+
{
|
74625 |
+
"epoch": 0.9436663554310614,
|
74626 |
+
"grad_norm": 0.05754677578806877,
|
74627 |
+
"learning_rate": 8.53875072542698e-06,
|
74628 |
+
"loss": 1.4513,
|
74629 |
+
"step": 21224
|
74630 |
+
},
|
74631 |
+
{
|
74632 |
+
"epoch": 0.9437552798897336,
|
74633 |
+
"grad_norm": 0.05838412791490555,
|
74634 |
+
"learning_rate": 8.511870108286857e-06,
|
74635 |
+
"loss": 1.449,
|
74636 |
+
"step": 21226
|
74637 |
+
},
|
74638 |
+
{
|
74639 |
+
"epoch": 0.943844204348406,
|
74640 |
+
"grad_norm": 0.05649451166391373,
|
74641 |
+
"learning_rate": 8.485031505718e-06,
|
74642 |
+
"loss": 1.4425,
|
74643 |
+
"step": 21228
|
74644 |
+
},
|
74645 |
+
{
|
74646 |
+
"epoch": 0.9439331288070784,
|
74647 |
+
"grad_norm": 0.05882545933127403,
|
74648 |
+
"learning_rate": 8.458234920014684e-06,
|
74649 |
+
"loss": 1.4436,
|
74650 |
+
"step": 21230
|
74651 |
+
},
|
74652 |
+
{
|
74653 |
+
"epoch": 0.9440220532657507,
|
74654 |
+
"grad_norm": 0.05716662108898163,
|
74655 |
+
"learning_rate": 8.431480353467524e-06,
|
74656 |
+
"loss": 1.4402,
|
74657 |
+
"step": 21232
|
74658 |
+
},
|
74659 |
+
{
|
74660 |
+
"epoch": 0.9441109777244231,
|
74661 |
+
"grad_norm": 0.05844126269221306,
|
74662 |
+
"learning_rate": 8.404767808363744e-06,
|
74663 |
+
"loss": 1.4457,
|
74664 |
+
"step": 21234
|
74665 |
+
},
|
74666 |
+
{
|
74667 |
+
"epoch": 0.9441999021830955,
|
74668 |
+
"grad_norm": 0.05845796316862106,
|
74669 |
+
"learning_rate": 8.378097286986852e-06,
|
74670 |
+
"loss": 1.4508,
|
74671 |
+
"step": 21236
|
74672 |
+
},
|
74673 |
+
{
|
74674 |
+
"epoch": 0.9442888266417678,
|
74675 |
+
"grad_norm": 0.059185273945331573,
|
74676 |
+
"learning_rate": 8.351468791616634e-06,
|
74677 |
+
"loss": 1.4452,
|
74678 |
+
"step": 21238
|
74679 |
+
},
|
74680 |
+
{
|
74681 |
+
"epoch": 0.9443777511004402,
|
74682 |
+
"grad_norm": 0.05794282630085945,
|
74683 |
+
"learning_rate": 8.324882324529548e-06,
|
74684 |
+
"loss": 1.4503,
|
74685 |
+
"step": 21240
|
74686 |
+
},
|
74687 |
+
{
|
74688 |
+
"epoch": 0.9444666755591126,
|
74689 |
+
"grad_norm": 0.05867988243699074,
|
74690 |
+
"learning_rate": 8.29833788799822e-06,
|
74691 |
+
"loss": 1.4503,
|
74692 |
+
"step": 21242
|
74693 |
+
},
|
74694 |
+
{
|
74695 |
+
"epoch": 0.9445556000177849,
|
74696 |
+
"grad_norm": 0.05754483863711357,
|
74697 |
+
"learning_rate": 8.271835484291835e-06,
|
74698 |
+
"loss": 1.4445,
|
74699 |
+
"step": 21244
|
74700 |
+
},
|
74701 |
+
{
|
74702 |
+
"epoch": 0.9446445244764573,
|
74703 |
+
"grad_norm": 0.05763980373740196,
|
74704 |
+
"learning_rate": 8.245375115675912e-06,
|
74705 |
+
"loss": 1.4464,
|
74706 |
+
"step": 21246
|
74707 |
+
},
|
74708 |
+
{
|
74709 |
+
"epoch": 0.9447334489351296,
|
74710 |
+
"grad_norm": 0.05797286331653595,
|
74711 |
+
"learning_rate": 8.218956784412479e-06,
|
74712 |
+
"loss": 1.448,
|
74713 |
+
"step": 21248
|
74714 |
+
},
|
74715 |
+
{
|
74716 |
+
"epoch": 0.9448223733938019,
|
74717 |
+
"grad_norm": 0.05742442235350609,
|
74718 |
+
"learning_rate": 8.192580492759783e-06,
|
74719 |
+
"loss": 1.4453,
|
74720 |
+
"step": 21250
|
74721 |
+
},
|
74722 |
+
{
|
74723 |
+
"epoch": 0.9449112978524743,
|
74724 |
+
"grad_norm": 0.05741652473807335,
|
74725 |
+
"learning_rate": 8.16624624297263e-06,
|
74726 |
+
"loss": 1.4499,
|
74727 |
+
"step": 21252
|
74728 |
+
},
|
74729 |
+
{
|
74730 |
+
"epoch": 0.9450002223111467,
|
74731 |
+
"grad_norm": 0.058388851583004,
|
74732 |
+
"learning_rate": 8.139954037302222e-06,
|
74733 |
+
"loss": 1.4424,
|
74734 |
+
"step": 21254
|
74735 |
+
},
|
74736 |
+
{
|
74737 |
+
"epoch": 0.945089146769819,
|
74738 |
+
"grad_norm": 0.058313120156526566,
|
74739 |
+
"learning_rate": 8.113703877996092e-06,
|
74740 |
+
"loss": 1.4426,
|
74741 |
+
"step": 21256
|
74742 |
+
},
|
74743 |
+
{
|
74744 |
+
"epoch": 0.9451780712284914,
|
74745 |
+
"grad_norm": 0.05811810865998268,
|
74746 |
+
"learning_rate": 8.087495767298226e-06,
|
74747 |
+
"loss": 1.441,
|
74748 |
+
"step": 21258
|
74749 |
+
},
|
74750 |
+
{
|
74751 |
+
"epoch": 0.9452669956871638,
|
74752 |
+
"grad_norm": 0.057785093784332275,
|
74753 |
+
"learning_rate": 8.061329707448939e-06,
|
74754 |
+
"loss": 1.4496,
|
74755 |
+
"step": 21260
|
74756 |
+
},
|
74757 |
+
{
|
74758 |
+
"epoch": 0.9453559201458361,
|
74759 |
+
"grad_norm": 0.05778678134083748,
|
74760 |
+
"learning_rate": 8.035205700685167e-06,
|
74761 |
+
"loss": 1.4433,
|
74762 |
+
"step": 21262
|
74763 |
+
},
|
74764 |
+
{
|
74765 |
+
"epoch": 0.9454448446045085,
|
74766 |
+
"grad_norm": 0.057463571429252625,
|
74767 |
+
"learning_rate": 8.00912374924001e-06,
|
74768 |
+
"loss": 1.4445,
|
74769 |
+
"step": 21264
|
74770 |
+
},
|
74771 |
+
{
|
74772 |
+
"epoch": 0.9455337690631809,
|
74773 |
+
"grad_norm": 0.058068107813596725,
|
74774 |
+
"learning_rate": 7.983083855343132e-06,
|
74775 |
+
"loss": 1.4452,
|
74776 |
+
"step": 21266
|
74777 |
+
},
|
74778 |
+
{
|
74779 |
+
"epoch": 0.9456226935218531,
|
74780 |
+
"grad_norm": 0.05680304020643234,
|
74781 |
+
"learning_rate": 7.957086021220417e-06,
|
74782 |
+
"loss": 1.4465,
|
74783 |
+
"step": 21268
|
74784 |
+
},
|
74785 |
+
{
|
74786 |
+
"epoch": 0.9457116179805255,
|
74787 |
+
"grad_norm": 0.057669758796691895,
|
74788 |
+
"learning_rate": 7.931130249094365e-06,
|
74789 |
+
"loss": 1.4493,
|
74790 |
+
"step": 21270
|
74791 |
+
},
|
74792 |
+
{
|
74793 |
+
"epoch": 0.9458005424391979,
|
74794 |
+
"grad_norm": 0.056808967143297195,
|
74795 |
+
"learning_rate": 7.905216541183869e-06,
|
74796 |
+
"loss": 1.4448,
|
74797 |
+
"step": 21272
|
74798 |
+
},
|
74799 |
+
{
|
74800 |
+
"epoch": 0.9458894668978702,
|
74801 |
+
"grad_norm": 0.05728383734822273,
|
74802 |
+
"learning_rate": 7.879344899703932e-06,
|
74803 |
+
"loss": 1.4457,
|
74804 |
+
"step": 21274
|
74805 |
+
},
|
74806 |
+
{
|
74807 |
+
"epoch": 0.9459783913565426,
|
74808 |
+
"grad_norm": 0.05765995383262634,
|
74809 |
+
"learning_rate": 7.853515326866345e-06,
|
74810 |
+
"loss": 1.4424,
|
74811 |
+
"step": 21276
|
74812 |
+
},
|
74813 |
+
{
|
74814 |
+
"epoch": 0.946067315815215,
|
74815 |
+
"grad_norm": 0.0581035315990448,
|
74816 |
+
"learning_rate": 7.827727824879116e-06,
|
74817 |
+
"loss": 1.443,
|
74818 |
+
"step": 21278
|
74819 |
+
},
|
74820 |
+
{
|
74821 |
+
"epoch": 0.9461562402738873,
|
74822 |
+
"grad_norm": 0.05767706036567688,
|
74823 |
+
"learning_rate": 7.801982395946649e-06,
|
74824 |
+
"loss": 1.4423,
|
74825 |
+
"step": 21280
|
74826 |
+
},
|
74827 |
+
{
|
74828 |
+
"epoch": 0.9462451647325597,
|
74829 |
+
"grad_norm": 0.057066384702920914,
|
74830 |
+
"learning_rate": 7.776279042269685e-06,
|
74831 |
+
"loss": 1.4482,
|
74832 |
+
"step": 21282
|
74833 |
+
},
|
74834 |
+
{
|
74835 |
+
"epoch": 0.9463340891912321,
|
74836 |
+
"grad_norm": 0.058075472712516785,
|
74837 |
+
"learning_rate": 7.750617766045688e-06,
|
74838 |
+
"loss": 1.4452,
|
74839 |
+
"step": 21284
|
74840 |
+
},
|
74841 |
+
{
|
74842 |
+
"epoch": 0.9464230136499044,
|
74843 |
+
"grad_norm": 0.05739322304725647,
|
74844 |
+
"learning_rate": 7.724998569468066e-06,
|
74845 |
+
"loss": 1.4477,
|
74846 |
+
"step": 21286
|
74847 |
+
},
|
74848 |
+
{
|
74849 |
+
"epoch": 0.9465119381085768,
|
74850 |
+
"grad_norm": 0.05727776512503624,
|
74851 |
+
"learning_rate": 7.699421454726963e-06,
|
74852 |
+
"loss": 1.447,
|
74853 |
+
"step": 21288
|
74854 |
+
},
|
74855 |
+
{
|
74856 |
+
"epoch": 0.9466008625672491,
|
74857 |
+
"grad_norm": 0.057564686983823776,
|
74858 |
+
"learning_rate": 7.67388642400879e-06,
|
74859 |
+
"loss": 1.4455,
|
74860 |
+
"step": 21290
|
74861 |
+
},
|
74862 |
+
{
|
74863 |
+
"epoch": 0.9466897870259214,
|
74864 |
+
"grad_norm": 0.05791555717587471,
|
74865 |
+
"learning_rate": 7.648393479496419e-06,
|
74866 |
+
"loss": 1.4429,
|
74867 |
+
"step": 21292
|
74868 |
+
},
|
74869 |
+
{
|
74870 |
+
"epoch": 0.9467787114845938,
|
74871 |
+
"grad_norm": 0.057518370449543,
|
74872 |
+
"learning_rate": 7.622942623369156e-06,
|
74873 |
+
"loss": 1.4435,
|
74874 |
+
"step": 21294
|
74875 |
+
},
|
74876 |
+
{
|
74877 |
+
"epoch": 0.9468676359432662,
|
74878 |
+
"grad_norm": 0.05689391866326332,
|
74879 |
+
"learning_rate": 7.597533857802541e-06,
|
74880 |
+
"loss": 1.4461,
|
74881 |
+
"step": 21296
|
74882 |
+
},
|
74883 |
+
{
|
74884 |
+
"epoch": 0.9469565604019385,
|
74885 |
+
"grad_norm": 0.058041494339704514,
|
74886 |
+
"learning_rate": 7.57216718496867e-06,
|
74887 |
+
"loss": 1.4456,
|
74888 |
+
"step": 21298
|
74889 |
+
},
|
74890 |
+
{
|
74891 |
+
"epoch": 0.9470454848606109,
|
74892 |
+
"grad_norm": 0.057841625064611435,
|
74893 |
+
"learning_rate": 7.5468426070360865e-06,
|
74894 |
+
"loss": 1.4501,
|
74895 |
+
"step": 21300
|
74896 |
+
},
|
74897 |
+
{
|
74898 |
+
"epoch": 0.9471344093192833,
|
74899 |
+
"grad_norm": 0.057104241102933884,
|
74900 |
+
"learning_rate": 7.521560126169502e-06,
|
74901 |
+
"loss": 1.4433,
|
74902 |
+
"step": 21302
|
74903 |
+
},
|
74904 |
+
{
|
74905 |
+
"epoch": 0.9472233337779556,
|
74906 |
+
"grad_norm": 0.058944664895534515,
|
74907 |
+
"learning_rate": 7.4963197445302445e-06,
|
74908 |
+
"loss": 1.4469,
|
74909 |
+
"step": 21304
|
74910 |
+
},
|
74911 |
+
{
|
74912 |
+
"epoch": 0.947312258236628,
|
74913 |
+
"grad_norm": 0.05764506757259369,
|
74914 |
+
"learning_rate": 7.471121464275976e-06,
|
74915 |
+
"loss": 1.4485,
|
74916 |
+
"step": 21306
|
74917 |
+
},
|
74918 |
+
{
|
74919 |
+
"epoch": 0.9474011826953004,
|
74920 |
+
"grad_norm": 0.05761958658695221,
|
74921 |
+
"learning_rate": 7.445965287560752e-06,
|
74922 |
+
"loss": 1.4491,
|
74923 |
+
"step": 21308
|
74924 |
+
},
|
74925 |
+
{
|
74926 |
+
"epoch": 0.9474901071539727,
|
74927 |
+
"grad_norm": 0.0575152151286602,
|
74928 |
+
"learning_rate": 7.420851216535019e-06,
|
74929 |
+
"loss": 1.4428,
|
74930 |
+
"step": 21310
|
74931 |
+
},
|
74932 |
+
{
|
74933 |
+
"epoch": 0.947579031612645,
|
74934 |
+
"grad_norm": 0.058228038251399994,
|
74935 |
+
"learning_rate": 7.3957792533456695e-06,
|
74936 |
+
"loss": 1.4463,
|
74937 |
+
"step": 21312
|
74938 |
+
},
|
74939 |
+
{
|
74940 |
+
"epoch": 0.9476679560713174,
|
74941 |
+
"grad_norm": 0.057448986917734146,
|
74942 |
+
"learning_rate": 7.3707494001358785e-06,
|
74943 |
+
"loss": 1.4482,
|
74944 |
+
"step": 21314
|
74945 |
+
},
|
74946 |
+
{
|
74947 |
+
"epoch": 0.9477568805299897,
|
74948 |
+
"grad_norm": 0.05910767614841461,
|
74949 |
+
"learning_rate": 7.3457616590454916e-06,
|
74950 |
+
"loss": 1.445,
|
74951 |
+
"step": 21316
|
74952 |
+
},
|
74953 |
+
{
|
74954 |
+
"epoch": 0.9478458049886621,
|
74955 |
+
"grad_norm": 0.057866428047418594,
|
74956 |
+
"learning_rate": 7.320816032210353e-06,
|
74957 |
+
"loss": 1.4464,
|
74958 |
+
"step": 21318
|
74959 |
+
},
|
74960 |
+
{
|
74961 |
+
"epoch": 0.9479347294473345,
|
74962 |
+
"grad_norm": 0.05850965157151222,
|
74963 |
+
"learning_rate": 7.295912521763038e-06,
|
74964 |
+
"loss": 1.4437,
|
74965 |
+
"step": 21320
|
74966 |
+
},
|
74967 |
+
{
|
74968 |
+
"epoch": 0.9480236539060068,
|
74969 |
+
"grad_norm": 0.057498205453157425,
|
74970 |
+
"learning_rate": 7.271051129832451e-06,
|
74971 |
+
"loss": 1.449,
|
74972 |
+
"step": 21322
|
74973 |
+
},
|
74974 |
+
{
|
74975 |
+
"epoch": 0.9481125783646792,
|
74976 |
+
"grad_norm": 0.057792823761701584,
|
74977 |
+
"learning_rate": 7.246231858543784e-06,
|
74978 |
+
"loss": 1.4393,
|
74979 |
+
"step": 21324
|
74980 |
+
},
|
74981 |
+
{
|
74982 |
+
"epoch": 0.9482015028233516,
|
74983 |
+
"grad_norm": 0.058098308742046356,
|
74984 |
+
"learning_rate": 7.221454710018671e-06,
|
74985 |
+
"loss": 1.4419,
|
74986 |
+
"step": 21326
|
74987 |
+
},
|
74988 |
+
{
|
74989 |
+
"epoch": 0.948290427282024,
|
74990 |
+
"grad_norm": 0.058741819113492966,
|
74991 |
+
"learning_rate": 7.196719686375308e-06,
|
74992 |
+
"loss": 1.4463,
|
74993 |
+
"step": 21328
|
74994 |
+
},
|
74995 |
+
{
|
74996 |
+
"epoch": 0.9483793517406963,
|
74997 |
+
"grad_norm": 0.0579305998980999,
|
74998 |
+
"learning_rate": 7.1720267897279475e-06,
|
74999 |
+
"loss": 1.4438,
|
75000 |
+
"step": 21330
|
75001 |
+
},
|
75002 |
+
{
|
75003 |
+
"epoch": 0.9484682761993687,
|
75004 |
+
"grad_norm": 0.05758350342512131,
|
75005 |
+
"learning_rate": 7.147376022187624e-06,
|
75006 |
+
"loss": 1.4539,
|
75007 |
+
"step": 21332
|
75008 |
+
},
|
75009 |
+
{
|
75010 |
+
"epoch": 0.9485572006580409,
|
75011 |
+
"grad_norm": 0.05874762684106827,
|
75012 |
+
"learning_rate": 7.122767385861484e-06,
|
75013 |
+
"loss": 1.4462,
|
75014 |
+
"step": 21334
|
75015 |
+
},
|
75016 |
+
{
|
75017 |
+
"epoch": 0.9486461251167133,
|
75018 |
+
"grad_norm": 0.05720106512308121,
|
75019 |
+
"learning_rate": 7.09820088285329e-06,
|
75020 |
+
"loss": 1.4421,
|
75021 |
+
"step": 21336
|
75022 |
+
},
|
75023 |
+
{
|
75024 |
+
"epoch": 0.9487350495753857,
|
75025 |
+
"grad_norm": 0.057676248252391815,
|
75026 |
+
"learning_rate": 7.073676515263028e-06,
|
75027 |
+
"loss": 1.4495,
|
75028 |
+
"step": 21338
|
75029 |
+
},
|
75030 |
+
{
|
75031 |
+
"epoch": 0.948823974034058,
|
75032 |
+
"grad_norm": 0.05915232002735138,
|
75033 |
+
"learning_rate": 7.049194285187077e-06,
|
75034 |
+
"loss": 1.4458,
|
75035 |
+
"step": 21340
|
75036 |
+
},
|
75037 |
+
{
|
75038 |
+
"epoch": 0.9489128984927304,
|
75039 |
+
"grad_norm": 0.05813376605510712,
|
75040 |
+
"learning_rate": 7.02475419471843e-06,
|
75041 |
+
"loss": 1.4398,
|
75042 |
+
"step": 21342
|
75043 |
+
},
|
75044 |
+
{
|
75045 |
+
"epoch": 0.9490018229514028,
|
75046 |
+
"grad_norm": 0.057365693151950836,
|
75047 |
+
"learning_rate": 7.000356245946249e-06,
|
75048 |
+
"loss": 1.4464,
|
75049 |
+
"step": 21344
|
75050 |
+
},
|
75051 |
+
{
|
75052 |
+
"epoch": 0.9490907474100752,
|
75053 |
+
"grad_norm": 0.05754502862691879,
|
75054 |
+
"learning_rate": 6.976000440956198e-06,
|
75055 |
+
"loss": 1.4493,
|
75056 |
+
"step": 21346
|
75057 |
+
},
|
75058 |
+
{
|
75059 |
+
"epoch": 0.9491796718687475,
|
75060 |
+
"grad_norm": 0.058054231107234955,
|
75061 |
+
"learning_rate": 6.9516867818302796e-06,
|
75062 |
+
"loss": 1.4469,
|
75063 |
+
"step": 21348
|
75064 |
+
},
|
75065 |
+
{
|
75066 |
+
"epoch": 0.9492685963274199,
|
75067 |
+
"grad_norm": 0.05699177458882332,
|
75068 |
+
"learning_rate": 6.927415270647053e-06,
|
75069 |
+
"loss": 1.4509,
|
75070 |
+
"step": 21350
|
75071 |
+
},
|
75072 |
+
{
|
75073 |
+
"epoch": 0.9493575207860923,
|
75074 |
+
"grad_norm": 0.058153219521045685,
|
75075 |
+
"learning_rate": 6.903185909481191e-06,
|
75076 |
+
"loss": 1.4432,
|
75077 |
+
"step": 21352
|
75078 |
+
},
|
75079 |
+
{
|
75080 |
+
"epoch": 0.9494464452447646,
|
75081 |
+
"grad_norm": 0.05815225467085838,
|
75082 |
+
"learning_rate": 6.878998700403982e-06,
|
75083 |
+
"loss": 1.439,
|
75084 |
+
"step": 21354
|
75085 |
+
},
|
75086 |
+
{
|
75087 |
+
"epoch": 0.9495353697034369,
|
75088 |
+
"grad_norm": 0.05740995332598686,
|
75089 |
+
"learning_rate": 6.854853645483106e-06,
|
75090 |
+
"loss": 1.4472,
|
75091 |
+
"step": 21356
|
75092 |
+
},
|
75093 |
+
{
|
75094 |
+
"epoch": 0.9496242941621093,
|
75095 |
+
"grad_norm": 0.05770250782370567,
|
75096 |
+
"learning_rate": 6.830750746782633e-06,
|
75097 |
+
"loss": 1.4437,
|
75098 |
+
"step": 21358
|
75099 |
+
},
|
75100 |
+
{
|
75101 |
+
"epoch": 0.9497132186207816,
|
75102 |
+
"grad_norm": 0.05776236578822136,
|
75103 |
+
"learning_rate": 6.806690006362859e-06,
|
75104 |
+
"loss": 1.4447,
|
75105 |
+
"step": 21360
|
75106 |
+
},
|
75107 |
+
{
|
75108 |
+
"epoch": 0.949802143079454,
|
75109 |
+
"grad_norm": 0.057613179087638855,
|
75110 |
+
"learning_rate": 6.7826714262806396e-06,
|
75111 |
+
"loss": 1.4448,
|
75112 |
+
"step": 21362
|
75113 |
+
},
|
75114 |
+
{
|
75115 |
+
"epoch": 0.9498910675381264,
|
75116 |
+
"grad_norm": 0.0582854337990284,
|
75117 |
+
"learning_rate": 6.758695008589221e-06,
|
75118 |
+
"loss": 1.4439,
|
75119 |
+
"step": 21364
|
75120 |
+
},
|
75121 |
+
{
|
75122 |
+
"epoch": 0.9499799919967987,
|
75123 |
+
"grad_norm": 0.057772018015384674,
|
75124 |
+
"learning_rate": 6.734760755338243e-06,
|
75125 |
+
"loss": 1.4434,
|
75126 |
+
"step": 21366
|
75127 |
+
},
|
75128 |
+
{
|
75129 |
+
"epoch": 0.9500689164554711,
|
75130 |
+
"grad_norm": 0.057995546609163284,
|
75131 |
+
"learning_rate": 6.7108686685735665e-06,
|
75132 |
+
"loss": 1.444,
|
75133 |
+
"step": 21368
|
75134 |
+
},
|
75135 |
+
{
|
75136 |
+
"epoch": 0.9501578409141435,
|
75137 |
+
"grad_norm": 0.05806703120470047,
|
75138 |
+
"learning_rate": 6.687018750337726e-06,
|
75139 |
+
"loss": 1.4412,
|
75140 |
+
"step": 21370
|
75141 |
+
},
|
75142 |
+
{
|
75143 |
+
"epoch": 0.9502467653728158,
|
75144 |
+
"grad_norm": 0.056987687945365906,
|
75145 |
+
"learning_rate": 6.663211002669534e-06,
|
75146 |
+
"loss": 1.4426,
|
75147 |
+
"step": 21372
|
75148 |
+
},
|
75149 |
+
{
|
75150 |
+
"epoch": 0.9503356898314882,
|
75151 |
+
"grad_norm": 0.05830015987157822,
|
75152 |
+
"learning_rate": 6.639445427604085e-06,
|
75153 |
+
"loss": 1.4377,
|
75154 |
+
"step": 21374
|
75155 |
+
},
|
75156 |
+
{
|
75157 |
+
"epoch": 0.9504246142901606,
|
75158 |
+
"grad_norm": 0.057094842195510864,
|
75159 |
+
"learning_rate": 6.615722027173032e-06,
|
75160 |
+
"loss": 1.4469,
|
75161 |
+
"step": 21376
|
75162 |
+
},
|
75163 |
+
{
|
75164 |
+
"epoch": 0.9505135387488328,
|
75165 |
+
"grad_norm": 0.057993218302726746,
|
75166 |
+
"learning_rate": 6.592040803404309e-06,
|
75167 |
+
"loss": 1.4415,
|
75168 |
+
"step": 21378
|
75169 |
+
},
|
75170 |
+
{
|
75171 |
+
"epoch": 0.9506024632075052,
|
75172 |
+
"grad_norm": 0.05756436288356781,
|
75173 |
+
"learning_rate": 6.5684017583223506e-06,
|
75174 |
+
"loss": 1.4454,
|
75175 |
+
"step": 21380
|
75176 |
+
},
|
75177 |
+
{
|
75178 |
+
"epoch": 0.9506913876661776,
|
75179 |
+
"grad_norm": 0.05718113109469414,
|
75180 |
+
"learning_rate": 6.544804893947876e-06,
|
75181 |
+
"loss": 1.4446,
|
75182 |
+
"step": 21382
|
75183 |
+
},
|
75184 |
+
{
|
75185 |
+
"epoch": 0.9507803121248499,
|
75186 |
+
"grad_norm": 0.05671248957514763,
|
75187 |
+
"learning_rate": 6.521250212298046e-06,
|
75188 |
+
"loss": 1.4501,
|
75189 |
+
"step": 21384
|
75190 |
+
},
|
75191 |
+
{
|
75192 |
+
"epoch": 0.9508692365835223,
|
75193 |
+
"grad_norm": 0.05659397318959236,
|
75194 |
+
"learning_rate": 6.497737715386476e-06,
|
75195 |
+
"loss": 1.4458,
|
75196 |
+
"step": 21386
|
75197 |
+
},
|
75198 |
+
{
|
75199 |
+
"epoch": 0.9509581610421947,
|
75200 |
+
"grad_norm": 0.05791900306940079,
|
75201 |
+
"learning_rate": 6.474267405223e-06,
|
75202 |
+
"loss": 1.4472,
|
75203 |
+
"step": 21388
|
75204 |
+
},
|
75205 |
+
{
|
75206 |
+
"epoch": 0.951047085500867,
|
75207 |
+
"grad_norm": 0.05804454907774925,
|
75208 |
+
"learning_rate": 6.450839283814125e-06,
|
75209 |
+
"loss": 1.4465,
|
75210 |
+
"step": 21390
|
75211 |
+
},
|
75212 |
+
{
|
75213 |
+
"epoch": 0.9511360099595394,
|
75214 |
+
"grad_norm": 0.05747028812766075,
|
75215 |
+
"learning_rate": 6.4274533531624715e-06,
|
75216 |
+
"loss": 1.4468,
|
75217 |
+
"step": 21392
|
75218 |
+
},
|
75219 |
+
{
|
75220 |
+
"epoch": 0.9512249344182118,
|
75221 |
+
"grad_norm": 0.058244530111551285,
|
75222 |
+
"learning_rate": 6.404109615267218e-06,
|
75223 |
+
"loss": 1.4506,
|
75224 |
+
"step": 21394
|
75225 |
+
},
|
75226 |
+
{
|
75227 |
+
"epoch": 0.9513138588768841,
|
75228 |
+
"grad_norm": 0.057229701429605484,
|
75229 |
+
"learning_rate": 6.380808072123934e-06,
|
75230 |
+
"loss": 1.4488,
|
75231 |
+
"step": 21396
|
75232 |
+
},
|
75233 |
+
{
|
75234 |
+
"epoch": 0.9514027833355564,
|
75235 |
+
"grad_norm": 0.05814434215426445,
|
75236 |
+
"learning_rate": 6.357548725724416e-06,
|
75237 |
+
"loss": 1.4449,
|
75238 |
+
"step": 21398
|
75239 |
+
},
|
75240 |
+
{
|
75241 |
+
"epoch": 0.9514917077942288,
|
75242 |
+
"grad_norm": 0.057511039078235626,
|
75243 |
+
"learning_rate": 6.334331578057018e-06,
|
75244 |
+
"loss": 1.4408,
|
75245 |
+
"step": 21400
|
75246 |
+
},
|
75247 |
+
{
|
75248 |
+
"epoch": 0.9515806322529011,
|
75249 |
+
"grad_norm": 0.05873566493391991,
|
75250 |
+
"learning_rate": 6.31115663110654e-06,
|
75251 |
+
"loss": 1.449,
|
75252 |
+
"step": 21402
|
75253 |
+
},
|
75254 |
+
{
|
75255 |
+
"epoch": 0.9516695567115735,
|
75256 |
+
"grad_norm": 0.05769232288002968,
|
75257 |
+
"learning_rate": 6.288023886854011e-06,
|
75258 |
+
"loss": 1.4451,
|
75259 |
+
"step": 21404
|
75260 |
+
},
|
75261 |
+
{
|
75262 |
+
"epoch": 0.9517584811702459,
|
75263 |
+
"grad_norm": 0.05827988311648369,
|
75264 |
+
"learning_rate": 6.264933347276847e-06,
|
75265 |
+
"loss": 1.4442,
|
75266 |
+
"step": 21406
|
75267 |
+
},
|
75268 |
+
{
|
75269 |
+
"epoch": 0.9518474056289182,
|
75270 |
+
"grad_norm": 0.05725441128015518,
|
75271 |
+
"learning_rate": 6.2418850143490805e-06,
|
75272 |
+
"loss": 1.4455,
|
75273 |
+
"step": 21408
|
75274 |
+
},
|
75275 |
+
{
|
75276 |
+
"epoch": 0.9519363300875906,
|
75277 |
+
"grad_norm": 0.05901114642620087,
|
75278 |
+
"learning_rate": 6.218878890040858e-06,
|
75279 |
+
"loss": 1.4456,
|
75280 |
+
"step": 21410
|
75281 |
+
},
|
75282 |
+
{
|
75283 |
+
"epoch": 0.952025254546263,
|
75284 |
+
"grad_norm": 0.05790838971734047,
|
75285 |
+
"learning_rate": 6.195914976318884e-06,
|
75286 |
+
"loss": 1.4424,
|
75287 |
+
"step": 21412
|
75288 |
+
},
|
75289 |
+
{
|
75290 |
+
"epoch": 0.9521141790049353,
|
75291 |
+
"grad_norm": 0.057518161833286285,
|
75292 |
+
"learning_rate": 6.17299327514631e-06,
|
75293 |
+
"loss": 1.45,
|
75294 |
+
"step": 21414
|
75295 |
+
},
|
75296 |
+
{
|
75297 |
+
"epoch": 0.9522031034636077,
|
75298 |
+
"grad_norm": 0.05830460786819458,
|
75299 |
+
"learning_rate": 6.150113788482403e-06,
|
75300 |
+
"loss": 1.451,
|
75301 |
+
"step": 21416
|
75302 |
+
},
|
75303 |
+
{
|
75304 |
+
"epoch": 0.9522920279222801,
|
75305 |
+
"grad_norm": 0.057020124047994614,
|
75306 |
+
"learning_rate": 6.127276518283153e-06,
|
75307 |
+
"loss": 1.4477,
|
75308 |
+
"step": 21418
|
75309 |
+
},
|
75310 |
+
{
|
75311 |
+
"epoch": 0.9523809523809523,
|
75312 |
+
"grad_norm": 0.05747748538851738,
|
75313 |
+
"learning_rate": 6.104481466500667e-06,
|
75314 |
+
"loss": 1.4502,
|
75315 |
+
"step": 21420
|
75316 |
+
},
|
75317 |
+
{
|
75318 |
+
"epoch": 0.9524698768396247,
|
75319 |
+
"grad_norm": 0.05771753937005997,
|
75320 |
+
"learning_rate": 6.081728635083661e-06,
|
75321 |
+
"loss": 1.4461,
|
75322 |
+
"step": 21422
|
75323 |
+
},
|
75324 |
+
{
|
75325 |
+
"epoch": 0.9525588012982971,
|
75326 |
+
"grad_norm": 0.057178083807229996,
|
75327 |
+
"learning_rate": 6.059018025977137e-06,
|
75328 |
+
"loss": 1.4498,
|
75329 |
+
"step": 21424
|
75330 |
+
},
|
75331 |
+
{
|
75332 |
+
"epoch": 0.9526477257569694,
|
75333 |
+
"grad_norm": 0.05818738043308258,
|
75334 |
+
"learning_rate": 6.036349641122429e-06,
|
75335 |
+
"loss": 1.4535,
|
75336 |
+
"step": 21426
|
75337 |
+
},
|
75338 |
+
{
|
75339 |
+
"epoch": 0.9527366502156418,
|
75340 |
+
"grad_norm": 0.057868827134370804,
|
75341 |
+
"learning_rate": 6.013723482457434e-06,
|
75342 |
+
"loss": 1.4438,
|
75343 |
+
"step": 21428
|
75344 |
+
},
|
75345 |
+
{
|
75346 |
+
"epoch": 0.9528255746743142,
|
75347 |
+
"grad_norm": 0.05839633569121361,
|
75348 |
+
"learning_rate": 5.9911395519162695e-06,
|
75349 |
+
"loss": 1.4444,
|
75350 |
+
"step": 21430
|
75351 |
+
},
|
75352 |
+
{
|
75353 |
+
"epoch": 0.9529144991329865,
|
75354 |
+
"grad_norm": 0.05750703439116478,
|
75355 |
+
"learning_rate": 5.968597851429502e-06,
|
75356 |
+
"loss": 1.4501,
|
75357 |
+
"step": 21432
|
75358 |
+
},
|
75359 |
+
{
|
75360 |
+
"epoch": 0.9530034235916589,
|
75361 |
+
"grad_norm": 0.05813831835985184,
|
75362 |
+
"learning_rate": 5.946098382924148e-06,
|
75363 |
+
"loss": 1.4435,
|
75364 |
+
"step": 21434
|
75365 |
+
},
|
75366 |
+
{
|
75367 |
+
"epoch": 0.9530923480503313,
|
75368 |
+
"grad_norm": 0.05906336382031441,
|
75369 |
+
"learning_rate": 5.9236411483235e-06,
|
75370 |
+
"loss": 1.4442,
|
75371 |
+
"step": 21436
|
75372 |
+
},
|
75373 |
+
{
|
75374 |
+
"epoch": 0.9531812725090036,
|
75375 |
+
"grad_norm": 0.05761095881462097,
|
75376 |
+
"learning_rate": 5.901226149547356e-06,
|
75377 |
+
"loss": 1.4428,
|
75378 |
+
"step": 21438
|
75379 |
+
},
|
75380 |
+
{
|
75381 |
+
"epoch": 0.953270196967676,
|
75382 |
+
"grad_norm": 0.058021754026412964,
|
75383 |
+
"learning_rate": 5.8788533885117956e-06,
|
75384 |
+
"loss": 1.4458,
|
75385 |
+
"step": 21440
|
75386 |
+
},
|
75387 |
+
{
|
75388 |
+
"epoch": 0.9533591214263483,
|
75389 |
+
"grad_norm": 0.057688046246767044,
|
75390 |
+
"learning_rate": 5.856522867129343e-06,
|
75391 |
+
"loss": 1.4438,
|
75392 |
+
"step": 21442
|
75393 |
+
},
|
75394 |
+
{
|
75395 |
+
"epoch": 0.9534480458850206,
|
75396 |
+
"grad_norm": 0.05762839689850807,
|
75397 |
+
"learning_rate": 5.834234587309028e-06,
|
75398 |
+
"loss": 1.4459,
|
75399 |
+
"step": 21444
|
75400 |
+
},
|
75401 |
+
{
|
75402 |
+
"epoch": 0.953536970343693,
|
75403 |
+
"grad_norm": 0.05795801803469658,
|
75404 |
+
"learning_rate": 5.811988550955993e-06,
|
75405 |
+
"loss": 1.4487,
|
75406 |
+
"step": 21446
|
75407 |
+
},
|
75408 |
+
{
|
75409 |
+
"epoch": 0.9536258948023654,
|
75410 |
+
"grad_norm": 0.0580856017768383,
|
75411 |
+
"learning_rate": 5.789784759971994e-06,
|
75412 |
+
"loss": 1.4503,
|
75413 |
+
"step": 21448
|
75414 |
+
},
|
75415 |
+
{
|
75416 |
+
"epoch": 0.9537148192610377,
|
75417 |
+
"grad_norm": 0.05720821022987366,
|
75418 |
+
"learning_rate": 5.767623216255125e-06,
|
75419 |
+
"loss": 1.4477,
|
75420 |
+
"step": 21450
|
75421 |
+
},
|
75422 |
+
{
|
75423 |
+
"epoch": 0.9538037437197101,
|
75424 |
+
"grad_norm": 0.057731300592422485,
|
75425 |
+
"learning_rate": 5.745503921699868e-06,
|
75426 |
+
"loss": 1.4478,
|
75427 |
+
"step": 21452
|
75428 |
+
},
|
75429 |
+
{
|
75430 |
+
"epoch": 0.9538926681783825,
|
75431 |
+
"grad_norm": 0.05763096734881401,
|
75432 |
+
"learning_rate": 5.7234268781969915e-06,
|
75433 |
+
"loss": 1.4447,
|
75434 |
+
"step": 21454
|
75435 |
+
},
|
75436 |
+
{
|
75437 |
+
"epoch": 0.9539815926370548,
|
75438 |
+
"grad_norm": 0.057338956743478775,
|
75439 |
+
"learning_rate": 5.701392087633761e-06,
|
75440 |
+
"loss": 1.445,
|
75441 |
+
"step": 21456
|
75442 |
+
},
|
75443 |
+
{
|
75444 |
+
"epoch": 0.9540705170957272,
|
75445 |
+
"grad_norm": 0.057891324162483215,
|
75446 |
+
"learning_rate": 5.679399551893893e-06,
|
75447 |
+
"loss": 1.4418,
|
75448 |
+
"step": 21458
|
75449 |
+
},
|
75450 |
+
{
|
75451 |
+
"epoch": 0.9541594415543996,
|
75452 |
+
"grad_norm": 0.05737922713160515,
|
75453 |
+
"learning_rate": 5.657449272857385e-06,
|
75454 |
+
"loss": 1.4444,
|
75455 |
+
"step": 21460
|
75456 |
+
},
|
75457 |
+
{
|
75458 |
+
"epoch": 0.954248366013072,
|
75459 |
+
"grad_norm": 0.05823858082294464,
|
75460 |
+
"learning_rate": 5.635541252400511e-06,
|
75461 |
+
"loss": 1.4367,
|
75462 |
+
"step": 21462
|
75463 |
+
},
|
75464 |
+
{
|
75465 |
+
"epoch": 0.9543372904717442,
|
75466 |
+
"grad_norm": 0.05750085040926933,
|
75467 |
+
"learning_rate": 5.61367549239622e-06,
|
75468 |
+
"loss": 1.4491,
|
75469 |
+
"step": 21464
|
75470 |
+
},
|
75471 |
+
{
|
75472 |
+
"epoch": 0.9544262149304166,
|
75473 |
+
"grad_norm": 0.05752246081829071,
|
75474 |
+
"learning_rate": 5.591851994713681e-06,
|
75475 |
+
"loss": 1.4429,
|
75476 |
+
"step": 21466
|
75477 |
+
},
|
75478 |
+
{
|
75479 |
+
"epoch": 0.9545151393890889,
|
75480 |
+
"grad_norm": 0.057769421488046646,
|
75481 |
+
"learning_rate": 5.5700707612184044e-06,
|
75482 |
+
"loss": 1.4498,
|
75483 |
+
"step": 21468
|
75484 |
+
},
|
75485 |
+
{
|
75486 |
+
"epoch": 0.9546040638477613,
|
75487 |
+
"grad_norm": 0.057797472923994064,
|
75488 |
+
"learning_rate": 5.548331793772288e-06,
|
75489 |
+
"loss": 1.4463,
|
75490 |
+
"step": 21470
|
75491 |
+
},
|
75492 |
+
{
|
75493 |
+
"epoch": 0.9546929883064337,
|
75494 |
+
"grad_norm": 0.057377446442842484,
|
75495 |
+
"learning_rate": 5.52663509423379e-06,
|
75496 |
+
"loss": 1.4473,
|
75497 |
+
"step": 21472
|
75498 |
+
},
|
75499 |
+
{
|
75500 |
+
"epoch": 0.954781912765106,
|
75501 |
+
"grad_norm": 0.05764816701412201,
|
75502 |
+
"learning_rate": 5.504980664457593e-06,
|
75503 |
+
"loss": 1.4454,
|
75504 |
+
"step": 21474
|
75505 |
+
},
|
75506 |
+
{
|
75507 |
+
"epoch": 0.9548708372237784,
|
75508 |
+
"grad_norm": 0.05828756093978882,
|
75509 |
+
"learning_rate": 5.483368506294828e-06,
|
75510 |
+
"loss": 1.4446,
|
75511 |
+
"step": 21476
|
75512 |
+
},
|
75513 |
+
{
|
75514 |
+
"epoch": 0.9549597616824508,
|
75515 |
+
"grad_norm": 0.058148372918367386,
|
75516 |
+
"learning_rate": 5.461798621593017e-06,
|
75517 |
+
"loss": 1.441,
|
75518 |
+
"step": 21478
|
75519 |
+
},
|
75520 |
+
{
|
75521 |
+
"epoch": 0.9550486861411231,
|
75522 |
+
"grad_norm": 0.05808459222316742,
|
75523 |
+
"learning_rate": 5.440271012195963e-06,
|
75524 |
+
"loss": 1.4473,
|
75525 |
+
"step": 21480
|
75526 |
+
},
|
75527 |
+
{
|
75528 |
+
"epoch": 0.9551376105997955,
|
75529 |
+
"grad_norm": 0.05690108239650726,
|
75530 |
+
"learning_rate": 5.4187856799440275e-06,
|
75531 |
+
"loss": 1.4451,
|
75532 |
+
"step": 21482
|
75533 |
+
},
|
75534 |
+
{
|
75535 |
+
"epoch": 0.9552265350584679,
|
75536 |
+
"grad_norm": 0.05813979357481003,
|
75537 |
+
"learning_rate": 5.3973426266737955e-06,
|
75538 |
+
"loss": 1.4465,
|
75539 |
+
"step": 21484
|
75540 |
+
},
|
75541 |
+
{
|
75542 |
+
"epoch": 0.9553154595171401,
|
75543 |
+
"grad_norm": 0.057522471994161606,
|
75544 |
+
"learning_rate": 5.3759418542184134e-06,
|
75545 |
+
"loss": 1.4487,
|
75546 |
+
"step": 21486
|
75547 |
+
},
|
75548 |
+
{
|
75549 |
+
"epoch": 0.9554043839758125,
|
75550 |
+
"grad_norm": 0.05860830470919609,
|
75551 |
+
"learning_rate": 5.354583364407251e-06,
|
75552 |
+
"loss": 1.4444,
|
75553 |
+
"step": 21488
|
75554 |
+
},
|
75555 |
+
{
|
75556 |
+
"epoch": 0.9554933084344849,
|
75557 |
+
"grad_norm": 0.05663066357374191,
|
75558 |
+
"learning_rate": 5.333267159066124e-06,
|
75559 |
+
"loss": 1.444,
|
75560 |
+
"step": 21490
|
75561 |
+
},
|
75562 |
+
{
|
75563 |
+
"epoch": 0.9555822328931572,
|
75564 |
+
"grad_norm": 0.058597080409526825,
|
75565 |
+
"learning_rate": 5.311993240017243e-06,
|
75566 |
+
"loss": 1.4472,
|
75567 |
+
"step": 21492
|
75568 |
+
},
|
75569 |
+
{
|
75570 |
+
"epoch": 0.9556711573518296,
|
75571 |
+
"grad_norm": 0.057328660041093826,
|
75572 |
+
"learning_rate": 5.290761609079209e-06,
|
75573 |
+
"loss": 1.447,
|
75574 |
+
"step": 21494
|
75575 |
+
},
|
75576 |
+
{
|
75577 |
+
"epoch": 0.955760081810502,
|
75578 |
+
"grad_norm": 0.0580228753387928,
|
75579 |
+
"learning_rate": 5.269572268066958e-06,
|
75580 |
+
"loss": 1.4468,
|
75581 |
+
"step": 21496
|
75582 |
+
},
|
75583 |
+
{
|
75584 |
+
"epoch": 0.9558490062691744,
|
75585 |
+
"grad_norm": 0.05755842849612236,
|
75586 |
+
"learning_rate": 5.248425218791874e-06,
|
75587 |
+
"loss": 1.4492,
|
75588 |
+
"step": 21498
|
75589 |
+
},
|
75590 |
+
{
|
75591 |
+
"epoch": 0.9559379307278467,
|
75592 |
+
"grad_norm": 0.05705071613192558,
|
75593 |
+
"learning_rate": 5.227320463061791e-06,
|
75594 |
+
"loss": 1.4469,
|
75595 |
+
"step": 21500
|
75596 |
+
},
|
75597 |
+
{
|
75598 |
+
"epoch": 0.9559379307278467,
|
75599 |
+
"eval_loss": 1.432187557220459,
|
75600 |
+
"eval_runtime": 12.4558,
|
75601 |
+
"eval_samples_per_second": 554.762,
|
75602 |
+
"eval_steps_per_second": 69.365,
|
75603 |
+
"step": 21500
|
75604 |
+
},
|
75605 |
+
{
|
75606 |
+
"epoch": 0.9560268551865191,
|
75607 |
+
"grad_norm": 0.057521793991327286,
|
75608 |
+
"learning_rate": 5.206258002680653e-06,
|
75609 |
+
"loss": 1.4425,
|
75610 |
+
"step": 21502
|
75611 |
+
},
|
75612 |
+
{
|
75613 |
+
"epoch": 0.9561157796451915,
|
75614 |
+
"grad_norm": 0.05715570226311684,
|
75615 |
+
"learning_rate": 5.18523783944913e-06,
|
75616 |
+
"loss": 1.4504,
|
75617 |
+
"step": 21504
|
75618 |
+
},
|
75619 |
+
{
|
75620 |
+
"epoch": 0.9562047041038638,
|
75621 |
+
"grad_norm": 0.05779632553458214,
|
75622 |
+
"learning_rate": 5.164259975164009e-06,
|
75623 |
+
"loss": 1.4429,
|
75624 |
+
"step": 21506
|
75625 |
+
},
|
75626 |
+
{
|
75627 |
+
"epoch": 0.9562936285625361,
|
75628 |
+
"grad_norm": 0.059060875326395035,
|
75629 |
+
"learning_rate": 5.143324411618577e-06,
|
75630 |
+
"loss": 1.4463,
|
75631 |
+
"step": 21508
|
75632 |
+
},
|
75633 |
+
{
|
75634 |
+
"epoch": 0.9563825530212084,
|
75635 |
+
"grad_norm": 0.05711085721850395,
|
75636 |
+
"learning_rate": 5.122431150602624e-06,
|
75637 |
+
"loss": 1.4455,
|
75638 |
+
"step": 21510
|
75639 |
+
},
|
75640 |
+
{
|
75641 |
+
"epoch": 0.9564714774798808,
|
75642 |
+
"grad_norm": 0.05797237157821655,
|
75643 |
+
"learning_rate": 5.101580193902055e-06,
|
75644 |
+
"loss": 1.4448,
|
75645 |
+
"step": 21512
|
75646 |
+
},
|
75647 |
+
{
|
75648 |
+
"epoch": 0.9565604019385532,
|
75649 |
+
"grad_norm": 0.057503245770931244,
|
75650 |
+
"learning_rate": 5.080771543299389e-06,
|
75651 |
+
"loss": 1.4502,
|
75652 |
+
"step": 21514
|
75653 |
+
},
|
75654 |
+
{
|
75655 |
+
"epoch": 0.9566493263972256,
|
75656 |
+
"grad_norm": 0.05919162929058075,
|
75657 |
+
"learning_rate": 5.060005200573425e-06,
|
75658 |
+
"loss": 1.4475,
|
75659 |
+
"step": 21516
|
75660 |
+
},
|
75661 |
+
{
|
75662 |
+
"epoch": 0.9567382508558979,
|
75663 |
+
"grad_norm": 0.05886862426996231,
|
75664 |
+
"learning_rate": 5.039281167499299e-06,
|
75665 |
+
"loss": 1.4526,
|
75666 |
+
"step": 21518
|
75667 |
+
},
|
75668 |
+
{
|
75669 |
+
"epoch": 0.9568271753145703,
|
75670 |
+
"grad_norm": 0.058441340923309326,
|
75671 |
+
"learning_rate": 5.018599445848648e-06,
|
75672 |
+
"loss": 1.4441,
|
75673 |
+
"step": 21520
|
75674 |
+
},
|
75675 |
+
{
|
75676 |
+
"epoch": 0.9569160997732427,
|
75677 |
+
"grad_norm": 0.05824016034603119,
|
75678 |
+
"learning_rate": 4.997960037389449e-06,
|
75679 |
+
"loss": 1.4449,
|
75680 |
+
"step": 21522
|
75681 |
+
},
|
75682 |
+
{
|
75683 |
+
"epoch": 0.957005024231915,
|
75684 |
+
"grad_norm": 0.057525213807821274,
|
75685 |
+
"learning_rate": 4.97736294388601e-06,
|
75686 |
+
"loss": 1.4477,
|
75687 |
+
"step": 21524
|
75688 |
+
},
|
75689 |
+
{
|
75690 |
+
"epoch": 0.9570939486905874,
|
75691 |
+
"grad_norm": 0.058176856487989426,
|
75692 |
+
"learning_rate": 4.956808167099092e-06,
|
75693 |
+
"loss": 1.4471,
|
75694 |
+
"step": 21526
|
75695 |
+
},
|
75696 |
+
{
|
75697 |
+
"epoch": 0.9571828731492596,
|
75698 |
+
"grad_norm": 0.057681165635585785,
|
75699 |
+
"learning_rate": 4.936295708785732e-06,
|
75700 |
+
"loss": 1.4497,
|
75701 |
+
"step": 21528
|
75702 |
+
},
|
75703 |
+
{
|
75704 |
+
"epoch": 0.957271797607932,
|
75705 |
+
"grad_norm": 0.05783999338746071,
|
75706 |
+
"learning_rate": 4.915825570699584e-06,
|
75707 |
+
"loss": 1.4408,
|
75708 |
+
"step": 21530
|
75709 |
+
},
|
75710 |
+
{
|
75711 |
+
"epoch": 0.9573607220666044,
|
75712 |
+
"grad_norm": 0.05777215585112572,
|
75713 |
+
"learning_rate": 4.895397754590414e-06,
|
75714 |
+
"loss": 1.4461,
|
75715 |
+
"step": 21532
|
75716 |
+
},
|
75717 |
+
{
|
75718 |
+
"epoch": 0.9574496465252768,
|
75719 |
+
"grad_norm": 0.058080073446035385,
|
75720 |
+
"learning_rate": 4.875012262204492e-06,
|
75721 |
+
"loss": 1.451,
|
75722 |
+
"step": 21534
|
75723 |
+
},
|
75724 |
+
{
|
75725 |
+
"epoch": 0.9575385709839491,
|
75726 |
+
"grad_norm": 0.05700400471687317,
|
75727 |
+
"learning_rate": 4.854669095284425e-06,
|
75728 |
+
"loss": 1.4511,
|
75729 |
+
"step": 21536
|
75730 |
+
},
|
75731 |
+
{
|
75732 |
+
"epoch": 0.9576274954426215,
|
75733 |
+
"grad_norm": 0.05668550357222557,
|
75734 |
+
"learning_rate": 4.834368255569322e-06,
|
75735 |
+
"loss": 1.4435,
|
75736 |
+
"step": 21538
|
75737 |
+
},
|
75738 |
+
{
|
75739 |
+
"epoch": 0.9577164199012939,
|
75740 |
+
"grad_norm": 0.05787520855665207,
|
75741 |
+
"learning_rate": 4.81410974479457e-06,
|
75742 |
+
"loss": 1.4419,
|
75743 |
+
"step": 21540
|
75744 |
+
},
|
75745 |
+
{
|
75746 |
+
"epoch": 0.9578053443599662,
|
75747 |
+
"grad_norm": 0.05773087963461876,
|
75748 |
+
"learning_rate": 4.793893564691843e-06,
|
75749 |
+
"loss": 1.4458,
|
75750 |
+
"step": 21542
|
75751 |
+
},
|
75752 |
+
{
|
75753 |
+
"epoch": 0.9578942688186386,
|
75754 |
+
"grad_norm": 0.057870473712682724,
|
75755 |
+
"learning_rate": 4.773719716989477e-06,
|
75756 |
+
"loss": 1.4454,
|
75757 |
+
"step": 21544
|
75758 |
+
},
|
75759 |
+
{
|
75760 |
+
"epoch": 0.957983193277311,
|
75761 |
+
"grad_norm": 0.05800451338291168,
|
75762 |
+
"learning_rate": 4.753588203411929e-06,
|
75763 |
+
"loss": 1.4423,
|
75764 |
+
"step": 21546
|
75765 |
+
},
|
75766 |
+
{
|
75767 |
+
"epoch": 0.9580721177359833,
|
75768 |
+
"grad_norm": 0.05686897784471512,
|
75769 |
+
"learning_rate": 4.7334990256800995e-06,
|
75770 |
+
"loss": 1.4462,
|
75771 |
+
"step": 21548
|
75772 |
+
},
|
75773 |
+
{
|
75774 |
+
"epoch": 0.9581610421946556,
|
75775 |
+
"grad_norm": 0.056949593126773834,
|
75776 |
+
"learning_rate": 4.7134521855113355e-06,
|
75777 |
+
"loss": 1.4418,
|
75778 |
+
"step": 21550
|
75779 |
+
},
|
75780 |
+
{
|
75781 |
+
"epoch": 0.958249966653328,
|
75782 |
+
"grad_norm": 0.05743638426065445,
|
75783 |
+
"learning_rate": 4.693447684619379e-06,
|
75784 |
+
"loss": 1.4471,
|
75785 |
+
"step": 21552
|
75786 |
+
},
|
75787 |
+
{
|
75788 |
+
"epoch": 0.9583388911120003,
|
75789 |
+
"grad_norm": 0.05902915075421333,
|
75790 |
+
"learning_rate": 4.673485524714305e-06,
|
75791 |
+
"loss": 1.4437,
|
75792 |
+
"step": 21554
|
75793 |
+
},
|
75794 |
+
{
|
75795 |
+
"epoch": 0.9584278155706727,
|
75796 |
+
"grad_norm": 0.058458685874938965,
|
75797 |
+
"learning_rate": 4.653565707502416e-06,
|
75798 |
+
"loss": 1.4422,
|
75799 |
+
"step": 21556
|
75800 |
+
},
|
75801 |
+
{
|
75802 |
+
"epoch": 0.9585167400293451,
|
75803 |
+
"grad_norm": 0.05795848369598389,
|
75804 |
+
"learning_rate": 4.633688234686739e-06,
|
75805 |
+
"loss": 1.4426,
|
75806 |
+
"step": 21558
|
75807 |
+
},
|
75808 |
+
{
|
75809 |
+
"epoch": 0.9586056644880174,
|
75810 |
+
"grad_norm": 0.058213118463754654,
|
75811 |
+
"learning_rate": 4.613853107966359e-06,
|
75812 |
+
"loss": 1.4477,
|
75813 |
+
"step": 21560
|
75814 |
+
},
|
75815 |
+
{
|
75816 |
+
"epoch": 0.9586945889466898,
|
75817 |
+
"grad_norm": 0.05738683417439461,
|
75818 |
+
"learning_rate": 4.59406032903692e-06,
|
75819 |
+
"loss": 1.4455,
|
75820 |
+
"step": 21562
|
75821 |
+
},
|
75822 |
+
{
|
75823 |
+
"epoch": 0.9587835134053622,
|
75824 |
+
"grad_norm": 0.057969436049461365,
|
75825 |
+
"learning_rate": 4.5743098995903455e-06,
|
75826 |
+
"loss": 1.4482,
|
75827 |
+
"step": 21564
|
75828 |
+
},
|
75829 |
+
{
|
75830 |
+
"epoch": 0.9588724378640345,
|
75831 |
+
"grad_norm": 0.05753438174724579,
|
75832 |
+
"learning_rate": 4.554601821315063e-06,
|
75833 |
+
"loss": 1.4418,
|
75834 |
+
"step": 21566
|
75835 |
+
},
|
75836 |
+
{
|
75837 |
+
"epoch": 0.9589613623227069,
|
75838 |
+
"grad_norm": 0.056868597865104675,
|
75839 |
+
"learning_rate": 4.534936095895781e-06,
|
75840 |
+
"loss": 1.449,
|
75841 |
+
"step": 21568
|
75842 |
+
},
|
75843 |
+
{
|
75844 |
+
"epoch": 0.9590502867813793,
|
75845 |
+
"grad_norm": 0.057507824152708054,
|
75846 |
+
"learning_rate": 4.515312725013598e-06,
|
75847 |
+
"loss": 1.4416,
|
75848 |
+
"step": 21570
|
75849 |
+
},
|
75850 |
+
{
|
75851 |
+
"epoch": 0.9591392112400515,
|
75852 |
+
"grad_norm": 0.05689457431435585,
|
75853 |
+
"learning_rate": 4.495731710346007e-06,
|
75854 |
+
"loss": 1.4416,
|
75855 |
+
"step": 21572
|
75856 |
+
},
|
75857 |
+
{
|
75858 |
+
"epoch": 0.9592281356987239,
|
75859 |
+
"grad_norm": 0.0581645630300045,
|
75860 |
+
"learning_rate": 4.47619305356689e-06,
|
75861 |
+
"loss": 1.446,
|
75862 |
+
"step": 21574
|
75863 |
+
},
|
75864 |
+
{
|
75865 |
+
"epoch": 0.9593170601573963,
|
75866 |
+
"grad_norm": 0.058234803378582,
|
75867 |
+
"learning_rate": 4.4566967563465234e-06,
|
75868 |
+
"loss": 1.4436,
|
75869 |
+
"step": 21576
|
75870 |
+
},
|
75871 |
+
{
|
75872 |
+
"epoch": 0.9594059846160686,
|
75873 |
+
"grad_norm": 0.05846071243286133,
|
75874 |
+
"learning_rate": 4.4372428203514615e-06,
|
75875 |
+
"loss": 1.4399,
|
75876 |
+
"step": 21578
|
75877 |
+
},
|
75878 |
+
{
|
75879 |
+
"epoch": 0.959494909074741,
|
75880 |
+
"grad_norm": 0.057688742876052856,
|
75881 |
+
"learning_rate": 4.417831247244819e-06,
|
75882 |
+
"loss": 1.4489,
|
75883 |
+
"step": 21580
|
75884 |
+
},
|
75885 |
+
{
|
75886 |
+
"epoch": 0.9595838335334134,
|
75887 |
+
"grad_norm": 0.058130089193582535,
|
75888 |
+
"learning_rate": 4.398462038685824e-06,
|
75889 |
+
"loss": 1.4435,
|
75890 |
+
"step": 21582
|
75891 |
+
},
|
75892 |
+
{
|
75893 |
+
"epoch": 0.9596727579920857,
|
75894 |
+
"grad_norm": 0.05822238698601723,
|
75895 |
+
"learning_rate": 4.3791351963304304e-06,
|
75896 |
+
"loss": 1.4503,
|
75897 |
+
"step": 21584
|
75898 |
+
},
|
75899 |
+
{
|
75900 |
+
"epoch": 0.9597616824507581,
|
75901 |
+
"grad_norm": 0.05791193246841431,
|
75902 |
+
"learning_rate": 4.3598507218306495e-06,
|
75903 |
+
"loss": 1.4482,
|
75904 |
+
"step": 21586
|
75905 |
+
},
|
75906 |
+
{
|
75907 |
+
"epoch": 0.9598506069094305,
|
75908 |
+
"grad_norm": 0.05749112367630005,
|
75909 |
+
"learning_rate": 4.340608616835051e-06,
|
75910 |
+
"loss": 1.4458,
|
75911 |
+
"step": 21588
|
75912 |
+
},
|
75913 |
+
{
|
75914 |
+
"epoch": 0.9599395313681028,
|
75915 |
+
"grad_norm": 0.0575961209833622,
|
75916 |
+
"learning_rate": 4.321408882988542e-06,
|
75917 |
+
"loss": 1.4476,
|
75918 |
+
"step": 21590
|
75919 |
+
},
|
75920 |
+
{
|
75921 |
+
"epoch": 0.9600284558267752,
|
75922 |
+
"grad_norm": 0.05781961977481842,
|
75923 |
+
"learning_rate": 4.302251521932366e-06,
|
75924 |
+
"loss": 1.4448,
|
75925 |
+
"step": 21592
|
75926 |
+
},
|
75927 |
+
{
|
75928 |
+
"epoch": 0.9601173802854475,
|
75929 |
+
"grad_norm": 0.05863596498966217,
|
75930 |
+
"learning_rate": 4.283136535304155e-06,
|
75931 |
+
"loss": 1.4491,
|
75932 |
+
"step": 21594
|
75933 |
+
},
|
75934 |
+
{
|
75935 |
+
"epoch": 0.9602063047441198,
|
75936 |
+
"grad_norm": 0.057458702474832535,
|
75937 |
+
"learning_rate": 4.264063924738104e-06,
|
75938 |
+
"loss": 1.4479,
|
75939 |
+
"step": 21596
|
75940 |
+
},
|
75941 |
+
{
|
75942 |
+
"epoch": 0.9602952292027922,
|
75943 |
+
"grad_norm": 0.057083528488874435,
|
75944 |
+
"learning_rate": 4.2450336918644085e-06,
|
75945 |
+
"loss": 1.4482,
|
75946 |
+
"step": 21598
|
75947 |
+
},
|
75948 |
+
{
|
75949 |
+
"epoch": 0.9603841536614646,
|
75950 |
+
"grad_norm": 0.05720750615000725,
|
75951 |
+
"learning_rate": 4.226045838309989e-06,
|
75952 |
+
"loss": 1.4473,
|
75953 |
+
"step": 21600
|
75954 |
+
},
|
75955 |
+
{
|
75956 |
+
"epoch": 0.9604730781201369,
|
75957 |
+
"grad_norm": 0.05795248597860336,
|
75958 |
+
"learning_rate": 4.207100365697936e-06,
|
75959 |
+
"loss": 1.4519,
|
75960 |
+
"step": 21602
|
75961 |
+
},
|
75962 |
+
{
|
75963 |
+
"epoch": 0.9605620025788093,
|
75964 |
+
"grad_norm": 0.057493992149829865,
|
75965 |
+
"learning_rate": 4.188197275647898e-06,
|
75966 |
+
"loss": 1.4457,
|
75967 |
+
"step": 21604
|
75968 |
+
},
|
75969 |
+
{
|
75970 |
+
"epoch": 0.9606509270374817,
|
75971 |
+
"grad_norm": 0.0582110695540905,
|
75972 |
+
"learning_rate": 4.169336569775695e-06,
|
75973 |
+
"loss": 1.4448,
|
75974 |
+
"step": 21606
|
75975 |
+
},
|
75976 |
+
{
|
75977 |
+
"epoch": 0.960739851496154,
|
75978 |
+
"grad_norm": 0.05797712132334709,
|
75979 |
+
"learning_rate": 4.150518249693647e-06,
|
75980 |
+
"loss": 1.4529,
|
75981 |
+
"step": 21608
|
75982 |
+
},
|
75983 |
+
{
|
75984 |
+
"epoch": 0.9608287759548264,
|
75985 |
+
"grad_norm": 0.0583646297454834,
|
75986 |
+
"learning_rate": 4.1317423170104675e-06,
|
75987 |
+
"loss": 1.4422,
|
75988 |
+
"step": 21610
|
75989 |
+
},
|
75990 |
+
{
|
75991 |
+
"epoch": 0.9609177004134988,
|
75992 |
+
"grad_norm": 0.05701034516096115,
|
75993 |
+
"learning_rate": 4.113008773331151e-06,
|
75994 |
+
"loss": 1.4434,
|
75995 |
+
"step": 21612
|
75996 |
+
},
|
75997 |
+
{
|
75998 |
+
"epoch": 0.9610066248721711,
|
75999 |
+
"grad_norm": 0.05788358673453331,
|
76000 |
+
"learning_rate": 4.094317620257138e-06,
|
76001 |
+
"loss": 1.4412,
|
76002 |
+
"step": 21614
|
76003 |
+
},
|
76004 |
+
{
|
76005 |
+
"epoch": 0.9610955493308434,
|
76006 |
+
"grad_norm": 0.05787377804517746,
|
76007 |
+
"learning_rate": 4.075668859386261e-06,
|
76008 |
+
"loss": 1.4417,
|
76009 |
+
"step": 21616
|
76010 |
+
},
|
76011 |
+
{
|
76012 |
+
"epoch": 0.9611844737895158,
|
76013 |
+
"grad_norm": 0.05681522190570831,
|
76014 |
+
"learning_rate": 4.057062492312691e-06,
|
76015 |
+
"loss": 1.4483,
|
76016 |
+
"step": 21618
|
76017 |
+
},
|
76018 |
+
{
|
76019 |
+
"epoch": 0.9612733982481881,
|
76020 |
+
"grad_norm": 0.05852191522717476,
|
76021 |
+
"learning_rate": 4.038498520626932e-06,
|
76022 |
+
"loss": 1.4491,
|
76023 |
+
"step": 21620
|
76024 |
+
},
|
76025 |
+
{
|
76026 |
+
"epoch": 0.9613623227068605,
|
76027 |
+
"grad_norm": 0.05827983468770981,
|
76028 |
+
"learning_rate": 4.0199769459159376e-06,
|
76029 |
+
"loss": 1.4491,
|
76030 |
+
"step": 21622
|
76031 |
+
},
|
76032 |
+
{
|
76033 |
+
"epoch": 0.9614512471655329,
|
76034 |
+
"grad_norm": 0.05857324227690697,
|
76035 |
+
"learning_rate": 4.001497769763107e-06,
|
76036 |
+
"loss": 1.4505,
|
76037 |
+
"step": 21624
|
76038 |
+
},
|
76039 |
+
{
|
76040 |
+
"epoch": 0.9615401716242052,
|
76041 |
+
"grad_norm": 0.058631282299757004,
|
76042 |
+
"learning_rate": 3.983060993747956e-06,
|
76043 |
+
"loss": 1.4475,
|
76044 |
+
"step": 21626
|
76045 |
+
},
|
76046 |
+
{
|
76047 |
+
"epoch": 0.9616290960828776,
|
76048 |
+
"grad_norm": 0.056906893849372864,
|
76049 |
+
"learning_rate": 3.964666619446666e-06,
|
76050 |
+
"loss": 1.4458,
|
76051 |
+
"step": 21628
|
76052 |
+
},
|
76053 |
+
{
|
76054 |
+
"epoch": 0.96171802054155,
|
76055 |
+
"grad_norm": 0.057671476155519485,
|
76056 |
+
"learning_rate": 3.946314648431648e-06,
|
76057 |
+
"loss": 1.4407,
|
76058 |
+
"step": 21630
|
76059 |
+
},
|
76060 |
+
{
|
76061 |
+
"epoch": 0.9618069450002223,
|
76062 |
+
"grad_norm": 0.057756319642066956,
|
76063 |
+
"learning_rate": 3.928005082271646e-06,
|
76064 |
+
"loss": 1.4427,
|
76065 |
+
"step": 21632
|
76066 |
+
},
|
76067 |
+
{
|
76068 |
+
"epoch": 0.9618958694588947,
|
76069 |
+
"grad_norm": 0.05779024213552475,
|
76070 |
+
"learning_rate": 3.909737922531909e-06,
|
76071 |
+
"loss": 1.4454,
|
76072 |
+
"step": 21634
|
76073 |
+
},
|
76074 |
+
{
|
76075 |
+
"epoch": 0.961984793917567,
|
76076 |
+
"grad_norm": 0.05701223015785217,
|
76077 |
+
"learning_rate": 3.891513170773964e-06,
|
76078 |
+
"loss": 1.4496,
|
76079 |
+
"step": 21636
|
76080 |
+
},
|
76081 |
+
{
|
76082 |
+
"epoch": 0.9620737183762393,
|
76083 |
+
"grad_norm": 0.05769617483019829,
|
76084 |
+
"learning_rate": 3.873330828555788e-06,
|
76085 |
+
"loss": 1.443,
|
76086 |
+
"step": 21638
|
76087 |
+
},
|
76088 |
+
{
|
76089 |
+
"epoch": 0.9621626428349117,
|
76090 |
+
"grad_norm": 0.05806345120072365,
|
76091 |
+
"learning_rate": 3.855190897431637e-06,
|
76092 |
+
"loss": 1.4428,
|
76093 |
+
"step": 21640
|
76094 |
+
},
|
76095 |
+
{
|
76096 |
+
"epoch": 0.9622515672935841,
|
76097 |
+
"grad_norm": 0.0573996864259243,
|
76098 |
+
"learning_rate": 3.837093378952217e-06,
|
76099 |
+
"loss": 1.4492,
|
76100 |
+
"step": 21642
|
76101 |
+
},
|
76102 |
+
{
|
76103 |
+
"epoch": 0.9623404917522564,
|
76104 |
+
"grad_norm": 0.05671732872724533,
|
76105 |
+
"learning_rate": 3.81903827466451e-06,
|
76106 |
+
"loss": 1.4445,
|
76107 |
+
"step": 21644
|
76108 |
+
},
|
76109 |
+
{
|
76110 |
+
"epoch": 0.9624294162109288,
|
76111 |
+
"grad_norm": 0.057457756251096725,
|
76112 |
+
"learning_rate": 3.801025586112061e-06,
|
76113 |
+
"loss": 1.4476,
|
76114 |
+
"step": 21646
|
76115 |
+
},
|
76116 |
+
{
|
76117 |
+
"epoch": 0.9625183406696012,
|
76118 |
+
"grad_norm": 0.05706573650240898,
|
76119 |
+
"learning_rate": 3.783055314834638e-06,
|
76120 |
+
"loss": 1.4462,
|
76121 |
+
"step": 21648
|
76122 |
+
},
|
76123 |
+
{
|
76124 |
+
"epoch": 0.9626072651282735,
|
76125 |
+
"grad_norm": 0.05761359632015228,
|
76126 |
+
"learning_rate": 3.765127462368345e-06,
|
76127 |
+
"loss": 1.4483,
|
76128 |
+
"step": 21650
|
76129 |
+
},
|
76130 |
+
{
|
76131 |
+
"epoch": 0.9626961895869459,
|
76132 |
+
"grad_norm": 0.057819437235593796,
|
76133 |
+
"learning_rate": 3.7472420302458455e-06,
|
76134 |
+
"loss": 1.4464,
|
76135 |
+
"step": 21652
|
76136 |
+
},
|
76137 |
+
{
|
76138 |
+
"epoch": 0.9627851140456183,
|
76139 |
+
"grad_norm": 0.05833171680569649,
|
76140 |
+
"learning_rate": 3.729399019996027e-06,
|
76141 |
+
"loss": 1.4449,
|
76142 |
+
"step": 21654
|
76143 |
+
},
|
76144 |
+
{
|
76145 |
+
"epoch": 0.9628740385042907,
|
76146 |
+
"grad_norm": 0.057252366095781326,
|
76147 |
+
"learning_rate": 3.71159843314417e-06,
|
76148 |
+
"loss": 1.4462,
|
76149 |
+
"step": 21656
|
76150 |
+
},
|
76151 |
+
{
|
76152 |
+
"epoch": 0.9629629629629629,
|
76153 |
+
"grad_norm": 0.058833926916122437,
|
76154 |
+
"learning_rate": 3.69384027121189e-06,
|
76155 |
+
"loss": 1.4441,
|
76156 |
+
"step": 21658
|
76157 |
+
},
|
76158 |
+
{
|
76159 |
+
"epoch": 0.9630518874216353,
|
76160 |
+
"grad_norm": 0.05726613849401474,
|
76161 |
+
"learning_rate": 3.676124535717307e-06,
|
76162 |
+
"loss": 1.4473,
|
76163 |
+
"step": 21660
|
76164 |
+
},
|
76165 |
+
{
|
76166 |
+
"epoch": 0.9631408118803076,
|
76167 |
+
"grad_norm": 0.05816185846924782,
|
76168 |
+
"learning_rate": 3.658451228174875e-06,
|
76169 |
+
"loss": 1.4407,
|
76170 |
+
"step": 21662
|
76171 |
+
},
|
76172 |
+
{
|
76173 |
+
"epoch": 0.96322973633898,
|
76174 |
+
"grad_norm": 0.05848447605967522,
|
76175 |
+
"learning_rate": 3.640820350095331e-06,
|
76176 |
+
"loss": 1.4431,
|
76177 |
+
"step": 21664
|
76178 |
+
},
|
76179 |
+
{
|
76180 |
+
"epoch": 0.9633186607976524,
|
76181 |
+
"grad_norm": 0.05794060230255127,
|
76182 |
+
"learning_rate": 3.6232319029858017e-06,
|
76183 |
+
"loss": 1.4511,
|
76184 |
+
"step": 21666
|
76185 |
+
},
|
76186 |
+
{
|
76187 |
+
"epoch": 0.9634075852563247,
|
76188 |
+
"grad_norm": 0.05679847672581673,
|
76189 |
+
"learning_rate": 3.6056858883499187e-06,
|
76190 |
+
"loss": 1.4469,
|
76191 |
+
"step": 21668
|
76192 |
+
},
|
76193 |
+
{
|
76194 |
+
"epoch": 0.9634965097149971,
|
76195 |
+
"grad_norm": 0.05763591080904007,
|
76196 |
+
"learning_rate": 3.588182307687482e-06,
|
76197 |
+
"loss": 1.4494,
|
76198 |
+
"step": 21670
|
76199 |
+
},
|
76200 |
+
{
|
76201 |
+
"epoch": 0.9635854341736695,
|
76202 |
+
"grad_norm": 0.05757317319512367,
|
76203 |
+
"learning_rate": 3.5707211624949055e-06,
|
76204 |
+
"loss": 1.4486,
|
76205 |
+
"step": 21672
|
76206 |
+
},
|
76207 |
+
{
|
76208 |
+
"epoch": 0.9636743586323419,
|
76209 |
+
"grad_norm": 0.057650867849588394,
|
76210 |
+
"learning_rate": 3.5533024542647174e-06,
|
76211 |
+
"loss": 1.4514,
|
76212 |
+
"step": 21674
|
76213 |
+
},
|
76214 |
+
{
|
76215 |
+
"epoch": 0.9637632830910142,
|
76216 |
+
"grad_norm": 0.057070985436439514,
|
76217 |
+
"learning_rate": 3.53592618448606e-06,
|
76218 |
+
"loss": 1.4482,
|
76219 |
+
"step": 21676
|
76220 |
+
},
|
76221 |
+
{
|
76222 |
+
"epoch": 0.9638522075496866,
|
76223 |
+
"grad_norm": 0.05852733552455902,
|
76224 |
+
"learning_rate": 3.5185923546442457e-06,
|
76225 |
+
"loss": 1.448,
|
76226 |
+
"step": 21678
|
76227 |
+
},
|
76228 |
+
{
|
76229 |
+
"epoch": 0.9639411320083588,
|
76230 |
+
"grad_norm": 0.058048900216817856,
|
76231 |
+
"learning_rate": 3.501300966221088e-06,
|
76232 |
+
"loss": 1.4493,
|
76233 |
+
"step": 21680
|
76234 |
+
},
|
76235 |
+
{
|
76236 |
+
"epoch": 0.9640300564670312,
|
76237 |
+
"grad_norm": 0.05750441551208496,
|
76238 |
+
"learning_rate": 3.4840520206947392e-06,
|
76239 |
+
"loss": 1.4466,
|
76240 |
+
"step": 21682
|
76241 |
+
},
|
76242 |
+
{
|
76243 |
+
"epoch": 0.9641189809257036,
|
76244 |
+
"grad_norm": 0.05819224938750267,
|
76245 |
+
"learning_rate": 3.4668455195396854e-06,
|
76246 |
+
"loss": 1.4456,
|
76247 |
+
"step": 21684
|
76248 |
+
},
|
76249 |
+
{
|
76250 |
+
"epoch": 0.964207905384376,
|
76251 |
+
"grad_norm": 0.057015545666217804,
|
76252 |
+
"learning_rate": 3.449681464226806e-06,
|
76253 |
+
"loss": 1.4428,
|
76254 |
+
"step": 21686
|
76255 |
+
},
|
76256 |
+
{
|
76257 |
+
"epoch": 0.9642968298430483,
|
76258 |
+
"grad_norm": 0.056825630366802216,
|
76259 |
+
"learning_rate": 3.4325598562234274e-06,
|
76260 |
+
"loss": 1.447,
|
76261 |
+
"step": 21688
|
76262 |
+
},
|
76263 |
+
{
|
76264 |
+
"epoch": 0.9643857543017207,
|
76265 |
+
"grad_norm": 0.05699056014418602,
|
76266 |
+
"learning_rate": 3.415480696993101e-06,
|
76267 |
+
"loss": 1.4432,
|
76268 |
+
"step": 21690
|
76269 |
+
},
|
76270 |
+
{
|
76271 |
+
"epoch": 0.9644746787603931,
|
76272 |
+
"grad_norm": 0.057919371873140335,
|
76273 |
+
"learning_rate": 3.3984439879958807e-06,
|
76274 |
+
"loss": 1.4484,
|
76275 |
+
"step": 21692
|
76276 |
+
},
|
76277 |
+
{
|
76278 |
+
"epoch": 0.9645636032190654,
|
76279 |
+
"grad_norm": 0.05731486156582832,
|
76280 |
+
"learning_rate": 3.381449730688102e-06,
|
76281 |
+
"loss": 1.4495,
|
76282 |
+
"step": 21694
|
76283 |
+
},
|
76284 |
+
{
|
76285 |
+
"epoch": 0.9646525276777378,
|
76286 |
+
"grad_norm": 0.05739450827240944,
|
76287 |
+
"learning_rate": 3.3644979265225474e-06,
|
76288 |
+
"loss": 1.449,
|
76289 |
+
"step": 21696
|
76290 |
+
},
|
76291 |
+
{
|
76292 |
+
"epoch": 0.9647414521364102,
|
76293 |
+
"grad_norm": 0.058767009526491165,
|
76294 |
+
"learning_rate": 3.3475885769482796e-06,
|
76295 |
+
"loss": 1.4444,
|
76296 |
+
"step": 21698
|
76297 |
+
},
|
76298 |
+
{
|
76299 |
+
"epoch": 0.9648303765950825,
|
76300 |
+
"grad_norm": 0.05814867466688156,
|
76301 |
+
"learning_rate": 3.330721683410809e-06,
|
76302 |
+
"loss": 1.4522,
|
76303 |
+
"step": 21700
|
76304 |
+
},
|
76305 |
+
{
|
76306 |
+
"epoch": 0.9649193010537548,
|
76307 |
+
"grad_norm": 0.05803316831588745,
|
76308 |
+
"learning_rate": 3.313897247352038e-06,
|
76309 |
+
"loss": 1.4435,
|
76310 |
+
"step": 21702
|
76311 |
+
},
|
76312 |
+
{
|
76313 |
+
"epoch": 0.9650082255124272,
|
76314 |
+
"grad_norm": 0.05720685422420502,
|
76315 |
+
"learning_rate": 3.297115270210149e-06,
|
76316 |
+
"loss": 1.4459,
|
76317 |
+
"step": 21704
|
76318 |
+
},
|
76319 |
+
{
|
76320 |
+
"epoch": 0.9650971499710995,
|
76321 |
+
"grad_norm": 0.05773816257715225,
|
76322 |
+
"learning_rate": 3.2803757534197175e-06,
|
76323 |
+
"loss": 1.4459,
|
76324 |
+
"step": 21706
|
76325 |
+
},
|
76326 |
+
{
|
76327 |
+
"epoch": 0.9651860744297719,
|
76328 |
+
"grad_norm": 0.05913296714425087,
|
76329 |
+
"learning_rate": 3.263678698411765e-06,
|
76330 |
+
"loss": 1.4439,
|
76331 |
+
"step": 21708
|
76332 |
+
},
|
76333 |
+
{
|
76334 |
+
"epoch": 0.9652749988884443,
|
76335 |
+
"grad_norm": 0.057658907026052475,
|
76336 |
+
"learning_rate": 3.2470241066136495e-06,
|
76337 |
+
"loss": 1.4482,
|
76338 |
+
"step": 21710
|
76339 |
+
},
|
76340 |
+
{
|
76341 |
+
"epoch": 0.9653639233471166,
|
76342 |
+
"grad_norm": 0.05754270777106285,
|
76343 |
+
"learning_rate": 3.230411979448955e-06,
|
76344 |
+
"loss": 1.4479,
|
76345 |
+
"step": 21712
|
76346 |
+
},
|
76347 |
+
{
|
76348 |
+
"epoch": 0.965452847805789,
|
76349 |
+
"grad_norm": 0.05791141465306282,
|
76350 |
+
"learning_rate": 3.2138423183378785e-06,
|
76351 |
+
"loss": 1.4476,
|
76352 |
+
"step": 21714
|
76353 |
+
},
|
76354 |
+
{
|
76355 |
+
"epoch": 0.9655417722644614,
|
76356 |
+
"grad_norm": 0.057228460907936096,
|
76357 |
+
"learning_rate": 3.1973151246967868e-06,
|
76358 |
+
"loss": 1.4455,
|
76359 |
+
"step": 21716
|
76360 |
+
},
|
76361 |
+
{
|
76362 |
+
"epoch": 0.9656306967231337,
|
76363 |
+
"grad_norm": 0.05856222286820412,
|
76364 |
+
"learning_rate": 3.18083039993855e-06,
|
76365 |
+
"loss": 1.4421,
|
76366 |
+
"step": 21718
|
76367 |
+
},
|
76368 |
+
{
|
76369 |
+
"epoch": 0.9657196211818061,
|
76370 |
+
"grad_norm": 0.05858870595693588,
|
76371 |
+
"learning_rate": 3.1643881454723744e-06,
|
76372 |
+
"loss": 1.4433,
|
76373 |
+
"step": 21720
|
76374 |
+
},
|
76375 |
+
{
|
76376 |
+
"epoch": 0.9658085456404785,
|
76377 |
+
"grad_norm": 0.057810429483652115,
|
76378 |
+
"learning_rate": 3.1479883627037464e-06,
|
76379 |
+
"loss": 1.4414,
|
76380 |
+
"step": 21722
|
76381 |
+
},
|
76382 |
+
{
|
76383 |
+
"epoch": 0.9658974700991507,
|
76384 |
+
"grad_norm": 0.05786995589733124,
|
76385 |
+
"learning_rate": 3.131631053034656e-06,
|
76386 |
+
"loss": 1.4479,
|
76387 |
+
"step": 21724
|
76388 |
+
},
|
76389 |
+
{
|
76390 |
+
"epoch": 0.9659863945578231,
|
76391 |
+
"grad_norm": 0.05765226110816002,
|
76392 |
+
"learning_rate": 3.115316217863373e-06,
|
76393 |
+
"loss": 1.444,
|
76394 |
+
"step": 21726
|
76395 |
+
},
|
76396 |
+
{
|
76397 |
+
"epoch": 0.9660753190164955,
|
76398 |
+
"grad_norm": 0.05825477093458176,
|
76399 |
+
"learning_rate": 3.0990438585845048e-06,
|
76400 |
+
"loss": 1.4505,
|
76401 |
+
"step": 21728
|
76402 |
+
},
|
76403 |
+
{
|
76404 |
+
"epoch": 0.9661642434751678,
|
76405 |
+
"grad_norm": 0.057248134166002274,
|
76406 |
+
"learning_rate": 3.082813976589216e-06,
|
76407 |
+
"loss": 1.4484,
|
76408 |
+
"step": 21730
|
76409 |
+
},
|
76410 |
+
{
|
76411 |
+
"epoch": 0.9662531679338402,
|
76412 |
+
"grad_norm": 0.057661522179841995,
|
76413 |
+
"learning_rate": 3.0666265732647856e-06,
|
76414 |
+
"loss": 1.4497,
|
76415 |
+
"step": 21732
|
76416 |
+
},
|
76417 |
+
{
|
76418 |
+
"epoch": 0.9663420923925126,
|
76419 |
+
"grad_norm": 0.05723179504275322,
|
76420 |
+
"learning_rate": 3.0504816499950516e-06,
|
76421 |
+
"loss": 1.4442,
|
76422 |
+
"step": 21734
|
76423 |
+
},
|
76424 |
+
{
|
76425 |
+
"epoch": 0.9664310168511849,
|
76426 |
+
"grad_norm": 0.05735337361693382,
|
76427 |
+
"learning_rate": 3.034379208160076e-06,
|
76428 |
+
"loss": 1.4482,
|
76429 |
+
"step": 21736
|
76430 |
+
},
|
76431 |
+
{
|
76432 |
+
"epoch": 0.9665199413098573,
|
76433 |
+
"grad_norm": 0.05809729918837547,
|
76434 |
+
"learning_rate": 3.018319249136481e-06,
|
76435 |
+
"loss": 1.4436,
|
76436 |
+
"step": 21738
|
76437 |
+
},
|
76438 |
+
{
|
76439 |
+
"epoch": 0.9666088657685297,
|
76440 |
+
"grad_norm": 0.05790387839078903,
|
76441 |
+
"learning_rate": 3.002301774297056e-06,
|
76442 |
+
"loss": 1.4404,
|
76443 |
+
"step": 21740
|
76444 |
+
},
|
76445 |
+
{
|
76446 |
+
"epoch": 0.966697790227202,
|
76447 |
+
"grad_norm": 0.05688699334859848,
|
76448 |
+
"learning_rate": 2.9863267850110955e-06,
|
76449 |
+
"loss": 1.449,
|
76450 |
+
"step": 21742
|
76451 |
+
},
|
76452 |
+
{
|
76453 |
+
"epoch": 0.9667867146858744,
|
76454 |
+
"grad_norm": 0.056998882442712784,
|
76455 |
+
"learning_rate": 2.9703942826441734e-06,
|
76456 |
+
"loss": 1.4472,
|
76457 |
+
"step": 21744
|
76458 |
+
},
|
76459 |
+
{
|
76460 |
+
"epoch": 0.9668756391445467,
|
76461 |
+
"grad_norm": 0.058484215289354324,
|
76462 |
+
"learning_rate": 2.9545042685583112e-06,
|
76463 |
+
"loss": 1.4486,
|
76464 |
+
"step": 21746
|
76465 |
+
},
|
76466 |
+
{
|
76467 |
+
"epoch": 0.966964563603219,
|
76468 |
+
"grad_norm": 0.05785754323005676,
|
76469 |
+
"learning_rate": 2.938656744111812e-06,
|
76470 |
+
"loss": 1.4462,
|
76471 |
+
"step": 21748
|
76472 |
+
},
|
76473 |
+
{
|
76474 |
+
"epoch": 0.9670534880618914,
|
76475 |
+
"grad_norm": 0.05740215256810188,
|
76476 |
+
"learning_rate": 2.9228517106594244e-06,
|
76477 |
+
"loss": 1.4409,
|
76478 |
+
"step": 21750
|
76479 |
+
},
|
76480 |
+
{
|
76481 |
+
"epoch": 0.9671424125205638,
|
76482 |
+
"grad_norm": 0.057988543063402176,
|
76483 |
+
"learning_rate": 2.9070891695521796e-06,
|
76484 |
+
"loss": 1.4468,
|
76485 |
+
"step": 21752
|
76486 |
+
},
|
76487 |
+
{
|
76488 |
+
"epoch": 0.9672313369792361,
|
76489 |
+
"grad_norm": 0.05775817483663559,
|
76490 |
+
"learning_rate": 2.8913691221376102e-06,
|
76491 |
+
"loss": 1.4534,
|
76492 |
+
"step": 21754
|
76493 |
+
},
|
76494 |
+
{
|
76495 |
+
"epoch": 0.9673202614379085,
|
76496 |
+
"grad_norm": 0.05813591554760933,
|
76497 |
+
"learning_rate": 2.8756915697594756e-06,
|
76498 |
+
"loss": 1.4463,
|
76499 |
+
"step": 21756
|
76500 |
+
},
|
76501 |
+
{
|
76502 |
+
"epoch": 0.9674091858965809,
|
76503 |
+
"grad_norm": 0.05779729038476944,
|
76504 |
+
"learning_rate": 2.86005651375798e-06,
|
76505 |
+
"loss": 1.4475,
|
76506 |
+
"step": 21758
|
76507 |
+
},
|
76508 |
+
{
|
76509 |
+
"epoch": 0.9674981103552532,
|
76510 |
+
"grad_norm": 0.05689067393541336,
|
76511 |
+
"learning_rate": 2.844463955469723e-06,
|
76512 |
+
"loss": 1.4441,
|
76513 |
+
"step": 21760
|
76514 |
+
},
|
76515 |
+
{
|
76516 |
+
"epoch": 0.9675870348139256,
|
76517 |
+
"grad_norm": 0.05785556882619858,
|
76518 |
+
"learning_rate": 2.828913896227525e-06,
|
76519 |
+
"loss": 1.446,
|
76520 |
+
"step": 21762
|
76521 |
+
},
|
76522 |
+
{
|
76523 |
+
"epoch": 0.967675959272598,
|
76524 |
+
"grad_norm": 0.0574394054710865,
|
76525 |
+
"learning_rate": 2.8134063373607687e-06,
|
76526 |
+
"loss": 1.4418,
|
76527 |
+
"step": 21764
|
76528 |
+
},
|
76529 |
+
{
|
76530 |
+
"epoch": 0.9677648837312702,
|
76531 |
+
"grad_norm": 0.05662890523672104,
|
76532 |
+
"learning_rate": 2.79794128019506e-06,
|
76533 |
+
"loss": 1.4432,
|
76534 |
+
"step": 21766
|
76535 |
+
},
|
76536 |
+
{
|
76537 |
+
"epoch": 0.9678538081899426,
|
76538 |
+
"grad_norm": 0.0582660436630249,
|
76539 |
+
"learning_rate": 2.7825187260523966e-06,
|
76540 |
+
"loss": 1.4476,
|
76541 |
+
"step": 21768
|
76542 |
+
},
|
76543 |
+
{
|
76544 |
+
"epoch": 0.967942732648615,
|
76545 |
+
"grad_norm": 0.05858684331178665,
|
76546 |
+
"learning_rate": 2.767138676251224e-06,
|
76547 |
+
"loss": 1.4451,
|
76548 |
+
"step": 21770
|
76549 |
+
},
|
76550 |
+
{
|
76551 |
+
"epoch": 0.9680316571072873,
|
76552 |
+
"grad_norm": 0.056393761187791824,
|
76553 |
+
"learning_rate": 2.7518011321062687e-06,
|
76554 |
+
"loss": 1.444,
|
76555 |
+
"step": 21772
|
76556 |
+
},
|
76557 |
+
{
|
76558 |
+
"epoch": 0.9681205815659597,
|
76559 |
+
"grad_norm": 0.058356158435344696,
|
76560 |
+
"learning_rate": 2.736506094928648e-06,
|
76561 |
+
"loss": 1.4424,
|
76562 |
+
"step": 21774
|
76563 |
+
},
|
76564 |
+
{
|
76565 |
+
"epoch": 0.9682095060246321,
|
76566 |
+
"grad_norm": 0.05726497247815132,
|
76567 |
+
"learning_rate": 2.7212535660258718e-06,
|
76568 |
+
"loss": 1.4445,
|
76569 |
+
"step": 21776
|
76570 |
+
},
|
76571 |
+
{
|
76572 |
+
"epoch": 0.9682984304833044,
|
76573 |
+
"grad_norm": 0.057576339691877365,
|
76574 |
+
"learning_rate": 2.7060435467017865e-06,
|
76575 |
+
"loss": 1.4466,
|
76576 |
+
"step": 21778
|
76577 |
+
},
|
76578 |
+
{
|
76579 |
+
"epoch": 0.9683873549419768,
|
76580 |
+
"grad_norm": 0.05778598040342331,
|
76581 |
+
"learning_rate": 2.6908760382565735e-06,
|
76582 |
+
"loss": 1.448,
|
76583 |
+
"step": 21780
|
76584 |
+
},
|
76585 |
+
{
|
76586 |
+
"epoch": 0.9684762794006492,
|
76587 |
+
"grad_norm": 0.056812673807144165,
|
76588 |
+
"learning_rate": 2.6757510419868624e-06,
|
76589 |
+
"loss": 1.4487,
|
76590 |
+
"step": 21782
|
76591 |
+
},
|
76592 |
+
{
|
76593 |
+
"epoch": 0.9685652038593215,
|
76594 |
+
"grad_norm": 0.05817960575222969,
|
76595 |
+
"learning_rate": 2.660668559185564e-06,
|
76596 |
+
"loss": 1.4498,
|
76597 |
+
"step": 21784
|
76598 |
+
},
|
76599 |
+
{
|
76600 |
+
"epoch": 0.9686541283179939,
|
76601 |
+
"grad_norm": 0.05673222243785858,
|
76602 |
+
"learning_rate": 2.6456285911420353e-06,
|
76603 |
+
"loss": 1.4489,
|
76604 |
+
"step": 21786
|
76605 |
+
},
|
76606 |
+
{
|
76607 |
+
"epoch": 0.9687430527766662,
|
76608 |
+
"grad_norm": 0.058771610260009766,
|
76609 |
+
"learning_rate": 2.63063113914197e-06,
|
76610 |
+
"loss": 1.4485,
|
76611 |
+
"step": 21788
|
76612 |
+
},
|
76613 |
+
{
|
76614 |
+
"epoch": 0.9688319772353385,
|
76615 |
+
"grad_norm": 0.05711560696363449,
|
76616 |
+
"learning_rate": 2.615676204467343e-06,
|
76617 |
+
"loss": 1.4452,
|
76618 |
+
"step": 21790
|
76619 |
+
},
|
76620 |
+
{
|
76621 |
+
"epoch": 0.9689209016940109,
|
76622 |
+
"grad_norm": 0.057385995984077454,
|
76623 |
+
"learning_rate": 2.6007637883966872e-06,
|
76624 |
+
"loss": 1.4476,
|
76625 |
+
"step": 21792
|
76626 |
+
},
|
76627 |
+
{
|
76628 |
+
"epoch": 0.9690098261526833,
|
76629 |
+
"grad_norm": 0.05783890560269356,
|
76630 |
+
"learning_rate": 2.5858938922046495e-06,
|
76631 |
+
"loss": 1.4458,
|
76632 |
+
"step": 21794
|
76633 |
+
},
|
76634 |
+
{
|
76635 |
+
"epoch": 0.9690987506113556,
|
76636 |
+
"grad_norm": 0.05787234753370285,
|
76637 |
+
"learning_rate": 2.571066517162435e-06,
|
76638 |
+
"loss": 1.4456,
|
76639 |
+
"step": 21796
|
76640 |
+
},
|
76641 |
+
{
|
76642 |
+
"epoch": 0.969187675070028,
|
76643 |
+
"grad_norm": 0.05799291655421257,
|
76644 |
+
"learning_rate": 2.556281664537585e-06,
|
76645 |
+
"loss": 1.4469,
|
76646 |
+
"step": 21798
|
76647 |
+
},
|
76648 |
+
{
|
76649 |
+
"epoch": 0.9692765995287004,
|
76650 |
+
"grad_norm": 0.05841020122170448,
|
76651 |
+
"learning_rate": 2.541539335593923e-06,
|
76652 |
+
"loss": 1.4422,
|
76653 |
+
"step": 21800
|
76654 |
+
},
|
76655 |
+
{
|
76656 |
+
"epoch": 0.9693655239873727,
|
76657 |
+
"grad_norm": 0.057031866163015366,
|
76658 |
+
"learning_rate": 2.526839531591718e-06,
|
76659 |
+
"loss": 1.4509,
|
76660 |
+
"step": 21802
|
76661 |
+
},
|
76662 |
+
{
|
76663 |
+
"epoch": 0.9694544484460451,
|
76664 |
+
"grad_norm": 0.05724103003740311,
|
76665 |
+
"learning_rate": 2.5121822537875206e-06,
|
76666 |
+
"loss": 1.4438,
|
76667 |
+
"step": 21804
|
76668 |
+
},
|
76669 |
+
{
|
76670 |
+
"epoch": 0.9695433729047175,
|
76671 |
+
"grad_norm": 0.057726915925741196,
|
76672 |
+
"learning_rate": 2.4975675034343835e-06,
|
76673 |
+
"loss": 1.4444,
|
76674 |
+
"step": 21806
|
76675 |
+
},
|
76676 |
+
{
|
76677 |
+
"epoch": 0.9696322973633899,
|
76678 |
+
"grad_norm": 0.058166489005088806,
|
76679 |
+
"learning_rate": 2.482995281781586e-06,
|
76680 |
+
"loss": 1.4455,
|
76681 |
+
"step": 21808
|
76682 |
+
},
|
76683 |
+
{
|
76684 |
+
"epoch": 0.9697212218220621,
|
76685 |
+
"grad_norm": 0.058684688061475754,
|
76686 |
+
"learning_rate": 2.4684655900748532e-06,
|
76687 |
+
"loss": 1.4486,
|
76688 |
+
"step": 21810
|
76689 |
+
},
|
76690 |
+
{
|
76691 |
+
"epoch": 0.9698101462807345,
|
76692 |
+
"grad_norm": 0.05730533227324486,
|
76693 |
+
"learning_rate": 2.453978429556247e-06,
|
76694 |
+
"loss": 1.4456,
|
76695 |
+
"step": 21812
|
76696 |
+
},
|
76697 |
+
{
|
76698 |
+
"epoch": 0.9698990707394068,
|
76699 |
+
"grad_norm": 0.057590242475271225,
|
76700 |
+
"learning_rate": 2.439533801464111e-06,
|
76701 |
+
"loss": 1.4445,
|
76702 |
+
"step": 21814
|
76703 |
+
},
|
76704 |
+
{
|
76705 |
+
"epoch": 0.9699879951980792,
|
76706 |
+
"grad_norm": 0.057827651500701904,
|
76707 |
+
"learning_rate": 2.425131707033346e-06,
|
76708 |
+
"loss": 1.4461,
|
76709 |
+
"step": 21816
|
76710 |
+
},
|
76711 |
+
{
|
76712 |
+
"epoch": 0.9700769196567516,
|
76713 |
+
"grad_norm": 0.05834771320223808,
|
76714 |
+
"learning_rate": 2.4107721474950774e-06,
|
76715 |
+
"loss": 1.4415,
|
76716 |
+
"step": 21818
|
76717 |
+
},
|
76718 |
+
{
|
76719 |
+
"epoch": 0.970165844115424,
|
76720 |
+
"grad_norm": 0.057678621262311935,
|
76721 |
+
"learning_rate": 2.3964551240767684e-06,
|
76722 |
+
"loss": 1.4477,
|
76723 |
+
"step": 21820
|
76724 |
+
},
|
76725 |
+
{
|
76726 |
+
"epoch": 0.9702547685740963,
|
76727 |
+
"grad_norm": 0.05750925838947296,
|
76728 |
+
"learning_rate": 2.382180638002385e-06,
|
76729 |
+
"loss": 1.4441,
|
76730 |
+
"step": 21822
|
76731 |
+
},
|
76732 |
+
{
|
76733 |
+
"epoch": 0.9703436930327687,
|
76734 |
+
"grad_norm": 0.056883055716753006,
|
76735 |
+
"learning_rate": 2.3679486904921164e-06,
|
76736 |
+
"loss": 1.4452,
|
76737 |
+
"step": 21824
|
76738 |
+
},
|
76739 |
+
{
|
76740 |
+
"epoch": 0.970432617491441,
|
76741 |
+
"grad_norm": 0.057405877858400345,
|
76742 |
+
"learning_rate": 2.353759282762602e-06,
|
76743 |
+
"loss": 1.4487,
|
76744 |
+
"step": 21826
|
76745 |
+
},
|
76746 |
+
{
|
76747 |
+
"epoch": 0.9705215419501134,
|
76748 |
+
"grad_norm": 0.056650012731552124,
|
76749 |
+
"learning_rate": 2.339612416026815e-06,
|
76750 |
+
"loss": 1.4426,
|
76751 |
+
"step": 21828
|
76752 |
+
},
|
76753 |
+
{
|
76754 |
+
"epoch": 0.9706104664087858,
|
76755 |
+
"grad_norm": 0.05729759484529495,
|
76756 |
+
"learning_rate": 2.3255080914940106e-06,
|
76757 |
+
"loss": 1.4483,
|
76758 |
+
"step": 21830
|
76759 |
+
},
|
76760 |
+
{
|
76761 |
+
"epoch": 0.970699390867458,
|
76762 |
+
"grad_norm": 0.05801773816347122,
|
76763 |
+
"learning_rate": 2.3114463103700023e-06,
|
76764 |
+
"loss": 1.4429,
|
76765 |
+
"step": 21832
|
76766 |
+
},
|
76767 |
+
{
|
76768 |
+
"epoch": 0.9707883153261304,
|
76769 |
+
"grad_norm": 0.057637542486190796,
|
76770 |
+
"learning_rate": 2.2974270738567726e-06,
|
76771 |
+
"loss": 1.4442,
|
76772 |
+
"step": 21834
|
76773 |
+
},
|
76774 |
+
{
|
76775 |
+
"epoch": 0.9708772397848028,
|
76776 |
+
"grad_norm": 0.058352191001176834,
|
76777 |
+
"learning_rate": 2.2834503831528076e-06,
|
76778 |
+
"loss": 1.4448,
|
76779 |
+
"step": 21836
|
76780 |
+
},
|
76781 |
+
{
|
76782 |
+
"epoch": 0.9709661642434751,
|
76783 |
+
"grad_norm": 0.057397257536649704,
|
76784 |
+
"learning_rate": 2.2695162394528735e-06,
|
76785 |
+
"loss": 1.4414,
|
76786 |
+
"step": 21838
|
76787 |
+
},
|
76788 |
+
{
|
76789 |
+
"epoch": 0.9710550887021475,
|
76790 |
+
"grad_norm": 0.05693025141954422,
|
76791 |
+
"learning_rate": 2.2556246439481286e-06,
|
76792 |
+
"loss": 1.4481,
|
76793 |
+
"step": 21840
|
76794 |
+
},
|
76795 |
+
{
|
76796 |
+
"epoch": 0.9711440131608199,
|
76797 |
+
"grad_norm": 0.05720305070281029,
|
76798 |
+
"learning_rate": 2.2417755978260123e-06,
|
76799 |
+
"loss": 1.452,
|
76800 |
+
"step": 21842
|
76801 |
+
},
|
76802 |
+
{
|
76803 |
+
"epoch": 0.9712329376194923,
|
76804 |
+
"grad_norm": 0.057424396276474,
|
76805 |
+
"learning_rate": 2.2279691022705216e-06,
|
76806 |
+
"loss": 1.4438,
|
76807 |
+
"step": 21844
|
76808 |
+
},
|
76809 |
+
{
|
76810 |
+
"epoch": 0.9713218620781646,
|
76811 |
+
"grad_norm": 0.057667020708322525,
|
76812 |
+
"learning_rate": 2.2142051584618237e-06,
|
76813 |
+
"loss": 1.4478,
|
76814 |
+
"step": 21846
|
76815 |
+
},
|
76816 |
+
{
|
76817 |
+
"epoch": 0.971410786536837,
|
76818 |
+
"grad_norm": 0.058313172310590744,
|
76819 |
+
"learning_rate": 2.200483767576589e-06,
|
76820 |
+
"loss": 1.4476,
|
76821 |
+
"step": 21848
|
76822 |
+
},
|
76823 |
+
{
|
76824 |
+
"epoch": 0.9714997109955094,
|
76825 |
+
"grad_norm": 0.057696230709552765,
|
76826 |
+
"learning_rate": 2.1868049307877113e-06,
|
76827 |
+
"loss": 1.4433,
|
76828 |
+
"step": 21850
|
76829 |
+
},
|
76830 |
+
{
|
76831 |
+
"epoch": 0.9715886354541817,
|
76832 |
+
"grad_norm": 0.057298045605421066,
|
76833 |
+
"learning_rate": 2.1731686492644787e-06,
|
76834 |
+
"loss": 1.447,
|
76835 |
+
"step": 21852
|
76836 |
+
},
|
76837 |
+
{
|
76838 |
+
"epoch": 0.971677559912854,
|
76839 |
+
"grad_norm": 0.05838226154446602,
|
76840 |
+
"learning_rate": 2.1595749241726805e-06,
|
76841 |
+
"loss": 1.4426,
|
76842 |
+
"step": 21854
|
76843 |
+
},
|
76844 |
+
{
|
76845 |
+
"epoch": 0.9717664843715264,
|
76846 |
+
"grad_norm": 0.057391196489334106,
|
76847 |
+
"learning_rate": 2.146023756674331e-06,
|
76848 |
+
"loss": 1.4436,
|
76849 |
+
"step": 21856
|
76850 |
+
},
|
76851 |
+
{
|
76852 |
+
"epoch": 0.9718554088301987,
|
76853 |
+
"grad_norm": 0.05735941603779793,
|
76854 |
+
"learning_rate": 2.132515147927838e-06,
|
76855 |
+
"loss": 1.4449,
|
76856 |
+
"step": 21858
|
76857 |
+
},
|
76858 |
+
{
|
76859 |
+
"epoch": 0.9719443332888711,
|
76860 |
+
"grad_norm": 0.05849364399909973,
|
76861 |
+
"learning_rate": 2.1190490990879994e-06,
|
76862 |
+
"loss": 1.4509,
|
76863 |
+
"step": 21860
|
76864 |
+
},
|
76865 |
+
{
|
76866 |
+
"epoch": 0.9720332577475435,
|
76867 |
+
"grad_norm": 0.05746670067310333,
|
76868 |
+
"learning_rate": 2.10562561130595e-06,
|
76869 |
+
"loss": 1.4453,
|
76870 |
+
"step": 21862
|
76871 |
+
},
|
76872 |
+
{
|
76873 |
+
"epoch": 0.9721221822062158,
|
76874 |
+
"grad_norm": 0.05694587528705597,
|
76875 |
+
"learning_rate": 2.0922446857291055e-06,
|
76876 |
+
"loss": 1.4459,
|
76877 |
+
"step": 21864
|
76878 |
+
},
|
76879 |
+
{
|
76880 |
+
"epoch": 0.9722111066648882,
|
76881 |
+
"grad_norm": 0.05823846161365509,
|
76882 |
+
"learning_rate": 2.0789063235014396e-06,
|
76883 |
+
"loss": 1.4386,
|
76884 |
+
"step": 21866
|
76885 |
+
},
|
76886 |
+
{
|
76887 |
+
"epoch": 0.9723000311235606,
|
76888 |
+
"grad_norm": 0.05770362168550491,
|
76889 |
+
"learning_rate": 2.065610525763095e-06,
|
76890 |
+
"loss": 1.4468,
|
76891 |
+
"step": 21868
|
76892 |
+
},
|
76893 |
+
{
|
76894 |
+
"epoch": 0.9723889555822329,
|
76895 |
+
"grad_norm": 0.05771014466881752,
|
76896 |
+
"learning_rate": 2.052357293650775e-06,
|
76897 |
+
"loss": 1.4437,
|
76898 |
+
"step": 21870
|
76899 |
+
},
|
76900 |
+
{
|
76901 |
+
"epoch": 0.9724778800409053,
|
76902 |
+
"grad_norm": 0.058271683752536774,
|
76903 |
+
"learning_rate": 2.039146628297295e-06,
|
76904 |
+
"loss": 1.446,
|
76905 |
+
"step": 21872
|
76906 |
+
},
|
76907 |
+
{
|
76908 |
+
"epoch": 0.9725668044995777,
|
76909 |
+
"grad_norm": 0.057467687875032425,
|
76910 |
+
"learning_rate": 2.0259785308319176e-06,
|
76911 |
+
"loss": 1.4474,
|
76912 |
+
"step": 21874
|
76913 |
+
},
|
76914 |
+
{
|
76915 |
+
"epoch": 0.9726557289582499,
|
76916 |
+
"grad_norm": 0.058236099779605865,
|
76917 |
+
"learning_rate": 2.012853002380466e-06,
|
76918 |
+
"loss": 1.4466,
|
76919 |
+
"step": 21876
|
76920 |
+
},
|
76921 |
+
{
|
76922 |
+
"epoch": 0.9727446534169223,
|
76923 |
+
"grad_norm": 0.057283271104097366,
|
76924 |
+
"learning_rate": 1.9997700440649302e-06,
|
76925 |
+
"loss": 1.4443,
|
76926 |
+
"step": 21878
|
76927 |
+
},
|
76928 |
+
{
|
76929 |
+
"epoch": 0.9728335778755947,
|
76930 |
+
"grad_norm": 0.057595402002334595,
|
76931 |
+
"learning_rate": 1.9867296570036387e-06,
|
76932 |
+
"loss": 1.4457,
|
76933 |
+
"step": 21880
|
76934 |
+
},
|
76935 |
+
{
|
76936 |
+
"epoch": 0.972922502334267,
|
76937 |
+
"grad_norm": 0.05784250423312187,
|
76938 |
+
"learning_rate": 1.973731842311366e-06,
|
76939 |
+
"loss": 1.4421,
|
76940 |
+
"step": 21882
|
76941 |
+
},
|
76942 |
+
{
|
76943 |
+
"epoch": 0.9730114267929394,
|
76944 |
+
"grad_norm": 0.057085875421762466,
|
76945 |
+
"learning_rate": 1.9607766010991677e-06,
|
76946 |
+
"loss": 1.4472,
|
76947 |
+
"step": 21884
|
76948 |
+
},
|
76949 |
+
{
|
76950 |
+
"epoch": 0.9731003512516118,
|
76951 |
+
"grad_norm": 0.05725502967834473,
|
76952 |
+
"learning_rate": 1.9478639344746585e-06,
|
76953 |
+
"loss": 1.4512,
|
76954 |
+
"step": 21886
|
76955 |
+
},
|
76956 |
+
{
|
76957 |
+
"epoch": 0.9731892757102841,
|
76958 |
+
"grad_norm": 0.057032741606235504,
|
76959 |
+
"learning_rate": 1.934993843541566e-06,
|
76960 |
+
"loss": 1.4412,
|
76961 |
+
"step": 21888
|
76962 |
+
},
|
76963 |
+
{
|
76964 |
+
"epoch": 0.9732782001689565,
|
76965 |
+
"grad_norm": 0.056812822818756104,
|
76966 |
+
"learning_rate": 1.9221663294000657e-06,
|
76967 |
+
"loss": 1.4454,
|
76968 |
+
"step": 21890
|
76969 |
+
},
|
76970 |
+
{
|
76971 |
+
"epoch": 0.9733671246276289,
|
76972 |
+
"grad_norm": 0.057856421917676926,
|
76973 |
+
"learning_rate": 1.9093813931467807e-06,
|
76974 |
+
"loss": 1.4446,
|
76975 |
+
"step": 21892
|
76976 |
+
},
|
76977 |
+
{
|
76978 |
+
"epoch": 0.9734560490863012,
|
76979 |
+
"grad_norm": 0.05846314877271652,
|
76980 |
+
"learning_rate": 1.8966390358746145e-06,
|
76981 |
+
"loss": 1.4441,
|
76982 |
+
"step": 21894
|
76983 |
+
},
|
76984 |
+
{
|
76985 |
+
"epoch": 0.9735449735449735,
|
76986 |
+
"grad_norm": 0.05669744685292244,
|
76987 |
+
"learning_rate": 1.883939258672751e-06,
|
76988 |
+
"loss": 1.4504,
|
76989 |
+
"step": 21896
|
76990 |
+
},
|
76991 |
+
{
|
76992 |
+
"epoch": 0.9736338980036459,
|
76993 |
+
"grad_norm": 0.05733288824558258,
|
76994 |
+
"learning_rate": 1.8712820626268778e-06,
|
76995 |
+
"loss": 1.453,
|
76996 |
+
"step": 21898
|
76997 |
+
},
|
76998 |
+
{
|
76999 |
+
"epoch": 0.9737228224623182,
|
77000 |
+
"grad_norm": 0.05750066787004471,
|
77001 |
+
"learning_rate": 1.8586674488190736e-06,
|
77002 |
+
"loss": 1.4435,
|
77003 |
+
"step": 21900
|
77004 |
+
},
|
77005 |
+
{
|
77006 |
+
"epoch": 0.9738117469209906,
|
77007 |
+
"grad_norm": 0.05767687410116196,
|
77008 |
+
"learning_rate": 1.8460954183275314e-06,
|
77009 |
+
"loss": 1.4426,
|
77010 |
+
"step": 21902
|
77011 |
+
},
|
77012 |
+
{
|
77013 |
+
"epoch": 0.973900671379663,
|
77014 |
+
"grad_norm": 0.05756646394729614,
|
77015 |
+
"learning_rate": 1.8335659722271136e-06,
|
77016 |
+
"loss": 1.4368,
|
77017 |
+
"step": 21904
|
77018 |
+
},
|
77019 |
+
{
|
77020 |
+
"epoch": 0.9739895958383353,
|
77021 |
+
"grad_norm": 0.057719554752111435,
|
77022 |
+
"learning_rate": 1.8210791115887971e-06,
|
77023 |
+
"loss": 1.4422,
|
77024 |
+
"step": 21906
|
77025 |
+
},
|
77026 |
+
{
|
77027 |
+
"epoch": 0.9740785202970077,
|
77028 |
+
"grad_norm": 0.05758875980973244,
|
77029 |
+
"learning_rate": 1.8086348374800055e-06,
|
77030 |
+
"loss": 1.4432,
|
77031 |
+
"step": 21908
|
77032 |
+
},
|
77033 |
+
{
|
77034 |
+
"epoch": 0.9741674447556801,
|
77035 |
+
"grad_norm": 0.0580807626247406,
|
77036 |
+
"learning_rate": 1.7962331509646103e-06,
|
77037 |
+
"loss": 1.4417,
|
77038 |
+
"step": 21910
|
77039 |
+
},
|
77040 |
+
{
|
77041 |
+
"epoch": 0.9742563692143524,
|
77042 |
+
"grad_norm": 0.057357918471097946,
|
77043 |
+
"learning_rate": 1.7838740531027076e-06,
|
77044 |
+
"loss": 1.4442,
|
77045 |
+
"step": 21912
|
77046 |
+
},
|
77047 |
+
{
|
77048 |
+
"epoch": 0.9743452936730248,
|
77049 |
+
"grad_norm": 0.05980123206973076,
|
77050 |
+
"learning_rate": 1.7715575449508413e-06,
|
77051 |
+
"loss": 1.4484,
|
77052 |
+
"step": 21914
|
77053 |
+
},
|
77054 |
+
{
|
77055 |
+
"epoch": 0.9744342181316972,
|
77056 |
+
"grad_norm": 0.05732397362589836,
|
77057 |
+
"learning_rate": 1.7592836275618361e-06,
|
77058 |
+
"loss": 1.4435,
|
77059 |
+
"step": 21916
|
77060 |
+
},
|
77061 |
+
{
|
77062 |
+
"epoch": 0.9745231425903694,
|
77063 |
+
"grad_norm": 0.05790344625711441,
|
77064 |
+
"learning_rate": 1.7470523019849084e-06,
|
77065 |
+
"loss": 1.446,
|
77066 |
+
"step": 21918
|
77067 |
+
},
|
77068 |
+
{
|
77069 |
+
"epoch": 0.9746120670490418,
|
77070 |
+
"grad_norm": 0.0583200603723526,
|
77071 |
+
"learning_rate": 1.734863569265721e-06,
|
77072 |
+
"loss": 1.4472,
|
77073 |
+
"step": 21920
|
77074 |
+
},
|
77075 |
+
{
|
77076 |
+
"epoch": 0.9747009915077142,
|
77077 |
+
"grad_norm": 0.057706188410520554,
|
77078 |
+
"learning_rate": 1.7227174304461635e-06,
|
77079 |
+
"loss": 1.4404,
|
77080 |
+
"step": 21922
|
77081 |
+
},
|
77082 |
+
{
|
77083 |
+
"epoch": 0.9747899159663865,
|
77084 |
+
"grad_norm": 0.05923660844564438,
|
77085 |
+
"learning_rate": 1.7106138865645716e-06,
|
77086 |
+
"loss": 1.4471,
|
77087 |
+
"step": 21924
|
77088 |
+
},
|
77089 |
+
{
|
77090 |
+
"epoch": 0.9748788404250589,
|
77091 |
+
"grad_norm": 0.057506807148456573,
|
77092 |
+
"learning_rate": 1.698552938655562e-06,
|
77093 |
+
"loss": 1.4454,
|
77094 |
+
"step": 21926
|
77095 |
+
},
|
77096 |
+
{
|
77097 |
+
"epoch": 0.9749677648837313,
|
77098 |
+
"grad_norm": 0.057615265250205994,
|
77099 |
+
"learning_rate": 1.6865345877502546e-06,
|
77100 |
+
"loss": 1.4465,
|
77101 |
+
"step": 21928
|
77102 |
+
},
|
77103 |
+
{
|
77104 |
+
"epoch": 0.9750566893424036,
|
77105 |
+
"grad_norm": 0.05670270696282387,
|
77106 |
+
"learning_rate": 1.6745588348758833e-06,
|
77107 |
+
"loss": 1.4418,
|
77108 |
+
"step": 21930
|
77109 |
+
},
|
77110 |
+
{
|
77111 |
+
"epoch": 0.975145613801076,
|
77112 |
+
"grad_norm": 0.057712372392416,
|
77113 |
+
"learning_rate": 1.662625681056351e-06,
|
77114 |
+
"loss": 1.4462,
|
77115 |
+
"step": 21932
|
77116 |
+
},
|
77117 |
+
{
|
77118 |
+
"epoch": 0.9752345382597484,
|
77119 |
+
"grad_norm": 0.05813715234398842,
|
77120 |
+
"learning_rate": 1.65073512731162e-06,
|
77121 |
+
"loss": 1.446,
|
77122 |
+
"step": 21934
|
77123 |
+
},
|
77124 |
+
{
|
77125 |
+
"epoch": 0.9753234627184207,
|
77126 |
+
"grad_norm": 0.05662925913929939,
|
77127 |
+
"learning_rate": 1.6388871746582102e-06,
|
77128 |
+
"loss": 1.4411,
|
77129 |
+
"step": 21936
|
77130 |
+
},
|
77131 |
+
{
|
77132 |
+
"epoch": 0.9754123871770931,
|
77133 |
+
"grad_norm": 0.056773193180561066,
|
77134 |
+
"learning_rate": 1.627081824108978e-06,
|
77135 |
+
"loss": 1.4469,
|
77136 |
+
"step": 21938
|
77137 |
+
},
|
77138 |
+
{
|
77139 |
+
"epoch": 0.9755013116357654,
|
77140 |
+
"grad_norm": 0.057524967938661575,
|
77141 |
+
"learning_rate": 1.6153190766730053e-06,
|
77142 |
+
"loss": 1.4463,
|
77143 |
+
"step": 21940
|
77144 |
+
},
|
77145 |
+
{
|
77146 |
+
"epoch": 0.9755902360944377,
|
77147 |
+
"grad_norm": 0.05839113891124725,
|
77148 |
+
"learning_rate": 1.6035989333558765e-06,
|
77149 |
+
"loss": 1.4537,
|
77150 |
+
"step": 21942
|
77151 |
+
},
|
77152 |
+
{
|
77153 |
+
"epoch": 0.9756791605531101,
|
77154 |
+
"grad_norm": 0.058153193444013596,
|
77155 |
+
"learning_rate": 1.5919213951594569e-06,
|
77156 |
+
"loss": 1.4476,
|
77157 |
+
"step": 21944
|
77158 |
+
},
|
77159 |
+
{
|
77160 |
+
"epoch": 0.9757680850117825,
|
77161 |
+
"grad_norm": 0.05714964494109154,
|
77162 |
+
"learning_rate": 1.5802864630820591e-06,
|
77163 |
+
"loss": 1.4424,
|
77164 |
+
"step": 21946
|
77165 |
+
},
|
77166 |
+
{
|
77167 |
+
"epoch": 0.9758570094704548,
|
77168 |
+
"grad_norm": 0.0575794093310833,
|
77169 |
+
"learning_rate": 1.5686941381182206e-06,
|
77170 |
+
"loss": 1.4466,
|
77171 |
+
"step": 21948
|
77172 |
+
},
|
77173 |
+
{
|
77174 |
+
"epoch": 0.9759459339291272,
|
77175 |
+
"grad_norm": 0.057474132627248764,
|
77176 |
+
"learning_rate": 1.5571444212588715e-06,
|
77177 |
+
"loss": 1.4455,
|
77178 |
+
"step": 21950
|
77179 |
+
},
|
77180 |
+
{
|
77181 |
+
"epoch": 0.9760348583877996,
|
77182 |
+
"grad_norm": 0.05650651827454567,
|
77183 |
+
"learning_rate": 1.5456373134914437e-06,
|
77184 |
+
"loss": 1.449,
|
77185 |
+
"step": 21952
|
77186 |
+
},
|
77187 |
+
{
|
77188 |
+
"epoch": 0.9761237828464719,
|
77189 |
+
"grad_norm": 0.05746529996395111,
|
77190 |
+
"learning_rate": 1.534172815799484e-06,
|
77191 |
+
"loss": 1.4398,
|
77192 |
+
"step": 21954
|
77193 |
+
},
|
77194 |
+
{
|
77195 |
+
"epoch": 0.9762127073051443,
|
77196 |
+
"grad_norm": 0.05808810144662857,
|
77197 |
+
"learning_rate": 1.522750929163208e-06,
|
77198 |
+
"loss": 1.439,
|
77199 |
+
"step": 21956
|
77200 |
+
},
|
77201 |
+
{
|
77202 |
+
"epoch": 0.9763016317638167,
|
77203 |
+
"grad_norm": 0.05777526646852493,
|
77204 |
+
"learning_rate": 1.5113716545588352e-06,
|
77205 |
+
"loss": 1.4447,
|
77206 |
+
"step": 21958
|
77207 |
+
},
|
77208 |
+
{
|
77209 |
+
"epoch": 0.976390556222489,
|
77210 |
+
"grad_norm": 0.057585395872592926,
|
77211 |
+
"learning_rate": 1.5000349929591982e-06,
|
77212 |
+
"loss": 1.4507,
|
77213 |
+
"step": 21960
|
77214 |
+
},
|
77215 |
+
{
|
77216 |
+
"epoch": 0.9764794806811613,
|
77217 |
+
"grad_norm": 0.05766819417476654,
|
77218 |
+
"learning_rate": 1.4887409453333555e-06,
|
77219 |
+
"loss": 1.4515,
|
77220 |
+
"step": 21962
|
77221 |
+
},
|
77222 |
+
{
|
77223 |
+
"epoch": 0.9765684051398337,
|
77224 |
+
"grad_norm": 0.05719029903411865,
|
77225 |
+
"learning_rate": 1.4774895126468125e-06,
|
77226 |
+
"loss": 1.4468,
|
77227 |
+
"step": 21964
|
77228 |
+
},
|
77229 |
+
{
|
77230 |
+
"epoch": 0.976657329598506,
|
77231 |
+
"grad_norm": 0.05869666114449501,
|
77232 |
+
"learning_rate": 1.4662806958614105e-06,
|
77233 |
+
"loss": 1.4479,
|
77234 |
+
"step": 21966
|
77235 |
+
},
|
77236 |
+
{
|
77237 |
+
"epoch": 0.9767462540571784,
|
77238 |
+
"grad_norm": 0.057645734399557114,
|
77239 |
+
"learning_rate": 1.4551144959352724e-06,
|
77240 |
+
"loss": 1.4539,
|
77241 |
+
"step": 21968
|
77242 |
+
},
|
77243 |
+
{
|
77244 |
+
"epoch": 0.9768351785158508,
|
77245 |
+
"grad_norm": 0.058074142783880234,
|
77246 |
+
"learning_rate": 1.443990913822968e-06,
|
77247 |
+
"loss": 1.4441,
|
77248 |
+
"step": 21970
|
77249 |
+
},
|
77250 |
+
{
|
77251 |
+
"epoch": 0.9769241029745231,
|
77252 |
+
"grad_norm": 0.057652901858091354,
|
77253 |
+
"learning_rate": 1.432909950475403e-06,
|
77254 |
+
"loss": 1.4502,
|
77255 |
+
"step": 21972
|
77256 |
+
},
|
77257 |
+
{
|
77258 |
+
"epoch": 0.9770130274331955,
|
77259 |
+
"grad_norm": 0.056621208786964417,
|
77260 |
+
"learning_rate": 1.4218716068398196e-06,
|
77261 |
+
"loss": 1.4418,
|
77262 |
+
"step": 21974
|
77263 |
+
},
|
77264 |
+
{
|
77265 |
+
"epoch": 0.9771019518918679,
|
77266 |
+
"grad_norm": 0.05749661475419998,
|
77267 |
+
"learning_rate": 1.4108758838597969e-06,
|
77268 |
+
"loss": 1.4433,
|
77269 |
+
"step": 21976
|
77270 |
+
},
|
77271 |
+
{
|
77272 |
+
"epoch": 0.9771908763505402,
|
77273 |
+
"grad_norm": 0.05827660858631134,
|
77274 |
+
"learning_rate": 1.3999227824753047e-06,
|
77275 |
+
"loss": 1.4444,
|
77276 |
+
"step": 21978
|
77277 |
+
},
|
77278 |
+
{
|
77279 |
+
"epoch": 0.9772798008092126,
|
77280 |
+
"grad_norm": 0.057929717004299164,
|
77281 |
+
"learning_rate": 1.3890123036227054e-06,
|
77282 |
+
"loss": 1.4509,
|
77283 |
+
"step": 21980
|
77284 |
+
},
|
77285 |
+
{
|
77286 |
+
"epoch": 0.977368725267885,
|
77287 |
+
"grad_norm": 0.058121420443058014,
|
77288 |
+
"learning_rate": 1.3781444482345863e-06,
|
77289 |
+
"loss": 1.4477,
|
77290 |
+
"step": 21982
|
77291 |
+
},
|
77292 |
+
{
|
77293 |
+
"epoch": 0.9774576497265572,
|
77294 |
+
"grad_norm": 0.057763341814279556,
|
77295 |
+
"learning_rate": 1.3673192172400927e-06,
|
77296 |
+
"loss": 1.4427,
|
77297 |
+
"step": 21984
|
77298 |
+
},
|
77299 |
+
{
|
77300 |
+
"epoch": 0.9775465741852296,
|
77301 |
+
"grad_norm": 0.05874239280819893,
|
77302 |
+
"learning_rate": 1.3565366115645405e-06,
|
77303 |
+
"loss": 1.4478,
|
77304 |
+
"step": 21986
|
77305 |
+
},
|
77306 |
+
{
|
77307 |
+
"epoch": 0.977635498643902,
|
77308 |
+
"grad_norm": 0.05708703398704529,
|
77309 |
+
"learning_rate": 1.3457966321296921e-06,
|
77310 |
+
"loss": 1.4451,
|
77311 |
+
"step": 21988
|
77312 |
+
},
|
77313 |
+
{
|
77314 |
+
"epoch": 0.9777244231025743,
|
77315 |
+
"grad_norm": 0.05787283182144165,
|
77316 |
+
"learning_rate": 1.335099279853591e-06,
|
77317 |
+
"loss": 1.4457,
|
77318 |
+
"step": 21990
|
77319 |
+
},
|
77320 |
+
{
|
77321 |
+
"epoch": 0.9778133475612467,
|
77322 |
+
"grad_norm": 0.05789671465754509,
|
77323 |
+
"learning_rate": 1.3244445556507834e-06,
|
77324 |
+
"loss": 1.445,
|
77325 |
+
"step": 21992
|
77326 |
+
},
|
77327 |
+
{
|
77328 |
+
"epoch": 0.9779022720199191,
|
77329 |
+
"grad_norm": 0.0576728992164135,
|
77330 |
+
"learning_rate": 1.3138324604320961e-06,
|
77331 |
+
"loss": 1.4467,
|
77332 |
+
"step": 21994
|
77333 |
+
},
|
77334 |
+
{
|
77335 |
+
"epoch": 0.9779911964785915,
|
77336 |
+
"grad_norm": 0.05753236636519432,
|
77337 |
+
"learning_rate": 1.3032629951045817e-06,
|
77338 |
+
"loss": 1.4464,
|
77339 |
+
"step": 21996
|
77340 |
+
},
|
77341 |
+
{
|
77342 |
+
"epoch": 0.9780801209372638,
|
77343 |
+
"grad_norm": 0.05750956013798714,
|
77344 |
+
"learning_rate": 1.2927361605718502e-06,
|
77345 |
+
"loss": 1.4436,
|
77346 |
+
"step": 21998
|
77347 |
+
},
|
77348 |
+
{
|
77349 |
+
"epoch": 0.9781690453959362,
|
77350 |
+
"grad_norm": 0.05757934972643852,
|
77351 |
+
"learning_rate": 1.2822519577337932e-06,
|
77352 |
+
"loss": 1.4457,
|
77353 |
+
"step": 22000
|
77354 |
+
},
|
77355 |
+
{
|
77356 |
+
"epoch": 0.9781690453959362,
|
77357 |
+
"eval_loss": 1.4320533275604248,
|
77358 |
+
"eval_runtime": 12.4698,
|
77359 |
+
"eval_samples_per_second": 554.141,
|
77360 |
+
"eval_steps_per_second": 69.288,
|
77361 |
+
"step": 22000
|
77362 |
}
|
77363 |
],
|
77364 |
"logging_steps": 2,
|
|
|
77378 |
"attributes": {}
|
77379 |
}
|
77380 |
},
|
77381 |
+
"total_flos": 4.707408710467584e+19,
|
77382 |
"train_batch_size": 768,
|
77383 |
"trial_name": null,
|
77384 |
"trial_params": null
|