Model save
Browse files- README.md +61 -0
- all_results.json +9 -0
- generation_config.json +6 -0
- model-00001-of-00003.safetensors +3 -0
- model-00002-of-00003.safetensors +3 -0
- model-00003-of-00003.safetensors +3 -0
- model.safetensors.index.json +298 -0
- runs/Jul22_01-29-07_gpu-1/events.out.tfevents.1721579981.gpu-1.907562.0 +2 -2
- train_results.json +9 -0
- trainer_state.json +2562 -0
README.md
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
base_model: mistralai/Mistral-7B-Instruct-v0.2
|
4 |
+
tags:
|
5 |
+
- trl
|
6 |
+
- dpo
|
7 |
+
- generated_from_trainer
|
8 |
+
model-index:
|
9 |
+
- name: uf-mistral-it-dpo-iopo-iter1-full
|
10 |
+
results: []
|
11 |
+
---
|
12 |
+
|
13 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
14 |
+
should probably proofread and complete it, then remove this comment. -->
|
15 |
+
|
16 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/nlee28/lucky/runs/3vm2mk1w)
|
17 |
+
# uf-mistral-it-dpo-iopo-iter1-full
|
18 |
+
|
19 |
+
This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on an unknown dataset.
|
20 |
+
|
21 |
+
## Model description
|
22 |
+
|
23 |
+
More information needed
|
24 |
+
|
25 |
+
## Intended uses & limitations
|
26 |
+
|
27 |
+
More information needed
|
28 |
+
|
29 |
+
## Training and evaluation data
|
30 |
+
|
31 |
+
More information needed
|
32 |
+
|
33 |
+
## Training procedure
|
34 |
+
|
35 |
+
### Training hyperparameters
|
36 |
+
|
37 |
+
The following hyperparameters were used during training:
|
38 |
+
- learning_rate: 5e-07
|
39 |
+
- train_batch_size: 4
|
40 |
+
- eval_batch_size: 8
|
41 |
+
- seed: 42
|
42 |
+
- distributed_type: multi-GPU
|
43 |
+
- num_devices: 2
|
44 |
+
- gradient_accumulation_steps: 4
|
45 |
+
- total_train_batch_size: 32
|
46 |
+
- total_eval_batch_size: 16
|
47 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
48 |
+
- lr_scheduler_type: cosine
|
49 |
+
- lr_scheduler_warmup_ratio: 0.1
|
50 |
+
- num_epochs: 1
|
51 |
+
|
52 |
+
### Training results
|
53 |
+
|
54 |
+
|
55 |
+
|
56 |
+
### Framework versions
|
57 |
+
|
58 |
+
- Transformers 4.42.4
|
59 |
+
- Pytorch 2.1.2.post303
|
60 |
+
- Datasets 2.18.0
|
61 |
+
- Tokenizers 0.19.1
|
all_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 0.9998519176662224,
|
3 |
+
"total_flos": 0.0,
|
4 |
+
"train_loss": 0.17761736296081995,
|
5 |
+
"train_runtime": 39274.1948,
|
6 |
+
"train_samples": 54020,
|
7 |
+
"train_samples_per_second": 1.375,
|
8 |
+
"train_steps_per_second": 0.043
|
9 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"transformers_version": "4.42.4"
|
6 |
+
}
|
model-00001-of-00003.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b6adac529813d1bf1618bb1855fff10ff3c88a24d60c17a34071a53e0784c43
|
3 |
+
size 4943162336
|
model-00002-of-00003.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ccb5a9bfe035142bd2d48518ceb07c019bebb830d2a6764ccff00a18cfb7dea
|
3 |
+
size 4999819336
|
model-00003-of-00003.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b870b0836e4851c7daaf6a56b31067727f20bef5a992a6f378eec79a1302c26e
|
3 |
+
size 4540516344
|
model.safetensors.index.json
ADDED
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"total_size": 14483464192
|
4 |
+
},
|
5 |
+
"weight_map": {
|
6 |
+
"lm_head.weight": "model-00003-of-00003.safetensors",
|
7 |
+
"model.embed_tokens.weight": "model-00001-of-00003.safetensors",
|
8 |
+
"model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
9 |
+
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
10 |
+
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
11 |
+
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
12 |
+
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
13 |
+
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
14 |
+
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
15 |
+
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
16 |
+
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
17 |
+
"model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
18 |
+
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
19 |
+
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
20 |
+
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
21 |
+
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
22 |
+
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
23 |
+
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
24 |
+
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
25 |
+
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
26 |
+
"model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
27 |
+
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
28 |
+
"model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
29 |
+
"model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
30 |
+
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
31 |
+
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
32 |
+
"model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
33 |
+
"model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
34 |
+
"model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
35 |
+
"model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
36 |
+
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
37 |
+
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
38 |
+
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
39 |
+
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
40 |
+
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
41 |
+
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
42 |
+
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
43 |
+
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
44 |
+
"model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
45 |
+
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
46 |
+
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
47 |
+
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
48 |
+
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
49 |
+
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
50 |
+
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
51 |
+
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
52 |
+
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
53 |
+
"model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
54 |
+
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
55 |
+
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
56 |
+
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
57 |
+
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
58 |
+
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
59 |
+
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
60 |
+
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
61 |
+
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
62 |
+
"model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
63 |
+
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
64 |
+
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
65 |
+
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
66 |
+
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
67 |
+
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
68 |
+
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
69 |
+
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
70 |
+
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
71 |
+
"model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
72 |
+
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
73 |
+
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
74 |
+
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
75 |
+
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
76 |
+
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
77 |
+
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
78 |
+
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
79 |
+
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
80 |
+
"model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
81 |
+
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
82 |
+
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
83 |
+
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
84 |
+
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
85 |
+
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
86 |
+
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
87 |
+
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
88 |
+
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
89 |
+
"model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
90 |
+
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
91 |
+
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
92 |
+
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
93 |
+
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
94 |
+
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
95 |
+
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
96 |
+
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
97 |
+
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
98 |
+
"model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
99 |
+
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
100 |
+
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
101 |
+
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
102 |
+
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
103 |
+
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
104 |
+
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
105 |
+
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
106 |
+
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
107 |
+
"model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
108 |
+
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
109 |
+
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
110 |
+
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
111 |
+
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
112 |
+
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
113 |
+
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
114 |
+
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
115 |
+
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
116 |
+
"model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
117 |
+
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
118 |
+
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
119 |
+
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
120 |
+
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
121 |
+
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
122 |
+
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
123 |
+
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
124 |
+
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
125 |
+
"model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
126 |
+
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
127 |
+
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
128 |
+
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
129 |
+
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
130 |
+
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
131 |
+
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
132 |
+
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
133 |
+
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
134 |
+
"model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
|
135 |
+
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
|
136 |
+
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
|
137 |
+
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
|
138 |
+
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
|
139 |
+
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
140 |
+
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
141 |
+
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
142 |
+
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
143 |
+
"model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
144 |
+
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
145 |
+
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
146 |
+
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
147 |
+
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
148 |
+
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
|
149 |
+
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
|
150 |
+
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
|
151 |
+
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
|
152 |
+
"model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
153 |
+
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
154 |
+
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
155 |
+
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
156 |
+
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
157 |
+
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
158 |
+
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
159 |
+
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
160 |
+
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
161 |
+
"model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
162 |
+
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
163 |
+
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
164 |
+
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
165 |
+
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
166 |
+
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
167 |
+
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
168 |
+
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
169 |
+
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
170 |
+
"model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
171 |
+
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
172 |
+
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
173 |
+
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
174 |
+
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
175 |
+
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
176 |
+
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
177 |
+
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
178 |
+
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
179 |
+
"model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
180 |
+
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
181 |
+
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
182 |
+
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
183 |
+
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
184 |
+
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
185 |
+
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
186 |
+
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
187 |
+
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
188 |
+
"model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
189 |
+
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
190 |
+
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
191 |
+
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
192 |
+
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
193 |
+
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
194 |
+
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
195 |
+
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
196 |
+
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
197 |
+
"model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
198 |
+
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
199 |
+
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
200 |
+
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
201 |
+
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
202 |
+
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
203 |
+
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
204 |
+
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
205 |
+
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
206 |
+
"model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
207 |
+
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
208 |
+
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
209 |
+
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
210 |
+
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
211 |
+
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
212 |
+
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
213 |
+
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
214 |
+
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
215 |
+
"model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
216 |
+
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
217 |
+
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
218 |
+
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
219 |
+
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
220 |
+
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
221 |
+
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
222 |
+
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
223 |
+
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
224 |
+
"model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
225 |
+
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
226 |
+
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
227 |
+
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
228 |
+
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
229 |
+
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
230 |
+
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
231 |
+
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
232 |
+
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
233 |
+
"model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
|
234 |
+
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
|
235 |
+
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
|
236 |
+
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
|
237 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
|
238 |
+
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
|
239 |
+
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
|
240 |
+
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
|
241 |
+
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
|
242 |
+
"model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
243 |
+
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
244 |
+
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
245 |
+
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
246 |
+
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
247 |
+
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
248 |
+
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
249 |
+
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
250 |
+
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
251 |
+
"model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
252 |
+
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
253 |
+
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
254 |
+
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
255 |
+
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
256 |
+
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
257 |
+
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
258 |
+
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
259 |
+
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
260 |
+
"model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
261 |
+
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
262 |
+
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
263 |
+
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
264 |
+
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
265 |
+
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
266 |
+
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
267 |
+
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
268 |
+
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
269 |
+
"model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
270 |
+
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
271 |
+
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
272 |
+
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
273 |
+
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
274 |
+
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
275 |
+
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
276 |
+
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
277 |
+
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
278 |
+
"model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
279 |
+
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
280 |
+
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
281 |
+
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
282 |
+
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
283 |
+
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
284 |
+
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
285 |
+
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
286 |
+
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
287 |
+
"model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
|
288 |
+
"model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
|
289 |
+
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
|
290 |
+
"model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
|
291 |
+
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
|
292 |
+
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
|
293 |
+
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
|
294 |
+
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
|
295 |
+
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
|
296 |
+
"model.norm.weight": "model-00003-of-00003.safetensors"
|
297 |
+
}
|
298 |
+
}
|
runs/Jul22_01-29-07_gpu-1/events.out.tfevents.1721579981.gpu-1.907562.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b76435acc7b55713d2aefa653c0d9a0c5d0b40946c3666e28b5aa5c9336ddf7
|
3 |
+
size 121936
|
train_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 0.9998519176662224,
|
3 |
+
"total_flos": 0.0,
|
4 |
+
"train_loss": 0.17761736296081995,
|
5 |
+
"train_runtime": 39274.1948,
|
6 |
+
"train_samples": 54020,
|
7 |
+
"train_samples_per_second": 1.375,
|
8 |
+
"train_steps_per_second": 0.043
|
9 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,2562 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9998519176662224,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 1688,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.005923293351103213,
|
13 |
+
"grad_norm": 504.85714834798955,
|
14 |
+
"learning_rate": 2.9585798816568044e-08,
|
15 |
+
"logits/chosen": -2.2128281593322754,
|
16 |
+
"logits/rejected": -2.1649556159973145,
|
17 |
+
"logps/chosen": -334.58282470703125,
|
18 |
+
"logps/rejected": -174.33193969726562,
|
19 |
+
"loss": 0.694,
|
20 |
+
"rewards/accuracies": 0.41874998807907104,
|
21 |
+
"rewards/chosen": 0.003443267662078142,
|
22 |
+
"rewards/margins": 0.00374796474352479,
|
23 |
+
"rewards/rejected": -0.0003046986530534923,
|
24 |
+
"step": 10
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 0.011846586702206426,
|
28 |
+
"grad_norm": 519.6103666653933,
|
29 |
+
"learning_rate": 5.917159763313609e-08,
|
30 |
+
"logits/chosen": -2.2768917083740234,
|
31 |
+
"logits/rejected": -2.2541089057922363,
|
32 |
+
"logps/chosen": -442.1549377441406,
|
33 |
+
"logps/rejected": -184.51959228515625,
|
34 |
+
"loss": 0.6858,
|
35 |
+
"rewards/accuracies": 0.5625,
|
36 |
+
"rewards/chosen": 0.013772351667284966,
|
37 |
+
"rewards/margins": 0.025344645604491234,
|
38 |
+
"rewards/rejected": -0.011572292074561119,
|
39 |
+
"step": 20
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 0.01776988005330964,
|
43 |
+
"grad_norm": 433.16385797649355,
|
44 |
+
"learning_rate": 8.875739644970414e-08,
|
45 |
+
"logits/chosen": -2.2564902305603027,
|
46 |
+
"logits/rejected": -2.263812780380249,
|
47 |
+
"logps/chosen": -363.1085510253906,
|
48 |
+
"logps/rejected": -180.28768920898438,
|
49 |
+
"loss": 0.6461,
|
50 |
+
"rewards/accuracies": 0.7124999761581421,
|
51 |
+
"rewards/chosen": 0.08519863337278366,
|
52 |
+
"rewards/margins": 0.12328235059976578,
|
53 |
+
"rewards/rejected": -0.038083698600530624,
|
54 |
+
"step": 30
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"epoch": 0.023693173404412852,
|
58 |
+
"grad_norm": 312.3325020676633,
|
59 |
+
"learning_rate": 1.1834319526627217e-07,
|
60 |
+
"logits/chosen": -2.258511543273926,
|
61 |
+
"logits/rejected": -2.21596097946167,
|
62 |
+
"logps/chosen": -358.8316650390625,
|
63 |
+
"logps/rejected": -164.7125244140625,
|
64 |
+
"loss": 0.5831,
|
65 |
+
"rewards/accuracies": 0.6812499761581421,
|
66 |
+
"rewards/chosen": 0.21487005054950714,
|
67 |
+
"rewards/margins": 0.3081914782524109,
|
68 |
+
"rewards/rejected": -0.09332143515348434,
|
69 |
+
"step": 40
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"epoch": 0.029616466755516067,
|
73 |
+
"grad_norm": 290.61310964931636,
|
74 |
+
"learning_rate": 1.4792899408284022e-07,
|
75 |
+
"logits/chosen": -2.248854875564575,
|
76 |
+
"logits/rejected": -2.2407047748565674,
|
77 |
+
"logps/chosen": -362.57843017578125,
|
78 |
+
"logps/rejected": -178.95758056640625,
|
79 |
+
"loss": 0.4954,
|
80 |
+
"rewards/accuracies": 0.8062499761581421,
|
81 |
+
"rewards/chosen": 0.38509249687194824,
|
82 |
+
"rewards/margins": 0.6190904974937439,
|
83 |
+
"rewards/rejected": -0.23399806022644043,
|
84 |
+
"step": 50
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"epoch": 0.03553976010661928,
|
88 |
+
"grad_norm": 303.52300542572084,
|
89 |
+
"learning_rate": 1.7751479289940827e-07,
|
90 |
+
"logits/chosen": -2.21785044670105,
|
91 |
+
"logits/rejected": -2.182326316833496,
|
92 |
+
"logps/chosen": -348.1097412109375,
|
93 |
+
"logps/rejected": -162.1182403564453,
|
94 |
+
"loss": 0.4517,
|
95 |
+
"rewards/accuracies": 0.8125,
|
96 |
+
"rewards/chosen": 0.6382587552070618,
|
97 |
+
"rewards/margins": 0.9943715929985046,
|
98 |
+
"rewards/rejected": -0.3561127781867981,
|
99 |
+
"step": 60
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"epoch": 0.04146305345772249,
|
103 |
+
"grad_norm": 251.78388219726764,
|
104 |
+
"learning_rate": 2.0710059171597633e-07,
|
105 |
+
"logits/chosen": -2.2731988430023193,
|
106 |
+
"logits/rejected": -2.23414945602417,
|
107 |
+
"logps/chosen": -337.77349853515625,
|
108 |
+
"logps/rejected": -182.30088806152344,
|
109 |
+
"loss": 0.4179,
|
110 |
+
"rewards/accuracies": 0.8125,
|
111 |
+
"rewards/chosen": 0.6993478536605835,
|
112 |
+
"rewards/margins": 1.2595398426055908,
|
113 |
+
"rewards/rejected": -0.5601919889450073,
|
114 |
+
"step": 70
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.047386346808825704,
|
118 |
+
"grad_norm": 246.56195042754825,
|
119 |
+
"learning_rate": 2.3668639053254435e-07,
|
120 |
+
"logits/chosen": -2.2068114280700684,
|
121 |
+
"logits/rejected": -2.209404468536377,
|
122 |
+
"logps/chosen": -408.40484619140625,
|
123 |
+
"logps/rejected": -202.7127685546875,
|
124 |
+
"loss": 0.3483,
|
125 |
+
"rewards/accuracies": 0.856249988079071,
|
126 |
+
"rewards/chosen": 0.9628815650939941,
|
127 |
+
"rewards/margins": 1.7825168371200562,
|
128 |
+
"rewards/rejected": -0.8196353912353516,
|
129 |
+
"step": 80
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"epoch": 0.05330964015992892,
|
133 |
+
"grad_norm": 188.01879345063887,
|
134 |
+
"learning_rate": 2.662721893491124e-07,
|
135 |
+
"logits/chosen": -2.2164900302886963,
|
136 |
+
"logits/rejected": -2.2228798866271973,
|
137 |
+
"logps/chosen": -360.1303405761719,
|
138 |
+
"logps/rejected": -179.77536010742188,
|
139 |
+
"loss": 0.338,
|
140 |
+
"rewards/accuracies": 0.824999988079071,
|
141 |
+
"rewards/chosen": 0.9752426147460938,
|
142 |
+
"rewards/margins": 1.890472650527954,
|
143 |
+
"rewards/rejected": -0.9152299761772156,
|
144 |
+
"step": 90
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"epoch": 0.059232933511032135,
|
148 |
+
"grad_norm": 381.23591220338665,
|
149 |
+
"learning_rate": 2.9585798816568045e-07,
|
150 |
+
"logits/chosen": -2.2488455772399902,
|
151 |
+
"logits/rejected": -2.2343482971191406,
|
152 |
+
"logps/chosen": -347.54180908203125,
|
153 |
+
"logps/rejected": -175.11048889160156,
|
154 |
+
"loss": 0.3559,
|
155 |
+
"rewards/accuracies": 0.793749988079071,
|
156 |
+
"rewards/chosen": 1.015298843383789,
|
157 |
+
"rewards/margins": 1.9129976034164429,
|
158 |
+
"rewards/rejected": -0.897698700428009,
|
159 |
+
"step": 100
|
160 |
+
},
|
161 |
+
{
|
162 |
+
"epoch": 0.06515622686213535,
|
163 |
+
"grad_norm": 183.36363122937976,
|
164 |
+
"learning_rate": 3.254437869822485e-07,
|
165 |
+
"logits/chosen": -2.3256828784942627,
|
166 |
+
"logits/rejected": -2.2990269660949707,
|
167 |
+
"logps/chosen": -384.78863525390625,
|
168 |
+
"logps/rejected": -195.26393127441406,
|
169 |
+
"loss": 0.2998,
|
170 |
+
"rewards/accuracies": 0.84375,
|
171 |
+
"rewards/chosen": 1.2486822605133057,
|
172 |
+
"rewards/margins": 2.4611282348632812,
|
173 |
+
"rewards/rejected": -1.2124459743499756,
|
174 |
+
"step": 110
|
175 |
+
},
|
176 |
+
{
|
177 |
+
"epoch": 0.07107952021323856,
|
178 |
+
"grad_norm": 196.55190107882535,
|
179 |
+
"learning_rate": 3.5502958579881655e-07,
|
180 |
+
"logits/chosen": -2.2839980125427246,
|
181 |
+
"logits/rejected": -2.2439706325531006,
|
182 |
+
"logps/chosen": -316.72900390625,
|
183 |
+
"logps/rejected": -187.08209228515625,
|
184 |
+
"loss": 0.2897,
|
185 |
+
"rewards/accuracies": 0.875,
|
186 |
+
"rewards/chosen": 1.0929539203643799,
|
187 |
+
"rewards/margins": 2.364865779876709,
|
188 |
+
"rewards/rejected": -1.2719120979309082,
|
189 |
+
"step": 120
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"epoch": 0.07700281356434177,
|
193 |
+
"grad_norm": 187.394753566322,
|
194 |
+
"learning_rate": 3.8461538461538463e-07,
|
195 |
+
"logits/chosen": -2.3191745281219482,
|
196 |
+
"logits/rejected": -2.3147594928741455,
|
197 |
+
"logps/chosen": -341.1308288574219,
|
198 |
+
"logps/rejected": -192.97549438476562,
|
199 |
+
"loss": 0.2532,
|
200 |
+
"rewards/accuracies": 0.887499988079071,
|
201 |
+
"rewards/chosen": 1.4034380912780762,
|
202 |
+
"rewards/margins": 3.091428756713867,
|
203 |
+
"rewards/rejected": -1.6879905462265015,
|
204 |
+
"step": 130
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"epoch": 0.08292610691544498,
|
208 |
+
"grad_norm": 116.14861473966326,
|
209 |
+
"learning_rate": 4.1420118343195265e-07,
|
210 |
+
"logits/chosen": -2.361495018005371,
|
211 |
+
"logits/rejected": -2.321951150894165,
|
212 |
+
"logps/chosen": -333.0472412109375,
|
213 |
+
"logps/rejected": -179.1328582763672,
|
214 |
+
"loss": 0.2321,
|
215 |
+
"rewards/accuracies": 0.8812500238418579,
|
216 |
+
"rewards/chosen": 0.9758448600769043,
|
217 |
+
"rewards/margins": 3.051640510559082,
|
218 |
+
"rewards/rejected": -2.0757956504821777,
|
219 |
+
"step": 140
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 0.0888494002665482,
|
223 |
+
"grad_norm": 259.05273624826594,
|
224 |
+
"learning_rate": 4.437869822485207e-07,
|
225 |
+
"logits/chosen": -2.3843483924865723,
|
226 |
+
"logits/rejected": -2.3312084674835205,
|
227 |
+
"logps/chosen": -323.70526123046875,
|
228 |
+
"logps/rejected": -197.47764587402344,
|
229 |
+
"loss": 0.2627,
|
230 |
+
"rewards/accuracies": 0.8999999761581421,
|
231 |
+
"rewards/chosen": 0.864323616027832,
|
232 |
+
"rewards/margins": 3.313027858734131,
|
233 |
+
"rewards/rejected": -2.448704242706299,
|
234 |
+
"step": 150
|
235 |
+
},
|
236 |
+
{
|
237 |
+
"epoch": 0.09477269361765141,
|
238 |
+
"grad_norm": 260.57944130025356,
|
239 |
+
"learning_rate": 4.733727810650887e-07,
|
240 |
+
"logits/chosen": -2.3698296546936035,
|
241 |
+
"logits/rejected": -2.3505420684814453,
|
242 |
+
"logps/chosen": -360.91864013671875,
|
243 |
+
"logps/rejected": -198.164794921875,
|
244 |
+
"loss": 0.2367,
|
245 |
+
"rewards/accuracies": 0.887499988079071,
|
246 |
+
"rewards/chosen": 1.216972827911377,
|
247 |
+
"rewards/margins": 3.8299670219421387,
|
248 |
+
"rewards/rejected": -2.612994432449341,
|
249 |
+
"step": 160
|
250 |
+
},
|
251 |
+
{
|
252 |
+
"epoch": 0.10069598696875463,
|
253 |
+
"grad_norm": 150.81084803740066,
|
254 |
+
"learning_rate": 4.999994653198566e-07,
|
255 |
+
"logits/chosen": -2.3464887142181396,
|
256 |
+
"logits/rejected": -2.3302454948425293,
|
257 |
+
"logps/chosen": -335.73529052734375,
|
258 |
+
"logps/rejected": -194.806640625,
|
259 |
+
"loss": 0.2132,
|
260 |
+
"rewards/accuracies": 0.9125000238418579,
|
261 |
+
"rewards/chosen": 0.9059770703315735,
|
262 |
+
"rewards/margins": 3.8774585723876953,
|
263 |
+
"rewards/rejected": -2.9714818000793457,
|
264 |
+
"step": 170
|
265 |
+
},
|
266 |
+
{
|
267 |
+
"epoch": 0.10661928031985785,
|
268 |
+
"grad_norm": 280.1531227472986,
|
269 |
+
"learning_rate": 4.999353064699471e-07,
|
270 |
+
"logits/chosen": -2.364449977874756,
|
271 |
+
"logits/rejected": -2.3326315879821777,
|
272 |
+
"logps/chosen": -389.11236572265625,
|
273 |
+
"logps/rejected": -215.023681640625,
|
274 |
+
"loss": 0.236,
|
275 |
+
"rewards/accuracies": 0.9125000238418579,
|
276 |
+
"rewards/chosen": 0.8287426233291626,
|
277 |
+
"rewards/margins": 4.475917816162109,
|
278 |
+
"rewards/rejected": -3.647174835205078,
|
279 |
+
"step": 180
|
280 |
+
},
|
281 |
+
{
|
282 |
+
"epoch": 0.11254257367096106,
|
283 |
+
"grad_norm": 175.0444339450233,
|
284 |
+
"learning_rate": 4.99764243036258e-07,
|
285 |
+
"logits/chosen": -2.3520586490631104,
|
286 |
+
"logits/rejected": -2.3261897563934326,
|
287 |
+
"logps/chosen": -359.8037109375,
|
288 |
+
"logps/rejected": -211.98666381835938,
|
289 |
+
"loss": 0.1991,
|
290 |
+
"rewards/accuracies": 0.8999999761581421,
|
291 |
+
"rewards/chosen": 0.8636599779129028,
|
292 |
+
"rewards/margins": 5.269904136657715,
|
293 |
+
"rewards/rejected": -4.40624475479126,
|
294 |
+
"step": 190
|
295 |
+
},
|
296 |
+
{
|
297 |
+
"epoch": 0.11846586702206427,
|
298 |
+
"grad_norm": 237.8569924192833,
|
299 |
+
"learning_rate": 4.994863481875841e-07,
|
300 |
+
"logits/chosen": -2.3697867393493652,
|
301 |
+
"logits/rejected": -2.3272385597229004,
|
302 |
+
"logps/chosen": -382.9794006347656,
|
303 |
+
"logps/rejected": -224.5498046875,
|
304 |
+
"loss": 0.2205,
|
305 |
+
"rewards/accuracies": 0.9125000238418579,
|
306 |
+
"rewards/chosen": 0.797897219657898,
|
307 |
+
"rewards/margins": 5.273079872131348,
|
308 |
+
"rewards/rejected": -4.47518253326416,
|
309 |
+
"step": 200
|
310 |
+
},
|
311 |
+
{
|
312 |
+
"epoch": 0.12438916037316748,
|
313 |
+
"grad_norm": 283.96288222349347,
|
314 |
+
"learning_rate": 4.991017407876165e-07,
|
315 |
+
"logits/chosen": -2.3532214164733887,
|
316 |
+
"logits/rejected": -2.2996296882629395,
|
317 |
+
"logps/chosen": -370.9749450683594,
|
318 |
+
"logps/rejected": -198.25497436523438,
|
319 |
+
"loss": 0.194,
|
320 |
+
"rewards/accuracies": 0.893750011920929,
|
321 |
+
"rewards/chosen": 0.7681259512901306,
|
322 |
+
"rewards/margins": 5.055164337158203,
|
323 |
+
"rewards/rejected": -4.2870378494262695,
|
324 |
+
"step": 210
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.1303124537242707,
|
328 |
+
"grad_norm": 386.78866139109,
|
329 |
+
"learning_rate": 4.98610585344102e-07,
|
330 |
+
"logits/chosen": -2.400724172592163,
|
331 |
+
"logits/rejected": -2.375039577484131,
|
332 |
+
"logps/chosen": -351.29156494140625,
|
333 |
+
"logps/rejected": -214.4847412109375,
|
334 |
+
"loss": 0.2662,
|
335 |
+
"rewards/accuracies": 0.887499988079071,
|
336 |
+
"rewards/chosen": 0.7083452939987183,
|
337 |
+
"rewards/margins": 4.722594261169434,
|
338 |
+
"rewards/rejected": -4.014249324798584,
|
339 |
+
"step": 220
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"epoch": 0.13623574707537392,
|
343 |
+
"grad_norm": 441.8804995826954,
|
344 |
+
"learning_rate": 4.980130919384768e-07,
|
345 |
+
"logits/chosen": -2.423753261566162,
|
346 |
+
"logits/rejected": -2.3783416748046875,
|
347 |
+
"logps/chosen": -399.88494873046875,
|
348 |
+
"logps/rejected": -210.5980682373047,
|
349 |
+
"loss": 0.2184,
|
350 |
+
"rewards/accuracies": 0.8812500238418579,
|
351 |
+
"rewards/chosen": 0.6179746389389038,
|
352 |
+
"rewards/margins": 5.342424392700195,
|
353 |
+
"rewards/rejected": -4.72445011138916,
|
354 |
+
"step": 230
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"epoch": 0.14215904042647712,
|
358 |
+
"grad_norm": 327.95375338533654,
|
359 |
+
"learning_rate": 4.973095161360105e-07,
|
360 |
+
"logits/chosen": -2.3384299278259277,
|
361 |
+
"logits/rejected": -2.323362350463867,
|
362 |
+
"logps/chosen": -333.81536865234375,
|
363 |
+
"logps/rejected": -228.82601928710938,
|
364 |
+
"loss": 0.1762,
|
365 |
+
"rewards/accuracies": 0.9375,
|
366 |
+
"rewards/chosen": 0.22780442237854004,
|
367 |
+
"rewards/margins": 5.158236980438232,
|
368 |
+
"rewards/rejected": -4.93043327331543,
|
369 |
+
"step": 240
|
370 |
+
},
|
371 |
+
{
|
372 |
+
"epoch": 0.14808233377758034,
|
373 |
+
"grad_norm": 200.53822627799985,
|
374 |
+
"learning_rate": 4.965001588764913e-07,
|
375 |
+
"logits/chosen": -2.385148525238037,
|
376 |
+
"logits/rejected": -2.3672657012939453,
|
377 |
+
"logps/chosen": -397.1217041015625,
|
378 |
+
"logps/rejected": -228.70773315429688,
|
379 |
+
"loss": 0.1876,
|
380 |
+
"rewards/accuracies": 0.949999988079071,
|
381 |
+
"rewards/chosen": 1.1027851104736328,
|
382 |
+
"rewards/margins": 5.892121315002441,
|
383 |
+
"rewards/rejected": -4.789336204528809,
|
384 |
+
"step": 250
|
385 |
+
},
|
386 |
+
{
|
387 |
+
"epoch": 0.15400562712868354,
|
388 |
+
"grad_norm": 47.97910034361157,
|
389 |
+
"learning_rate": 4.955853663455072e-07,
|
390 |
+
"logits/chosen": -2.438140392303467,
|
391 |
+
"logits/rejected": -2.4087047576904297,
|
392 |
+
"logps/chosen": -373.9306945800781,
|
393 |
+
"logps/rejected": -242.48764038085938,
|
394 |
+
"loss": 0.1722,
|
395 |
+
"rewards/accuracies": 0.9125000238418579,
|
396 |
+
"rewards/chosen": 0.6679338216781616,
|
397 |
+
"rewards/margins": 5.903395175933838,
|
398 |
+
"rewards/rejected": -5.235461235046387,
|
399 |
+
"step": 260
|
400 |
+
},
|
401 |
+
{
|
402 |
+
"epoch": 0.15992892047978677,
|
403 |
+
"grad_norm": 131.31461307719647,
|
404 |
+
"learning_rate": 4.945655298263713e-07,
|
405 |
+
"logits/chosen": -2.3899612426757812,
|
406 |
+
"logits/rejected": -2.356353282928467,
|
407 |
+
"logps/chosen": -375.282470703125,
|
408 |
+
"logps/rejected": -228.1762237548828,
|
409 |
+
"loss": 0.1561,
|
410 |
+
"rewards/accuracies": 0.949999988079071,
|
411 |
+
"rewards/chosen": 0.6925370097160339,
|
412 |
+
"rewards/margins": 5.915196418762207,
|
413 |
+
"rewards/rejected": -5.222658634185791,
|
414 |
+
"step": 270
|
415 |
+
},
|
416 |
+
{
|
417 |
+
"epoch": 0.16585221383088997,
|
418 |
+
"grad_norm": 216.04443101572159,
|
419 |
+
"learning_rate": 4.934410855327585e-07,
|
420 |
+
"logits/chosen": -2.4134862422943115,
|
421 |
+
"logits/rejected": -2.404572010040283,
|
422 |
+
"logps/chosen": -332.552490234375,
|
423 |
+
"logps/rejected": -228.0837860107422,
|
424 |
+
"loss": 0.1927,
|
425 |
+
"rewards/accuracies": 0.9125000238418579,
|
426 |
+
"rewards/chosen": 0.5202642679214478,
|
427 |
+
"rewards/margins": 6.256397724151611,
|
428 |
+
"rewards/rejected": -5.736133575439453,
|
429 |
+
"step": 280
|
430 |
+
},
|
431 |
+
{
|
432 |
+
"epoch": 0.1717755071819932,
|
433 |
+
"grad_norm": 124.31837233503076,
|
434 |
+
"learning_rate": 4.922125144221252e-07,
|
435 |
+
"logits/chosen": -2.430438995361328,
|
436 |
+
"logits/rejected": -2.398848056793213,
|
437 |
+
"logps/chosen": -409.75347900390625,
|
438 |
+
"logps/rejected": -227.51846313476562,
|
439 |
+
"loss": 0.1461,
|
440 |
+
"rewards/accuracies": 0.925000011920929,
|
441 |
+
"rewards/chosen": 0.5862516164779663,
|
442 |
+
"rewards/margins": 6.339666843414307,
|
443 |
+
"rewards/rejected": -5.753414630889893,
|
444 |
+
"step": 290
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"epoch": 0.1776988005330964,
|
448 |
+
"grad_norm": 90.59877362683102,
|
449 |
+
"learning_rate": 4.90880341989989e-07,
|
450 |
+
"logits/chosen": -2.381986141204834,
|
451 |
+
"logits/rejected": -2.354752540588379,
|
452 |
+
"logps/chosen": -372.0549011230469,
|
453 |
+
"logps/rejected": -238.38900756835938,
|
454 |
+
"loss": 0.2123,
|
455 |
+
"rewards/accuracies": 0.9312499761581421,
|
456 |
+
"rewards/chosen": 0.7634764909744263,
|
457 |
+
"rewards/margins": 6.4423956871032715,
|
458 |
+
"rewards/rejected": -5.678919792175293,
|
459 |
+
"step": 300
|
460 |
+
},
|
461 |
+
{
|
462 |
+
"epoch": 0.18362209388419962,
|
463 |
+
"grad_norm": 214.55771845505996,
|
464 |
+
"learning_rate": 4.894451380451589e-07,
|
465 |
+
"logits/chosen": -2.4163644313812256,
|
466 |
+
"logits/rejected": -2.3825597763061523,
|
467 |
+
"logps/chosen": -325.26171875,
|
468 |
+
"logps/rejected": -230.36776733398438,
|
469 |
+
"loss": 0.173,
|
470 |
+
"rewards/accuracies": 0.918749988079071,
|
471 |
+
"rewards/chosen": 0.395336776971817,
|
472 |
+
"rewards/margins": 5.5172624588012695,
|
473 |
+
"rewards/rejected": -5.121925354003906,
|
474 |
+
"step": 310
|
475 |
+
},
|
476 |
+
{
|
477 |
+
"epoch": 0.18954538723530281,
|
478 |
+
"grad_norm": 143.07726831372327,
|
479 |
+
"learning_rate": 4.879075164660124e-07,
|
480 |
+
"logits/chosen": -2.4872541427612305,
|
481 |
+
"logits/rejected": -2.4530301094055176,
|
482 |
+
"logps/chosen": -337.13592529296875,
|
483 |
+
"logps/rejected": -217.49795532226562,
|
484 |
+
"loss": 0.1866,
|
485 |
+
"rewards/accuracies": 0.90625,
|
486 |
+
"rewards/chosen": 0.2758009433746338,
|
487 |
+
"rewards/margins": 5.386232852935791,
|
488 |
+
"rewards/rejected": -5.110430717468262,
|
489 |
+
"step": 320
|
490 |
+
},
|
491 |
+
{
|
492 |
+
"epoch": 0.19546868058640604,
|
493 |
+
"grad_norm": 96.96348568375632,
|
494 |
+
"learning_rate": 4.862681349379212e-07,
|
495 |
+
"logits/chosen": -2.5405662059783936,
|
496 |
+
"logits/rejected": -2.506821870803833,
|
497 |
+
"logps/chosen": -343.34637451171875,
|
498 |
+
"logps/rejected": -218.04287719726562,
|
499 |
+
"loss": 0.1596,
|
500 |
+
"rewards/accuracies": 0.9375,
|
501 |
+
"rewards/chosen": 0.5433809161186218,
|
502 |
+
"rewards/margins": 5.724984645843506,
|
503 |
+
"rewards/rejected": -5.181603908538818,
|
504 |
+
"step": 330
|
505 |
+
},
|
506 |
+
{
|
507 |
+
"epoch": 0.20139197393750927,
|
508 |
+
"grad_norm": 236.3908842304634,
|
509 |
+
"learning_rate": 4.8452769467194e-07,
|
510 |
+
"logits/chosen": -2.4440038204193115,
|
511 |
+
"logits/rejected": -2.4228639602661133,
|
512 |
+
"logps/chosen": -398.2060852050781,
|
513 |
+
"logps/rejected": -238.4206085205078,
|
514 |
+
"loss": 0.161,
|
515 |
+
"rewards/accuracies": 0.90625,
|
516 |
+
"rewards/chosen": 0.39312082529067993,
|
517 |
+
"rewards/margins": 6.064854145050049,
|
518 |
+
"rewards/rejected": -5.6717329025268555,
|
519 |
+
"step": 340
|
520 |
+
},
|
521 |
+
{
|
522 |
+
"epoch": 0.20731526728861246,
|
523 |
+
"grad_norm": 262.31086254004623,
|
524 |
+
"learning_rate": 4.82686940104879e-07,
|
525 |
+
"logits/chosen": -2.439896583557129,
|
526 |
+
"logits/rejected": -2.415922164916992,
|
527 |
+
"logps/chosen": -339.29217529296875,
|
528 |
+
"logps/rejected": -242.05789184570312,
|
529 |
+
"loss": 0.1722,
|
530 |
+
"rewards/accuracies": 0.9375,
|
531 |
+
"rewards/chosen": -0.6545895338058472,
|
532 |
+
"rewards/margins": 5.870488166809082,
|
533 |
+
"rewards/rejected": -6.525076866149902,
|
534 |
+
"step": 350
|
535 |
+
},
|
536 |
+
{
|
537 |
+
"epoch": 0.2132385606397157,
|
538 |
+
"grad_norm": 222.15082465425456,
|
539 |
+
"learning_rate": 4.807466585808856e-07,
|
540 |
+
"logits/chosen": -2.379359006881714,
|
541 |
+
"logits/rejected": -2.360146999359131,
|
542 |
+
"logps/chosen": -316.02349853515625,
|
543 |
+
"logps/rejected": -233.2420654296875,
|
544 |
+
"loss": 0.156,
|
545 |
+
"rewards/accuracies": 0.9375,
|
546 |
+
"rewards/chosen": -0.11594714224338531,
|
547 |
+
"rewards/margins": 6.157732963562012,
|
548 |
+
"rewards/rejected": -6.273680210113525,
|
549 |
+
"step": 360
|
550 |
+
},
|
551 |
+
{
|
552 |
+
"epoch": 0.2191618539908189,
|
553 |
+
"grad_norm": 242.78071949212634,
|
554 |
+
"learning_rate": 4.787076800146752e-07,
|
555 |
+
"logits/chosen": -2.4132204055786133,
|
556 |
+
"logits/rejected": -2.403979778289795,
|
557 |
+
"logps/chosen": -359.83953857421875,
|
558 |
+
"logps/rejected": -234.0885772705078,
|
559 |
+
"loss": 0.1843,
|
560 |
+
"rewards/accuracies": 0.8812500238418579,
|
561 |
+
"rewards/chosen": -0.31123560667037964,
|
562 |
+
"rewards/margins": 5.990903377532959,
|
563 |
+
"rewards/rejected": -6.302138328552246,
|
564 |
+
"step": 370
|
565 |
+
},
|
566 |
+
{
|
567 |
+
"epoch": 0.22508514734192211,
|
568 |
+
"grad_norm": 199.5431768901427,
|
569 |
+
"learning_rate": 4.765708765365526e-07,
|
570 |
+
"logits/chosen": -2.3680121898651123,
|
571 |
+
"logits/rejected": -2.37663197517395,
|
572 |
+
"logps/chosen": -356.29052734375,
|
573 |
+
"logps/rejected": -248.9377899169922,
|
574 |
+
"loss": 0.1632,
|
575 |
+
"rewards/accuracies": 0.9375,
|
576 |
+
"rewards/chosen": -0.06563782691955566,
|
577 |
+
"rewards/margins": 6.597121238708496,
|
578 |
+
"rewards/rejected": -6.662759304046631,
|
579 |
+
"step": 380
|
580 |
+
},
|
581 |
+
{
|
582 |
+
"epoch": 0.2310084406930253,
|
583 |
+
"grad_norm": 252.4218453368612,
|
584 |
+
"learning_rate": 4.7433716211937587e-07,
|
585 |
+
"logits/chosen": -2.3765156269073486,
|
586 |
+
"logits/rejected": -2.3698158264160156,
|
587 |
+
"logps/chosen": -355.7226867675781,
|
588 |
+
"logps/rejected": -231.8157501220703,
|
589 |
+
"loss": 0.1849,
|
590 |
+
"rewards/accuracies": 0.9125000238418579,
|
591 |
+
"rewards/chosen": 0.5422400236129761,
|
592 |
+
"rewards/margins": 6.704817295074463,
|
593 |
+
"rewards/rejected": -6.1625776290893555,
|
594 |
+
"step": 390
|
595 |
+
},
|
596 |
+
{
|
597 |
+
"epoch": 0.23693173404412854,
|
598 |
+
"grad_norm": 145.48012597761922,
|
599 |
+
"learning_rate": 4.720074921876245e-07,
|
600 |
+
"logits/chosen": -2.398963451385498,
|
601 |
+
"logits/rejected": -2.394768238067627,
|
602 |
+
"logps/chosen": -322.42864990234375,
|
603 |
+
"logps/rejected": -235.3686065673828,
|
604 |
+
"loss": 0.1707,
|
605 |
+
"rewards/accuracies": 0.918749988079071,
|
606 |
+
"rewards/chosen": -0.2241075038909912,
|
607 |
+
"rewards/margins": 6.238755226135254,
|
608 |
+
"rewards/rejected": -6.462862491607666,
|
609 |
+
"step": 400
|
610 |
+
},
|
611 |
+
{
|
612 |
+
"epoch": 0.24285502739523174,
|
613 |
+
"grad_norm": 292.71968928639006,
|
614 |
+
"learning_rate": 4.6958286320873593e-07,
|
615 |
+
"logits/chosen": -2.43870210647583,
|
616 |
+
"logits/rejected": -2.3871872425079346,
|
617 |
+
"logps/chosen": -397.65423583984375,
|
618 |
+
"logps/rejected": -255.60720825195312,
|
619 |
+
"loss": 0.1541,
|
620 |
+
"rewards/accuracies": 0.949999988079071,
|
621 |
+
"rewards/chosen": 0.2829202711582184,
|
622 |
+
"rewards/margins": 7.782283782958984,
|
623 |
+
"rewards/rejected": -7.499364376068115,
|
624 |
+
"step": 410
|
625 |
+
},
|
626 |
+
{
|
627 |
+
"epoch": 0.24877832074633496,
|
628 |
+
"grad_norm": 224.80127439466196,
|
629 |
+
"learning_rate": 4.6706431226688804e-07,
|
630 |
+
"logits/chosen": -2.416389226913452,
|
631 |
+
"logits/rejected": -2.3854172229766846,
|
632 |
+
"logps/chosen": -403.90765380859375,
|
633 |
+
"logps/rejected": -268.164794921875,
|
634 |
+
"loss": 0.1664,
|
635 |
+
"rewards/accuracies": 0.918749988079071,
|
636 |
+
"rewards/chosen": -0.07748878002166748,
|
637 |
+
"rewards/margins": 8.120170593261719,
|
638 |
+
"rewards/rejected": -8.197659492492676,
|
639 |
+
"step": 420
|
640 |
+
},
|
641 |
+
{
|
642 |
+
"epoch": 0.2547016140974382,
|
643 |
+
"grad_norm": 314.70972896563933,
|
644 |
+
"learning_rate": 4.6445291661940777e-07,
|
645 |
+
"logits/chosen": -2.3671650886535645,
|
646 |
+
"logits/rejected": -2.3478543758392334,
|
647 |
+
"logps/chosen": -364.58673095703125,
|
648 |
+
"logps/rejected": -255.23056030273438,
|
649 |
+
"loss": 0.2109,
|
650 |
+
"rewards/accuracies": 0.918749988079071,
|
651 |
+
"rewards/chosen": 0.149729922413826,
|
652 |
+
"rewards/margins": 7.63533878326416,
|
653 |
+
"rewards/rejected": -7.485608100891113,
|
654 |
+
"step": 430
|
655 |
+
},
|
656 |
+
{
|
657 |
+
"epoch": 0.2606249074485414,
|
658 |
+
"grad_norm": 123.44333079620083,
|
659 |
+
"learning_rate": 4.6174979323599715e-07,
|
660 |
+
"logits/chosen": -2.418224811553955,
|
661 |
+
"logits/rejected": -2.4098281860351562,
|
662 |
+
"logps/chosen": -365.10400390625,
|
663 |
+
"logps/rejected": -231.907470703125,
|
664 |
+
"loss": 0.2034,
|
665 |
+
"rewards/accuracies": 0.9375,
|
666 |
+
"rewards/chosen": 0.7242127060890198,
|
667 |
+
"rewards/margins": 6.6777753829956055,
|
668 |
+
"rewards/rejected": -5.9535627365112305,
|
669 |
+
"step": 440
|
670 |
+
},
|
671 |
+
{
|
672 |
+
"epoch": 0.2665482007996446,
|
673 |
+
"grad_norm": 185.88217229096983,
|
674 |
+
"learning_rate": 4.5895609832097277e-07,
|
675 |
+
"logits/chosen": -2.4694488048553467,
|
676 |
+
"logits/rejected": -2.4495654106140137,
|
677 |
+
"logps/chosen": -340.63934326171875,
|
678 |
+
"logps/rejected": -227.07791137695312,
|
679 |
+
"loss": 0.1729,
|
680 |
+
"rewards/accuracies": 0.9437500238418579,
|
681 |
+
"rewards/chosen": 0.8438789248466492,
|
682 |
+
"rewards/margins": 6.674716949462891,
|
683 |
+
"rewards/rejected": -5.830838680267334,
|
684 |
+
"step": 450
|
685 |
+
},
|
686 |
+
{
|
687 |
+
"epoch": 0.27247149415074784,
|
688 |
+
"grad_norm": 179.72268316546433,
|
689 |
+
"learning_rate": 4.560730268187236e-07,
|
690 |
+
"logits/chosen": -2.389660358428955,
|
691 |
+
"logits/rejected": -2.396211624145508,
|
692 |
+
"logps/chosen": -332.08612060546875,
|
693 |
+
"logps/rejected": -227.017578125,
|
694 |
+
"loss": 0.1946,
|
695 |
+
"rewards/accuracies": 0.9375,
|
696 |
+
"rewards/chosen": 0.7443017959594727,
|
697 |
+
"rewards/margins": 6.060439586639404,
|
698 |
+
"rewards/rejected": -5.316138744354248,
|
699 |
+
"step": 460
|
700 |
+
},
|
701 |
+
{
|
702 |
+
"epoch": 0.27839478750185104,
|
703 |
+
"grad_norm": 110.66902509862196,
|
704 |
+
"learning_rate": 4.531018119025989e-07,
|
705 |
+
"logits/chosen": -2.4120264053344727,
|
706 |
+
"logits/rejected": -2.4028701782226562,
|
707 |
+
"logps/chosen": -314.36895751953125,
|
708 |
+
"logps/rejected": -218.5847930908203,
|
709 |
+
"loss": 0.1598,
|
710 |
+
"rewards/accuracies": 0.9125000238418579,
|
711 |
+
"rewards/chosen": 0.032317258417606354,
|
712 |
+
"rewards/margins": 5.494265556335449,
|
713 |
+
"rewards/rejected": -5.461948871612549,
|
714 |
+
"step": 470
|
715 |
+
},
|
716 |
+
{
|
717 |
+
"epoch": 0.28431808085295424,
|
718 |
+
"grad_norm": 214.74519309520247,
|
719 |
+
"learning_rate": 4.5004372444744376e-07,
|
720 |
+
"logits/chosen": -2.426173686981201,
|
721 |
+
"logits/rejected": -2.408202648162842,
|
722 |
+
"logps/chosen": -353.477294921875,
|
723 |
+
"logps/rejected": -240.3783721923828,
|
724 |
+
"loss": 0.2256,
|
725 |
+
"rewards/accuracies": 0.8812500238418579,
|
726 |
+
"rewards/chosen": -0.20403608679771423,
|
727 |
+
"rewards/margins": 6.32252311706543,
|
728 |
+
"rewards/rejected": -6.526559352874756,
|
729 |
+
"step": 480
|
730 |
+
},
|
731 |
+
{
|
732 |
+
"epoch": 0.29024137420405743,
|
733 |
+
"grad_norm": 191.0207951068833,
|
734 |
+
"learning_rate": 4.4690007248600967e-07,
|
735 |
+
"logits/chosen": -2.422132968902588,
|
736 |
+
"logits/rejected": -2.40049409866333,
|
737 |
+
"logps/chosen": -396.6956481933594,
|
738 |
+
"logps/rejected": -240.05593872070312,
|
739 |
+
"loss": 0.1501,
|
740 |
+
"rewards/accuracies": 0.949999988079071,
|
741 |
+
"rewards/chosen": 0.5469577312469482,
|
742 |
+
"rewards/margins": 7.562119483947754,
|
743 |
+
"rewards/rejected": -7.015161037445068,
|
744 |
+
"step": 490
|
745 |
+
},
|
746 |
+
{
|
747 |
+
"epoch": 0.2961646675551607,
|
748 |
+
"grad_norm": 98.30324866742373,
|
749 |
+
"learning_rate": 4.436722006494701e-07,
|
750 |
+
"logits/chosen": -2.439347505569458,
|
751 |
+
"logits/rejected": -2.4101297855377197,
|
752 |
+
"logps/chosen": -355.91400146484375,
|
753 |
+
"logps/rejected": -236.58407592773438,
|
754 |
+
"loss": 0.1413,
|
755 |
+
"rewards/accuracies": 0.949999988079071,
|
756 |
+
"rewards/chosen": 0.17143169045448303,
|
757 |
+
"rewards/margins": 7.068209648132324,
|
758 |
+
"rewards/rejected": -6.896778106689453,
|
759 |
+
"step": 500
|
760 |
+
},
|
761 |
+
{
|
762 |
+
"epoch": 0.3020879609062639,
|
763 |
+
"grad_norm": 172.09787160323572,
|
764 |
+
"learning_rate": 4.4036148959228356e-07,
|
765 |
+
"logits/chosen": -2.4719622135162354,
|
766 |
+
"logits/rejected": -2.4255988597869873,
|
767 |
+
"logps/chosen": -408.50079345703125,
|
768 |
+
"logps/rejected": -240.2022247314453,
|
769 |
+
"loss": 0.1282,
|
770 |
+
"rewards/accuracies": 0.9375,
|
771 |
+
"rewards/chosen": 0.17151732742786407,
|
772 |
+
"rewards/margins": 7.430660247802734,
|
773 |
+
"rewards/rejected": -7.259142875671387,
|
774 |
+
"step": 510
|
775 |
+
},
|
776 |
+
{
|
777 |
+
"epoch": 0.3080112542573671,
|
778 |
+
"grad_norm": 213.31450495313408,
|
779 |
+
"learning_rate": 4.3696935540164705e-07,
|
780 |
+
"logits/chosen": -2.4813694953918457,
|
781 |
+
"logits/rejected": -2.4537417888641357,
|
782 |
+
"logps/chosen": -305.8951110839844,
|
783 |
+
"logps/rejected": -212.90701293945312,
|
784 |
+
"loss": 0.2087,
|
785 |
+
"rewards/accuracies": 0.8999999761581421,
|
786 |
+
"rewards/chosen": -0.024075621739029884,
|
787 |
+
"rewards/margins": 6.6571245193481445,
|
788 |
+
"rewards/rejected": -6.6812005043029785,
|
789 |
+
"step": 520
|
790 |
+
},
|
791 |
+
{
|
792 |
+
"epoch": 0.3139345476084703,
|
793 |
+
"grad_norm": 83.43235360842944,
|
794 |
+
"learning_rate": 4.334972489917947e-07,
|
795 |
+
"logits/chosen": -2.4770638942718506,
|
796 |
+
"logits/rejected": -2.4384360313415527,
|
797 |
+
"logps/chosen": -343.23797607421875,
|
798 |
+
"logps/rejected": -231.5509033203125,
|
799 |
+
"loss": 0.138,
|
800 |
+
"rewards/accuracies": 0.9375,
|
801 |
+
"rewards/chosen": 0.25519540905952454,
|
802 |
+
"rewards/margins": 7.639869689941406,
|
803 |
+
"rewards/rejected": -7.3846755027771,
|
804 |
+
"step": 530
|
805 |
+
},
|
806 |
+
{
|
807 |
+
"epoch": 0.31985784095957354,
|
808 |
+
"grad_norm": 48.96905584146253,
|
809 |
+
"learning_rate": 4.299466554833997e-07,
|
810 |
+
"logits/chosen": -2.462367534637451,
|
811 |
+
"logits/rejected": -2.433013439178467,
|
812 |
+
"logps/chosen": -331.65179443359375,
|
813 |
+
"logps/rejected": -237.80703735351562,
|
814 |
+
"loss": 0.2046,
|
815 |
+
"rewards/accuracies": 0.9375,
|
816 |
+
"rewards/chosen": -0.43259042501449585,
|
817 |
+
"rewards/margins": 7.181746482849121,
|
818 |
+
"rewards/rejected": -7.614336967468262,
|
819 |
+
"step": 540
|
820 |
+
},
|
821 |
+
{
|
822 |
+
"epoch": 0.32578113431067673,
|
823 |
+
"grad_norm": 157.94476432481386,
|
824 |
+
"learning_rate": 4.263190935683449e-07,
|
825 |
+
"logits/chosen": -2.454803943634033,
|
826 |
+
"logits/rejected": -2.4091312885284424,
|
827 |
+
"logps/chosen": -346.77508544921875,
|
828 |
+
"logps/rejected": -241.5703125,
|
829 |
+
"loss": 0.1686,
|
830 |
+
"rewards/accuracies": 0.949999988079071,
|
831 |
+
"rewards/chosen": 0.16140493750572205,
|
832 |
+
"rewards/margins": 8.025360107421875,
|
833 |
+
"rewards/rejected": -7.863955497741699,
|
834 |
+
"step": 550
|
835 |
+
},
|
836 |
+
{
|
837 |
+
"epoch": 0.33170442766177993,
|
838 |
+
"grad_norm": 144.6392465996477,
|
839 |
+
"learning_rate": 4.2261611486013437e-07,
|
840 |
+
"logits/chosen": -2.361588478088379,
|
841 |
+
"logits/rejected": -2.340947389602661,
|
842 |
+
"logps/chosen": -376.0790710449219,
|
843 |
+
"logps/rejected": -261.4156799316406,
|
844 |
+
"loss": 0.1378,
|
845 |
+
"rewards/accuracies": 0.949999988079071,
|
846 |
+
"rewards/chosen": -0.15833595395088196,
|
847 |
+
"rewards/margins": 7.758933067321777,
|
848 |
+
"rewards/rejected": -7.917269706726074,
|
849 |
+
"step": 560
|
850 |
+
},
|
851 |
+
{
|
852 |
+
"epoch": 0.3376277210128832,
|
853 |
+
"grad_norm": 65.04958454200869,
|
854 |
+
"learning_rate": 4.188393032302233e-07,
|
855 |
+
"logits/chosen": -2.3772928714752197,
|
856 |
+
"logits/rejected": -2.3414306640625,
|
857 |
+
"logps/chosen": -364.4604187011719,
|
858 |
+
"logps/rejected": -261.02935791015625,
|
859 |
+
"loss": 0.1429,
|
860 |
+
"rewards/accuracies": 0.956250011920929,
|
861 |
+
"rewards/chosen": -0.16394826769828796,
|
862 |
+
"rewards/margins": 8.307961463928223,
|
863 |
+
"rewards/rejected": -8.471909523010254,
|
864 |
+
"step": 570
|
865 |
+
},
|
866 |
+
{
|
867 |
+
"epoch": 0.3435510143639864,
|
868 |
+
"grad_norm": 39.92370071225025,
|
869 |
+
"learning_rate": 4.1499027413055e-07,
|
870 |
+
"logits/chosen": -2.4158213138580322,
|
871 |
+
"logits/rejected": -2.400824546813965,
|
872 |
+
"logps/chosen": -355.3429260253906,
|
873 |
+
"logps/rejected": -234.93331909179688,
|
874 |
+
"loss": 0.1184,
|
875 |
+
"rewards/accuracies": 0.949999988079071,
|
876 |
+
"rewards/chosen": 0.20681849122047424,
|
877 |
+
"rewards/margins": 8.111043930053711,
|
878 |
+
"rewards/rejected": -7.904225826263428,
|
879 |
+
"step": 580
|
880 |
+
},
|
881 |
+
{
|
882 |
+
"epoch": 0.3494743077150896,
|
883 |
+
"grad_norm": 113.63629001311939,
|
884 |
+
"learning_rate": 4.1107067390256056e-07,
|
885 |
+
"logits/chosen": -2.3974995613098145,
|
886 |
+
"logits/rejected": -2.385651111602783,
|
887 |
+
"logps/chosen": -371.2859802246094,
|
888 |
+
"logps/rejected": -251.9286346435547,
|
889 |
+
"loss": 0.1818,
|
890 |
+
"rewards/accuracies": 0.956250011920929,
|
891 |
+
"rewards/chosen": -0.022083889693021774,
|
892 |
+
"rewards/margins": 8.15199089050293,
|
893 |
+
"rewards/rejected": -8.174076080322266,
|
894 |
+
"step": 590
|
895 |
+
},
|
896 |
+
{
|
897 |
+
"epoch": 0.3553976010661928,
|
898 |
+
"grad_norm": 76.65150743146755,
|
899 |
+
"learning_rate": 4.0708217907302047e-07,
|
900 |
+
"logits/chosen": -2.446026563644409,
|
901 |
+
"logits/rejected": -2.446206569671631,
|
902 |
+
"logps/chosen": -390.41510009765625,
|
903 |
+
"logps/rejected": -249.01260375976562,
|
904 |
+
"loss": 0.1302,
|
905 |
+
"rewards/accuracies": 0.9750000238418579,
|
906 |
+
"rewards/chosen": 0.1251598298549652,
|
907 |
+
"rewards/margins": 8.266278266906738,
|
908 |
+
"rewards/rejected": -8.141119003295898,
|
909 |
+
"step": 600
|
910 |
+
},
|
911 |
+
{
|
912 |
+
"epoch": 0.36132089441729603,
|
913 |
+
"grad_norm": 154.61613568075327,
|
914 |
+
"learning_rate": 4.030264956369157e-07,
|
915 |
+
"logits/chosen": -2.431563377380371,
|
916 |
+
"logits/rejected": -2.402116298675537,
|
917 |
+
"logps/chosen": -367.7977600097656,
|
918 |
+
"logps/rejected": -256.6099548339844,
|
919 |
+
"loss": 0.1303,
|
920 |
+
"rewards/accuracies": 0.9437500238418579,
|
921 |
+
"rewards/chosen": -0.1916094869375229,
|
922 |
+
"rewards/margins": 8.200936317443848,
|
923 |
+
"rewards/rejected": -8.392545700073242,
|
924 |
+
"step": 610
|
925 |
+
},
|
926 |
+
{
|
927 |
+
"epoch": 0.36724418776839923,
|
928 |
+
"grad_norm": 365.6788351549646,
|
929 |
+
"learning_rate": 3.989053583277492e-07,
|
930 |
+
"logits/chosen": -2.437472105026245,
|
931 |
+
"logits/rejected": -2.424696445465088,
|
932 |
+
"logps/chosen": -364.15374755859375,
|
933 |
+
"logps/rejected": -273.16595458984375,
|
934 |
+
"loss": 0.2123,
|
935 |
+
"rewards/accuracies": 0.9312499761581421,
|
936 |
+
"rewards/chosen": -1.120966911315918,
|
937 |
+
"rewards/margins": 7.895718574523926,
|
938 |
+
"rewards/rejected": -9.016683578491211,
|
939 |
+
"step": 620
|
940 |
+
},
|
941 |
+
{
|
942 |
+
"epoch": 0.37316748111950243,
|
943 |
+
"grad_norm": 170.20992171320333,
|
944 |
+
"learning_rate": 3.947205298755447e-07,
|
945 |
+
"logits/chosen": -2.448801279067993,
|
946 |
+
"logits/rejected": -2.427565813064575,
|
947 |
+
"logps/chosen": -411.0587463378906,
|
948 |
+
"logps/rejected": -271.62652587890625,
|
949 |
+
"loss": 0.139,
|
950 |
+
"rewards/accuracies": 0.9437500238418579,
|
951 |
+
"rewards/chosen": -0.17977580428123474,
|
952 |
+
"rewards/margins": 8.981353759765625,
|
953 |
+
"rewards/rejected": -9.161130905151367,
|
954 |
+
"step": 630
|
955 |
+
},
|
956 |
+
{
|
957 |
+
"epoch": 0.37909077447060563,
|
958 |
+
"grad_norm": 177.25293521329584,
|
959 |
+
"learning_rate": 3.9047380025287634e-07,
|
960 |
+
"logits/chosen": -2.372959613800049,
|
961 |
+
"logits/rejected": -2.3525047302246094,
|
962 |
+
"logps/chosen": -356.154296875,
|
963 |
+
"logps/rejected": -274.4513854980469,
|
964 |
+
"loss": 0.1744,
|
965 |
+
"rewards/accuracies": 0.9375,
|
966 |
+
"rewards/chosen": -0.8354924321174622,
|
967 |
+
"rewards/margins": 8.103582382202148,
|
968 |
+
"rewards/rejected": -8.93907356262207,
|
969 |
+
"step": 640
|
970 |
+
},
|
971 |
+
{
|
972 |
+
"epoch": 0.3850140678217089,
|
973 |
+
"grad_norm": 360.5455787004992,
|
974 |
+
"learning_rate": 3.8616698590924523e-07,
|
975 |
+
"logits/chosen": -2.4072346687316895,
|
976 |
+
"logits/rejected": -2.383105993270874,
|
977 |
+
"logps/chosen": -351.6363220214844,
|
978 |
+
"logps/rejected": -239.79788208007812,
|
979 |
+
"loss": 0.2141,
|
980 |
+
"rewards/accuracies": 0.90625,
|
981 |
+
"rewards/chosen": -0.6761496663093567,
|
982 |
+
"rewards/margins": 7.148809909820557,
|
983 |
+
"rewards/rejected": -7.824960231781006,
|
984 |
+
"step": 650
|
985 |
+
},
|
986 |
+
{
|
987 |
+
"epoch": 0.3909373611728121,
|
988 |
+
"grad_norm": 270.4274889017812,
|
989 |
+
"learning_rate": 3.8180192899413123e-07,
|
990 |
+
"logits/chosen": -2.4213879108428955,
|
991 |
+
"logits/rejected": -2.4075303077697754,
|
992 |
+
"logps/chosen": -417.3751525878906,
|
993 |
+
"logps/rejected": -265.44854736328125,
|
994 |
+
"loss": 0.1753,
|
995 |
+
"rewards/accuracies": 0.925000011920929,
|
996 |
+
"rewards/chosen": 0.6145623326301575,
|
997 |
+
"rewards/margins": 9.045318603515625,
|
998 |
+
"rewards/rejected": -8.430756568908691,
|
999 |
+
"step": 660
|
1000 |
+
},
|
1001 |
+
{
|
1002 |
+
"epoch": 0.3968606545239153,
|
1003 |
+
"grad_norm": 167.55864604832982,
|
1004 |
+
"learning_rate": 3.7738049656905225e-07,
|
1005 |
+
"logits/chosen": -2.42012619972229,
|
1006 |
+
"logits/rejected": -2.407224416732788,
|
1007 |
+
"logps/chosen": -367.69439697265625,
|
1008 |
+
"logps/rejected": -252.48849487304688,
|
1009 |
+
"loss": 0.1777,
|
1010 |
+
"rewards/accuracies": 0.9437500238418579,
|
1011 |
+
"rewards/chosen": 0.16578371822834015,
|
1012 |
+
"rewards/margins": 8.239130020141602,
|
1013 |
+
"rewards/rejected": -8.073347091674805,
|
1014 |
+
"step": 670
|
1015 |
+
},
|
1016 |
+
{
|
1017 |
+
"epoch": 0.40278394787501853,
|
1018 |
+
"grad_norm": 199.41539357853415,
|
1019 |
+
"learning_rate": 3.7290457980896787e-07,
|
1020 |
+
"logits/chosen": -2.481067180633545,
|
1021 |
+
"logits/rejected": -2.4814565181732178,
|
1022 |
+
"logps/chosen": -396.1634216308594,
|
1023 |
+
"logps/rejected": -250.05990600585938,
|
1024 |
+
"loss": 0.1805,
|
1025 |
+
"rewards/accuracies": 0.9375,
|
1026 |
+
"rewards/chosen": -0.06804431229829788,
|
1027 |
+
"rewards/margins": 7.441187381744385,
|
1028 |
+
"rewards/rejected": -7.5092315673828125,
|
1029 |
+
"step": 680
|
1030 |
+
},
|
1031 |
+
{
|
1032 |
+
"epoch": 0.40870724122612173,
|
1033 |
+
"grad_norm": 127.14212704342435,
|
1034 |
+
"learning_rate": 3.68376093193369e-07,
|
1035 |
+
"logits/chosen": -2.447186231613159,
|
1036 |
+
"logits/rejected": -2.441771984100342,
|
1037 |
+
"logps/chosen": -361.73516845703125,
|
1038 |
+
"logps/rejected": -253.1171417236328,
|
1039 |
+
"loss": 0.2343,
|
1040 |
+
"rewards/accuracies": 0.9312499761581421,
|
1041 |
+
"rewards/chosen": 0.23877505958080292,
|
1042 |
+
"rewards/margins": 8.379014015197754,
|
1043 |
+
"rewards/rejected": -8.140238761901855,
|
1044 |
+
"step": 690
|
1045 |
+
},
|
1046 |
+
{
|
1047 |
+
"epoch": 0.41463053457722493,
|
1048 |
+
"grad_norm": 156.38290182401465,
|
1049 |
+
"learning_rate": 3.637969736873992e-07,
|
1050 |
+
"logits/chosen": -2.468182325363159,
|
1051 |
+
"logits/rejected": -2.4434332847595215,
|
1052 |
+
"logps/chosen": -365.93463134765625,
|
1053 |
+
"logps/rejected": -235.07699584960938,
|
1054 |
+
"loss": 0.1846,
|
1055 |
+
"rewards/accuracies": 0.9312499761581421,
|
1056 |
+
"rewards/chosen": 0.2839972972869873,
|
1057 |
+
"rewards/margins": 7.469516754150391,
|
1058 |
+
"rewards/rejected": -7.185519218444824,
|
1059 |
+
"step": 700
|
1060 |
+
},
|
1061 |
+
{
|
1062 |
+
"epoch": 0.42055382792832813,
|
1063 |
+
"grad_norm": 80.92449178173787,
|
1064 |
+
"learning_rate": 3.591691799133587e-07,
|
1065 |
+
"logits/chosen": -2.4627020359039307,
|
1066 |
+
"logits/rejected": -2.4086921215057373,
|
1067 |
+
"logps/chosen": -399.5291442871094,
|
1068 |
+
"logps/rejected": -245.04385375976562,
|
1069 |
+
"loss": 0.1367,
|
1070 |
+
"rewards/accuracies": 0.9437500238418579,
|
1071 |
+
"rewards/chosen": 0.26195061206817627,
|
1072 |
+
"rewards/margins": 8.103439331054688,
|
1073 |
+
"rewards/rejected": -7.841488838195801,
|
1074 |
+
"step": 710
|
1075 |
+
},
|
1076 |
+
{
|
1077 |
+
"epoch": 0.4264771212794314,
|
1078 |
+
"grad_norm": 164.79458174222637,
|
1079 |
+
"learning_rate": 3.5449469131294476e-07,
|
1080 |
+
"logits/chosen": -2.4840645790100098,
|
1081 |
+
"logits/rejected": -2.4384331703186035,
|
1082 |
+
"logps/chosen": -356.69952392578125,
|
1083 |
+
"logps/rejected": -245.77041625976562,
|
1084 |
+
"loss": 0.1222,
|
1085 |
+
"rewards/accuracies": 0.9437500238418579,
|
1086 |
+
"rewards/chosen": -0.7162678837776184,
|
1087 |
+
"rewards/margins": 7.782705783843994,
|
1088 |
+
"rewards/rejected": -8.49897289276123,
|
1089 |
+
"step": 720
|
1090 |
+
},
|
1091 |
+
{
|
1092 |
+
"epoch": 0.4324004146305346,
|
1093 |
+
"grad_norm": 172.5370543885783,
|
1094 |
+
"learning_rate": 3.497755073005868e-07,
|
1095 |
+
"logits/chosen": -2.47273850440979,
|
1096 |
+
"logits/rejected": -2.4551682472229004,
|
1097 |
+
"logps/chosen": -394.3757629394531,
|
1098 |
+
"logps/rejected": -252.3651885986328,
|
1099 |
+
"loss": 0.1914,
|
1100 |
+
"rewards/accuracies": 0.96875,
|
1101 |
+
"rewards/chosen": -0.40301981568336487,
|
1102 |
+
"rewards/margins": 7.574839115142822,
|
1103 |
+
"rewards/rejected": -7.9778594970703125,
|
1104 |
+
"step": 730
|
1105 |
+
},
|
1106 |
+
{
|
1107 |
+
"epoch": 0.4383237079816378,
|
1108 |
+
"grad_norm": 133.2758942704595,
|
1109 |
+
"learning_rate": 3.4501364640823926e-07,
|
1110 |
+
"logits/chosen": -2.5142226219177246,
|
1111 |
+
"logits/rejected": -2.48344087600708,
|
1112 |
+
"logps/chosen": -351.15582275390625,
|
1113 |
+
"logps/rejected": -255.9786376953125,
|
1114 |
+
"loss": 0.1843,
|
1115 |
+
"rewards/accuracies": 0.9312499761581421,
|
1116 |
+
"rewards/chosen": -0.5328763723373413,
|
1117 |
+
"rewards/margins": 7.858250617980957,
|
1118 |
+
"rewards/rejected": -8.39112663269043,
|
1119 |
+
"step": 740
|
1120 |
+
},
|
1121 |
+
{
|
1122 |
+
"epoch": 0.44424700133274103,
|
1123 |
+
"grad_norm": 151.90351812794674,
|
1124 |
+
"learning_rate": 3.402111454219966e-07,
|
1125 |
+
"logits/chosen": -2.4132637977600098,
|
1126 |
+
"logits/rejected": -2.3917102813720703,
|
1127 |
+
"logps/chosen": -364.5772705078125,
|
1128 |
+
"logps/rejected": -241.1504364013672,
|
1129 |
+
"loss": 0.2091,
|
1130 |
+
"rewards/accuracies": 0.893750011920929,
|
1131 |
+
"rewards/chosen": -0.33990636467933655,
|
1132 |
+
"rewards/margins": 7.563208103179932,
|
1133 |
+
"rewards/rejected": -7.90311336517334,
|
1134 |
+
"step": 750
|
1135 |
+
},
|
1136 |
+
{
|
1137 |
+
"epoch": 0.45017029468384423,
|
1138 |
+
"grad_norm": 175.38346833521052,
|
1139 |
+
"learning_rate": 3.353700585109005e-07,
|
1140 |
+
"logits/chosen": -2.4272594451904297,
|
1141 |
+
"logits/rejected": -2.4220120906829834,
|
1142 |
+
"logps/chosen": -374.5780334472656,
|
1143 |
+
"logps/rejected": -245.2948760986328,
|
1144 |
+
"loss": 0.1494,
|
1145 |
+
"rewards/accuracies": 0.96875,
|
1146 |
+
"rewards/chosen": 0.7049421668052673,
|
1147 |
+
"rewards/margins": 8.787199020385742,
|
1148 |
+
"rewards/rejected": -8.082255363464355,
|
1149 |
+
"step": 760
|
1150 |
+
},
|
1151 |
+
{
|
1152 |
+
"epoch": 0.45609358803494743,
|
1153 |
+
"grad_norm": 105.96376988895295,
|
1154 |
+
"learning_rate": 3.304924563483129e-07,
|
1155 |
+
"logits/chosen": -2.398015260696411,
|
1156 |
+
"logits/rejected": -2.389971971511841,
|
1157 |
+
"logps/chosen": -383.84344482421875,
|
1158 |
+
"logps/rejected": -238.3228759765625,
|
1159 |
+
"loss": 0.1439,
|
1160 |
+
"rewards/accuracies": 0.949999988079071,
|
1161 |
+
"rewards/chosen": 0.1009693592786789,
|
1162 |
+
"rewards/margins": 8.56285285949707,
|
1163 |
+
"rewards/rejected": -8.461882591247559,
|
1164 |
+
"step": 770
|
1165 |
+
},
|
1166 |
+
{
|
1167 |
+
"epoch": 0.4620168813860506,
|
1168 |
+
"grad_norm": 210.20985866366246,
|
1169 |
+
"learning_rate": 3.255804252262283e-07,
|
1170 |
+
"logits/chosen": -2.450796604156494,
|
1171 |
+
"logits/rejected": -2.423675060272217,
|
1172 |
+
"logps/chosen": -340.67572021484375,
|
1173 |
+
"logps/rejected": -249.12155151367188,
|
1174 |
+
"loss": 0.1576,
|
1175 |
+
"rewards/accuracies": 0.9312499761581421,
|
1176 |
+
"rewards/chosen": -0.6850844621658325,
|
1177 |
+
"rewards/margins": 7.610543251037598,
|
1178 |
+
"rewards/rejected": -8.29562759399414,
|
1179 |
+
"step": 780
|
1180 |
+
},
|
1181 |
+
{
|
1182 |
+
"epoch": 0.4679401747371539,
|
1183 |
+
"grad_norm": 38.37233058357723,
|
1184 |
+
"learning_rate": 3.2063606616290626e-07,
|
1185 |
+
"logits/chosen": -2.3822109699249268,
|
1186 |
+
"logits/rejected": -2.354607105255127,
|
1187 |
+
"logps/chosen": -382.82598876953125,
|
1188 |
+
"logps/rejected": -249.13320922851562,
|
1189 |
+
"loss": 0.1334,
|
1190 |
+
"rewards/accuracies": 0.9624999761581421,
|
1191 |
+
"rewards/chosen": 0.23880510032176971,
|
1192 |
+
"rewards/margins": 9.092573165893555,
|
1193 |
+
"rewards/rejected": -8.853767395019531,
|
1194 |
+
"step": 790
|
1195 |
+
},
|
1196 |
+
{
|
1197 |
+
"epoch": 0.4738634680882571,
|
1198 |
+
"grad_norm": 116.83040603007437,
|
1199 |
+
"learning_rate": 3.1566149400420523e-07,
|
1200 |
+
"logits/chosen": -2.448554515838623,
|
1201 |
+
"logits/rejected": -2.4384427070617676,
|
1202 |
+
"logps/chosen": -364.5985107421875,
|
1203 |
+
"logps/rejected": -256.18109130859375,
|
1204 |
+
"loss": 0.1401,
|
1205 |
+
"rewards/accuracies": 0.956250011920929,
|
1206 |
+
"rewards/chosen": 0.08134280145168304,
|
1207 |
+
"rewards/margins": 8.44970989227295,
|
1208 |
+
"rewards/rejected": -8.368366241455078,
|
1209 |
+
"step": 800
|
1210 |
+
},
|
1211 |
+
{
|
1212 |
+
"epoch": 0.4797867614393603,
|
1213 |
+
"grad_norm": 120.81771343753893,
|
1214 |
+
"learning_rate": 3.1065883651900087e-07,
|
1215 |
+
"logits/chosen": -2.403247356414795,
|
1216 |
+
"logits/rejected": -2.3994150161743164,
|
1217 |
+
"logps/chosen": -369.07373046875,
|
1218 |
+
"logps/rejected": -260.661865234375,
|
1219 |
+
"loss": 0.1515,
|
1220 |
+
"rewards/accuracies": 0.956250011920929,
|
1221 |
+
"rewards/chosen": 0.24362042546272278,
|
1222 |
+
"rewards/margins": 8.64600658416748,
|
1223 |
+
"rewards/rejected": -8.402385711669922,
|
1224 |
+
"step": 810
|
1225 |
+
},
|
1226 |
+
{
|
1227 |
+
"epoch": 0.4857100547904635,
|
1228 |
+
"grad_norm": 163.57574382904446,
|
1229 |
+
"learning_rate": 3.056302334890786e-07,
|
1230 |
+
"logits/chosen": -2.472740888595581,
|
1231 |
+
"logits/rejected": -2.447871685028076,
|
1232 |
+
"logps/chosen": -364.73382568359375,
|
1233 |
+
"logps/rejected": -243.4113311767578,
|
1234 |
+
"loss": 0.1651,
|
1235 |
+
"rewards/accuracies": 0.918749988079071,
|
1236 |
+
"rewards/chosen": 0.1390383541584015,
|
1237 |
+
"rewards/margins": 7.885109901428223,
|
1238 |
+
"rewards/rejected": -7.746070861816406,
|
1239 |
+
"step": 820
|
1240 |
+
},
|
1241 |
+
{
|
1242 |
+
"epoch": 0.49163334814156673,
|
1243 |
+
"grad_norm": 110.291493965559,
|
1244 |
+
"learning_rate": 3.0057783579388586e-07,
|
1245 |
+
"logits/chosen": -2.3814120292663574,
|
1246 |
+
"logits/rejected": -2.3707213401794434,
|
1247 |
+
"logps/chosen": -380.00115966796875,
|
1248 |
+
"logps/rejected": -259.52813720703125,
|
1249 |
+
"loss": 0.1149,
|
1250 |
+
"rewards/accuracies": 0.9750000238418579,
|
1251 |
+
"rewards/chosen": -0.010458474978804588,
|
1252 |
+
"rewards/margins": 8.064732551574707,
|
1253 |
+
"rewards/rejected": -8.075190544128418,
|
1254 |
+
"step": 830
|
1255 |
+
},
|
1256 |
+
{
|
1257 |
+
"epoch": 0.4975566414926699,
|
1258 |
+
"grad_norm": 149.32298193701925,
|
1259 |
+
"learning_rate": 2.9550380449053907e-07,
|
1260 |
+
"logits/chosen": -2.4556055068969727,
|
1261 |
+
"logits/rejected": -2.4424943923950195,
|
1262 |
+
"logps/chosen": -353.40087890625,
|
1263 |
+
"logps/rejected": -249.0240020751953,
|
1264 |
+
"loss": 0.1836,
|
1265 |
+
"rewards/accuracies": 0.925000011920929,
|
1266 |
+
"rewards/chosen": -0.6178598403930664,
|
1267 |
+
"rewards/margins": 6.908316135406494,
|
1268 |
+
"rewards/rejected": -7.526176452636719,
|
1269 |
+
"step": 840
|
1270 |
+
},
|
1271 |
+
{
|
1272 |
+
"epoch": 0.5034799348437732,
|
1273 |
+
"grad_norm": 298.0495441928926,
|
1274 |
+
"learning_rate": 2.904103098894767e-07,
|
1275 |
+
"logits/chosen": -2.4620985984802246,
|
1276 |
+
"logits/rejected": -2.4430508613586426,
|
1277 |
+
"logps/chosen": -381.40338134765625,
|
1278 |
+
"logps/rejected": -244.94735717773438,
|
1279 |
+
"loss": 0.1312,
|
1280 |
+
"rewards/accuracies": 0.9312499761581421,
|
1281 |
+
"rewards/chosen": -0.1066194549202919,
|
1282 |
+
"rewards/margins": 8.322629928588867,
|
1283 |
+
"rewards/rejected": -8.42924976348877,
|
1284 |
+
"step": 850
|
1285 |
+
},
|
1286 |
+
{
|
1287 |
+
"epoch": 0.5094032281948764,
|
1288 |
+
"grad_norm": 44.65987676821851,
|
1289 |
+
"learning_rate": 2.852995306261545e-07,
|
1290 |
+
"logits/chosen": -2.4761202335357666,
|
1291 |
+
"logits/rejected": -2.458343982696533,
|
1292 |
+
"logps/chosen": -416.10015869140625,
|
1293 |
+
"logps/rejected": -269.155029296875,
|
1294 |
+
"loss": 0.1904,
|
1295 |
+
"rewards/accuracies": 0.949999988079071,
|
1296 |
+
"rewards/chosen": -0.5461829900741577,
|
1297 |
+
"rewards/margins": 8.20594310760498,
|
1298 |
+
"rewards/rejected": -8.75212574005127,
|
1299 |
+
"step": 860
|
1300 |
+
},
|
1301 |
+
{
|
1302 |
+
"epoch": 0.5153265215459796,
|
1303 |
+
"grad_norm": 138.61047314539718,
|
1304 |
+
"learning_rate": 2.801736527291797e-07,
|
1305 |
+
"logits/chosen": -2.4353713989257812,
|
1306 |
+
"logits/rejected": -2.440162420272827,
|
1307 |
+
"logps/chosen": -368.56671142578125,
|
1308 |
+
"logps/rejected": -262.32000732421875,
|
1309 |
+
"loss": 0.1165,
|
1310 |
+
"rewards/accuracies": 0.956250011920929,
|
1311 |
+
"rewards/chosen": -0.6010546684265137,
|
1312 |
+
"rewards/margins": 8.25703239440918,
|
1313 |
+
"rewards/rejected": -8.858087539672852,
|
1314 |
+
"step": 870
|
1315 |
+
},
|
1316 |
+
{
|
1317 |
+
"epoch": 0.5212498148970828,
|
1318 |
+
"grad_norm": 32.99592732987078,
|
1319 |
+
"learning_rate": 2.750348686852836e-07,
|
1320 |
+
"logits/chosen": -2.4713826179504395,
|
1321 |
+
"logits/rejected": -2.4460389614105225,
|
1322 |
+
"logps/chosen": -395.4850769042969,
|
1323 |
+
"logps/rejected": -263.14697265625,
|
1324 |
+
"loss": 0.0963,
|
1325 |
+
"rewards/accuracies": 0.956250011920929,
|
1326 |
+
"rewards/chosen": -0.47209230065345764,
|
1327 |
+
"rewards/margins": 8.793320655822754,
|
1328 |
+
"rewards/rejected": -9.265413284301758,
|
1329 |
+
"step": 880
|
1330 |
+
},
|
1331 |
+
{
|
1332 |
+
"epoch": 0.527173108248186,
|
1333 |
+
"grad_norm": 225.08525680844807,
|
1334 |
+
"learning_rate": 2.69885376501531e-07,
|
1335 |
+
"logits/chosen": -2.4371562004089355,
|
1336 |
+
"logits/rejected": -2.4292545318603516,
|
1337 |
+
"logps/chosen": -368.02349853515625,
|
1338 |
+
"logps/rejected": -282.25390625,
|
1339 |
+
"loss": 0.0939,
|
1340 |
+
"rewards/accuracies": 0.956250011920929,
|
1341 |
+
"rewards/chosen": -0.43533068895339966,
|
1342 |
+
"rewards/margins": 8.678312301635742,
|
1343 |
+
"rewards/rejected": -9.113642692565918,
|
1344 |
+
"step": 890
|
1345 |
+
},
|
1346 |
+
{
|
1347 |
+
"epoch": 0.5330964015992892,
|
1348 |
+
"grad_norm": 117.84513266560047,
|
1349 |
+
"learning_rate": 2.647273787651687e-07,
|
1350 |
+
"logits/chosen": -2.4487619400024414,
|
1351 |
+
"logits/rejected": -2.401111125946045,
|
1352 |
+
"logps/chosen": -412.74359130859375,
|
1353 |
+
"logps/rejected": -252.1238250732422,
|
1354 |
+
"loss": 0.1775,
|
1355 |
+
"rewards/accuracies": 0.9437500238418579,
|
1356 |
+
"rewards/chosen": -0.4897252917289734,
|
1357 |
+
"rewards/margins": 8.060205459594727,
|
1358 |
+
"rewards/rejected": -8.549931526184082,
|
1359 |
+
"step": 900
|
1360 |
+
},
|
1361 |
+
{
|
1362 |
+
"epoch": 0.5390196949503924,
|
1363 |
+
"grad_norm": 109.88865766710877,
|
1364 |
+
"learning_rate": 2.5956308170151526e-07,
|
1365 |
+
"logits/chosen": -2.403801441192627,
|
1366 |
+
"logits/rejected": -2.3616394996643066,
|
1367 |
+
"logps/chosen": -408.7337951660156,
|
1368 |
+
"logps/rejected": -258.9072570800781,
|
1369 |
+
"loss": 0.1848,
|
1370 |
+
"rewards/accuracies": 0.9312499761581421,
|
1371 |
+
"rewards/chosen": -0.19426563382148743,
|
1372 |
+
"rewards/margins": 8.246191024780273,
|
1373 |
+
"rewards/rejected": -8.44045639038086,
|
1374 |
+
"step": 910
|
1375 |
+
},
|
1376 |
+
{
|
1377 |
+
"epoch": 0.5449429883014957,
|
1378 |
+
"grad_norm": 129.4752751715463,
|
1379 |
+
"learning_rate": 2.543946942302944e-07,
|
1380 |
+
"logits/chosen": -2.4435765743255615,
|
1381 |
+
"logits/rejected": -2.440274477005005,
|
1382 |
+
"logps/chosen": -338.3968505859375,
|
1383 |
+
"logps/rejected": -252.08511352539062,
|
1384 |
+
"loss": 0.1796,
|
1385 |
+
"rewards/accuracies": 0.918749988079071,
|
1386 |
+
"rewards/chosen": -0.5266432762145996,
|
1387 |
+
"rewards/margins": 7.261737823486328,
|
1388 |
+
"rewards/rejected": -7.7883806228637695,
|
1389 |
+
"step": 920
|
1390 |
+
},
|
1391 |
+
{
|
1392 |
+
"epoch": 0.5508662816525989,
|
1393 |
+
"grad_norm": 121.9458874503311,
|
1394 |
+
"learning_rate": 2.492244270208158e-07,
|
1395 |
+
"logits/chosen": -2.441021680831909,
|
1396 |
+
"logits/rejected": -2.4099669456481934,
|
1397 |
+
"logps/chosen": -378.41082763671875,
|
1398 |
+
"logps/rejected": -243.3146209716797,
|
1399 |
+
"loss": 0.1649,
|
1400 |
+
"rewards/accuracies": 0.9125000238418579,
|
1401 |
+
"rewards/chosen": -0.03578373044729233,
|
1402 |
+
"rewards/margins": 7.419234275817871,
|
1403 |
+
"rewards/rejected": -7.455018520355225,
|
1404 |
+
"step": 930
|
1405 |
+
},
|
1406 |
+
{
|
1407 |
+
"epoch": 0.5567895750037021,
|
1408 |
+
"grad_norm": 100.11022642436808,
|
1409 |
+
"learning_rate": 2.440544915464078e-07,
|
1410 |
+
"logits/chosen": -2.3852946758270264,
|
1411 |
+
"logits/rejected": -2.384293794631958,
|
1412 |
+
"logps/chosen": -395.0909118652344,
|
1413 |
+
"logps/rejected": -283.81109619140625,
|
1414 |
+
"loss": 0.1261,
|
1415 |
+
"rewards/accuracies": 0.9375,
|
1416 |
+
"rewards/chosen": -0.41745632886886597,
|
1417 |
+
"rewards/margins": 8.085180282592773,
|
1418 |
+
"rewards/rejected": -8.502635955810547,
|
1419 |
+
"step": 940
|
1420 |
+
},
|
1421 |
+
{
|
1422 |
+
"epoch": 0.5627128683548053,
|
1423 |
+
"grad_norm": 239.01205162989254,
|
1424 |
+
"learning_rate": 2.3888709913850593e-07,
|
1425 |
+
"logits/chosen": -2.415767192840576,
|
1426 |
+
"logits/rejected": -2.4106380939483643,
|
1427 |
+
"logps/chosen": -308.04681396484375,
|
1428 |
+
"logps/rejected": -246.5779266357422,
|
1429 |
+
"loss": 0.2266,
|
1430 |
+
"rewards/accuracies": 0.893750011920929,
|
1431 |
+
"rewards/chosen": -0.8913130760192871,
|
1432 |
+
"rewards/margins": 7.0888872146606445,
|
1433 |
+
"rewards/rejected": -7.980200290679932,
|
1434 |
+
"step": 950
|
1435 |
+
},
|
1436 |
+
{
|
1437 |
+
"epoch": 0.5686361617059085,
|
1438 |
+
"grad_norm": 63.97848384116781,
|
1439 |
+
"learning_rate": 2.337244600408025e-07,
|
1440 |
+
"logits/chosen": -2.3705592155456543,
|
1441 |
+
"logits/rejected": -2.3697023391723633,
|
1442 |
+
"logps/chosen": -387.23468017578125,
|
1443 |
+
"logps/rejected": -259.0006408691406,
|
1444 |
+
"loss": 0.1191,
|
1445 |
+
"rewards/accuracies": 0.956250011920929,
|
1446 |
+
"rewards/chosen": -0.3846256732940674,
|
1447 |
+
"rewards/margins": 8.353618621826172,
|
1448 |
+
"rewards/rejected": -8.738243103027344,
|
1449 |
+
"step": 960
|
1450 |
+
},
|
1451 |
+
{
|
1452 |
+
"epoch": 0.5745594550570117,
|
1453 |
+
"grad_norm": 117.6322527275228,
|
1454 |
+
"learning_rate": 2.2856878246386085e-07,
|
1455 |
+
"logits/chosen": -2.3312573432922363,
|
1456 |
+
"logits/rejected": -2.337036371231079,
|
1457 |
+
"logps/chosen": -396.6022644042969,
|
1458 |
+
"logps/rejected": -264.2213439941406,
|
1459 |
+
"loss": 0.1741,
|
1460 |
+
"rewards/accuracies": 0.9125000238418579,
|
1461 |
+
"rewards/chosen": -0.13234379887580872,
|
1462 |
+
"rewards/margins": 8.489058494567871,
|
1463 |
+
"rewards/rejected": -8.621402740478516,
|
1464 |
+
"step": 970
|
1465 |
+
},
|
1466 |
+
{
|
1467 |
+
"epoch": 0.5804827484081149,
|
1468 |
+
"grad_norm": 104.9036090682925,
|
1469 |
+
"learning_rate": 2.2342227164060035e-07,
|
1470 |
+
"logits/chosen": -2.3327765464782715,
|
1471 |
+
"logits/rejected": -2.3284411430358887,
|
1472 |
+
"logps/chosen": -389.34027099609375,
|
1473 |
+
"logps/rejected": -267.3720703125,
|
1474 |
+
"loss": 0.0761,
|
1475 |
+
"rewards/accuracies": 0.987500011920929,
|
1476 |
+
"rewards/chosen": 0.05067775771021843,
|
1477 |
+
"rewards/margins": 8.64169692993164,
|
1478 |
+
"rewards/rejected": -8.591019630432129,
|
1479 |
+
"step": 980
|
1480 |
+
},
|
1481 |
+
{
|
1482 |
+
"epoch": 0.5864060417592182,
|
1483 |
+
"grad_norm": 159.50861121207825,
|
1484 |
+
"learning_rate": 2.182871288830533e-07,
|
1485 |
+
"logits/chosen": -2.286221742630005,
|
1486 |
+
"logits/rejected": -2.3094284534454346,
|
1487 |
+
"logps/chosen": -356.39630126953125,
|
1488 |
+
"logps/rejected": -273.9451599121094,
|
1489 |
+
"loss": 0.1836,
|
1490 |
+
"rewards/accuracies": 0.9375,
|
1491 |
+
"rewards/chosen": -0.7623409032821655,
|
1492 |
+
"rewards/margins": 7.787258148193359,
|
1493 |
+
"rewards/rejected": -8.549599647521973,
|
1494 |
+
"step": 990
|
1495 |
+
},
|
1496 |
+
{
|
1497 |
+
"epoch": 0.5923293351103214,
|
1498 |
+
"grad_norm": 133.61865184419023,
|
1499 |
+
"learning_rate": 2.131655506408007e-07,
|
1500 |
+
"logits/chosen": -2.295893430709839,
|
1501 |
+
"logits/rejected": -2.2877814769744873,
|
1502 |
+
"logps/chosen": -390.1469421386719,
|
1503 |
+
"logps/rejected": -258.03076171875,
|
1504 |
+
"loss": 0.1684,
|
1505 |
+
"rewards/accuracies": 0.9375,
|
1506 |
+
"rewards/chosen": -0.3687962293624878,
|
1507 |
+
"rewards/margins": 8.301212310791016,
|
1508 |
+
"rewards/rejected": -8.670007705688477,
|
1509 |
+
"step": 1000
|
1510 |
+
},
|
1511 |
+
{
|
1512 |
+
"epoch": 0.5982526284614246,
|
1513 |
+
"grad_norm": 281.753571055999,
|
1514 |
+
"learning_rate": 2.0805972756148643e-07,
|
1515 |
+
"logits/chosen": -2.3332886695861816,
|
1516 |
+
"logits/rejected": -2.3315188884735107,
|
1517 |
+
"logps/chosen": -409.4767150878906,
|
1518 |
+
"logps/rejected": -261.0493469238281,
|
1519 |
+
"loss": 0.1785,
|
1520 |
+
"rewards/accuracies": 0.925000011920929,
|
1521 |
+
"rewards/chosen": -0.04291856661438942,
|
1522 |
+
"rewards/margins": 8.258207321166992,
|
1523 |
+
"rewards/rejected": -8.301126480102539,
|
1524 |
+
"step": 1010
|
1525 |
+
},
|
1526 |
+
{
|
1527 |
+
"epoch": 0.6041759218125278,
|
1528 |
+
"grad_norm": 284.7843229605494,
|
1529 |
+
"learning_rate": 2.0297184355381432e-07,
|
1530 |
+
"logits/chosen": -2.3366830348968506,
|
1531 |
+
"logits/rejected": -2.3286259174346924,
|
1532 |
+
"logps/chosen": -380.1471862792969,
|
1533 |
+
"logps/rejected": -256.72039794921875,
|
1534 |
+
"loss": 0.1423,
|
1535 |
+
"rewards/accuracies": 0.9624999761581421,
|
1536 |
+
"rewards/chosen": 0.037542905658483505,
|
1537 |
+
"rewards/margins": 8.408895492553711,
|
1538 |
+
"rewards/rejected": -8.37135124206543,
|
1539 |
+
"step": 1020
|
1540 |
+
},
|
1541 |
+
{
|
1542 |
+
"epoch": 0.610099215163631,
|
1543 |
+
"grad_norm": 211.9761172407754,
|
1544 |
+
"learning_rate": 1.9790407485342638e-07,
|
1545 |
+
"logits/chosen": -2.2436161041259766,
|
1546 |
+
"logits/rejected": -2.2300117015838623,
|
1547 |
+
"logps/chosen": -398.33917236328125,
|
1548 |
+
"logps/rejected": -265.8940734863281,
|
1549 |
+
"loss": 0.1086,
|
1550 |
+
"rewards/accuracies": 0.956250011920929,
|
1551 |
+
"rewards/chosen": -0.20479007065296173,
|
1552 |
+
"rewards/margins": 8.657182693481445,
|
1553 |
+
"rewards/rejected": -8.861973762512207,
|
1554 |
+
"step": 1030
|
1555 |
+
},
|
1556 |
+
{
|
1557 |
+
"epoch": 0.6160225085147342,
|
1558 |
+
"grad_norm": 170.42441601315315,
|
1559 |
+
"learning_rate": 1.928585890920641e-07,
|
1560 |
+
"logits/chosen": -2.310929536819458,
|
1561 |
+
"logits/rejected": -2.305142879486084,
|
1562 |
+
"logps/chosen": -329.87359619140625,
|
1563 |
+
"logps/rejected": -256.7166748046875,
|
1564 |
+
"loss": 0.1104,
|
1565 |
+
"rewards/accuracies": 0.96875,
|
1566 |
+
"rewards/chosen": -0.6634355783462524,
|
1567 |
+
"rewards/margins": 8.081941604614258,
|
1568 |
+
"rewards/rejected": -8.745377540588379,
|
1569 |
+
"step": 1040
|
1570 |
+
},
|
1571 |
+
{
|
1572 |
+
"epoch": 0.6219458018658374,
|
1573 |
+
"grad_norm": 128.54909030583076,
|
1574 |
+
"learning_rate": 1.8783754437040902e-07,
|
1575 |
+
"logits/chosen": -2.3843979835510254,
|
1576 |
+
"logits/rejected": -2.374628782272339,
|
1577 |
+
"logps/chosen": -384.445068359375,
|
1578 |
+
"logps/rejected": -262.91778564453125,
|
1579 |
+
"loss": 0.1279,
|
1580 |
+
"rewards/accuracies": 0.956250011920929,
|
1581 |
+
"rewards/chosen": -0.14523005485534668,
|
1582 |
+
"rewards/margins": 9.181239128112793,
|
1583 |
+
"rewards/rejected": -9.326468467712402,
|
1584 |
+
"step": 1050
|
1585 |
+
},
|
1586 |
+
{
|
1587 |
+
"epoch": 0.6278690952169406,
|
1588 |
+
"grad_norm": 307.8448068760573,
|
1589 |
+
"learning_rate": 1.8284308833500118e-07,
|
1590 |
+
"logits/chosen": -2.3700027465820312,
|
1591 |
+
"logits/rejected": -2.3581337928771973,
|
1592 |
+
"logps/chosen": -355.612060546875,
|
1593 |
+
"logps/rejected": -268.969970703125,
|
1594 |
+
"loss": 0.133,
|
1595 |
+
"rewards/accuracies": 0.949999988079071,
|
1596 |
+
"rewards/chosen": -0.7148059010505676,
|
1597 |
+
"rewards/margins": 8.257380485534668,
|
1598 |
+
"rewards/rejected": -8.972186088562012,
|
1599 |
+
"step": 1060
|
1600 |
+
},
|
1601 |
+
{
|
1602 |
+
"epoch": 0.6337923885680439,
|
1603 |
+
"grad_norm": 214.38553119597148,
|
1604 |
+
"learning_rate": 1.7787735725962756e-07,
|
1605 |
+
"logits/chosen": -2.358398199081421,
|
1606 |
+
"logits/rejected": -2.3408474922180176,
|
1607 |
+
"logps/chosen": -337.56427001953125,
|
1608 |
+
"logps/rejected": -256.54119873046875,
|
1609 |
+
"loss": 0.2134,
|
1610 |
+
"rewards/accuracies": 0.918749988079071,
|
1611 |
+
"rewards/chosen": -1.1531927585601807,
|
1612 |
+
"rewards/margins": 7.760645389556885,
|
1613 |
+
"rewards/rejected": -8.913838386535645,
|
1614 |
+
"step": 1070
|
1615 |
+
},
|
1616 |
+
{
|
1617 |
+
"epoch": 0.6397156819191471,
|
1618 |
+
"grad_norm": 175.56218680703643,
|
1619 |
+
"learning_rate": 1.7294247513157616e-07,
|
1620 |
+
"logits/chosen": -2.3990116119384766,
|
1621 |
+
"logits/rejected": -2.3715598583221436,
|
1622 |
+
"logps/chosen": -430.0047302246094,
|
1623 |
+
"logps/rejected": -277.5758361816406,
|
1624 |
+
"loss": 0.1585,
|
1625 |
+
"rewards/accuracies": 0.949999988079071,
|
1626 |
+
"rewards/chosen": -0.28831297159194946,
|
1627 |
+
"rewards/margins": 9.353979110717773,
|
1628 |
+
"rewards/rejected": -9.642291069030762,
|
1629 |
+
"step": 1080
|
1630 |
+
},
|
1631 |
+
{
|
1632 |
+
"epoch": 0.6456389752702503,
|
1633 |
+
"grad_norm": 251.12704474839504,
|
1634 |
+
"learning_rate": 1.6804055274314494e-07,
|
1635 |
+
"logits/chosen": -2.3074963092803955,
|
1636 |
+
"logits/rejected": -2.3215384483337402,
|
1637 |
+
"logps/chosen": -360.34307861328125,
|
1638 |
+
"logps/rejected": -262.5448913574219,
|
1639 |
+
"loss": 0.144,
|
1640 |
+
"rewards/accuracies": 0.9624999761581421,
|
1641 |
+
"rewards/chosen": -0.527413010597229,
|
1642 |
+
"rewards/margins": 7.982884883880615,
|
1643 |
+
"rewards/rejected": -8.510297775268555,
|
1644 |
+
"step": 1090
|
1645 |
+
},
|
1646 |
+
{
|
1647 |
+
"epoch": 0.6515622686213535,
|
1648 |
+
"grad_norm": 199.1781794411605,
|
1649 |
+
"learning_rate": 1.6317368678879496e-07,
|
1650 |
+
"logits/chosen": -2.358985662460327,
|
1651 |
+
"logits/rejected": -2.3370273113250732,
|
1652 |
+
"logps/chosen": -427.634521484375,
|
1653 |
+
"logps/rejected": -271.89276123046875,
|
1654 |
+
"loss": 0.1672,
|
1655 |
+
"rewards/accuracies": 0.90625,
|
1656 |
+
"rewards/chosen": -0.5357122421264648,
|
1657 |
+
"rewards/margins": 8.577353477478027,
|
1658 |
+
"rewards/rejected": -9.113065719604492,
|
1659 |
+
"step": 1100
|
1660 |
+
},
|
1661 |
+
{
|
1662 |
+
"epoch": 0.6574855619724567,
|
1663 |
+
"grad_norm": 117.97757864532069,
|
1664 |
+
"learning_rate": 1.5834395896833281e-07,
|
1665 |
+
"logits/chosen": -2.4176013469696045,
|
1666 |
+
"logits/rejected": -2.4249093532562256,
|
1667 |
+
"logps/chosen": -401.90667724609375,
|
1668 |
+
"logps/rejected": -275.45086669921875,
|
1669 |
+
"loss": 0.1541,
|
1670 |
+
"rewards/accuracies": 0.9624999761581421,
|
1671 |
+
"rewards/chosen": -0.13858655095100403,
|
1672 |
+
"rewards/margins": 9.566910743713379,
|
1673 |
+
"rewards/rejected": -9.705496788024902,
|
1674 |
+
"step": 1110
|
1675 |
+
},
|
1676 |
+
{
|
1677 |
+
"epoch": 0.6634088553235599,
|
1678 |
+
"grad_norm": 258.70885157576424,
|
1679 |
+
"learning_rate": 1.535534350965075e-07,
|
1680 |
+
"logits/chosen": -2.3172340393066406,
|
1681 |
+
"logits/rejected": -2.309823989868164,
|
1682 |
+
"logps/chosen": -340.2889709472656,
|
1683 |
+
"logps/rejected": -265.4190979003906,
|
1684 |
+
"loss": 0.1473,
|
1685 |
+
"rewards/accuracies": 0.9375,
|
1686 |
+
"rewards/chosen": -1.003075361251831,
|
1687 |
+
"rewards/margins": 8.131772994995117,
|
1688 |
+
"rewards/rejected": -9.134848594665527,
|
1689 |
+
"step": 1120
|
1690 |
+
},
|
1691 |
+
{
|
1692 |
+
"epoch": 0.6693321486746631,
|
1693 |
+
"grad_norm": 201.96502150689784,
|
1694 |
+
"learning_rate": 1.4880416421940154e-07,
|
1695 |
+
"logits/chosen": -2.4160609245300293,
|
1696 |
+
"logits/rejected": -2.4233767986297607,
|
1697 |
+
"logps/chosen": -405.70684814453125,
|
1698 |
+
"logps/rejected": -260.34912109375,
|
1699 |
+
"loss": 0.1724,
|
1700 |
+
"rewards/accuracies": 0.9375,
|
1701 |
+
"rewards/chosen": 0.08480462431907654,
|
1702 |
+
"rewards/margins": 8.918376922607422,
|
1703 |
+
"rewards/rejected": -8.833572387695312,
|
1704 |
+
"step": 1130
|
1705 |
+
},
|
1706 |
+
{
|
1707 |
+
"epoch": 0.6752554420257664,
|
1708 |
+
"grad_norm": 181.74682648839334,
|
1709 |
+
"learning_rate": 1.4409817773799459e-07,
|
1710 |
+
"logits/chosen": -2.4215950965881348,
|
1711 |
+
"logits/rejected": -2.4049665927886963,
|
1712 |
+
"logps/chosen": -366.7375183105469,
|
1713 |
+
"logps/rejected": -254.3837432861328,
|
1714 |
+
"loss": 0.0756,
|
1715 |
+
"rewards/accuracies": 0.96875,
|
1716 |
+
"rewards/chosen": -0.5654062032699585,
|
1717 |
+
"rewards/margins": 8.568774223327637,
|
1718 |
+
"rewards/rejected": -9.134181022644043,
|
1719 |
+
"step": 1140
|
1720 |
+
},
|
1721 |
+
{
|
1722 |
+
"epoch": 0.6811787353768696,
|
1723 |
+
"grad_norm": 67.27024810633608,
|
1724 |
+
"learning_rate": 1.3943748853927385e-07,
|
1725 |
+
"logits/chosen": -2.405170202255249,
|
1726 |
+
"logits/rejected": -2.371568202972412,
|
1727 |
+
"logps/chosen": -390.42718505859375,
|
1728 |
+
"logps/rejected": -272.73919677734375,
|
1729 |
+
"loss": 0.1351,
|
1730 |
+
"rewards/accuracies": 0.9437500238418579,
|
1731 |
+
"rewards/chosen": -0.37090998888015747,
|
1732 |
+
"rewards/margins": 8.646610260009766,
|
1733 |
+
"rewards/rejected": -9.017520904541016,
|
1734 |
+
"step": 1150
|
1735 |
+
},
|
1736 |
+
{
|
1737 |
+
"epoch": 0.6871020287279728,
|
1738 |
+
"grad_norm": 55.76657618504751,
|
1739 |
+
"learning_rate": 1.3482409013526436e-07,
|
1740 |
+
"logits/chosen": -2.441098213195801,
|
1741 |
+
"logits/rejected": -2.4157867431640625,
|
1742 |
+
"logps/chosen": -384.2054443359375,
|
1743 |
+
"logps/rejected": -268.0479736328125,
|
1744 |
+
"loss": 0.1027,
|
1745 |
+
"rewards/accuracies": 0.9624999761581421,
|
1746 |
+
"rewards/chosen": -0.44497281312942505,
|
1747 |
+
"rewards/margins": 8.480072021484375,
|
1748 |
+
"rewards/rejected": -8.925044059753418,
|
1749 |
+
"step": 1160
|
1750 |
+
},
|
1751 |
+
{
|
1752 |
+
"epoch": 0.693025322079076,
|
1753 |
+
"grad_norm": 25.040104944811635,
|
1754 |
+
"learning_rate": 1.302599558103456e-07,
|
1755 |
+
"logits/chosen": -2.3982086181640625,
|
1756 |
+
"logits/rejected": -2.385756015777588,
|
1757 |
+
"logps/chosen": -382.56939697265625,
|
1758 |
+
"logps/rejected": -260.25616455078125,
|
1759 |
+
"loss": 0.1179,
|
1760 |
+
"rewards/accuracies": 0.949999988079071,
|
1761 |
+
"rewards/chosen": -0.5692565441131592,
|
1762 |
+
"rewards/margins": 8.688024520874023,
|
1763 |
+
"rewards/rejected": -9.257280349731445,
|
1764 |
+
"step": 1170
|
1765 |
+
},
|
1766 |
+
{
|
1767 |
+
"epoch": 0.6989486154301792,
|
1768 |
+
"grad_norm": 296.46624172556943,
|
1769 |
+
"learning_rate": 1.257470377772214e-07,
|
1770 |
+
"logits/chosen": -2.3739066123962402,
|
1771 |
+
"logits/rejected": -2.3816215991973877,
|
1772 |
+
"logps/chosen": -385.3483581542969,
|
1773 |
+
"logps/rejected": -271.0584411621094,
|
1774 |
+
"loss": 0.1722,
|
1775 |
+
"rewards/accuracies": 0.956250011920929,
|
1776 |
+
"rewards/chosen": 0.0011343419319018722,
|
1777 |
+
"rewards/margins": 9.203813552856445,
|
1778 |
+
"rewards/rejected": -9.202679634094238,
|
1779 |
+
"step": 1180
|
1780 |
+
},
|
1781 |
+
{
|
1782 |
+
"epoch": 0.7048719087812824,
|
1783 |
+
"grad_norm": 62.58357200837184,
|
1784 |
+
"learning_rate": 1.2128726634190046e-07,
|
1785 |
+
"logits/chosen": -2.4125571250915527,
|
1786 |
+
"logits/rejected": -2.3731584548950195,
|
1787 |
+
"logps/chosen": -360.48260498046875,
|
1788 |
+
"logps/rejected": -257.780517578125,
|
1789 |
+
"loss": 0.1151,
|
1790 |
+
"rewards/accuracies": 0.949999988079071,
|
1791 |
+
"rewards/chosen": 0.00764580350369215,
|
1792 |
+
"rewards/margins": 8.383014678955078,
|
1793 |
+
"rewards/rejected": -8.375368118286133,
|
1794 |
+
"step": 1190
|
1795 |
+
},
|
1796 |
+
{
|
1797 |
+
"epoch": 0.7107952021323856,
|
1798 |
+
"grad_norm": 104.3167050157556,
|
1799 |
+
"learning_rate": 1.1688254907804992e-07,
|
1800 |
+
"logits/chosen": -2.4090094566345215,
|
1801 |
+
"logits/rejected": -2.4037535190582275,
|
1802 |
+
"logps/chosen": -446.6856384277344,
|
1803 |
+
"logps/rejected": -272.03125,
|
1804 |
+
"loss": 0.1451,
|
1805 |
+
"rewards/accuracies": 0.918749988079071,
|
1806 |
+
"rewards/chosen": 0.24328398704528809,
|
1807 |
+
"rewards/margins": 8.856546401977539,
|
1808 |
+
"rewards/rejected": -8.613263130187988,
|
1809 |
+
"step": 1200
|
1810 |
+
},
|
1811 |
+
{
|
1812 |
+
"epoch": 0.7167184954834889,
|
1813 |
+
"grad_norm": 167.28701038200384,
|
1814 |
+
"learning_rate": 1.1253477001106956e-07,
|
1815 |
+
"logits/chosen": -2.4815921783447266,
|
1816 |
+
"logits/rejected": -2.4600720405578613,
|
1817 |
+
"logps/chosen": -373.46539306640625,
|
1818 |
+
"logps/rejected": -249.20217895507812,
|
1819 |
+
"loss": 0.1822,
|
1820 |
+
"rewards/accuracies": 0.956250011920929,
|
1821 |
+
"rewards/chosen": 0.3091612756252289,
|
1822 |
+
"rewards/margins": 8.595071792602539,
|
1823 |
+
"rewards/rejected": -8.285909652709961,
|
1824 |
+
"step": 1210
|
1825 |
+
},
|
1826 |
+
{
|
1827 |
+
"epoch": 0.7226417888345921,
|
1828 |
+
"grad_norm": 103.53904259555343,
|
1829 |
+
"learning_rate": 1.0824578881224065e-07,
|
1830 |
+
"logits/chosen": -2.4788851737976074,
|
1831 |
+
"logits/rejected": -2.473902940750122,
|
1832 |
+
"logps/chosen": -348.9431457519531,
|
1833 |
+
"logps/rejected": -238.5637664794922,
|
1834 |
+
"loss": 0.1396,
|
1835 |
+
"rewards/accuracies": 0.9375,
|
1836 |
+
"rewards/chosen": -0.04494175314903259,
|
1837 |
+
"rewards/margins": 7.9483323097229,
|
1838 |
+
"rewards/rejected": -7.993273735046387,
|
1839 |
+
"step": 1220
|
1840 |
+
},
|
1841 |
+
{
|
1842 |
+
"epoch": 0.7285650821856953,
|
1843 |
+
"grad_norm": 60.68963545277756,
|
1844 |
+
"learning_rate": 1.0401744000328918e-07,
|
1845 |
+
"logits/chosen": -2.423682451248169,
|
1846 |
+
"logits/rejected": -2.424591302871704,
|
1847 |
+
"logps/chosen": -362.5735168457031,
|
1848 |
+
"logps/rejected": -250.6082305908203,
|
1849 |
+
"loss": 0.0947,
|
1850 |
+
"rewards/accuracies": 0.96875,
|
1851 |
+
"rewards/chosen": 0.03537973761558533,
|
1852 |
+
"rewards/margins": 8.45991325378418,
|
1853 |
+
"rewards/rejected": -8.42453384399414,
|
1854 |
+
"step": 1230
|
1855 |
+
},
|
1856 |
+
{
|
1857 |
+
"epoch": 0.7344883755367985,
|
1858 |
+
"grad_norm": 178.49917347104252,
|
1859 |
+
"learning_rate": 9.985153217170902e-08,
|
1860 |
+
"logits/chosen": -2.441357135772705,
|
1861 |
+
"logits/rejected": -2.414604902267456,
|
1862 |
+
"logps/chosen": -331.5675964355469,
|
1863 |
+
"logps/rejected": -249.1182861328125,
|
1864 |
+
"loss": 0.1617,
|
1865 |
+
"rewards/accuracies": 0.9375,
|
1866 |
+
"rewards/chosen": -0.4034241735935211,
|
1867 |
+
"rewards/margins": 7.731410026550293,
|
1868 |
+
"rewards/rejected": -8.134834289550781,
|
1869 |
+
"step": 1240
|
1870 |
+
},
|
1871 |
+
{
|
1872 |
+
"epoch": 0.7404116688879017,
|
1873 |
+
"grad_norm": 150.3556045078429,
|
1874 |
+
"learning_rate": 9.574984719717553e-08,
|
1875 |
+
"logits/chosen": -2.402959108352661,
|
1876 |
+
"logits/rejected": -2.3822813034057617,
|
1877 |
+
"logps/chosen": -344.8562316894531,
|
1878 |
+
"logps/rejected": -261.72198486328125,
|
1879 |
+
"loss": 0.1141,
|
1880 |
+
"rewards/accuracies": 0.9437500238418579,
|
1881 |
+
"rewards/chosen": -0.2789481282234192,
|
1882 |
+
"rewards/margins": 8.856972694396973,
|
1883 |
+
"rewards/rejected": -9.135921478271484,
|
1884 |
+
"step": 1250
|
1885 |
+
},
|
1886 |
+
{
|
1887 |
+
"epoch": 0.7463349622390049,
|
1888 |
+
"grad_norm": 107.9110175024558,
|
1889 |
+
"learning_rate": 9.171413948938459e-08,
|
1890 |
+
"logits/chosen": -2.4185938835144043,
|
1891 |
+
"logits/rejected": -2.4270195960998535,
|
1892 |
+
"logps/chosen": -351.55206298828125,
|
1893 |
+
"logps/rejected": -245.40463256835938,
|
1894 |
+
"loss": 0.1194,
|
1895 |
+
"rewards/accuracies": 0.9750000238418579,
|
1896 |
+
"rewards/chosen": -0.38523998856544495,
|
1897 |
+
"rewards/margins": 8.319580078125,
|
1898 |
+
"rewards/rejected": -8.70482063293457,
|
1899 |
+
"step": 1260
|
1900 |
+
},
|
1901 |
+
{
|
1902 |
+
"epoch": 0.7522582555901081,
|
1903 |
+
"grad_norm": 201.2050831618457,
|
1904 |
+
"learning_rate": 8.774613523764049e-08,
|
1905 |
+
"logits/chosen": -2.417402744293213,
|
1906 |
+
"logits/rejected": -2.3926000595092773,
|
1907 |
+
"logps/chosen": -295.12255859375,
|
1908 |
+
"logps/rejected": -243.2334442138672,
|
1909 |
+
"loss": 0.1174,
|
1910 |
+
"rewards/accuracies": 0.9437500238418579,
|
1911 |
+
"rewards/chosen": -0.7759536504745483,
|
1912 |
+
"rewards/margins": 7.477177619934082,
|
1913 |
+
"rewards/rejected": -8.253131866455078,
|
1914 |
+
"step": 1270
|
1915 |
+
},
|
1916 |
+
{
|
1917 |
+
"epoch": 0.7581815489412113,
|
1918 |
+
"grad_norm": 118.14466939117713,
|
1919 |
+
"learning_rate": 8.384753167251412e-08,
|
1920 |
+
"logits/chosen": -2.4332449436187744,
|
1921 |
+
"logits/rejected": -2.4204506874084473,
|
1922 |
+
"logps/chosen": -347.2571716308594,
|
1923 |
+
"logps/rejected": -239.56527709960938,
|
1924 |
+
"loss": 0.1154,
|
1925 |
+
"rewards/accuracies": 0.9312499761581421,
|
1926 |
+
"rewards/chosen": -0.4777204990386963,
|
1927 |
+
"rewards/margins": 7.958296775817871,
|
1928 |
+
"rewards/rejected": -8.436017990112305,
|
1929 |
+
"step": 1280
|
1930 |
+
},
|
1931 |
+
{
|
1932 |
+
"epoch": 0.7641048422923146,
|
1933 |
+
"grad_norm": 115.9052038218948,
|
1934 |
+
"learning_rate": 8.001999633988942e-08,
|
1935 |
+
"logits/chosen": -2.409921169281006,
|
1936 |
+
"logits/rejected": -2.38704776763916,
|
1937 |
+
"logps/chosen": -343.9907531738281,
|
1938 |
+
"logps/rejected": -241.80130004882812,
|
1939 |
+
"loss": 0.1142,
|
1940 |
+
"rewards/accuracies": 0.9624999761581421,
|
1941 |
+
"rewards/chosen": -0.26248881220817566,
|
1942 |
+
"rewards/margins": 8.137993812561035,
|
1943 |
+
"rewards/rejected": -8.400481224060059,
|
1944 |
+
"step": 1290
|
1945 |
+
},
|
1946 |
+
{
|
1947 |
+
"epoch": 0.7700281356434178,
|
1948 |
+
"grad_norm": 44.99756218547529,
|
1949 |
+
"learning_rate": 7.62651663877042e-08,
|
1950 |
+
"logits/chosen": -2.431091547012329,
|
1951 |
+
"logits/rejected": -2.4076178073883057,
|
1952 |
+
"logps/chosen": -413.41339111328125,
|
1953 |
+
"logps/rejected": -272.5387268066406,
|
1954 |
+
"loss": 0.1154,
|
1955 |
+
"rewards/accuracies": 0.949999988079071,
|
1956 |
+
"rewards/chosen": -0.2713491916656494,
|
1957 |
+
"rewards/margins": 8.43775749206543,
|
1958 |
+
"rewards/rejected": -8.709107398986816,
|
1959 |
+
"step": 1300
|
1960 |
+
},
|
1961 |
+
{
|
1962 |
+
"epoch": 0.775951428994521,
|
1963 |
+
"grad_norm": 271.4269072656633,
|
1964 |
+
"learning_rate": 7.258464786569549e-08,
|
1965 |
+
"logits/chosen": -2.382718086242676,
|
1966 |
+
"logits/rejected": -2.3769688606262207,
|
1967 |
+
"logps/chosen": -327.7436218261719,
|
1968 |
+
"logps/rejected": -232.5410614013672,
|
1969 |
+
"loss": 0.1171,
|
1970 |
+
"rewards/accuracies": 0.9312499761581421,
|
1971 |
+
"rewards/chosen": -0.5554865002632141,
|
1972 |
+
"rewards/margins": 7.468624114990234,
|
1973 |
+
"rewards/rejected": -8.024110794067383,
|
1974 |
+
"step": 1310
|
1975 |
+
},
|
1976 |
+
{
|
1977 |
+
"epoch": 0.7818747223456242,
|
1978 |
+
"grad_norm": 101.64320308184249,
|
1979 |
+
"learning_rate": 6.898001503844483e-08,
|
1980 |
+
"logits/chosen": -2.417436361312866,
|
1981 |
+
"logits/rejected": -2.4156861305236816,
|
1982 |
+
"logps/chosen": -355.09088134765625,
|
1983 |
+
"logps/rejected": -256.650634765625,
|
1984 |
+
"loss": 0.0912,
|
1985 |
+
"rewards/accuracies": 0.9624999761581421,
|
1986 |
+
"rewards/chosen": -0.4817001223564148,
|
1987 |
+
"rewards/margins": 8.431524276733398,
|
1988 |
+
"rewards/rejected": -8.913224220275879,
|
1989 |
+
"step": 1320
|
1990 |
+
},
|
1991 |
+
{
|
1992 |
+
"epoch": 0.7877980156967274,
|
1993 |
+
"grad_norm": 70.70476612750028,
|
1994 |
+
"learning_rate": 6.545280971202014e-08,
|
1995 |
+
"logits/chosen": -2.3889780044555664,
|
1996 |
+
"logits/rejected": -2.3656888008117676,
|
1997 |
+
"logps/chosen": -353.8454895019531,
|
1998 |
+
"logps/rejected": -263.7891540527344,
|
1999 |
+
"loss": 0.1696,
|
2000 |
+
"rewards/accuracies": 0.9312499761581421,
|
2001 |
+
"rewards/chosen": -0.36209869384765625,
|
2002 |
+
"rewards/margins": 8.301480293273926,
|
2003 |
+
"rewards/rejected": -8.663579940795898,
|
2004 |
+
"step": 1330
|
2005 |
+
},
|
2006 |
+
{
|
2007 |
+
"epoch": 0.7937213090478306,
|
2008 |
+
"grad_norm": 115.20773379897521,
|
2009 |
+
"learning_rate": 6.200454057450022e-08,
|
2010 |
+
"logits/chosen": -2.4155004024505615,
|
2011 |
+
"logits/rejected": -2.3988840579986572,
|
2012 |
+
"logps/chosen": -358.6512756347656,
|
2013 |
+
"logps/rejected": -260.19317626953125,
|
2014 |
+
"loss": 0.1052,
|
2015 |
+
"rewards/accuracies": 0.949999988079071,
|
2016 |
+
"rewards/chosen": -0.13175497949123383,
|
2017 |
+
"rewards/margins": 8.193792343139648,
|
2018 |
+
"rewards/rejected": -8.325546264648438,
|
2019 |
+
"step": 1340
|
2020 |
+
},
|
2021 |
+
{
|
2022 |
+
"epoch": 0.7996446023989338,
|
2023 |
+
"grad_norm": 348.05960789906396,
|
2024 |
+
"learning_rate": 5.863668255066492e-08,
|
2025 |
+
"logits/chosen": -2.415785312652588,
|
2026 |
+
"logits/rejected": -2.402935028076172,
|
2027 |
+
"logps/chosen": -328.21258544921875,
|
2028 |
+
"logps/rejected": -232.1653289794922,
|
2029 |
+
"loss": 0.0952,
|
2030 |
+
"rewards/accuracies": 0.949999988079071,
|
2031 |
+
"rewards/chosen": -0.03695619851350784,
|
2032 |
+
"rewards/margins": 8.016546249389648,
|
2033 |
+
"rewards/rejected": -8.053503036499023,
|
2034 |
+
"step": 1350
|
2035 |
+
},
|
2036 |
+
{
|
2037 |
+
"epoch": 0.8055678957500371,
|
2038 |
+
"grad_norm": 164.95215219581857,
|
2039 |
+
"learning_rate": 5.53506761711274e-08,
|
2040 |
+
"logits/chosen": -2.4385123252868652,
|
2041 |
+
"logits/rejected": -2.410877227783203,
|
2042 |
+
"logps/chosen": -429.80712890625,
|
2043 |
+
"logps/rejected": -269.0475769042969,
|
2044 |
+
"loss": 0.184,
|
2045 |
+
"rewards/accuracies": 0.918749988079071,
|
2046 |
+
"rewards/chosen": 0.22647914290428162,
|
2047 |
+
"rewards/margins": 9.079859733581543,
|
2048 |
+
"rewards/rejected": -8.85338020324707,
|
2049 |
+
"step": 1360
|
2050 |
+
},
|
2051 |
+
{
|
2052 |
+
"epoch": 0.8114911891011403,
|
2053 |
+
"grad_norm": 153.76387128912395,
|
2054 |
+
"learning_rate": 5.2147926956177174e-08,
|
2055 |
+
"logits/chosen": -2.400052309036255,
|
2056 |
+
"logits/rejected": -2.3625330924987793,
|
2057 |
+
"logps/chosen": -375.1744079589844,
|
2058 |
+
"logps/rejected": -251.15213012695312,
|
2059 |
+
"loss": 0.1689,
|
2060 |
+
"rewards/accuracies": 0.9312499761581421,
|
2061 |
+
"rewards/chosen": -0.37147119641304016,
|
2062 |
+
"rewards/margins": 7.78417444229126,
|
2063 |
+
"rewards/rejected": -8.155645370483398,
|
2064 |
+
"step": 1370
|
2065 |
+
},
|
2066 |
+
{
|
2067 |
+
"epoch": 0.8174144824522435,
|
2068 |
+
"grad_norm": 61.83899130563118,
|
2069 |
+
"learning_rate": 4.902980481459834e-08,
|
2070 |
+
"logits/chosen": -2.410595655441284,
|
2071 |
+
"logits/rejected": -2.3901000022888184,
|
2072 |
+
"logps/chosen": -425.96466064453125,
|
2073 |
+
"logps/rejected": -264.555908203125,
|
2074 |
+
"loss": 0.1532,
|
2075 |
+
"rewards/accuracies": 0.925000011920929,
|
2076 |
+
"rewards/chosen": 0.039742302149534225,
|
2077 |
+
"rewards/margins": 8.688650131225586,
|
2078 |
+
"rewards/rejected": -8.648908615112305,
|
2079 |
+
"step": 1380
|
2080 |
+
},
|
2081 |
+
{
|
2082 |
+
"epoch": 0.8233377758033467,
|
2083 |
+
"grad_norm": 66.74057591923928,
|
2084 |
+
"learning_rate": 4.5997643457719646e-08,
|
2085 |
+
"logits/chosen": -2.343575954437256,
|
2086 |
+
"logits/rejected": -2.366262912750244,
|
2087 |
+
"logps/chosen": -375.5441589355469,
|
2088 |
+
"logps/rejected": -252.6354522705078,
|
2089 |
+
"loss": 0.1891,
|
2090 |
+
"rewards/accuracies": 0.925000011920929,
|
2091 |
+
"rewards/chosen": -0.5915766954421997,
|
2092 |
+
"rewards/margins": 8.14863395690918,
|
2093 |
+
"rewards/rejected": -8.74021053314209,
|
2094 |
+
"step": 1390
|
2095 |
+
},
|
2096 |
+
{
|
2097 |
+
"epoch": 0.8292610691544499,
|
2098 |
+
"grad_norm": 114.35276279118753,
|
2099 |
+
"learning_rate": 4.305273982894772e-08,
|
2100 |
+
"logits/chosen": -2.423459529876709,
|
2101 |
+
"logits/rejected": -2.391444683074951,
|
2102 |
+
"logps/chosen": -352.93212890625,
|
2103 |
+
"logps/rejected": -244.15249633789062,
|
2104 |
+
"loss": 0.104,
|
2105 |
+
"rewards/accuracies": 0.956250011920929,
|
2106 |
+
"rewards/chosen": -0.4639635980129242,
|
2107 |
+
"rewards/margins": 8.43293285369873,
|
2108 |
+
"rewards/rejected": -8.896896362304688,
|
2109 |
+
"step": 1400
|
2110 |
+
},
|
2111 |
+
{
|
2112 |
+
"epoch": 0.8351843625055531,
|
2113 |
+
"grad_norm": 289.8483422636645,
|
2114 |
+
"learning_rate": 4.0196353549026786e-08,
|
2115 |
+
"logits/chosen": -2.4569976329803467,
|
2116 |
+
"logits/rejected": -2.421668529510498,
|
2117 |
+
"logps/chosen": -413.7970275878906,
|
2118 |
+
"logps/rejected": -260.17559814453125,
|
2119 |
+
"loss": 0.0994,
|
2120 |
+
"rewards/accuracies": 0.956250011920929,
|
2121 |
+
"rewards/chosen": -0.029109030961990356,
|
2122 |
+
"rewards/margins": 8.420695304870605,
|
2123 |
+
"rewards/rejected": -8.449804306030273,
|
2124 |
+
"step": 1410
|
2125 |
+
},
|
2126 |
+
{
|
2127 |
+
"epoch": 0.8411076558566563,
|
2128 |
+
"grad_norm": 268.194865051895,
|
2129 |
+
"learning_rate": 3.742970637726181e-08,
|
2130 |
+
"logits/chosen": -2.401829242706299,
|
2131 |
+
"logits/rejected": -2.3853981494903564,
|
2132 |
+
"logps/chosen": -336.02154541015625,
|
2133 |
+
"logps/rejected": -260.9532775878906,
|
2134 |
+
"loss": 0.1105,
|
2135 |
+
"rewards/accuracies": 0.949999988079071,
|
2136 |
+
"rewards/chosen": -0.7859910726547241,
|
2137 |
+
"rewards/margins": 7.957832336425781,
|
2138 |
+
"rewards/rejected": -8.743824005126953,
|
2139 |
+
"step": 1420
|
2140 |
+
},
|
2141 |
+
{
|
2142 |
+
"epoch": 0.8470309492077596,
|
2143 |
+
"grad_norm": 108.72556342354649,
|
2144 |
+
"learning_rate": 3.4753981688937284e-08,
|
2145 |
+
"logits/chosen": -2.4282424449920654,
|
2146 |
+
"logits/rejected": -2.4077298641204834,
|
2147 |
+
"logps/chosen": -372.74859619140625,
|
2148 |
+
"logps/rejected": -255.5635986328125,
|
2149 |
+
"loss": 0.1373,
|
2150 |
+
"rewards/accuracies": 0.9624999761581421,
|
2151 |
+
"rewards/chosen": -0.21870934963226318,
|
2152 |
+
"rewards/margins": 8.647294998168945,
|
2153 |
+
"rewards/rejected": -8.866004943847656,
|
2154 |
+
"step": 1430
|
2155 |
+
},
|
2156 |
+
{
|
2157 |
+
"epoch": 0.8529542425588628,
|
2158 |
+
"grad_norm": 91.89889805093581,
|
2159 |
+
"learning_rate": 3.217032396915265e-08,
|
2160 |
+
"logits/chosen": -2.392305850982666,
|
2161 |
+
"logits/rejected": -2.3840205669403076,
|
2162 |
+
"logps/chosen": -384.5450439453125,
|
2163 |
+
"logps/rejected": -271.7799987792969,
|
2164 |
+
"loss": 0.1026,
|
2165 |
+
"rewards/accuracies": 0.956250011920929,
|
2166 |
+
"rewards/chosen": 0.14383617043495178,
|
2167 |
+
"rewards/margins": 8.799039840698242,
|
2168 |
+
"rewards/rejected": -8.655204772949219,
|
2169 |
+
"step": 1440
|
2170 |
+
},
|
2171 |
+
{
|
2172 |
+
"epoch": 0.858877535909966,
|
2173 |
+
"grad_norm": 110.87871865085356,
|
2174 |
+
"learning_rate": 2.9679838323293404e-08,
|
2175 |
+
"logits/chosen": -2.4049434661865234,
|
2176 |
+
"logits/rejected": -2.3713347911834717,
|
2177 |
+
"logps/chosen": -388.73480224609375,
|
2178 |
+
"logps/rejected": -259.99884033203125,
|
2179 |
+
"loss": 0.1238,
|
2180 |
+
"rewards/accuracies": 0.96875,
|
2181 |
+
"rewards/chosen": -0.4035407602787018,
|
2182 |
+
"rewards/margins": 8.943741798400879,
|
2183 |
+
"rewards/rejected": -9.347283363342285,
|
2184 |
+
"step": 1450
|
2185 |
+
},
|
2186 |
+
{
|
2187 |
+
"epoch": 0.8648008292610692,
|
2188 |
+
"grad_norm": 173.62149862701466,
|
2189 |
+
"learning_rate": 2.728359000434488e-08,
|
2190 |
+
"logits/chosen": -2.460850715637207,
|
2191 |
+
"logits/rejected": -2.4532432556152344,
|
2192 |
+
"logps/chosen": -408.2056884765625,
|
2193 |
+
"logps/rejected": -253.97256469726562,
|
2194 |
+
"loss": 0.1465,
|
2195 |
+
"rewards/accuracies": 0.925000011920929,
|
2196 |
+
"rewards/chosen": -0.10670559108257294,
|
2197 |
+
"rewards/margins": 8.27380084991455,
|
2198 |
+
"rewards/rejected": -8.38050651550293,
|
2199 |
+
"step": 1460
|
2200 |
+
},
|
2201 |
+
{
|
2202 |
+
"epoch": 0.8707241226121724,
|
2203 |
+
"grad_norm": 71.42558720517053,
|
2204 |
+
"learning_rate": 2.498260395725302e-08,
|
2205 |
+
"logits/chosen": -2.43896484375,
|
2206 |
+
"logits/rejected": -2.4272048473358154,
|
2207 |
+
"logps/chosen": -360.89898681640625,
|
2208 |
+
"logps/rejected": -257.8056945800781,
|
2209 |
+
"loss": 0.1013,
|
2210 |
+
"rewards/accuracies": 0.9624999761581421,
|
2211 |
+
"rewards/chosen": -0.2727930545806885,
|
2212 |
+
"rewards/margins": 8.105804443359375,
|
2213 |
+
"rewards/rejected": -8.378597259521484,
|
2214 |
+
"step": 1470
|
2215 |
+
},
|
2216 |
+
{
|
2217 |
+
"epoch": 0.8766474159632756,
|
2218 |
+
"grad_norm": 101.27358766803283,
|
2219 |
+
"learning_rate": 2.2777864380525426e-08,
|
2220 |
+
"logits/chosen": -2.4488790035247803,
|
2221 |
+
"logits/rejected": -2.4199492931365967,
|
2222 |
+
"logps/chosen": -368.7762145996094,
|
2223 |
+
"logps/rejected": -244.88949584960938,
|
2224 |
+
"loss": 0.1181,
|
2225 |
+
"rewards/accuracies": 0.949999988079071,
|
2226 |
+
"rewards/chosen": 0.07807255536317825,
|
2227 |
+
"rewards/margins": 8.640520095825195,
|
2228 |
+
"rewards/rejected": -8.56244945526123,
|
2229 |
+
"step": 1480
|
2230 |
+
},
|
2231 |
+
{
|
2232 |
+
"epoch": 0.8825707093143788,
|
2233 |
+
"grad_norm": 248.7079758081644,
|
2234 |
+
"learning_rate": 2.0670314305261423e-08,
|
2235 |
+
"logits/chosen": -2.443664073944092,
|
2236 |
+
"logits/rejected": -2.4383292198181152,
|
2237 |
+
"logps/chosen": -358.8103332519531,
|
2238 |
+
"logps/rejected": -255.68338012695312,
|
2239 |
+
"loss": 0.1486,
|
2240 |
+
"rewards/accuracies": 0.918749988079071,
|
2241 |
+
"rewards/chosen": -0.19392237067222595,
|
2242 |
+
"rewards/margins": 8.529777526855469,
|
2243 |
+
"rewards/rejected": -8.723699569702148,
|
2244 |
+
"step": 1490
|
2245 |
+
},
|
2246 |
+
{
|
2247 |
+
"epoch": 0.8884940026654821,
|
2248 |
+
"grad_norm": 228.603556403638,
|
2249 |
+
"learning_rate": 1.866085519178995e-08,
|
2250 |
+
"logits/chosen": -2.430896282196045,
|
2251 |
+
"logits/rejected": -2.39713716506958,
|
2252 |
+
"logps/chosen": -370.627685546875,
|
2253 |
+
"logps/rejected": -263.00537109375,
|
2254 |
+
"loss": 0.147,
|
2255 |
+
"rewards/accuracies": 0.949999988079071,
|
2256 |
+
"rewards/chosen": -0.3709534704685211,
|
2257 |
+
"rewards/margins": 8.486063003540039,
|
2258 |
+
"rewards/rejected": -8.85701847076416,
|
2259 |
+
"step": 1500
|
2260 |
+
},
|
2261 |
+
{
|
2262 |
+
"epoch": 0.8944172960165853,
|
2263 |
+
"grad_norm": 98.42860918890575,
|
2264 |
+
"learning_rate": 1.675034654408894e-08,
|
2265 |
+
"logits/chosen": -2.434990167617798,
|
2266 |
+
"logits/rejected": -2.408191204071045,
|
2267 |
+
"logps/chosen": -367.3474426269531,
|
2268 |
+
"logps/rejected": -252.94888305664062,
|
2269 |
+
"loss": 0.0803,
|
2270 |
+
"rewards/accuracies": 0.9937499761581421,
|
2271 |
+
"rewards/chosen": -0.12121151387691498,
|
2272 |
+
"rewards/margins": 8.94743537902832,
|
2273 |
+
"rewards/rejected": -9.068646430969238,
|
2274 |
+
"step": 1510
|
2275 |
+
},
|
2276 |
+
{
|
2277 |
+
"epoch": 0.9003405893676885,
|
2278 |
+
"grad_norm": 139.6690107400583,
|
2279 |
+
"learning_rate": 1.4939605542150595e-08,
|
2280 |
+
"logits/chosen": -2.399888277053833,
|
2281 |
+
"logits/rejected": -2.36537504196167,
|
2282 |
+
"logps/chosen": -379.11102294921875,
|
2283 |
+
"logps/rejected": -256.906005859375,
|
2284 |
+
"loss": 0.0731,
|
2285 |
+
"rewards/accuracies": 0.9750000238418579,
|
2286 |
+
"rewards/chosen": -0.062218405306339264,
|
2287 |
+
"rewards/margins": 8.892073631286621,
|
2288 |
+
"rewards/rejected": -8.954293251037598,
|
2289 |
+
"step": 1520
|
2290 |
+
},
|
2291 |
+
{
|
2292 |
+
"epoch": 0.9062638827187917,
|
2293 |
+
"grad_norm": 67.8318737973644,
|
2294 |
+
"learning_rate": 1.3229406692449791e-08,
|
2295 |
+
"logits/chosen": -2.479100227355957,
|
2296 |
+
"logits/rejected": -2.4435877799987793,
|
2297 |
+
"logps/chosen": -313.84320068359375,
|
2298 |
+
"logps/rejected": -239.1044921875,
|
2299 |
+
"loss": 0.1053,
|
2300 |
+
"rewards/accuracies": 0.9624999761581421,
|
2301 |
+
"rewards/chosen": -0.45267003774642944,
|
2302 |
+
"rewards/margins": 7.700293064117432,
|
2303 |
+
"rewards/rejected": -8.152963638305664,
|
2304 |
+
"step": 1530
|
2305 |
+
},
|
2306 |
+
{
|
2307 |
+
"epoch": 0.9121871760698949,
|
2308 |
+
"grad_norm": 133.6230788419226,
|
2309 |
+
"learning_rate": 1.162048149666503e-08,
|
2310 |
+
"logits/chosen": -2.3948609828948975,
|
2311 |
+
"logits/rejected": -2.3574843406677246,
|
2312 |
+
"logps/chosen": -375.52130126953125,
|
2313 |
+
"logps/rejected": -248.8327178955078,
|
2314 |
+
"loss": 0.1221,
|
2315 |
+
"rewards/accuracies": 0.9375,
|
2316 |
+
"rewards/chosen": -0.5586664080619812,
|
2317 |
+
"rewards/margins": 8.581660270690918,
|
2318 |
+
"rewards/rejected": -9.140327453613281,
|
2319 |
+
"step": 1540
|
2320 |
+
},
|
2321 |
+
{
|
2322 |
+
"epoch": 0.918110469420998,
|
2323 |
+
"grad_norm": 81.24757118228953,
|
2324 |
+
"learning_rate": 1.0113518138794047e-08,
|
2325 |
+
"logits/chosen": -2.424410581588745,
|
2326 |
+
"logits/rejected": -2.394204616546631,
|
2327 |
+
"logps/chosen": -375.75787353515625,
|
2328 |
+
"logps/rejected": -253.07760620117188,
|
2329 |
+
"loss": 0.2091,
|
2330 |
+
"rewards/accuracies": 0.918749988079071,
|
2331 |
+
"rewards/chosen": -0.5351217985153198,
|
2332 |
+
"rewards/margins": 7.9864182472229,
|
2333 |
+
"rewards/rejected": -8.521539688110352,
|
2334 |
+
"step": 1550
|
2335 |
+
},
|
2336 |
+
{
|
2337 |
+
"epoch": 0.9240337627721013,
|
2338 |
+
"grad_norm": 335.2650242487096,
|
2339 |
+
"learning_rate": 8.709161190797565e-09,
|
2340 |
+
"logits/chosen": -2.427302122116089,
|
2341 |
+
"logits/rejected": -2.419010639190674,
|
2342 |
+
"logps/chosen": -371.4006652832031,
|
2343 |
+
"logps/rejected": -262.81365966796875,
|
2344 |
+
"loss": 0.1524,
|
2345 |
+
"rewards/accuracies": 0.956250011920929,
|
2346 |
+
"rewards/chosen": -0.2929847836494446,
|
2347 |
+
"rewards/margins": 8.156495094299316,
|
2348 |
+
"rewards/rejected": -8.449480056762695,
|
2349 |
+
"step": 1560
|
2350 |
+
},
|
2351 |
+
{
|
2352 |
+
"epoch": 0.9299570561232045,
|
2353 |
+
"grad_norm": 196.1060901854074,
|
2354 |
+
"learning_rate": 7.408011336897141e-09,
|
2355 |
+
"logits/chosen": -2.4616658687591553,
|
2356 |
+
"logits/rejected": -2.4268431663513184,
|
2357 |
+
"logps/chosen": -371.90484619140625,
|
2358 |
+
"logps/rejected": -251.7766876220703,
|
2359 |
+
"loss": 0.1275,
|
2360 |
+
"rewards/accuracies": 0.949999988079071,
|
2361 |
+
"rewards/chosen": -0.5105483531951904,
|
2362 |
+
"rewards/margins": 8.057133674621582,
|
2363 |
+
"rewards/rejected": -8.567682266235352,
|
2364 |
+
"step": 1570
|
2365 |
+
},
|
2366 |
+
{
|
2367 |
+
"epoch": 0.9358803494743078,
|
2368 |
+
"grad_norm": 58.4219582518985,
|
2369 |
+
"learning_rate": 6.210625116645135e-09,
|
2370 |
+
"logits/chosen": -2.403099775314331,
|
2371 |
+
"logits/rejected": -2.384831666946411,
|
2372 |
+
"logps/chosen": -417.583251953125,
|
2373 |
+
"logps/rejected": -272.24603271484375,
|
2374 |
+
"loss": 0.1594,
|
2375 |
+
"rewards/accuracies": 0.918749988079071,
|
2376 |
+
"rewards/chosen": -0.04987464100122452,
|
2377 |
+
"rewards/margins": 8.769102096557617,
|
2378 |
+
"rewards/rejected": -8.818976402282715,
|
2379 |
+
"step": 1580
|
2380 |
+
},
|
2381 |
+
{
|
2382 |
+
"epoch": 0.941803642825411,
|
2383 |
+
"grad_norm": 144.546751800466,
|
2384 |
+
"learning_rate": 5.117514686876378e-09,
|
2385 |
+
"logits/chosen": -2.456322193145752,
|
2386 |
+
"logits/rejected": -2.416351795196533,
|
2387 |
+
"logps/chosen": -351.6046447753906,
|
2388 |
+
"logps/rejected": -273.55303955078125,
|
2389 |
+
"loss": 0.1241,
|
2390 |
+
"rewards/accuracies": 0.9624999761581421,
|
2391 |
+
"rewards/chosen": -0.4101320207118988,
|
2392 |
+
"rewards/margins": 9.183103561401367,
|
2393 |
+
"rewards/rejected": -9.593236923217773,
|
2394 |
+
"step": 1590
|
2395 |
+
},
|
2396 |
+
{
|
2397 |
+
"epoch": 0.9477269361765142,
|
2398 |
+
"grad_norm": 120.48952563193308,
|
2399 |
+
"learning_rate": 4.1291476026441565e-09,
|
2400 |
+
"logits/chosen": -2.456437110900879,
|
2401 |
+
"logits/rejected": -2.4311928749084473,
|
2402 |
+
"logps/chosen": -365.16168212890625,
|
2403 |
+
"logps/rejected": -253.9194793701172,
|
2404 |
+
"loss": 0.1579,
|
2405 |
+
"rewards/accuracies": 0.9437500238418579,
|
2406 |
+
"rewards/chosen": -0.06400365382432938,
|
2407 |
+
"rewards/margins": 8.453577041625977,
|
2408 |
+
"rewards/rejected": -8.51758098602295,
|
2409 |
+
"step": 1600
|
2410 |
+
},
|
2411 |
+
{
|
2412 |
+
"epoch": 0.9536502295276174,
|
2413 |
+
"grad_norm": 106.80281991069545,
|
2414 |
+
"learning_rate": 3.2459466172331253e-09,
|
2415 |
+
"logits/chosen": -2.3881659507751465,
|
2416 |
+
"logits/rejected": -2.397707939147949,
|
2417 |
+
"logps/chosen": -372.622314453125,
|
2418 |
+
"logps/rejected": -245.3422393798828,
|
2419 |
+
"loss": 0.1586,
|
2420 |
+
"rewards/accuracies": 0.956250011920929,
|
2421 |
+
"rewards/chosen": -0.32000666856765747,
|
2422 |
+
"rewards/margins": 7.897205352783203,
|
2423 |
+
"rewards/rejected": -8.217211723327637,
|
2424 |
+
"step": 1610
|
2425 |
+
},
|
2426 |
+
{
|
2427 |
+
"epoch": 0.9595735228787206,
|
2428 |
+
"grad_norm": 193.24252371332162,
|
2429 |
+
"learning_rate": 2.4682895013354854e-09,
|
2430 |
+
"logits/chosen": -2.4109058380126953,
|
2431 |
+
"logits/rejected": -2.413801670074463,
|
2432 |
+
"logps/chosen": -360.99072265625,
|
2433 |
+
"logps/rejected": -256.06756591796875,
|
2434 |
+
"loss": 0.091,
|
2435 |
+
"rewards/accuracies": 0.9624999761581421,
|
2436 |
+
"rewards/chosen": -0.6014559268951416,
|
2437 |
+
"rewards/margins": 8.314714431762695,
|
2438 |
+
"rewards/rejected": -8.916170120239258,
|
2439 |
+
"step": 1620
|
2440 |
+
},
|
2441 |
+
{
|
2442 |
+
"epoch": 0.9654968162298238,
|
2443 |
+
"grad_norm": 182.40318024580077,
|
2444 |
+
"learning_rate": 1.7965088814675677e-09,
|
2445 |
+
"logits/chosen": -2.384791851043701,
|
2446 |
+
"logits/rejected": -2.382201671600342,
|
2447 |
+
"logps/chosen": -375.5494689941406,
|
2448 |
+
"logps/rejected": -269.6348876953125,
|
2449 |
+
"loss": 0.1368,
|
2450 |
+
"rewards/accuracies": 0.9125000238418579,
|
2451 |
+
"rewards/chosen": -0.9617233276367188,
|
2452 |
+
"rewards/margins": 8.054598808288574,
|
2453 |
+
"rewards/rejected": -9.016322135925293,
|
2454 |
+
"step": 1630
|
2455 |
+
},
|
2456 |
+
{
|
2457 |
+
"epoch": 0.971420109580927,
|
2458 |
+
"grad_norm": 142.9169068872898,
|
2459 |
+
"learning_rate": 1.2308920976958348e-09,
|
2460 |
+
"logits/chosen": -2.448244094848633,
|
2461 |
+
"logits/rejected": -2.4210681915283203,
|
2462 |
+
"logps/chosen": -364.89703369140625,
|
2463 |
+
"logps/rejected": -260.6488342285156,
|
2464 |
+
"loss": 0.1134,
|
2465 |
+
"rewards/accuracies": 0.949999988079071,
|
2466 |
+
"rewards/chosen": -0.6372100114822388,
|
2467 |
+
"rewards/margins": 8.421440124511719,
|
2468 |
+
"rewards/rejected": -9.058650016784668,
|
2469 |
+
"step": 1640
|
2470 |
+
},
|
2471 |
+
{
|
2472 |
+
"epoch": 0.9773434029320303,
|
2473 |
+
"grad_norm": 72.39563192418329,
|
2474 |
+
"learning_rate": 7.716810807330276e-10,
|
2475 |
+
"logits/chosen": -2.4073166847229004,
|
2476 |
+
"logits/rejected": -2.365865707397461,
|
2477 |
+
"logps/chosen": -390.2394104003906,
|
2478 |
+
"logps/rejected": -262.54150390625,
|
2479 |
+
"loss": 0.0787,
|
2480 |
+
"rewards/accuracies": 0.96875,
|
2481 |
+
"rewards/chosen": -0.4426153302192688,
|
2482 |
+
"rewards/margins": 8.694158554077148,
|
2483 |
+
"rewards/rejected": -9.136773109436035,
|
2484 |
+
"step": 1650
|
2485 |
+
},
|
2486 |
+
{
|
2487 |
+
"epoch": 0.9832666962831335,
|
2488 |
+
"grad_norm": 109.48295555626903,
|
2489 |
+
"learning_rate": 4.190722484575804e-10,
|
2490 |
+
"logits/chosen": -2.4156811237335205,
|
2491 |
+
"logits/rejected": -2.381577968597412,
|
2492 |
+
"logps/chosen": -399.725341796875,
|
2493 |
+
"logps/rejected": -263.4638366699219,
|
2494 |
+
"loss": 0.1397,
|
2495 |
+
"rewards/accuracies": 0.956250011920929,
|
2496 |
+
"rewards/chosen": -0.32644200325012207,
|
2497 |
+
"rewards/margins": 8.409704208374023,
|
2498 |
+
"rewards/rejected": -8.736146926879883,
|
2499 |
+
"step": 1660
|
2500 |
+
},
|
2501 |
+
{
|
2502 |
+
"epoch": 0.9891899896342367,
|
2503 |
+
"grad_norm": 137.41353285844747,
|
2504 |
+
"learning_rate": 1.732164218998522e-10,
|
2505 |
+
"logits/chosen": -2.3911380767822266,
|
2506 |
+
"logits/rejected": -2.386594533920288,
|
2507 |
+
"logps/chosen": -363.5224609375,
|
2508 |
+
"logps/rejected": -253.3739776611328,
|
2509 |
+
"loss": 0.125,
|
2510 |
+
"rewards/accuracies": 0.949999988079071,
|
2511 |
+
"rewards/chosen": -0.11769469082355499,
|
2512 |
+
"rewards/margins": 8.378522872924805,
|
2513 |
+
"rewards/rejected": -8.496217727661133,
|
2514 |
+
"step": 1670
|
2515 |
+
},
|
2516 |
+
{
|
2517 |
+
"epoch": 0.9951132829853399,
|
2518 |
+
"grad_norm": 198.01011960831866,
|
2519 |
+
"learning_rate": 3.4218760731730136e-11,
|
2520 |
+
"logits/chosen": -2.465393543243408,
|
2521 |
+
"logits/rejected": -2.44226336479187,
|
2522 |
+
"logps/chosen": -390.7846984863281,
|
2523 |
+
"logps/rejected": -258.70721435546875,
|
2524 |
+
"loss": 0.1317,
|
2525 |
+
"rewards/accuracies": 0.9375,
|
2526 |
+
"rewards/chosen": -0.5431281924247742,
|
2527 |
+
"rewards/margins": 8.081016540527344,
|
2528 |
+
"rewards/rejected": -8.624144554138184,
|
2529 |
+
"step": 1680
|
2530 |
+
},
|
2531 |
+
{
|
2532 |
+
"epoch": 0.9998519176662224,
|
2533 |
+
"step": 1688,
|
2534 |
+
"total_flos": 0.0,
|
2535 |
+
"train_loss": 0.17761736296081995,
|
2536 |
+
"train_runtime": 39274.1948,
|
2537 |
+
"train_samples_per_second": 1.375,
|
2538 |
+
"train_steps_per_second": 0.043
|
2539 |
+
}
|
2540 |
+
],
|
2541 |
+
"logging_steps": 10,
|
2542 |
+
"max_steps": 1688,
|
2543 |
+
"num_input_tokens_seen": 0,
|
2544 |
+
"num_train_epochs": 1,
|
2545 |
+
"save_steps": 500,
|
2546 |
+
"stateful_callbacks": {
|
2547 |
+
"TrainerControl": {
|
2548 |
+
"args": {
|
2549 |
+
"should_epoch_stop": false,
|
2550 |
+
"should_evaluate": false,
|
2551 |
+
"should_log": false,
|
2552 |
+
"should_save": true,
|
2553 |
+
"should_training_stop": true
|
2554 |
+
},
|
2555 |
+
"attributes": {}
|
2556 |
+
}
|
2557 |
+
},
|
2558 |
+
"total_flos": 0.0,
|
2559 |
+
"train_batch_size": 4,
|
2560 |
+
"trial_name": null,
|
2561 |
+
"trial_params": null
|
2562 |
+
}
|