weqweasdas committed on
Commit
15d5a92
1 Parent(s): dffcfe1

Model save

Browse files
README.md ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: alignment-handbook/zephyr-7b-sft-full
4
+ tags:
5
+ - trl
6
+ - dpo
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: zephyr-7b-dpo-full
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # zephyr-7b-dpo-full
17
+
18
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unspecified dataset (the dataset name was not recorded by the Trainer).
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.5042
21
+ - Rewards/chosen: -1.1309
22
+ - Rewards/rejected: -1.9677
23
+ - Rewards/accuracies: 0.7550
24
+ - Rewards/margins: 0.8368
25
+ - Logps/rejected: -464.9229
26
+ - Logps/chosen: -402.9644
27
+ - Logits/rejected: -0.9395
28
+ - Logits/chosen: -1.4685
29
+
30
+ ## Model description
31
+
32
+ More information needed
33
+
34
+ ## Intended uses & limitations
35
+
36
+ More information needed
37
+
38
+ ## Training and evaluation data
39
+
40
+ More information needed
41
+
42
+ ## Training procedure
43
+
44
+ ### Training hyperparameters
45
+
46
+ The following hyperparameters were used during training:
47
+ - learning_rate: 5e-07
48
+ - train_batch_size: 2
49
+ - eval_batch_size: 2
50
+ - seed: 42
51
+ - distributed_type: multi-GPU
52
+ - num_devices: 10
53
+ - gradient_accumulation_steps: 8
54
+ - total_train_batch_size: 160
55
+ - total_eval_batch_size: 20
56
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
57
+ - lr_scheduler_type: cosine
58
+ - lr_scheduler_warmup_ratio: 0.1
59
+ - num_epochs: 1
60
+
61
+ ### Training results
62
+
63
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
64
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
65
+ | 0.5474 | 0.26 | 100 | 0.5625 | -0.7544 | -1.1673 | 0.6850 | 0.4129 | -384.8893 | -365.3133 | -2.2414 | -2.3089 |
66
+ | 0.5291 | 0.52 | 200 | 0.5176 | -1.2026 | -1.9705 | 0.7500 | 0.7679 | -465.2114 | -410.1355 | -1.1595 | -1.5009 |
67
+ | 0.4891 | 0.79 | 300 | 0.5042 | -1.1309 | -1.9677 | 0.7550 | 0.8368 | -464.9229 | -402.9644 | -0.9395 | -1.4685 |
68
+
69
+
70
+ ### Framework versions
71
+
72
+ - Transformers 4.39.0.dev0
73
+ - Pytorch 2.1.2
74
+ - Datasets 2.14.6
75
+ - Tokenizers 0.15.2
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.5410362510156881,
4
+ "train_runtime": 45025.8645,
5
+ "train_samples": 61134,
6
+ "train_samples_per_second": 1.358,
7
+ "train_steps_per_second": 0.008
8
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.39.0.dev0"
6
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35f5ea80d91818a0be671826bc85d784733485872dd5276556683bac2831a20f
3
+ size 4943162336
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:603985a400b07943db0c3c4c47b8afb5314d9648c23a1083c7f064fd5205963d
3
+ size 4999819336
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a31b14bd81e7160ee0da251e70ebbc7c8cc6d82e5e461a6e3a41eb506b16d7ee
3
+ size 4540516344
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 14483464192
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00003-of-00003.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
242
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
243
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
244
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
245
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
246
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
247
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
248
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
249
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
250
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
251
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
252
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
253
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
254
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
255
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
256
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
257
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
258
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
259
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
260
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
261
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
262
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
263
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
264
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
265
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
266
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
267
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
268
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
269
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
270
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
271
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
272
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
273
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
274
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
275
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
276
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
277
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
278
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
279
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
280
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
281
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
282
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
283
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
284
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
285
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
286
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
287
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
288
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
289
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
290
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
291
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
292
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
293
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
294
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
295
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
296
+ "model.norm.weight": "model-00003-of-00003.safetensors"
297
+ }
298
+ }
runs/May01_19-58-40_tongzhang4.math.ust.hk/events.out.tfevents.1714564785.tongzhang4.math.ust.hk.2321598.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa008eef01aeb090051f16be2190ea2378bdc92b949dd92c03f4e9ef64e3186e
3
- size 28128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e44f427ba5f7712d19c8464df4d52292a5571851f76ff759accb08ed2affa7c0
3
+ size 33986
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.5410362510156881,
4
+ "train_runtime": 45025.8645,
5
+ "train_samples": 61134,
6
+ "train_samples_per_second": 1.358,
7
+ "train_steps_per_second": 0.008
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,663 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9996728819103696,
5
+ "eval_steps": 100,
6
+ "global_step": 382,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "grad_norm": 8.037226671121555,
14
+ "learning_rate": 1.282051282051282e-08,
15
+ "logits/chosen": -2.9984583854675293,
16
+ "logits/rejected": -3.047058343887329,
17
+ "logps/chosen": -440.9766845703125,
18
+ "logps/rejected": -253.47076416015625,
19
+ "loss": 0.6931,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/chosen": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/rejected": 0.0,
24
+ "step": 1
25
+ },
26
+ {
27
+ "epoch": 0.03,
28
+ "grad_norm": 9.369508939204481,
29
+ "learning_rate": 1.2820512820512818e-07,
30
+ "logits/chosen": -2.851626396179199,
31
+ "logits/rejected": -2.840857982635498,
32
+ "logps/chosen": -292.8236999511719,
33
+ "logps/rejected": -243.9152069091797,
34
+ "loss": 0.6931,
35
+ "rewards/accuracies": 0.4583333432674408,
36
+ "rewards/chosen": 0.0002360683138249442,
37
+ "rewards/margins": -7.22050535841845e-05,
38
+ "rewards/rejected": 0.00030827338923700154,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.05,
43
+ "grad_norm": 7.770546674426242,
44
+ "learning_rate": 2.5641025641025636e-07,
45
+ "logits/chosen": -2.836233139038086,
46
+ "logits/rejected": -2.8243448734283447,
47
+ "logps/chosen": -269.3387145996094,
48
+ "logps/rejected": -226.77444458007812,
49
+ "loss": 0.6919,
50
+ "rewards/accuracies": 0.6499999761581421,
51
+ "rewards/chosen": 0.0006391379283741117,
52
+ "rewards/margins": 0.0022494590375572443,
53
+ "rewards/rejected": -0.0016103212255984545,
54
+ "step": 20
55
+ },
56
+ {
57
+ "epoch": 0.08,
58
+ "grad_norm": 7.555168855947224,
59
+ "learning_rate": 3.8461538461538463e-07,
60
+ "logits/chosen": -2.8013336658477783,
61
+ "logits/rejected": -2.8021349906921387,
62
+ "logps/chosen": -273.3194274902344,
63
+ "logps/rejected": -246.2598114013672,
64
+ "loss": 0.6851,
65
+ "rewards/accuracies": 0.637499988079071,
66
+ "rewards/chosen": 0.01441938616335392,
67
+ "rewards/margins": 0.014810544438660145,
68
+ "rewards/rejected": -0.0003911592939402908,
69
+ "step": 30
70
+ },
71
+ {
72
+ "epoch": 0.1,
73
+ "grad_norm": 8.28569270937212,
74
+ "learning_rate": 4.999895137916554e-07,
75
+ "logits/chosen": -2.814171552658081,
76
+ "logits/rejected": -2.832319974899292,
77
+ "logps/chosen": -276.9251403808594,
78
+ "logps/rejected": -268.66461181640625,
79
+ "loss": 0.6723,
80
+ "rewards/accuracies": 0.675000011920929,
81
+ "rewards/chosen": 0.04223880544304848,
82
+ "rewards/margins": 0.04603661596775055,
83
+ "rewards/rejected": -0.0037978135515004396,
84
+ "step": 40
85
+ },
86
+ {
87
+ "epoch": 0.13,
88
+ "grad_norm": 10.06207588110713,
89
+ "learning_rate": 4.987322328603843e-07,
90
+ "logits/chosen": -2.771031141281128,
91
+ "logits/rejected": -2.7740721702575684,
92
+ "logps/chosen": -280.4728088378906,
93
+ "logps/rejected": -270.3973083496094,
94
+ "loss": 0.6424,
95
+ "rewards/accuracies": 0.737500011920929,
96
+ "rewards/chosen": 0.010499795898795128,
97
+ "rewards/margins": 0.13346262276172638,
98
+ "rewards/rejected": -0.1229628473520279,
99
+ "step": 50
100
+ },
101
+ {
102
+ "epoch": 0.16,
103
+ "grad_norm": 11.452381967640083,
104
+ "learning_rate": 4.953897892477663e-07,
105
+ "logits/chosen": -2.7735002040863037,
106
+ "logits/rejected": -2.7375593185424805,
107
+ "logps/chosen": -275.349853515625,
108
+ "logps/rejected": -285.12603759765625,
109
+ "loss": 0.6166,
110
+ "rewards/accuracies": 0.7250000238418579,
111
+ "rewards/chosen": -0.11094622313976288,
112
+ "rewards/margins": 0.23263141512870789,
113
+ "rewards/rejected": -0.34357762336730957,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.18,
118
+ "grad_norm": 12.969857186189097,
119
+ "learning_rate": 4.899902032011388e-07,
120
+ "logits/chosen": -2.6529109477996826,
121
+ "logits/rejected": -2.628537893295288,
122
+ "logps/chosen": -302.51910400390625,
123
+ "logps/rejected": -317.7935791015625,
124
+ "loss": 0.5948,
125
+ "rewards/accuracies": 0.7250000238418579,
126
+ "rewards/chosen": -0.10311013460159302,
127
+ "rewards/margins": 0.31482020020484924,
128
+ "rewards/rejected": -0.4179303050041199,
129
+ "step": 70
130
+ },
131
+ {
132
+ "epoch": 0.21,
133
+ "grad_norm": 19.524374589693302,
134
+ "learning_rate": 4.825787403267712e-07,
135
+ "logits/chosen": -2.5461225509643555,
136
+ "logits/rejected": -2.522418737411499,
137
+ "logps/chosen": -298.61798095703125,
138
+ "logps/rejected": -317.22503662109375,
139
+ "loss": 0.5781,
140
+ "rewards/accuracies": 0.706250011920929,
141
+ "rewards/chosen": -0.3027142286300659,
142
+ "rewards/margins": 0.3840213418006897,
143
+ "rewards/rejected": -0.6867355108261108,
144
+ "step": 80
145
+ },
146
+ {
147
+ "epoch": 0.24,
148
+ "grad_norm": 14.061479301034291,
149
+ "learning_rate": 4.732175321209399e-07,
150
+ "logits/chosen": -2.397352933883667,
151
+ "logits/rejected": -2.424654722213745,
152
+ "logps/chosen": -268.97369384765625,
153
+ "logps/rejected": -313.6884460449219,
154
+ "loss": 0.5836,
155
+ "rewards/accuracies": 0.7250000238418579,
156
+ "rewards/chosen": -0.3002493977546692,
157
+ "rewards/margins": 0.41001391410827637,
158
+ "rewards/rejected": -0.7102633118629456,
159
+ "step": 90
160
+ },
161
+ {
162
+ "epoch": 0.26,
163
+ "grad_norm": 17.956912133781355,
164
+ "learning_rate": 4.619850551115105e-07,
165
+ "logits/chosen": -2.1157875061035156,
166
+ "logits/rejected": -2.0495009422302246,
167
+ "logps/chosen": -304.60968017578125,
168
+ "logps/rejected": -369.248779296875,
169
+ "loss": 0.5474,
170
+ "rewards/accuracies": 0.768750011920929,
171
+ "rewards/chosen": -0.5611704587936401,
172
+ "rewards/margins": 0.6696933507919312,
173
+ "rewards/rejected": -1.2308636903762817,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 0.26,
178
+ "eval_logits/chosen": -2.308899164199829,
179
+ "eval_logits/rejected": -2.2413949966430664,
180
+ "eval_logps/chosen": -365.31329345703125,
181
+ "eval_logps/rejected": -384.8892822265625,
182
+ "eval_loss": 0.5624693632125854,
183
+ "eval_rewards/accuracies": 0.6850000023841858,
184
+ "eval_rewards/chosen": -0.7543885707855225,
185
+ "eval_rewards/margins": 0.41293570399284363,
186
+ "eval_rewards/rejected": -1.167324423789978,
187
+ "eval_runtime": 693.8722,
188
+ "eval_samples_per_second": 2.882,
189
+ "eval_steps_per_second": 0.144,
190
+ "step": 100
191
+ },
192
+ {
193
+ "epoch": 0.29,
194
+ "grad_norm": 26.962688754701436,
195
+ "learning_rate": 4.4897547297646633e-07,
196
+ "logits/chosen": -1.9299335479736328,
197
+ "logits/rejected": -1.8247636556625366,
198
+ "logps/chosen": -396.6432189941406,
199
+ "logps/rejected": -426.43658447265625,
200
+ "loss": 0.5435,
201
+ "rewards/accuracies": 0.768750011920929,
202
+ "rewards/chosen": -0.9001098871231079,
203
+ "rewards/margins": 0.6095530390739441,
204
+ "rewards/rejected": -1.5096628665924072,
205
+ "step": 110
206
+ },
207
+ {
208
+ "epoch": 0.31,
209
+ "grad_norm": 24.58898873320785,
210
+ "learning_rate": 4.3429784715451177e-07,
211
+ "logits/chosen": -1.8942654132843018,
212
+ "logits/rejected": -1.5966886281967163,
213
+ "logps/chosen": -327.930419921875,
214
+ "logps/rejected": -381.9439697265625,
215
+ "loss": 0.5262,
216
+ "rewards/accuracies": 0.768750011920929,
217
+ "rewards/chosen": -0.6237712502479553,
218
+ "rewards/margins": 0.8189311027526855,
219
+ "rewards/rejected": -1.442702293395996,
220
+ "step": 120
221
+ },
222
+ {
223
+ "epoch": 0.34,
224
+ "grad_norm": 22.502297297962723,
225
+ "learning_rate": 4.180752225653292e-07,
226
+ "logits/chosen": -1.746715784072876,
227
+ "logits/rejected": -1.6937605142593384,
228
+ "logps/chosen": -372.397705078125,
229
+ "logps/rejected": -407.4283142089844,
230
+ "loss": 0.5297,
231
+ "rewards/accuracies": 0.675000011920929,
232
+ "rewards/chosen": -0.8563397526741028,
233
+ "rewards/margins": 0.5461622476577759,
234
+ "rewards/rejected": -1.4025019407272339,
235
+ "step": 130
236
+ },
237
+ {
238
+ "epoch": 0.37,
239
+ "grad_norm": 30.209865465712408,
240
+ "learning_rate": 4.00443596104045e-07,
241
+ "logits/chosen": -1.5750809907913208,
242
+ "logits/rejected": -1.1808230876922607,
243
+ "logps/chosen": -404.4322509765625,
244
+ "logps/rejected": -471.23883056640625,
245
+ "loss": 0.5301,
246
+ "rewards/accuracies": 0.7749999761581421,
247
+ "rewards/chosen": -1.0831854343414307,
248
+ "rewards/margins": 0.7117680907249451,
249
+ "rewards/rejected": -1.79495370388031,
250
+ "step": 140
251
+ },
252
+ {
253
+ "epoch": 0.39,
254
+ "grad_norm": 24.47710068833958,
255
+ "learning_rate": 3.815507765571832e-07,
256
+ "logits/chosen": -1.8257108926773071,
257
+ "logits/rejected": -1.4285178184509277,
258
+ "logps/chosen": -398.15509033203125,
259
+ "logps/rejected": -433.53375244140625,
260
+ "loss": 0.5141,
261
+ "rewards/accuracies": 0.768750011920929,
262
+ "rewards/chosen": -0.8729391098022461,
263
+ "rewards/margins": 0.7921355366706848,
264
+ "rewards/rejected": -1.6650745868682861,
265
+ "step": 150
266
+ },
267
+ {
268
+ "epoch": 0.42,
269
+ "grad_norm": 21.92961027984523,
270
+ "learning_rate": 3.615551454976164e-07,
271
+ "logits/chosen": -1.5830988883972168,
272
+ "logits/rejected": -0.9580858945846558,
273
+ "logps/chosen": -407.8912048339844,
274
+ "logps/rejected": -474.907958984375,
275
+ "loss": 0.5206,
276
+ "rewards/accuracies": 0.8125,
277
+ "rewards/chosen": -0.9090505838394165,
278
+ "rewards/margins": 0.8707038164138794,
279
+ "rewards/rejected": -1.779754400253296,
280
+ "step": 160
281
+ },
282
+ {
283
+ "epoch": 0.44,
284
+ "grad_norm": 20.612672549801676,
285
+ "learning_rate": 3.406243295461325e-07,
286
+ "logits/chosen": -1.4334670305252075,
287
+ "logits/rejected": -1.1878687143325806,
288
+ "logps/chosen": -390.4436340332031,
289
+ "logps/rejected": -457.1796875,
290
+ "loss": 0.5124,
291
+ "rewards/accuracies": 0.762499988079071,
292
+ "rewards/chosen": -0.7781120538711548,
293
+ "rewards/margins": 0.8819589614868164,
294
+ "rewards/rejected": -1.6600710153579712,
295
+ "step": 170
296
+ },
297
+ {
298
+ "epoch": 0.47,
299
+ "grad_norm": 29.50134057519578,
300
+ "learning_rate": 3.189337951302639e-07,
301
+ "logits/chosen": -1.2944433689117432,
302
+ "logits/rejected": -0.9970762133598328,
303
+ "logps/chosen": -345.94415283203125,
304
+ "logps/rejected": -390.71673583984375,
305
+ "loss": 0.514,
306
+ "rewards/accuracies": 0.768750011920929,
307
+ "rewards/chosen": -0.8080419301986694,
308
+ "rewards/margins": 0.8096262216567993,
309
+ "rewards/rejected": -1.6176681518554688,
310
+ "step": 180
311
+ },
312
+ {
313
+ "epoch": 0.5,
314
+ "grad_norm": 22.184389019959962,
315
+ "learning_rate": 2.966653775207444e-07,
316
+ "logits/chosen": -1.5276657342910767,
317
+ "logits/rejected": -1.1025655269622803,
318
+ "logps/chosen": -404.1734924316406,
319
+ "logps/rejected": -435.48089599609375,
320
+ "loss": 0.4989,
321
+ "rewards/accuracies": 0.7437499761581421,
322
+ "rewards/chosen": -0.9301355481147766,
323
+ "rewards/margins": 0.8291870355606079,
324
+ "rewards/rejected": -1.7593225240707397,
325
+ "step": 190
326
+ },
327
+ {
328
+ "epoch": 0.52,
329
+ "grad_norm": 26.733564014369115,
330
+ "learning_rate": 2.7400575647692043e-07,
331
+ "logits/chosen": -1.406765341758728,
332
+ "logits/rejected": -1.11467444896698,
333
+ "logps/chosen": -400.4964904785156,
334
+ "logps/rejected": -441.97998046875,
335
+ "loss": 0.5291,
336
+ "rewards/accuracies": 0.6812499761581421,
337
+ "rewards/chosen": -1.223042607307434,
338
+ "rewards/margins": 0.6574328541755676,
339
+ "rewards/rejected": -1.880475401878357,
340
+ "step": 200
341
+ },
342
+ {
343
+ "epoch": 0.52,
344
+ "eval_logits/chosen": -1.5008797645568848,
345
+ "eval_logits/rejected": -1.159541130065918,
346
+ "eval_logps/chosen": -410.1354675292969,
347
+ "eval_logps/rejected": -465.21136474609375,
348
+ "eval_loss": 0.5176023244857788,
349
+ "eval_rewards/accuracies": 0.75,
350
+ "eval_rewards/chosen": -1.2026103734970093,
351
+ "eval_rewards/margins": 0.7679347395896912,
352
+ "eval_rewards/rejected": -1.9705451726913452,
353
+ "eval_runtime": 694.1376,
354
+ "eval_samples_per_second": 2.881,
355
+ "eval_steps_per_second": 0.144,
356
+ "step": 200
357
+ },
358
+ {
359
+ "epoch": 0.55,
360
+ "grad_norm": 22.51552046396425,
361
+ "learning_rate": 2.511448912800265e-07,
362
+ "logits/chosen": -1.538048505783081,
363
+ "logits/rejected": -1.2279856204986572,
364
+ "logps/chosen": -392.5513610839844,
365
+ "logps/rejected": -486.5321350097656,
366
+ "loss": 0.4967,
367
+ "rewards/accuracies": 0.7250000238418579,
368
+ "rewards/chosen": -1.1226943731307983,
369
+ "rewards/margins": 0.8473464250564575,
370
+ "rewards/rejected": -1.9700406789779663,
371
+ "step": 210
372
+ },
373
+ {
374
+ "epoch": 0.58,
375
+ "grad_norm": 23.041728264521645,
376
+ "learning_rate": 2.2827442827369772e-07,
377
+ "logits/chosen": -1.632845163345337,
378
+ "logits/rejected": -1.380772352218628,
379
+ "logps/chosen": -346.6684875488281,
380
+ "logps/rejected": -421.88739013671875,
381
+ "loss": 0.5141,
382
+ "rewards/accuracies": 0.731249988079071,
383
+ "rewards/chosen": -0.9023996591567993,
384
+ "rewards/margins": 0.7315413951873779,
385
+ "rewards/rejected": -1.6339410543441772,
386
+ "step": 220
387
+ },
388
+ {
389
+ "epoch": 0.6,
390
+ "grad_norm": 25.751992215511777,
391
+ "learning_rate": 2.0558609426156343e-07,
392
+ "logits/chosen": -1.5758168697357178,
393
+ "logits/rejected": -1.392135500907898,
394
+ "logps/chosen": -378.9245300292969,
395
+ "logps/rejected": -444.28936767578125,
396
+ "loss": 0.5208,
397
+ "rewards/accuracies": 0.699999988079071,
398
+ "rewards/chosen": -1.1253713369369507,
399
+ "rewards/margins": 0.6441680192947388,
400
+ "rewards/rejected": -1.7695392370224,
401
+ "step": 230
402
+ },
403
+ {
404
+ "epoch": 0.63,
405
+ "grad_norm": 23.086940301819613,
406
+ "learning_rate": 1.8327008923033222e-07,
407
+ "logits/chosen": -1.6344903707504272,
408
+ "logits/rejected": -1.4062269926071167,
409
+ "logps/chosen": -433.552001953125,
410
+ "logps/rejected": -481.857421875,
411
+ "loss": 0.5213,
412
+ "rewards/accuracies": 0.737500011920929,
413
+ "rewards/chosen": -1.1788504123687744,
414
+ "rewards/margins": 0.7093037962913513,
415
+ "rewards/rejected": -1.8881542682647705,
416
+ "step": 240
417
+ },
418
+ {
419
+ "epoch": 0.65,
420
+ "grad_norm": 22.246705559005484,
421
+ "learning_rate": 1.6151349187243063e-07,
422
+ "logits/chosen": -1.5954548120498657,
423
+ "logits/rejected": -1.1250216960906982,
424
+ "logps/chosen": -444.52001953125,
425
+ "logps/rejected": -509.41302490234375,
426
+ "loss": 0.4941,
427
+ "rewards/accuracies": 0.8125,
428
+ "rewards/chosen": -1.2779477834701538,
429
+ "rewards/margins": 0.9330456852912903,
430
+ "rewards/rejected": -2.210993528366089,
431
+ "step": 250
432
+ },
433
+ {
434
+ "epoch": 0.68,
435
+ "grad_norm": 25.840093284837668,
436
+ "learning_rate": 1.4049869127495884e-07,
437
+ "logits/chosen": -1.4928535223007202,
438
+ "logits/rejected": -0.7584124803543091,
439
+ "logps/chosen": -401.40374755859375,
440
+ "logps/rejected": -504.8365173339844,
441
+ "loss": 0.4984,
442
+ "rewards/accuracies": 0.7749999761581421,
443
+ "rewards/chosen": -1.2281973361968994,
444
+ "rewards/margins": 1.0200105905532837,
445
+ "rewards/rejected": -2.2482082843780518,
446
+ "step": 260
447
+ },
448
+ {
449
+ "epoch": 0.71,
450
+ "grad_norm": 23.793534917882216,
451
+ "learning_rate": 1.2040185792236873e-07,
452
+ "logits/chosen": -1.413051962852478,
453
+ "logits/rejected": -0.8309895396232605,
454
+ "logps/chosen": -416.62255859375,
455
+ "logps/rejected": -475.1249084472656,
456
+ "loss": 0.4978,
457
+ "rewards/accuracies": 0.737500011920929,
458
+ "rewards/chosen": -1.3103489875793457,
459
+ "rewards/margins": 0.874372124671936,
460
+ "rewards/rejected": -2.1847212314605713,
461
+ "step": 270
462
+ },
463
+ {
464
+ "epoch": 0.73,
465
+ "grad_norm": 27.621259089208138,
466
+ "learning_rate": 1.0139146683069728e-07,
467
+ "logits/chosen": -1.5253907442092896,
468
+ "logits/rejected": -1.0778144598007202,
469
+ "logps/chosen": -385.1762390136719,
470
+ "logps/rejected": -462.73834228515625,
471
+ "loss": 0.5159,
472
+ "rewards/accuracies": 0.7437499761581421,
473
+ "rewards/chosen": -1.0836073160171509,
474
+ "rewards/margins": 0.8540776371955872,
475
+ "rewards/rejected": -1.9376850128173828,
476
+ "step": 280
477
+ },
478
+ {
479
+ "epoch": 0.76,
480
+ "grad_norm": 23.12152900338625,
481
+ "learning_rate": 8.362688519416084e-08,
482
+ "logits/chosen": -1.374618649482727,
483
+ "logits/rejected": -1.0819616317749023,
484
+ "logps/chosen": -367.61090087890625,
485
+ "logps/rejected": -503.74835205078125,
486
+ "loss": 0.4882,
487
+ "rewards/accuracies": 0.8125,
488
+ "rewards/chosen": -1.0010521411895752,
489
+ "rewards/margins": 1.0948959589004517,
490
+ "rewards/rejected": -2.0959479808807373,
491
+ "step": 290
492
+ },
493
+ {
494
+ "epoch": 0.79,
495
+ "grad_norm": 19.50201171164751,
496
+ "learning_rate": 6.725703638409861e-08,
497
+ "logits/chosen": -1.3707311153411865,
498
+ "logits/rejected": -1.0541932582855225,
499
+ "logps/chosen": -394.03924560546875,
500
+ "logps/rejected": -464.50640869140625,
501
+ "loss": 0.4891,
502
+ "rewards/accuracies": 0.75,
503
+ "rewards/chosen": -1.1756898164749146,
504
+ "rewards/margins": 0.8380070924758911,
505
+ "rewards/rejected": -2.0136971473693848,
506
+ "step": 300
507
+ },
508
+ {
509
+ "epoch": 0.79,
510
+ "eval_logits/chosen": -1.4685231447219849,
511
+ "eval_logits/rejected": -0.9395467638969421,
512
+ "eval_logps/chosen": -402.9643859863281,
513
+ "eval_logps/rejected": -464.9228515625,
514
+ "eval_loss": 0.5042179822921753,
515
+ "eval_rewards/accuracies": 0.7549999952316284,
516
+ "eval_rewards/chosen": -1.1308995485305786,
517
+ "eval_rewards/margins": 0.8367605805397034,
518
+ "eval_rewards/rejected": -1.9676600694656372,
519
+ "eval_runtime": 693.8368,
520
+ "eval_samples_per_second": 2.883,
521
+ "eval_steps_per_second": 0.144,
522
+ "step": 300
523
+ },
524
+ {
525
+ "epoch": 0.81,
526
+ "grad_norm": 22.978115464291324,
527
+ "learning_rate": 5.2419151500177395e-08,
528
+ "logits/chosen": -1.3883640766143799,
529
+ "logits/rejected": -0.8084322214126587,
530
+ "logps/chosen": -390.07672119140625,
531
+ "logps/rejected": -441.5680236816406,
532
+ "loss": 0.5031,
533
+ "rewards/accuracies": 0.737500011920929,
534
+ "rewards/chosen": -1.1122825145721436,
535
+ "rewards/margins": 0.8595865964889526,
536
+ "rewards/rejected": -1.971868872642517,
537
+ "step": 310
538
+ },
539
+ {
540
+ "epoch": 0.84,
541
+ "grad_norm": 22.683444984114566,
542
+ "learning_rate": 3.9237618939808646e-08,
543
+ "logits/chosen": -1.3592180013656616,
544
+ "logits/rejected": -0.9010286331176758,
545
+ "logps/chosen": -427.95184326171875,
546
+ "logps/rejected": -494.72137451171875,
547
+ "loss": 0.5004,
548
+ "rewards/accuracies": 0.768750011920929,
549
+ "rewards/chosen": -1.145861029624939,
550
+ "rewards/margins": 0.888739287853241,
551
+ "rewards/rejected": -2.0346004962921143,
552
+ "step": 320
553
+ },
554
+ {
555
+ "epoch": 0.86,
556
+ "grad_norm": 22.356295615100926,
557
+ "learning_rate": 2.7822941630022605e-08,
558
+ "logits/chosen": -1.4484529495239258,
559
+ "logits/rejected": -1.012138843536377,
560
+ "logps/chosen": -386.97357177734375,
561
+ "logps/rejected": -459.3680725097656,
562
+ "loss": 0.4987,
563
+ "rewards/accuracies": 0.7124999761581421,
564
+ "rewards/chosen": -1.110649585723877,
565
+ "rewards/margins": 0.8449923396110535,
566
+ "rewards/rejected": -1.9556419849395752,
567
+ "step": 330
568
+ },
569
+ {
570
+ "epoch": 0.89,
571
+ "grad_norm": 21.4095540436173,
572
+ "learning_rate": 1.827081066349459e-08,
573
+ "logits/chosen": -1.372106671333313,
574
+ "logits/rejected": -1.1249427795410156,
575
+ "logps/chosen": -384.5665283203125,
576
+ "logps/rejected": -453.394287109375,
577
+ "loss": 0.5067,
578
+ "rewards/accuracies": 0.6812499761581421,
579
+ "rewards/chosen": -1.1582924127578735,
580
+ "rewards/margins": 0.6956581473350525,
581
+ "rewards/rejected": -1.8539505004882812,
582
+ "step": 340
583
+ },
584
+ {
585
+ "epoch": 0.92,
586
+ "grad_norm": 20.33149833976292,
587
+ "learning_rate": 1.0661303104582881e-08,
588
+ "logits/chosen": -1.3133699893951416,
589
+ "logits/rejected": -0.8347970247268677,
590
+ "logps/chosen": -401.160888671875,
591
+ "logps/rejected": -450.82843017578125,
592
+ "loss": 0.486,
593
+ "rewards/accuracies": 0.7562500238418579,
594
+ "rewards/chosen": -1.1163744926452637,
595
+ "rewards/margins": 0.9114343523979187,
596
+ "rewards/rejected": -2.027808666229248,
597
+ "step": 350
598
+ },
599
+ {
600
+ "epoch": 0.94,
601
+ "grad_norm": 27.107299279864698,
602
+ "learning_rate": 5.058210690300463e-09,
603
+ "logits/chosen": -1.40584397315979,
604
+ "logits/rejected": -1.0146383047103882,
605
+ "logps/chosen": -394.41607666015625,
606
+ "logps/rejected": -467.156982421875,
607
+ "loss": 0.5043,
608
+ "rewards/accuracies": 0.731249988079071,
609
+ "rewards/chosen": -1.0669889450073242,
610
+ "rewards/margins": 0.8511545062065125,
611
+ "rewards/rejected": -1.9181435108184814,
612
+ "step": 360
613
+ },
614
+ {
615
+ "epoch": 0.97,
616
+ "grad_norm": 23.223735379023285,
617
+ "learning_rate": 1.5085050538290456e-09,
618
+ "logits/chosen": -1.3387047052383423,
619
+ "logits/rejected": -1.1372339725494385,
620
+ "logps/chosen": -393.8738708496094,
621
+ "logps/rejected": -456.54046630859375,
622
+ "loss": 0.5213,
623
+ "rewards/accuracies": 0.6812499761581421,
624
+ "rewards/chosen": -1.13485586643219,
625
+ "rewards/margins": 0.6930190324783325,
626
+ "rewards/rejected": -1.827874779701233,
627
+ "step": 370
628
+ },
629
+ {
630
+ "epoch": 0.99,
631
+ "grad_norm": 22.565920583773593,
632
+ "learning_rate": 4.19439536940569e-11,
633
+ "logits/chosen": -1.2898640632629395,
634
+ "logits/rejected": -0.9960281252861023,
635
+ "logps/chosen": -395.0779724121094,
636
+ "logps/rejected": -449.18646240234375,
637
+ "loss": 0.4955,
638
+ "rewards/accuracies": 0.6937500238418579,
639
+ "rewards/chosen": -1.2446048259735107,
640
+ "rewards/margins": 0.7065111398696899,
641
+ "rewards/rejected": -1.9511159658432007,
642
+ "step": 380
643
+ },
644
+ {
645
+ "epoch": 1.0,
646
+ "step": 382,
647
+ "total_flos": 0.0,
648
+ "train_loss": 0.5410362510156881,
649
+ "train_runtime": 45025.8645,
650
+ "train_samples_per_second": 1.358,
651
+ "train_steps_per_second": 0.008
652
+ }
653
+ ],
654
+ "logging_steps": 10,
655
+ "max_steps": 382,
656
+ "num_input_tokens_seen": 0,
657
+ "num_train_epochs": 1,
658
+ "save_steps": 100,
659
+ "total_flos": 0.0,
660
+ "train_batch_size": 2,
661
+ "trial_name": null,
662
+ "trial_params": null
663
+ }