AmberYifan committed on
Commit
7782fb0
1 Parent(s): ddac35f

Model save

Files changed (4)
  1. README.md +13 -16
  2. all_results.json +4 -4
  3. train_results.json +4 -4
  4. trainer_state.json +164 -164
README.md CHANGED
@@ -2,31 +2,28 @@
  license: apache-2.0
  base_model: AmberYifan/mistral-safe-sft-full
  tags:
- - alignment-handbook
  - generated_from_trainer
- datasets:
- - AmberYifan/sft-spin-kcenter-5k
  model-index:
- - name: sft-spin-kcenter-5k
+ - name: mistral-sft-kcenter-5k
    results: []
  ---

  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
  should probably proofread and complete it, then remove this comment. -->

- # sft-spin-kcenter-5k
+ # mistral-sft-kcenter-5k

- This model is a fine-tuned version of [AmberYifan/mistral-safe-sft-full](https://huggingface.co/AmberYifan/mistral-safe-sft-full) on the AmberYifan/sft-spin-kcenter-5k dataset.
+ This model is a fine-tuned version of [AmberYifan/mistral-safe-sft-full](https://huggingface.co/AmberYifan/mistral-safe-sft-full) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.9619
- - Rewards/real: 0.2019
- - Rewards/generated: -0.1441
- - Rewards/accuracies: 0.8906
- - Rewards/margins: 0.3460
- - Logps/generated: -259.6614
- - Logps/real: -210.6393
- - Logits/generated: -2.6617
- - Logits/real: -2.6095
+ - Loss: 0.1925
+ - Rewards/real: 5.5330
+ - Rewards/generated: -4.6681
+ - Rewards/accuracies: 0.9922
+ - Rewards/margins: 10.2011
+ - Logps/generated: -304.9019
+ - Logps/real: -157.3288
+ - Logits/generated: -2.8831
+ - Logits/real: -2.8163

  ## Model description

@@ -62,7 +59,7 @@ The following hyperparameters were used during training:

  | Training Loss | Epoch | Step | Validation Loss | Rewards/real | Rewards/generated | Rewards/accuracies | Rewards/margins | Logps/generated | Logps/real | Logits/generated | Logits/real |
  |:-------------:|:------:|:----:|:---------------:|:------------:|:-----------------:|:------------------:|:---------------:|:---------------:|:----------:|:----------------:|:-----------:|
- | 0.0 | 0.6369 | 100 | 0.9619 | 0.2019 | -0.1441 | 0.8906 | 0.3460 | -259.6614 | -210.6393 | -2.6617 | -2.6095 |
+ | 0.1804 | 0.6369 | 100 | 0.1925 | 5.5330 | -4.6681 | 0.9922 | 10.2011 | -304.9019 | -157.3288 | -2.8831 | -2.8163 |


  ### Framework versions
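
For readers of the updated card: the Rewards/* fields are the implicit DPO-style rewards logged by the trainer. A minimal sketch of how they typically relate, assuming a TRL-style pairwise objective where a completion's reward is a beta-scaled log-probability ratio against the reference model (the beta value and exact loss variant are not recorded in this commit):

```python
# Minimal sketch (assumption: TRL/DPO-style implicit rewards; beta is illustrative).

def implicit_reward(policy_logp: float, ref_logp: float, beta: float = 0.1) -> float:
    """Reward of one completion: beta * (log pi(y|x) - log pi_ref(y|x))."""
    return beta * (policy_logp - ref_logp)

# The summary metrics in the card then follow directly:
#   Rewards/margins    = Rewards/real - Rewards/generated
#   Rewards/accuracies = fraction of eval pairs whose margin is positive
margin = 5.5330 - (-4.6681)
print(round(margin, 4))  # 10.2011, matching Rewards/margins above
```

The margin identity holds for both the old and the new evaluation rows in this diff: 0.2019 − (−0.1441) = 0.3460 and 5.5330 − (−4.6681) = 10.2011.
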
all_results.json CHANGED
@@ -1,9 +1,9 @@
  {
  "epoch": 1.0,
  "total_flos": 0.0,
- "train_loss": 0.03164779691052703,
- "train_runtime": 1178.5993,
+ "train_loss": 0.23225101619769054,
+ "train_runtime": 1278.722,
  "train_samples": 5000,
- "train_samples_per_second": 4.242,
- "train_steps_per_second": 0.133
+ "train_samples_per_second": 3.91,
+ "train_steps_per_second": 0.123
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
  {
  "epoch": 1.0,
  "total_flos": 0.0,
- "train_loss": 0.03164779691052703,
- "train_runtime": 1178.5993,
+ "train_loss": 0.23225101619769054,
+ "train_runtime": 1278.722,
  "train_samples": 5000,
- "train_samples_per_second": 4.242,
- "train_steps_per_second": 0.133
+ "train_samples_per_second": 3.91,
+ "train_steps_per_second": 0.123
  }
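
The new throughput figures are internally consistent with the runtime and dataset size; a quick check (the 157-step count appears in trainer_state.json below):

```python
# Quick consistency check of the reported throughput numbers.
train_runtime = 1278.722  # seconds
train_samples = 5000
train_steps = 157

print(round(train_samples / train_runtime, 2))  # 3.91  -> train_samples_per_second
print(round(train_steps / train_runtime, 3))    # 0.123 -> train_steps_per_second
```
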
trainer_state.json CHANGED
@@ -10,13 +10,13 @@
  "log_history": [
  {
  "epoch": 0.006369426751592357,
- "grad_norm": 2514.601414684904,
+ "grad_norm": 553.6689195387686,
  "learning_rate": 3.125e-08,
- "logits/generated": -2.661752223968506,
- "logits/real": -2.483980894088745,
- "logps/generated": -429.17132568359375,
- "logps/real": -342.051025390625,
- "loss": 1.3612,
+ "logits/generated": -2.5852508544921875,
+ "logits/real": -2.6413676738739014,
+ "logps/generated": -325.0230407714844,
+ "logps/real": -285.6551513671875,
+ "loss": 0.9368,
  "rewards/accuracies": 0.0,
  "rewards/generated": 0.0,
  "rewards/margins": 0.0,
@@ -25,253 +25,253 @@
  },
  {
  "epoch": 0.06369426751592357,
- "grad_norm": 0.31574006570864244,
+ "grad_norm": 114.25554694751857,
  "learning_rate": 3.1249999999999997e-07,
- "logits/generated": -2.668369770050049,
- "logits/real": -2.424091339111328,
- "logps/generated": -450.4893798828125,
- "logps/real": -144.69952392578125,
- "loss": 0.4008,
- "rewards/accuracies": 0.8888888955116272,
- "rewards/generated": -2.1318063735961914,
- "rewards/margins": 21.866958618164062,
- "rewards/real": 19.735153198242188,
+ "logits/generated": -2.6055305004119873,
+ "logits/real": -2.5509183406829834,
+ "logps/generated": -363.4412536621094,
+ "logps/real": -227.83956909179688,
+ "loss": 0.6399,
+ "rewards/accuracies": 0.7916666865348816,
+ "rewards/generated": -1.371843934059143,
+ "rewards/margins": 1.6683220863342285,
+ "rewards/real": 0.2964780032634735,
  "step": 10
  },
  {
  "epoch": 0.12738853503184713,
- "grad_norm": 0.005520241230176221,
+ "grad_norm": 32.95472745041124,
  "learning_rate": 4.858156028368794e-07,
- "logits/generated": -2.692908763885498,
- "logits/real": -2.329023599624634,
- "logps/generated": -478.9815368652344,
- "logps/real": -0.014682354405522346,
- "loss": 0.0,
+ "logits/generated": -2.859358072280884,
+ "logits/real": -2.843193531036377,
+ "logps/generated": -321.1373596191406,
+ "logps/real": -200.79653930664062,
+ "loss": 0.2658,
  "rewards/accuracies": 1.0,
- "rewards/generated": -4.981024742126465,
- "rewards/margins": 39.18465805053711,
- "rewards/real": 34.203636169433594,
+ "rewards/generated": -4.476873874664307,
+ "rewards/margins": 5.808846950531006,
+ "rewards/real": 1.3319734334945679,
  "step": 20
  },
  {
  "epoch": 0.1910828025477707,
- "grad_norm": 0.002789639064368334,
+ "grad_norm": 15.376345388496501,
  "learning_rate": 4.50354609929078e-07,
- "logits/generated": -2.705698013305664,
- "logits/real": -2.342294216156006,
- "logps/generated": -486.1914978027344,
- "logps/real": -0.004650969058275223,
- "loss": 0.0,
+ "logits/generated": -2.8705520629882812,
+ "logits/real": -2.8971784114837646,
+ "logps/generated": -368.5296325683594,
+ "logps/real": -192.18089294433594,
+ "loss": 0.2291,
  "rewards/accuracies": 1.0,
- "rewards/generated": -5.702020645141602,
- "rewards/margins": 39.90666198730469,
- "rewards/real": 34.20463943481445,
+ "rewards/generated": -5.970882415771484,
+ "rewards/margins": 9.524211883544922,
+ "rewards/real": 3.553328275680542,
  "step": 30
  },
  {
  "epoch": 0.25477707006369427,
- "grad_norm": 0.0019297231936813803,
+ "grad_norm": 32.240493773919496,
  "learning_rate": 4.148936170212766e-07,
- "logits/generated": -2.7122185230255127,
- "logits/real": -2.348639965057373,
- "logps/generated": -489.88970947265625,
- "logps/real": -0.002834505634382367,
- "loss": 0.0,
- "rewards/accuracies": 1.0,
- "rewards/generated": -6.071843147277832,
- "rewards/margins": 40.27666473388672,
- "rewards/real": 34.2048225402832,
+ "logits/generated": -2.9980812072753906,
+ "logits/real": -2.925062656402588,
+ "logps/generated": -352.6733093261719,
+ "logps/real": -184.546142578125,
+ "loss": 0.2192,
+ "rewards/accuracies": 0.9624999761581421,
+ "rewards/generated": -5.158407688140869,
+ "rewards/margins": 9.533308029174805,
+ "rewards/real": 4.374899864196777,
  "step": 40
  },
  {
  "epoch": 0.3184713375796178,
- "grad_norm": 0.0014726852980064813,
+ "grad_norm": 22.443882761667297,
  "learning_rate": 3.7943262411347514e-07,
- "logits/generated": -2.71730375289917,
- "logits/real": -2.3547444343566895,
- "logps/generated": -492.354248046875,
- "logps/real": -0.0020735759753733873,
- "loss": 0.0,
+ "logits/generated": -2.9874608516693115,
+ "logits/real": -2.901106357574463,
+ "logps/generated": -366.24774169921875,
+ "logps/real": -178.15200805664062,
+ "loss": 0.1918,
  "rewards/accuracies": 1.0,
- "rewards/generated": -6.318298816680908,
- "rewards/margins": 40.523193359375,
- "rewards/real": 34.20489501953125,
+ "rewards/generated": -5.891494274139404,
+ "rewards/margins": 10.835145950317383,
+ "rewards/real": 4.94365119934082,
  "step": 50
  },
  {
  "epoch": 0.3821656050955414,
- "grad_norm": 0.0012108227405915682,
+ "grad_norm": 27.90941399161424,
  "learning_rate": 3.4397163120567375e-07,
- "logits/generated": -2.7216479778289795,
- "logits/real": -2.359346866607666,
- "logps/generated": -493.62725830078125,
- "logps/real": -0.0016389258671551943,
- "loss": 0.0,
+ "logits/generated": -2.9581241607666016,
+ "logits/real": -2.923001527786255,
+ "logps/generated": -340.056884765625,
+ "logps/real": -175.193603515625,
+ "loss": 0.2088,
  "rewards/accuracies": 1.0,
- "rewards/generated": -6.445591926574707,
- "rewards/margins": 40.65053176879883,
- "rewards/real": 34.20494079589844,
+ "rewards/generated": -4.940432548522949,
+ "rewards/margins": 10.540928840637207,
+ "rewards/real": 5.6004958152771,
  "step": 60
  },
  {
  "epoch": 0.445859872611465,
- "grad_norm": 0.0010092968797445821,
+ "grad_norm": 19.61548013874602,
  "learning_rate": 3.085106382978723e-07,
- "logits/generated": -2.7243549823760986,
- "logits/real": -2.3620219230651855,
- "logps/generated": -494.82586669921875,
- "logps/real": -0.0013623478589579463,
- "loss": 0.0,
- "rewards/accuracies": 1.0,
- "rewards/generated": -6.565457820892334,
- "rewards/margins": 40.77042770385742,
- "rewards/real": 34.20496368408203,
+ "logits/generated": -2.950155735015869,
+ "logits/real": -2.920978546142578,
+ "logps/generated": -374.2133483886719,
+ "logps/real": -176.62344360351562,
+ "loss": 0.1973,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/generated": -5.149114608764648,
+ "rewards/margins": 11.470724105834961,
+ "rewards/real": 6.321610450744629,
  "step": 70
  },
  {
  "epoch": 0.5095541401273885,
- "grad_norm": 0.0008924667007579917,
+ "grad_norm": 7.313023868270791,
  "learning_rate": 2.730496453900709e-07,
- "logits/generated": -2.726926565170288,
- "logits/real": -2.362954616546631,
- "logps/generated": -495.93377685546875,
- "logps/real": -0.0011800352949649096,
- "loss": 0.0,
- "rewards/accuracies": 1.0,
- "rewards/generated": -6.676251411437988,
- "rewards/margins": 40.88123321533203,
- "rewards/real": 34.204986572265625,
+ "logits/generated": -2.891042709350586,
+ "logits/real": -2.816384792327881,
+ "logps/generated": -343.1810607910156,
+ "logps/real": -187.75689697265625,
+ "loss": 0.1921,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/generated": -4.03465461730957,
+ "rewards/margins": 11.060041427612305,
+ "rewards/real": 7.025385856628418,
  "step": 80
  },
  {
  "epoch": 0.5732484076433121,
- "grad_norm": 0.0008155704567801252,
+ "grad_norm": 5.902065577304844,
  "learning_rate": 2.375886524822695e-07,
- "logits/generated": -2.72874116897583,
- "logits/real": -2.366927146911621,
- "logps/generated": -496.6890563964844,
- "logps/real": -0.0010528427083045244,
- "loss": 0.0,
- "rewards/accuracies": 1.0,
- "rewards/generated": -6.751776218414307,
- "rewards/margins": 40.95677185058594,
- "rewards/real": 34.204994201660156,
+ "logits/generated": -2.7973499298095703,
+ "logits/real": -2.7673676013946533,
+ "logps/generated": -361.3358154296875,
+ "logps/real": -168.44692993164062,
+ "loss": 0.1829,
+ "rewards/accuracies": 0.9750000238418579,
+ "rewards/generated": -4.653961658477783,
+ "rewards/margins": 10.882070541381836,
+ "rewards/real": 6.228109359741211,
  "step": 90
  },
  {
  "epoch": 0.6369426751592356,
- "grad_norm": 0.000725474761841383,
+ "grad_norm": 21.378621598556965,
  "learning_rate": 2.0212765957446807e-07,
- "logits/generated": -2.7310385704040527,
- "logits/real": -2.369218349456787,
- "logps/generated": -497.72625732421875,
- "logps/real": -0.000954283110331744,
- "loss": 0.0,
+ "logits/generated": -2.8296706676483154,
+ "logits/real": -2.783637762069702,
+ "logps/generated": -385.32916259765625,
+ "logps/real": -161.2016143798828,
+ "loss": 0.1804,
  "rewards/accuracies": 1.0,
- "rewards/generated": -6.855503082275391,
- "rewards/margins": 41.06051254272461,
- "rewards/real": 34.20500946044922,
+ "rewards/generated": -5.7995285987854,
+ "rewards/margins": 12.244100570678711,
+ "rewards/real": 6.444572448730469,
  "step": 100
  },
  {
  "epoch": 0.6369426751592356,
- "eval_logits/generated": -2.661693572998047,
- "eval_logits/real": -2.609511137008667,
- "eval_logps/generated": -259.66143798828125,
- "eval_logps/real": -210.63931274414062,
- "eval_loss": 0.9619492292404175,
- "eval_rewards/accuracies": 0.890625,
- "eval_rewards/generated": -0.14410093426704407,
- "eval_rewards/margins": 0.3460058867931366,
- "eval_rewards/real": 0.20190495252609253,
- "eval_runtime": 37.6272,
- "eval_samples_per_second": 13.288,
- "eval_steps_per_second": 0.425,
+ "eval_logits/generated": -2.8830642700195312,
+ "eval_logits/real": -2.8163247108459473,
+ "eval_logps/generated": -304.90185546875,
+ "eval_logps/real": -157.32879638671875,
+ "eval_loss": 0.19246906042099,
+ "eval_rewards/accuracies": 0.9921875,
+ "eval_rewards/generated": -4.66813850402832,
+ "eval_rewards/margins": 10.201096534729004,
+ "eval_rewards/real": 5.532957077026367,
+ "eval_runtime": 40.3701,
+ "eval_samples_per_second": 12.385,
+ "eval_steps_per_second": 0.396,
  "step": 100
  },
  {
  "epoch": 0.7006369426751592,
- "grad_norm": 0.0006953242722311237,
+ "grad_norm": 4.836243072704085,
  "learning_rate": 1.6666666666666665e-07,
- "logits/generated": -2.732168436050415,
- "logits/real": -2.370328426361084,
- "logps/generated": -497.7915954589844,
- "logps/real": -0.0008821273222565651,
- "loss": 0.0,
+ "logits/generated": -2.814863920211792,
+ "logits/real": -2.813689708709717,
+ "logps/generated": -349.199951171875,
+ "logps/real": -167.05197143554688,
+ "loss": 0.1732,
  "rewards/accuracies": 1.0,
- "rewards/generated": -6.86203145980835,
- "rewards/margins": 41.06704330444336,
- "rewards/real": 34.20501708984375,
+ "rewards/generated": -4.092279434204102,
+ "rewards/margins": 10.799135208129883,
+ "rewards/real": 6.706856727600098,
  "step": 110
  },
  {
  "epoch": 0.7643312101910829,
- "grad_norm": 0.0006463359840785572,
+ "grad_norm": 10.2454630213605,
  "learning_rate": 1.3120567375886523e-07,
- "logits/generated": -2.7338597774505615,
- "logits/real": -2.3714773654937744,
- "logps/generated": -498.8688049316406,
- "logps/real": -0.0008279000176116824,
- "loss": 0.0,
- "rewards/accuracies": 1.0,
- "rewards/generated": -6.969751834869385,
- "rewards/margins": 41.174774169921875,
- "rewards/real": 34.205020904541016,
+ "logits/generated": -2.6291651725769043,
+ "logits/real": -2.6084656715393066,
+ "logps/generated": -357.1519470214844,
+ "logps/real": -194.95156860351562,
+ "loss": 0.2818,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/generated": -6.045389175415039,
+ "rewards/margins": 10.61597728729248,
+ "rewards/real": 4.570586681365967,
  "step": 120
  },
  {
  "epoch": 0.8280254777070064,
- "grad_norm": 0.0006205498614208318,
+ "grad_norm": 30.968094217096088,
  "learning_rate": 9.574468085106382e-08,
- "logits/generated": -2.7348275184631348,
- "logits/real": -2.3729214668273926,
- "logps/generated": -498.8643493652344,
- "logps/real": -0.0007825180655345321,
- "loss": 0.0,
+ "logits/generated": -2.7041964530944824,
+ "logits/real": -2.6806530952453613,
+ "logps/generated": -349.06878662109375,
+ "logps/real": -164.90850830078125,
+ "loss": 0.1784,
  "rewards/accuracies": 1.0,
- "rewards/generated": -6.969304084777832,
- "rewards/margins": 41.17433547973633,
- "rewards/real": 34.20502471923828,
+ "rewards/generated": -5.374934196472168,
+ "rewards/margins": 13.002920150756836,
+ "rewards/real": 7.627985954284668,
  "step": 130
  },
  {
  "epoch": 0.89171974522293,
- "grad_norm": 0.0005942730744268325,
+ "grad_norm": 39.77463051390536,
  "learning_rate": 6.02836879432624e-08,
- "logits/generated": -2.7360432147979736,
- "logits/real": -2.374084949493408,
- "logps/generated": -499.4146423339844,
- "logps/real": -0.0007567574502900243,
- "loss": 0.0,
- "rewards/accuracies": 1.0,
- "rewards/generated": -7.024338722229004,
- "rewards/margins": 41.229366302490234,
- "rewards/real": 34.20502471923828,
+ "logits/generated": -2.705962896347046,
+ "logits/real": -2.6498522758483887,
+ "logps/generated": -338.68121337890625,
+ "logps/real": -155.6457061767578,
+ "loss": 0.1802,
+ "rewards/accuracies": 0.987500011920929,
+ "rewards/generated": -4.318178176879883,
+ "rewards/margins": 11.076618194580078,
+ "rewards/real": 6.758440971374512,
  "step": 140
  },
  {
  "epoch": 0.9554140127388535,
- "grad_norm": 0.0005956400007297931,
+ "grad_norm": 37.53390149893301,
  "learning_rate": 2.4822695035460993e-08,
- "logits/generated": -2.73626708984375,
- "logits/real": -2.374666690826416,
- "logps/generated": -499.5492248535156,
- "logps/real": -0.0007398539455607533,
- "loss": 0.0,
+ "logits/generated": -2.6534829139709473,
+ "logits/real": -2.6331899166107178,
+ "logps/generated": -302.69781494140625,
+ "logps/real": -132.01724243164062,
+ "loss": 0.1803,
  "rewards/accuracies": 1.0,
- "rewards/generated": -7.0377936363220215,
- "rewards/margins": 41.242820739746094,
- "rewards/real": 34.20502471923828,
+ "rewards/generated": -3.575101375579834,
+ "rewards/margins": 9.780950546264648,
+ "rewards/real": 6.20584774017334,
  "step": 150
  },
  {
  "epoch": 1.0,
  "step": 157,
  "total_flos": 0.0,
- "train_loss": 0.03164779691052703,
- "train_runtime": 1178.5993,
- "train_samples_per_second": 4.242,
- "train_steps_per_second": 0.133
+ "train_loss": 0.23225101619769054,
+ "train_runtime": 1278.722,
+ "train_samples_per_second": 3.91,
+ "train_steps_per_second": 0.123
  }
  ],
  "logging_steps": 10,