AmberYifan committed on
Commit 0938998
1 Parent(s): e7cf78b

Model save

Files changed (4)
  1. README.md +13 -16
  2. all_results.json +4 -4
  3. train_results.json +4 -4
  4. trainer_state.json +169 -169
README.md CHANGED
@@ -2,31 +2,28 @@
  license: apache-2.0
  base_model: AmberYifan/mistral-safe-sft-full
  tags:
- - alignment-handbook
  - generated_from_trainer
- datasets:
- - AmberYifan/sft-spin-kcenter-5k
  model-index:
- - name: sft-spin-kcenter-5k
  results: []
  ---
 
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
  should probably proofread and complete it, then remove this comment. -->
 
- # sft-spin-kcenter-5k
 
- This model is a fine-tuned version of [AmberYifan/mistral-safe-sft-full](https://huggingface.co/AmberYifan/mistral-safe-sft-full) on the AmberYifan/sft-spin-kcenter-5k dataset.
  It achieves the following results on the evaluation set:
- - Loss: 1.0181
- - Rewards/real: 1.4812
- - Rewards/generated: 1.4157
- - Rewards/accuracies: 0.0812
- - Rewards/margins: 0.0655
- - Logps/generated: -57.7408
- - Logps/real: -16.8189
- - Logits/generated: -2.6921
- - Logits/real: -2.4190
 
  ## Model description
 
@@ -62,7 +59,7 @@ The following hyperparameters were used during training:
 
  | Training Loss | Epoch | Step | Validation Loss | Rewards/real | Rewards/generated | Rewards/accuracies | Rewards/margins | Logps/generated | Logps/real | Logits/generated | Logits/real |
  |:-------------:|:------:|:----:|:---------------:|:------------:|:-----------------:|:------------------:|:---------------:|:---------------:|:----------:|:----------------:|:-----------:|
- | 0.3429 | 0.6369 | 100 | 1.0181 | 1.4812 | 1.4157 | 0.0812 | 0.0655 | -57.7408 | -16.8189 | -2.6921 | -2.4190 |
 
 
  ### Framework versions
 
  license: apache-2.0
  base_model: AmberYifan/mistral-safe-sft-full
  tags:
  - generated_from_trainer
  model-index:
+ - name: mistral-sft-kcenter-5k
  results: []
  ---
 
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
  should probably proofread and complete it, then remove this comment. -->
 
+ # mistral-sft-kcenter-5k
 
+ This model is a fine-tuned version of [AmberYifan/mistral-safe-sft-full](https://huggingface.co/AmberYifan/mistral-safe-sft-full) on an unknown dataset.
  It achieves the following results on the evaluation set:
+ - Loss: 0.9619
+ - Rewards/real: 0.2019
+ - Rewards/generated: -0.1441
+ - Rewards/accuracies: 0.8906
+ - Rewards/margins: 0.3460
+ - Logps/generated: -259.6614
+ - Logps/real: -210.6393
+ - Logits/generated: -2.6617
+ - Logits/real: -2.6095
 
  ## Model description
 
 
  | Training Loss | Epoch | Step | Validation Loss | Rewards/real | Rewards/generated | Rewards/accuracies | Rewards/margins | Logps/generated | Logps/real | Logits/generated | Logits/real |
  |:-------------:|:------:|:----:|:---------------:|:------------:|:-----------------:|:------------------:|:---------------:|:---------------:|:----------:|:----------------:|:-----------:|
+ | 0.0 | 0.6369 | 100 | 0.9619 | 0.2019 | -0.1441 | 0.8906 | 0.3460 | -259.6614 | -210.6393 | -2.6617 | -2.6095 |
 
 
  ### Framework versions
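
For reference, a minimal usage sketch for the checkpoint described by the updated card. The repository id below is an assumption inferred from the new model name (`mistral-sft-kcenter-5k`) and the committer's namespace; substitute the actual repo path if it differs.

```python
# Minimal sketch: loading the checkpoint described by this model card with
# Hugging Face Transformers. The repo id is assumed, not confirmed by the commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "AmberYifan/mistral-sft-kcenter-5k"  # assumed repository id

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.bfloat16)

prompt = "Explain what a reward margin is in one sentence."
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```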
all_results.json CHANGED
@@ -1,9 +1,9 @@
  {
  "epoch": 1.0,
  "total_flos": 0.0,
- "train_loss": 0.3886139180250229,
- "train_runtime": 1518.6555,
  "train_samples": 5000,
- "train_samples_per_second": 3.292,
- "train_steps_per_second": 0.103
  }
 
  {
  "epoch": 1.0,
  "total_flos": 0.0,
+ "train_loss": 0.03164779691052703,
+ "train_runtime": 1178.5993,
  "train_samples": 5000,
+ "train_samples_per_second": 4.242,
+ "train_steps_per_second": 0.133
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
  {
  "epoch": 1.0,
  "total_flos": 0.0,
- "train_loss": 0.3886139180250229,
- "train_runtime": 1518.6555,
  "train_samples": 5000,
- "train_samples_per_second": 3.292,
- "train_steps_per_second": 0.103
  }
 
  {
  "epoch": 1.0,
  "total_flos": 0.0,
+ "train_loss": 0.03164779691052703,
+ "train_runtime": 1178.5993,
  "train_samples": 5000,
+ "train_samples_per_second": 4.242,
+ "train_steps_per_second": 0.133
  }
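
The updated throughput figures in `all_results.json` and `train_results.json` are internally consistent; a quick check, using the 157 optimizer steps recorded in `trainer_state.json` below:

```python
# Consistency check for the updated training summary, using only numbers
# reported in the files above and below.
train_samples = 5000
train_runtime = 1178.5993   # seconds, from train_results.json
total_steps = 157           # from trainer_state.json

samples_per_second = train_samples / train_runtime
steps_per_second = total_steps / train_runtime

print(f"{samples_per_second:.3f} samples/s")  # ~4.242, matching the file
print(f"{steps_per_second:.3f} steps/s")      # ~0.133, matching the file
```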
trainer_state.json CHANGED
@@ -10,13 +10,13 @@
  "log_history": [
  {
  "epoch": 0.006369426751592357,
- "grad_norm": 436.30790272840835,
  "learning_rate": 3.125e-08,
- "logits/generated": -2.574023723602295,
- "logits/real": -2.507308006286621,
- "logps/generated": -396.6226806640625,
- "logps/real": -228.6820831298828,
- "loss": 1.1036,
  "rewards/accuracies": 0.0,
  "rewards/generated": 0.0,
  "rewards/margins": 0.0,
@@ -25,253 +25,253 @@
  },
  {
  "epoch": 0.06369426751592357,
- "grad_norm": 86.39288206954664,
  "learning_rate": 3.1249999999999997e-07,
- "logits/generated": -2.6174476146698,
- "logits/real": -2.577033042907715,
- "logps/generated": -314.8096923828125,
- "logps/real": -233.916259765625,
- "loss": 0.8432,
- "rewards/accuracies": 0.8055555820465088,
- "rewards/generated": -1.1551631689071655,
- "rewards/margins": 1.29329252243042,
- "rewards/real": 0.13812927901744843,
  "step": 10
  },
  {
  "epoch": 0.12738853503184713,
- "grad_norm": 23.096946473336036,
  "learning_rate": 4.858156028368794e-07,
- "logits/generated": -2.934861421585083,
- "logits/real": -2.8925399780273438,
- "logps/generated": -313.32012939453125,
- "logps/real": -235.418701171875,
- "loss": 0.4872,
  "rewards/accuracies": 1.0,
- "rewards/generated": -3.896754741668701,
- "rewards/margins": 6.176419258117676,
- "rewards/real": 2.2796645164489746,
  "step": 20
  },
  {
  "epoch": 0.1910828025477707,
- "grad_norm": 23.454945687282052,
  "learning_rate": 4.50354609929078e-07,
- "logits/generated": -2.9848690032958984,
- "logits/real": -2.9865798950195312,
- "logps/generated": -304.69854736328125,
- "logps/real": -152.80667114257812,
- "loss": 0.3868,
- "rewards/accuracies": 0.987500011920929,
- "rewards/generated": -3.0515201091766357,
- "rewards/margins": 7.164666652679443,
- "rewards/real": 4.113146781921387,
  "step": 30
  },
  {
  "epoch": 0.25477707006369427,
- "grad_norm": 56.59285724233864,
  "learning_rate": 4.148936170212766e-07,
- "logits/generated": -2.821133852005005,
- "logits/real": -2.894505262374878,
- "logps/generated": -349.6683044433594,
- "logps/real": -201.2012176513672,
- "loss": 0.4438,
  "rewards/accuracies": 1.0,
- "rewards/generated": -5.3700079917907715,
- "rewards/margins": 9.444425582885742,
- "rewards/real": 4.074416637420654,
  "step": 40
  },
  {
  "epoch": 0.3184713375796178,
- "grad_norm": 26.344052963980364,
  "learning_rate": 3.7943262411347514e-07,
- "logits/generated": -3.041593074798584,
- "logits/real": -2.9904277324676514,
- "logps/generated": -335.09942626953125,
- "logps/real": -158.0336151123047,
- "loss": 0.37,
- "rewards/accuracies": 0.987500011920929,
- "rewards/generated": -3.180644989013672,
- "rewards/margins": 8.93952751159668,
- "rewards/real": 5.758882999420166,
  "step": 50
  },
  {
  "epoch": 0.3821656050955414,
- "grad_norm": 47.871426260084256,
  "learning_rate": 3.4397163120567375e-07,
- "logits/generated": -2.9896843433380127,
- "logits/real": -3.0180420875549316,
- "logps/generated": -302.9552307128906,
- "logps/real": -140.7099151611328,
- "loss": 0.3324,
- "rewards/accuracies": 0.9750000238418579,
- "rewards/generated": -3.2430217266082764,
- "rewards/margins": 9.301046371459961,
- "rewards/real": 6.0580244064331055,
  "step": 60
  },
  {
  "epoch": 0.445859872611465,
- "grad_norm": 24.288240218254995,
  "learning_rate": 3.085106382978723e-07,
- "logits/generated": -3.0595779418945312,
- "logits/real": -2.9718518257141113,
- "logps/generated": -328.691650390625,
- "logps/real": -140.92660522460938,
- "loss": 0.333,
- "rewards/accuracies": 0.987500011920929,
- "rewards/generated": -3.642324447631836,
- "rewards/margins": 10.118036270141602,
- "rewards/real": 6.47571325302124,
  "step": 70
  },
  {
  "epoch": 0.5095541401273885,
- "grad_norm": 14.83120429808784,
  "learning_rate": 2.730496453900709e-07,
- "logits/generated": -2.996634006500244,
- "logits/real": -3.0401620864868164,
- "logps/generated": -322.6135559082031,
- "logps/real": -159.5257110595703,
- "loss": 0.3328,
- "rewards/accuracies": 0.9624999761581421,
- "rewards/generated": -2.347047805786133,
- "rewards/margins": 10.133938789367676,
- "rewards/real": 7.786890506744385,
  "step": 80
  },
  {
  "epoch": 0.5732484076433121,
- "grad_norm": 16.65278970025503,
  "learning_rate": 2.375886524822695e-07,
- "logits/generated": -3.0264060497283936,
- "logits/real": -2.979038715362549,
- "logps/generated": -360.536376953125,
- "logps/real": -162.26622009277344,
- "loss": 0.3387,
- "rewards/accuracies": 0.987500011920929,
- "rewards/generated": -4.7210164070129395,
- "rewards/margins": 12.409219741821289,
- "rewards/real": 7.68820333480835,
  "step": 90
  },
  {
  "epoch": 0.6369426751592356,
- "grad_norm": 7.324308799399964,
  "learning_rate": 2.0212765957446807e-07,
- "logits/generated": -2.968139410018921,
- "logits/real": -3.0011098384857178,
- "logps/generated": -325.79107666015625,
- "logps/real": -162.4396514892578,
- "loss": 0.3429,
- "rewards/accuracies": 0.987500011920929,
- "rewards/generated": -2.580188512802124,
- "rewards/margins": 10.546457290649414,
- "rewards/real": 7.966268062591553,
  "step": 100
  },
  {
  "epoch": 0.6369426751592356,
- "eval_logits/generated": -2.6921370029449463,
- "eval_logits/real": -2.418954372406006,
- "eval_logps/generated": -57.74082565307617,
- "eval_logps/real": -16.818851470947266,
- "eval_loss": 1.018080234527588,
- "eval_rewards/accuracies": 0.0812101885676384,
- "eval_rewards/generated": 1.4157419204711914,
- "eval_rewards/margins": 0.06545643508434296,
- "eval_rewards/real": 1.4811984300613403,
- "eval_runtime": 325.124,
- "eval_samples_per_second": 15.379,
- "eval_steps_per_second": 0.483,
  "step": 100
  },
  {
  "epoch": 0.7006369426751592,
- "grad_norm": 44.79401383111945,
  "learning_rate": 1.6666666666666665e-07,
- "logits/generated": -2.966001033782959,
- "logits/real": -2.9780120849609375,
- "logps/generated": -347.74993896484375,
- "logps/real": -158.44979858398438,
- "loss": 0.365,
- "rewards/accuracies": 0.987500011920929,
- "rewards/generated": -3.206289768218994,
- "rewards/margins": 11.313484191894531,
- "rewards/real": 8.107194900512695,
  "step": 110
  },
  {
  "epoch": 0.7643312101910829,
- "grad_norm": 7.15542526632935,
  "learning_rate": 1.3120567375886523e-07,
- "logits/generated": -2.9361348152160645,
- "logits/real": -2.991243839263916,
- "logps/generated": -340.3627014160156,
- "logps/real": -145.40615844726562,
- "loss": 0.3243,
  "rewards/accuracies": 1.0,
- "rewards/generated": -3.4838199615478516,
- "rewards/margins": 10.616706848144531,
- "rewards/real": 7.132887363433838,
  "step": 120
  },
  {
  "epoch": 0.8280254777070064,
- "grad_norm": 28.617693401838523,
  "learning_rate": 9.574468085106382e-08,
- "logits/generated": -2.880622386932373,
- "logits/real": -2.873990774154663,
- "logps/generated": -284.91949462890625,
- "logps/real": -130.95306396484375,
- "loss": 0.3022,
- "rewards/accuracies": 0.987500011920929,
- "rewards/generated": -1.7718982696533203,
- "rewards/margins": 8.987956047058105,
- "rewards/real": 7.216057777404785,
  "step": 130
  },
  {
  "epoch": 0.89171974522293,
- "grad_norm": 11.2855453725489,
  "learning_rate": 6.02836879432624e-08,
- "logits/generated": -2.893979549407959,
- "logits/real": -2.919368028640747,
- "logps/generated": -334.3547058105469,
- "logps/real": -154.1964569091797,
- "loss": 0.3389,
- "rewards/accuracies": 0.9750000238418579,
- "rewards/generated": -3.628894090652466,
- "rewards/margins": 11.9329252243042,
- "rewards/real": 8.304032325744629,
  "step": 140
  },
  {
  "epoch": 0.9554140127388535,
- "grad_norm": 31.41907953329688,
  "learning_rate": 2.4822695035460993e-08,
- "logits/generated": -2.864701747894287,
- "logits/real": -2.9138083457946777,
- "logps/generated": -315.903564453125,
- "logps/real": -138.75643920898438,
- "loss": 0.3255,
- "rewards/accuracies": 0.987500011920929,
- "rewards/generated": -2.897620677947998,
- "rewards/margins": 9.965813636779785,
- "rewards/real": 7.068192958831787,
  "step": 150
  },
  {
  "epoch": 1.0,
  "step": 157,
  "total_flos": 0.0,
- "train_loss": 0.3886139180250229,
- "train_runtime": 1518.6555,
- "train_samples_per_second": 3.292,
- "train_steps_per_second": 0.103
  }
  ],
  "logging_steps": 10,
 
  "log_history": [
  {
  "epoch": 0.006369426751592357,
+ "grad_norm": 2514.601414684904,
  "learning_rate": 3.125e-08,
+ "logits/generated": -2.661752223968506,
+ "logits/real": -2.483980894088745,
+ "logps/generated": -429.17132568359375,
+ "logps/real": -342.051025390625,
+ "loss": 1.3612,
  "rewards/accuracies": 0.0,
  "rewards/generated": 0.0,
  "rewards/margins": 0.0,
 
  },
  {
  "epoch": 0.06369426751592357,
+ "grad_norm": 0.31574006570864244,
  "learning_rate": 3.1249999999999997e-07,
+ "logits/generated": -2.668369770050049,
+ "logits/real": -2.424091339111328,
+ "logps/generated": -450.4893798828125,
+ "logps/real": -144.69952392578125,
+ "loss": 0.4008,
+ "rewards/accuracies": 0.8888888955116272,
+ "rewards/generated": -2.1318063735961914,
+ "rewards/margins": 21.866958618164062,
+ "rewards/real": 19.735153198242188,
  "step": 10
  },
  {
  "epoch": 0.12738853503184713,
+ "grad_norm": 0.005520241230176221,
  "learning_rate": 4.858156028368794e-07,
+ "logits/generated": -2.692908763885498,
+ "logits/real": -2.329023599624634,
+ "logps/generated": -478.9815368652344,
+ "logps/real": -0.014682354405522346,
+ "loss": 0.0,
  "rewards/accuracies": 1.0,
+ "rewards/generated": -4.981024742126465,
+ "rewards/margins": 39.18465805053711,
+ "rewards/real": 34.203636169433594,
  "step": 20
  },
  {
  "epoch": 0.1910828025477707,
+ "grad_norm": 0.002789639064368334,
  "learning_rate": 4.50354609929078e-07,
+ "logits/generated": -2.705698013305664,
+ "logits/real": -2.342294216156006,
+ "logps/generated": -486.1914978027344,
+ "logps/real": -0.004650969058275223,
+ "loss": 0.0,
+ "rewards/accuracies": 1.0,
+ "rewards/generated": -5.702020645141602,
+ "rewards/margins": 39.90666198730469,
+ "rewards/real": 34.20463943481445,
  "step": 30
  },
  {
  "epoch": 0.25477707006369427,
+ "grad_norm": 0.0019297231936813803,
  "learning_rate": 4.148936170212766e-07,
+ "logits/generated": -2.7122185230255127,
+ "logits/real": -2.348639965057373,
+ "logps/generated": -489.88970947265625,
+ "logps/real": -0.002834505634382367,
+ "loss": 0.0,
  "rewards/accuracies": 1.0,
+ "rewards/generated": -6.071843147277832,
+ "rewards/margins": 40.27666473388672,
+ "rewards/real": 34.2048225402832,
  "step": 40
  },
  {
  "epoch": 0.3184713375796178,
+ "grad_norm": 0.0014726852980064813,
  "learning_rate": 3.7943262411347514e-07,
+ "logits/generated": -2.71730375289917,
+ "logits/real": -2.3547444343566895,
+ "logps/generated": -492.354248046875,
+ "logps/real": -0.0020735759753733873,
+ "loss": 0.0,
+ "rewards/accuracies": 1.0,
+ "rewards/generated": -6.318298816680908,
+ "rewards/margins": 40.523193359375,
+ "rewards/real": 34.20489501953125,
  "step": 50
  },
  {
  "epoch": 0.3821656050955414,
+ "grad_norm": 0.0012108227405915682,
  "learning_rate": 3.4397163120567375e-07,
+ "logits/generated": -2.7216479778289795,
+ "logits/real": -2.359346866607666,
+ "logps/generated": -493.62725830078125,
+ "logps/real": -0.0016389258671551943,
+ "loss": 0.0,
+ "rewards/accuracies": 1.0,
+ "rewards/generated": -6.445591926574707,
+ "rewards/margins": 40.65053176879883,
+ "rewards/real": 34.20494079589844,
  "step": 60
  },
  {
  "epoch": 0.445859872611465,
+ "grad_norm": 0.0010092968797445821,
  "learning_rate": 3.085106382978723e-07,
+ "logits/generated": -2.7243549823760986,
+ "logits/real": -2.3620219230651855,
+ "logps/generated": -494.82586669921875,
+ "logps/real": -0.0013623478589579463,
+ "loss": 0.0,
+ "rewards/accuracies": 1.0,
+ "rewards/generated": -6.565457820892334,
+ "rewards/margins": 40.77042770385742,
+ "rewards/real": 34.20496368408203,
  "step": 70
  },
  {
  "epoch": 0.5095541401273885,
+ "grad_norm": 0.0008924667007579917,
  "learning_rate": 2.730496453900709e-07,
+ "logits/generated": -2.726926565170288,
+ "logits/real": -2.362954616546631,
+ "logps/generated": -495.93377685546875,
+ "logps/real": -0.0011800352949649096,
+ "loss": 0.0,
+ "rewards/accuracies": 1.0,
+ "rewards/generated": -6.676251411437988,
+ "rewards/margins": 40.88123321533203,
+ "rewards/real": 34.204986572265625,
  "step": 80
  },
  {
  "epoch": 0.5732484076433121,
+ "grad_norm": 0.0008155704567801252,
  "learning_rate": 2.375886524822695e-07,
+ "logits/generated": -2.72874116897583,
+ "logits/real": -2.366927146911621,
+ "logps/generated": -496.6890563964844,
+ "logps/real": -0.0010528427083045244,
+ "loss": 0.0,
+ "rewards/accuracies": 1.0,
+ "rewards/generated": -6.751776218414307,
+ "rewards/margins": 40.95677185058594,
+ "rewards/real": 34.204994201660156,
  "step": 90
  },
  {
  "epoch": 0.6369426751592356,
+ "grad_norm": 0.000725474761841383,
  "learning_rate": 2.0212765957446807e-07,
+ "logits/generated": -2.7310385704040527,
+ "logits/real": -2.369218349456787,
+ "logps/generated": -497.72625732421875,
+ "logps/real": -0.000954283110331744,
+ "loss": 0.0,
+ "rewards/accuracies": 1.0,
+ "rewards/generated": -6.855503082275391,
+ "rewards/margins": 41.06051254272461,
+ "rewards/real": 34.20500946044922,
  "step": 100
  },
  {
  "epoch": 0.6369426751592356,
+ "eval_logits/generated": -2.661693572998047,
+ "eval_logits/real": -2.609511137008667,
+ "eval_logps/generated": -259.66143798828125,
+ "eval_logps/real": -210.63931274414062,
+ "eval_loss": 0.9619492292404175,
+ "eval_rewards/accuracies": 0.890625,
+ "eval_rewards/generated": -0.14410093426704407,
+ "eval_rewards/margins": 0.3460058867931366,
+ "eval_rewards/real": 0.20190495252609253,
+ "eval_runtime": 37.6272,
+ "eval_samples_per_second": 13.288,
+ "eval_steps_per_second": 0.425,
  "step": 100
  },
  {
  "epoch": 0.7006369426751592,
+ "grad_norm": 0.0006953242722311237,
  "learning_rate": 1.6666666666666665e-07,
+ "logits/generated": -2.732168436050415,
+ "logits/real": -2.370328426361084,
+ "logps/generated": -497.7915954589844,
+ "logps/real": -0.0008821273222565651,
+ "loss": 0.0,
+ "rewards/accuracies": 1.0,
+ "rewards/generated": -6.86203145980835,
+ "rewards/margins": 41.06704330444336,
+ "rewards/real": 34.20501708984375,
  "step": 110
  },
  {
  "epoch": 0.7643312101910829,
+ "grad_norm": 0.0006463359840785572,
  "learning_rate": 1.3120567375886523e-07,
+ "logits/generated": -2.7338597774505615,
+ "logits/real": -2.3714773654937744,
+ "logps/generated": -498.8688049316406,
+ "logps/real": -0.0008279000176116824,
+ "loss": 0.0,
  "rewards/accuracies": 1.0,
+ "rewards/generated": -6.969751834869385,
+ "rewards/margins": 41.174774169921875,
+ "rewards/real": 34.205020904541016,
  "step": 120
  },
  {
  "epoch": 0.8280254777070064,
+ "grad_norm": 0.0006205498614208318,
  "learning_rate": 9.574468085106382e-08,
+ "logits/generated": -2.7348275184631348,
+ "logits/real": -2.3729214668273926,
+ "logps/generated": -498.8643493652344,
+ "logps/real": -0.0007825180655345321,
+ "loss": 0.0,
+ "rewards/accuracies": 1.0,
+ "rewards/generated": -6.969304084777832,
+ "rewards/margins": 41.17433547973633,
+ "rewards/real": 34.20502471923828,
  "step": 130
  },
  {
  "epoch": 0.89171974522293,
+ "grad_norm": 0.0005942730744268325,
  "learning_rate": 6.02836879432624e-08,
+ "logits/generated": -2.7360432147979736,
+ "logits/real": -2.374084949493408,
+ "logps/generated": -499.4146423339844,
+ "logps/real": -0.0007567574502900243,
+ "loss": 0.0,
+ "rewards/accuracies": 1.0,
+ "rewards/generated": -7.024338722229004,
+ "rewards/margins": 41.229366302490234,
+ "rewards/real": 34.20502471923828,
  "step": 140
  },
  {
  "epoch": 0.9554140127388535,
+ "grad_norm": 0.0005956400007297931,
  "learning_rate": 2.4822695035460993e-08,
+ "logits/generated": -2.73626708984375,
+ "logits/real": -2.374666690826416,
+ "logps/generated": -499.5492248535156,
+ "logps/real": -0.0007398539455607533,
+ "loss": 0.0,
+ "rewards/accuracies": 1.0,
+ "rewards/generated": -7.0377936363220215,
+ "rewards/margins": 41.242820739746094,
+ "rewards/real": 34.20502471923828,
  "step": 150
  },
  {
  "epoch": 1.0,
  "step": 157,
  "total_flos": 0.0,
+ "train_loss": 0.03164779691052703,
+ "train_runtime": 1178.5993,
+ "train_samples_per_second": 4.242,
+ "train_steps_per_second": 0.133
  }
  ],
  "logging_steps": 10,