yiran-wang3 committed on
Commit
e8dd434
1 Parent(s): 226a56c

End of training

Browse files
README.md ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: yiran-wang3/qwen1_chat_adamw_iter7
5
+ tags:
6
+ - alignment-handbook
7
+ - generated_from_trainer
8
+ - trl
9
+ - dpo
10
+ datasets:
11
+ - self-generate/qw1_sppo_hard_new_cn_mining_oj_iter7-binarized
12
+ model-index:
13
+ - name: qwen1_chat_adamw_iter8
14
+ results: []
15
+ ---
16
+
17
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
18
+ should probably proofread and complete it, then remove this comment. -->
19
+
20
+ # qwen1_chat_adamw_iter8
21
+
22
+ This model is a fine-tuned version of [yiran-wang3/qwen1_chat_adamw_iter7](https://huggingface.co/yiran-wang3/qwen1_chat_adamw_iter7) on the self-generate/qw1_sppo_hard_new_cn_mining_oj_iter7-binarized dataset.
23
+
24
+ ## Model description
25
+
26
+ More information needed
27
+
28
+ ## Intended uses & limitations
29
+
30
+ More information needed
31
+
32
+ ## Training and evaluation data
33
+
34
+ More information needed
35
+
36
+ ## Training procedure
37
+
38
+ ### Training hyperparameters
39
+
40
+ The following hyperparameters were used during training:
41
+ - learning_rate: 1e-06
42
+ - train_batch_size: 8
43
+ - eval_batch_size: 4
44
+ - seed: 42
45
+ - distributed_type: multi-GPU
46
+ - num_devices: 8
47
+ - total_train_batch_size: 64
48
+ - total_eval_batch_size: 32
49
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
50
+ - lr_scheduler_type: constant
51
+ - lr_scheduler_warmup_ratio: 0.1
52
+ - lr_scheduler_warmup_steps: 100
53
+ - num_epochs: 1.0
54
+
55
+ ### Training results
56
+
57
+
58
+
59
+ ### Framework versions
60
+
61
+ - Transformers 4.45.0
62
+ - Pytorch 2.4.0+cu121
63
+ - Datasets 2.14.6
64
+ - Tokenizers 0.20.1
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.4953597542401907,
5
+ "train_runtime": 139.3771,
6
+ "train_samples": 2355,
7
+ "train_samples_per_second": 16.897,
8
+ "train_steps_per_second": 0.265
9
+ }
config.json CHANGED
@@ -23,7 +23,7 @@
23
  "tie_word_embeddings": true,
24
  "torch_dtype": "bfloat16",
25
  "transformers_version": "4.45.0",
26
- "use_cache": false,
27
  "use_sliding_window": false,
28
  "vocab_size": 151936
29
  }
 
23
  "tie_word_embeddings": true,
24
  "torch_dtype": "bfloat16",
25
  "transformers_version": "4.45.0",
26
+ "use_cache": true,
27
  "use_sliding_window": false,
28
  "vocab_size": 151936
29
  }
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.45.0"
14
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.4953597542401907,
5
+ "train_runtime": 139.3771,
6
+ "train_samples": 2355,
7
+ "train_samples_per_second": 16.897,
8
+ "train_steps_per_second": 0.265
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,819 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 100,
6
+ "global_step": 37,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "debug/policy_chosen_logits": -0.8575258255004883,
13
+ "debug/policy_chosen_logps": -145.75564575195312,
14
+ "debug/policy_rejected_logits": -0.7912808060646057,
15
+ "debug/policy_rejected_logps": -155.6038818359375,
16
+ "debug/reference_chosen_logps": -145.75564575195312,
17
+ "debug/reference_rejected_logps": -155.6038818359375,
18
+ "epoch": 0.02702702702702703,
19
+ "grad_norm": 10.590267262290878,
20
+ "learning_rate": 1e-06,
21
+ "logits/chosen": -0.8575258255004883,
22
+ "logits/rejected": -0.7912808060646057,
23
+ "logps/chosen": -145.75564575195312,
24
+ "logps/rejected": -155.6038818359375,
25
+ "loss": 0.5,
26
+ "rewards/accuracies": 0.0,
27
+ "rewards/chosen": 0.0,
28
+ "rewards/margins": 0.0,
29
+ "rewards/rejected": 0.0,
30
+ "step": 1
31
+ },
32
+ {
33
+ "debug/policy_chosen_logits": -0.7469339966773987,
34
+ "debug/policy_chosen_logps": -161.53433227539062,
35
+ "debug/policy_rejected_logits": -0.7776355147361755,
36
+ "debug/policy_rejected_logps": -170.75521850585938,
37
+ "debug/reference_chosen_logps": -161.53152465820312,
38
+ "debug/reference_rejected_logps": -170.4493408203125,
39
+ "epoch": 0.05405405405405406,
40
+ "grad_norm": 10.000972104848039,
41
+ "learning_rate": 1e-06,
42
+ "logits/chosen": -0.7469339966773987,
43
+ "logits/rejected": -0.7776355147361755,
44
+ "logps/chosen": -161.53433227539062,
45
+ "logps/rejected": -170.75521850585938,
46
+ "loss": 0.4999,
47
+ "rewards/accuracies": 0.875,
48
+ "rewards/chosen": -2.8095149900764227e-05,
49
+ "rewards/margins": 0.0030306337866932154,
50
+ "rewards/rejected": -0.0030587289948016405,
51
+ "step": 2
52
+ },
53
+ {
54
+ "debug/policy_chosen_logits": -0.8981359004974365,
55
+ "debug/policy_chosen_logps": -176.5548095703125,
56
+ "debug/policy_rejected_logits": -0.8165794014930725,
57
+ "debug/policy_rejected_logps": -174.51219177246094,
58
+ "debug/reference_chosen_logps": -176.45700073242188,
59
+ "debug/reference_rejected_logps": -174.48855590820312,
60
+ "epoch": 0.08108108108108109,
61
+ "grad_norm": 11.901363041409127,
62
+ "learning_rate": 1e-06,
63
+ "logits/chosen": -0.8981359004974365,
64
+ "logits/rejected": -0.8165794014930725,
65
+ "logps/chosen": -176.5548095703125,
66
+ "logps/rejected": -174.51219177246094,
67
+ "loss": 0.4991,
68
+ "rewards/accuracies": 0.5,
69
+ "rewards/chosen": -0.0009781360859051347,
70
+ "rewards/margins": -0.000741882249712944,
71
+ "rewards/rejected": -0.00023625371977686882,
72
+ "step": 3
73
+ },
74
+ {
75
+ "debug/policy_chosen_logits": -0.9096183776855469,
76
+ "debug/policy_chosen_logps": -166.55648803710938,
77
+ "debug/policy_rejected_logits": -0.9066538214683533,
78
+ "debug/policy_rejected_logps": -164.07363891601562,
79
+ "debug/reference_chosen_logps": -166.54132080078125,
80
+ "debug/reference_rejected_logps": -164.5047607421875,
81
+ "epoch": 0.10810810810810811,
82
+ "grad_norm": 11.906594144254457,
83
+ "learning_rate": 1e-06,
84
+ "logits/chosen": -0.9096183776855469,
85
+ "logits/rejected": -0.9066538214683533,
86
+ "logps/chosen": -166.55648803710938,
87
+ "logps/rejected": -164.07363891601562,
88
+ "loss": 0.4985,
89
+ "rewards/accuracies": 0.5,
90
+ "rewards/chosen": -0.00015165336662903428,
91
+ "rewards/margins": -0.004462923854589462,
92
+ "rewards/rejected": 0.004311270546168089,
93
+ "step": 4
94
+ },
95
+ {
96
+ "debug/policy_chosen_logits": -0.7255207896232605,
97
+ "debug/policy_chosen_logps": -197.11146545410156,
98
+ "debug/policy_rejected_logits": -0.6460945010185242,
99
+ "debug/policy_rejected_logps": -199.1307830810547,
100
+ "debug/reference_chosen_logps": -196.3956298828125,
101
+ "debug/reference_rejected_logps": -198.6553192138672,
102
+ "epoch": 0.13513513513513514,
103
+ "grad_norm": 12.022558068888802,
104
+ "learning_rate": 1e-06,
105
+ "logits/chosen": -0.7255207896232605,
106
+ "logits/rejected": -0.6460945010185242,
107
+ "logps/chosen": -197.11146545410156,
108
+ "logps/rejected": -199.1307830810547,
109
+ "loss": 0.4968,
110
+ "rewards/accuracies": 0.375,
111
+ "rewards/chosen": -0.007158536929637194,
112
+ "rewards/margins": -0.00240384042263031,
113
+ "rewards/rejected": -0.004754695575684309,
114
+ "step": 5
115
+ },
116
+ {
117
+ "debug/policy_chosen_logits": -0.9493230581283569,
118
+ "debug/policy_chosen_logps": -129.1173553466797,
119
+ "debug/policy_rejected_logits": -0.8156340718269348,
120
+ "debug/policy_rejected_logps": -164.30892944335938,
121
+ "debug/reference_chosen_logps": -127.74217987060547,
122
+ "debug/reference_rejected_logps": -162.74935913085938,
123
+ "epoch": 0.16216216216216217,
124
+ "grad_norm": 12.824448923481688,
125
+ "learning_rate": 1e-06,
126
+ "logits/chosen": -0.9493230581283569,
127
+ "logits/rejected": -0.8156340718269348,
128
+ "logps/chosen": -129.1173553466797,
129
+ "logps/rejected": -164.30892944335938,
130
+ "loss": 0.5049,
131
+ "rewards/accuracies": 0.625,
132
+ "rewards/chosen": -0.013751697726547718,
133
+ "rewards/margins": 0.0018438897095620632,
134
+ "rewards/rejected": -0.015595588833093643,
135
+ "step": 6
136
+ },
137
+ {
138
+ "debug/policy_chosen_logits": -0.7272260785102844,
139
+ "debug/policy_chosen_logps": -184.9543914794922,
140
+ "debug/policy_rejected_logits": -0.6780456900596619,
141
+ "debug/policy_rejected_logps": -187.98831176757812,
142
+ "debug/reference_chosen_logps": -184.20895385742188,
143
+ "debug/reference_rejected_logps": -188.1205291748047,
144
+ "epoch": 0.1891891891891892,
145
+ "grad_norm": 12.347680907504191,
146
+ "learning_rate": 1e-06,
147
+ "logits/chosen": -0.7272260785102844,
148
+ "logits/rejected": -0.6780456900596619,
149
+ "logps/chosen": -184.9543914794922,
150
+ "logps/rejected": -187.98831176757812,
151
+ "loss": 0.4986,
152
+ "rewards/accuracies": 0.375,
153
+ "rewards/chosen": -0.007454394828528166,
154
+ "rewards/margins": -0.008776684291660786,
155
+ "rewards/rejected": 0.0013222885318100452,
156
+ "step": 7
157
+ },
158
+ {
159
+ "debug/policy_chosen_logits": -0.8364517688751221,
160
+ "debug/policy_chosen_logps": -189.10472106933594,
161
+ "debug/policy_rejected_logits": -0.7381677627563477,
162
+ "debug/policy_rejected_logps": -188.10775756835938,
163
+ "debug/reference_chosen_logps": -187.6288299560547,
164
+ "debug/reference_rejected_logps": -188.3019256591797,
165
+ "epoch": 0.21621621621621623,
166
+ "grad_norm": 12.404458206833132,
167
+ "learning_rate": 1e-06,
168
+ "logits/chosen": -0.8364517688751221,
169
+ "logits/rejected": -0.7381677627563477,
170
+ "logps/chosen": -189.10472106933594,
171
+ "logps/rejected": -188.10775756835938,
172
+ "loss": 0.4937,
173
+ "rewards/accuracies": 0.375,
174
+ "rewards/chosen": -0.014758807606995106,
175
+ "rewards/margins": -0.016700536012649536,
176
+ "rewards/rejected": 0.0019417284056544304,
177
+ "step": 8
178
+ },
179
+ {
180
+ "debug/policy_chosen_logits": -0.9120941758155823,
181
+ "debug/policy_chosen_logps": -163.34133911132812,
182
+ "debug/policy_rejected_logits": -0.976243257522583,
183
+ "debug/policy_rejected_logps": -167.19322204589844,
184
+ "debug/reference_chosen_logps": -161.68980407714844,
185
+ "debug/reference_rejected_logps": -164.67279052734375,
186
+ "epoch": 0.24324324324324326,
187
+ "grad_norm": 12.727135168469278,
188
+ "learning_rate": 1e-06,
189
+ "logits/chosen": -0.9120941758155823,
190
+ "logits/rejected": -0.976243257522583,
191
+ "logps/chosen": -163.34133911132812,
192
+ "logps/rejected": -167.19322204589844,
193
+ "loss": 0.4976,
194
+ "rewards/accuracies": 0.5,
195
+ "rewards/chosen": -0.0165153406560421,
196
+ "rewards/margins": 0.008688842877745628,
197
+ "rewards/rejected": -0.025204181671142578,
198
+ "step": 9
199
+ },
200
+ {
201
+ "debug/policy_chosen_logits": -0.760608434677124,
202
+ "debug/policy_chosen_logps": -172.08404541015625,
203
+ "debug/policy_rejected_logits": -0.7949759364128113,
204
+ "debug/policy_rejected_logps": -178.0618133544922,
205
+ "debug/reference_chosen_logps": -171.69947814941406,
206
+ "debug/reference_rejected_logps": -175.53970336914062,
207
+ "epoch": 0.2702702702702703,
208
+ "grad_norm": 12.198398917852398,
209
+ "learning_rate": 1e-06,
210
+ "logits/chosen": -0.760608434677124,
211
+ "logits/rejected": -0.7949759364128113,
212
+ "logps/chosen": -172.08404541015625,
213
+ "logps/rejected": -178.0618133544922,
214
+ "loss": 0.49,
215
+ "rewards/accuracies": 0.875,
216
+ "rewards/chosen": -0.003845730097964406,
217
+ "rewards/margins": 0.021375417709350586,
218
+ "rewards/rejected": -0.025221146643161774,
219
+ "step": 10
220
+ },
221
+ {
222
+ "debug/policy_chosen_logits": -0.7727924585342407,
223
+ "debug/policy_chosen_logps": -174.89260864257812,
224
+ "debug/policy_rejected_logits": -0.8414075970649719,
225
+ "debug/policy_rejected_logps": -191.7485809326172,
226
+ "debug/reference_chosen_logps": -174.308349609375,
227
+ "debug/reference_rejected_logps": -189.44308471679688,
228
+ "epoch": 0.2972972972972973,
229
+ "grad_norm": 12.869944279488237,
230
+ "learning_rate": 1e-06,
231
+ "logits/chosen": -0.7727924585342407,
232
+ "logits/rejected": -0.8414075970649719,
233
+ "logps/chosen": -174.89260864257812,
234
+ "logps/rejected": -191.7485809326172,
235
+ "loss": 0.4961,
236
+ "rewards/accuracies": 0.75,
237
+ "rewards/chosen": -0.005842561833560467,
238
+ "rewards/margins": 0.0172123983502388,
239
+ "rewards/rejected": -0.02305496111512184,
240
+ "step": 11
241
+ },
242
+ {
243
+ "debug/policy_chosen_logits": -0.9232946038246155,
244
+ "debug/policy_chosen_logps": -172.6126708984375,
245
+ "debug/policy_rejected_logits": -0.8749657273292542,
246
+ "debug/policy_rejected_logps": -176.91111755371094,
247
+ "debug/reference_chosen_logps": -170.90977478027344,
248
+ "debug/reference_rejected_logps": -176.2742156982422,
249
+ "epoch": 0.32432432432432434,
250
+ "grad_norm": 13.637899725211922,
251
+ "learning_rate": 1e-06,
252
+ "logits/chosen": -0.9232946038246155,
253
+ "logits/rejected": -0.8749657273292542,
254
+ "logps/chosen": -172.6126708984375,
255
+ "logps/rejected": -176.91111755371094,
256
+ "loss": 0.5005,
257
+ "rewards/accuracies": 0.625,
258
+ "rewards/chosen": -0.017028970643877983,
259
+ "rewards/margins": -0.010659895837306976,
260
+ "rewards/rejected": -0.006369075272232294,
261
+ "step": 12
262
+ },
263
+ {
264
+ "debug/policy_chosen_logits": -0.7241289615631104,
265
+ "debug/policy_chosen_logps": -170.33370971679688,
266
+ "debug/policy_rejected_logits": -0.7508945465087891,
267
+ "debug/policy_rejected_logps": -174.8129119873047,
268
+ "debug/reference_chosen_logps": -169.210205078125,
269
+ "debug/reference_rejected_logps": -175.19602966308594,
270
+ "epoch": 0.35135135135135137,
271
+ "grad_norm": 12.678047953411022,
272
+ "learning_rate": 1e-06,
273
+ "logits/chosen": -0.7241289615631104,
274
+ "logits/rejected": -0.7508945465087891,
275
+ "logps/chosen": -170.33370971679688,
276
+ "logps/rejected": -174.8129119873047,
277
+ "loss": 0.4867,
278
+ "rewards/accuracies": 0.125,
279
+ "rewards/chosen": -0.011235074140131474,
280
+ "rewards/margins": -0.015066290274262428,
281
+ "rewards/rejected": 0.0038312142714858055,
282
+ "step": 13
283
+ },
284
+ {
285
+ "debug/policy_chosen_logits": -0.9043007493019104,
286
+ "debug/policy_chosen_logps": -165.7388916015625,
287
+ "debug/policy_rejected_logits": -0.8851659893989563,
288
+ "debug/policy_rejected_logps": -185.2967529296875,
289
+ "debug/reference_chosen_logps": -169.01324462890625,
290
+ "debug/reference_rejected_logps": -184.21755981445312,
291
+ "epoch": 0.3783783783783784,
292
+ "grad_norm": 14.228799593284224,
293
+ "learning_rate": 1e-06,
294
+ "logits/chosen": -0.9043007493019104,
295
+ "logits/rejected": -0.8851659893989563,
296
+ "logps/chosen": -165.7388916015625,
297
+ "logps/rejected": -185.2967529296875,
298
+ "loss": 0.4838,
299
+ "rewards/accuracies": 0.875,
300
+ "rewards/chosen": 0.032743629068136215,
301
+ "rewards/margins": 0.04353557527065277,
302
+ "rewards/rejected": -0.010791949927806854,
303
+ "step": 14
304
+ },
305
+ {
306
+ "debug/policy_chosen_logits": -0.8794234991073608,
307
+ "debug/policy_chosen_logps": -190.51846313476562,
308
+ "debug/policy_rejected_logits": -0.9416622519493103,
309
+ "debug/policy_rejected_logps": -182.75303649902344,
310
+ "debug/reference_chosen_logps": -191.22064208984375,
311
+ "debug/reference_rejected_logps": -180.401611328125,
312
+ "epoch": 0.40540540540540543,
313
+ "grad_norm": 13.867700853582742,
314
+ "learning_rate": 1e-06,
315
+ "logits/chosen": -0.8794234991073608,
316
+ "logits/rejected": -0.9416622519493103,
317
+ "logps/chosen": -190.51846313476562,
318
+ "logps/rejected": -182.75303649902344,
319
+ "loss": 0.5002,
320
+ "rewards/accuracies": 0.875,
321
+ "rewards/chosen": 0.0070218658074736595,
322
+ "rewards/margins": 0.03053615428507328,
323
+ "rewards/rejected": -0.023514289408922195,
324
+ "step": 15
325
+ },
326
+ {
327
+ "debug/policy_chosen_logits": -0.9163352251052856,
328
+ "debug/policy_chosen_logps": -161.2454833984375,
329
+ "debug/policy_rejected_logits": -0.8506691455841064,
330
+ "debug/policy_rejected_logps": -174.76438903808594,
331
+ "debug/reference_chosen_logps": -164.75534057617188,
332
+ "debug/reference_rejected_logps": -172.7283172607422,
333
+ "epoch": 0.43243243243243246,
334
+ "grad_norm": 15.950956103697601,
335
+ "learning_rate": 1e-06,
336
+ "logits/chosen": -0.9163352251052856,
337
+ "logits/rejected": -0.8506691455841064,
338
+ "logps/chosen": -161.2454833984375,
339
+ "logps/rejected": -174.76438903808594,
340
+ "loss": 0.4953,
341
+ "rewards/accuracies": 0.75,
342
+ "rewards/chosen": 0.035098638385534286,
343
+ "rewards/margins": 0.055459294468164444,
344
+ "rewards/rejected": -0.020360659807920456,
345
+ "step": 16
346
+ },
347
+ {
348
+ "debug/policy_chosen_logits": -0.886226236820221,
349
+ "debug/policy_chosen_logps": -152.4619598388672,
350
+ "debug/policy_rejected_logits": -0.8994572162628174,
351
+ "debug/policy_rejected_logps": -186.78121948242188,
352
+ "debug/reference_chosen_logps": -153.10696411132812,
353
+ "debug/reference_rejected_logps": -188.9115447998047,
354
+ "epoch": 0.4594594594594595,
355
+ "grad_norm": 14.555594379538805,
356
+ "learning_rate": 1e-06,
357
+ "logits/chosen": -0.886226236820221,
358
+ "logits/rejected": -0.8994572162628174,
359
+ "logps/chosen": -152.4619598388672,
360
+ "logps/rejected": -186.78121948242188,
361
+ "loss": 0.4997,
362
+ "rewards/accuracies": 0.375,
363
+ "rewards/chosen": 0.006449948064982891,
364
+ "rewards/margins": -0.014853332191705704,
365
+ "rewards/rejected": 0.02130328118801117,
366
+ "step": 17
367
+ },
368
+ {
369
+ "debug/policy_chosen_logits": -0.791405200958252,
370
+ "debug/policy_chosen_logps": -184.97439575195312,
371
+ "debug/policy_rejected_logits": -0.7767256498336792,
372
+ "debug/policy_rejected_logps": -168.33358764648438,
373
+ "debug/reference_chosen_logps": -185.62191772460938,
374
+ "debug/reference_rejected_logps": -161.90869140625,
375
+ "epoch": 0.4864864864864865,
376
+ "grad_norm": 17.299703458305174,
377
+ "learning_rate": 1e-06,
378
+ "logits/chosen": -0.791405200958252,
379
+ "logits/rejected": -0.7767256498336792,
380
+ "logps/chosen": -184.97439575195312,
381
+ "logps/rejected": -168.33358764648438,
382
+ "loss": 0.4931,
383
+ "rewards/accuracies": 0.75,
384
+ "rewards/chosen": 0.006475199945271015,
385
+ "rewards/margins": 0.0707239881157875,
386
+ "rewards/rejected": -0.06424878537654877,
387
+ "step": 18
388
+ },
389
+ {
390
+ "debug/policy_chosen_logits": -0.8800061345100403,
391
+ "debug/policy_chosen_logps": -148.43475341796875,
392
+ "debug/policy_rejected_logits": -0.9134210348129272,
393
+ "debug/policy_rejected_logps": -142.7111358642578,
394
+ "debug/reference_chosen_logps": -149.74551391601562,
395
+ "debug/reference_rejected_logps": -140.6129150390625,
396
+ "epoch": 0.5135135135135135,
397
+ "grad_norm": 15.389494933483391,
398
+ "learning_rate": 1e-06,
399
+ "logits/chosen": -0.8800061345100403,
400
+ "logits/rejected": -0.9134210348129272,
401
+ "logps/chosen": -148.43475341796875,
402
+ "logps/rejected": -142.7111358642578,
403
+ "loss": 0.4953,
404
+ "rewards/accuracies": 0.75,
405
+ "rewards/chosen": 0.013107641600072384,
406
+ "rewards/margins": 0.03408981114625931,
407
+ "rewards/rejected": -0.02098216861486435,
408
+ "step": 19
409
+ },
410
+ {
411
+ "debug/policy_chosen_logits": -0.95091712474823,
412
+ "debug/policy_chosen_logps": -152.49203491210938,
413
+ "debug/policy_rejected_logits": -0.8285200595855713,
414
+ "debug/policy_rejected_logps": -152.87184143066406,
415
+ "debug/reference_chosen_logps": -151.83273315429688,
416
+ "debug/reference_rejected_logps": -152.75054931640625,
417
+ "epoch": 0.5405405405405406,
418
+ "grad_norm": 13.903822325437991,
419
+ "learning_rate": 1e-06,
420
+ "logits/chosen": -0.95091712474823,
421
+ "logits/rejected": -0.8285200595855713,
422
+ "logps/chosen": -152.49203491210938,
423
+ "logps/rejected": -152.87184143066406,
424
+ "loss": 0.5003,
425
+ "rewards/accuracies": 0.5,
426
+ "rewards/chosen": -0.006593028549104929,
427
+ "rewards/margins": -0.005380069836974144,
428
+ "rewards/rejected": -0.0012129591777920723,
429
+ "step": 20
430
+ },
431
+ {
432
+ "debug/policy_chosen_logits": -0.8724645972251892,
433
+ "debug/policy_chosen_logps": -142.1439666748047,
434
+ "debug/policy_rejected_logits": -0.7575433850288391,
435
+ "debug/policy_rejected_logps": -175.90309143066406,
436
+ "debug/reference_chosen_logps": -140.2176513671875,
437
+ "debug/reference_rejected_logps": -174.95127868652344,
438
+ "epoch": 0.5675675675675675,
439
+ "grad_norm": 14.060737834168805,
440
+ "learning_rate": 1e-06,
441
+ "logits/chosen": -0.8724645972251892,
442
+ "logits/rejected": -0.7575433850288391,
443
+ "logps/chosen": -142.1439666748047,
444
+ "logps/rejected": -175.90309143066406,
445
+ "loss": 0.4882,
446
+ "rewards/accuracies": 0.5,
447
+ "rewards/chosen": -0.01926323026418686,
448
+ "rewards/margins": -0.00974507350474596,
449
+ "rewards/rejected": -0.009518155828118324,
450
+ "step": 21
451
+ },
452
+ {
453
+ "debug/policy_chosen_logits": -1.0412858724594116,
454
+ "debug/policy_chosen_logps": -135.55929565429688,
455
+ "debug/policy_rejected_logits": -1.00175142288208,
456
+ "debug/policy_rejected_logps": -188.136962890625,
457
+ "debug/reference_chosen_logps": -135.7158203125,
458
+ "debug/reference_rejected_logps": -186.81320190429688,
459
+ "epoch": 0.5945945945945946,
460
+ "grad_norm": 14.183550914621547,
461
+ "learning_rate": 1e-06,
462
+ "logits/chosen": -1.0412858724594116,
463
+ "logits/rejected": -1.00175142288208,
464
+ "logps/chosen": -135.55929565429688,
465
+ "logps/rejected": -188.136962890625,
466
+ "loss": 0.4911,
467
+ "rewards/accuracies": 0.625,
468
+ "rewards/chosen": 0.0015652086585760117,
469
+ "rewards/margins": 0.014802752062678337,
470
+ "rewards/rejected": -0.013237543404102325,
471
+ "step": 22
472
+ },
473
+ {
474
+ "debug/policy_chosen_logits": -0.8892878293991089,
475
+ "debug/policy_chosen_logps": -152.1015625,
476
+ "debug/policy_rejected_logits": -0.7090870141983032,
477
+ "debug/policy_rejected_logps": -186.6959686279297,
478
+ "debug/reference_chosen_logps": -147.5272216796875,
479
+ "debug/reference_rejected_logps": -184.30201721191406,
480
+ "epoch": 0.6216216216216216,
481
+ "grad_norm": 14.276125809311413,
482
+ "learning_rate": 1e-06,
483
+ "logits/chosen": -0.8892878293991089,
484
+ "logits/rejected": -0.7090870141983032,
485
+ "logps/chosen": -152.1015625,
486
+ "logps/rejected": -186.6959686279297,
487
+ "loss": 0.5004,
488
+ "rewards/accuracies": 0.5,
489
+ "rewards/chosen": -0.04574331268668175,
490
+ "rewards/margins": -0.021803725510835648,
491
+ "rewards/rejected": -0.0239395871758461,
492
+ "step": 23
493
+ },
494
+ {
495
+ "debug/policy_chosen_logits": -0.8802600502967834,
496
+ "debug/policy_chosen_logps": -159.21139526367188,
497
+ "debug/policy_rejected_logits": -0.9364652037620544,
498
+ "debug/policy_rejected_logps": -164.08383178710938,
499
+ "debug/reference_chosen_logps": -155.6630859375,
500
+ "debug/reference_rejected_logps": -161.748291015625,
501
+ "epoch": 0.6486486486486487,
502
+ "grad_norm": 15.603022958174197,
503
+ "learning_rate": 1e-06,
504
+ "logits/chosen": -0.8802600502967834,
505
+ "logits/rejected": -0.9364652037620544,
506
+ "logps/chosen": -159.21139526367188,
507
+ "logps/rejected": -164.08383178710938,
508
+ "loss": 0.4987,
509
+ "rewards/accuracies": 0.375,
510
+ "rewards/chosen": -0.035483140498399734,
511
+ "rewards/margins": -0.012127798981964588,
512
+ "rewards/rejected": -0.02335534058511257,
513
+ "step": 24
514
+ },
515
+ {
516
+ "debug/policy_chosen_logits": -0.9097840785980225,
517
+ "debug/policy_chosen_logps": -161.55003356933594,
518
+ "debug/policy_rejected_logits": -0.8685249090194702,
519
+ "debug/policy_rejected_logps": -179.54681396484375,
520
+ "debug/reference_chosen_logps": -162.00999450683594,
521
+ "debug/reference_rejected_logps": -181.08807373046875,
522
+ "epoch": 0.6756756756756757,
523
+ "grad_norm": 14.622466903613363,
524
+ "learning_rate": 1e-06,
525
+ "logits/chosen": -0.9097840785980225,
526
+ "logits/rejected": -0.8685249090194702,
527
+ "logps/chosen": -161.55003356933594,
528
+ "logps/rejected": -179.54681396484375,
529
+ "loss": 0.5001,
530
+ "rewards/accuracies": 0.5,
531
+ "rewards/chosen": 0.004599475301802158,
532
+ "rewards/margins": -0.010813076049089432,
533
+ "rewards/rejected": 0.015412550419569016,
534
+ "step": 25
535
+ },
536
+ {
537
+ "debug/policy_chosen_logits": -0.8982373476028442,
538
+ "debug/policy_chosen_logps": -155.50433349609375,
539
+ "debug/policy_rejected_logits": -0.8736176490783691,
540
+ "debug/policy_rejected_logps": -176.95404052734375,
541
+ "debug/reference_chosen_logps": -155.57803344726562,
542
+ "debug/reference_rejected_logps": -176.65240478515625,
543
+ "epoch": 0.7027027027027027,
544
+ "grad_norm": 14.148910548336614,
545
+ "learning_rate": 1e-06,
546
+ "logits/chosen": -0.8982373476028442,
547
+ "logits/rejected": -0.8736176490783691,
548
+ "logps/chosen": -155.50433349609375,
549
+ "logps/rejected": -176.95404052734375,
550
+ "loss": 0.5059,
551
+ "rewards/accuracies": 0.625,
552
+ "rewards/chosen": 0.0007370477542281151,
553
+ "rewards/margins": 0.003753413911908865,
554
+ "rewards/rejected": -0.003016366856172681,
555
+ "step": 26
556
+ },
557
+ {
558
+ "debug/policy_chosen_logits": -0.9832875728607178,
559
+ "debug/policy_chosen_logps": -159.749267578125,
560
+ "debug/policy_rejected_logits": -0.9118414521217346,
561
+ "debug/policy_rejected_logps": -151.69076538085938,
562
+ "debug/reference_chosen_logps": -162.98480224609375,
563
+ "debug/reference_rejected_logps": -154.2532501220703,
564
+ "epoch": 0.7297297297297297,
565
+ "grad_norm": 15.17612475288469,
566
+ "learning_rate": 1e-06,
567
+ "logits/chosen": -0.9832875728607178,
568
+ "logits/rejected": -0.9118414521217346,
569
+ "logps/chosen": -159.749267578125,
570
+ "logps/rejected": -151.69076538085938,
571
+ "loss": 0.5099,
572
+ "rewards/accuracies": 0.375,
573
+ "rewards/chosen": 0.03235547989606857,
574
+ "rewards/margins": 0.006730623543262482,
575
+ "rewards/rejected": 0.02562485635280609,
576
+ "step": 27
577
+ },
578
+ {
579
+ "debug/policy_chosen_logits": -0.8673918843269348,
580
+ "debug/policy_chosen_logps": -147.43087768554688,
581
+ "debug/policy_rejected_logits": -0.7803842425346375,
582
+ "debug/policy_rejected_logps": -188.6826629638672,
583
+ "debug/reference_chosen_logps": -148.56741333007812,
584
+ "debug/reference_rejected_logps": -186.77401733398438,
585
+ "epoch": 0.7567567567567568,
586
+ "grad_norm": 13.774005492067339,
587
+ "learning_rate": 1e-06,
588
+ "logits/chosen": -0.8673918843269348,
589
+ "logits/rejected": -0.7803842425346375,
590
+ "logps/chosen": -147.43087768554688,
591
+ "logps/rejected": -188.6826629638672,
592
+ "loss": 0.4971,
593
+ "rewards/accuracies": 0.5,
594
+ "rewards/chosen": 0.01136524323374033,
595
+ "rewards/margins": 0.030451610684394836,
596
+ "rewards/rejected": -0.01908636838197708,
597
+ "step": 28
598
+ },
599
+ {
600
+ "debug/policy_chosen_logits": -0.8602281808853149,
601
+ "debug/policy_chosen_logps": -153.77182006835938,
602
+ "debug/policy_rejected_logits": -0.8865491151809692,
603
+ "debug/policy_rejected_logps": -184.97218322753906,
604
+ "debug/reference_chosen_logps": -154.79705810546875,
605
+ "debug/reference_rejected_logps": -183.60633850097656,
606
+ "epoch": 0.7837837837837838,
607
+ "grad_norm": 15.257213384975808,
608
+ "learning_rate": 1e-06,
609
+ "logits/chosen": -0.8602281808853149,
610
+ "logits/rejected": -0.8865491151809692,
611
+ "logps/chosen": -153.77182006835938,
612
+ "logps/rejected": -184.97218322753906,
613
+ "loss": 0.5138,
614
+ "rewards/accuracies": 0.625,
615
+ "rewards/chosen": 0.01025250181555748,
616
+ "rewards/margins": 0.02391086146235466,
617
+ "rewards/rejected": -0.01365836150944233,
618
+ "step": 29
619
+ },
620
+ {
621
+ "debug/policy_chosen_logits": -0.9129707217216492,
622
+ "debug/policy_chosen_logps": -184.99423217773438,
623
+ "debug/policy_rejected_logits": -1.0919617414474487,
624
+ "debug/policy_rejected_logps": -141.35853576660156,
625
+ "debug/reference_chosen_logps": -188.018798828125,
626
+ "debug/reference_rejected_logps": -136.7606201171875,
627
+ "epoch": 0.8108108108108109,
628
+ "grad_norm": 13.614612194522516,
629
+ "learning_rate": 1e-06,
630
+ "logits/chosen": -0.9129707217216492,
631
+ "logits/rejected": -1.0919617414474487,
632
+ "logps/chosen": -184.99423217773438,
633
+ "logps/rejected": -141.35853576660156,
634
+ "loss": 0.4793,
635
+ "rewards/accuracies": 0.75,
636
+ "rewards/chosen": 0.030245695263147354,
637
+ "rewards/margins": 0.07622484117746353,
638
+ "rewards/rejected": -0.04597914591431618,
639
+ "step": 30
640
+ },
641
+ {
642
+ "debug/policy_chosen_logits": -0.7870268225669861,
643
+ "debug/policy_chosen_logps": -157.5154266357422,
644
+ "debug/policy_rejected_logits": -0.8374965190887451,
645
+ "debug/policy_rejected_logps": -162.17874145507812,
646
+ "debug/reference_chosen_logps": -159.80447387695312,
647
+ "debug/reference_rejected_logps": -163.95941162109375,
648
+ "epoch": 0.8378378378378378,
649
+ "grad_norm": 13.069040617397022,
650
+ "learning_rate": 1e-06,
651
+ "logits/chosen": -0.7870268225669861,
652
+ "logits/rejected": -0.8374965190887451,
653
+ "logps/chosen": -157.5154266357422,
654
+ "logps/rejected": -162.17874145507812,
655
+ "loss": 0.48,
656
+ "rewards/accuracies": 0.625,
657
+ "rewards/chosen": 0.022890347987413406,
658
+ "rewards/margins": 0.0050835697911679745,
659
+ "rewards/rejected": 0.017806777730584145,
660
+ "step": 31
661
+ },
662
+ {
663
+ "debug/policy_chosen_logits": -0.8162547945976257,
664
+ "debug/policy_chosen_logps": -171.15731811523438,
665
+ "debug/policy_rejected_logits": -0.8792607188224792,
666
+ "debug/policy_rejected_logps": -170.6051483154297,
667
+ "debug/reference_chosen_logps": -174.41046142578125,
668
+ "debug/reference_rejected_logps": -171.5959930419922,
669
+ "epoch": 0.8648648648648649,
670
+ "grad_norm": 13.409519517707912,
671
+ "learning_rate": 1e-06,
672
+ "logits/chosen": -0.8162547945976257,
673
+ "logits/rejected": -0.8792607188224792,
674
+ "logps/chosen": -171.15731811523438,
675
+ "logps/rejected": -170.6051483154297,
676
+ "loss": 0.4797,
677
+ "rewards/accuracies": 0.75,
678
+ "rewards/chosen": 0.03253144025802612,
679
+ "rewards/margins": 0.02262299507856369,
680
+ "rewards/rejected": 0.009908447973430157,
681
+ "step": 32
682
+ },
683
+ {
684
+ "debug/policy_chosen_logits": -0.8389750719070435,
685
+ "debug/policy_chosen_logps": -130.0693359375,
686
+ "debug/policy_rejected_logits": -0.8284645676612854,
687
+ "debug/policy_rejected_logps": -172.84576416015625,
688
+ "debug/reference_chosen_logps": -128.82723999023438,
689
+ "debug/reference_rejected_logps": -173.13807678222656,
690
+ "epoch": 0.8918918918918919,
691
+ "grad_norm": 14.49702969131408,
692
+ "learning_rate": 1e-06,
693
+ "logits/chosen": -0.8389750719070435,
694
+ "logits/rejected": -0.8284645676612854,
695
+ "logps/chosen": -130.0693359375,
696
+ "logps/rejected": -172.84576416015625,
697
+ "loss": 0.5019,
698
+ "rewards/accuracies": 0.25,
699
+ "rewards/chosen": -0.01242092065513134,
700
+ "rewards/margins": -0.01534400973469019,
701
+ "rewards/rejected": 0.0029230881482362747,
702
+ "step": 33
703
+ },
704
+ {
705
+ "debug/policy_chosen_logits": -0.7313972115516663,
706
+ "debug/policy_chosen_logps": -206.6120147705078,
707
+ "debug/policy_rejected_logits": -0.8341861367225647,
708
+ "debug/policy_rejected_logps": -174.07289123535156,
709
+ "debug/reference_chosen_logps": -204.1991729736328,
710
+ "debug/reference_rejected_logps": -172.9614715576172,
711
+ "epoch": 0.918918918918919,
712
+ "grad_norm": 13.848425223482339,
713
+ "learning_rate": 1e-06,
714
+ "logits/chosen": -0.7313972115516663,
715
+ "logits/rejected": -0.8341861367225647,
716
+ "logps/chosen": -206.6120147705078,
717
+ "logps/rejected": -174.07289123535156,
718
+ "loss": 0.4918,
719
+ "rewards/accuracies": 0.375,
720
+ "rewards/chosen": -0.024128342047333717,
721
+ "rewards/margins": -0.013014238327741623,
722
+ "rewards/rejected": -0.01111410278826952,
723
+ "step": 34
724
+ },
725
+ {
726
+ "debug/policy_chosen_logits": -0.7511980533599854,
727
+ "debug/policy_chosen_logps": -169.01324462890625,
728
+ "debug/policy_rejected_logits": -0.8619469404220581,
729
+ "debug/policy_rejected_logps": -155.92356872558594,
730
+ "debug/reference_chosen_logps": -174.2823028564453,
731
+ "debug/reference_rejected_logps": -151.06353759765625,
732
+ "epoch": 0.9459459459459459,
733
+ "grad_norm": 13.804422393772251,
734
+ "learning_rate": 1e-06,
735
+ "logits/chosen": -0.7511980533599854,
736
+ "logits/rejected": -0.8619469404220581,
737
+ "logps/chosen": -169.01324462890625,
738
+ "logps/rejected": -155.92356872558594,
739
+ "loss": 0.491,
740
+ "rewards/accuracies": 0.875,
741
+ "rewards/chosen": 0.052690617740154266,
742
+ "rewards/margins": 0.10129091143608093,
743
+ "rewards/rejected": -0.04860030114650726,
744
+ "step": 35
745
+ },
746
+ {
747
+ "debug/policy_chosen_logits": -0.9176344275474548,
748
+ "debug/policy_chosen_logps": -142.12106323242188,
749
+ "debug/policy_rejected_logits": -0.8687180876731873,
750
+ "debug/policy_rejected_logps": -179.96939086914062,
751
+ "debug/reference_chosen_logps": -143.38665771484375,
752
+ "debug/reference_rejected_logps": -179.182861328125,
753
+ "epoch": 0.972972972972973,
754
+ "grad_norm": 12.092026182468153,
755
+ "learning_rate": 1e-06,
756
+ "logits/chosen": -0.9176344275474548,
757
+ "logits/rejected": -0.8687180876731873,
758
+ "logps/chosen": -142.12106323242188,
759
+ "logps/rejected": -179.96939086914062,
760
+ "loss": 0.4918,
761
+ "rewards/accuracies": 0.625,
762
+ "rewards/chosen": 0.012656106613576412,
763
+ "rewards/margins": 0.02052140235900879,
764
+ "rewards/rejected": -0.007865296676754951,
765
+ "step": 36
766
+ },
767
+ {
768
+ "debug/policy_chosen_logits": -0.8388864398002625,
769
+ "debug/policy_chosen_logps": -190.0416717529297,
770
+ "debug/policy_rejected_logits": -0.9188035726547241,
771
+ "debug/policy_rejected_logps": -160.5929412841797,
772
+ "debug/reference_chosen_logps": -190.74612426757812,
773
+ "debug/reference_rejected_logps": -159.9268798828125,
774
+ "epoch": 1.0,
775
+ "grad_norm": 12.215368730436227,
776
+ "learning_rate": 1e-06,
777
+ "logits/chosen": -0.8388864398002625,
778
+ "logits/rejected": -0.9188035726547241,
779
+ "logps/chosen": -190.0416717529297,
780
+ "logps/rejected": -160.5929412841797,
781
+ "loss": 0.4773,
782
+ "rewards/accuracies": 0.625,
783
+ "rewards/chosen": 0.00704436469823122,
784
+ "rewards/margins": 0.013704795390367508,
785
+ "rewards/rejected": -0.006660431623458862,
786
+ "step": 37
787
+ },
788
+ {
789
+ "epoch": 1.0,
790
+ "step": 37,
791
+ "total_flos": 0.0,
792
+ "train_loss": 0.4953597542401907,
793
+ "train_runtime": 139.3771,
794
+ "train_samples_per_second": 16.897,
795
+ "train_steps_per_second": 0.265
796
+ }
797
+ ],
798
+ "logging_steps": 1,
799
+ "max_steps": 37,
800
+ "num_input_tokens_seen": 0,
801
+ "num_train_epochs": 1,
802
+ "save_steps": 500,
803
+ "stateful_callbacks": {
804
+ "TrainerControl": {
805
+ "args": {
806
+ "should_epoch_stop": false,
807
+ "should_evaluate": false,
808
+ "should_log": false,
809
+ "should_save": true,
810
+ "should_training_stop": true
811
+ },
812
+ "attributes": {}
813
+ }
814
+ },
815
+ "total_flos": 0.0,
816
+ "train_batch_size": 8,
817
+ "trial_name": null,
818
+ "trial_params": null
819
+ }