diogopaes10 committed on
Commit
66fe6a4
•
1 Parent(s): 8eb03d2

End of training

Browse files
checkpoint-3000/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c95f2d92d432e839ff645f87515fc248494db36dac837cae703842d2cbf775e5
3
- size 737788917
 
 
 
 
checkpoint-3000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:30675d1e37d339cae6e3dc477927b70e39062f0910613a7d90db6c1671bca5bc
3
- size 14575
 
 
 
 
checkpoint-3000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:68cbe85bad4e57d93e8caf9830d7003e889867d2bc1bdf97b16703437df161a3
3
- size 627
 
 
 
 
checkpoint-3000/trainer_state.json DELETED
@@ -1,412 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 12.0,
5
- "global_step": 3000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.0,
12
- "learning_rate": 1.999466666666667e-05,
13
- "loss": 2.308,
14
- "step": 1
15
- },
16
- {
17
- "epoch": 0.75,
18
- "learning_rate": 1.8997333333333335e-05,
19
- "loss": 1.6916,
20
- "step": 188
21
- },
22
- {
23
- "epoch": 0.75,
24
- "eval_accuracy": 0.6755,
25
- "eval_disk_space_total": 78.1898422241211,
26
- "eval_disk_space_used": 24.806385040283203,
27
- "eval_f1": 0.6708054417489328,
28
- "eval_gpu_ram_allocated": 2.089780330657959,
29
- "eval_gpu_ram_cached": 25.85546875,
30
- "eval_gpu_ram_total": 39.56402587890625,
31
- "eval_gpu_utilization": 50,
32
- "eval_loss": 1.1062816381454468,
33
- "eval_precision": 0.690043017889279,
34
- "eval_recall": 0.6755,
35
- "eval_runtime": 2.3912,
36
- "eval_samples_per_second": 836.398,
37
- "eval_steps_per_second": 26.347,
38
- "eval_system_ram_total": 83.48074722290039,
39
- "eval_system_ram_used": 4.019077301025391,
40
- "step": 188
41
- },
42
- {
43
- "epoch": 1.5,
44
- "learning_rate": 1.8e-05,
45
- "loss": 0.9694,
46
- "step": 376
47
- },
48
- {
49
- "epoch": 1.5,
50
- "eval_accuracy": 0.7195,
51
- "eval_disk_space_total": 78.1898422241211,
52
- "eval_disk_space_used": 29.64177703857422,
53
- "eval_f1": 0.7181081417115642,
54
- "eval_gpu_ram_allocated": 2.0897774696350098,
55
- "eval_gpu_ram_cached": 25.85546875,
56
- "eval_gpu_ram_total": 39.56402587890625,
57
- "eval_gpu_utilization": 50,
58
- "eval_loss": 0.9585903286933899,
59
- "eval_precision": 0.719758443061289,
60
- "eval_recall": 0.7195,
61
- "eval_runtime": 2.3693,
62
- "eval_samples_per_second": 844.141,
63
- "eval_steps_per_second": 26.59,
64
- "eval_system_ram_total": 83.48074722290039,
65
- "eval_system_ram_used": 4.2536163330078125,
66
- "step": 376
67
- },
68
- {
69
- "epoch": 2.26,
70
- "learning_rate": 1.6997333333333334e-05,
71
- "loss": 0.8509,
72
- "step": 564
73
- },
74
- {
75
- "epoch": 2.26,
76
- "eval_accuracy": 0.712,
77
- "eval_disk_space_total": 78.1898422241211,
78
- "eval_disk_space_used": 29.641841888427734,
79
- "eval_f1": 0.7070168337920522,
80
- "eval_gpu_ram_allocated": 2.089791774749756,
81
- "eval_gpu_ram_cached": 25.85546875,
82
- "eval_gpu_ram_total": 39.56402587890625,
83
- "eval_gpu_utilization": 46,
84
- "eval_loss": 0.9747923016548157,
85
- "eval_precision": 0.7160570316458433,
86
- "eval_recall": 0.712,
87
- "eval_runtime": 2.4432,
88
- "eval_samples_per_second": 818.586,
89
- "eval_steps_per_second": 25.785,
90
- "eval_system_ram_total": 83.48074722290039,
91
- "eval_system_ram_used": 4.160213470458984,
92
- "step": 564
93
- },
94
- {
95
- "epoch": 3.01,
96
- "learning_rate": 1.5994666666666668e-05,
97
- "loss": 0.7475,
98
- "step": 752
99
- },
100
- {
101
- "epoch": 3.01,
102
- "eval_accuracy": 0.714,
103
- "eval_disk_space_total": 78.1898422241211,
104
- "eval_disk_space_used": 29.641963958740234,
105
- "eval_f1": 0.7122032912823338,
106
- "eval_gpu_ram_allocated": 2.089776039123535,
107
- "eval_gpu_ram_cached": 25.85546875,
108
- "eval_gpu_ram_total": 39.56402587890625,
109
- "eval_gpu_utilization": 50,
110
- "eval_loss": 0.9446640014648438,
111
- "eval_precision": 0.7148157467744413,
112
- "eval_recall": 0.714,
113
- "eval_runtime": 2.5063,
114
- "eval_samples_per_second": 798.001,
115
- "eval_steps_per_second": 25.137,
116
- "eval_system_ram_total": 83.48074722290039,
117
- "eval_system_ram_used": 4.160709381103516,
118
- "step": 752
119
- },
120
- {
121
- "epoch": 3.76,
122
- "learning_rate": 1.4997333333333335e-05,
123
- "loss": 0.5841,
124
- "step": 940
125
- },
126
- {
127
- "epoch": 3.76,
128
- "eval_accuracy": 0.711,
129
- "eval_disk_space_total": 78.1898422241211,
130
- "eval_disk_space_used": 29.642024993896484,
131
- "eval_f1": 0.7076606604060025,
132
- "eval_gpu_ram_allocated": 2.089787483215332,
133
- "eval_gpu_ram_cached": 25.85546875,
134
- "eval_gpu_ram_total": 39.56402587890625,
135
- "eval_gpu_utilization": 47,
136
- "eval_loss": 1.0064291954040527,
137
- "eval_precision": 0.7225290812411572,
138
- "eval_recall": 0.711,
139
- "eval_runtime": 2.4755,
140
- "eval_samples_per_second": 807.933,
141
- "eval_steps_per_second": 25.45,
142
- "eval_system_ram_total": 83.48074722290039,
143
- "eval_system_ram_used": 4.188880920410156,
144
- "step": 940
145
- },
146
- {
147
- "epoch": 4.51,
148
- "learning_rate": 1.3994666666666668e-05,
149
- "loss": 0.4972,
150
- "step": 1128
151
- },
152
- {
153
- "epoch": 4.51,
154
- "eval_accuracy": 0.714,
155
- "eval_disk_space_total": 78.1898422241211,
156
- "eval_disk_space_used": 29.642135620117188,
157
- "eval_f1": 0.7109995031569997,
158
- "eval_gpu_ram_allocated": 2.089801788330078,
159
- "eval_gpu_ram_cached": 25.85546875,
160
- "eval_gpu_ram_total": 39.56402587890625,
161
- "eval_gpu_utilization": 47,
162
- "eval_loss": 1.0585097074508667,
163
- "eval_precision": 0.7129473752365556,
164
- "eval_recall": 0.714,
165
- "eval_runtime": 2.3843,
166
- "eval_samples_per_second": 838.824,
167
- "eval_steps_per_second": 26.423,
168
- "eval_system_ram_total": 83.48074722290039,
169
- "eval_system_ram_used": 4.176631927490234,
170
- "step": 1128
171
- },
172
- {
173
- "epoch": 5.26,
174
- "learning_rate": 1.2992e-05,
175
- "loss": 0.4555,
176
- "step": 1316
177
- },
178
- {
179
- "epoch": 5.26,
180
- "eval_accuracy": 0.7075,
181
- "eval_disk_space_total": 78.1898422241211,
182
- "eval_disk_space_used": 33.76519775390625,
183
- "eval_f1": 0.7086283787248422,
184
- "eval_gpu_ram_allocated": 2.089810371398926,
185
- "eval_gpu_ram_cached": 25.85546875,
186
- "eval_gpu_ram_total": 39.56402587890625,
187
- "eval_gpu_utilization": 46,
188
- "eval_loss": 1.117536187171936,
189
- "eval_precision": 0.71510102752271,
190
- "eval_recall": 0.7075,
191
- "eval_runtime": 2.5545,
192
- "eval_samples_per_second": 782.936,
193
- "eval_steps_per_second": 24.662,
194
- "eval_system_ram_total": 83.48074722290039,
195
- "eval_system_ram_used": 4.225734710693359,
196
- "step": 1316
197
- },
198
- {
199
- "epoch": 6.02,
200
- "learning_rate": 1.1989333333333336e-05,
201
- "loss": 0.3535,
202
- "step": 1504
203
- },
204
- {
205
- "epoch": 6.02,
206
- "eval_accuracy": 0.708,
207
- "eval_disk_space_total": 78.1898422241211,
208
- "eval_disk_space_used": 33.76530456542969,
209
- "eval_f1": 0.7032209621498534,
210
- "eval_gpu_ram_allocated": 2.0898032188415527,
211
- "eval_gpu_ram_cached": 25.85546875,
212
- "eval_gpu_ram_total": 39.56402587890625,
213
- "eval_gpu_utilization": 50,
214
- "eval_loss": 1.1748836040496826,
215
- "eval_precision": 0.7076659711678004,
216
- "eval_recall": 0.708,
217
- "eval_runtime": 2.3932,
218
- "eval_samples_per_second": 835.715,
219
- "eval_steps_per_second": 26.325,
220
- "eval_system_ram_total": 83.48074722290039,
221
- "eval_system_ram_used": 4.23016357421875,
222
- "step": 1504
223
- },
224
- {
225
- "epoch": 6.77,
226
- "learning_rate": 1.0986666666666668e-05,
227
- "loss": 0.2614,
228
- "step": 1692
229
- },
230
- {
231
- "epoch": 6.77,
232
- "eval_accuracy": 0.709,
233
- "eval_disk_space_total": 78.1898422241211,
234
- "eval_disk_space_used": 33.76542282104492,
235
- "eval_f1": 0.7056311006074188,
236
- "eval_gpu_ram_allocated": 2.089783191680908,
237
- "eval_gpu_ram_cached": 25.85546875,
238
- "eval_gpu_ram_total": 39.56402587890625,
239
- "eval_gpu_utilization": 49,
240
- "eval_loss": 1.2027860879898071,
241
- "eval_precision": 0.7079398723985221,
242
- "eval_recall": 0.709,
243
- "eval_runtime": 2.3888,
244
- "eval_samples_per_second": 837.234,
245
- "eval_steps_per_second": 26.373,
246
- "eval_system_ram_total": 83.48074722290039,
247
- "eval_system_ram_used": 4.237628936767578,
248
- "step": 1692
249
- },
250
- {
251
- "epoch": 7.52,
252
- "learning_rate": 9.984e-06,
253
- "loss": 0.2321,
254
- "step": 1880
255
- },
256
- {
257
- "epoch": 7.52,
258
- "eval_accuracy": 0.698,
259
- "eval_disk_space_total": 78.1898422241211,
260
- "eval_disk_space_used": 33.7656135559082,
261
- "eval_f1": 0.7018556265437493,
262
- "eval_gpu_ram_allocated": 2.089846134185791,
263
- "eval_gpu_ram_cached": 25.85546875,
264
- "eval_gpu_ram_total": 39.56402587890625,
265
- "eval_gpu_utilization": 49,
266
- "eval_loss": 1.2960551977157593,
267
- "eval_precision": 0.708462957552084,
268
- "eval_recall": 0.698,
269
- "eval_runtime": 2.391,
270
- "eval_samples_per_second": 836.478,
271
- "eval_steps_per_second": 26.349,
272
- "eval_system_ram_total": 83.48074722290039,
273
- "eval_system_ram_used": 4.224781036376953,
274
- "step": 1880
275
- },
276
- {
277
- "epoch": 8.27,
278
- "learning_rate": 8.981333333333333e-06,
279
- "loss": 0.197,
280
- "step": 2068
281
- },
282
- {
283
- "epoch": 8.27,
284
- "eval_accuracy": 0.712,
285
- "eval_disk_space_total": 78.1898422241211,
286
- "eval_disk_space_used": 33.7657470703125,
287
- "eval_f1": 0.7097931257647566,
288
- "eval_gpu_ram_allocated": 2.0897903442382812,
289
- "eval_gpu_ram_cached": 25.85546875,
290
- "eval_gpu_ram_total": 39.56402587890625,
291
- "eval_gpu_utilization": 45,
292
- "eval_loss": 1.3960117101669312,
293
- "eval_precision": 0.7137187449926237,
294
- "eval_recall": 0.712,
295
- "eval_runtime": 2.3878,
296
- "eval_samples_per_second": 837.604,
297
- "eval_steps_per_second": 26.385,
298
- "eval_system_ram_total": 83.48074722290039,
299
- "eval_system_ram_used": 4.219398498535156,
300
- "step": 2068
301
- },
302
- {
303
- "epoch": 9.02,
304
- "learning_rate": 7.978666666666667e-06,
305
- "loss": 0.1505,
306
- "step": 2256
307
- },
308
- {
309
- "epoch": 9.02,
310
- "eval_accuracy": 0.7075,
311
- "eval_disk_space_total": 78.1898422241211,
312
- "eval_disk_space_used": 35.827659606933594,
313
- "eval_f1": 0.709341703450241,
314
- "eval_gpu_ram_allocated": 2.0897817611694336,
315
- "eval_gpu_ram_cached": 25.85546875,
316
- "eval_gpu_ram_total": 39.56402587890625,
317
- "eval_gpu_utilization": 48,
318
- "eval_loss": 1.4310206174850464,
319
- "eval_precision": 0.7133423622104005,
320
- "eval_recall": 0.7075,
321
- "eval_runtime": 2.4471,
322
- "eval_samples_per_second": 817.29,
323
- "eval_steps_per_second": 25.745,
324
- "eval_system_ram_total": 83.48074722290039,
325
- "eval_system_ram_used": 4.2417755126953125,
326
- "step": 2256
327
- },
328
- {
329
- "epoch": 9.78,
330
- "learning_rate": 6.976000000000001e-06,
331
- "loss": 0.1132,
332
- "step": 2444
333
- },
334
- {
335
- "epoch": 9.78,
336
- "eval_accuracy": 0.7045,
337
- "eval_disk_space_total": 78.1898422241211,
338
- "eval_disk_space_used": 35.827754974365234,
339
- "eval_f1": 0.705265213679387,
340
- "eval_gpu_ram_allocated": 2.089801788330078,
341
- "eval_gpu_ram_cached": 25.85546875,
342
- "eval_gpu_ram_total": 39.56402587890625,
343
- "eval_gpu_utilization": 48,
344
- "eval_loss": 1.5454399585723877,
345
- "eval_precision": 0.7097494768850874,
346
- "eval_recall": 0.7045,
347
- "eval_runtime": 2.5035,
348
- "eval_samples_per_second": 798.873,
349
- "eval_steps_per_second": 25.165,
350
- "eval_system_ram_total": 83.48074722290039,
351
- "eval_system_ram_used": 4.293117523193359,
352
- "step": 2444
353
- },
354
- {
355
- "epoch": 10.53,
356
- "learning_rate": 5.973333333333334e-06,
357
- "loss": 0.0979,
358
- "step": 2632
359
- },
360
- {
361
- "epoch": 10.53,
362
- "eval_accuracy": 0.708,
363
- "eval_disk_space_total": 78.1898422241211,
364
- "eval_disk_space_used": 35.82805252075195,
365
- "eval_f1": 0.7090322597492875,
366
- "eval_gpu_ram_allocated": 2.089801788330078,
367
- "eval_gpu_ram_cached": 25.85546875,
368
- "eval_gpu_ram_total": 39.56402587890625,
369
- "eval_gpu_utilization": 45,
370
- "eval_loss": 1.64204740524292,
371
- "eval_precision": 0.7171054872018443,
372
- "eval_recall": 0.708,
373
- "eval_runtime": 2.5339,
374
- "eval_samples_per_second": 789.29,
375
- "eval_steps_per_second": 24.863,
376
- "eval_system_ram_total": 83.48074722290039,
377
- "eval_system_ram_used": 4.279300689697266,
378
- "step": 2632
379
- },
380
- {
381
- "epoch": 11.28,
382
- "learning_rate": 4.976e-06,
383
- "loss": 0.0818,
384
- "step": 2820
385
- },
386
- {
387
- "epoch": 11.28,
388
- "eval_accuracy": 0.7065,
389
- "eval_disk_space_total": 78.1898422241211,
390
- "eval_disk_space_used": 35.828128814697266,
391
- "eval_f1": 0.706242034421972,
392
- "eval_gpu_ram_allocated": 2.0898447036743164,
393
- "eval_gpu_ram_cached": 25.85546875,
394
- "eval_gpu_ram_total": 39.56402587890625,
395
- "eval_gpu_utilization": 49,
396
- "eval_loss": 1.686875820159912,
397
- "eval_precision": 0.7102028476355108,
398
- "eval_recall": 0.7065,
399
- "eval_runtime": 2.4408,
400
- "eval_samples_per_second": 819.396,
401
- "eval_steps_per_second": 25.811,
402
- "eval_system_ram_total": 83.48074722290039,
403
- "eval_system_ram_used": 4.2822418212890625,
404
- "step": 2820
405
- }
406
- ],
407
- "max_steps": 3750,
408
- "num_train_epochs": 15,
409
- "total_flos": 5780526048262272.0,
410
- "trial_name": null,
411
- "trial_params": null
412
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-3250/added_tokens.json DELETED
@@ -1,3 +0,0 @@
1
- {
2
- "[MASK]": 128000
3
- }
 
 
 
 
checkpoint-3250/config.json DELETED
@@ -1,59 +0,0 @@
1
- {
2
- "_name_or_path": "microsoft/deberta-v3-base",
3
- "architectures": [
4
- "DebertaV2ForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "hidden_act": "gelu",
8
- "hidden_dropout_prob": 0.1,
9
- "hidden_size": 768,
10
- "id2label": {
11
- "0": "Society & Culture",
12
- "1": "Science & Mathematics",
13
- "2": "Health",
14
- "3": "Education & Reference",
15
- "4": "Computers & Internet",
16
- "5": "Sports",
17
- "6": "Business & Finance",
18
- "7": "Entertainment & Music",
19
- "8": "Family & Relationships",
20
- "9": "Politics & Government"
21
- },
22
- "initializer_range": 0.02,
23
- "intermediate_size": 3072,
24
- "label2id": {
25
- "Business & Finance": 6,
26
- "Computers & Internet": 4,
27
- "Education & Reference": 3,
28
- "Entertainment & Music": 7,
29
- "Family & Relationships": 8,
30
- "Health": 2,
31
- "Politics & Government": 9,
32
- "Science & Mathematics": 1,
33
- "Society & Culture": 0,
34
- "Sports": 5
35
- },
36
- "layer_norm_eps": 1e-07,
37
- "max_position_embeddings": 512,
38
- "max_relative_positions": -1,
39
- "model_type": "deberta-v2",
40
- "norm_rel_ebd": "layer_norm",
41
- "num_attention_heads": 12,
42
- "num_hidden_layers": 12,
43
- "pad_token_id": 0,
44
- "pooler_dropout": 0,
45
- "pooler_hidden_act": "gelu",
46
- "pooler_hidden_size": 768,
47
- "pos_att_type": [
48
- "p2c",
49
- "c2p"
50
- ],
51
- "position_biased_input": false,
52
- "position_buckets": 256,
53
- "relative_attention": true,
54
- "share_att_key": true,
55
- "torch_dtype": "float32",
56
- "transformers_version": "4.31.0",
57
- "type_vocab_size": 0,
58
- "vocab_size": 128100
59
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-3250/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f17eacaf5376c0fabb8aeff03d05e59319e7a180e3c00d273c966e5a26d33f06
3
- size 1475557125
 
 
 
 
checkpoint-3250/special_tokens_map.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "bos_token": "[CLS]",
3
- "cls_token": "[CLS]",
4
- "eos_token": "[SEP]",
5
- "mask_token": "[MASK]",
6
- "pad_token": "[PAD]",
7
- "sep_token": "[SEP]",
8
- "unk_token": "[UNK]"
9
- }
 
 
 
 
 
 
 
 
 
 
checkpoint-3250/spm.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
- size 2464616
 
 
 
 
checkpoint-3250/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-3250/tokenizer_config.json DELETED
@@ -1,16 +0,0 @@
1
- {
2
- "bos_token": "[CLS]",
3
- "clean_up_tokenization_spaces": true,
4
- "cls_token": "[CLS]",
5
- "do_lower_case": false,
6
- "eos_token": "[SEP]",
7
- "mask_token": "[MASK]",
8
- "model_max_length": 1000000000000000019884624838656,
9
- "pad_token": "[PAD]",
10
- "sep_token": "[SEP]",
11
- "sp_model_kwargs": {},
12
- "split_by_punct": false,
13
- "tokenizer_class": "DebertaV2Tokenizer",
14
- "unk_token": "[UNK]",
15
- "vocab_type": "spm"
16
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{checkpoint-3000 → checkpoint-3750}/added_tokens.json RENAMED
File without changes
{checkpoint-3000 → checkpoint-3750}/config.json RENAMED
File without changes
{checkpoint-3000 → checkpoint-3750}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a06836e74e2ee556f75e4f728cb07fd38bf7dca4688b3915aab2696b9adac99
3
  size 1475557125
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d156da8a469609a065c22691bca354b9e5e529ba5788e4c6a2fb2c05ca16bac3
3
  size 1475557125
{checkpoint-3250 → checkpoint-3750}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab87f1854f930634513326e51246eb456af5e9a20373ec512925553c65de13d4
3
  size 737788917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9e4b3254fdacaf6fa2e542ebcdfaf789e8bac3209c97fa9884b99c6abaf00f
3
  size 737788917
{checkpoint-3250 → checkpoint-3750}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9318169f07f11cd9456a08a8554cc70b98429bd8764cb326d58f8d40bc05005
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6586d391e812ac443d153fe39b51387d1776691cc74a4237b33c00b1c485b263
3
  size 14575
{checkpoint-3250 → checkpoint-3750}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b6692caff16315b18091d72ba55872cc98f8a135dd4601d0a933fafdf6b6bcd
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7c58bc37a8c98d0bf14317ace8f986ecedea9d0665ee8450abd7629af8cb1e9
3
  size 627
{checkpoint-3000 → checkpoint-3750}/special_tokens_map.json RENAMED
File without changes
{checkpoint-3000 → checkpoint-3750}/spm.model RENAMED
File without changes
{checkpoint-3000 → checkpoint-3750}/tokenizer.json RENAMED
File without changes
{checkpoint-3000 → checkpoint-3750}/tokenizer_config.json RENAMED
File without changes
{checkpoint-3250 → checkpoint-3750}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.0,
5
- "global_step": 3250,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -454,11 +454,63 @@
454
  "eval_system_ram_total": 83.48074722290039,
455
  "eval_system_ram_used": 4.266563415527344,
456
  "step": 3196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  }
458
  ],
459
  "max_steps": 3750,
460
  "num_train_epochs": 15,
461
- "total_flos": 6259743625357056.0,
462
  "trial_name": null,
463
  "trial_params": null
464
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.0,
5
+ "global_step": 3750,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
454
  "eval_system_ram_total": 83.48074722290039,
455
  "eval_system_ram_used": 4.266563415527344,
456
  "step": 3196
457
+ },
458
+ {
459
+ "epoch": 13.54,
460
+ "learning_rate": 1.968e-06,
461
+ "loss": 0.0368,
462
+ "step": 3384
463
+ },
464
+ {
465
+ "epoch": 13.54,
466
+ "eval_accuracy": 0.7055,
467
+ "eval_disk_space_total": 78.1898422241211,
468
+ "eval_disk_space_used": 35.828514099121094,
469
+ "eval_f1": 0.7079190260942086,
470
+ "eval_gpu_ram_allocated": 2.0898475646972656,
471
+ "eval_gpu_ram_cached": 25.85546875,
472
+ "eval_gpu_ram_total": 39.56402587890625,
473
+ "eval_gpu_utilization": 47,
474
+ "eval_loss": 1.8403420448303223,
475
+ "eval_precision": 0.7131395828448935,
476
+ "eval_recall": 0.7055,
477
+ "eval_runtime": 2.4685,
478
+ "eval_samples_per_second": 810.214,
479
+ "eval_steps_per_second": 25.522,
480
+ "eval_system_ram_total": 83.48074722290039,
481
+ "eval_system_ram_used": 4.278324127197266,
482
+ "step": 3384
483
+ },
484
+ {
485
+ "epoch": 14.29,
486
+ "learning_rate": 9.653333333333333e-07,
487
+ "loss": 0.0379,
488
+ "step": 3572
489
+ },
490
+ {
491
+ "epoch": 14.29,
492
+ "eval_accuracy": 0.705,
493
+ "eval_disk_space_total": 78.1898422241211,
494
+ "eval_disk_space_used": 35.828582763671875,
495
+ "eval_f1": 0.7051869329304575,
496
+ "eval_gpu_ram_allocated": 2.089784622192383,
497
+ "eval_gpu_ram_cached": 25.85546875,
498
+ "eval_gpu_ram_total": 39.56402587890625,
499
+ "eval_gpu_utilization": 47,
500
+ "eval_loss": 1.8535802364349365,
501
+ "eval_precision": 0.7073671527926624,
502
+ "eval_recall": 0.705,
503
+ "eval_runtime": 2.4945,
504
+ "eval_samples_per_second": 801.76,
505
+ "eval_steps_per_second": 25.255,
506
+ "eval_system_ram_total": 83.48074722290039,
507
+ "eval_system_ram_used": 4.301258087158203,
508
+ "step": 3572
509
  }
510
  ],
511
  "max_steps": 3750,
512
  "num_train_epochs": 15,
513
+ "total_flos": 7220464762017408.0,
514
  "trial_name": null,
515
  "trial_params": null
516
  }
{checkpoint-3000 → checkpoint-3750}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab87f1854f930634513326e51246eb456af5e9a20373ec512925553c65de13d4
3
  size 737788917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9e4b3254fdacaf6fa2e542ebcdfaf789e8bac3209c97fa9884b99c6abaf00f
3
  size 737788917
runs/Jul22_21-19-20_ab4276e44fca/events.out.tfevents.1690060770.ab4276e44fca.659.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0676d7575783045c7b0b81b3dac8f9748e1362dd10f8ada9768f991e63e7691
3
- size 23854
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7cc1c502b7afe0f436409a4e0fe91d0bca050eb10ee5cd85dbc61b0ffa714a0
3
+ size 26412
checkpoint-3250/training_args.bin → runs/Jul22_21-19-20_ab4276e44fca/events.out.tfevents.1690061518.ab4276e44fca.659.1 RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c483311c6e034a3b791b3aba6061603f3b239dec5f5cd88a867a5f6743909401
3
- size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f403714063524786ff7cc8bc9849d04c002300dca210402782a59c99edb0de9
3
+ size 1033