NicolaiSivesind commited on
Commit
2c7378d
1 Parent(s): 721a7c7

Upload 11 files

Browse files
config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "NbAiLab/nb-bert-large",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 4096,
23
+ "label2id": {
24
+ "LABEL_0": 0,
25
+ "LABEL_1": 1,
26
+ "LABEL_2": 2,
27
+ "LABEL_3": 3,
28
+ "LABEL_4": 4,
29
+ "LABEL_5": 5,
30
+ "LABEL_6": 6
31
+ },
32
+ "layer_norm_eps": 1e-12,
33
+ "max_position_embeddings": 512,
34
+ "model_type": "bert",
35
+ "num_attention_heads": 16,
36
+ "num_hidden_layers": 24,
37
+ "pad_token_id": 0,
38
+ "position_embedding_type": "absolute",
39
+ "problem_type": "single_label_classification",
40
+ "torch_dtype": "float32",
41
+ "transformers_version": "4.40.0",
42
+ "type_vocab_size": 2,
43
+ "use_cache": true,
44
+ "vocab_size": 50000
45
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77cb83eae223bcbc8342d37afaa3f167eccfe89eb7210f2e09ab50898176bd60
3
+ size 1420425204
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ee80e099b27b639b74f84d633fac0a8d8e83cb7ab6b889d5851bd636793654f
3
+ size 2841074157
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e5aa417b4f1b17cecb5dcf222fa3803ec44a2e98f52640f39c31b014e024933
3
+ size 13990
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:146161a54404c69c61e07a330b5d04fb08e8df56104ee1a347f4c65ebc068516
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "501": {
4
+ "content": "[CLS]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "502": {
12
+ "content": "[MASK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "503": {
20
+ "content": "[PAD]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "504": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "505": {
36
+ "content": "[UNK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 1000000000000000019884624838656,
50
+ "never_split": null,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "strip_accents": false,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "unk_token": "[UNK]"
57
+ }
trainer_state.json ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 8.547008547008547,
5
+ "eval_steps": 30,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.2564102564102564,
13
+ "eval_accuracy": 0.33653846153846156,
14
+ "eval_f1": 0.15795586527293845,
15
+ "eval_loss": 1.5857505798339844,
16
+ "eval_precision": 0.15263157894736842,
17
+ "eval_recall": 0.20117770428329435,
18
+ "eval_runtime": 8.6482,
19
+ "eval_samples_per_second": 12.026,
20
+ "eval_steps_per_second": 1.503,
21
+ "step": 30
22
+ },
23
+ {
24
+ "epoch": 0.5128205128205128,
25
+ "eval_accuracy": 0.46153846153846156,
26
+ "eval_f1": 0.2013725205214567,
27
+ "eval_loss": 1.4627048969268799,
28
+ "eval_precision": 0.25281954887218044,
29
+ "eval_recall": 0.2350972009357102,
30
+ "eval_runtime": 8.4187,
31
+ "eval_samples_per_second": 12.353,
32
+ "eval_steps_per_second": 1.544,
33
+ "step": 60
34
+ },
35
+ {
36
+ "epoch": 0.7692307692307693,
37
+ "eval_accuracy": 0.6153846153846154,
38
+ "eval_f1": 0.37538782821801686,
39
+ "eval_loss": 1.1772669553756714,
40
+ "eval_precision": 0.35533996281025143,
41
+ "eval_recall": 0.40736468500443657,
42
+ "eval_runtime": 7.8916,
43
+ "eval_samples_per_second": 13.179,
44
+ "eval_steps_per_second": 1.647,
45
+ "step": 90
46
+ },
47
+ {
48
+ "epoch": 1.0256410256410255,
49
+ "eval_accuracy": 0.6057692307692307,
50
+ "eval_f1": 0.36798810227453504,
51
+ "eval_loss": 1.1327857971191406,
52
+ "eval_precision": 0.38458852974982005,
53
+ "eval_recall": 0.37962410260546914,
54
+ "eval_runtime": 9.4477,
55
+ "eval_samples_per_second": 11.008,
56
+ "eval_steps_per_second": 1.376,
57
+ "step": 120
58
+ },
59
+ {
60
+ "epoch": 1.282051282051282,
61
+ "eval_accuracy": 0.6538461538461539,
62
+ "eval_f1": 0.41900575614861335,
63
+ "eval_loss": 1.0890244245529175,
64
+ "eval_precision": 0.5082160450387051,
65
+ "eval_recall": 0.43861238830182936,
66
+ "eval_runtime": 7.2525,
67
+ "eval_samples_per_second": 14.34,
68
+ "eval_steps_per_second": 1.792,
69
+ "step": 150
70
+ },
71
+ {
72
+ "epoch": 1.5384615384615383,
73
+ "eval_accuracy": 0.6057692307692307,
74
+ "eval_f1": 0.3925319620409576,
75
+ "eval_loss": 1.1820482015609741,
76
+ "eval_precision": 0.4949262737968994,
77
+ "eval_recall": 0.42246331997884795,
78
+ "eval_runtime": 7.3455,
79
+ "eval_samples_per_second": 14.158,
80
+ "eval_steps_per_second": 1.77,
81
+ "step": 180
82
+ },
83
+ {
84
+ "epoch": 1.7948717948717947,
85
+ "eval_accuracy": 0.6442307692307693,
86
+ "eval_f1": 0.42451081809572366,
87
+ "eval_loss": 1.0674420595169067,
88
+ "eval_precision": 0.42179487179487174,
89
+ "eval_recall": 0.4413065885115575,
90
+ "eval_runtime": 7.8787,
91
+ "eval_samples_per_second": 13.2,
92
+ "eval_steps_per_second": 1.65,
93
+ "step": 210
94
+ },
95
+ {
96
+ "epoch": 2.051282051282051,
97
+ "eval_accuracy": 0.6730769230769231,
98
+ "eval_f1": 0.45817680472411926,
99
+ "eval_loss": 1.0437394380569458,
100
+ "eval_precision": 0.4717732448710709,
101
+ "eval_recall": 0.46608498471852516,
102
+ "eval_runtime": 7.3406,
103
+ "eval_samples_per_second": 14.168,
104
+ "eval_steps_per_second": 1.771,
105
+ "step": 240
106
+ },
107
+ {
108
+ "epoch": 2.3076923076923075,
109
+ "eval_accuracy": 0.6634615384615384,
110
+ "eval_f1": 0.44733091369464534,
111
+ "eval_loss": 1.1606773138046265,
112
+ "eval_precision": 0.4613468768074031,
113
+ "eval_recall": 0.46812400849667934,
114
+ "eval_runtime": 7.0489,
115
+ "eval_samples_per_second": 14.754,
116
+ "eval_steps_per_second": 1.844,
117
+ "step": 270
118
+ },
119
+ {
120
+ "epoch": 2.564102564102564,
121
+ "eval_accuracy": 0.625,
122
+ "eval_f1": 0.4062809568796815,
123
+ "eval_loss": 1.2771650552749634,
124
+ "eval_precision": 0.41611170784103113,
125
+ "eval_recall": 0.4306409256719816,
126
+ "eval_runtime": 7.1782,
127
+ "eval_samples_per_second": 14.488,
128
+ "eval_steps_per_second": 1.811,
129
+ "step": 300
130
+ },
131
+ {
132
+ "epoch": 2.8205128205128203,
133
+ "eval_accuracy": 0.6153846153846154,
134
+ "eval_f1": 0.40669330669330667,
135
+ "eval_loss": 1.0749222040176392,
136
+ "eval_precision": 0.4599390919158361,
137
+ "eval_recall": 0.4225825244458785,
138
+ "eval_runtime": 7.4587,
139
+ "eval_samples_per_second": 13.943,
140
+ "eval_steps_per_second": 1.743,
141
+ "step": 330
142
+ },
143
+ {
144
+ "epoch": 3.076923076923077,
145
+ "eval_accuracy": 0.625,
146
+ "eval_f1": 0.42567364176740446,
147
+ "eval_loss": 1.1399219036102295,
148
+ "eval_precision": 0.46537455679332107,
149
+ "eval_recall": 0.45866383444023195,
150
+ "eval_runtime": 9.0954,
151
+ "eval_samples_per_second": 11.434,
152
+ "eval_steps_per_second": 1.429,
153
+ "step": 360
154
+ },
155
+ {
156
+ "epoch": 3.3333333333333335,
157
+ "eval_accuracy": 0.6538461538461539,
158
+ "eval_f1": 0.4619962083193933,
159
+ "eval_loss": 1.0954734086990356,
160
+ "eval_precision": 0.4735928232168834,
161
+ "eval_recall": 0.48448101243132297,
162
+ "eval_runtime": 8.1347,
163
+ "eval_samples_per_second": 12.785,
164
+ "eval_steps_per_second": 1.598,
165
+ "step": 390
166
+ },
167
+ {
168
+ "epoch": 3.58974358974359,
169
+ "eval_accuracy": 0.6346153846153846,
170
+ "eval_f1": 0.4427206780147956,
171
+ "eval_loss": 1.2177672386169434,
172
+ "eval_precision": 0.45063474537158743,
173
+ "eval_recall": 0.4697516424224498,
174
+ "eval_runtime": 7.1682,
175
+ "eval_samples_per_second": 14.509,
176
+ "eval_steps_per_second": 1.814,
177
+ "step": 420
178
+ },
179
+ {
180
+ "epoch": 3.8461538461538463,
181
+ "eval_accuracy": 0.625,
182
+ "eval_f1": 0.42804680076021545,
183
+ "eval_loss": 1.3288222551345825,
184
+ "eval_precision": 0.4205708300859973,
185
+ "eval_recall": 0.46260295949736946,
186
+ "eval_runtime": 7.5635,
187
+ "eval_samples_per_second": 13.75,
188
+ "eval_steps_per_second": 1.719,
189
+ "step": 450
190
+ },
191
+ {
192
+ "epoch": 4.102564102564102,
193
+ "eval_accuracy": 0.7019230769230769,
194
+ "eval_f1": 0.5106953307638845,
195
+ "eval_loss": 1.0881779193878174,
196
+ "eval_precision": 0.523184558376196,
197
+ "eval_recall": 0.5067507371855199,
198
+ "eval_runtime": 7.5106,
199
+ "eval_samples_per_second": 13.847,
200
+ "eval_steps_per_second": 1.731,
201
+ "step": 480
202
+ },
203
+ {
204
+ "epoch": 4.273504273504273,
205
+ "grad_norm": 7.013587474822998,
206
+ "learning_rate": 3.575498575498576e-05,
207
+ "loss": 0.9258,
208
+ "step": 500
209
+ },
210
+ {
211
+ "epoch": 4.358974358974359,
212
+ "eval_accuracy": 0.7115384615384616,
213
+ "eval_f1": 0.49821801060849874,
214
+ "eval_loss": 1.1554274559020996,
215
+ "eval_precision": 0.49124649859943975,
216
+ "eval_recall": 0.5149140024916423,
217
+ "eval_runtime": 50.6521,
218
+ "eval_samples_per_second": 2.053,
219
+ "eval_steps_per_second": 0.257,
220
+ "step": 510
221
+ },
222
+ {
223
+ "epoch": 4.615384615384615,
224
+ "eval_accuracy": 0.6923076923076923,
225
+ "eval_f1": 0.49742827816559965,
226
+ "eval_loss": 1.4417259693145752,
227
+ "eval_precision": 0.5,
228
+ "eval_recall": 0.5011042097998619,
229
+ "eval_runtime": 103.6054,
230
+ "eval_samples_per_second": 1.004,
231
+ "eval_steps_per_second": 0.125,
232
+ "step": 540
233
+ },
234
+ {
235
+ "epoch": 4.871794871794872,
236
+ "eval_accuracy": 0.6923076923076923,
237
+ "eval_f1": 0.4863793250320917,
238
+ "eval_loss": 1.1507378816604614,
239
+ "eval_precision": 0.48549534756431306,
240
+ "eval_recall": 0.5111003558208527,
241
+ "eval_runtime": 75.6676,
242
+ "eval_samples_per_second": 1.374,
243
+ "eval_steps_per_second": 0.172,
244
+ "step": 570
245
+ },
246
+ {
247
+ "epoch": 5.128205128205128,
248
+ "eval_accuracy": 0.6634615384615384,
249
+ "eval_f1": 0.48039244705911377,
250
+ "eval_loss": 1.2641137838363647,
251
+ "eval_precision": 0.49331432177614126,
252
+ "eval_recall": 0.47660813996217727,
253
+ "eval_runtime": 42.9337,
254
+ "eval_samples_per_second": 2.422,
255
+ "eval_steps_per_second": 0.303,
256
+ "step": 600
257
+ },
258
+ {
259
+ "epoch": 5.384615384615385,
260
+ "eval_accuracy": 0.6923076923076923,
261
+ "eval_f1": 0.4878689679921483,
262
+ "eval_loss": 1.3560154438018799,
263
+ "eval_precision": 0.4848901098901099,
264
+ "eval_recall": 0.4981276832208509,
265
+ "eval_runtime": 8.8339,
266
+ "eval_samples_per_second": 11.773,
267
+ "eval_steps_per_second": 1.472,
268
+ "step": 630
269
+ },
270
+ {
271
+ "epoch": 5.641025641025641,
272
+ "eval_accuracy": 0.6923076923076923,
273
+ "eval_f1": 0.48765147825687144,
274
+ "eval_loss": 1.484885334968567,
275
+ "eval_precision": 0.4961309523809524,
276
+ "eval_recall": 0.49494501357855397,
277
+ "eval_runtime": 7.535,
278
+ "eval_samples_per_second": 13.802,
279
+ "eval_steps_per_second": 1.725,
280
+ "step": 660
281
+ },
282
+ {
283
+ "epoch": 5.897435897435898,
284
+ "eval_accuracy": 0.6923076923076923,
285
+ "eval_f1": 0.4755107161831651,
286
+ "eval_loss": 1.0820866823196411,
287
+ "eval_precision": 0.46649659863945575,
288
+ "eval_recall": 0.4995007752771728,
289
+ "eval_runtime": 7.4913,
290
+ "eval_samples_per_second": 13.883,
291
+ "eval_steps_per_second": 1.735,
292
+ "step": 690
293
+ },
294
+ {
295
+ "epoch": 6.153846153846154,
296
+ "eval_accuracy": 0.7115384615384616,
297
+ "eval_f1": 0.5779124103915706,
298
+ "eval_loss": 1.2979052066802979,
299
+ "eval_precision": 0.5928430383317601,
300
+ "eval_recall": 0.5695562546494223,
301
+ "eval_runtime": 7.5001,
302
+ "eval_samples_per_second": 13.866,
303
+ "eval_steps_per_second": 1.733,
304
+ "step": 720
305
+ },
306
+ {
307
+ "epoch": 6.410256410256411,
308
+ "eval_accuracy": 0.7115384615384616,
309
+ "eval_f1": 0.5007207127411731,
310
+ "eval_loss": 1.4726945161819458,
311
+ "eval_precision": 0.4985431235431235,
312
+ "eval_recall": 0.5046211897143573,
313
+ "eval_runtime": 7.4555,
314
+ "eval_samples_per_second": 13.949,
315
+ "eval_steps_per_second": 1.744,
316
+ "step": 750
317
+ },
318
+ {
319
+ "epoch": 6.666666666666667,
320
+ "eval_accuracy": 0.6826923076923077,
321
+ "eval_f1": 0.49164104194549385,
322
+ "eval_loss": 1.5962382555007935,
323
+ "eval_precision": 0.48890290995554153,
324
+ "eval_recall": 0.49645792440823505,
325
+ "eval_runtime": 7.8196,
326
+ "eval_samples_per_second": 13.3,
327
+ "eval_steps_per_second": 1.662,
328
+ "step": 780
329
+ },
330
+ {
331
+ "epoch": 6.923076923076923,
332
+ "eval_accuracy": 0.7211538461538461,
333
+ "eval_f1": 0.5234641383494542,
334
+ "eval_loss": 1.2745909690856934,
335
+ "eval_precision": 0.5273022215879359,
336
+ "eval_recall": 0.5242791714841405,
337
+ "eval_runtime": 7.0448,
338
+ "eval_samples_per_second": 14.763,
339
+ "eval_steps_per_second": 1.845,
340
+ "step": 810
341
+ },
342
+ {
343
+ "epoch": 7.17948717948718,
344
+ "eval_accuracy": 0.6538461538461539,
345
+ "eval_f1": 0.4674051433314255,
346
+ "eval_loss": 2.0724053382873535,
347
+ "eval_precision": 0.5055194805194805,
348
+ "eval_recall": 0.4713972018319845,
349
+ "eval_runtime": 8.5997,
350
+ "eval_samples_per_second": 12.093,
351
+ "eval_steps_per_second": 1.512,
352
+ "step": 840
353
+ },
354
+ {
355
+ "epoch": 7.435897435897436,
356
+ "eval_accuracy": 0.6826923076923077,
357
+ "eval_f1": 0.559112957131097,
358
+ "eval_loss": 1.8112683296203613,
359
+ "eval_precision": 0.6143772893772894,
360
+ "eval_recall": 0.5511665008559419,
361
+ "eval_runtime": 8.6462,
362
+ "eval_samples_per_second": 12.028,
363
+ "eval_steps_per_second": 1.504,
364
+ "step": 870
365
+ },
366
+ {
367
+ "epoch": 7.6923076923076925,
368
+ "eval_accuracy": 0.7307692307692307,
369
+ "eval_f1": 0.5986119673110736,
370
+ "eval_loss": 1.5658988952636719,
371
+ "eval_precision": 0.6397901095922239,
372
+ "eval_recall": 0.5927312163337008,
373
+ "eval_runtime": 8.5049,
374
+ "eval_samples_per_second": 12.228,
375
+ "eval_steps_per_second": 1.529,
376
+ "step": 900
377
+ },
378
+ {
379
+ "epoch": 7.948717948717949,
380
+ "eval_accuracy": 0.7019230769230769,
381
+ "eval_f1": 0.5792037287395081,
382
+ "eval_loss": 1.473520278930664,
383
+ "eval_precision": 0.5669774028567353,
384
+ "eval_recall": 0.619092432757029,
385
+ "eval_runtime": 8.1417,
386
+ "eval_samples_per_second": 12.774,
387
+ "eval_steps_per_second": 1.597,
388
+ "step": 930
389
+ },
390
+ {
391
+ "epoch": 8.205128205128204,
392
+ "eval_accuracy": 0.7019230769230769,
393
+ "eval_f1": 0.5951573832227319,
394
+ "eval_loss": 1.7524921894073486,
395
+ "eval_precision": 0.5870032223415682,
396
+ "eval_recall": 0.6328215607718713,
397
+ "eval_runtime": 6.8175,
398
+ "eval_samples_per_second": 15.255,
399
+ "eval_steps_per_second": 1.907,
400
+ "step": 960
401
+ },
402
+ {
403
+ "epoch": 8.461538461538462,
404
+ "eval_accuracy": 0.7115384615384616,
405
+ "eval_f1": 0.5830133910730354,
406
+ "eval_loss": 1.7076036930084229,
407
+ "eval_precision": 0.5997339127038376,
408
+ "eval_recall": 0.5719681284898676,
409
+ "eval_runtime": 7.3179,
410
+ "eval_samples_per_second": 14.212,
411
+ "eval_steps_per_second": 1.776,
412
+ "step": 990
413
+ },
414
+ {
415
+ "epoch": 8.547008547008547,
416
+ "grad_norm": 0.23315642774105072,
417
+ "learning_rate": 2.150997150997151e-05,
418
+ "loss": 0.2238,
419
+ "step": 1000
420
+ }
421
+ ],
422
+ "logging_steps": 500,
423
+ "max_steps": 1755,
424
+ "num_input_tokens_seen": 0,
425
+ "num_train_epochs": 15,
426
+ "save_steps": 500,
427
+ "total_flos": 5649929338176000.0,
428
+ "train_batch_size": 8,
429
+ "trial_name": null,
430
+ "trial_params": null
431
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2571126580c83cf162ca39519a44ca8465173d240813184bd8fa877cb3ff5f6d
3
+ size 4984
vocab.txt ADDED
The diff for this file is too large to render. See raw diff