minimario commited on
Commit
1d223b8
1 Parent(s): 121306c

add checkpoint-200

Browse files
Files changed (48) hide show
  1. ranker_bs256_0/checkpoint-200/config.json +62 -0
  2. ranker_bs256_0/checkpoint-200/merges.txt +0 -0
  3. ranker_bs256_0/checkpoint-200/optimizer.pt +3 -0
  4. ranker_bs256_0/checkpoint-200/pytorch_model.bin +3 -0
  5. ranker_bs256_0/checkpoint-200/rng_state.pth +3 -0
  6. ranker_bs256_0/checkpoint-200/scheduler.pt +3 -0
  7. ranker_bs256_0/checkpoint-200/special_tokens_map.json +753 -0
  8. ranker_bs256_0/checkpoint-200/tokenizer.json +0 -0
  9. ranker_bs256_0/checkpoint-200/tokenizer_config.json +65 -0
  10. ranker_bs256_0/checkpoint-200/trainer_state.json +269 -0
  11. ranker_bs256_0/checkpoint-200/training_args.bin +3 -0
  12. ranker_bs256_0/checkpoint-200/vocab.json +0 -0
  13. ranker_bs256_0/checkpoint-400/config.json +62 -0
  14. ranker_bs256_0/checkpoint-400/merges.txt +0 -0
  15. ranker_bs256_0/checkpoint-400/optimizer.pt +3 -0
  16. ranker_bs256_0/checkpoint-400/pytorch_model.bin +3 -0
  17. ranker_bs256_0/checkpoint-400/rng_state.pth +3 -0
  18. ranker_bs256_0/checkpoint-400/scheduler.pt +3 -0
  19. ranker_bs256_0/checkpoint-400/special_tokens_map.json +753 -0
  20. ranker_bs256_0/checkpoint-400/tokenizer.json +0 -0
  21. ranker_bs256_0/checkpoint-400/tokenizer_config.json +65 -0
  22. ranker_bs256_0/checkpoint-400/trainer_state.json +522 -0
  23. ranker_bs256_0/checkpoint-400/training_args.bin +3 -0
  24. ranker_bs256_0/checkpoint-400/vocab.json +0 -0
  25. ranker_bs256_0/checkpoint-600/config.json +62 -0
  26. ranker_bs256_0/checkpoint-600/merges.txt +0 -0
  27. ranker_bs256_0/checkpoint-600/optimizer.pt +3 -0
  28. ranker_bs256_0/checkpoint-600/pytorch_model.bin +3 -0
  29. ranker_bs256_0/checkpoint-600/rng_state.pth +3 -0
  30. ranker_bs256_0/checkpoint-600/scheduler.pt +3 -0
  31. ranker_bs256_0/checkpoint-600/special_tokens_map.json +753 -0
  32. ranker_bs256_0/checkpoint-600/tokenizer.json +0 -0
  33. ranker_bs256_0/checkpoint-600/tokenizer_config.json +65 -0
  34. ranker_bs256_0/checkpoint-600/trainer_state.json +775 -0
  35. ranker_bs256_0/checkpoint-600/training_args.bin +3 -0
  36. ranker_bs256_0/checkpoint-600/vocab.json +0 -0
  37. ranker_bs256_0/checkpoint-800/config.json +62 -0
  38. ranker_bs256_0/checkpoint-800/merges.txt +0 -0
  39. ranker_bs256_0/checkpoint-800/optimizer.pt +3 -0
  40. ranker_bs256_0/checkpoint-800/pytorch_model.bin +3 -0
  41. ranker_bs256_0/checkpoint-800/rng_state.pth +3 -0
  42. ranker_bs256_0/checkpoint-800/scheduler.pt +3 -0
  43. ranker_bs256_0/checkpoint-800/special_tokens_map.json +753 -0
  44. ranker_bs256_0/checkpoint-800/tokenizer.json +0 -0
  45. ranker_bs256_0/checkpoint-800/tokenizer_config.json +65 -0
  46. ranker_bs256_0/checkpoint-800/trainer_state.json +1028 -0
  47. ranker_bs256_0/checkpoint-800/training_args.bin +3 -0
  48. ranker_bs256_0/checkpoint-800/vocab.json +0 -0
ranker_bs256_0/checkpoint-200/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Salesforce/codet5-large",
3
+ "architectures": [
4
+ "T5EncoderForSequenceClassification"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 4096,
8
+ "d_kv": 64,
9
+ "d_model": 1024,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 24,
22
+ "num_heads": 16,
23
+ "num_layers": 24,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "problem_type": "single_label_classification",
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "task_specific_params": {
30
+ "summarization": {
31
+ "early_stopping": true,
32
+ "length_penalty": 2.0,
33
+ "max_length": 200,
34
+ "min_length": 30,
35
+ "no_repeat_ngram_size": 3,
36
+ "num_beams": 4,
37
+ "prefix": "summarize: "
38
+ },
39
+ "translation_en_to_de": {
40
+ "early_stopping": true,
41
+ "max_length": 300,
42
+ "num_beams": 4,
43
+ "prefix": "translate English to German: "
44
+ },
45
+ "translation_en_to_fr": {
46
+ "early_stopping": true,
47
+ "max_length": 300,
48
+ "num_beams": 4,
49
+ "prefix": "translate English to French: "
50
+ },
51
+ "translation_en_to_ro": {
52
+ "early_stopping": true,
53
+ "max_length": 300,
54
+ "num_beams": 4,
55
+ "prefix": "translate English to Romanian: "
56
+ }
57
+ },
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.26.0",
60
+ "use_cache": true,
61
+ "vocab_size": 32100
62
+ }
ranker_bs256_0/checkpoint-200/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
ranker_bs256_0/checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b645104bda136b89b87242a2f939844ef0f641a73c2fb540c1d12b0de2bd491b
3
+ size 2679420677
ranker_bs256_0/checkpoint-200/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbdd504feb155a170ecadd65304b357bc4cb8fff03f91840f5dfa68b723b2475
3
+ size 1339719649
ranker_bs256_0/checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9887eecc0ea434881f9647f1cc708bd8ec4165dc223b3cfd1f7f83e9129d29c
3
+ size 14575
ranker_bs256_0/checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d26b9bcaa6ecf48e187c3a0c2bb1986fb0017249d3a15bb756472fbbd50cbb4a
3
+ size 627
ranker_bs256_0/checkpoint-200/special_tokens_map.json ADDED
@@ -0,0 +1,753 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<extra_id_99>",
5
+ "lstrip": true,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<extra_id_98>",
12
+ "lstrip": true,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<extra_id_97>",
19
+ "lstrip": true,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "<extra_id_96>",
26
+ "lstrip": true,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<extra_id_95>",
33
+ "lstrip": true,
34
+ "normalized": true,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "<extra_id_94>",
40
+ "lstrip": true,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<extra_id_93>",
47
+ "lstrip": true,
48
+ "normalized": true,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "<extra_id_92>",
54
+ "lstrip": true,
55
+ "normalized": true,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<extra_id_91>",
61
+ "lstrip": true,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "<extra_id_90>",
68
+ "lstrip": true,
69
+ "normalized": true,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "<extra_id_89>",
75
+ "lstrip": true,
76
+ "normalized": true,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ },
80
+ {
81
+ "content": "<extra_id_88>",
82
+ "lstrip": true,
83
+ "normalized": true,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ },
87
+ {
88
+ "content": "<extra_id_87>",
89
+ "lstrip": true,
90
+ "normalized": true,
91
+ "rstrip": false,
92
+ "single_word": false
93
+ },
94
+ {
95
+ "content": "<extra_id_86>",
96
+ "lstrip": true,
97
+ "normalized": true,
98
+ "rstrip": false,
99
+ "single_word": false
100
+ },
101
+ {
102
+ "content": "<extra_id_85>",
103
+ "lstrip": true,
104
+ "normalized": true,
105
+ "rstrip": false,
106
+ "single_word": false
107
+ },
108
+ {
109
+ "content": "<extra_id_84>",
110
+ "lstrip": true,
111
+ "normalized": true,
112
+ "rstrip": false,
113
+ "single_word": false
114
+ },
115
+ {
116
+ "content": "<extra_id_83>",
117
+ "lstrip": true,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false
121
+ },
122
+ {
123
+ "content": "<extra_id_82>",
124
+ "lstrip": true,
125
+ "normalized": true,
126
+ "rstrip": false,
127
+ "single_word": false
128
+ },
129
+ {
130
+ "content": "<extra_id_81>",
131
+ "lstrip": true,
132
+ "normalized": true,
133
+ "rstrip": false,
134
+ "single_word": false
135
+ },
136
+ {
137
+ "content": "<extra_id_80>",
138
+ "lstrip": true,
139
+ "normalized": true,
140
+ "rstrip": false,
141
+ "single_word": false
142
+ },
143
+ {
144
+ "content": "<extra_id_79>",
145
+ "lstrip": true,
146
+ "normalized": true,
147
+ "rstrip": false,
148
+ "single_word": false
149
+ },
150
+ {
151
+ "content": "<extra_id_78>",
152
+ "lstrip": true,
153
+ "normalized": true,
154
+ "rstrip": false,
155
+ "single_word": false
156
+ },
157
+ {
158
+ "content": "<extra_id_77>",
159
+ "lstrip": true,
160
+ "normalized": true,
161
+ "rstrip": false,
162
+ "single_word": false
163
+ },
164
+ {
165
+ "content": "<extra_id_76>",
166
+ "lstrip": true,
167
+ "normalized": true,
168
+ "rstrip": false,
169
+ "single_word": false
170
+ },
171
+ {
172
+ "content": "<extra_id_75>",
173
+ "lstrip": true,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false
177
+ },
178
+ {
179
+ "content": "<extra_id_74>",
180
+ "lstrip": true,
181
+ "normalized": true,
182
+ "rstrip": false,
183
+ "single_word": false
184
+ },
185
+ {
186
+ "content": "<extra_id_73>",
187
+ "lstrip": true,
188
+ "normalized": true,
189
+ "rstrip": false,
190
+ "single_word": false
191
+ },
192
+ {
193
+ "content": "<extra_id_72>",
194
+ "lstrip": true,
195
+ "normalized": true,
196
+ "rstrip": false,
197
+ "single_word": false
198
+ },
199
+ {
200
+ "content": "<extra_id_71>",
201
+ "lstrip": true,
202
+ "normalized": true,
203
+ "rstrip": false,
204
+ "single_word": false
205
+ },
206
+ {
207
+ "content": "<extra_id_70>",
208
+ "lstrip": true,
209
+ "normalized": true,
210
+ "rstrip": false,
211
+ "single_word": false
212
+ },
213
+ {
214
+ "content": "<extra_id_69>",
215
+ "lstrip": true,
216
+ "normalized": true,
217
+ "rstrip": false,
218
+ "single_word": false
219
+ },
220
+ {
221
+ "content": "<extra_id_68>",
222
+ "lstrip": true,
223
+ "normalized": true,
224
+ "rstrip": false,
225
+ "single_word": false
226
+ },
227
+ {
228
+ "content": "<extra_id_67>",
229
+ "lstrip": true,
230
+ "normalized": true,
231
+ "rstrip": false,
232
+ "single_word": false
233
+ },
234
+ {
235
+ "content": "<extra_id_66>",
236
+ "lstrip": true,
237
+ "normalized": true,
238
+ "rstrip": false,
239
+ "single_word": false
240
+ },
241
+ {
242
+ "content": "<extra_id_65>",
243
+ "lstrip": true,
244
+ "normalized": true,
245
+ "rstrip": false,
246
+ "single_word": false
247
+ },
248
+ {
249
+ "content": "<extra_id_64>",
250
+ "lstrip": true,
251
+ "normalized": true,
252
+ "rstrip": false,
253
+ "single_word": false
254
+ },
255
+ {
256
+ "content": "<extra_id_63>",
257
+ "lstrip": true,
258
+ "normalized": true,
259
+ "rstrip": false,
260
+ "single_word": false
261
+ },
262
+ {
263
+ "content": "<extra_id_62>",
264
+ "lstrip": true,
265
+ "normalized": true,
266
+ "rstrip": false,
267
+ "single_word": false
268
+ },
269
+ {
270
+ "content": "<extra_id_61>",
271
+ "lstrip": true,
272
+ "normalized": true,
273
+ "rstrip": false,
274
+ "single_word": false
275
+ },
276
+ {
277
+ "content": "<extra_id_60>",
278
+ "lstrip": true,
279
+ "normalized": true,
280
+ "rstrip": false,
281
+ "single_word": false
282
+ },
283
+ {
284
+ "content": "<extra_id_59>",
285
+ "lstrip": true,
286
+ "normalized": true,
287
+ "rstrip": false,
288
+ "single_word": false
289
+ },
290
+ {
291
+ "content": "<extra_id_58>",
292
+ "lstrip": true,
293
+ "normalized": true,
294
+ "rstrip": false,
295
+ "single_word": false
296
+ },
297
+ {
298
+ "content": "<extra_id_57>",
299
+ "lstrip": true,
300
+ "normalized": true,
301
+ "rstrip": false,
302
+ "single_word": false
303
+ },
304
+ {
305
+ "content": "<extra_id_56>",
306
+ "lstrip": true,
307
+ "normalized": true,
308
+ "rstrip": false,
309
+ "single_word": false
310
+ },
311
+ {
312
+ "content": "<extra_id_55>",
313
+ "lstrip": true,
314
+ "normalized": true,
315
+ "rstrip": false,
316
+ "single_word": false
317
+ },
318
+ {
319
+ "content": "<extra_id_54>",
320
+ "lstrip": true,
321
+ "normalized": true,
322
+ "rstrip": false,
323
+ "single_word": false
324
+ },
325
+ {
326
+ "content": "<extra_id_53>",
327
+ "lstrip": true,
328
+ "normalized": true,
329
+ "rstrip": false,
330
+ "single_word": false
331
+ },
332
+ {
333
+ "content": "<extra_id_52>",
334
+ "lstrip": true,
335
+ "normalized": true,
336
+ "rstrip": false,
337
+ "single_word": false
338
+ },
339
+ {
340
+ "content": "<extra_id_51>",
341
+ "lstrip": true,
342
+ "normalized": true,
343
+ "rstrip": false,
344
+ "single_word": false
345
+ },
346
+ {
347
+ "content": "<extra_id_50>",
348
+ "lstrip": true,
349
+ "normalized": true,
350
+ "rstrip": false,
351
+ "single_word": false
352
+ },
353
+ {
354
+ "content": "<extra_id_49>",
355
+ "lstrip": true,
356
+ "normalized": true,
357
+ "rstrip": false,
358
+ "single_word": false
359
+ },
360
+ {
361
+ "content": "<extra_id_48>",
362
+ "lstrip": true,
363
+ "normalized": true,
364
+ "rstrip": false,
365
+ "single_word": false
366
+ },
367
+ {
368
+ "content": "<extra_id_47>",
369
+ "lstrip": true,
370
+ "normalized": true,
371
+ "rstrip": false,
372
+ "single_word": false
373
+ },
374
+ {
375
+ "content": "<extra_id_46>",
376
+ "lstrip": true,
377
+ "normalized": true,
378
+ "rstrip": false,
379
+ "single_word": false
380
+ },
381
+ {
382
+ "content": "<extra_id_45>",
383
+ "lstrip": true,
384
+ "normalized": true,
385
+ "rstrip": false,
386
+ "single_word": false
387
+ },
388
+ {
389
+ "content": "<extra_id_44>",
390
+ "lstrip": true,
391
+ "normalized": true,
392
+ "rstrip": false,
393
+ "single_word": false
394
+ },
395
+ {
396
+ "content": "<extra_id_43>",
397
+ "lstrip": true,
398
+ "normalized": true,
399
+ "rstrip": false,
400
+ "single_word": false
401
+ },
402
+ {
403
+ "content": "<extra_id_42>",
404
+ "lstrip": true,
405
+ "normalized": true,
406
+ "rstrip": false,
407
+ "single_word": false
408
+ },
409
+ {
410
+ "content": "<extra_id_41>",
411
+ "lstrip": true,
412
+ "normalized": true,
413
+ "rstrip": false,
414
+ "single_word": false
415
+ },
416
+ {
417
+ "content": "<extra_id_40>",
418
+ "lstrip": true,
419
+ "normalized": true,
420
+ "rstrip": false,
421
+ "single_word": false
422
+ },
423
+ {
424
+ "content": "<extra_id_39>",
425
+ "lstrip": true,
426
+ "normalized": true,
427
+ "rstrip": false,
428
+ "single_word": false
429
+ },
430
+ {
431
+ "content": "<extra_id_38>",
432
+ "lstrip": true,
433
+ "normalized": true,
434
+ "rstrip": false,
435
+ "single_word": false
436
+ },
437
+ {
438
+ "content": "<extra_id_37>",
439
+ "lstrip": true,
440
+ "normalized": true,
441
+ "rstrip": false,
442
+ "single_word": false
443
+ },
444
+ {
445
+ "content": "<extra_id_36>",
446
+ "lstrip": true,
447
+ "normalized": true,
448
+ "rstrip": false,
449
+ "single_word": false
450
+ },
451
+ {
452
+ "content": "<extra_id_35>",
453
+ "lstrip": true,
454
+ "normalized": true,
455
+ "rstrip": false,
456
+ "single_word": false
457
+ },
458
+ {
459
+ "content": "<extra_id_34>",
460
+ "lstrip": true,
461
+ "normalized": true,
462
+ "rstrip": false,
463
+ "single_word": false
464
+ },
465
+ {
466
+ "content": "<extra_id_33>",
467
+ "lstrip": true,
468
+ "normalized": true,
469
+ "rstrip": false,
470
+ "single_word": false
471
+ },
472
+ {
473
+ "content": "<extra_id_32>",
474
+ "lstrip": true,
475
+ "normalized": true,
476
+ "rstrip": false,
477
+ "single_word": false
478
+ },
479
+ {
480
+ "content": "<extra_id_31>",
481
+ "lstrip": true,
482
+ "normalized": true,
483
+ "rstrip": false,
484
+ "single_word": false
485
+ },
486
+ {
487
+ "content": "<extra_id_30>",
488
+ "lstrip": true,
489
+ "normalized": true,
490
+ "rstrip": false,
491
+ "single_word": false
492
+ },
493
+ {
494
+ "content": "<extra_id_29>",
495
+ "lstrip": true,
496
+ "normalized": true,
497
+ "rstrip": false,
498
+ "single_word": false
499
+ },
500
+ {
501
+ "content": "<extra_id_28>",
502
+ "lstrip": true,
503
+ "normalized": true,
504
+ "rstrip": false,
505
+ "single_word": false
506
+ },
507
+ {
508
+ "content": "<extra_id_27>",
509
+ "lstrip": true,
510
+ "normalized": true,
511
+ "rstrip": false,
512
+ "single_word": false
513
+ },
514
+ {
515
+ "content": "<extra_id_26>",
516
+ "lstrip": true,
517
+ "normalized": true,
518
+ "rstrip": false,
519
+ "single_word": false
520
+ },
521
+ {
522
+ "content": "<extra_id_25>",
523
+ "lstrip": true,
524
+ "normalized": true,
525
+ "rstrip": false,
526
+ "single_word": false
527
+ },
528
+ {
529
+ "content": "<extra_id_24>",
530
+ "lstrip": true,
531
+ "normalized": true,
532
+ "rstrip": false,
533
+ "single_word": false
534
+ },
535
+ {
536
+ "content": "<extra_id_23>",
537
+ "lstrip": true,
538
+ "normalized": true,
539
+ "rstrip": false,
540
+ "single_word": false
541
+ },
542
+ {
543
+ "content": "<extra_id_22>",
544
+ "lstrip": true,
545
+ "normalized": true,
546
+ "rstrip": false,
547
+ "single_word": false
548
+ },
549
+ {
550
+ "content": "<extra_id_21>",
551
+ "lstrip": true,
552
+ "normalized": true,
553
+ "rstrip": false,
554
+ "single_word": false
555
+ },
556
+ {
557
+ "content": "<extra_id_20>",
558
+ "lstrip": true,
559
+ "normalized": true,
560
+ "rstrip": false,
561
+ "single_word": false
562
+ },
563
+ {
564
+ "content": "<extra_id_19>",
565
+ "lstrip": true,
566
+ "normalized": true,
567
+ "rstrip": false,
568
+ "single_word": false
569
+ },
570
+ {
571
+ "content": "<extra_id_18>",
572
+ "lstrip": true,
573
+ "normalized": true,
574
+ "rstrip": false,
575
+ "single_word": false
576
+ },
577
+ {
578
+ "content": "<extra_id_17>",
579
+ "lstrip": true,
580
+ "normalized": true,
581
+ "rstrip": false,
582
+ "single_word": false
583
+ },
584
+ {
585
+ "content": "<extra_id_16>",
586
+ "lstrip": true,
587
+ "normalized": true,
588
+ "rstrip": false,
589
+ "single_word": false
590
+ },
591
+ {
592
+ "content": "<extra_id_15>",
593
+ "lstrip": true,
594
+ "normalized": true,
595
+ "rstrip": false,
596
+ "single_word": false
597
+ },
598
+ {
599
+ "content": "<extra_id_14>",
600
+ "lstrip": true,
601
+ "normalized": true,
602
+ "rstrip": false,
603
+ "single_word": false
604
+ },
605
+ {
606
+ "content": "<extra_id_13>",
607
+ "lstrip": true,
608
+ "normalized": true,
609
+ "rstrip": false,
610
+ "single_word": false
611
+ },
612
+ {
613
+ "content": "<extra_id_12>",
614
+ "lstrip": true,
615
+ "normalized": true,
616
+ "rstrip": false,
617
+ "single_word": false
618
+ },
619
+ {
620
+ "content": "<extra_id_11>",
621
+ "lstrip": true,
622
+ "normalized": true,
623
+ "rstrip": false,
624
+ "single_word": false
625
+ },
626
+ {
627
+ "content": "<extra_id_10>",
628
+ "lstrip": true,
629
+ "normalized": true,
630
+ "rstrip": false,
631
+ "single_word": false
632
+ },
633
+ {
634
+ "content": "<extra_id_9>",
635
+ "lstrip": true,
636
+ "normalized": true,
637
+ "rstrip": false,
638
+ "single_word": false
639
+ },
640
+ {
641
+ "content": "<extra_id_8>",
642
+ "lstrip": true,
643
+ "normalized": true,
644
+ "rstrip": false,
645
+ "single_word": false
646
+ },
647
+ {
648
+ "content": "<extra_id_7>",
649
+ "lstrip": true,
650
+ "normalized": true,
651
+ "rstrip": false,
652
+ "single_word": false
653
+ },
654
+ {
655
+ "content": "<extra_id_6>",
656
+ "lstrip": true,
657
+ "normalized": true,
658
+ "rstrip": false,
659
+ "single_word": false
660
+ },
661
+ {
662
+ "content": "<extra_id_5>",
663
+ "lstrip": true,
664
+ "normalized": true,
665
+ "rstrip": false,
666
+ "single_word": false
667
+ },
668
+ {
669
+ "content": "<extra_id_4>",
670
+ "lstrip": true,
671
+ "normalized": true,
672
+ "rstrip": false,
673
+ "single_word": false
674
+ },
675
+ {
676
+ "content": "<extra_id_3>",
677
+ "lstrip": true,
678
+ "normalized": true,
679
+ "rstrip": false,
680
+ "single_word": false
681
+ },
682
+ {
683
+ "content": "<extra_id_2>",
684
+ "lstrip": true,
685
+ "normalized": true,
686
+ "rstrip": false,
687
+ "single_word": false
688
+ },
689
+ {
690
+ "content": "<extra_id_1>",
691
+ "lstrip": true,
692
+ "normalized": true,
693
+ "rstrip": false,
694
+ "single_word": false
695
+ },
696
+ {
697
+ "content": "<extra_id_0>",
698
+ "lstrip": true,
699
+ "normalized": true,
700
+ "rstrip": false,
701
+ "single_word": false
702
+ }
703
+ ],
704
+ "bos_token": {
705
+ "content": "<s>",
706
+ "lstrip": false,
707
+ "normalized": true,
708
+ "rstrip": false,
709
+ "single_word": false
710
+ },
711
+ "cls_token": {
712
+ "content": "<s>",
713
+ "lstrip": false,
714
+ "normalized": true,
715
+ "rstrip": false,
716
+ "single_word": false
717
+ },
718
+ "eos_token": {
719
+ "content": "</s>",
720
+ "lstrip": false,
721
+ "normalized": true,
722
+ "rstrip": false,
723
+ "single_word": false
724
+ },
725
+ "mask_token": {
726
+ "content": "<mask>",
727
+ "lstrip": true,
728
+ "normalized": true,
729
+ "rstrip": false,
730
+ "single_word": false
731
+ },
732
+ "pad_token": {
733
+ "content": "<pad>",
734
+ "lstrip": false,
735
+ "normalized": true,
736
+ "rstrip": false,
737
+ "single_word": false
738
+ },
739
+ "sep_token": {
740
+ "content": "</s>",
741
+ "lstrip": false,
742
+ "normalized": true,
743
+ "rstrip": false,
744
+ "single_word": false
745
+ },
746
+ "unk_token": {
747
+ "content": "<unk>",
748
+ "lstrip": false,
749
+ "normalized": true,
750
+ "rstrip": false,
751
+ "single_word": false
752
+ }
753
+ }
ranker_bs256_0/checkpoint-200/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
ranker_bs256_0/checkpoint-200/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "cls_token": {
12
+ "__type": "AddedToken",
13
+ "content": "<s>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "__type": "AddedToken",
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "errors": "replace",
28
+ "mask_token": {
29
+ "__type": "AddedToken",
30
+ "content": "<mask>",
31
+ "lstrip": true,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": false
35
+ },
36
+ "model_max_length": 512,
37
+ "name_or_path": "Salesforce/codet5-large",
38
+ "pad_token": {
39
+ "__type": "AddedToken",
40
+ "content": "<pad>",
41
+ "lstrip": false,
42
+ "normalized": true,
43
+ "rstrip": false,
44
+ "single_word": false
45
+ },
46
+ "sep_token": {
47
+ "__type": "AddedToken",
48
+ "content": "</s>",
49
+ "lstrip": false,
50
+ "normalized": true,
51
+ "rstrip": false,
52
+ "single_word": false
53
+ },
54
+ "special_tokens_map_file": "/export/home/cache/model/5941df5e4315c5ab63b7b2ac791fb0bf0f209744a055c06b43b5274849137cdd.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": {
58
+ "__type": "AddedToken",
59
+ "content": "<unk>",
60
+ "lstrip": false,
61
+ "normalized": true,
62
+ "rstrip": false,
63
+ "single_word": false
64
+ }
65
+ }
ranker_bs256_0/checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.030917575675597338,
5
+ "global_step": 200,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.0,
12
+ "learning_rate": 9.999226963512678e-06,
13
+ "loss": 1.1361,
14
+ "step": 5
15
+ },
16
+ {
17
+ "epoch": 0.0,
18
+ "learning_rate": 9.998453927025357e-06,
19
+ "loss": 0.7353,
20
+ "step": 10
21
+ },
22
+ {
23
+ "epoch": 0.0,
24
+ "learning_rate": 9.997680890538034e-06,
25
+ "loss": 0.6909,
26
+ "step": 15
27
+ },
28
+ {
29
+ "epoch": 0.0,
30
+ "learning_rate": 9.996907854050712e-06,
31
+ "loss": 0.6498,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 0.0,
36
+ "learning_rate": 9.99613481756339e-06,
37
+ "loss": 0.6414,
38
+ "step": 25
39
+ },
40
+ {
41
+ "epoch": 0.0,
42
+ "learning_rate": 9.995361781076068e-06,
43
+ "loss": 0.6415,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 0.01,
48
+ "learning_rate": 9.994588744588745e-06,
49
+ "loss": 0.6317,
50
+ "step": 35
51
+ },
52
+ {
53
+ "epoch": 0.01,
54
+ "learning_rate": 9.993815708101423e-06,
55
+ "loss": 0.6378,
56
+ "step": 40
57
+ },
58
+ {
59
+ "epoch": 0.01,
60
+ "learning_rate": 9.9930426716141e-06,
61
+ "loss": 0.6347,
62
+ "step": 45
63
+ },
64
+ {
65
+ "epoch": 0.01,
66
+ "learning_rate": 9.992269635126779e-06,
67
+ "loss": 0.5924,
68
+ "step": 50
69
+ },
70
+ {
71
+ "epoch": 0.01,
72
+ "learning_rate": 9.991496598639456e-06,
73
+ "loss": 0.6046,
74
+ "step": 55
75
+ },
76
+ {
77
+ "epoch": 0.01,
78
+ "learning_rate": 9.990723562152135e-06,
79
+ "loss": 0.6045,
80
+ "step": 60
81
+ },
82
+ {
83
+ "epoch": 0.01,
84
+ "learning_rate": 9.989950525664813e-06,
85
+ "loss": 0.6,
86
+ "step": 65
87
+ },
88
+ {
89
+ "epoch": 0.01,
90
+ "learning_rate": 9.98917748917749e-06,
91
+ "loss": 0.5504,
92
+ "step": 70
93
+ },
94
+ {
95
+ "epoch": 0.01,
96
+ "learning_rate": 9.988404452690169e-06,
97
+ "loss": 0.5747,
98
+ "step": 75
99
+ },
100
+ {
101
+ "epoch": 0.01,
102
+ "learning_rate": 9.987631416202846e-06,
103
+ "loss": 0.5526,
104
+ "step": 80
105
+ },
106
+ {
107
+ "epoch": 0.01,
108
+ "learning_rate": 9.986858379715523e-06,
109
+ "loss": 0.5958,
110
+ "step": 85
111
+ },
112
+ {
113
+ "epoch": 0.01,
114
+ "learning_rate": 9.9860853432282e-06,
115
+ "loss": 0.608,
116
+ "step": 90
117
+ },
118
+ {
119
+ "epoch": 0.01,
120
+ "learning_rate": 9.985312306740878e-06,
121
+ "loss": 0.5988,
122
+ "step": 95
123
+ },
124
+ {
125
+ "epoch": 0.02,
126
+ "learning_rate": 9.984539270253557e-06,
127
+ "loss": 0.5861,
128
+ "step": 100
129
+ },
130
+ {
131
+ "epoch": 0.02,
132
+ "learning_rate": 9.983766233766234e-06,
133
+ "loss": 0.5749,
134
+ "step": 105
135
+ },
136
+ {
137
+ "epoch": 0.02,
138
+ "learning_rate": 9.982993197278913e-06,
139
+ "loss": 0.5498,
140
+ "step": 110
141
+ },
142
+ {
143
+ "epoch": 0.02,
144
+ "learning_rate": 9.98222016079159e-06,
145
+ "loss": 0.5841,
146
+ "step": 115
147
+ },
148
+ {
149
+ "epoch": 0.02,
150
+ "learning_rate": 9.981447124304268e-06,
151
+ "loss": 0.5973,
152
+ "step": 120
153
+ },
154
+ {
155
+ "epoch": 0.02,
156
+ "learning_rate": 9.980674087816947e-06,
157
+ "loss": 0.5954,
158
+ "step": 125
159
+ },
160
+ {
161
+ "epoch": 0.02,
162
+ "learning_rate": 9.979901051329624e-06,
163
+ "loss": 0.527,
164
+ "step": 130
165
+ },
166
+ {
167
+ "epoch": 0.02,
168
+ "learning_rate": 9.979128014842301e-06,
169
+ "loss": 0.5321,
170
+ "step": 135
171
+ },
172
+ {
173
+ "epoch": 0.02,
174
+ "learning_rate": 9.978354978354979e-06,
175
+ "loss": 0.5781,
176
+ "step": 140
177
+ },
178
+ {
179
+ "epoch": 0.02,
180
+ "learning_rate": 9.977581941867656e-06,
181
+ "loss": 0.5119,
182
+ "step": 145
183
+ },
184
+ {
185
+ "epoch": 0.02,
186
+ "learning_rate": 9.976808905380335e-06,
187
+ "loss": 0.5271,
188
+ "step": 150
189
+ },
190
+ {
191
+ "epoch": 0.02,
192
+ "learning_rate": 9.976035868893012e-06,
193
+ "loss": 0.5814,
194
+ "step": 155
195
+ },
196
+ {
197
+ "epoch": 0.02,
198
+ "learning_rate": 9.97526283240569e-06,
199
+ "loss": 0.518,
200
+ "step": 160
201
+ },
202
+ {
203
+ "epoch": 0.03,
204
+ "learning_rate": 9.974489795918369e-06,
205
+ "loss": 0.5335,
206
+ "step": 165
207
+ },
208
+ {
209
+ "epoch": 0.03,
210
+ "learning_rate": 9.973716759431046e-06,
211
+ "loss": 0.5062,
212
+ "step": 170
213
+ },
214
+ {
215
+ "epoch": 0.03,
216
+ "learning_rate": 9.972943722943725e-06,
217
+ "loss": 0.5253,
218
+ "step": 175
219
+ },
220
+ {
221
+ "epoch": 0.03,
222
+ "learning_rate": 9.972170686456402e-06,
223
+ "loss": 0.5856,
224
+ "step": 180
225
+ },
226
+ {
227
+ "epoch": 0.03,
228
+ "learning_rate": 9.97139764996908e-06,
229
+ "loss": 0.5196,
230
+ "step": 185
231
+ },
232
+ {
233
+ "epoch": 0.03,
234
+ "learning_rate": 9.970624613481757e-06,
235
+ "loss": 0.4764,
236
+ "step": 190
237
+ },
238
+ {
239
+ "epoch": 0.03,
240
+ "learning_rate": 9.969851576994434e-06,
241
+ "loss": 0.5254,
242
+ "step": 195
243
+ },
244
+ {
245
+ "epoch": 0.03,
246
+ "learning_rate": 9.969078540507111e-06,
247
+ "loss": 0.5442,
248
+ "step": 200
249
+ },
250
+ {
251
+ "epoch": 0.03,
252
+ "eval_accuracy": 0.5804400673190799,
253
+ "eval_accuracy_sklearn": 0.5804400673190799,
254
+ "eval_f1": 0.5294915349019279,
255
+ "eval_loss": 0.7918509840965271,
256
+ "eval_precision": 0.6370946036872561,
257
+ "eval_recall": 0.45298409281186464,
258
+ "eval_runtime": 4914.2737,
259
+ "eval_samples_per_second": 16.323,
260
+ "eval_steps_per_second": 2.04,
261
+ "step": 200
262
+ }
263
+ ],
264
+ "max_steps": 64680,
265
+ "num_train_epochs": 10,
266
+ "total_flos": 2.37535583797248e+16,
267
+ "trial_name": null,
268
+ "trial_params": null
269
+ }
ranker_bs256_0/checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a89904cc492981aada74646ac2bddbb0a7e40cc9d86615845667f8c5c4c4e4f
3
+ size 3451
ranker_bs256_0/checkpoint-200/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
ranker_bs256_0/checkpoint-400/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Salesforce/codet5-large",
3
+ "architectures": [
4
+ "T5EncoderForSequenceClassification"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 4096,
8
+ "d_kv": 64,
9
+ "d_model": 1024,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 24,
22
+ "num_heads": 16,
23
+ "num_layers": 24,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "problem_type": "single_label_classification",
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "task_specific_params": {
30
+ "summarization": {
31
+ "early_stopping": true,
32
+ "length_penalty": 2.0,
33
+ "max_length": 200,
34
+ "min_length": 30,
35
+ "no_repeat_ngram_size": 3,
36
+ "num_beams": 4,
37
+ "prefix": "summarize: "
38
+ },
39
+ "translation_en_to_de": {
40
+ "early_stopping": true,
41
+ "max_length": 300,
42
+ "num_beams": 4,
43
+ "prefix": "translate English to German: "
44
+ },
45
+ "translation_en_to_fr": {
46
+ "early_stopping": true,
47
+ "max_length": 300,
48
+ "num_beams": 4,
49
+ "prefix": "translate English to French: "
50
+ },
51
+ "translation_en_to_ro": {
52
+ "early_stopping": true,
53
+ "max_length": 300,
54
+ "num_beams": 4,
55
+ "prefix": "translate English to Romanian: "
56
+ }
57
+ },
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.26.0",
60
+ "use_cache": true,
61
+ "vocab_size": 32100
62
+ }
ranker_bs256_0/checkpoint-400/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
ranker_bs256_0/checkpoint-400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:558cb857e5db120f2b4dcca4a9489a964eb49beec8c9a84389edfc8f2d3f2898
3
+ size 2679420869
ranker_bs256_0/checkpoint-400/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50d3fc1fddb25ec7c628708e0de189bec915eee2827e52450b223d1f82126d1c
3
+ size 1339719649
ranker_bs256_0/checkpoint-400/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa20e9893ff44cac6be4e378383beba5eafb8e116f39275a95bf546b6f6c9ca3
3
+ size 14575
ranker_bs256_0/checkpoint-400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23a4641fad0ad9a18a364fba1e74f186c1d6e53df1b713ac1abdb92c01e72ad3
3
+ size 627
ranker_bs256_0/checkpoint-400/special_tokens_map.json ADDED
@@ -0,0 +1,753 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<extra_id_99>",
5
+ "lstrip": true,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<extra_id_98>",
12
+ "lstrip": true,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<extra_id_97>",
19
+ "lstrip": true,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "<extra_id_96>",
26
+ "lstrip": true,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<extra_id_95>",
33
+ "lstrip": true,
34
+ "normalized": true,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "<extra_id_94>",
40
+ "lstrip": true,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<extra_id_93>",
47
+ "lstrip": true,
48
+ "normalized": true,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "<extra_id_92>",
54
+ "lstrip": true,
55
+ "normalized": true,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<extra_id_91>",
61
+ "lstrip": true,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "<extra_id_90>",
68
+ "lstrip": true,
69
+ "normalized": true,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "<extra_id_89>",
75
+ "lstrip": true,
76
+ "normalized": true,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ },
80
+ {
81
+ "content": "<extra_id_88>",
82
+ "lstrip": true,
83
+ "normalized": true,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ },
87
+ {
88
+ "content": "<extra_id_87>",
89
+ "lstrip": true,
90
+ "normalized": true,
91
+ "rstrip": false,
92
+ "single_word": false
93
+ },
94
+ {
95
+ "content": "<extra_id_86>",
96
+ "lstrip": true,
97
+ "normalized": true,
98
+ "rstrip": false,
99
+ "single_word": false
100
+ },
101
+ {
102
+ "content": "<extra_id_85>",
103
+ "lstrip": true,
104
+ "normalized": true,
105
+ "rstrip": false,
106
+ "single_word": false
107
+ },
108
+ {
109
+ "content": "<extra_id_84>",
110
+ "lstrip": true,
111
+ "normalized": true,
112
+ "rstrip": false,
113
+ "single_word": false
114
+ },
115
+ {
116
+ "content": "<extra_id_83>",
117
+ "lstrip": true,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false
121
+ },
122
+ {
123
+ "content": "<extra_id_82>",
124
+ "lstrip": true,
125
+ "normalized": true,
126
+ "rstrip": false,
127
+ "single_word": false
128
+ },
129
+ {
130
+ "content": "<extra_id_81>",
131
+ "lstrip": true,
132
+ "normalized": true,
133
+ "rstrip": false,
134
+ "single_word": false
135
+ },
136
+ {
137
+ "content": "<extra_id_80>",
138
+ "lstrip": true,
139
+ "normalized": true,
140
+ "rstrip": false,
141
+ "single_word": false
142
+ },
143
+ {
144
+ "content": "<extra_id_79>",
145
+ "lstrip": true,
146
+ "normalized": true,
147
+ "rstrip": false,
148
+ "single_word": false
149
+ },
150
+ {
151
+ "content": "<extra_id_78>",
152
+ "lstrip": true,
153
+ "normalized": true,
154
+ "rstrip": false,
155
+ "single_word": false
156
+ },
157
+ {
158
+ "content": "<extra_id_77>",
159
+ "lstrip": true,
160
+ "normalized": true,
161
+ "rstrip": false,
162
+ "single_word": false
163
+ },
164
+ {
165
+ "content": "<extra_id_76>",
166
+ "lstrip": true,
167
+ "normalized": true,
168
+ "rstrip": false,
169
+ "single_word": false
170
+ },
171
+ {
172
+ "content": "<extra_id_75>",
173
+ "lstrip": true,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false
177
+ },
178
+ {
179
+ "content": "<extra_id_74>",
180
+ "lstrip": true,
181
+ "normalized": true,
182
+ "rstrip": false,
183
+ "single_word": false
184
+ },
185
+ {
186
+ "content": "<extra_id_73>",
187
+ "lstrip": true,
188
+ "normalized": true,
189
+ "rstrip": false,
190
+ "single_word": false
191
+ },
192
+ {
193
+ "content": "<extra_id_72>",
194
+ "lstrip": true,
195
+ "normalized": true,
196
+ "rstrip": false,
197
+ "single_word": false
198
+ },
199
+ {
200
+ "content": "<extra_id_71>",
201
+ "lstrip": true,
202
+ "normalized": true,
203
+ "rstrip": false,
204
+ "single_word": false
205
+ },
206
+ {
207
+ "content": "<extra_id_70>",
208
+ "lstrip": true,
209
+ "normalized": true,
210
+ "rstrip": false,
211
+ "single_word": false
212
+ },
213
+ {
214
+ "content": "<extra_id_69>",
215
+ "lstrip": true,
216
+ "normalized": true,
217
+ "rstrip": false,
218
+ "single_word": false
219
+ },
220
+ {
221
+ "content": "<extra_id_68>",
222
+ "lstrip": true,
223
+ "normalized": true,
224
+ "rstrip": false,
225
+ "single_word": false
226
+ },
227
+ {
228
+ "content": "<extra_id_67>",
229
+ "lstrip": true,
230
+ "normalized": true,
231
+ "rstrip": false,
232
+ "single_word": false
233
+ },
234
+ {
235
+ "content": "<extra_id_66>",
236
+ "lstrip": true,
237
+ "normalized": true,
238
+ "rstrip": false,
239
+ "single_word": false
240
+ },
241
+ {
242
+ "content": "<extra_id_65>",
243
+ "lstrip": true,
244
+ "normalized": true,
245
+ "rstrip": false,
246
+ "single_word": false
247
+ },
248
+ {
249
+ "content": "<extra_id_64>",
250
+ "lstrip": true,
251
+ "normalized": true,
252
+ "rstrip": false,
253
+ "single_word": false
254
+ },
255
+ {
256
+ "content": "<extra_id_63>",
257
+ "lstrip": true,
258
+ "normalized": true,
259
+ "rstrip": false,
260
+ "single_word": false
261
+ },
262
+ {
263
+ "content": "<extra_id_62>",
264
+ "lstrip": true,
265
+ "normalized": true,
266
+ "rstrip": false,
267
+ "single_word": false
268
+ },
269
+ {
270
+ "content": "<extra_id_61>",
271
+ "lstrip": true,
272
+ "normalized": true,
273
+ "rstrip": false,
274
+ "single_word": false
275
+ },
276
+ {
277
+ "content": "<extra_id_60>",
278
+ "lstrip": true,
279
+ "normalized": true,
280
+ "rstrip": false,
281
+ "single_word": false
282
+ },
283
+ {
284
+ "content": "<extra_id_59>",
285
+ "lstrip": true,
286
+ "normalized": true,
287
+ "rstrip": false,
288
+ "single_word": false
289
+ },
290
+ {
291
+ "content": "<extra_id_58>",
292
+ "lstrip": true,
293
+ "normalized": true,
294
+ "rstrip": false,
295
+ "single_word": false
296
+ },
297
+ {
298
+ "content": "<extra_id_57>",
299
+ "lstrip": true,
300
+ "normalized": true,
301
+ "rstrip": false,
302
+ "single_word": false
303
+ },
304
+ {
305
+ "content": "<extra_id_56>",
306
+ "lstrip": true,
307
+ "normalized": true,
308
+ "rstrip": false,
309
+ "single_word": false
310
+ },
311
+ {
312
+ "content": "<extra_id_55>",
313
+ "lstrip": true,
314
+ "normalized": true,
315
+ "rstrip": false,
316
+ "single_word": false
317
+ },
318
+ {
319
+ "content": "<extra_id_54>",
320
+ "lstrip": true,
321
+ "normalized": true,
322
+ "rstrip": false,
323
+ "single_word": false
324
+ },
325
+ {
326
+ "content": "<extra_id_53>",
327
+ "lstrip": true,
328
+ "normalized": true,
329
+ "rstrip": false,
330
+ "single_word": false
331
+ },
332
+ {
333
+ "content": "<extra_id_52>",
334
+ "lstrip": true,
335
+ "normalized": true,
336
+ "rstrip": false,
337
+ "single_word": false
338
+ },
339
+ {
340
+ "content": "<extra_id_51>",
341
+ "lstrip": true,
342
+ "normalized": true,
343
+ "rstrip": false,
344
+ "single_word": false
345
+ },
346
+ {
347
+ "content": "<extra_id_50>",
348
+ "lstrip": true,
349
+ "normalized": true,
350
+ "rstrip": false,
351
+ "single_word": false
352
+ },
353
+ {
354
+ "content": "<extra_id_49>",
355
+ "lstrip": true,
356
+ "normalized": true,
357
+ "rstrip": false,
358
+ "single_word": false
359
+ },
360
+ {
361
+ "content": "<extra_id_48>",
362
+ "lstrip": true,
363
+ "normalized": true,
364
+ "rstrip": false,
365
+ "single_word": false
366
+ },
367
+ {
368
+ "content": "<extra_id_47>",
369
+ "lstrip": true,
370
+ "normalized": true,
371
+ "rstrip": false,
372
+ "single_word": false
373
+ },
374
+ {
375
+ "content": "<extra_id_46>",
376
+ "lstrip": true,
377
+ "normalized": true,
378
+ "rstrip": false,
379
+ "single_word": false
380
+ },
381
+ {
382
+ "content": "<extra_id_45>",
383
+ "lstrip": true,
384
+ "normalized": true,
385
+ "rstrip": false,
386
+ "single_word": false
387
+ },
388
+ {
389
+ "content": "<extra_id_44>",
390
+ "lstrip": true,
391
+ "normalized": true,
392
+ "rstrip": false,
393
+ "single_word": false
394
+ },
395
+ {
396
+ "content": "<extra_id_43>",
397
+ "lstrip": true,
398
+ "normalized": true,
399
+ "rstrip": false,
400
+ "single_word": false
401
+ },
402
+ {
403
+ "content": "<extra_id_42>",
404
+ "lstrip": true,
405
+ "normalized": true,
406
+ "rstrip": false,
407
+ "single_word": false
408
+ },
409
+ {
410
+ "content": "<extra_id_41>",
411
+ "lstrip": true,
412
+ "normalized": true,
413
+ "rstrip": false,
414
+ "single_word": false
415
+ },
416
+ {
417
+ "content": "<extra_id_40>",
418
+ "lstrip": true,
419
+ "normalized": true,
420
+ "rstrip": false,
421
+ "single_word": false
422
+ },
423
+ {
424
+ "content": "<extra_id_39>",
425
+ "lstrip": true,
426
+ "normalized": true,
427
+ "rstrip": false,
428
+ "single_word": false
429
+ },
430
+ {
431
+ "content": "<extra_id_38>",
432
+ "lstrip": true,
433
+ "normalized": true,
434
+ "rstrip": false,
435
+ "single_word": false
436
+ },
437
+ {
438
+ "content": "<extra_id_37>",
439
+ "lstrip": true,
440
+ "normalized": true,
441
+ "rstrip": false,
442
+ "single_word": false
443
+ },
444
+ {
445
+ "content": "<extra_id_36>",
446
+ "lstrip": true,
447
+ "normalized": true,
448
+ "rstrip": false,
449
+ "single_word": false
450
+ },
451
+ {
452
+ "content": "<extra_id_35>",
453
+ "lstrip": true,
454
+ "normalized": true,
455
+ "rstrip": false,
456
+ "single_word": false
457
+ },
458
+ {
459
+ "content": "<extra_id_34>",
460
+ "lstrip": true,
461
+ "normalized": true,
462
+ "rstrip": false,
463
+ "single_word": false
464
+ },
465
+ {
466
+ "content": "<extra_id_33>",
467
+ "lstrip": true,
468
+ "normalized": true,
469
+ "rstrip": false,
470
+ "single_word": false
471
+ },
472
+ {
473
+ "content": "<extra_id_32>",
474
+ "lstrip": true,
475
+ "normalized": true,
476
+ "rstrip": false,
477
+ "single_word": false
478
+ },
479
+ {
480
+ "content": "<extra_id_31>",
481
+ "lstrip": true,
482
+ "normalized": true,
483
+ "rstrip": false,
484
+ "single_word": false
485
+ },
486
+ {
487
+ "content": "<extra_id_30>",
488
+ "lstrip": true,
489
+ "normalized": true,
490
+ "rstrip": false,
491
+ "single_word": false
492
+ },
493
+ {
494
+ "content": "<extra_id_29>",
495
+ "lstrip": true,
496
+ "normalized": true,
497
+ "rstrip": false,
498
+ "single_word": false
499
+ },
500
+ {
501
+ "content": "<extra_id_28>",
502
+ "lstrip": true,
503
+ "normalized": true,
504
+ "rstrip": false,
505
+ "single_word": false
506
+ },
507
+ {
508
+ "content": "<extra_id_27>",
509
+ "lstrip": true,
510
+ "normalized": true,
511
+ "rstrip": false,
512
+ "single_word": false
513
+ },
514
+ {
515
+ "content": "<extra_id_26>",
516
+ "lstrip": true,
517
+ "normalized": true,
518
+ "rstrip": false,
519
+ "single_word": false
520
+ },
521
+ {
522
+ "content": "<extra_id_25>",
523
+ "lstrip": true,
524
+ "normalized": true,
525
+ "rstrip": false,
526
+ "single_word": false
527
+ },
528
+ {
529
+ "content": "<extra_id_24>",
530
+ "lstrip": true,
531
+ "normalized": true,
532
+ "rstrip": false,
533
+ "single_word": false
534
+ },
535
+ {
536
+ "content": "<extra_id_23>",
537
+ "lstrip": true,
538
+ "normalized": true,
539
+ "rstrip": false,
540
+ "single_word": false
541
+ },
542
+ {
543
+ "content": "<extra_id_22>",
544
+ "lstrip": true,
545
+ "normalized": true,
546
+ "rstrip": false,
547
+ "single_word": false
548
+ },
549
+ {
550
+ "content": "<extra_id_21>",
551
+ "lstrip": true,
552
+ "normalized": true,
553
+ "rstrip": false,
554
+ "single_word": false
555
+ },
556
+ {
557
+ "content": "<extra_id_20>",
558
+ "lstrip": true,
559
+ "normalized": true,
560
+ "rstrip": false,
561
+ "single_word": false
562
+ },
563
+ {
564
+ "content": "<extra_id_19>",
565
+ "lstrip": true,
566
+ "normalized": true,
567
+ "rstrip": false,
568
+ "single_word": false
569
+ },
570
+ {
571
+ "content": "<extra_id_18>",
572
+ "lstrip": true,
573
+ "normalized": true,
574
+ "rstrip": false,
575
+ "single_word": false
576
+ },
577
+ {
578
+ "content": "<extra_id_17>",
579
+ "lstrip": true,
580
+ "normalized": true,
581
+ "rstrip": false,
582
+ "single_word": false
583
+ },
584
+ {
585
+ "content": "<extra_id_16>",
586
+ "lstrip": true,
587
+ "normalized": true,
588
+ "rstrip": false,
589
+ "single_word": false
590
+ },
591
+ {
592
+ "content": "<extra_id_15>",
593
+ "lstrip": true,
594
+ "normalized": true,
595
+ "rstrip": false,
596
+ "single_word": false
597
+ },
598
+ {
599
+ "content": "<extra_id_14>",
600
+ "lstrip": true,
601
+ "normalized": true,
602
+ "rstrip": false,
603
+ "single_word": false
604
+ },
605
+ {
606
+ "content": "<extra_id_13>",
607
+ "lstrip": true,
608
+ "normalized": true,
609
+ "rstrip": false,
610
+ "single_word": false
611
+ },
612
+ {
613
+ "content": "<extra_id_12>",
614
+ "lstrip": true,
615
+ "normalized": true,
616
+ "rstrip": false,
617
+ "single_word": false
618
+ },
619
+ {
620
+ "content": "<extra_id_11>",
621
+ "lstrip": true,
622
+ "normalized": true,
623
+ "rstrip": false,
624
+ "single_word": false
625
+ },
626
+ {
627
+ "content": "<extra_id_10>",
628
+ "lstrip": true,
629
+ "normalized": true,
630
+ "rstrip": false,
631
+ "single_word": false
632
+ },
633
+ {
634
+ "content": "<extra_id_9>",
635
+ "lstrip": true,
636
+ "normalized": true,
637
+ "rstrip": false,
638
+ "single_word": false
639
+ },
640
+ {
641
+ "content": "<extra_id_8>",
642
+ "lstrip": true,
643
+ "normalized": true,
644
+ "rstrip": false,
645
+ "single_word": false
646
+ },
647
+ {
648
+ "content": "<extra_id_7>",
649
+ "lstrip": true,
650
+ "normalized": true,
651
+ "rstrip": false,
652
+ "single_word": false
653
+ },
654
+ {
655
+ "content": "<extra_id_6>",
656
+ "lstrip": true,
657
+ "normalized": true,
658
+ "rstrip": false,
659
+ "single_word": false
660
+ },
661
+ {
662
+ "content": "<extra_id_5>",
663
+ "lstrip": true,
664
+ "normalized": true,
665
+ "rstrip": false,
666
+ "single_word": false
667
+ },
668
+ {
669
+ "content": "<extra_id_4>",
670
+ "lstrip": true,
671
+ "normalized": true,
672
+ "rstrip": false,
673
+ "single_word": false
674
+ },
675
+ {
676
+ "content": "<extra_id_3>",
677
+ "lstrip": true,
678
+ "normalized": true,
679
+ "rstrip": false,
680
+ "single_word": false
681
+ },
682
+ {
683
+ "content": "<extra_id_2>",
684
+ "lstrip": true,
685
+ "normalized": true,
686
+ "rstrip": false,
687
+ "single_word": false
688
+ },
689
+ {
690
+ "content": "<extra_id_1>",
691
+ "lstrip": true,
692
+ "normalized": true,
693
+ "rstrip": false,
694
+ "single_word": false
695
+ },
696
+ {
697
+ "content": "<extra_id_0>",
698
+ "lstrip": true,
699
+ "normalized": true,
700
+ "rstrip": false,
701
+ "single_word": false
702
+ }
703
+ ],
704
+ "bos_token": {
705
+ "content": "<s>",
706
+ "lstrip": false,
707
+ "normalized": true,
708
+ "rstrip": false,
709
+ "single_word": false
710
+ },
711
+ "cls_token": {
712
+ "content": "<s>",
713
+ "lstrip": false,
714
+ "normalized": true,
715
+ "rstrip": false,
716
+ "single_word": false
717
+ },
718
+ "eos_token": {
719
+ "content": "</s>",
720
+ "lstrip": false,
721
+ "normalized": true,
722
+ "rstrip": false,
723
+ "single_word": false
724
+ },
725
+ "mask_token": {
726
+ "content": "<mask>",
727
+ "lstrip": true,
728
+ "normalized": true,
729
+ "rstrip": false,
730
+ "single_word": false
731
+ },
732
+ "pad_token": {
733
+ "content": "<pad>",
734
+ "lstrip": false,
735
+ "normalized": true,
736
+ "rstrip": false,
737
+ "single_word": false
738
+ },
739
+ "sep_token": {
740
+ "content": "</s>",
741
+ "lstrip": false,
742
+ "normalized": true,
743
+ "rstrip": false,
744
+ "single_word": false
745
+ },
746
+ "unk_token": {
747
+ "content": "<unk>",
748
+ "lstrip": false,
749
+ "normalized": true,
750
+ "rstrip": false,
751
+ "single_word": false
752
+ }
753
+ }
ranker_bs256_0/checkpoint-400/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
ranker_bs256_0/checkpoint-400/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "cls_token": {
12
+ "__type": "AddedToken",
13
+ "content": "<s>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "__type": "AddedToken",
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "errors": "replace",
28
+ "mask_token": {
29
+ "__type": "AddedToken",
30
+ "content": "<mask>",
31
+ "lstrip": true,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": false
35
+ },
36
+ "model_max_length": 512,
37
+ "name_or_path": "Salesforce/codet5-large",
38
+ "pad_token": {
39
+ "__type": "AddedToken",
40
+ "content": "<pad>",
41
+ "lstrip": false,
42
+ "normalized": true,
43
+ "rstrip": false,
44
+ "single_word": false
45
+ },
46
+ "sep_token": {
47
+ "__type": "AddedToken",
48
+ "content": "</s>",
49
+ "lstrip": false,
50
+ "normalized": true,
51
+ "rstrip": false,
52
+ "single_word": false
53
+ },
54
+ "special_tokens_map_file": "/export/home/cache/model/5941df5e4315c5ab63b7b2ac791fb0bf0f209744a055c06b43b5274849137cdd.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": {
58
+ "__type": "AddedToken",
59
+ "content": "<unk>",
60
+ "lstrip": false,
61
+ "normalized": true,
62
+ "rstrip": false,
63
+ "single_word": false
64
+ }
65
+ }
ranker_bs256_0/checkpoint-400/trainer_state.json ADDED
@@ -0,0 +1,522 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.061835151351194675,
5
+ "global_step": 400,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.0,
12
+ "learning_rate": 9.999226963512678e-06,
13
+ "loss": 1.1361,
14
+ "step": 5
15
+ },
16
+ {
17
+ "epoch": 0.0,
18
+ "learning_rate": 9.998453927025357e-06,
19
+ "loss": 0.7353,
20
+ "step": 10
21
+ },
22
+ {
23
+ "epoch": 0.0,
24
+ "learning_rate": 9.997680890538034e-06,
25
+ "loss": 0.6909,
26
+ "step": 15
27
+ },
28
+ {
29
+ "epoch": 0.0,
30
+ "learning_rate": 9.996907854050712e-06,
31
+ "loss": 0.6498,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 0.0,
36
+ "learning_rate": 9.99613481756339e-06,
37
+ "loss": 0.6414,
38
+ "step": 25
39
+ },
40
+ {
41
+ "epoch": 0.0,
42
+ "learning_rate": 9.995361781076068e-06,
43
+ "loss": 0.6415,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 0.01,
48
+ "learning_rate": 9.994588744588745e-06,
49
+ "loss": 0.6317,
50
+ "step": 35
51
+ },
52
+ {
53
+ "epoch": 0.01,
54
+ "learning_rate": 9.993815708101423e-06,
55
+ "loss": 0.6378,
56
+ "step": 40
57
+ },
58
+ {
59
+ "epoch": 0.01,
60
+ "learning_rate": 9.9930426716141e-06,
61
+ "loss": 0.6347,
62
+ "step": 45
63
+ },
64
+ {
65
+ "epoch": 0.01,
66
+ "learning_rate": 9.992269635126779e-06,
67
+ "loss": 0.5924,
68
+ "step": 50
69
+ },
70
+ {
71
+ "epoch": 0.01,
72
+ "learning_rate": 9.991496598639456e-06,
73
+ "loss": 0.6046,
74
+ "step": 55
75
+ },
76
+ {
77
+ "epoch": 0.01,
78
+ "learning_rate": 9.990723562152135e-06,
79
+ "loss": 0.6045,
80
+ "step": 60
81
+ },
82
+ {
83
+ "epoch": 0.01,
84
+ "learning_rate": 9.989950525664813e-06,
85
+ "loss": 0.6,
86
+ "step": 65
87
+ },
88
+ {
89
+ "epoch": 0.01,
90
+ "learning_rate": 9.98917748917749e-06,
91
+ "loss": 0.5504,
92
+ "step": 70
93
+ },
94
+ {
95
+ "epoch": 0.01,
96
+ "learning_rate": 9.988404452690169e-06,
97
+ "loss": 0.5747,
98
+ "step": 75
99
+ },
100
+ {
101
+ "epoch": 0.01,
102
+ "learning_rate": 9.987631416202846e-06,
103
+ "loss": 0.5526,
104
+ "step": 80
105
+ },
106
+ {
107
+ "epoch": 0.01,
108
+ "learning_rate": 9.986858379715523e-06,
109
+ "loss": 0.5958,
110
+ "step": 85
111
+ },
112
+ {
113
+ "epoch": 0.01,
114
+ "learning_rate": 9.9860853432282e-06,
115
+ "loss": 0.608,
116
+ "step": 90
117
+ },
118
+ {
119
+ "epoch": 0.01,
120
+ "learning_rate": 9.985312306740878e-06,
121
+ "loss": 0.5988,
122
+ "step": 95
123
+ },
124
+ {
125
+ "epoch": 0.02,
126
+ "learning_rate": 9.984539270253557e-06,
127
+ "loss": 0.5861,
128
+ "step": 100
129
+ },
130
+ {
131
+ "epoch": 0.02,
132
+ "learning_rate": 9.983766233766234e-06,
133
+ "loss": 0.5749,
134
+ "step": 105
135
+ },
136
+ {
137
+ "epoch": 0.02,
138
+ "learning_rate": 9.982993197278913e-06,
139
+ "loss": 0.5498,
140
+ "step": 110
141
+ },
142
+ {
143
+ "epoch": 0.02,
144
+ "learning_rate": 9.98222016079159e-06,
145
+ "loss": 0.5841,
146
+ "step": 115
147
+ },
148
+ {
149
+ "epoch": 0.02,
150
+ "learning_rate": 9.981447124304268e-06,
151
+ "loss": 0.5973,
152
+ "step": 120
153
+ },
154
+ {
155
+ "epoch": 0.02,
156
+ "learning_rate": 9.980674087816947e-06,
157
+ "loss": 0.5954,
158
+ "step": 125
159
+ },
160
+ {
161
+ "epoch": 0.02,
162
+ "learning_rate": 9.979901051329624e-06,
163
+ "loss": 0.527,
164
+ "step": 130
165
+ },
166
+ {
167
+ "epoch": 0.02,
168
+ "learning_rate": 9.979128014842301e-06,
169
+ "loss": 0.5321,
170
+ "step": 135
171
+ },
172
+ {
173
+ "epoch": 0.02,
174
+ "learning_rate": 9.978354978354979e-06,
175
+ "loss": 0.5781,
176
+ "step": 140
177
+ },
178
+ {
179
+ "epoch": 0.02,
180
+ "learning_rate": 9.977581941867656e-06,
181
+ "loss": 0.5119,
182
+ "step": 145
183
+ },
184
+ {
185
+ "epoch": 0.02,
186
+ "learning_rate": 9.976808905380335e-06,
187
+ "loss": 0.5271,
188
+ "step": 150
189
+ },
190
+ {
191
+ "epoch": 0.02,
192
+ "learning_rate": 9.976035868893012e-06,
193
+ "loss": 0.5814,
194
+ "step": 155
195
+ },
196
+ {
197
+ "epoch": 0.02,
198
+ "learning_rate": 9.97526283240569e-06,
199
+ "loss": 0.518,
200
+ "step": 160
201
+ },
202
+ {
203
+ "epoch": 0.03,
204
+ "learning_rate": 9.974489795918369e-06,
205
+ "loss": 0.5335,
206
+ "step": 165
207
+ },
208
+ {
209
+ "epoch": 0.03,
210
+ "learning_rate": 9.973716759431046e-06,
211
+ "loss": 0.5062,
212
+ "step": 170
213
+ },
214
+ {
215
+ "epoch": 0.03,
216
+ "learning_rate": 9.972943722943725e-06,
217
+ "loss": 0.5253,
218
+ "step": 175
219
+ },
220
+ {
221
+ "epoch": 0.03,
222
+ "learning_rate": 9.972170686456402e-06,
223
+ "loss": 0.5856,
224
+ "step": 180
225
+ },
226
+ {
227
+ "epoch": 0.03,
228
+ "learning_rate": 9.97139764996908e-06,
229
+ "loss": 0.5196,
230
+ "step": 185
231
+ },
232
+ {
233
+ "epoch": 0.03,
234
+ "learning_rate": 9.970624613481757e-06,
235
+ "loss": 0.4764,
236
+ "step": 190
237
+ },
238
+ {
239
+ "epoch": 0.03,
240
+ "learning_rate": 9.969851576994434e-06,
241
+ "loss": 0.5254,
242
+ "step": 195
243
+ },
244
+ {
245
+ "epoch": 0.03,
246
+ "learning_rate": 9.969078540507111e-06,
247
+ "loss": 0.5442,
248
+ "step": 200
249
+ },
250
+ {
251
+ "epoch": 0.03,
252
+ "eval_accuracy": 0.5804400673190799,
253
+ "eval_accuracy_sklearn": 0.5804400673190799,
254
+ "eval_f1": 0.5294915349019279,
255
+ "eval_loss": 0.7918509840965271,
256
+ "eval_precision": 0.6370946036872561,
257
+ "eval_recall": 0.45298409281186464,
258
+ "eval_runtime": 4914.2737,
259
+ "eval_samples_per_second": 16.323,
260
+ "eval_steps_per_second": 2.04,
261
+ "step": 200
262
+ },
263
+ {
264
+ "epoch": 0.03,
265
+ "learning_rate": 9.96830550401979e-06,
266
+ "loss": 0.5163,
267
+ "step": 205
268
+ },
269
+ {
270
+ "epoch": 0.03,
271
+ "learning_rate": 9.967532467532468e-06,
272
+ "loss": 0.5044,
273
+ "step": 210
274
+ },
275
+ {
276
+ "epoch": 0.03,
277
+ "learning_rate": 9.966759431045147e-06,
278
+ "loss": 0.5078,
279
+ "step": 215
280
+ },
281
+ {
282
+ "epoch": 0.03,
283
+ "learning_rate": 9.965986394557824e-06,
284
+ "loss": 0.4623,
285
+ "step": 220
286
+ },
287
+ {
288
+ "epoch": 0.03,
289
+ "learning_rate": 9.965213358070501e-06,
290
+ "loss": 0.5359,
291
+ "step": 225
292
+ },
293
+ {
294
+ "epoch": 0.04,
295
+ "learning_rate": 9.96444032158318e-06,
296
+ "loss": 0.5068,
297
+ "step": 230
298
+ },
299
+ {
300
+ "epoch": 0.04,
301
+ "learning_rate": 9.963667285095858e-06,
302
+ "loss": 0.5029,
303
+ "step": 235
304
+ },
305
+ {
306
+ "epoch": 0.04,
307
+ "learning_rate": 9.962894248608535e-06,
308
+ "loss": 0.5084,
309
+ "step": 240
310
+ },
311
+ {
312
+ "epoch": 0.04,
313
+ "learning_rate": 9.962121212121212e-06,
314
+ "loss": 0.4783,
315
+ "step": 245
316
+ },
317
+ {
318
+ "epoch": 0.04,
319
+ "learning_rate": 9.96134817563389e-06,
320
+ "loss": 0.5216,
321
+ "step": 250
322
+ },
323
+ {
324
+ "epoch": 0.04,
325
+ "learning_rate": 9.960575139146569e-06,
326
+ "loss": 0.54,
327
+ "step": 255
328
+ },
329
+ {
330
+ "epoch": 0.04,
331
+ "learning_rate": 9.959802102659246e-06,
332
+ "loss": 0.5494,
333
+ "step": 260
334
+ },
335
+ {
336
+ "epoch": 0.04,
337
+ "learning_rate": 9.959029066171925e-06,
338
+ "loss": 0.5401,
339
+ "step": 265
340
+ },
341
+ {
342
+ "epoch": 0.04,
343
+ "learning_rate": 9.958256029684602e-06,
344
+ "loss": 0.5073,
345
+ "step": 270
346
+ },
347
+ {
348
+ "epoch": 0.04,
349
+ "learning_rate": 9.95748299319728e-06,
350
+ "loss": 0.4598,
351
+ "step": 275
352
+ },
353
+ {
354
+ "epoch": 0.04,
355
+ "learning_rate": 9.956709956709958e-06,
356
+ "loss": 0.4913,
357
+ "step": 280
358
+ },
359
+ {
360
+ "epoch": 0.04,
361
+ "learning_rate": 9.955936920222636e-06,
362
+ "loss": 0.4947,
363
+ "step": 285
364
+ },
365
+ {
366
+ "epoch": 0.04,
367
+ "learning_rate": 9.955163883735313e-06,
368
+ "loss": 0.4806,
369
+ "step": 290
370
+ },
371
+ {
372
+ "epoch": 0.05,
373
+ "learning_rate": 9.95439084724799e-06,
374
+ "loss": 0.4659,
375
+ "step": 295
376
+ },
377
+ {
378
+ "epoch": 0.05,
379
+ "learning_rate": 9.953617810760668e-06,
380
+ "loss": 0.4555,
381
+ "step": 300
382
+ },
383
+ {
384
+ "epoch": 0.05,
385
+ "learning_rate": 9.952844774273347e-06,
386
+ "loss": 0.4606,
387
+ "step": 305
388
+ },
389
+ {
390
+ "epoch": 0.05,
391
+ "learning_rate": 9.952071737786024e-06,
392
+ "loss": 0.4905,
393
+ "step": 310
394
+ },
395
+ {
396
+ "epoch": 0.05,
397
+ "learning_rate": 9.951298701298701e-06,
398
+ "loss": 0.4423,
399
+ "step": 315
400
+ },
401
+ {
402
+ "epoch": 0.05,
403
+ "learning_rate": 9.95052566481138e-06,
404
+ "loss": 0.4855,
405
+ "step": 320
406
+ },
407
+ {
408
+ "epoch": 0.05,
409
+ "learning_rate": 9.949752628324058e-06,
410
+ "loss": 0.486,
411
+ "step": 325
412
+ },
413
+ {
414
+ "epoch": 0.05,
415
+ "learning_rate": 9.948979591836737e-06,
416
+ "loss": 0.4774,
417
+ "step": 330
418
+ },
419
+ {
420
+ "epoch": 0.05,
421
+ "learning_rate": 9.948206555349414e-06,
422
+ "loss": 0.4909,
423
+ "step": 335
424
+ },
425
+ {
426
+ "epoch": 0.05,
427
+ "learning_rate": 9.947433518862091e-06,
428
+ "loss": 0.47,
429
+ "step": 340
430
+ },
431
+ {
432
+ "epoch": 0.05,
433
+ "learning_rate": 9.946660482374768e-06,
434
+ "loss": 0.4496,
435
+ "step": 345
436
+ },
437
+ {
438
+ "epoch": 0.05,
439
+ "learning_rate": 9.945887445887446e-06,
440
+ "loss": 0.5146,
441
+ "step": 350
442
+ },
443
+ {
444
+ "epoch": 0.05,
445
+ "learning_rate": 9.945114409400125e-06,
446
+ "loss": 0.4876,
447
+ "step": 355
448
+ },
449
+ {
450
+ "epoch": 0.06,
451
+ "learning_rate": 9.944341372912802e-06,
452
+ "loss": 0.4747,
453
+ "step": 360
454
+ },
455
+ {
456
+ "epoch": 0.06,
457
+ "learning_rate": 9.94356833642548e-06,
458
+ "loss": 0.4614,
459
+ "step": 365
460
+ },
461
+ {
462
+ "epoch": 0.06,
463
+ "learning_rate": 9.942795299938158e-06,
464
+ "loss": 0.4755,
465
+ "step": 370
466
+ },
467
+ {
468
+ "epoch": 0.06,
469
+ "learning_rate": 9.942022263450836e-06,
470
+ "loss": 0.4785,
471
+ "step": 375
472
+ },
473
+ {
474
+ "epoch": 0.06,
475
+ "learning_rate": 9.941249226963513e-06,
476
+ "loss": 0.4581,
477
+ "step": 380
478
+ },
479
+ {
480
+ "epoch": 0.06,
481
+ "learning_rate": 9.940476190476192e-06,
482
+ "loss": 0.4671,
483
+ "step": 385
484
+ },
485
+ {
486
+ "epoch": 0.06,
487
+ "learning_rate": 9.93970315398887e-06,
488
+ "loss": 0.4327,
489
+ "step": 390
490
+ },
491
+ {
492
+ "epoch": 0.06,
493
+ "learning_rate": 9.938930117501547e-06,
494
+ "loss": 0.4906,
495
+ "step": 395
496
+ },
497
+ {
498
+ "epoch": 0.06,
499
+ "learning_rate": 9.938157081014226e-06,
500
+ "loss": 0.5006,
501
+ "step": 400
502
+ },
503
+ {
504
+ "epoch": 0.06,
505
+ "eval_accuracy": 0.5519790562862308,
506
+ "eval_accuracy_sklearn": 0.5519790562862308,
507
+ "eval_f1": 0.39240549130993435,
508
+ "eval_loss": 0.969095766544342,
509
+ "eval_precision": 0.6691460531626593,
510
+ "eval_recall": 0.27759837340031096,
511
+ "eval_runtime": 4903.2377,
512
+ "eval_samples_per_second": 16.36,
513
+ "eval_steps_per_second": 2.045,
514
+ "step": 400
515
+ }
516
+ ],
517
+ "max_steps": 64680,
518
+ "num_train_epochs": 10,
519
+ "total_flos": 4.75071167594496e+16,
520
+ "trial_name": null,
521
+ "trial_params": null
522
+ }
ranker_bs256_0/checkpoint-400/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a89904cc492981aada74646ac2bddbb0a7e40cc9d86615845667f8c5c4c4e4f
3
+ size 3451
ranker_bs256_0/checkpoint-400/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
ranker_bs256_0/checkpoint-600/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Salesforce/codet5-large",
3
+ "architectures": [
4
+ "T5EncoderForSequenceClassification"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 4096,
8
+ "d_kv": 64,
9
+ "d_model": 1024,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 24,
22
+ "num_heads": 16,
23
+ "num_layers": 24,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "problem_type": "single_label_classification",
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "task_specific_params": {
30
+ "summarization": {
31
+ "early_stopping": true,
32
+ "length_penalty": 2.0,
33
+ "max_length": 200,
34
+ "min_length": 30,
35
+ "no_repeat_ngram_size": 3,
36
+ "num_beams": 4,
37
+ "prefix": "summarize: "
38
+ },
39
+ "translation_en_to_de": {
40
+ "early_stopping": true,
41
+ "max_length": 300,
42
+ "num_beams": 4,
43
+ "prefix": "translate English to German: "
44
+ },
45
+ "translation_en_to_fr": {
46
+ "early_stopping": true,
47
+ "max_length": 300,
48
+ "num_beams": 4,
49
+ "prefix": "translate English to French: "
50
+ },
51
+ "translation_en_to_ro": {
52
+ "early_stopping": true,
53
+ "max_length": 300,
54
+ "num_beams": 4,
55
+ "prefix": "translate English to Romanian: "
56
+ }
57
+ },
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.26.0",
60
+ "use_cache": true,
61
+ "vocab_size": 32100
62
+ }
ranker_bs256_0/checkpoint-600/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
ranker_bs256_0/checkpoint-600/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23de02117d6756a8aa1e978b4cf09ea969ab510b60a8ee2551a5f30fbdefd645
3
+ size 2679420869
ranker_bs256_0/checkpoint-600/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5bbd9d93541fd4060695e248811cfbb9d2b92fb7348465343c4fb46a8c7453a
3
+ size 1339719649
ranker_bs256_0/checkpoint-600/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17cf98bce611c6c04b39b31e2565db7b598e2c0270c7aba76cb525ce0217e883
3
+ size 14575
ranker_bs256_0/checkpoint-600/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ba05284aa57c765f70ef39acbeed8486d8bbc213a07ee72ee775e41e1b19f9a
3
+ size 627
ranker_bs256_0/checkpoint-600/special_tokens_map.json ADDED
@@ -0,0 +1,753 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<extra_id_99>",
5
+ "lstrip": true,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<extra_id_98>",
12
+ "lstrip": true,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<extra_id_97>",
19
+ "lstrip": true,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "<extra_id_96>",
26
+ "lstrip": true,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<extra_id_95>",
33
+ "lstrip": true,
34
+ "normalized": true,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "<extra_id_94>",
40
+ "lstrip": true,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<extra_id_93>",
47
+ "lstrip": true,
48
+ "normalized": true,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "<extra_id_92>",
54
+ "lstrip": true,
55
+ "normalized": true,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<extra_id_91>",
61
+ "lstrip": true,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "<extra_id_90>",
68
+ "lstrip": true,
69
+ "normalized": true,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "<extra_id_89>",
75
+ "lstrip": true,
76
+ "normalized": true,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ },
80
+ {
81
+ "content": "<extra_id_88>",
82
+ "lstrip": true,
83
+ "normalized": true,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ },
87
+ {
88
+ "content": "<extra_id_87>",
89
+ "lstrip": true,
90
+ "normalized": true,
91
+ "rstrip": false,
92
+ "single_word": false
93
+ },
94
+ {
95
+ "content": "<extra_id_86>",
96
+ "lstrip": true,
97
+ "normalized": true,
98
+ "rstrip": false,
99
+ "single_word": false
100
+ },
101
+ {
102
+ "content": "<extra_id_85>",
103
+ "lstrip": true,
104
+ "normalized": true,
105
+ "rstrip": false,
106
+ "single_word": false
107
+ },
108
+ {
109
+ "content": "<extra_id_84>",
110
+ "lstrip": true,
111
+ "normalized": true,
112
+ "rstrip": false,
113
+ "single_word": false
114
+ },
115
+ {
116
+ "content": "<extra_id_83>",
117
+ "lstrip": true,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false
121
+ },
122
+ {
123
+ "content": "<extra_id_82>",
124
+ "lstrip": true,
125
+ "normalized": true,
126
+ "rstrip": false,
127
+ "single_word": false
128
+ },
129
+ {
130
+ "content": "<extra_id_81>",
131
+ "lstrip": true,
132
+ "normalized": true,
133
+ "rstrip": false,
134
+ "single_word": false
135
+ },
136
+ {
137
+ "content": "<extra_id_80>",
138
+ "lstrip": true,
139
+ "normalized": true,
140
+ "rstrip": false,
141
+ "single_word": false
142
+ },
143
+ {
144
+ "content": "<extra_id_79>",
145
+ "lstrip": true,
146
+ "normalized": true,
147
+ "rstrip": false,
148
+ "single_word": false
149
+ },
150
+ {
151
+ "content": "<extra_id_78>",
152
+ "lstrip": true,
153
+ "normalized": true,
154
+ "rstrip": false,
155
+ "single_word": false
156
+ },
157
+ {
158
+ "content": "<extra_id_77>",
159
+ "lstrip": true,
160
+ "normalized": true,
161
+ "rstrip": false,
162
+ "single_word": false
163
+ },
164
+ {
165
+ "content": "<extra_id_76>",
166
+ "lstrip": true,
167
+ "normalized": true,
168
+ "rstrip": false,
169
+ "single_word": false
170
+ },
171
+ {
172
+ "content": "<extra_id_75>",
173
+ "lstrip": true,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false
177
+ },
178
+ {
179
+ "content": "<extra_id_74>",
180
+ "lstrip": true,
181
+ "normalized": true,
182
+ "rstrip": false,
183
+ "single_word": false
184
+ },
185
+ {
186
+ "content": "<extra_id_73>",
187
+ "lstrip": true,
188
+ "normalized": true,
189
+ "rstrip": false,
190
+ "single_word": false
191
+ },
192
+ {
193
+ "content": "<extra_id_72>",
194
+ "lstrip": true,
195
+ "normalized": true,
196
+ "rstrip": false,
197
+ "single_word": false
198
+ },
199
+ {
200
+ "content": "<extra_id_71>",
201
+ "lstrip": true,
202
+ "normalized": true,
203
+ "rstrip": false,
204
+ "single_word": false
205
+ },
206
+ {
207
+ "content": "<extra_id_70>",
208
+ "lstrip": true,
209
+ "normalized": true,
210
+ "rstrip": false,
211
+ "single_word": false
212
+ },
213
+ {
214
+ "content": "<extra_id_69>",
215
+ "lstrip": true,
216
+ "normalized": true,
217
+ "rstrip": false,
218
+ "single_word": false
219
+ },
220
+ {
221
+ "content": "<extra_id_68>",
222
+ "lstrip": true,
223
+ "normalized": true,
224
+ "rstrip": false,
225
+ "single_word": false
226
+ },
227
+ {
228
+ "content": "<extra_id_67>",
229
+ "lstrip": true,
230
+ "normalized": true,
231
+ "rstrip": false,
232
+ "single_word": false
233
+ },
234
+ {
235
+ "content": "<extra_id_66>",
236
+ "lstrip": true,
237
+ "normalized": true,
238
+ "rstrip": false,
239
+ "single_word": false
240
+ },
241
+ {
242
+ "content": "<extra_id_65>",
243
+ "lstrip": true,
244
+ "normalized": true,
245
+ "rstrip": false,
246
+ "single_word": false
247
+ },
248
+ {
249
+ "content": "<extra_id_64>",
250
+ "lstrip": true,
251
+ "normalized": true,
252
+ "rstrip": false,
253
+ "single_word": false
254
+ },
255
+ {
256
+ "content": "<extra_id_63>",
257
+ "lstrip": true,
258
+ "normalized": true,
259
+ "rstrip": false,
260
+ "single_word": false
261
+ },
262
+ {
263
+ "content": "<extra_id_62>",
264
+ "lstrip": true,
265
+ "normalized": true,
266
+ "rstrip": false,
267
+ "single_word": false
268
+ },
269
+ {
270
+ "content": "<extra_id_61>",
271
+ "lstrip": true,
272
+ "normalized": true,
273
+ "rstrip": false,
274
+ "single_word": false
275
+ },
276
+ {
277
+ "content": "<extra_id_60>",
278
+ "lstrip": true,
279
+ "normalized": true,
280
+ "rstrip": false,
281
+ "single_word": false
282
+ },
283
+ {
284
+ "content": "<extra_id_59>",
285
+ "lstrip": true,
286
+ "normalized": true,
287
+ "rstrip": false,
288
+ "single_word": false
289
+ },
290
+ {
291
+ "content": "<extra_id_58>",
292
+ "lstrip": true,
293
+ "normalized": true,
294
+ "rstrip": false,
295
+ "single_word": false
296
+ },
297
+ {
298
+ "content": "<extra_id_57>",
299
+ "lstrip": true,
300
+ "normalized": true,
301
+ "rstrip": false,
302
+ "single_word": false
303
+ },
304
+ {
305
+ "content": "<extra_id_56>",
306
+ "lstrip": true,
307
+ "normalized": true,
308
+ "rstrip": false,
309
+ "single_word": false
310
+ },
311
+ {
312
+ "content": "<extra_id_55>",
313
+ "lstrip": true,
314
+ "normalized": true,
315
+ "rstrip": false,
316
+ "single_word": false
317
+ },
318
+ {
319
+ "content": "<extra_id_54>",
320
+ "lstrip": true,
321
+ "normalized": true,
322
+ "rstrip": false,
323
+ "single_word": false
324
+ },
325
+ {
326
+ "content": "<extra_id_53>",
327
+ "lstrip": true,
328
+ "normalized": true,
329
+ "rstrip": false,
330
+ "single_word": false
331
+ },
332
+ {
333
+ "content": "<extra_id_52>",
334
+ "lstrip": true,
335
+ "normalized": true,
336
+ "rstrip": false,
337
+ "single_word": false
338
+ },
339
+ {
340
+ "content": "<extra_id_51>",
341
+ "lstrip": true,
342
+ "normalized": true,
343
+ "rstrip": false,
344
+ "single_word": false
345
+ },
346
+ {
347
+ "content": "<extra_id_50>",
348
+ "lstrip": true,
349
+ "normalized": true,
350
+ "rstrip": false,
351
+ "single_word": false
352
+ },
353
+ {
354
+ "content": "<extra_id_49>",
355
+ "lstrip": true,
356
+ "normalized": true,
357
+ "rstrip": false,
358
+ "single_word": false
359
+ },
360
+ {
361
+ "content": "<extra_id_48>",
362
+ "lstrip": true,
363
+ "normalized": true,
364
+ "rstrip": false,
365
+ "single_word": false
366
+ },
367
+ {
368
+ "content": "<extra_id_47>",
369
+ "lstrip": true,
370
+ "normalized": true,
371
+ "rstrip": false,
372
+ "single_word": false
373
+ },
374
+ {
375
+ "content": "<extra_id_46>",
376
+ "lstrip": true,
377
+ "normalized": true,
378
+ "rstrip": false,
379
+ "single_word": false
380
+ },
381
+ {
382
+ "content": "<extra_id_45>",
383
+ "lstrip": true,
384
+ "normalized": true,
385
+ "rstrip": false,
386
+ "single_word": false
387
+ },
388
+ {
389
+ "content": "<extra_id_44>",
390
+ "lstrip": true,
391
+ "normalized": true,
392
+ "rstrip": false,
393
+ "single_word": false
394
+ },
395
+ {
396
+ "content": "<extra_id_43>",
397
+ "lstrip": true,
398
+ "normalized": true,
399
+ "rstrip": false,
400
+ "single_word": false
401
+ },
402
+ {
403
+ "content": "<extra_id_42>",
404
+ "lstrip": true,
405
+ "normalized": true,
406
+ "rstrip": false,
407
+ "single_word": false
408
+ },
409
+ {
410
+ "content": "<extra_id_41>",
411
+ "lstrip": true,
412
+ "normalized": true,
413
+ "rstrip": false,
414
+ "single_word": false
415
+ },
416
+ {
417
+ "content": "<extra_id_40>",
418
+ "lstrip": true,
419
+ "normalized": true,
420
+ "rstrip": false,
421
+ "single_word": false
422
+ },
423
+ {
424
+ "content": "<extra_id_39>",
425
+ "lstrip": true,
426
+ "normalized": true,
427
+ "rstrip": false,
428
+ "single_word": false
429
+ },
430
+ {
431
+ "content": "<extra_id_38>",
432
+ "lstrip": true,
433
+ "normalized": true,
434
+ "rstrip": false,
435
+ "single_word": false
436
+ },
437
+ {
438
+ "content": "<extra_id_37>",
439
+ "lstrip": true,
440
+ "normalized": true,
441
+ "rstrip": false,
442
+ "single_word": false
443
+ },
444
+ {
445
+ "content": "<extra_id_36>",
446
+ "lstrip": true,
447
+ "normalized": true,
448
+ "rstrip": false,
449
+ "single_word": false
450
+ },
451
+ {
452
+ "content": "<extra_id_35>",
453
+ "lstrip": true,
454
+ "normalized": true,
455
+ "rstrip": false,
456
+ "single_word": false
457
+ },
458
+ {
459
+ "content": "<extra_id_34>",
460
+ "lstrip": true,
461
+ "normalized": true,
462
+ "rstrip": false,
463
+ "single_word": false
464
+ },
465
+ {
466
+ "content": "<extra_id_33>",
467
+ "lstrip": true,
468
+ "normalized": true,
469
+ "rstrip": false,
470
+ "single_word": false
471
+ },
472
+ {
473
+ "content": "<extra_id_32>",
474
+ "lstrip": true,
475
+ "normalized": true,
476
+ "rstrip": false,
477
+ "single_word": false
478
+ },
479
+ {
480
+ "content": "<extra_id_31>",
481
+ "lstrip": true,
482
+ "normalized": true,
483
+ "rstrip": false,
484
+ "single_word": false
485
+ },
486
+ {
487
+ "content": "<extra_id_30>",
488
+ "lstrip": true,
489
+ "normalized": true,
490
+ "rstrip": false,
491
+ "single_word": false
492
+ },
493
+ {
494
+ "content": "<extra_id_29>",
495
+ "lstrip": true,
496
+ "normalized": true,
497
+ "rstrip": false,
498
+ "single_word": false
499
+ },
500
+ {
501
+ "content": "<extra_id_28>",
502
+ "lstrip": true,
503
+ "normalized": true,
504
+ "rstrip": false,
505
+ "single_word": false
506
+ },
507
+ {
508
+ "content": "<extra_id_27>",
509
+ "lstrip": true,
510
+ "normalized": true,
511
+ "rstrip": false,
512
+ "single_word": false
513
+ },
514
+ {
515
+ "content": "<extra_id_26>",
516
+ "lstrip": true,
517
+ "normalized": true,
518
+ "rstrip": false,
519
+ "single_word": false
520
+ },
521
+ {
522
+ "content": "<extra_id_25>",
523
+ "lstrip": true,
524
+ "normalized": true,
525
+ "rstrip": false,
526
+ "single_word": false
527
+ },
528
+ {
529
+ "content": "<extra_id_24>",
530
+ "lstrip": true,
531
+ "normalized": true,
532
+ "rstrip": false,
533
+ "single_word": false
534
+ },
535
+ {
536
+ "content": "<extra_id_23>",
537
+ "lstrip": true,
538
+ "normalized": true,
539
+ "rstrip": false,
540
+ "single_word": false
541
+ },
542
+ {
543
+ "content": "<extra_id_22>",
544
+ "lstrip": true,
545
+ "normalized": true,
546
+ "rstrip": false,
547
+ "single_word": false
548
+ },
549
+ {
550
+ "content": "<extra_id_21>",
551
+ "lstrip": true,
552
+ "normalized": true,
553
+ "rstrip": false,
554
+ "single_word": false
555
+ },
556
+ {
557
+ "content": "<extra_id_20>",
558
+ "lstrip": true,
559
+ "normalized": true,
560
+ "rstrip": false,
561
+ "single_word": false
562
+ },
563
+ {
564
+ "content": "<extra_id_19>",
565
+ "lstrip": true,
566
+ "normalized": true,
567
+ "rstrip": false,
568
+ "single_word": false
569
+ },
570
+ {
571
+ "content": "<extra_id_18>",
572
+ "lstrip": true,
573
+ "normalized": true,
574
+ "rstrip": false,
575
+ "single_word": false
576
+ },
577
+ {
578
+ "content": "<extra_id_17>",
579
+ "lstrip": true,
580
+ "normalized": true,
581
+ "rstrip": false,
582
+ "single_word": false
583
+ },
584
+ {
585
+ "content": "<extra_id_16>",
586
+ "lstrip": true,
587
+ "normalized": true,
588
+ "rstrip": false,
589
+ "single_word": false
590
+ },
591
+ {
592
+ "content": "<extra_id_15>",
593
+ "lstrip": true,
594
+ "normalized": true,
595
+ "rstrip": false,
596
+ "single_word": false
597
+ },
598
+ {
599
+ "content": "<extra_id_14>",
600
+ "lstrip": true,
601
+ "normalized": true,
602
+ "rstrip": false,
603
+ "single_word": false
604
+ },
605
+ {
606
+ "content": "<extra_id_13>",
607
+ "lstrip": true,
608
+ "normalized": true,
609
+ "rstrip": false,
610
+ "single_word": false
611
+ },
612
+ {
613
+ "content": "<extra_id_12>",
614
+ "lstrip": true,
615
+ "normalized": true,
616
+ "rstrip": false,
617
+ "single_word": false
618
+ },
619
+ {
620
+ "content": "<extra_id_11>",
621
+ "lstrip": true,
622
+ "normalized": true,
623
+ "rstrip": false,
624
+ "single_word": false
625
+ },
626
+ {
627
+ "content": "<extra_id_10>",
628
+ "lstrip": true,
629
+ "normalized": true,
630
+ "rstrip": false,
631
+ "single_word": false
632
+ },
633
+ {
634
+ "content": "<extra_id_9>",
635
+ "lstrip": true,
636
+ "normalized": true,
637
+ "rstrip": false,
638
+ "single_word": false
639
+ },
640
+ {
641
+ "content": "<extra_id_8>",
642
+ "lstrip": true,
643
+ "normalized": true,
644
+ "rstrip": false,
645
+ "single_word": false
646
+ },
647
+ {
648
+ "content": "<extra_id_7>",
649
+ "lstrip": true,
650
+ "normalized": true,
651
+ "rstrip": false,
652
+ "single_word": false
653
+ },
654
+ {
655
+ "content": "<extra_id_6>",
656
+ "lstrip": true,
657
+ "normalized": true,
658
+ "rstrip": false,
659
+ "single_word": false
660
+ },
661
+ {
662
+ "content": "<extra_id_5>",
663
+ "lstrip": true,
664
+ "normalized": true,
665
+ "rstrip": false,
666
+ "single_word": false
667
+ },
668
+ {
669
+ "content": "<extra_id_4>",
670
+ "lstrip": true,
671
+ "normalized": true,
672
+ "rstrip": false,
673
+ "single_word": false
674
+ },
675
+ {
676
+ "content": "<extra_id_3>",
677
+ "lstrip": true,
678
+ "normalized": true,
679
+ "rstrip": false,
680
+ "single_word": false
681
+ },
682
+ {
683
+ "content": "<extra_id_2>",
684
+ "lstrip": true,
685
+ "normalized": true,
686
+ "rstrip": false,
687
+ "single_word": false
688
+ },
689
+ {
690
+ "content": "<extra_id_1>",
691
+ "lstrip": true,
692
+ "normalized": true,
693
+ "rstrip": false,
694
+ "single_word": false
695
+ },
696
+ {
697
+ "content": "<extra_id_0>",
698
+ "lstrip": true,
699
+ "normalized": true,
700
+ "rstrip": false,
701
+ "single_word": false
702
+ }
703
+ ],
704
+ "bos_token": {
705
+ "content": "<s>",
706
+ "lstrip": false,
707
+ "normalized": true,
708
+ "rstrip": false,
709
+ "single_word": false
710
+ },
711
+ "cls_token": {
712
+ "content": "<s>",
713
+ "lstrip": false,
714
+ "normalized": true,
715
+ "rstrip": false,
716
+ "single_word": false
717
+ },
718
+ "eos_token": {
719
+ "content": "</s>",
720
+ "lstrip": false,
721
+ "normalized": true,
722
+ "rstrip": false,
723
+ "single_word": false
724
+ },
725
+ "mask_token": {
726
+ "content": "<mask>",
727
+ "lstrip": true,
728
+ "normalized": true,
729
+ "rstrip": false,
730
+ "single_word": false
731
+ },
732
+ "pad_token": {
733
+ "content": "<pad>",
734
+ "lstrip": false,
735
+ "normalized": true,
736
+ "rstrip": false,
737
+ "single_word": false
738
+ },
739
+ "sep_token": {
740
+ "content": "</s>",
741
+ "lstrip": false,
742
+ "normalized": true,
743
+ "rstrip": false,
744
+ "single_word": false
745
+ },
746
+ "unk_token": {
747
+ "content": "<unk>",
748
+ "lstrip": false,
749
+ "normalized": true,
750
+ "rstrip": false,
751
+ "single_word": false
752
+ }
753
+ }
ranker_bs256_0/checkpoint-600/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
ranker_bs256_0/checkpoint-600/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "cls_token": {
12
+ "__type": "AddedToken",
13
+ "content": "<s>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "__type": "AddedToken",
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "errors": "replace",
28
+ "mask_token": {
29
+ "__type": "AddedToken",
30
+ "content": "<mask>",
31
+ "lstrip": true,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": false
35
+ },
36
+ "model_max_length": 512,
37
+ "name_or_path": "Salesforce/codet5-large",
38
+ "pad_token": {
39
+ "__type": "AddedToken",
40
+ "content": "<pad>",
41
+ "lstrip": false,
42
+ "normalized": true,
43
+ "rstrip": false,
44
+ "single_word": false
45
+ },
46
+ "sep_token": {
47
+ "__type": "AddedToken",
48
+ "content": "</s>",
49
+ "lstrip": false,
50
+ "normalized": true,
51
+ "rstrip": false,
52
+ "single_word": false
53
+ },
54
+ "special_tokens_map_file": "/export/home/cache/model/5941df5e4315c5ab63b7b2ac791fb0bf0f209744a055c06b43b5274849137cdd.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": {
58
+ "__type": "AddedToken",
59
+ "content": "<unk>",
60
+ "lstrip": false,
61
+ "normalized": true,
62
+ "rstrip": false,
63
+ "single_word": false
64
+ }
65
+ }
ranker_bs256_0/checkpoint-600/trainer_state.json ADDED
@@ -0,0 +1,775 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.092752727026792,
5
+ "global_step": 600,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.0,
12
+ "learning_rate": 9.999226963512678e-06,
13
+ "loss": 1.1361,
14
+ "step": 5
15
+ },
16
+ {
17
+ "epoch": 0.0,
18
+ "learning_rate": 9.998453927025357e-06,
19
+ "loss": 0.7353,
20
+ "step": 10
21
+ },
22
+ {
23
+ "epoch": 0.0,
24
+ "learning_rate": 9.997680890538034e-06,
25
+ "loss": 0.6909,
26
+ "step": 15
27
+ },
28
+ {
29
+ "epoch": 0.0,
30
+ "learning_rate": 9.996907854050712e-06,
31
+ "loss": 0.6498,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 0.0,
36
+ "learning_rate": 9.99613481756339e-06,
37
+ "loss": 0.6414,
38
+ "step": 25
39
+ },
40
+ {
41
+ "epoch": 0.0,
42
+ "learning_rate": 9.995361781076068e-06,
43
+ "loss": 0.6415,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 0.01,
48
+ "learning_rate": 9.994588744588745e-06,
49
+ "loss": 0.6317,
50
+ "step": 35
51
+ },
52
+ {
53
+ "epoch": 0.01,
54
+ "learning_rate": 9.993815708101423e-06,
55
+ "loss": 0.6378,
56
+ "step": 40
57
+ },
58
+ {
59
+ "epoch": 0.01,
60
+ "learning_rate": 9.9930426716141e-06,
61
+ "loss": 0.6347,
62
+ "step": 45
63
+ },
64
+ {
65
+ "epoch": 0.01,
66
+ "learning_rate": 9.992269635126779e-06,
67
+ "loss": 0.5924,
68
+ "step": 50
69
+ },
70
+ {
71
+ "epoch": 0.01,
72
+ "learning_rate": 9.991496598639456e-06,
73
+ "loss": 0.6046,
74
+ "step": 55
75
+ },
76
+ {
77
+ "epoch": 0.01,
78
+ "learning_rate": 9.990723562152135e-06,
79
+ "loss": 0.6045,
80
+ "step": 60
81
+ },
82
+ {
83
+ "epoch": 0.01,
84
+ "learning_rate": 9.989950525664813e-06,
85
+ "loss": 0.6,
86
+ "step": 65
87
+ },
88
+ {
89
+ "epoch": 0.01,
90
+ "learning_rate": 9.98917748917749e-06,
91
+ "loss": 0.5504,
92
+ "step": 70
93
+ },
94
+ {
95
+ "epoch": 0.01,
96
+ "learning_rate": 9.988404452690169e-06,
97
+ "loss": 0.5747,
98
+ "step": 75
99
+ },
100
+ {
101
+ "epoch": 0.01,
102
+ "learning_rate": 9.987631416202846e-06,
103
+ "loss": 0.5526,
104
+ "step": 80
105
+ },
106
+ {
107
+ "epoch": 0.01,
108
+ "learning_rate": 9.986858379715523e-06,
109
+ "loss": 0.5958,
110
+ "step": 85
111
+ },
112
+ {
113
+ "epoch": 0.01,
114
+ "learning_rate": 9.9860853432282e-06,
115
+ "loss": 0.608,
116
+ "step": 90
117
+ },
118
+ {
119
+ "epoch": 0.01,
120
+ "learning_rate": 9.985312306740878e-06,
121
+ "loss": 0.5988,
122
+ "step": 95
123
+ },
124
+ {
125
+ "epoch": 0.02,
126
+ "learning_rate": 9.984539270253557e-06,
127
+ "loss": 0.5861,
128
+ "step": 100
129
+ },
130
+ {
131
+ "epoch": 0.02,
132
+ "learning_rate": 9.983766233766234e-06,
133
+ "loss": 0.5749,
134
+ "step": 105
135
+ },
136
+ {
137
+ "epoch": 0.02,
138
+ "learning_rate": 9.982993197278913e-06,
139
+ "loss": 0.5498,
140
+ "step": 110
141
+ },
142
+ {
143
+ "epoch": 0.02,
144
+ "learning_rate": 9.98222016079159e-06,
145
+ "loss": 0.5841,
146
+ "step": 115
147
+ },
148
+ {
149
+ "epoch": 0.02,
150
+ "learning_rate": 9.981447124304268e-06,
151
+ "loss": 0.5973,
152
+ "step": 120
153
+ },
154
+ {
155
+ "epoch": 0.02,
156
+ "learning_rate": 9.980674087816947e-06,
157
+ "loss": 0.5954,
158
+ "step": 125
159
+ },
160
+ {
161
+ "epoch": 0.02,
162
+ "learning_rate": 9.979901051329624e-06,
163
+ "loss": 0.527,
164
+ "step": 130
165
+ },
166
+ {
167
+ "epoch": 0.02,
168
+ "learning_rate": 9.979128014842301e-06,
169
+ "loss": 0.5321,
170
+ "step": 135
171
+ },
172
+ {
173
+ "epoch": 0.02,
174
+ "learning_rate": 9.978354978354979e-06,
175
+ "loss": 0.5781,
176
+ "step": 140
177
+ },
178
+ {
179
+ "epoch": 0.02,
180
+ "learning_rate": 9.977581941867656e-06,
181
+ "loss": 0.5119,
182
+ "step": 145
183
+ },
184
+ {
185
+ "epoch": 0.02,
186
+ "learning_rate": 9.976808905380335e-06,
187
+ "loss": 0.5271,
188
+ "step": 150
189
+ },
190
+ {
191
+ "epoch": 0.02,
192
+ "learning_rate": 9.976035868893012e-06,
193
+ "loss": 0.5814,
194
+ "step": 155
195
+ },
196
+ {
197
+ "epoch": 0.02,
198
+ "learning_rate": 9.97526283240569e-06,
199
+ "loss": 0.518,
200
+ "step": 160
201
+ },
202
+ {
203
+ "epoch": 0.03,
204
+ "learning_rate": 9.974489795918369e-06,
205
+ "loss": 0.5335,
206
+ "step": 165
207
+ },
208
+ {
209
+ "epoch": 0.03,
210
+ "learning_rate": 9.973716759431046e-06,
211
+ "loss": 0.5062,
212
+ "step": 170
213
+ },
214
+ {
215
+ "epoch": 0.03,
216
+ "learning_rate": 9.972943722943725e-06,
217
+ "loss": 0.5253,
218
+ "step": 175
219
+ },
220
+ {
221
+ "epoch": 0.03,
222
+ "learning_rate": 9.972170686456402e-06,
223
+ "loss": 0.5856,
224
+ "step": 180
225
+ },
226
+ {
227
+ "epoch": 0.03,
228
+ "learning_rate": 9.97139764996908e-06,
229
+ "loss": 0.5196,
230
+ "step": 185
231
+ },
232
+ {
233
+ "epoch": 0.03,
234
+ "learning_rate": 9.970624613481757e-06,
235
+ "loss": 0.4764,
236
+ "step": 190
237
+ },
238
+ {
239
+ "epoch": 0.03,
240
+ "learning_rate": 9.969851576994434e-06,
241
+ "loss": 0.5254,
242
+ "step": 195
243
+ },
244
+ {
245
+ "epoch": 0.03,
246
+ "learning_rate": 9.969078540507111e-06,
247
+ "loss": 0.5442,
248
+ "step": 200
249
+ },
250
+ {
251
+ "epoch": 0.03,
252
+ "eval_accuracy": 0.5804400673190799,
253
+ "eval_accuracy_sklearn": 0.5804400673190799,
254
+ "eval_f1": 0.5294915349019279,
255
+ "eval_loss": 0.7918509840965271,
256
+ "eval_precision": 0.6370946036872561,
257
+ "eval_recall": 0.45298409281186464,
258
+ "eval_runtime": 4914.2737,
259
+ "eval_samples_per_second": 16.323,
260
+ "eval_steps_per_second": 2.04,
261
+ "step": 200
262
+ },
263
+ {
264
+ "epoch": 0.03,
265
+ "learning_rate": 9.96830550401979e-06,
266
+ "loss": 0.5163,
267
+ "step": 205
268
+ },
269
+ {
270
+ "epoch": 0.03,
271
+ "learning_rate": 9.967532467532468e-06,
272
+ "loss": 0.5044,
273
+ "step": 210
274
+ },
275
+ {
276
+ "epoch": 0.03,
277
+ "learning_rate": 9.966759431045147e-06,
278
+ "loss": 0.5078,
279
+ "step": 215
280
+ },
281
+ {
282
+ "epoch": 0.03,
283
+ "learning_rate": 9.965986394557824e-06,
284
+ "loss": 0.4623,
285
+ "step": 220
286
+ },
287
+ {
288
+ "epoch": 0.03,
289
+ "learning_rate": 9.965213358070501e-06,
290
+ "loss": 0.5359,
291
+ "step": 225
292
+ },
293
+ {
294
+ "epoch": 0.04,
295
+ "learning_rate": 9.96444032158318e-06,
296
+ "loss": 0.5068,
297
+ "step": 230
298
+ },
299
+ {
300
+ "epoch": 0.04,
301
+ "learning_rate": 9.963667285095858e-06,
302
+ "loss": 0.5029,
303
+ "step": 235
304
+ },
305
+ {
306
+ "epoch": 0.04,
307
+ "learning_rate": 9.962894248608535e-06,
308
+ "loss": 0.5084,
309
+ "step": 240
310
+ },
311
+ {
312
+ "epoch": 0.04,
313
+ "learning_rate": 9.962121212121212e-06,
314
+ "loss": 0.4783,
315
+ "step": 245
316
+ },
317
+ {
318
+ "epoch": 0.04,
319
+ "learning_rate": 9.96134817563389e-06,
320
+ "loss": 0.5216,
321
+ "step": 250
322
+ },
323
+ {
324
+ "epoch": 0.04,
325
+ "learning_rate": 9.960575139146569e-06,
326
+ "loss": 0.54,
327
+ "step": 255
328
+ },
329
+ {
330
+ "epoch": 0.04,
331
+ "learning_rate": 9.959802102659246e-06,
332
+ "loss": 0.5494,
333
+ "step": 260
334
+ },
335
+ {
336
+ "epoch": 0.04,
337
+ "learning_rate": 9.959029066171925e-06,
338
+ "loss": 0.5401,
339
+ "step": 265
340
+ },
341
+ {
342
+ "epoch": 0.04,
343
+ "learning_rate": 9.958256029684602e-06,
344
+ "loss": 0.5073,
345
+ "step": 270
346
+ },
347
+ {
348
+ "epoch": 0.04,
349
+ "learning_rate": 9.95748299319728e-06,
350
+ "loss": 0.4598,
351
+ "step": 275
352
+ },
353
+ {
354
+ "epoch": 0.04,
355
+ "learning_rate": 9.956709956709958e-06,
356
+ "loss": 0.4913,
357
+ "step": 280
358
+ },
359
+ {
360
+ "epoch": 0.04,
361
+ "learning_rate": 9.955936920222636e-06,
362
+ "loss": 0.4947,
363
+ "step": 285
364
+ },
365
+ {
366
+ "epoch": 0.04,
367
+ "learning_rate": 9.955163883735313e-06,
368
+ "loss": 0.4806,
369
+ "step": 290
370
+ },
371
+ {
372
+ "epoch": 0.05,
373
+ "learning_rate": 9.95439084724799e-06,
374
+ "loss": 0.4659,
375
+ "step": 295
376
+ },
377
+ {
378
+ "epoch": 0.05,
379
+ "learning_rate": 9.953617810760668e-06,
380
+ "loss": 0.4555,
381
+ "step": 300
382
+ },
383
+ {
384
+ "epoch": 0.05,
385
+ "learning_rate": 9.952844774273347e-06,
386
+ "loss": 0.4606,
387
+ "step": 305
388
+ },
389
+ {
390
+ "epoch": 0.05,
391
+ "learning_rate": 9.952071737786024e-06,
392
+ "loss": 0.4905,
393
+ "step": 310
394
+ },
395
+ {
396
+ "epoch": 0.05,
397
+ "learning_rate": 9.951298701298701e-06,
398
+ "loss": 0.4423,
399
+ "step": 315
400
+ },
401
+ {
402
+ "epoch": 0.05,
403
+ "learning_rate": 9.95052566481138e-06,
404
+ "loss": 0.4855,
405
+ "step": 320
406
+ },
407
+ {
408
+ "epoch": 0.05,
409
+ "learning_rate": 9.949752628324058e-06,
410
+ "loss": 0.486,
411
+ "step": 325
412
+ },
413
+ {
414
+ "epoch": 0.05,
415
+ "learning_rate": 9.948979591836737e-06,
416
+ "loss": 0.4774,
417
+ "step": 330
418
+ },
419
+ {
420
+ "epoch": 0.05,
421
+ "learning_rate": 9.948206555349414e-06,
422
+ "loss": 0.4909,
423
+ "step": 335
424
+ },
425
+ {
426
+ "epoch": 0.05,
427
+ "learning_rate": 9.947433518862091e-06,
428
+ "loss": 0.47,
429
+ "step": 340
430
+ },
431
+ {
432
+ "epoch": 0.05,
433
+ "learning_rate": 9.946660482374768e-06,
434
+ "loss": 0.4496,
435
+ "step": 345
436
+ },
437
+ {
438
+ "epoch": 0.05,
439
+ "learning_rate": 9.945887445887446e-06,
440
+ "loss": 0.5146,
441
+ "step": 350
442
+ },
443
+ {
444
+ "epoch": 0.05,
445
+ "learning_rate": 9.945114409400125e-06,
446
+ "loss": 0.4876,
447
+ "step": 355
448
+ },
449
+ {
450
+ "epoch": 0.06,
451
+ "learning_rate": 9.944341372912802e-06,
452
+ "loss": 0.4747,
453
+ "step": 360
454
+ },
455
+ {
456
+ "epoch": 0.06,
457
+ "learning_rate": 9.94356833642548e-06,
458
+ "loss": 0.4614,
459
+ "step": 365
460
+ },
461
+ {
462
+ "epoch": 0.06,
463
+ "learning_rate": 9.942795299938158e-06,
464
+ "loss": 0.4755,
465
+ "step": 370
466
+ },
467
+ {
468
+ "epoch": 0.06,
469
+ "learning_rate": 9.942022263450836e-06,
470
+ "loss": 0.4785,
471
+ "step": 375
472
+ },
473
+ {
474
+ "epoch": 0.06,
475
+ "learning_rate": 9.941249226963513e-06,
476
+ "loss": 0.4581,
477
+ "step": 380
478
+ },
479
+ {
480
+ "epoch": 0.06,
481
+ "learning_rate": 9.940476190476192e-06,
482
+ "loss": 0.4671,
483
+ "step": 385
484
+ },
485
+ {
486
+ "epoch": 0.06,
487
+ "learning_rate": 9.93970315398887e-06,
488
+ "loss": 0.4327,
489
+ "step": 390
490
+ },
491
+ {
492
+ "epoch": 0.06,
493
+ "learning_rate": 9.938930117501547e-06,
494
+ "loss": 0.4906,
495
+ "step": 395
496
+ },
497
+ {
498
+ "epoch": 0.06,
499
+ "learning_rate": 9.938157081014226e-06,
500
+ "loss": 0.5006,
501
+ "step": 400
502
+ },
503
+ {
504
+ "epoch": 0.06,
505
+ "eval_accuracy": 0.5519790562862308,
506
+ "eval_accuracy_sklearn": 0.5519790562862308,
507
+ "eval_f1": 0.39240549130993435,
508
+ "eval_loss": 0.969095766544342,
509
+ "eval_precision": 0.6691460531626593,
510
+ "eval_recall": 0.27759837340031096,
511
+ "eval_runtime": 4903.2377,
512
+ "eval_samples_per_second": 16.36,
513
+ "eval_steps_per_second": 2.045,
514
+ "step": 400
515
+ },
516
+ {
517
+ "epoch": 0.06,
518
+ "learning_rate": 9.937384044526903e-06,
519
+ "loss": 0.4531,
520
+ "step": 405
521
+ },
522
+ {
523
+ "epoch": 0.06,
524
+ "learning_rate": 9.93661100803958e-06,
525
+ "loss": 0.4467,
526
+ "step": 410
527
+ },
528
+ {
529
+ "epoch": 0.06,
530
+ "learning_rate": 9.935837971552257e-06,
531
+ "loss": 0.4753,
532
+ "step": 415
533
+ },
534
+ {
535
+ "epoch": 0.06,
536
+ "learning_rate": 9.935064935064936e-06,
537
+ "loss": 0.428,
538
+ "step": 420
539
+ },
540
+ {
541
+ "epoch": 0.07,
542
+ "learning_rate": 9.934291898577614e-06,
543
+ "loss": 0.4418,
544
+ "step": 425
545
+ },
546
+ {
547
+ "epoch": 0.07,
548
+ "learning_rate": 9.933518862090291e-06,
549
+ "loss": 0.5087,
550
+ "step": 430
551
+ },
552
+ {
553
+ "epoch": 0.07,
554
+ "learning_rate": 9.93274582560297e-06,
555
+ "loss": 0.4775,
556
+ "step": 435
557
+ },
558
+ {
559
+ "epoch": 0.07,
560
+ "learning_rate": 9.931972789115647e-06,
561
+ "loss": 0.4923,
562
+ "step": 440
563
+ },
564
+ {
565
+ "epoch": 0.07,
566
+ "learning_rate": 9.931199752628325e-06,
567
+ "loss": 0.4721,
568
+ "step": 445
569
+ },
570
+ {
571
+ "epoch": 0.07,
572
+ "learning_rate": 9.930426716141004e-06,
573
+ "loss": 0.4718,
574
+ "step": 450
575
+ },
576
+ {
577
+ "epoch": 0.07,
578
+ "learning_rate": 9.929653679653681e-06,
579
+ "loss": 0.4392,
580
+ "step": 455
581
+ },
582
+ {
583
+ "epoch": 0.07,
584
+ "learning_rate": 9.928880643166358e-06,
585
+ "loss": 0.4315,
586
+ "step": 460
587
+ },
588
+ {
589
+ "epoch": 0.07,
590
+ "learning_rate": 9.928107606679036e-06,
591
+ "loss": 0.4641,
592
+ "step": 465
593
+ },
594
+ {
595
+ "epoch": 0.07,
596
+ "learning_rate": 9.927334570191713e-06,
597
+ "loss": 0.4417,
598
+ "step": 470
599
+ },
600
+ {
601
+ "epoch": 0.07,
602
+ "learning_rate": 9.926561533704392e-06,
603
+ "loss": 0.454,
604
+ "step": 475
605
+ },
606
+ {
607
+ "epoch": 0.07,
608
+ "learning_rate": 9.925788497217069e-06,
609
+ "loss": 0.4414,
610
+ "step": 480
611
+ },
612
+ {
613
+ "epoch": 0.07,
614
+ "learning_rate": 9.925015460729748e-06,
615
+ "loss": 0.4802,
616
+ "step": 485
617
+ },
618
+ {
619
+ "epoch": 0.08,
620
+ "learning_rate": 9.924242424242425e-06,
621
+ "loss": 0.4262,
622
+ "step": 490
623
+ },
624
+ {
625
+ "epoch": 0.08,
626
+ "learning_rate": 9.923469387755103e-06,
627
+ "loss": 0.4543,
628
+ "step": 495
629
+ },
630
+ {
631
+ "epoch": 0.08,
632
+ "learning_rate": 9.922696351267782e-06,
633
+ "loss": 0.407,
634
+ "step": 500
635
+ },
636
+ {
637
+ "epoch": 0.08,
638
+ "learning_rate": 9.921923314780459e-06,
639
+ "loss": 0.4328,
640
+ "step": 505
641
+ },
642
+ {
643
+ "epoch": 0.08,
644
+ "learning_rate": 9.921150278293136e-06,
645
+ "loss": 0.4589,
646
+ "step": 510
647
+ },
648
+ {
649
+ "epoch": 0.08,
650
+ "learning_rate": 9.920377241805814e-06,
651
+ "loss": 0.4588,
652
+ "step": 515
653
+ },
654
+ {
655
+ "epoch": 0.08,
656
+ "learning_rate": 9.919604205318491e-06,
657
+ "loss": 0.4273,
658
+ "step": 520
659
+ },
660
+ {
661
+ "epoch": 0.08,
662
+ "learning_rate": 9.91883116883117e-06,
663
+ "loss": 0.4688,
664
+ "step": 525
665
+ },
666
+ {
667
+ "epoch": 0.08,
668
+ "learning_rate": 9.918058132343847e-06,
669
+ "loss": 0.4324,
670
+ "step": 530
671
+ },
672
+ {
673
+ "epoch": 0.08,
674
+ "learning_rate": 9.917285095856525e-06,
675
+ "loss": 0.4846,
676
+ "step": 535
677
+ },
678
+ {
679
+ "epoch": 0.08,
680
+ "learning_rate": 9.916512059369204e-06,
681
+ "loss": 0.4366,
682
+ "step": 540
683
+ },
684
+ {
685
+ "epoch": 0.08,
686
+ "learning_rate": 9.91573902288188e-06,
687
+ "loss": 0.442,
688
+ "step": 545
689
+ },
690
+ {
691
+ "epoch": 0.09,
692
+ "learning_rate": 9.91496598639456e-06,
693
+ "loss": 0.4241,
694
+ "step": 550
695
+ },
696
+ {
697
+ "epoch": 0.09,
698
+ "learning_rate": 9.914192949907237e-06,
699
+ "loss": 0.4574,
700
+ "step": 555
701
+ },
702
+ {
703
+ "epoch": 0.09,
704
+ "learning_rate": 9.913419913419914e-06,
705
+ "loss": 0.4173,
706
+ "step": 560
707
+ },
708
+ {
709
+ "epoch": 0.09,
710
+ "learning_rate": 9.912646876932592e-06,
711
+ "loss": 0.4202,
712
+ "step": 565
713
+ },
714
+ {
715
+ "epoch": 0.09,
716
+ "learning_rate": 9.911873840445269e-06,
717
+ "loss": 0.4694,
718
+ "step": 570
719
+ },
720
+ {
721
+ "epoch": 0.09,
722
+ "learning_rate": 9.911100803957948e-06,
723
+ "loss": 0.4595,
724
+ "step": 575
725
+ },
726
+ {
727
+ "epoch": 0.09,
728
+ "learning_rate": 9.910327767470625e-06,
729
+ "loss": 0.464,
730
+ "step": 580
731
+ },
732
+ {
733
+ "epoch": 0.09,
734
+ "learning_rate": 9.909554730983303e-06,
735
+ "loss": 0.4688,
736
+ "step": 585
737
+ },
738
+ {
739
+ "epoch": 0.09,
740
+ "learning_rate": 9.908781694495982e-06,
741
+ "loss": 0.4932,
742
+ "step": 590
743
+ },
744
+ {
745
+ "epoch": 0.09,
746
+ "learning_rate": 9.908008658008659e-06,
747
+ "loss": 0.4648,
748
+ "step": 595
749
+ },
750
+ {
751
+ "epoch": 0.09,
752
+ "learning_rate": 9.907235621521336e-06,
753
+ "loss": 0.5136,
754
+ "step": 600
755
+ },
756
+ {
757
+ "epoch": 0.09,
758
+ "eval_accuracy": 0.595349996883376,
759
+ "eval_accuracy_sklearn": 0.595349996883376,
760
+ "eval_f1": 0.6681389238209163,
761
+ "eval_loss": 0.8079590201377869,
762
+ "eval_precision": 0.5834404685379616,
763
+ "eval_recall": 0.7816050711637363,
764
+ "eval_runtime": 4915.3389,
765
+ "eval_samples_per_second": 16.319,
766
+ "eval_steps_per_second": 2.04,
767
+ "step": 600
768
+ }
769
+ ],
770
+ "max_steps": 64680,
771
+ "num_train_epochs": 10,
772
+ "total_flos": 7.12606751391744e+16,
773
+ "trial_name": null,
774
+ "trial_params": null
775
+ }
ranker_bs256_0/checkpoint-600/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a89904cc492981aada74646ac2bddbb0a7e40cc9d86615845667f8c5c4c4e4f
3
+ size 3451
ranker_bs256_0/checkpoint-600/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
ranker_bs256_0/checkpoint-800/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Salesforce/codet5-large",
3
+ "architectures": [
4
+ "T5EncoderForSequenceClassification"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 4096,
8
+ "d_kv": 64,
9
+ "d_model": 1024,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 24,
22
+ "num_heads": 16,
23
+ "num_layers": 24,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "problem_type": "single_label_classification",
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "task_specific_params": {
30
+ "summarization": {
31
+ "early_stopping": true,
32
+ "length_penalty": 2.0,
33
+ "max_length": 200,
34
+ "min_length": 30,
35
+ "no_repeat_ngram_size": 3,
36
+ "num_beams": 4,
37
+ "prefix": "summarize: "
38
+ },
39
+ "translation_en_to_de": {
40
+ "early_stopping": true,
41
+ "max_length": 300,
42
+ "num_beams": 4,
43
+ "prefix": "translate English to German: "
44
+ },
45
+ "translation_en_to_fr": {
46
+ "early_stopping": true,
47
+ "max_length": 300,
48
+ "num_beams": 4,
49
+ "prefix": "translate English to French: "
50
+ },
51
+ "translation_en_to_ro": {
52
+ "early_stopping": true,
53
+ "max_length": 300,
54
+ "num_beams": 4,
55
+ "prefix": "translate English to Romanian: "
56
+ }
57
+ },
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.26.0",
60
+ "use_cache": true,
61
+ "vocab_size": 32100
62
+ }
ranker_bs256_0/checkpoint-800/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
ranker_bs256_0/checkpoint-800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4889b1708f36ab7ea71c2a1a8e5804cda9f0ef6bc6d1006b4274f1503eb0b0b
3
+ size 2679420869
ranker_bs256_0/checkpoint-800/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39aec49e1b4f305a382ea614b918f9cf2fd13b503ad84e0514381a12b5808a76
3
+ size 1339719649
ranker_bs256_0/checkpoint-800/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f503f3bee295a41c7131b6550fa2182e35170e80e37255ed697d751d4e10063
3
+ size 14575
ranker_bs256_0/checkpoint-800/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c62550af7390dac59bfebbb462dfdd5ba5115ecb1662dc1efc8437c45b1a461
3
+ size 627
ranker_bs256_0/checkpoint-800/special_tokens_map.json ADDED
@@ -0,0 +1,753 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<extra_id_99>",
5
+ "lstrip": true,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<extra_id_98>",
12
+ "lstrip": true,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<extra_id_97>",
19
+ "lstrip": true,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "<extra_id_96>",
26
+ "lstrip": true,
27
+ "normalized": true,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<extra_id_95>",
33
+ "lstrip": true,
34
+ "normalized": true,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "<extra_id_94>",
40
+ "lstrip": true,
41
+ "normalized": true,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<extra_id_93>",
47
+ "lstrip": true,
48
+ "normalized": true,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "<extra_id_92>",
54
+ "lstrip": true,
55
+ "normalized": true,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<extra_id_91>",
61
+ "lstrip": true,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "<extra_id_90>",
68
+ "lstrip": true,
69
+ "normalized": true,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "<extra_id_89>",
75
+ "lstrip": true,
76
+ "normalized": true,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ },
80
+ {
81
+ "content": "<extra_id_88>",
82
+ "lstrip": true,
83
+ "normalized": true,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ },
87
+ {
88
+ "content": "<extra_id_87>",
89
+ "lstrip": true,
90
+ "normalized": true,
91
+ "rstrip": false,
92
+ "single_word": false
93
+ },
94
+ {
95
+ "content": "<extra_id_86>",
96
+ "lstrip": true,
97
+ "normalized": true,
98
+ "rstrip": false,
99
+ "single_word": false
100
+ },
101
+ {
102
+ "content": "<extra_id_85>",
103
+ "lstrip": true,
104
+ "normalized": true,
105
+ "rstrip": false,
106
+ "single_word": false
107
+ },
108
+ {
109
+ "content": "<extra_id_84>",
110
+ "lstrip": true,
111
+ "normalized": true,
112
+ "rstrip": false,
113
+ "single_word": false
114
+ },
115
+ {
116
+ "content": "<extra_id_83>",
117
+ "lstrip": true,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false
121
+ },
122
+ {
123
+ "content": "<extra_id_82>",
124
+ "lstrip": true,
125
+ "normalized": true,
126
+ "rstrip": false,
127
+ "single_word": false
128
+ },
129
+ {
130
+ "content": "<extra_id_81>",
131
+ "lstrip": true,
132
+ "normalized": true,
133
+ "rstrip": false,
134
+ "single_word": false
135
+ },
136
+ {
137
+ "content": "<extra_id_80>",
138
+ "lstrip": true,
139
+ "normalized": true,
140
+ "rstrip": false,
141
+ "single_word": false
142
+ },
143
+ {
144
+ "content": "<extra_id_79>",
145
+ "lstrip": true,
146
+ "normalized": true,
147
+ "rstrip": false,
148
+ "single_word": false
149
+ },
150
+ {
151
+ "content": "<extra_id_78>",
152
+ "lstrip": true,
153
+ "normalized": true,
154
+ "rstrip": false,
155
+ "single_word": false
156
+ },
157
+ {
158
+ "content": "<extra_id_77>",
159
+ "lstrip": true,
160
+ "normalized": true,
161
+ "rstrip": false,
162
+ "single_word": false
163
+ },
164
+ {
165
+ "content": "<extra_id_76>",
166
+ "lstrip": true,
167
+ "normalized": true,
168
+ "rstrip": false,
169
+ "single_word": false
170
+ },
171
+ {
172
+ "content": "<extra_id_75>",
173
+ "lstrip": true,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false
177
+ },
178
+ {
179
+ "content": "<extra_id_74>",
180
+ "lstrip": true,
181
+ "normalized": true,
182
+ "rstrip": false,
183
+ "single_word": false
184
+ },
185
+ {
186
+ "content": "<extra_id_73>",
187
+ "lstrip": true,
188
+ "normalized": true,
189
+ "rstrip": false,
190
+ "single_word": false
191
+ },
192
+ {
193
+ "content": "<extra_id_72>",
194
+ "lstrip": true,
195
+ "normalized": true,
196
+ "rstrip": false,
197
+ "single_word": false
198
+ },
199
+ {
200
+ "content": "<extra_id_71>",
201
+ "lstrip": true,
202
+ "normalized": true,
203
+ "rstrip": false,
204
+ "single_word": false
205
+ },
206
+ {
207
+ "content": "<extra_id_70>",
208
+ "lstrip": true,
209
+ "normalized": true,
210
+ "rstrip": false,
211
+ "single_word": false
212
+ },
213
+ {
214
+ "content": "<extra_id_69>",
215
+ "lstrip": true,
216
+ "normalized": true,
217
+ "rstrip": false,
218
+ "single_word": false
219
+ },
220
+ {
221
+ "content": "<extra_id_68>",
222
+ "lstrip": true,
223
+ "normalized": true,
224
+ "rstrip": false,
225
+ "single_word": false
226
+ },
227
+ {
228
+ "content": "<extra_id_67>",
229
+ "lstrip": true,
230
+ "normalized": true,
231
+ "rstrip": false,
232
+ "single_word": false
233
+ },
234
+ {
235
+ "content": "<extra_id_66>",
236
+ "lstrip": true,
237
+ "normalized": true,
238
+ "rstrip": false,
239
+ "single_word": false
240
+ },
241
+ {
242
+ "content": "<extra_id_65>",
243
+ "lstrip": true,
244
+ "normalized": true,
245
+ "rstrip": false,
246
+ "single_word": false
247
+ },
248
+ {
249
+ "content": "<extra_id_64>",
250
+ "lstrip": true,
251
+ "normalized": true,
252
+ "rstrip": false,
253
+ "single_word": false
254
+ },
255
+ {
256
+ "content": "<extra_id_63>",
257
+ "lstrip": true,
258
+ "normalized": true,
259
+ "rstrip": false,
260
+ "single_word": false
261
+ },
262
+ {
263
+ "content": "<extra_id_62>",
264
+ "lstrip": true,
265
+ "normalized": true,
266
+ "rstrip": false,
267
+ "single_word": false
268
+ },
269
+ {
270
+ "content": "<extra_id_61>",
271
+ "lstrip": true,
272
+ "normalized": true,
273
+ "rstrip": false,
274
+ "single_word": false
275
+ },
276
+ {
277
+ "content": "<extra_id_60>",
278
+ "lstrip": true,
279
+ "normalized": true,
280
+ "rstrip": false,
281
+ "single_word": false
282
+ },
283
+ {
284
+ "content": "<extra_id_59>",
285
+ "lstrip": true,
286
+ "normalized": true,
287
+ "rstrip": false,
288
+ "single_word": false
289
+ },
290
+ {
291
+ "content": "<extra_id_58>",
292
+ "lstrip": true,
293
+ "normalized": true,
294
+ "rstrip": false,
295
+ "single_word": false
296
+ },
297
+ {
298
+ "content": "<extra_id_57>",
299
+ "lstrip": true,
300
+ "normalized": true,
301
+ "rstrip": false,
302
+ "single_word": false
303
+ },
304
+ {
305
+ "content": "<extra_id_56>",
306
+ "lstrip": true,
307
+ "normalized": true,
308
+ "rstrip": false,
309
+ "single_word": false
310
+ },
311
+ {
312
+ "content": "<extra_id_55>",
313
+ "lstrip": true,
314
+ "normalized": true,
315
+ "rstrip": false,
316
+ "single_word": false
317
+ },
318
+ {
319
+ "content": "<extra_id_54>",
320
+ "lstrip": true,
321
+ "normalized": true,
322
+ "rstrip": false,
323
+ "single_word": false
324
+ },
325
+ {
326
+ "content": "<extra_id_53>",
327
+ "lstrip": true,
328
+ "normalized": true,
329
+ "rstrip": false,
330
+ "single_word": false
331
+ },
332
+ {
333
+ "content": "<extra_id_52>",
334
+ "lstrip": true,
335
+ "normalized": true,
336
+ "rstrip": false,
337
+ "single_word": false
338
+ },
339
+ {
340
+ "content": "<extra_id_51>",
341
+ "lstrip": true,
342
+ "normalized": true,
343
+ "rstrip": false,
344
+ "single_word": false
345
+ },
346
+ {
347
+ "content": "<extra_id_50>",
348
+ "lstrip": true,
349
+ "normalized": true,
350
+ "rstrip": false,
351
+ "single_word": false
352
+ },
353
+ {
354
+ "content": "<extra_id_49>",
355
+ "lstrip": true,
356
+ "normalized": true,
357
+ "rstrip": false,
358
+ "single_word": false
359
+ },
360
+ {
361
+ "content": "<extra_id_48>",
362
+ "lstrip": true,
363
+ "normalized": true,
364
+ "rstrip": false,
365
+ "single_word": false
366
+ },
367
+ {
368
+ "content": "<extra_id_47>",
369
+ "lstrip": true,
370
+ "normalized": true,
371
+ "rstrip": false,
372
+ "single_word": false
373
+ },
374
+ {
375
+ "content": "<extra_id_46>",
376
+ "lstrip": true,
377
+ "normalized": true,
378
+ "rstrip": false,
379
+ "single_word": false
380
+ },
381
+ {
382
+ "content": "<extra_id_45>",
383
+ "lstrip": true,
384
+ "normalized": true,
385
+ "rstrip": false,
386
+ "single_word": false
387
+ },
388
+ {
389
+ "content": "<extra_id_44>",
390
+ "lstrip": true,
391
+ "normalized": true,
392
+ "rstrip": false,
393
+ "single_word": false
394
+ },
395
+ {
396
+ "content": "<extra_id_43>",
397
+ "lstrip": true,
398
+ "normalized": true,
399
+ "rstrip": false,
400
+ "single_word": false
401
+ },
402
+ {
403
+ "content": "<extra_id_42>",
404
+ "lstrip": true,
405
+ "normalized": true,
406
+ "rstrip": false,
407
+ "single_word": false
408
+ },
409
+ {
410
+ "content": "<extra_id_41>",
411
+ "lstrip": true,
412
+ "normalized": true,
413
+ "rstrip": false,
414
+ "single_word": false
415
+ },
416
+ {
417
+ "content": "<extra_id_40>",
418
+ "lstrip": true,
419
+ "normalized": true,
420
+ "rstrip": false,
421
+ "single_word": false
422
+ },
423
+ {
424
+ "content": "<extra_id_39>",
425
+ "lstrip": true,
426
+ "normalized": true,
427
+ "rstrip": false,
428
+ "single_word": false
429
+ },
430
+ {
431
+ "content": "<extra_id_38>",
432
+ "lstrip": true,
433
+ "normalized": true,
434
+ "rstrip": false,
435
+ "single_word": false
436
+ },
437
+ {
438
+ "content": "<extra_id_37>",
439
+ "lstrip": true,
440
+ "normalized": true,
441
+ "rstrip": false,
442
+ "single_word": false
443
+ },
444
+ {
445
+ "content": "<extra_id_36>",
446
+ "lstrip": true,
447
+ "normalized": true,
448
+ "rstrip": false,
449
+ "single_word": false
450
+ },
451
+ {
452
+ "content": "<extra_id_35>",
453
+ "lstrip": true,
454
+ "normalized": true,
455
+ "rstrip": false,
456
+ "single_word": false
457
+ },
458
+ {
459
+ "content": "<extra_id_34>",
460
+ "lstrip": true,
461
+ "normalized": true,
462
+ "rstrip": false,
463
+ "single_word": false
464
+ },
465
+ {
466
+ "content": "<extra_id_33>",
467
+ "lstrip": true,
468
+ "normalized": true,
469
+ "rstrip": false,
470
+ "single_word": false
471
+ },
472
+ {
473
+ "content": "<extra_id_32>",
474
+ "lstrip": true,
475
+ "normalized": true,
476
+ "rstrip": false,
477
+ "single_word": false
478
+ },
479
+ {
480
+ "content": "<extra_id_31>",
481
+ "lstrip": true,
482
+ "normalized": true,
483
+ "rstrip": false,
484
+ "single_word": false
485
+ },
486
+ {
487
+ "content": "<extra_id_30>",
488
+ "lstrip": true,
489
+ "normalized": true,
490
+ "rstrip": false,
491
+ "single_word": false
492
+ },
493
+ {
494
+ "content": "<extra_id_29>",
495
+ "lstrip": true,
496
+ "normalized": true,
497
+ "rstrip": false,
498
+ "single_word": false
499
+ },
500
+ {
501
+ "content": "<extra_id_28>",
502
+ "lstrip": true,
503
+ "normalized": true,
504
+ "rstrip": false,
505
+ "single_word": false
506
+ },
507
+ {
508
+ "content": "<extra_id_27>",
509
+ "lstrip": true,
510
+ "normalized": true,
511
+ "rstrip": false,
512
+ "single_word": false
513
+ },
514
+ {
515
+ "content": "<extra_id_26>",
516
+ "lstrip": true,
517
+ "normalized": true,
518
+ "rstrip": false,
519
+ "single_word": false
520
+ },
521
+ {
522
+ "content": "<extra_id_25>",
523
+ "lstrip": true,
524
+ "normalized": true,
525
+ "rstrip": false,
526
+ "single_word": false
527
+ },
528
+ {
529
+ "content": "<extra_id_24>",
530
+ "lstrip": true,
531
+ "normalized": true,
532
+ "rstrip": false,
533
+ "single_word": false
534
+ },
535
+ {
536
+ "content": "<extra_id_23>",
537
+ "lstrip": true,
538
+ "normalized": true,
539
+ "rstrip": false,
540
+ "single_word": false
541
+ },
542
+ {
543
+ "content": "<extra_id_22>",
544
+ "lstrip": true,
545
+ "normalized": true,
546
+ "rstrip": false,
547
+ "single_word": false
548
+ },
549
+ {
550
+ "content": "<extra_id_21>",
551
+ "lstrip": true,
552
+ "normalized": true,
553
+ "rstrip": false,
554
+ "single_word": false
555
+ },
556
+ {
557
+ "content": "<extra_id_20>",
558
+ "lstrip": true,
559
+ "normalized": true,
560
+ "rstrip": false,
561
+ "single_word": false
562
+ },
563
+ {
564
+ "content": "<extra_id_19>",
565
+ "lstrip": true,
566
+ "normalized": true,
567
+ "rstrip": false,
568
+ "single_word": false
569
+ },
570
+ {
571
+ "content": "<extra_id_18>",
572
+ "lstrip": true,
573
+ "normalized": true,
574
+ "rstrip": false,
575
+ "single_word": false
576
+ },
577
+ {
578
+ "content": "<extra_id_17>",
579
+ "lstrip": true,
580
+ "normalized": true,
581
+ "rstrip": false,
582
+ "single_word": false
583
+ },
584
+ {
585
+ "content": "<extra_id_16>",
586
+ "lstrip": true,
587
+ "normalized": true,
588
+ "rstrip": false,
589
+ "single_word": false
590
+ },
591
+ {
592
+ "content": "<extra_id_15>",
593
+ "lstrip": true,
594
+ "normalized": true,
595
+ "rstrip": false,
596
+ "single_word": false
597
+ },
598
+ {
599
+ "content": "<extra_id_14>",
600
+ "lstrip": true,
601
+ "normalized": true,
602
+ "rstrip": false,
603
+ "single_word": false
604
+ },
605
+ {
606
+ "content": "<extra_id_13>",
607
+ "lstrip": true,
608
+ "normalized": true,
609
+ "rstrip": false,
610
+ "single_word": false
611
+ },
612
+ {
613
+ "content": "<extra_id_12>",
614
+ "lstrip": true,
615
+ "normalized": true,
616
+ "rstrip": false,
617
+ "single_word": false
618
+ },
619
+ {
620
+ "content": "<extra_id_11>",
621
+ "lstrip": true,
622
+ "normalized": true,
623
+ "rstrip": false,
624
+ "single_word": false
625
+ },
626
+ {
627
+ "content": "<extra_id_10>",
628
+ "lstrip": true,
629
+ "normalized": true,
630
+ "rstrip": false,
631
+ "single_word": false
632
+ },
633
+ {
634
+ "content": "<extra_id_9>",
635
+ "lstrip": true,
636
+ "normalized": true,
637
+ "rstrip": false,
638
+ "single_word": false
639
+ },
640
+ {
641
+ "content": "<extra_id_8>",
642
+ "lstrip": true,
643
+ "normalized": true,
644
+ "rstrip": false,
645
+ "single_word": false
646
+ },
647
+ {
648
+ "content": "<extra_id_7>",
649
+ "lstrip": true,
650
+ "normalized": true,
651
+ "rstrip": false,
652
+ "single_word": false
653
+ },
654
+ {
655
+ "content": "<extra_id_6>",
656
+ "lstrip": true,
657
+ "normalized": true,
658
+ "rstrip": false,
659
+ "single_word": false
660
+ },
661
+ {
662
+ "content": "<extra_id_5>",
663
+ "lstrip": true,
664
+ "normalized": true,
665
+ "rstrip": false,
666
+ "single_word": false
667
+ },
668
+ {
669
+ "content": "<extra_id_4>",
670
+ "lstrip": true,
671
+ "normalized": true,
672
+ "rstrip": false,
673
+ "single_word": false
674
+ },
675
+ {
676
+ "content": "<extra_id_3>",
677
+ "lstrip": true,
678
+ "normalized": true,
679
+ "rstrip": false,
680
+ "single_word": false
681
+ },
682
+ {
683
+ "content": "<extra_id_2>",
684
+ "lstrip": true,
685
+ "normalized": true,
686
+ "rstrip": false,
687
+ "single_word": false
688
+ },
689
+ {
690
+ "content": "<extra_id_1>",
691
+ "lstrip": true,
692
+ "normalized": true,
693
+ "rstrip": false,
694
+ "single_word": false
695
+ },
696
+ {
697
+ "content": "<extra_id_0>",
698
+ "lstrip": true,
699
+ "normalized": true,
700
+ "rstrip": false,
701
+ "single_word": false
702
+ }
703
+ ],
704
+ "bos_token": {
705
+ "content": "<s>",
706
+ "lstrip": false,
707
+ "normalized": true,
708
+ "rstrip": false,
709
+ "single_word": false
710
+ },
711
+ "cls_token": {
712
+ "content": "<s>",
713
+ "lstrip": false,
714
+ "normalized": true,
715
+ "rstrip": false,
716
+ "single_word": false
717
+ },
718
+ "eos_token": {
719
+ "content": "</s>",
720
+ "lstrip": false,
721
+ "normalized": true,
722
+ "rstrip": false,
723
+ "single_word": false
724
+ },
725
+ "mask_token": {
726
+ "content": "<mask>",
727
+ "lstrip": true,
728
+ "normalized": true,
729
+ "rstrip": false,
730
+ "single_word": false
731
+ },
732
+ "pad_token": {
733
+ "content": "<pad>",
734
+ "lstrip": false,
735
+ "normalized": true,
736
+ "rstrip": false,
737
+ "single_word": false
738
+ },
739
+ "sep_token": {
740
+ "content": "</s>",
741
+ "lstrip": false,
742
+ "normalized": true,
743
+ "rstrip": false,
744
+ "single_word": false
745
+ },
746
+ "unk_token": {
747
+ "content": "<unk>",
748
+ "lstrip": false,
749
+ "normalized": true,
750
+ "rstrip": false,
751
+ "single_word": false
752
+ }
753
+ }
ranker_bs256_0/checkpoint-800/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
ranker_bs256_0/checkpoint-800/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "cls_token": {
12
+ "__type": "AddedToken",
13
+ "content": "<s>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "__type": "AddedToken",
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "errors": "replace",
28
+ "mask_token": {
29
+ "__type": "AddedToken",
30
+ "content": "<mask>",
31
+ "lstrip": true,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": false
35
+ },
36
+ "model_max_length": 512,
37
+ "name_or_path": "Salesforce/codet5-large",
38
+ "pad_token": {
39
+ "__type": "AddedToken",
40
+ "content": "<pad>",
41
+ "lstrip": false,
42
+ "normalized": true,
43
+ "rstrip": false,
44
+ "single_word": false
45
+ },
46
+ "sep_token": {
47
+ "__type": "AddedToken",
48
+ "content": "</s>",
49
+ "lstrip": false,
50
+ "normalized": true,
51
+ "rstrip": false,
52
+ "single_word": false
53
+ },
54
+ "special_tokens_map_file": "/export/home/cache/model/5941df5e4315c5ab63b7b2ac791fb0bf0f209744a055c06b43b5274849137cdd.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": {
58
+ "__type": "AddedToken",
59
+ "content": "<unk>",
60
+ "lstrip": false,
61
+ "normalized": true,
62
+ "rstrip": false,
63
+ "single_word": false
64
+ }
65
+ }
ranker_bs256_0/checkpoint-800/trainer_state.json ADDED
@@ -0,0 +1,1028 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.12367030270238935,
5
+ "global_step": 800,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.0,
12
+ "learning_rate": 9.999226963512678e-06,
13
+ "loss": 1.1361,
14
+ "step": 5
15
+ },
16
+ {
17
+ "epoch": 0.0,
18
+ "learning_rate": 9.998453927025357e-06,
19
+ "loss": 0.7353,
20
+ "step": 10
21
+ },
22
+ {
23
+ "epoch": 0.0,
24
+ "learning_rate": 9.997680890538034e-06,
25
+ "loss": 0.6909,
26
+ "step": 15
27
+ },
28
+ {
29
+ "epoch": 0.0,
30
+ "learning_rate": 9.996907854050712e-06,
31
+ "loss": 0.6498,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 0.0,
36
+ "learning_rate": 9.99613481756339e-06,
37
+ "loss": 0.6414,
38
+ "step": 25
39
+ },
40
+ {
41
+ "epoch": 0.0,
42
+ "learning_rate": 9.995361781076068e-06,
43
+ "loss": 0.6415,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 0.01,
48
+ "learning_rate": 9.994588744588745e-06,
49
+ "loss": 0.6317,
50
+ "step": 35
51
+ },
52
+ {
53
+ "epoch": 0.01,
54
+ "learning_rate": 9.993815708101423e-06,
55
+ "loss": 0.6378,
56
+ "step": 40
57
+ },
58
+ {
59
+ "epoch": 0.01,
60
+ "learning_rate": 9.9930426716141e-06,
61
+ "loss": 0.6347,
62
+ "step": 45
63
+ },
64
+ {
65
+ "epoch": 0.01,
66
+ "learning_rate": 9.992269635126779e-06,
67
+ "loss": 0.5924,
68
+ "step": 50
69
+ },
70
+ {
71
+ "epoch": 0.01,
72
+ "learning_rate": 9.991496598639456e-06,
73
+ "loss": 0.6046,
74
+ "step": 55
75
+ },
76
+ {
77
+ "epoch": 0.01,
78
+ "learning_rate": 9.990723562152135e-06,
79
+ "loss": 0.6045,
80
+ "step": 60
81
+ },
82
+ {
83
+ "epoch": 0.01,
84
+ "learning_rate": 9.989950525664813e-06,
85
+ "loss": 0.6,
86
+ "step": 65
87
+ },
88
+ {
89
+ "epoch": 0.01,
90
+ "learning_rate": 9.98917748917749e-06,
91
+ "loss": 0.5504,
92
+ "step": 70
93
+ },
94
+ {
95
+ "epoch": 0.01,
96
+ "learning_rate": 9.988404452690169e-06,
97
+ "loss": 0.5747,
98
+ "step": 75
99
+ },
100
+ {
101
+ "epoch": 0.01,
102
+ "learning_rate": 9.987631416202846e-06,
103
+ "loss": 0.5526,
104
+ "step": 80
105
+ },
106
+ {
107
+ "epoch": 0.01,
108
+ "learning_rate": 9.986858379715523e-06,
109
+ "loss": 0.5958,
110
+ "step": 85
111
+ },
112
+ {
113
+ "epoch": 0.01,
114
+ "learning_rate": 9.9860853432282e-06,
115
+ "loss": 0.608,
116
+ "step": 90
117
+ },
118
+ {
119
+ "epoch": 0.01,
120
+ "learning_rate": 9.985312306740878e-06,
121
+ "loss": 0.5988,
122
+ "step": 95
123
+ },
124
+ {
125
+ "epoch": 0.02,
126
+ "learning_rate": 9.984539270253557e-06,
127
+ "loss": 0.5861,
128
+ "step": 100
129
+ },
130
+ {
131
+ "epoch": 0.02,
132
+ "learning_rate": 9.983766233766234e-06,
133
+ "loss": 0.5749,
134
+ "step": 105
135
+ },
136
+ {
137
+ "epoch": 0.02,
138
+ "learning_rate": 9.982993197278913e-06,
139
+ "loss": 0.5498,
140
+ "step": 110
141
+ },
142
+ {
143
+ "epoch": 0.02,
144
+ "learning_rate": 9.98222016079159e-06,
145
+ "loss": 0.5841,
146
+ "step": 115
147
+ },
148
+ {
149
+ "epoch": 0.02,
150
+ "learning_rate": 9.981447124304268e-06,
151
+ "loss": 0.5973,
152
+ "step": 120
153
+ },
154
+ {
155
+ "epoch": 0.02,
156
+ "learning_rate": 9.980674087816947e-06,
157
+ "loss": 0.5954,
158
+ "step": 125
159
+ },
160
+ {
161
+ "epoch": 0.02,
162
+ "learning_rate": 9.979901051329624e-06,
163
+ "loss": 0.527,
164
+ "step": 130
165
+ },
166
+ {
167
+ "epoch": 0.02,
168
+ "learning_rate": 9.979128014842301e-06,
169
+ "loss": 0.5321,
170
+ "step": 135
171
+ },
172
+ {
173
+ "epoch": 0.02,
174
+ "learning_rate": 9.978354978354979e-06,
175
+ "loss": 0.5781,
176
+ "step": 140
177
+ },
178
+ {
179
+ "epoch": 0.02,
180
+ "learning_rate": 9.977581941867656e-06,
181
+ "loss": 0.5119,
182
+ "step": 145
183
+ },
184
+ {
185
+ "epoch": 0.02,
186
+ "learning_rate": 9.976808905380335e-06,
187
+ "loss": 0.5271,
188
+ "step": 150
189
+ },
190
+ {
191
+ "epoch": 0.02,
192
+ "learning_rate": 9.976035868893012e-06,
193
+ "loss": 0.5814,
194
+ "step": 155
195
+ },
196
+ {
197
+ "epoch": 0.02,
198
+ "learning_rate": 9.97526283240569e-06,
199
+ "loss": 0.518,
200
+ "step": 160
201
+ },
202
+ {
203
+ "epoch": 0.03,
204
+ "learning_rate": 9.974489795918369e-06,
205
+ "loss": 0.5335,
206
+ "step": 165
207
+ },
208
+ {
209
+ "epoch": 0.03,
210
+ "learning_rate": 9.973716759431046e-06,
211
+ "loss": 0.5062,
212
+ "step": 170
213
+ },
214
+ {
215
+ "epoch": 0.03,
216
+ "learning_rate": 9.972943722943725e-06,
217
+ "loss": 0.5253,
218
+ "step": 175
219
+ },
220
+ {
221
+ "epoch": 0.03,
222
+ "learning_rate": 9.972170686456402e-06,
223
+ "loss": 0.5856,
224
+ "step": 180
225
+ },
226
+ {
227
+ "epoch": 0.03,
228
+ "learning_rate": 9.97139764996908e-06,
229
+ "loss": 0.5196,
230
+ "step": 185
231
+ },
232
+ {
233
+ "epoch": 0.03,
234
+ "learning_rate": 9.970624613481757e-06,
235
+ "loss": 0.4764,
236
+ "step": 190
237
+ },
238
+ {
239
+ "epoch": 0.03,
240
+ "learning_rate": 9.969851576994434e-06,
241
+ "loss": 0.5254,
242
+ "step": 195
243
+ },
244
+ {
245
+ "epoch": 0.03,
246
+ "learning_rate": 9.969078540507111e-06,
247
+ "loss": 0.5442,
248
+ "step": 200
249
+ },
250
+ {
251
+ "epoch": 0.03,
252
+ "eval_accuracy": 0.5804400673190799,
253
+ "eval_accuracy_sklearn": 0.5804400673190799,
254
+ "eval_f1": 0.5294915349019279,
255
+ "eval_loss": 0.7918509840965271,
256
+ "eval_precision": 0.6370946036872561,
257
+ "eval_recall": 0.45298409281186464,
258
+ "eval_runtime": 4914.2737,
259
+ "eval_samples_per_second": 16.323,
260
+ "eval_steps_per_second": 2.04,
261
+ "step": 200
262
+ },
263
+ {
264
+ "epoch": 0.03,
265
+ "learning_rate": 9.96830550401979e-06,
266
+ "loss": 0.5163,
267
+ "step": 205
268
+ },
269
+ {
270
+ "epoch": 0.03,
271
+ "learning_rate": 9.967532467532468e-06,
272
+ "loss": 0.5044,
273
+ "step": 210
274
+ },
275
+ {
276
+ "epoch": 0.03,
277
+ "learning_rate": 9.966759431045147e-06,
278
+ "loss": 0.5078,
279
+ "step": 215
280
+ },
281
+ {
282
+ "epoch": 0.03,
283
+ "learning_rate": 9.965986394557824e-06,
284
+ "loss": 0.4623,
285
+ "step": 220
286
+ },
287
+ {
288
+ "epoch": 0.03,
289
+ "learning_rate": 9.965213358070501e-06,
290
+ "loss": 0.5359,
291
+ "step": 225
292
+ },
293
+ {
294
+ "epoch": 0.04,
295
+ "learning_rate": 9.96444032158318e-06,
296
+ "loss": 0.5068,
297
+ "step": 230
298
+ },
299
+ {
300
+ "epoch": 0.04,
301
+ "learning_rate": 9.963667285095858e-06,
302
+ "loss": 0.5029,
303
+ "step": 235
304
+ },
305
+ {
306
+ "epoch": 0.04,
307
+ "learning_rate": 9.962894248608535e-06,
308
+ "loss": 0.5084,
309
+ "step": 240
310
+ },
311
+ {
312
+ "epoch": 0.04,
313
+ "learning_rate": 9.962121212121212e-06,
314
+ "loss": 0.4783,
315
+ "step": 245
316
+ },
317
+ {
318
+ "epoch": 0.04,
319
+ "learning_rate": 9.96134817563389e-06,
320
+ "loss": 0.5216,
321
+ "step": 250
322
+ },
323
+ {
324
+ "epoch": 0.04,
325
+ "learning_rate": 9.960575139146569e-06,
326
+ "loss": 0.54,
327
+ "step": 255
328
+ },
329
+ {
330
+ "epoch": 0.04,
331
+ "learning_rate": 9.959802102659246e-06,
332
+ "loss": 0.5494,
333
+ "step": 260
334
+ },
335
+ {
336
+ "epoch": 0.04,
337
+ "learning_rate": 9.959029066171925e-06,
338
+ "loss": 0.5401,
339
+ "step": 265
340
+ },
341
+ {
342
+ "epoch": 0.04,
343
+ "learning_rate": 9.958256029684602e-06,
344
+ "loss": 0.5073,
345
+ "step": 270
346
+ },
347
+ {
348
+ "epoch": 0.04,
349
+ "learning_rate": 9.95748299319728e-06,
350
+ "loss": 0.4598,
351
+ "step": 275
352
+ },
353
+ {
354
+ "epoch": 0.04,
355
+ "learning_rate": 9.956709956709958e-06,
356
+ "loss": 0.4913,
357
+ "step": 280
358
+ },
359
+ {
360
+ "epoch": 0.04,
361
+ "learning_rate": 9.955936920222636e-06,
362
+ "loss": 0.4947,
363
+ "step": 285
364
+ },
365
+ {
366
+ "epoch": 0.04,
367
+ "learning_rate": 9.955163883735313e-06,
368
+ "loss": 0.4806,
369
+ "step": 290
370
+ },
371
+ {
372
+ "epoch": 0.05,
373
+ "learning_rate": 9.95439084724799e-06,
374
+ "loss": 0.4659,
375
+ "step": 295
376
+ },
377
+ {
378
+ "epoch": 0.05,
379
+ "learning_rate": 9.953617810760668e-06,
380
+ "loss": 0.4555,
381
+ "step": 300
382
+ },
383
+ {
384
+ "epoch": 0.05,
385
+ "learning_rate": 9.952844774273347e-06,
386
+ "loss": 0.4606,
387
+ "step": 305
388
+ },
389
+ {
390
+ "epoch": 0.05,
391
+ "learning_rate": 9.952071737786024e-06,
392
+ "loss": 0.4905,
393
+ "step": 310
394
+ },
395
+ {
396
+ "epoch": 0.05,
397
+ "learning_rate": 9.951298701298701e-06,
398
+ "loss": 0.4423,
399
+ "step": 315
400
+ },
401
+ {
402
+ "epoch": 0.05,
403
+ "learning_rate": 9.95052566481138e-06,
404
+ "loss": 0.4855,
405
+ "step": 320
406
+ },
407
+ {
408
+ "epoch": 0.05,
409
+ "learning_rate": 9.949752628324058e-06,
410
+ "loss": 0.486,
411
+ "step": 325
412
+ },
413
+ {
414
+ "epoch": 0.05,
415
+ "learning_rate": 9.948979591836737e-06,
416
+ "loss": 0.4774,
417
+ "step": 330
418
+ },
419
+ {
420
+ "epoch": 0.05,
421
+ "learning_rate": 9.948206555349414e-06,
422
+ "loss": 0.4909,
423
+ "step": 335
424
+ },
425
+ {
426
+ "epoch": 0.05,
427
+ "learning_rate": 9.947433518862091e-06,
428
+ "loss": 0.47,
429
+ "step": 340
430
+ },
431
+ {
432
+ "epoch": 0.05,
433
+ "learning_rate": 9.946660482374768e-06,
434
+ "loss": 0.4496,
435
+ "step": 345
436
+ },
437
+ {
438
+ "epoch": 0.05,
439
+ "learning_rate": 9.945887445887446e-06,
440
+ "loss": 0.5146,
441
+ "step": 350
442
+ },
443
+ {
444
+ "epoch": 0.05,
445
+ "learning_rate": 9.945114409400125e-06,
446
+ "loss": 0.4876,
447
+ "step": 355
448
+ },
449
+ {
450
+ "epoch": 0.06,
451
+ "learning_rate": 9.944341372912802e-06,
452
+ "loss": 0.4747,
453
+ "step": 360
454
+ },
455
+ {
456
+ "epoch": 0.06,
457
+ "learning_rate": 9.94356833642548e-06,
458
+ "loss": 0.4614,
459
+ "step": 365
460
+ },
461
+ {
462
+ "epoch": 0.06,
463
+ "learning_rate": 9.942795299938158e-06,
464
+ "loss": 0.4755,
465
+ "step": 370
466
+ },
467
+ {
468
+ "epoch": 0.06,
469
+ "learning_rate": 9.942022263450836e-06,
470
+ "loss": 0.4785,
471
+ "step": 375
472
+ },
473
+ {
474
+ "epoch": 0.06,
475
+ "learning_rate": 9.941249226963513e-06,
476
+ "loss": 0.4581,
477
+ "step": 380
478
+ },
479
+ {
480
+ "epoch": 0.06,
481
+ "learning_rate": 9.940476190476192e-06,
482
+ "loss": 0.4671,
483
+ "step": 385
484
+ },
485
+ {
486
+ "epoch": 0.06,
487
+ "learning_rate": 9.93970315398887e-06,
488
+ "loss": 0.4327,
489
+ "step": 390
490
+ },
491
+ {
492
+ "epoch": 0.06,
493
+ "learning_rate": 9.938930117501547e-06,
494
+ "loss": 0.4906,
495
+ "step": 395
496
+ },
497
+ {
498
+ "epoch": 0.06,
499
+ "learning_rate": 9.938157081014226e-06,
500
+ "loss": 0.5006,
501
+ "step": 400
502
+ },
503
+ {
504
+ "epoch": 0.06,
505
+ "eval_accuracy": 0.5519790562862308,
506
+ "eval_accuracy_sklearn": 0.5519790562862308,
507
+ "eval_f1": 0.39240549130993435,
508
+ "eval_loss": 0.969095766544342,
509
+ "eval_precision": 0.6691460531626593,
510
+ "eval_recall": 0.27759837340031096,
511
+ "eval_runtime": 4903.2377,
512
+ "eval_samples_per_second": 16.36,
513
+ "eval_steps_per_second": 2.045,
514
+ "step": 400
515
+ },
516
+ {
517
+ "epoch": 0.06,
518
+ "learning_rate": 9.937384044526903e-06,
519
+ "loss": 0.4531,
520
+ "step": 405
521
+ },
522
+ {
523
+ "epoch": 0.06,
524
+ "learning_rate": 9.93661100803958e-06,
525
+ "loss": 0.4467,
526
+ "step": 410
527
+ },
528
+ {
529
+ "epoch": 0.06,
530
+ "learning_rate": 9.935837971552257e-06,
531
+ "loss": 0.4753,
532
+ "step": 415
533
+ },
534
+ {
535
+ "epoch": 0.06,
536
+ "learning_rate": 9.935064935064936e-06,
537
+ "loss": 0.428,
538
+ "step": 420
539
+ },
540
+ {
541
+ "epoch": 0.07,
542
+ "learning_rate": 9.934291898577614e-06,
543
+ "loss": 0.4418,
544
+ "step": 425
545
+ },
546
+ {
547
+ "epoch": 0.07,
548
+ "learning_rate": 9.933518862090291e-06,
549
+ "loss": 0.5087,
550
+ "step": 430
551
+ },
552
+ {
553
+ "epoch": 0.07,
554
+ "learning_rate": 9.93274582560297e-06,
555
+ "loss": 0.4775,
556
+ "step": 435
557
+ },
558
+ {
559
+ "epoch": 0.07,
560
+ "learning_rate": 9.931972789115647e-06,
561
+ "loss": 0.4923,
562
+ "step": 440
563
+ },
564
+ {
565
+ "epoch": 0.07,
566
+ "learning_rate": 9.931199752628325e-06,
567
+ "loss": 0.4721,
568
+ "step": 445
569
+ },
570
+ {
571
+ "epoch": 0.07,
572
+ "learning_rate": 9.930426716141004e-06,
573
+ "loss": 0.4718,
574
+ "step": 450
575
+ },
576
+ {
577
+ "epoch": 0.07,
578
+ "learning_rate": 9.929653679653681e-06,
579
+ "loss": 0.4392,
580
+ "step": 455
581
+ },
582
+ {
583
+ "epoch": 0.07,
584
+ "learning_rate": 9.928880643166358e-06,
585
+ "loss": 0.4315,
586
+ "step": 460
587
+ },
588
+ {
589
+ "epoch": 0.07,
590
+ "learning_rate": 9.928107606679036e-06,
591
+ "loss": 0.4641,
592
+ "step": 465
593
+ },
594
+ {
595
+ "epoch": 0.07,
596
+ "learning_rate": 9.927334570191713e-06,
597
+ "loss": 0.4417,
598
+ "step": 470
599
+ },
600
+ {
601
+ "epoch": 0.07,
602
+ "learning_rate": 9.926561533704392e-06,
603
+ "loss": 0.454,
604
+ "step": 475
605
+ },
606
+ {
607
+ "epoch": 0.07,
608
+ "learning_rate": 9.925788497217069e-06,
609
+ "loss": 0.4414,
610
+ "step": 480
611
+ },
612
+ {
613
+ "epoch": 0.07,
614
+ "learning_rate": 9.925015460729748e-06,
615
+ "loss": 0.4802,
616
+ "step": 485
617
+ },
618
+ {
619
+ "epoch": 0.08,
620
+ "learning_rate": 9.924242424242425e-06,
621
+ "loss": 0.4262,
622
+ "step": 490
623
+ },
624
+ {
625
+ "epoch": 0.08,
626
+ "learning_rate": 9.923469387755103e-06,
627
+ "loss": 0.4543,
628
+ "step": 495
629
+ },
630
+ {
631
+ "epoch": 0.08,
632
+ "learning_rate": 9.922696351267782e-06,
633
+ "loss": 0.407,
634
+ "step": 500
635
+ },
636
+ {
637
+ "epoch": 0.08,
638
+ "learning_rate": 9.921923314780459e-06,
639
+ "loss": 0.4328,
640
+ "step": 505
641
+ },
642
+ {
643
+ "epoch": 0.08,
644
+ "learning_rate": 9.921150278293136e-06,
645
+ "loss": 0.4589,
646
+ "step": 510
647
+ },
648
+ {
649
+ "epoch": 0.08,
650
+ "learning_rate": 9.920377241805814e-06,
651
+ "loss": 0.4588,
652
+ "step": 515
653
+ },
654
+ {
655
+ "epoch": 0.08,
656
+ "learning_rate": 9.919604205318491e-06,
657
+ "loss": 0.4273,
658
+ "step": 520
659
+ },
660
+ {
661
+ "epoch": 0.08,
662
+ "learning_rate": 9.91883116883117e-06,
663
+ "loss": 0.4688,
664
+ "step": 525
665
+ },
666
+ {
667
+ "epoch": 0.08,
668
+ "learning_rate": 9.918058132343847e-06,
669
+ "loss": 0.4324,
670
+ "step": 530
671
+ },
672
+ {
673
+ "epoch": 0.08,
674
+ "learning_rate": 9.917285095856525e-06,
675
+ "loss": 0.4846,
676
+ "step": 535
677
+ },
678
+ {
679
+ "epoch": 0.08,
680
+ "learning_rate": 9.916512059369204e-06,
681
+ "loss": 0.4366,
682
+ "step": 540
683
+ },
684
+ {
685
+ "epoch": 0.08,
686
+ "learning_rate": 9.91573902288188e-06,
687
+ "loss": 0.442,
688
+ "step": 545
689
+ },
690
+ {
691
+ "epoch": 0.09,
692
+ "learning_rate": 9.91496598639456e-06,
693
+ "loss": 0.4241,
694
+ "step": 550
695
+ },
696
+ {
697
+ "epoch": 0.09,
698
+ "learning_rate": 9.914192949907237e-06,
699
+ "loss": 0.4574,
700
+ "step": 555
701
+ },
702
+ {
703
+ "epoch": 0.09,
704
+ "learning_rate": 9.913419913419914e-06,
705
+ "loss": 0.4173,
706
+ "step": 560
707
+ },
708
+ {
709
+ "epoch": 0.09,
710
+ "learning_rate": 9.912646876932592e-06,
711
+ "loss": 0.4202,
712
+ "step": 565
713
+ },
714
+ {
715
+ "epoch": 0.09,
716
+ "learning_rate": 9.911873840445269e-06,
717
+ "loss": 0.4694,
718
+ "step": 570
719
+ },
720
+ {
721
+ "epoch": 0.09,
722
+ "learning_rate": 9.911100803957948e-06,
723
+ "loss": 0.4595,
724
+ "step": 575
725
+ },
726
+ {
727
+ "epoch": 0.09,
728
+ "learning_rate": 9.910327767470625e-06,
729
+ "loss": 0.464,
730
+ "step": 580
731
+ },
732
+ {
733
+ "epoch": 0.09,
734
+ "learning_rate": 9.909554730983303e-06,
735
+ "loss": 0.4688,
736
+ "step": 585
737
+ },
738
+ {
739
+ "epoch": 0.09,
740
+ "learning_rate": 9.908781694495982e-06,
741
+ "loss": 0.4932,
742
+ "step": 590
743
+ },
744
+ {
745
+ "epoch": 0.09,
746
+ "learning_rate": 9.908008658008659e-06,
747
+ "loss": 0.4648,
748
+ "step": 595
749
+ },
750
+ {
751
+ "epoch": 0.09,
752
+ "learning_rate": 9.907235621521336e-06,
753
+ "loss": 0.5136,
754
+ "step": 600
755
+ },
756
+ {
757
+ "epoch": 0.09,
758
+ "eval_accuracy": 0.595349996883376,
759
+ "eval_accuracy_sklearn": 0.595349996883376,
760
+ "eval_f1": 0.6681389238209163,
761
+ "eval_loss": 0.8079590201377869,
762
+ "eval_precision": 0.5834404685379616,
763
+ "eval_recall": 0.7816050711637363,
764
+ "eval_runtime": 4915.3389,
765
+ "eval_samples_per_second": 16.319,
766
+ "eval_steps_per_second": 2.04,
767
+ "step": 600
768
+ },
769
+ {
770
+ "epoch": 0.09,
771
+ "learning_rate": 9.906462585034015e-06,
772
+ "loss": 0.5246,
773
+ "step": 605
774
+ },
775
+ {
776
+ "epoch": 0.09,
777
+ "learning_rate": 9.905689548546693e-06,
778
+ "loss": 0.4751,
779
+ "step": 610
780
+ },
781
+ {
782
+ "epoch": 0.1,
783
+ "learning_rate": 9.90491651205937e-06,
784
+ "loss": 0.4224,
785
+ "step": 615
786
+ },
787
+ {
788
+ "epoch": 0.1,
789
+ "learning_rate": 9.904143475572047e-06,
790
+ "loss": 0.3842,
791
+ "step": 620
792
+ },
793
+ {
794
+ "epoch": 0.1,
795
+ "learning_rate": 9.903370439084724e-06,
796
+ "loss": 0.4524,
797
+ "step": 625
798
+ },
799
+ {
800
+ "epoch": 0.1,
801
+ "learning_rate": 9.902597402597403e-06,
802
+ "loss": 0.4657,
803
+ "step": 630
804
+ },
805
+ {
806
+ "epoch": 0.1,
807
+ "learning_rate": 9.90182436611008e-06,
808
+ "loss": 0.4596,
809
+ "step": 635
810
+ },
811
+ {
812
+ "epoch": 0.1,
813
+ "learning_rate": 9.90105132962276e-06,
814
+ "loss": 0.4383,
815
+ "step": 640
816
+ },
817
+ {
818
+ "epoch": 0.1,
819
+ "learning_rate": 9.900278293135437e-06,
820
+ "loss": 0.4351,
821
+ "step": 645
822
+ },
823
+ {
824
+ "epoch": 0.1,
825
+ "learning_rate": 9.899505256648114e-06,
826
+ "loss": 0.4263,
827
+ "step": 650
828
+ },
829
+ {
830
+ "epoch": 0.1,
831
+ "learning_rate": 9.898732220160793e-06,
832
+ "loss": 0.448,
833
+ "step": 655
834
+ },
835
+ {
836
+ "epoch": 0.1,
837
+ "learning_rate": 9.89795918367347e-06,
838
+ "loss": 0.4372,
839
+ "step": 660
840
+ },
841
+ {
842
+ "epoch": 0.1,
843
+ "learning_rate": 9.897186147186148e-06,
844
+ "loss": 0.4681,
845
+ "step": 665
846
+ },
847
+ {
848
+ "epoch": 0.1,
849
+ "learning_rate": 9.896413110698825e-06,
850
+ "loss": 0.4783,
851
+ "step": 670
852
+ },
853
+ {
854
+ "epoch": 0.1,
855
+ "learning_rate": 9.895640074211502e-06,
856
+ "loss": 0.4155,
857
+ "step": 675
858
+ },
859
+ {
860
+ "epoch": 0.11,
861
+ "learning_rate": 9.894867037724181e-06,
862
+ "loss": 0.4398,
863
+ "step": 680
864
+ },
865
+ {
866
+ "epoch": 0.11,
867
+ "learning_rate": 9.894094001236859e-06,
868
+ "loss": 0.4364,
869
+ "step": 685
870
+ },
871
+ {
872
+ "epoch": 0.11,
873
+ "learning_rate": 9.893320964749536e-06,
874
+ "loss": 0.4175,
875
+ "step": 690
876
+ },
877
+ {
878
+ "epoch": 0.11,
879
+ "learning_rate": 9.892547928262215e-06,
880
+ "loss": 0.4763,
881
+ "step": 695
882
+ },
883
+ {
884
+ "epoch": 0.11,
885
+ "learning_rate": 9.891774891774892e-06,
886
+ "loss": 0.4629,
887
+ "step": 700
888
+ },
889
+ {
890
+ "epoch": 0.11,
891
+ "learning_rate": 9.891001855287571e-06,
892
+ "loss": 0.5083,
893
+ "step": 705
894
+ },
895
+ {
896
+ "epoch": 0.11,
897
+ "learning_rate": 9.890228818800249e-06,
898
+ "loss": 0.4867,
899
+ "step": 710
900
+ },
901
+ {
902
+ "epoch": 0.11,
903
+ "learning_rate": 9.889455782312926e-06,
904
+ "loss": 0.4855,
905
+ "step": 715
906
+ },
907
+ {
908
+ "epoch": 0.11,
909
+ "learning_rate": 9.888682745825603e-06,
910
+ "loss": 0.4219,
911
+ "step": 720
912
+ },
913
+ {
914
+ "epoch": 0.11,
915
+ "learning_rate": 9.88790970933828e-06,
916
+ "loss": 0.4676,
917
+ "step": 725
918
+ },
919
+ {
920
+ "epoch": 0.11,
921
+ "learning_rate": 9.88713667285096e-06,
922
+ "loss": 0.3963,
923
+ "step": 730
924
+ },
925
+ {
926
+ "epoch": 0.11,
927
+ "learning_rate": 9.886363636363637e-06,
928
+ "loss": 0.4347,
929
+ "step": 735
930
+ },
931
+ {
932
+ "epoch": 0.11,
933
+ "learning_rate": 9.885590599876314e-06,
934
+ "loss": 0.4721,
935
+ "step": 740
936
+ },
937
+ {
938
+ "epoch": 0.12,
939
+ "learning_rate": 9.884817563388993e-06,
940
+ "loss": 0.3992,
941
+ "step": 745
942
+ },
943
+ {
944
+ "epoch": 0.12,
945
+ "learning_rate": 9.88404452690167e-06,
946
+ "loss": 0.413,
947
+ "step": 750
948
+ },
949
+ {
950
+ "epoch": 0.12,
951
+ "learning_rate": 9.883271490414348e-06,
952
+ "loss": 0.4881,
953
+ "step": 755
954
+ },
955
+ {
956
+ "epoch": 0.12,
957
+ "learning_rate": 9.882498453927027e-06,
958
+ "loss": 0.4478,
959
+ "step": 760
960
+ },
961
+ {
962
+ "epoch": 0.12,
963
+ "learning_rate": 9.881725417439704e-06,
964
+ "loss": 0.4453,
965
+ "step": 765
966
+ },
967
+ {
968
+ "epoch": 0.12,
969
+ "learning_rate": 9.880952380952381e-06,
970
+ "loss": 0.4252,
971
+ "step": 770
972
+ },
973
+ {
974
+ "epoch": 0.12,
975
+ "learning_rate": 9.88017934446506e-06,
976
+ "loss": 0.411,
977
+ "step": 775
978
+ },
979
+ {
980
+ "epoch": 0.12,
981
+ "learning_rate": 9.879406307977738e-06,
982
+ "loss": 0.435,
983
+ "step": 780
984
+ },
985
+ {
986
+ "epoch": 0.12,
987
+ "learning_rate": 9.878633271490415e-06,
988
+ "loss": 0.3948,
989
+ "step": 785
990
+ },
991
+ {
992
+ "epoch": 0.12,
993
+ "learning_rate": 9.877860235003092e-06,
994
+ "loss": 0.4004,
995
+ "step": 790
996
+ },
997
+ {
998
+ "epoch": 0.12,
999
+ "learning_rate": 9.877087198515771e-06,
1000
+ "loss": 0.4515,
1001
+ "step": 795
1002
+ },
1003
+ {
1004
+ "epoch": 0.12,
1005
+ "learning_rate": 9.876314162028449e-06,
1006
+ "loss": 0.4117,
1007
+ "step": 800
1008
+ },
1009
+ {
1010
+ "epoch": 0.12,
1011
+ "eval_accuracy": 0.5725113756778657,
1012
+ "eval_accuracy_sklearn": 0.5725113756778657,
1013
+ "eval_f1": 0.5080130274465917,
1014
+ "eval_loss": 0.9323258996009827,
1015
+ "eval_precision": 0.6346884634688463,
1016
+ "eval_recall": 0.423490013156321,
1017
+ "eval_runtime": 4928.5167,
1018
+ "eval_samples_per_second": 16.276,
1019
+ "eval_steps_per_second": 2.034,
1020
+ "step": 800
1021
+ }
1022
+ ],
1023
+ "max_steps": 64680,
1024
+ "num_train_epochs": 10,
1025
+ "total_flos": 9.50142335188992e+16,
1026
+ "trial_name": null,
1027
+ "trial_params": null
1028
+ }
ranker_bs256_0/checkpoint-800/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a89904cc492981aada74646ac2bddbb0a7e40cc9d86615845667f8c5c4c4e4f
3
+ size 3451
ranker_bs256_0/checkpoint-800/vocab.json ADDED
The diff for this file is too large to render. See raw diff