Logic123456789 committed on
Commit
c66dc07
1 Parent(s): ffeef58

add the model

config.json ADDED
@@ -0,0 +1,1574 @@
+ {
+ "_name_or_path": "/data/workspace/Embedding/embedding/output-2epoch-final/checkpoint-50000",
+ "architectures": [
+ "BertForCL"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "bos_token_id": 0,
+ "classifier_dropout": null,
+ "directionality": "bidi",
+ "eos_token_id": 2,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 1024,
+ "id2label": {
15
+ "0": "LABEL_0",
16
+ "1": "LABEL_1",
17
+ "2": "LABEL_2",
18
+ "3": "LABEL_3",
19
+ "4": "LABEL_4",
20
+ "5": "LABEL_5",
21
+ "6": "LABEL_6",
22
+ "7": "LABEL_7",
23
+ "8": "LABEL_8",
24
+ "9": "LABEL_9",
25
+ "10": "LABEL_10",
26
+ "11": "LABEL_11",
27
+ "12": "LABEL_12",
28
+ "13": "LABEL_13",
29
+ "14": "LABEL_14",
30
+ "15": "LABEL_15",
31
+ "16": "LABEL_16",
32
+ "17": "LABEL_17",
33
+ "18": "LABEL_18",
34
+ "19": "LABEL_19",
35
+ "20": "LABEL_20",
36
+ "21": "LABEL_21",
37
+ "22": "LABEL_22",
38
+ "23": "LABEL_23",
39
+ "24": "LABEL_24",
40
+ "25": "LABEL_25",
41
+ "26": "LABEL_26",
42
+ "27": "LABEL_27",
43
+ "28": "LABEL_28",
44
+ "29": "LABEL_29",
45
+ "30": "LABEL_30",
46
+ "31": "LABEL_31",
47
+ "32": "LABEL_32",
48
+ "33": "LABEL_33",
49
+ "34": "LABEL_34",
50
+ "35": "LABEL_35",
51
+ "36": "LABEL_36",
52
+ "37": "LABEL_37",
53
+ "38": "LABEL_38",
54
+ "39": "LABEL_39",
55
+ "40": "LABEL_40",
56
+ "41": "LABEL_41",
57
+ "42": "LABEL_42",
58
+ "43": "LABEL_43",
59
+ "44": "LABEL_44",
60
+ "45": "LABEL_45",
61
+ "46": "LABEL_46",
62
+ "47": "LABEL_47",
63
+ "48": "LABEL_48",
64
+ "49": "LABEL_49",
65
+ "50": "LABEL_50",
66
+ "51": "LABEL_51",
67
+ "52": "LABEL_52",
68
+ "53": "LABEL_53",
69
+ "54": "LABEL_54",
70
+ "55": "LABEL_55",
71
+ "56": "LABEL_56",
72
+ "57": "LABEL_57",
73
+ "58": "LABEL_58",
74
+ "59": "LABEL_59",
75
+ "60": "LABEL_60",
76
+ "61": "LABEL_61",
77
+ "62": "LABEL_62",
78
+ "63": "LABEL_63",
79
+ "64": "LABEL_64",
80
+ "65": "LABEL_65",
81
+ "66": "LABEL_66",
82
+ "67": "LABEL_67",
83
+ "68": "LABEL_68",
84
+ "69": "LABEL_69",
85
+ "70": "LABEL_70",
86
+ "71": "LABEL_71",
87
+ "72": "LABEL_72",
88
+ "73": "LABEL_73",
89
+ "74": "LABEL_74",
90
+ "75": "LABEL_75",
91
+ "76": "LABEL_76",
92
+ "77": "LABEL_77",
93
+ "78": "LABEL_78",
94
+ "79": "LABEL_79",
95
+ "80": "LABEL_80",
96
+ "81": "LABEL_81",
97
+ "82": "LABEL_82",
98
+ "83": "LABEL_83",
99
+ "84": "LABEL_84",
100
+ "85": "LABEL_85",
101
+ "86": "LABEL_86",
102
+ "87": "LABEL_87",
103
+ "88": "LABEL_88",
104
+ "89": "LABEL_89",
105
+ "90": "LABEL_90",
106
+ "91": "LABEL_91",
107
+ "92": "LABEL_92",
108
+ "93": "LABEL_93",
109
+ "94": "LABEL_94",
110
+ "95": "LABEL_95",
111
+ "96": "LABEL_96",
112
+ "97": "LABEL_97",
113
+ "98": "LABEL_98",
114
+ "99": "LABEL_99",
115
+ "100": "LABEL_100",
116
+ "101": "LABEL_101",
117
+ "102": "LABEL_102",
118
+ "103": "LABEL_103",
119
+ "104": "LABEL_104",
120
+ "105": "LABEL_105",
121
+ "106": "LABEL_106",
122
+ "107": "LABEL_107",
123
+ "108": "LABEL_108",
124
+ "109": "LABEL_109",
125
+ "110": "LABEL_110",
126
+ "111": "LABEL_111",
127
+ "112": "LABEL_112",
128
+ "113": "LABEL_113",
129
+ "114": "LABEL_114",
130
+ "115": "LABEL_115",
131
+ "116": "LABEL_116",
132
+ "117": "LABEL_117",
133
+ "118": "LABEL_118",
134
+ "119": "LABEL_119",
135
+ "120": "LABEL_120",
136
+ "121": "LABEL_121",
137
+ "122": "LABEL_122",
138
+ "123": "LABEL_123",
139
+ "124": "LABEL_124",
140
+ "125": "LABEL_125",
141
+ "126": "LABEL_126",
142
+ "127": "LABEL_127",
143
+ "128": "LABEL_128",
144
+ "129": "LABEL_129",
145
+ "130": "LABEL_130",
146
+ "131": "LABEL_131",
147
+ "132": "LABEL_132",
148
+ "133": "LABEL_133",
149
+ "134": "LABEL_134",
150
+ "135": "LABEL_135",
151
+ "136": "LABEL_136",
152
+ "137": "LABEL_137",
153
+ "138": "LABEL_138",
154
+ "139": "LABEL_139",
155
+ "140": "LABEL_140",
156
+ "141": "LABEL_141",
157
+ "142": "LABEL_142",
158
+ "143": "LABEL_143",
159
+ "144": "LABEL_144",
160
+ "145": "LABEL_145",
161
+ "146": "LABEL_146",
162
+ "147": "LABEL_147",
163
+ "148": "LABEL_148",
164
+ "149": "LABEL_149",
165
+ "150": "LABEL_150",
166
+ "151": "LABEL_151",
167
+ "152": "LABEL_152",
168
+ "153": "LABEL_153",
169
+ "154": "LABEL_154",
170
+ "155": "LABEL_155",
171
+ "156": "LABEL_156",
172
+ "157": "LABEL_157",
173
+ "158": "LABEL_158",
174
+ "159": "LABEL_159",
175
+ "160": "LABEL_160",
176
+ "161": "LABEL_161",
177
+ "162": "LABEL_162",
178
+ "163": "LABEL_163",
179
+ "164": "LABEL_164",
180
+ "165": "LABEL_165",
181
+ "166": "LABEL_166",
182
+ "167": "LABEL_167",
183
+ "168": "LABEL_168",
184
+ "169": "LABEL_169",
185
+ "170": "LABEL_170",
186
+ "171": "LABEL_171",
187
+ "172": "LABEL_172",
188
+ "173": "LABEL_173",
189
+ "174": "LABEL_174",
190
+ "175": "LABEL_175",
191
+ "176": "LABEL_176",
192
+ "177": "LABEL_177",
193
+ "178": "LABEL_178",
194
+ "179": "LABEL_179",
195
+ "180": "LABEL_180",
196
+ "181": "LABEL_181",
197
+ "182": "LABEL_182",
198
+ "183": "LABEL_183",
199
+ "184": "LABEL_184",
200
+ "185": "LABEL_185",
201
+ "186": "LABEL_186",
202
+ "187": "LABEL_187",
203
+ "188": "LABEL_188",
204
+ "189": "LABEL_189",
205
+ "190": "LABEL_190",
206
+ "191": "LABEL_191",
207
+ "192": "LABEL_192",
208
+ "193": "LABEL_193",
209
+ "194": "LABEL_194",
210
+ "195": "LABEL_195",
211
+ "196": "LABEL_196",
212
+ "197": "LABEL_197",
213
+ "198": "LABEL_198",
214
+ "199": "LABEL_199",
215
+ "200": "LABEL_200",
216
+ "201": "LABEL_201",
217
+ "202": "LABEL_202",
218
+ "203": "LABEL_203",
219
+ "204": "LABEL_204",
220
+ "205": "LABEL_205",
221
+ "206": "LABEL_206",
222
+ "207": "LABEL_207",
223
+ "208": "LABEL_208",
224
+ "209": "LABEL_209",
225
+ "210": "LABEL_210",
226
+ "211": "LABEL_211",
227
+ "212": "LABEL_212",
228
+ "213": "LABEL_213",
229
+ "214": "LABEL_214",
230
+ "215": "LABEL_215",
231
+ "216": "LABEL_216",
232
+ "217": "LABEL_217",
233
+ "218": "LABEL_218",
234
+ "219": "LABEL_219",
235
+ "220": "LABEL_220",
236
+ "221": "LABEL_221",
237
+ "222": "LABEL_222",
238
+ "223": "LABEL_223",
239
+ "224": "LABEL_224",
240
+ "225": "LABEL_225",
241
+ "226": "LABEL_226",
242
+ "227": "LABEL_227",
243
+ "228": "LABEL_228",
244
+ "229": "LABEL_229",
245
+ "230": "LABEL_230",
246
+ "231": "LABEL_231",
247
+ "232": "LABEL_232",
248
+ "233": "LABEL_233",
249
+ "234": "LABEL_234",
250
+ "235": "LABEL_235",
251
+ "236": "LABEL_236",
252
+ "237": "LABEL_237",
253
+ "238": "LABEL_238",
254
+ "239": "LABEL_239",
255
+ "240": "LABEL_240",
256
+ "241": "LABEL_241",
257
+ "242": "LABEL_242",
258
+ "243": "LABEL_243",
259
+ "244": "LABEL_244",
260
+ "245": "LABEL_245",
261
+ "246": "LABEL_246",
262
+ "247": "LABEL_247",
263
+ "248": "LABEL_248",
264
+ "249": "LABEL_249",
265
+ "250": "LABEL_250",
266
+ "251": "LABEL_251",
267
+ "252": "LABEL_252",
268
+ "253": "LABEL_253",
269
+ "254": "LABEL_254",
270
+ "255": "LABEL_255",
271
+ "256": "LABEL_256",
272
+ "257": "LABEL_257",
273
+ "258": "LABEL_258",
274
+ "259": "LABEL_259",
275
+ "260": "LABEL_260",
276
+ "261": "LABEL_261",
277
+ "262": "LABEL_262",
278
+ "263": "LABEL_263",
279
+ "264": "LABEL_264",
280
+ "265": "LABEL_265",
281
+ "266": "LABEL_266",
282
+ "267": "LABEL_267",
283
+ "268": "LABEL_268",
284
+ "269": "LABEL_269",
285
+ "270": "LABEL_270",
286
+ "271": "LABEL_271",
287
+ "272": "LABEL_272",
288
+ "273": "LABEL_273",
289
+ "274": "LABEL_274",
290
+ "275": "LABEL_275",
291
+ "276": "LABEL_276",
292
+ "277": "LABEL_277",
293
+ "278": "LABEL_278",
294
+ "279": "LABEL_279",
295
+ "280": "LABEL_280",
296
+ "281": "LABEL_281",
297
+ "282": "LABEL_282",
298
+ "283": "LABEL_283",
299
+ "284": "LABEL_284",
300
+ "285": "LABEL_285",
301
+ "286": "LABEL_286",
302
+ "287": "LABEL_287",
303
+ "288": "LABEL_288",
304
+ "289": "LABEL_289",
305
+ "290": "LABEL_290",
306
+ "291": "LABEL_291",
307
+ "292": "LABEL_292",
308
+ "293": "LABEL_293",
309
+ "294": "LABEL_294",
310
+ "295": "LABEL_295",
311
+ "296": "LABEL_296",
312
+ "297": "LABEL_297",
313
+ "298": "LABEL_298",
314
+ "299": "LABEL_299",
315
+ "300": "LABEL_300",
316
+ "301": "LABEL_301",
317
+ "302": "LABEL_302",
318
+ "303": "LABEL_303",
319
+ "304": "LABEL_304",
320
+ "305": "LABEL_305",
321
+ "306": "LABEL_306",
322
+ "307": "LABEL_307",
323
+ "308": "LABEL_308",
324
+ "309": "LABEL_309",
325
+ "310": "LABEL_310",
326
+ "311": "LABEL_311",
327
+ "312": "LABEL_312",
328
+ "313": "LABEL_313",
329
+ "314": "LABEL_314",
330
+ "315": "LABEL_315",
331
+ "316": "LABEL_316",
332
+ "317": "LABEL_317",
333
+ "318": "LABEL_318",
334
+ "319": "LABEL_319",
335
+ "320": "LABEL_320",
336
+ "321": "LABEL_321",
337
+ "322": "LABEL_322",
338
+ "323": "LABEL_323",
339
+ "324": "LABEL_324",
340
+ "325": "LABEL_325",
341
+ "326": "LABEL_326",
342
+ "327": "LABEL_327",
343
+ "328": "LABEL_328",
344
+ "329": "LABEL_329",
345
+ "330": "LABEL_330",
346
+ "331": "LABEL_331",
347
+ "332": "LABEL_332",
348
+ "333": "LABEL_333",
349
+ "334": "LABEL_334",
350
+ "335": "LABEL_335",
351
+ "336": "LABEL_336",
352
+ "337": "LABEL_337",
353
+ "338": "LABEL_338",
354
+ "339": "LABEL_339",
355
+ "340": "LABEL_340",
356
+ "341": "LABEL_341",
357
+ "342": "LABEL_342",
358
+ "343": "LABEL_343",
359
+ "344": "LABEL_344",
360
+ "345": "LABEL_345",
361
+ "346": "LABEL_346",
362
+ "347": "LABEL_347",
363
+ "348": "LABEL_348",
364
+ "349": "LABEL_349",
365
+ "350": "LABEL_350",
366
+ "351": "LABEL_351",
367
+ "352": "LABEL_352",
368
+ "353": "LABEL_353",
369
+ "354": "LABEL_354",
370
+ "355": "LABEL_355",
371
+ "356": "LABEL_356",
372
+ "357": "LABEL_357",
373
+ "358": "LABEL_358",
374
+ "359": "LABEL_359",
375
+ "360": "LABEL_360",
376
+ "361": "LABEL_361",
377
+ "362": "LABEL_362",
378
+ "363": "LABEL_363",
379
+ "364": "LABEL_364",
380
+ "365": "LABEL_365",
381
+ "366": "LABEL_366",
382
+ "367": "LABEL_367",
383
+ "368": "LABEL_368",
384
+ "369": "LABEL_369",
385
+ "370": "LABEL_370",
386
+ "371": "LABEL_371",
387
+ "372": "LABEL_372",
388
+ "373": "LABEL_373",
389
+ "374": "LABEL_374",
390
+ "375": "LABEL_375",
391
+ "376": "LABEL_376",
392
+ "377": "LABEL_377",
393
+ "378": "LABEL_378",
394
+ "379": "LABEL_379",
395
+ "380": "LABEL_380",
396
+ "381": "LABEL_381",
397
+ "382": "LABEL_382",
398
+ "383": "LABEL_383",
399
+ "384": "LABEL_384",
400
+ "385": "LABEL_385",
401
+ "386": "LABEL_386",
402
+ "387": "LABEL_387",
403
+ "388": "LABEL_388",
404
+ "389": "LABEL_389",
405
+ "390": "LABEL_390",
406
+ "391": "LABEL_391",
407
+ "392": "LABEL_392",
408
+ "393": "LABEL_393",
409
+ "394": "LABEL_394",
410
+ "395": "LABEL_395",
411
+ "396": "LABEL_396",
412
+ "397": "LABEL_397",
413
+ "398": "LABEL_398",
414
+ "399": "LABEL_399",
415
+ "400": "LABEL_400",
416
+ "401": "LABEL_401",
417
+ "402": "LABEL_402",
418
+ "403": "LABEL_403",
419
+ "404": "LABEL_404",
420
+ "405": "LABEL_405",
421
+ "406": "LABEL_406",
422
+ "407": "LABEL_407",
423
+ "408": "LABEL_408",
424
+ "409": "LABEL_409",
425
+ "410": "LABEL_410",
426
+ "411": "LABEL_411",
427
+ "412": "LABEL_412",
428
+ "413": "LABEL_413",
429
+ "414": "LABEL_414",
430
+ "415": "LABEL_415",
431
+ "416": "LABEL_416",
432
+ "417": "LABEL_417",
433
+ "418": "LABEL_418",
434
+ "419": "LABEL_419",
435
+ "420": "LABEL_420",
436
+ "421": "LABEL_421",
437
+ "422": "LABEL_422",
438
+ "423": "LABEL_423",
439
+ "424": "LABEL_424",
440
+ "425": "LABEL_425",
441
+ "426": "LABEL_426",
442
+ "427": "LABEL_427",
443
+ "428": "LABEL_428",
444
+ "429": "LABEL_429",
445
+ "430": "LABEL_430",
446
+ "431": "LABEL_431",
447
+ "432": "LABEL_432",
448
+ "433": "LABEL_433",
449
+ "434": "LABEL_434",
450
+ "435": "LABEL_435",
451
+ "436": "LABEL_436",
452
+ "437": "LABEL_437",
453
+ "438": "LABEL_438",
454
+ "439": "LABEL_439",
455
+ "440": "LABEL_440",
456
+ "441": "LABEL_441",
457
+ "442": "LABEL_442",
458
+ "443": "LABEL_443",
459
+ "444": "LABEL_444",
460
+ "445": "LABEL_445",
461
+ "446": "LABEL_446",
462
+ "447": "LABEL_447",
463
+ "448": "LABEL_448",
464
+ "449": "LABEL_449",
465
+ "450": "LABEL_450",
466
+ "451": "LABEL_451",
467
+ "452": "LABEL_452",
468
+ "453": "LABEL_453",
469
+ "454": "LABEL_454",
470
+ "455": "LABEL_455",
471
+ "456": "LABEL_456",
472
+ "457": "LABEL_457",
473
+ "458": "LABEL_458",
474
+ "459": "LABEL_459",
475
+ "460": "LABEL_460",
476
+ "461": "LABEL_461",
477
+ "462": "LABEL_462",
478
+ "463": "LABEL_463",
479
+ "464": "LABEL_464",
480
+ "465": "LABEL_465",
481
+ "466": "LABEL_466",
482
+ "467": "LABEL_467",
483
+ "468": "LABEL_468",
484
+ "469": "LABEL_469",
485
+ "470": "LABEL_470",
486
+ "471": "LABEL_471",
487
+ "472": "LABEL_472",
488
+ "473": "LABEL_473",
489
+ "474": "LABEL_474",
490
+ "475": "LABEL_475",
491
+ "476": "LABEL_476",
492
+ "477": "LABEL_477",
493
+ "478": "LABEL_478",
494
+ "479": "LABEL_479",
495
+ "480": "LABEL_480",
496
+ "481": "LABEL_481",
497
+ "482": "LABEL_482",
498
+ "483": "LABEL_483",
499
+ "484": "LABEL_484",
500
+ "485": "LABEL_485",
501
+ "486": "LABEL_486",
502
+ "487": "LABEL_487",
503
+ "488": "LABEL_488",
504
+ "489": "LABEL_489",
505
+ "490": "LABEL_490",
506
+ "491": "LABEL_491",
507
+ "492": "LABEL_492",
508
+ "493": "LABEL_493",
509
+ "494": "LABEL_494",
510
+ "495": "LABEL_495",
511
+ "496": "LABEL_496",
512
+ "497": "LABEL_497",
513
+ "498": "LABEL_498",
514
+ "499": "LABEL_499",
515
+ "500": "LABEL_500",
516
+ "501": "LABEL_501",
517
+ "502": "LABEL_502",
518
+ "503": "LABEL_503",
519
+ "504": "LABEL_504",
520
+ "505": "LABEL_505",
521
+ "506": "LABEL_506",
522
+ "507": "LABEL_507",
523
+ "508": "LABEL_508",
524
+ "509": "LABEL_509",
525
+ "510": "LABEL_510",
526
+ "511": "LABEL_511",
527
+ "512": "LABEL_512",
528
+ "513": "LABEL_513",
529
+ "514": "LABEL_514",
530
+ "515": "LABEL_515",
531
+ "516": "LABEL_516",
532
+ "517": "LABEL_517",
533
+ "518": "LABEL_518",
534
+ "519": "LABEL_519",
535
+ "520": "LABEL_520",
536
+ "521": "LABEL_521",
537
+ "522": "LABEL_522",
538
+ "523": "LABEL_523",
539
+ "524": "LABEL_524",
540
+ "525": "LABEL_525",
541
+ "526": "LABEL_526",
542
+ "527": "LABEL_527",
543
+ "528": "LABEL_528",
544
+ "529": "LABEL_529",
545
+ "530": "LABEL_530",
546
+ "531": "LABEL_531",
547
+ "532": "LABEL_532",
548
+ "533": "LABEL_533",
549
+ "534": "LABEL_534",
550
+ "535": "LABEL_535",
551
+ "536": "LABEL_536",
552
+ "537": "LABEL_537",
553
+ "538": "LABEL_538",
554
+ "539": "LABEL_539",
555
+ "540": "LABEL_540",
556
+ "541": "LABEL_541",
557
+ "542": "LABEL_542",
558
+ "543": "LABEL_543",
559
+ "544": "LABEL_544",
560
+ "545": "LABEL_545",
561
+ "546": "LABEL_546",
562
+ "547": "LABEL_547",
563
+ "548": "LABEL_548",
564
+ "549": "LABEL_549",
565
+ "550": "LABEL_550",
566
+ "551": "LABEL_551",
567
+ "552": "LABEL_552",
568
+ "553": "LABEL_553",
569
+ "554": "LABEL_554",
570
+ "555": "LABEL_555",
571
+ "556": "LABEL_556",
572
+ "557": "LABEL_557",
573
+ "558": "LABEL_558",
574
+ "559": "LABEL_559",
575
+ "560": "LABEL_560",
576
+ "561": "LABEL_561",
577
+ "562": "LABEL_562",
578
+ "563": "LABEL_563",
579
+ "564": "LABEL_564",
580
+ "565": "LABEL_565",
581
+ "566": "LABEL_566",
582
+ "567": "LABEL_567",
583
+ "568": "LABEL_568",
584
+ "569": "LABEL_569",
585
+ "570": "LABEL_570",
586
+ "571": "LABEL_571",
587
+ "572": "LABEL_572",
588
+ "573": "LABEL_573",
589
+ "574": "LABEL_574",
590
+ "575": "LABEL_575",
591
+ "576": "LABEL_576",
592
+ "577": "LABEL_577",
593
+ "578": "LABEL_578",
594
+ "579": "LABEL_579",
595
+ "580": "LABEL_580",
596
+ "581": "LABEL_581",
597
+ "582": "LABEL_582",
598
+ "583": "LABEL_583",
599
+ "584": "LABEL_584",
600
+ "585": "LABEL_585",
601
+ "586": "LABEL_586",
602
+ "587": "LABEL_587",
603
+ "588": "LABEL_588",
604
+ "589": "LABEL_589",
605
+ "590": "LABEL_590",
606
+ "591": "LABEL_591",
607
+ "592": "LABEL_592",
608
+ "593": "LABEL_593",
609
+ "594": "LABEL_594",
610
+ "595": "LABEL_595",
611
+ "596": "LABEL_596",
612
+ "597": "LABEL_597",
613
+ "598": "LABEL_598",
614
+ "599": "LABEL_599",
615
+ "600": "LABEL_600",
616
+ "601": "LABEL_601",
617
+ "602": "LABEL_602",
618
+ "603": "LABEL_603",
619
+ "604": "LABEL_604",
620
+ "605": "LABEL_605",
621
+ "606": "LABEL_606",
622
+ "607": "LABEL_607",
623
+ "608": "LABEL_608",
624
+ "609": "LABEL_609",
625
+ "610": "LABEL_610",
626
+ "611": "LABEL_611",
627
+ "612": "LABEL_612",
628
+ "613": "LABEL_613",
629
+ "614": "LABEL_614",
630
+ "615": "LABEL_615",
631
+ "616": "LABEL_616",
632
+ "617": "LABEL_617",
633
+ "618": "LABEL_618",
634
+ "619": "LABEL_619",
635
+ "620": "LABEL_620",
636
+ "621": "LABEL_621",
637
+ "622": "LABEL_622",
638
+ "623": "LABEL_623",
639
+ "624": "LABEL_624",
640
+ "625": "LABEL_625",
641
+ "626": "LABEL_626",
642
+ "627": "LABEL_627",
643
+ "628": "LABEL_628",
644
+ "629": "LABEL_629",
645
+ "630": "LABEL_630",
646
+ "631": "LABEL_631",
647
+ "632": "LABEL_632",
648
+ "633": "LABEL_633",
649
+ "634": "LABEL_634",
650
+ "635": "LABEL_635",
651
+ "636": "LABEL_636",
652
+ "637": "LABEL_637",
653
+ "638": "LABEL_638",
654
+ "639": "LABEL_639",
655
+ "640": "LABEL_640",
656
+ "641": "LABEL_641",
657
+ "642": "LABEL_642",
658
+ "643": "LABEL_643",
659
+ "644": "LABEL_644",
660
+ "645": "LABEL_645",
661
+ "646": "LABEL_646",
662
+ "647": "LABEL_647",
663
+ "648": "LABEL_648",
664
+ "649": "LABEL_649",
665
+ "650": "LABEL_650",
666
+ "651": "LABEL_651",
667
+ "652": "LABEL_652",
668
+ "653": "LABEL_653",
669
+ "654": "LABEL_654",
670
+ "655": "LABEL_655",
671
+ "656": "LABEL_656",
672
+ "657": "LABEL_657",
673
+ "658": "LABEL_658",
674
+ "659": "LABEL_659",
675
+ "660": "LABEL_660",
676
+ "661": "LABEL_661",
677
+ "662": "LABEL_662",
678
+ "663": "LABEL_663",
679
+ "664": "LABEL_664",
680
+ "665": "LABEL_665",
681
+ "666": "LABEL_666",
682
+ "667": "LABEL_667",
683
+ "668": "LABEL_668",
684
+ "669": "LABEL_669",
685
+ "670": "LABEL_670",
686
+ "671": "LABEL_671",
687
+ "672": "LABEL_672",
688
+ "673": "LABEL_673",
689
+ "674": "LABEL_674",
690
+ "675": "LABEL_675",
691
+ "676": "LABEL_676",
692
+ "677": "LABEL_677",
693
+ "678": "LABEL_678",
694
+ "679": "LABEL_679",
695
+ "680": "LABEL_680",
696
+ "681": "LABEL_681",
697
+ "682": "LABEL_682",
698
+ "683": "LABEL_683",
699
+ "684": "LABEL_684",
700
+ "685": "LABEL_685",
701
+ "686": "LABEL_686",
702
+ "687": "LABEL_687",
703
+ "688": "LABEL_688",
704
+ "689": "LABEL_689",
705
+ "690": "LABEL_690",
706
+ "691": "LABEL_691",
707
+ "692": "LABEL_692",
708
+ "693": "LABEL_693",
709
+ "694": "LABEL_694",
710
+ "695": "LABEL_695",
711
+ "696": "LABEL_696",
712
+ "697": "LABEL_697",
713
+ "698": "LABEL_698",
714
+ "699": "LABEL_699",
715
+ "700": "LABEL_700",
716
+ "701": "LABEL_701",
717
+ "702": "LABEL_702",
718
+ "703": "LABEL_703",
719
+ "704": "LABEL_704",
720
+ "705": "LABEL_705",
721
+ "706": "LABEL_706",
722
+ "707": "LABEL_707",
723
+ "708": "LABEL_708",
724
+ "709": "LABEL_709",
725
+ "710": "LABEL_710",
726
+ "711": "LABEL_711",
727
+ "712": "LABEL_712",
728
+ "713": "LABEL_713",
729
+ "714": "LABEL_714",
730
+ "715": "LABEL_715",
731
+ "716": "LABEL_716",
732
+ "717": "LABEL_717",
733
+ "718": "LABEL_718",
734
+ "719": "LABEL_719",
735
+ "720": "LABEL_720",
736
+ "721": "LABEL_721",
737
+ "722": "LABEL_722",
738
+ "723": "LABEL_723",
739
+ "724": "LABEL_724",
740
+ "725": "LABEL_725",
741
+ "726": "LABEL_726",
742
+ "727": "LABEL_727",
743
+ "728": "LABEL_728",
744
+ "729": "LABEL_729",
745
+ "730": "LABEL_730",
746
+ "731": "LABEL_731",
747
+ "732": "LABEL_732",
748
+ "733": "LABEL_733",
749
+ "734": "LABEL_734",
750
+ "735": "LABEL_735",
751
+ "736": "LABEL_736",
752
+ "737": "LABEL_737",
753
+ "738": "LABEL_738",
754
+ "739": "LABEL_739",
755
+ "740": "LABEL_740",
756
+ "741": "LABEL_741",
757
+ "742": "LABEL_742",
758
+ "743": "LABEL_743",
759
+ "744": "LABEL_744",
760
+ "745": "LABEL_745",
761
+ "746": "LABEL_746",
762
+ "747": "LABEL_747",
763
+ "748": "LABEL_748",
764
+ "749": "LABEL_749",
765
+ "750": "LABEL_750",
766
+ "751": "LABEL_751",
767
+ "752": "LABEL_752",
768
+ "753": "LABEL_753",
769
+ "754": "LABEL_754",
770
+ "755": "LABEL_755",
771
+ "756": "LABEL_756",
772
+ "757": "LABEL_757",
773
+ "758": "LABEL_758",
774
+ "759": "LABEL_759",
775
+ "760": "LABEL_760",
776
+ "761": "LABEL_761",
777
+ "762": "LABEL_762",
778
+ "763": "LABEL_763",
779
+ "764": "LABEL_764",
780
+ "765": "LABEL_765",
781
+ "766": "LABEL_766",
782
+ "767": "LABEL_767"
783
+ },
784
+ "initializer_range": 0.02,
785
+ "intermediate_size": 4096,
786
+ "label2id": {
787
+ "LABEL_0": 0,
788
+ "LABEL_1": 1,
789
+ "LABEL_10": 10,
790
+ "LABEL_100": 100,
791
+ "LABEL_101": 101,
792
+ "LABEL_102": 102,
793
+ "LABEL_103": 103,
794
+ "LABEL_104": 104,
795
+ "LABEL_105": 105,
796
+ "LABEL_106": 106,
797
+ "LABEL_107": 107,
798
+ "LABEL_108": 108,
799
+ "LABEL_109": 109,
800
+ "LABEL_11": 11,
801
+ "LABEL_110": 110,
802
+ "LABEL_111": 111,
803
+ "LABEL_112": 112,
804
+ "LABEL_113": 113,
805
+ "LABEL_114": 114,
806
+ "LABEL_115": 115,
807
+ "LABEL_116": 116,
808
+ "LABEL_117": 117,
809
+ "LABEL_118": 118,
810
+ "LABEL_119": 119,
811
+ "LABEL_12": 12,
812
+ "LABEL_120": 120,
813
+ "LABEL_121": 121,
814
+ "LABEL_122": 122,
815
+ "LABEL_123": 123,
816
+ "LABEL_124": 124,
817
+ "LABEL_125": 125,
818
+ "LABEL_126": 126,
819
+ "LABEL_127": 127,
820
+ "LABEL_128": 128,
821
+ "LABEL_129": 129,
822
+ "LABEL_13": 13,
823
+ "LABEL_130": 130,
824
+ "LABEL_131": 131,
825
+ "LABEL_132": 132,
826
+ "LABEL_133": 133,
827
+ "LABEL_134": 134,
828
+ "LABEL_135": 135,
829
+ "LABEL_136": 136,
830
+ "LABEL_137": 137,
831
+ "LABEL_138": 138,
832
+ "LABEL_139": 139,
833
+ "LABEL_14": 14,
834
+ "LABEL_140": 140,
835
+ "LABEL_141": 141,
836
+ "LABEL_142": 142,
837
+ "LABEL_143": 143,
838
+ "LABEL_144": 144,
839
+ "LABEL_145": 145,
840
+ "LABEL_146": 146,
841
+ "LABEL_147": 147,
842
+ "LABEL_148": 148,
843
+ "LABEL_149": 149,
844
+ "LABEL_15": 15,
845
+ "LABEL_150": 150,
846
+ "LABEL_151": 151,
847
+ "LABEL_152": 152,
848
+ "LABEL_153": 153,
849
+ "LABEL_154": 154,
850
+ "LABEL_155": 155,
851
+ "LABEL_156": 156,
852
+ "LABEL_157": 157,
853
+ "LABEL_158": 158,
854
+ "LABEL_159": 159,
855
+ "LABEL_16": 16,
856
+ "LABEL_160": 160,
857
+ "LABEL_161": 161,
858
+ "LABEL_162": 162,
859
+ "LABEL_163": 163,
860
+ "LABEL_164": 164,
861
+ "LABEL_165": 165,
862
+ "LABEL_166": 166,
863
+ "LABEL_167": 167,
864
+ "LABEL_168": 168,
865
+ "LABEL_169": 169,
866
+ "LABEL_17": 17,
867
+ "LABEL_170": 170,
868
+ "LABEL_171": 171,
869
+ "LABEL_172": 172,
870
+ "LABEL_173": 173,
871
+ "LABEL_174": 174,
872
+ "LABEL_175": 175,
873
+ "LABEL_176": 176,
874
+ "LABEL_177": 177,
875
+ "LABEL_178": 178,
876
+ "LABEL_179": 179,
877
+ "LABEL_18": 18,
878
+ "LABEL_180": 180,
879
+ "LABEL_181": 181,
880
+ "LABEL_182": 182,
881
+ "LABEL_183": 183,
882
+ "LABEL_184": 184,
883
+ "LABEL_185": 185,
884
+ "LABEL_186": 186,
885
+ "LABEL_187": 187,
886
+ "LABEL_188": 188,
887
+ "LABEL_189": 189,
888
+ "LABEL_19": 19,
889
+ "LABEL_190": 190,
890
+ "LABEL_191": 191,
891
+ "LABEL_192": 192,
892
+ "LABEL_193": 193,
893
+ "LABEL_194": 194,
894
+ "LABEL_195": 195,
895
+ "LABEL_196": 196,
896
+ "LABEL_197": 197,
897
+ "LABEL_198": 198,
898
+ "LABEL_199": 199,
899
+ "LABEL_2": 2,
900
+ "LABEL_20": 20,
901
+ "LABEL_200": 200,
902
+ "LABEL_201": 201,
903
+ "LABEL_202": 202,
904
+ "LABEL_203": 203,
905
+ "LABEL_204": 204,
906
+ "LABEL_205": 205,
907
+ "LABEL_206": 206,
908
+ "LABEL_207": 207,
909
+ "LABEL_208": 208,
910
+ "LABEL_209": 209,
911
+ "LABEL_21": 21,
912
+ "LABEL_210": 210,
913
+ "LABEL_211": 211,
914
+ "LABEL_212": 212,
915
+ "LABEL_213": 213,
916
+ "LABEL_214": 214,
917
+ "LABEL_215": 215,
918
+ "LABEL_216": 216,
919
+ "LABEL_217": 217,
920
+ "LABEL_218": 218,
921
+ "LABEL_219": 219,
922
+ "LABEL_22": 22,
923
+ "LABEL_220": 220,
924
+ "LABEL_221": 221,
925
+ "LABEL_222": 222,
926
+ "LABEL_223": 223,
927
+ "LABEL_224": 224,
928
+ "LABEL_225": 225,
929
+ "LABEL_226": 226,
930
+ "LABEL_227": 227,
931
+ "LABEL_228": 228,
932
+ "LABEL_229": 229,
933
+ "LABEL_23": 23,
934
+ "LABEL_230": 230,
935
+ "LABEL_231": 231,
936
+ "LABEL_232": 232,
937
+ "LABEL_233": 233,
938
+ "LABEL_234": 234,
939
+ "LABEL_235": 235,
940
+ "LABEL_236": 236,
941
+ "LABEL_237": 237,
942
+ "LABEL_238": 238,
943
+ "LABEL_239": 239,
944
+ "LABEL_24": 24,
945
+ "LABEL_240": 240,
946
+ "LABEL_241": 241,
947
+ "LABEL_242": 242,
948
+ "LABEL_243": 243,
949
+ "LABEL_244": 244,
950
+ "LABEL_245": 245,
951
+ "LABEL_246": 246,
952
+ "LABEL_247": 247,
953
+ "LABEL_248": 248,
954
+ "LABEL_249": 249,
955
+ "LABEL_25": 25,
956
+ "LABEL_250": 250,
957
+ "LABEL_251": 251,
958
+ "LABEL_252": 252,
959
+ "LABEL_253": 253,
960
+ "LABEL_254": 254,
961
+ "LABEL_255": 255,
962
+ "LABEL_256": 256,
963
+ "LABEL_257": 257,
964
+ "LABEL_258": 258,
965
+ "LABEL_259": 259,
966
+ "LABEL_26": 26,
967
+ "LABEL_260": 260,
968
+ "LABEL_261": 261,
969
+ "LABEL_262": 262,
970
+ "LABEL_263": 263,
971
+ "LABEL_264": 264,
972
+ "LABEL_265": 265,
973
+ "LABEL_266": 266,
974
+ "LABEL_267": 267,
975
+ "LABEL_268": 268,
976
+ "LABEL_269": 269,
977
+ "LABEL_27": 27,
978
+ "LABEL_270": 270,
979
+ "LABEL_271": 271,
980
+ "LABEL_272": 272,
981
+ "LABEL_273": 273,
982
+ "LABEL_274": 274,
983
+ "LABEL_275": 275,
984
+ "LABEL_276": 276,
985
+ "LABEL_277": 277,
986
+ "LABEL_278": 278,
987
+ "LABEL_279": 279,
988
+ "LABEL_28": 28,
989
+ "LABEL_280": 280,
990
+ "LABEL_281": 281,
991
+ "LABEL_282": 282,
992
+ "LABEL_283": 283,
993
+ "LABEL_284": 284,
994
+ "LABEL_285": 285,
995
+ "LABEL_286": 286,
996
+ "LABEL_287": 287,
997
+ "LABEL_288": 288,
998
+ "LABEL_289": 289,
999
+ "LABEL_29": 29,
1000
+ "LABEL_290": 290,
1001
+ "LABEL_291": 291,
1002
+ "LABEL_292": 292,
1003
+ "LABEL_293": 293,
1004
+ "LABEL_294": 294,
1005
+ "LABEL_295": 295,
1006
+ "LABEL_296": 296,
1007
+ "LABEL_297": 297,
1008
+ "LABEL_298": 298,
1009
+ "LABEL_299": 299,
1010
+ "LABEL_3": 3,
1011
+ "LABEL_30": 30,
1012
+ "LABEL_300": 300,
1013
+ "LABEL_301": 301,
1014
+ "LABEL_302": 302,
1015
+ "LABEL_303": 303,
1016
+ "LABEL_304": 304,
1017
+ "LABEL_305": 305,
1018
+ "LABEL_306": 306,
1019
+ "LABEL_307": 307,
1020
+ "LABEL_308": 308,
1021
+ "LABEL_309": 309,
1022
+ "LABEL_31": 31,
1023
+ "LABEL_310": 310,
1024
+ "LABEL_311": 311,
1025
+ "LABEL_312": 312,
1026
+ "LABEL_313": 313,
1027
+ "LABEL_314": 314,
1028
+ "LABEL_315": 315,
1029
+ "LABEL_316": 316,
1030
+ "LABEL_317": 317,
1031
+ "LABEL_318": 318,
1032
+ "LABEL_319": 319,
1033
+ "LABEL_32": 32,
1034
+ "LABEL_320": 320,
1035
+ "LABEL_321": 321,
1036
+ "LABEL_322": 322,
1037
+ "LABEL_323": 323,
1038
+ "LABEL_324": 324,
1039
+ "LABEL_325": 325,
1040
+ "LABEL_326": 326,
1041
+ "LABEL_327": 327,
1042
+ "LABEL_328": 328,
1043
+ "LABEL_329": 329,
1044
+ "LABEL_33": 33,
1045
+ "LABEL_330": 330,
1046
+ "LABEL_331": 331,
1047
+ "LABEL_332": 332,
1048
+ "LABEL_333": 333,
1049
+ "LABEL_334": 334,
1050
+ "LABEL_335": 335,
1051
+ "LABEL_336": 336,
1052
+ "LABEL_337": 337,
1053
+ "LABEL_338": 338,
1054
+ "LABEL_339": 339,
1055
+ "LABEL_34": 34,
1056
+ "LABEL_340": 340,
1057
+ "LABEL_341": 341,
1058
+ "LABEL_342": 342,
1059
+ "LABEL_343": 343,
1060
+ "LABEL_344": 344,
1061
+ "LABEL_345": 345,
1062
+ "LABEL_346": 346,
1063
+ "LABEL_347": 347,
1064
+ "LABEL_348": 348,
1065
+ "LABEL_349": 349,
1066
+ "LABEL_35": 35,
1067
+ "LABEL_350": 350,
1068
+ "LABEL_351": 351,
1069
+ "LABEL_352": 352,
1070
+ "LABEL_353": 353,
1071
+ "LABEL_354": 354,
1072
+ "LABEL_355": 355,
1073
+ "LABEL_356": 356,
1074
+ "LABEL_357": 357,
1075
+ "LABEL_358": 358,
1076
+ "LABEL_359": 359,
1077
+ "LABEL_36": 36,
1078
+ "LABEL_360": 360,
1079
+ "LABEL_361": 361,
1080
+ "LABEL_362": 362,
1081
+ "LABEL_363": 363,
1082
+ "LABEL_364": 364,
1083
+ "LABEL_365": 365,
1084
+ "LABEL_366": 366,
1085
+ "LABEL_367": 367,
1086
+ "LABEL_368": 368,
1087
+ "LABEL_369": 369,
1088
+ "LABEL_37": 37,
1089
+ "LABEL_370": 370,
1090
+ "LABEL_371": 371,
1091
+ "LABEL_372": 372,
1092
+ "LABEL_373": 373,
1093
+ "LABEL_374": 374,
1094
+ "LABEL_375": 375,
1095
+ "LABEL_376": 376,
1096
+ "LABEL_377": 377,
1097
+ "LABEL_378": 378,
1098
+ "LABEL_379": 379,
1099
+ "LABEL_38": 38,
1100
+ "LABEL_380": 380,
1101
+ "LABEL_381": 381,
1102
+ "LABEL_382": 382,
1103
+ "LABEL_383": 383,
1104
+ "LABEL_384": 384,
1105
+ "LABEL_385": 385,
1106
+ "LABEL_386": 386,
1107
+ "LABEL_387": 387,
1108
+ "LABEL_388": 388,
1109
+ "LABEL_389": 389,
1110
+ "LABEL_39": 39,
1111
+ "LABEL_390": 390,
1112
+ "LABEL_391": 391,
1113
+ "LABEL_392": 392,
1114
+ "LABEL_393": 393,
1115
+ "LABEL_394": 394,
1116
+ "LABEL_395": 395,
1117
+ "LABEL_396": 396,
1118
+ "LABEL_397": 397,
1119
+ "LABEL_398": 398,
1120
+ "LABEL_399": 399,
1121
+ "LABEL_4": 4,
1122
+ "LABEL_40": 40,
1123
+ "LABEL_400": 400,
1124
+ "LABEL_401": 401,
1125
+ "LABEL_402": 402,
1126
+ "LABEL_403": 403,
1127
+ "LABEL_404": 404,
1128
+ "LABEL_405": 405,
1129
+ "LABEL_406": 406,
1130
+ "LABEL_407": 407,
1131
+ "LABEL_408": 408,
1132
+ "LABEL_409": 409,
1133
+ "LABEL_41": 41,
1134
+ "LABEL_410": 410,
1135
+ "LABEL_411": 411,
1136
+ "LABEL_412": 412,
1137
+ "LABEL_413": 413,
1138
+ "LABEL_414": 414,
1139
+ "LABEL_415": 415,
1140
+ "LABEL_416": 416,
1141
+ "LABEL_417": 417,
1142
+ "LABEL_418": 418,
1143
+ "LABEL_419": 419,
1144
+ "LABEL_42": 42,
1145
+ "LABEL_420": 420,
1146
+ "LABEL_421": 421,
1147
+ "LABEL_422": 422,
1148
+ "LABEL_423": 423,
1149
+ "LABEL_424": 424,
1150
+ "LABEL_425": 425,
1151
+ "LABEL_426": 426,
1152
+ "LABEL_427": 427,
1153
+ "LABEL_428": 428,
1154
+ "LABEL_429": 429,
1155
+ "LABEL_43": 43,
1156
+ "LABEL_430": 430,
1157
+ "LABEL_431": 431,
1158
+ "LABEL_432": 432,
1159
+ "LABEL_433": 433,
1160
+ "LABEL_434": 434,
1161
+ "LABEL_435": 435,
1162
+ "LABEL_436": 436,
1163
+ "LABEL_437": 437,
1164
+ "LABEL_438": 438,
1165
+ "LABEL_439": 439,
1166
+ "LABEL_44": 44,
1167
+ "LABEL_440": 440,
1168
+ "LABEL_441": 441,
1169
+ "LABEL_442": 442,
1170
+ "LABEL_443": 443,
1171
+ "LABEL_444": 444,
1172
+ "LABEL_445": 445,
1173
+ "LABEL_446": 446,
1174
+ "LABEL_447": 447,
1175
+ "LABEL_448": 448,
1176
+ "LABEL_449": 449,
1177
+ "LABEL_45": 45,
1178
+ "LABEL_450": 450,
1179
+ "LABEL_451": 451,
1180
+ "LABEL_452": 452,
1181
+ "LABEL_453": 453,
1182
+ "LABEL_454": 454,
1183
+ "LABEL_455": 455,
1184
+ "LABEL_456": 456,
1185
+ "LABEL_457": 457,
1186
+ "LABEL_458": 458,
1187
+ "LABEL_459": 459,
1188
+ "LABEL_46": 46,
1189
+ "LABEL_460": 460,
1190
+ "LABEL_461": 461,
1191
+ "LABEL_462": 462,
1192
+ "LABEL_463": 463,
1193
+ "LABEL_464": 464,
1194
+ "LABEL_465": 465,
1195
+ "LABEL_466": 466,
1196
+ "LABEL_467": 467,
1197
+ "LABEL_468": 468,
1198
+ "LABEL_469": 469,
1199
+ "LABEL_47": 47,
1200
+ "LABEL_470": 470,
1201
+ "LABEL_471": 471,
1202
+ "LABEL_472": 472,
1203
+ "LABEL_473": 473,
1204
+ "LABEL_474": 474,
1205
+ "LABEL_475": 475,
1206
+ "LABEL_476": 476,
1207
+ "LABEL_477": 477,
1208
+ "LABEL_478": 478,
1209
+ "LABEL_479": 479,
1210
+ "LABEL_48": 48,
1211
+ "LABEL_480": 480,
1212
+ "LABEL_481": 481,
1213
+ "LABEL_482": 482,
1214
+ "LABEL_483": 483,
1215
+ "LABEL_484": 484,
1216
+ "LABEL_485": 485,
1217
+ "LABEL_486": 486,
1218
+ "LABEL_487": 487,
1219
+ "LABEL_488": 488,
1220
+ "LABEL_489": 489,
1221
+ "LABEL_49": 49,
1222
+ "LABEL_490": 490,
1223
+ "LABEL_491": 491,
1224
+ "LABEL_492": 492,
1225
+ "LABEL_493": 493,
1226
+ "LABEL_494": 494,
1227
+ "LABEL_495": 495,
1228
+ "LABEL_496": 496,
1229
+ "LABEL_497": 497,
1230
+ "LABEL_498": 498,
1231
+ "LABEL_499": 499,
1232
+ "LABEL_5": 5,
1233
+ "LABEL_50": 50,
1234
+ "LABEL_500": 500,
1235
+ "LABEL_501": 501,
1236
+ "LABEL_502": 502,
1237
+ "LABEL_503": 503,
1238
+ "LABEL_504": 504,
1239
+ "LABEL_505": 505,
1240
+ "LABEL_506": 506,
1241
+ "LABEL_507": 507,
1242
+ "LABEL_508": 508,
1243
+ "LABEL_509": 509,
1244
+ "LABEL_51": 51,
1245
+ "LABEL_510": 510,
1246
+ "LABEL_511": 511,
1247
+ "LABEL_512": 512,
1248
+ "LABEL_513": 513,
1249
+ "LABEL_514": 514,
1250
+ "LABEL_515": 515,
1251
+ "LABEL_516": 516,
1252
+ "LABEL_517": 517,
1253
+ "LABEL_518": 518,
1254
+ "LABEL_519": 519,
1255
+ "LABEL_52": 52,
1256
+ "LABEL_520": 520,
1257
+ "LABEL_521": 521,
1258
+ "LABEL_522": 522,
1259
+ "LABEL_523": 523,
1260
+ "LABEL_524": 524,
1261
+ "LABEL_525": 525,
1262
+ "LABEL_526": 526,
1263
+ "LABEL_527": 527,
1264
+ "LABEL_528": 528,
1265
+ "LABEL_529": 529,
1266
+ "LABEL_53": 53,
1267
+ "LABEL_530": 530,
1268
+ "LABEL_531": 531,
1269
+ "LABEL_532": 532,
1270
+ "LABEL_533": 533,
1271
+ "LABEL_534": 534,
1272
+ "LABEL_535": 535,
1273
+ "LABEL_536": 536,
1274
+ "LABEL_537": 537,
1275
+ "LABEL_538": 538,
1276
+ "LABEL_539": 539,
1277
+ "LABEL_54": 54,
1278
+ "LABEL_540": 540,
1279
+ "LABEL_541": 541,
1280
+ "LABEL_542": 542,
1281
+ "LABEL_543": 543,
1282
+ "LABEL_544": 544,
1283
+ "LABEL_545": 545,
1284
+ "LABEL_546": 546,
1285
+ "LABEL_547": 547,
1286
+ "LABEL_548": 548,
1287
+ "LABEL_549": 549,
1288
+ "LABEL_55": 55,
1289
+ "LABEL_550": 550,
1290
+ "LABEL_551": 551,
1291
+ "LABEL_552": 552,
1292
+ "LABEL_553": 553,
1293
+ "LABEL_554": 554,
1294
+ "LABEL_555": 555,
1295
+ "LABEL_556": 556,
1296
+ "LABEL_557": 557,
1297
+ "LABEL_558": 558,
1298
+ "LABEL_559": 559,
1299
+ "LABEL_56": 56,
1300
+ "LABEL_560": 560,
1301
+ "LABEL_561": 561,
1302
+ "LABEL_562": 562,
1303
+ "LABEL_563": 563,
1304
+ "LABEL_564": 564,
1305
+ "LABEL_565": 565,
1306
+ "LABEL_566": 566,
1307
+ "LABEL_567": 567,
1308
+ "LABEL_568": 568,
1309
+ "LABEL_569": 569,
1310
+ "LABEL_57": 57,
1311
+ "LABEL_570": 570,
1312
+ "LABEL_571": 571,
1313
+ "LABEL_572": 572,
1314
+ "LABEL_573": 573,
1315
+ "LABEL_574": 574,
1316
+ "LABEL_575": 575,
1317
+ "LABEL_576": 576,
1318
+ "LABEL_577": 577,
1319
+ "LABEL_578": 578,
1320
+ "LABEL_579": 579,
1321
+ "LABEL_58": 58,
1322
+ "LABEL_580": 580,
1323
+ "LABEL_581": 581,
1324
+ "LABEL_582": 582,
1325
+ "LABEL_583": 583,
1326
+ "LABEL_584": 584,
1327
+ "LABEL_585": 585,
1328
+ "LABEL_586": 586,
1329
+ "LABEL_587": 587,
1330
+ "LABEL_588": 588,
1331
+ "LABEL_589": 589,
1332
+ "LABEL_59": 59,
1333
+ "LABEL_590": 590,
1334
+ "LABEL_591": 591,
1335
+ "LABEL_592": 592,
1336
+ "LABEL_593": 593,
1337
+ "LABEL_594": 594,
1338
+ "LABEL_595": 595,
1339
+ "LABEL_596": 596,
1340
+ "LABEL_597": 597,
1341
+ "LABEL_598": 598,
1342
+ "LABEL_599": 599,
1343
+ "LABEL_6": 6,
1344
+ "LABEL_60": 60,
1345
+ "LABEL_600": 600,
1346
+ "LABEL_601": 601,
1347
+ "LABEL_602": 602,
1348
+ "LABEL_603": 603,
1349
+ "LABEL_604": 604,
1350
+ "LABEL_605": 605,
1351
+ "LABEL_606": 606,
1352
+ "LABEL_607": 607,
1353
+ "LABEL_608": 608,
1354
+ "LABEL_609": 609,
1355
+ "LABEL_61": 61,
1356
+ "LABEL_610": 610,
1357
+ "LABEL_611": 611,
1358
+ "LABEL_612": 612,
1359
+ "LABEL_613": 613,
1360
+ "LABEL_614": 614,
1361
+ "LABEL_615": 615,
1362
+ "LABEL_616": 616,
1363
+ "LABEL_617": 617,
1364
+ "LABEL_618": 618,
1365
+ "LABEL_619": 619,
1366
+ "LABEL_62": 62,
1367
+ "LABEL_620": 620,
1368
+ "LABEL_621": 621,
1369
+ "LABEL_622": 622,
1370
+ "LABEL_623": 623,
1371
+ "LABEL_624": 624,
1372
+ "LABEL_625": 625,
1373
+ "LABEL_626": 626,
1374
+ "LABEL_627": 627,
1375
+ "LABEL_628": 628,
1376
+ "LABEL_629": 629,
1377
+ "LABEL_63": 63,
1378
+ "LABEL_630": 630,
1379
+ "LABEL_631": 631,
1380
+ "LABEL_632": 632,
1381
+ "LABEL_633": 633,
1382
+ "LABEL_634": 634,
1383
+ "LABEL_635": 635,
1384
+ "LABEL_636": 636,
1385
+ "LABEL_637": 637,
1386
+ "LABEL_638": 638,
1387
+ "LABEL_639": 639,
1388
+ "LABEL_64": 64,
1389
+ "LABEL_640": 640,
1390
+ "LABEL_641": 641,
1391
+ "LABEL_642": 642,
1392
+ "LABEL_643": 643,
1393
+ "LABEL_644": 644,
1394
+ "LABEL_645": 645,
1395
+ "LABEL_646": 646,
1396
+ "LABEL_647": 647,
1397
+ "LABEL_648": 648,
1398
+ "LABEL_649": 649,
1399
+ "LABEL_65": 65,
1400
+ "LABEL_650": 650,
1401
+ "LABEL_651": 651,
1402
+ "LABEL_652": 652,
1403
+ "LABEL_653": 653,
1404
+ "LABEL_654": 654,
1405
+ "LABEL_655": 655,
1406
+ "LABEL_656": 656,
1407
+ "LABEL_657": 657,
1408
+ "LABEL_658": 658,
1409
+ "LABEL_659": 659,
1410
+ "LABEL_66": 66,
1411
+ "LABEL_660": 660,
1412
+ "LABEL_661": 661,
1413
+ "LABEL_662": 662,
1414
+ "LABEL_663": 663,
1415
+ "LABEL_664": 664,
1416
+ "LABEL_665": 665,
1417
+ "LABEL_666": 666,
1418
+ "LABEL_667": 667,
1419
+ "LABEL_668": 668,
1420
+ "LABEL_669": 669,
1421
+ "LABEL_67": 67,
1422
+ "LABEL_670": 670,
1423
+ "LABEL_671": 671,
1424
+ "LABEL_672": 672,
1425
+ "LABEL_673": 673,
1426
+ "LABEL_674": 674,
1427
+ "LABEL_675": 675,
1428
+ "LABEL_676": 676,
1429
+ "LABEL_677": 677,
1430
+ "LABEL_678": 678,
1431
+ "LABEL_679": 679,
1432
+ "LABEL_68": 68,
1433
+ "LABEL_680": 680,
1434
+ "LABEL_681": 681,
1435
+ "LABEL_682": 682,
1436
+ "LABEL_683": 683,
1437
+ "LABEL_684": 684,
1438
+ "LABEL_685": 685,
1439
+ "LABEL_686": 686,
1440
+ "LABEL_687": 687,
1441
+ "LABEL_688": 688,
1442
+ "LABEL_689": 689,
1443
+ "LABEL_69": 69,
1444
+ "LABEL_690": 690,
1445
+ "LABEL_691": 691,
1446
+ "LABEL_692": 692,
1447
+ "LABEL_693": 693,
1448
+ "LABEL_694": 694,
1449
+ "LABEL_695": 695,
1450
+ "LABEL_696": 696,
1451
+ "LABEL_697": 697,
1452
+ "LABEL_698": 698,
1453
+ "LABEL_699": 699,
1454
+ "LABEL_7": 7,
1455
+ "LABEL_70": 70,
1456
+ "LABEL_700": 700,
1457
+ "LABEL_701": 701,
1458
+ "LABEL_702": 702,
1459
+ "LABEL_703": 703,
1460
+ "LABEL_704": 704,
1461
+ "LABEL_705": 705,
1462
+ "LABEL_706": 706,
1463
+ "LABEL_707": 707,
1464
+ "LABEL_708": 708,
1465
+ "LABEL_709": 709,
1466
+ "LABEL_71": 71,
1467
+ "LABEL_710": 710,
1468
+ "LABEL_711": 711,
1469
+ "LABEL_712": 712,
1470
+ "LABEL_713": 713,
1471
+ "LABEL_714": 714,
1472
+ "LABEL_715": 715,
1473
+ "LABEL_716": 716,
1474
+ "LABEL_717": 717,
1475
+ "LABEL_718": 718,
1476
+ "LABEL_719": 719,
1477
+ "LABEL_72": 72,
1478
+ "LABEL_720": 720,
1479
+ "LABEL_721": 721,
1480
+ "LABEL_722": 722,
1481
+ "LABEL_723": 723,
1482
+ "LABEL_724": 724,
1483
+ "LABEL_725": 725,
1484
+ "LABEL_726": 726,
1485
+ "LABEL_727": 727,
1486
+ "LABEL_728": 728,
1487
+ "LABEL_729": 729,
1488
+ "LABEL_73": 73,
1489
+ "LABEL_730": 730,
1490
+ "LABEL_731": 731,
1491
+ "LABEL_732": 732,
1492
+ "LABEL_733": 733,
1493
+ "LABEL_734": 734,
1494
+ "LABEL_735": 735,
1495
+ "LABEL_736": 736,
1496
+ "LABEL_737": 737,
1497
+ "LABEL_738": 738,
1498
+ "LABEL_739": 739,
1499
+ "LABEL_74": 74,
1500
+ "LABEL_740": 740,
1501
+ "LABEL_741": 741,
1502
+ "LABEL_742": 742,
1503
+ "LABEL_743": 743,
1504
+ "LABEL_744": 744,
1505
+ "LABEL_745": 745,
1506
+ "LABEL_746": 746,
1507
+ "LABEL_747": 747,
1508
+ "LABEL_748": 748,
1509
+ "LABEL_749": 749,
1510
+ "LABEL_75": 75,
1511
+ "LABEL_750": 750,
1512
+ "LABEL_751": 751,
1513
+ "LABEL_752": 752,
1514
+ "LABEL_753": 753,
1515
+ "LABEL_754": 754,
1516
+ "LABEL_755": 755,
1517
+ "LABEL_756": 756,
1518
+ "LABEL_757": 757,
1519
+ "LABEL_758": 758,
1520
+ "LABEL_759": 759,
1521
+ "LABEL_76": 76,
1522
+ "LABEL_760": 760,
1523
+ "LABEL_761": 761,
1524
+ "LABEL_762": 762,
1525
+ "LABEL_763": 763,
1526
+ "LABEL_764": 764,
1527
+ "LABEL_765": 765,
1528
+ "LABEL_766": 766,
1529
+ "LABEL_767": 767,
1530
+ "LABEL_77": 77,
1531
+ "LABEL_78": 78,
1532
+ "LABEL_79": 79,
1533
+ "LABEL_8": 8,
1534
+ "LABEL_80": 80,
1535
+ "LABEL_81": 81,
1536
+ "LABEL_82": 82,
1537
+ "LABEL_83": 83,
1538
+ "LABEL_84": 84,
1539
+ "LABEL_85": 85,
1540
+ "LABEL_86": 86,
1541
+ "LABEL_87": 87,
1542
+ "LABEL_88": 88,
1543
+ "LABEL_89": 89,
1544
+ "LABEL_9": 9,
1545
+ "LABEL_90": 90,
1546
+ "LABEL_91": 91,
1547
+ "LABEL_92": 92,
1548
+ "LABEL_93": 93,
1549
+ "LABEL_94": 94,
1550
+ "LABEL_95": 95,
1551
+ "LABEL_96": 96,
1552
+ "LABEL_97": 97,
1553
+ "LABEL_98": 98,
1554
+ "LABEL_99": 99
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 24,
+ "output_past": true,
+ "pad_token_id": 0,
+ "pooler_fc_size": 768,
+ "pooler_num_attention_heads": 12,
+ "pooler_num_fc_layers": 3,
+ "pooler_size_per_head": 128,
+ "pooler_type": "first_token_transform",
+ "position_embedding_type": "absolute",
+ "torch_dtype": "float32",
+ "transformers_version": "4.28.1",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 21128
+ }
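Note on config.json: it describes a 24-layer, 1024-hidden, 16-head Chinese BERT encoder (vocab_size 21128, max_position_embeddings 512) saved under the custom "BertForCL" architecture, which is not a stock transformers class and appears to be a contrastive-learning (SimCSE-style) wrapper; the 768-entry id2label/label2id tables look like auto-generated placeholders rather than meaningful classes. Below is a minimal sketch of using the checkpoint as a sentence encoder, assuming the contrastive head can be ignored and [CLS] pooling is appropriate; the repo id is a placeholder, not part of this commit.

# Minimal sketch: load the checkpoint with stock transformers classes and take
# the [CLS] vector as a sentence embedding. Loading BertForCL weights into
# BertModel may warn about unused head parameters; for embedding-only use that
# is expected. "your-username/this-repo" is a placeholder repo id.
import torch
from transformers import AutoTokenizer, BertModel

repo = "your-username/this-repo"  # placeholder, not a real identifier
tokenizer = AutoTokenizer.from_pretrained(repo)
model = BertModel.from_pretrained(repo)
model.eval()

sentences = ["今天天气很好", "今天天气不错"]
inputs = tokenizer(sentences, padding=True, truncation=True,
                   max_length=512, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# hidden_size is 1024 per config.json, so each embedding is a 1024-d vector
embeddings = outputs.last_hidden_state[:, 0]               # [CLS] pooling
embeddings = torch.nn.functional.normalize(embeddings, dim=-1)
print(embeddings @ embeddings.T)                            # cosine similarities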
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e6007b52cca5c1a6c73188f0d10bd63757cf9d44291697bf2969f901d49e55b
+ size 2617006341
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f97ebdf0199e3f56fac1a6cd07be8eaf6d5691961db60dbadf765819c97966ab
+ size 1308526197
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d551da60d4ba08fa8418f2fffd141659d740e22c8ad473117ef00bbc12af5155
+ size 627
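Note on the binary files: optimizer.pt, pytorch_model.bin, and scheduler.pt are tracked with Git LFS, so the diffs above show only pointer files (spec version, sha256 oid, byte size). pytorch_model.bin (~1.3 GB) holds the model weights, while optimizer.pt (~2.6 GB) and scheduler.pt are training-resume state. A small sketch for checking a downloaded object against its pointer follows; the local path is an assumption about where the file was saved.

# Sketch: verify a downloaded LFS object against the pointer's sha256 oid and size.
import hashlib, os

def verify_lfs_object(path, expected_oid, expected_size):
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_oid and os.path.getsize(path) == expected_size

ok = verify_lfs_object(
    "pytorch_model.bin",  # assumed local download path
    "f97ebdf0199e3f56fac1a6cd07be8eaf6d5691961db60dbadf765819c97966ab",
    1308526197,
)
print("pytorch_model.bin matches pointer:", ok)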
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "[CLS]",
+ "do_basic_tokenize": true,
+ "do_lower_case": true,
+ "mask_token": "[MASK]",
+ "model_max_length": 1000000000000000019884624838656,
+ "never_split": null,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "unk_token": "[UNK]"
+ }
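Note on the tokenizer files: tokenizer_config.json selects BertTokenizer with lower-casing and Chinese-character splitting, and the special tokens from special_tokens_map.json become attributes on the loaded tokenizer. model_max_length is the transformers "effectively unlimited" sentinel value, so it is safer to pass an explicit max_length; the encoder itself supports 512 positions per config.json. A small sketch follows, again with a placeholder repo id.

# Sketch: cap inputs explicitly at the encoder's 512-position limit and check
# the special tokens. "your-username/this-repo" is a placeholder.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-username/this-repo")
print(tok.cls_token, tok.sep_token, tok.pad_token, tok.unk_token, tok.mask_token)
enc = tok("一段较长的中文文本", truncation=True, max_length=512, return_tensors="pt")
print(enc["input_ids"].shape)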
trainer_state.json ADDED
@@ -0,0 +1,1886 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.8760766121467434,
+ "global_step": 110000,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 2.97441713710709e-05,
13
+ "loss": 0.0001,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.02,
18
+ "learning_rate": 2.94883427421418e-05,
19
+ "loss": 0.0001,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.02,
24
+ "eval_stsb_spearman": 0.779150634149958,
25
+ "step": 1000
26
+ },
27
+ {
28
+ "epoch": 0.03,
29
+ "learning_rate": 2.9232514113212696e-05,
30
+ "loss": 0.0001,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.03,
35
+ "learning_rate": 2.8976685484283595e-05,
36
+ "loss": 0.0001,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.03,
41
+ "eval_stsb_spearman": 0.7742037737128947,
42
+ "step": 2000
43
+ },
44
+ {
45
+ "epoch": 0.04,
46
+ "learning_rate": 2.8720856855354495e-05,
47
+ "loss": 0.0001,
48
+ "step": 2500
49
+ },
50
+ {
51
+ "epoch": 0.05,
52
+ "learning_rate": 2.8465028226425394e-05,
53
+ "loss": 0.0001,
54
+ "step": 3000
55
+ },
56
+ {
57
+ "epoch": 0.05,
58
+ "eval_stsb_spearman": 0.7762138392811366,
59
+ "step": 3000
60
+ },
61
+ {
62
+ "epoch": 0.06,
63
+ "learning_rate": 2.8209199597496294e-05,
64
+ "loss": 0.0001,
65
+ "step": 3500
66
+ },
67
+ {
68
+ "epoch": 0.07,
69
+ "learning_rate": 2.795337096856719e-05,
70
+ "loss": 0.0001,
71
+ "step": 4000
72
+ },
73
+ {
74
+ "epoch": 0.07,
75
+ "eval_stsb_spearman": 0.7749345657870104,
76
+ "step": 4000
77
+ },
78
+ {
79
+ "epoch": 0.08,
80
+ "learning_rate": 2.769754233963809e-05,
81
+ "loss": 0.0001,
82
+ "step": 4500
83
+ },
84
+ {
85
+ "epoch": 0.09,
86
+ "learning_rate": 2.7441713710708985e-05,
87
+ "loss": 0.0001,
88
+ "step": 5000
89
+ },
90
+ {
91
+ "epoch": 0.09,
92
+ "eval_stsb_spearman": 0.7707595907216558,
93
+ "step": 5000
94
+ },
95
+ {
96
+ "epoch": 0.09,
97
+ "learning_rate": 2.7185885081779885e-05,
98
+ "loss": 0.0001,
99
+ "step": 5500
100
+ },
101
+ {
102
+ "epoch": 0.1,
103
+ "learning_rate": 2.6930056452850784e-05,
104
+ "loss": 0.0001,
105
+ "step": 6000
106
+ },
107
+ {
108
+ "epoch": 0.1,
109
+ "eval_stsb_spearman": 0.7749981304786546,
110
+ "step": 6000
111
+ },
112
+ {
113
+ "epoch": 0.11,
114
+ "learning_rate": 2.667422782392168e-05,
115
+ "loss": 0.0001,
116
+ "step": 6500
117
+ },
118
+ {
119
+ "epoch": 0.12,
120
+ "learning_rate": 2.641839919499258e-05,
121
+ "loss": 0.0001,
122
+ "step": 7000
123
+ },
124
+ {
125
+ "epoch": 0.12,
126
+ "eval_stsb_spearman": 0.7738172782882184,
127
+ "step": 7000
128
+ },
129
+ {
130
+ "epoch": 0.13,
131
+ "learning_rate": 2.616257056606348e-05,
132
+ "loss": 0.0001,
133
+ "step": 7500
134
+ },
135
+ {
136
+ "epoch": 0.14,
137
+ "learning_rate": 2.590674193713438e-05,
138
+ "loss": 0.0001,
139
+ "step": 8000
140
+ },
141
+ {
142
+ "epoch": 0.14,
143
+ "eval_stsb_spearman": 0.7733704832120436,
144
+ "step": 8000
145
+ },
146
+ {
147
+ "epoch": 0.14,
148
+ "learning_rate": 2.565091330820528e-05,
149
+ "loss": 0.0001,
150
+ "step": 8500
151
+ },
152
+ {
153
+ "epoch": 0.15,
154
+ "learning_rate": 2.5395084679276174e-05,
155
+ "loss": 0.0001,
156
+ "step": 9000
157
+ },
158
+ {
159
+ "epoch": 0.15,
160
+ "eval_stsb_spearman": 0.7775208072571205,
161
+ "step": 9000
162
+ },
163
+ {
164
+ "epoch": 0.16,
165
+ "learning_rate": 2.5139256050347074e-05,
166
+ "loss": 0.0001,
167
+ "step": 9500
168
+ },
169
+ {
170
+ "epoch": 0.17,
171
+ "learning_rate": 2.4883427421417973e-05,
172
+ "loss": 0.0001,
173
+ "step": 10000
174
+ },
175
+ {
176
+ "epoch": 0.17,
177
+ "eval_stsb_spearman": 0.772225753495311,
178
+ "step": 10000
179
+ },
180
+ {
181
+ "epoch": 0.18,
182
+ "learning_rate": 2.4627598792488873e-05,
183
+ "loss": 0.0001,
184
+ "step": 10500
185
+ },
186
+ {
187
+ "epoch": 0.19,
188
+ "learning_rate": 2.4371770163559772e-05,
189
+ "loss": 0.0001,
190
+ "step": 11000
191
+ },
192
+ {
193
+ "epoch": 0.19,
194
+ "eval_stsb_spearman": 0.7727032901042289,
195
+ "step": 11000
196
+ },
197
+ {
198
+ "epoch": 0.2,
199
+ "learning_rate": 2.411594153463067e-05,
200
+ "loss": 0.0001,
201
+ "step": 11500
202
+ },
203
+ {
204
+ "epoch": 0.2,
205
+ "learning_rate": 2.3860112905701568e-05,
206
+ "loss": 0.0001,
207
+ "step": 12000
208
+ },
209
+ {
210
+ "epoch": 0.2,
211
+ "eval_stsb_spearman": 0.7753943735719158,
212
+ "step": 12000
213
+ },
214
+ {
215
+ "epoch": 0.21,
216
+ "learning_rate": 2.3604284276772467e-05,
217
+ "loss": 0.0001,
218
+ "step": 12500
219
+ },
220
+ {
221
+ "epoch": 0.22,
222
+ "learning_rate": 2.3348455647843367e-05,
223
+ "loss": 0.0001,
224
+ "step": 13000
225
+ },
226
+ {
227
+ "epoch": 0.22,
228
+ "eval_stsb_spearman": 0.7730675807188958,
229
+ "step": 13000
230
+ },
231
+ {
232
+ "epoch": 0.23,
233
+ "learning_rate": 2.3092627018914266e-05,
234
+ "loss": 0.0001,
235
+ "step": 13500
236
+ },
237
+ {
238
+ "epoch": 0.24,
239
+ "learning_rate": 2.2836798389985162e-05,
240
+ "loss": 0.0001,
241
+ "step": 14000
242
+ },
243
+ {
244
+ "epoch": 0.24,
245
+ "eval_stsb_spearman": 0.7781803089941816,
246
+ "step": 14000
247
+ },
248
+ {
249
+ "epoch": 0.25,
250
+ "learning_rate": 2.2580969761056062e-05,
251
+ "loss": 0.0001,
252
+ "step": 14500
253
+ },
254
+ {
255
+ "epoch": 0.26,
256
+ "learning_rate": 2.2325141132126958e-05,
257
+ "loss": 0.0001,
258
+ "step": 15000
259
+ },
260
+ {
261
+ "epoch": 0.26,
262
+ "eval_stsb_spearman": 0.7760834463511375,
263
+ "step": 15000
264
+ },
265
+ {
266
+ "epoch": 0.26,
267
+ "learning_rate": 2.2069312503197857e-05,
268
+ "loss": 0.0001,
269
+ "step": 15500
270
+ },
271
+ {
272
+ "epoch": 0.27,
273
+ "learning_rate": 2.1813483874268757e-05,
274
+ "loss": 0.0001,
275
+ "step": 16000
276
+ },
277
+ {
278
+ "epoch": 0.27,
279
+ "eval_stsb_spearman": 0.7766114476167615,
280
+ "step": 16000
281
+ },
282
+ {
283
+ "epoch": 0.28,
284
+ "learning_rate": 2.1557655245339653e-05,
285
+ "loss": 0.0001,
286
+ "step": 16500
287
+ },
288
+ {
289
+ "epoch": 0.29,
290
+ "learning_rate": 2.1301826616410553e-05,
291
+ "loss": 0.0001,
292
+ "step": 17000
293
+ },
294
+ {
295
+ "epoch": 0.29,
296
+ "eval_stsb_spearman": 0.774197452942763,
297
+ "step": 17000
298
+ },
299
+ {
300
+ "epoch": 0.3,
301
+ "learning_rate": 2.1045997987481452e-05,
302
+ "loss": 0.0001,
303
+ "step": 17500
304
+ },
305
+ {
306
+ "epoch": 0.31,
307
+ "learning_rate": 2.079016935855235e-05,
308
+ "loss": 0.0001,
309
+ "step": 18000
310
+ },
311
+ {
312
+ "epoch": 0.31,
313
+ "eval_stsb_spearman": 0.7747344951319819,
314
+ "step": 18000
315
+ },
316
+ {
317
+ "epoch": 0.32,
318
+ "learning_rate": 2.053434072962325e-05,
319
+ "loss": 0.0001,
320
+ "step": 18500
321
+ },
322
+ {
323
+ "epoch": 0.32,
324
+ "learning_rate": 2.0278512100694147e-05,
325
+ "loss": 0.0001,
326
+ "step": 19000
327
+ },
328
+ {
329
+ "epoch": 0.32,
330
+ "eval_stsb_spearman": 0.7734231292337955,
331
+ "step": 19000
332
+ },
333
+ {
334
+ "epoch": 0.33,
335
+ "learning_rate": 2.0022683471765047e-05,
336
+ "loss": 0.0001,
337
+ "step": 19500
338
+ },
339
+ {
340
+ "epoch": 0.34,
341
+ "learning_rate": 1.9766854842835946e-05,
342
+ "loss": 0.0001,
343
+ "step": 20000
344
+ },
345
+ {
346
+ "epoch": 0.34,
347
+ "eval_stsb_spearman": 0.7711219149130829,
348
+ "step": 20000
349
+ },
350
+ {
351
+ "epoch": 0.35,
352
+ "learning_rate": 1.9511026213906845e-05,
353
+ "loss": 0.0001,
354
+ "step": 20500
355
+ },
356
+ {
357
+ "epoch": 0.36,
358
+ "learning_rate": 1.9255197584977745e-05,
359
+ "loss": 0.0001,
360
+ "step": 21000
361
+ },
362
+ {
363
+ "epoch": 0.36,
364
+ "eval_stsb_spearman": 0.7764032943131054,
365
+ "step": 21000
366
+ },
367
+ {
368
+ "epoch": 0.37,
369
+ "learning_rate": 1.8999368956048644e-05,
370
+ "loss": 0.0001,
371
+ "step": 21500
372
+ },
373
+ {
374
+ "epoch": 0.38,
375
+ "learning_rate": 1.874354032711954e-05,
376
+ "loss": 0.0001,
377
+ "step": 22000
378
+ },
379
+ {
380
+ "epoch": 0.38,
381
+ "eval_stsb_spearman": 0.7728760022631344,
382
+ "step": 22000
383
+ },
384
+ {
385
+ "epoch": 0.38,
386
+ "learning_rate": 1.848771169819044e-05,
387
+ "loss": 0.0001,
388
+ "step": 22500
389
+ },
390
+ {
391
+ "epoch": 0.39,
392
+ "learning_rate": 1.823188306926134e-05,
393
+ "loss": 0.0001,
394
+ "step": 23000
395
+ },
396
+ {
397
+ "epoch": 0.39,
398
+ "eval_stsb_spearman": 0.775568059169046,
399
+ "step": 23000
400
+ },
401
+ {
402
+ "epoch": 0.4,
403
+ "learning_rate": 1.797605444033224e-05,
404
+ "loss": 0.0001,
405
+ "step": 23500
406
+ },
407
+ {
408
+ "epoch": 0.41,
409
+ "learning_rate": 1.772022581140314e-05,
410
+ "loss": 0.0001,
411
+ "step": 24000
412
+ },
413
+ {
414
+ "epoch": 0.41,
415
+ "eval_stsb_spearman": 0.7716868251335787,
416
+ "step": 24000
417
+ },
418
+ {
419
+ "epoch": 0.42,
420
+ "learning_rate": 1.746439718247403e-05,
421
+ "loss": 0.0001,
422
+ "step": 24500
423
+ },
424
+ {
425
+ "epoch": 0.43,
426
+ "learning_rate": 1.720856855354493e-05,
427
+ "loss": 0.0001,
428
+ "step": 25000
429
+ },
430
+ {
431
+ "epoch": 0.43,
432
+ "eval_stsb_spearman": 0.7752166203205634,
433
+ "step": 25000
434
+ },
435
+ {
436
+ "epoch": 0.43,
437
+ "learning_rate": 1.695273992461583e-05,
438
+ "loss": 0.0,
439
+ "step": 25500
440
+ },
441
+ {
442
+ "epoch": 0.44,
443
+ "learning_rate": 1.669691129568673e-05,
444
+ "loss": 0.0,
445
+ "step": 26000
446
+ },
447
+ {
448
+ "epoch": 0.44,
449
+ "eval_stsb_spearman": 0.7722613478318026,
450
+ "step": 26000
451
+ },
452
+ {
453
+ "epoch": 0.45,
454
+ "learning_rate": 1.644108266675763e-05,
455
+ "loss": 0.0,
456
+ "step": 26500
457
+ },
458
+ {
459
+ "epoch": 0.46,
460
+ "learning_rate": 1.6185254037828525e-05,
461
+ "loss": 0.0001,
462
+ "step": 27000
463
+ },
464
+ {
465
+ "epoch": 0.46,
466
+ "eval_stsb_spearman": 0.7746778882608947,
467
+ "step": 27000
468
+ },
469
+ {
470
+ "epoch": 0.47,
471
+ "learning_rate": 1.5929425408899425e-05,
472
+ "loss": 0.0,
473
+ "step": 27500
474
+ },
475
+ {
476
+ "epoch": 0.48,
477
+ "learning_rate": 1.5673596779970324e-05,
478
+ "loss": 0.0,
479
+ "step": 28000
480
+ },
481
+ {
482
+ "epoch": 0.48,
483
+ "eval_stsb_spearman": 0.7744804439612313,
484
+ "step": 28000
485
+ },
486
+ {
487
+ "epoch": 0.49,
488
+ "learning_rate": 1.5417768151041224e-05,
489
+ "loss": 0.0,
490
+ "step": 28500
491
+ },
492
+ {
493
+ "epoch": 0.49,
494
+ "learning_rate": 1.5161939522112121e-05,
495
+ "loss": 0.0,
496
+ "step": 29000
497
+ },
498
+ {
499
+ "epoch": 0.49,
500
+ "eval_stsb_spearman": 0.7720634386643836,
501
+ "step": 29000
502
+ },
503
+ {
504
+ "epoch": 0.5,
505
+ "learning_rate": 1.4906110893183021e-05,
506
+ "loss": 0.0001,
507
+ "step": 29500
508
+ },
509
+ {
510
+ "epoch": 0.51,
511
+ "learning_rate": 1.4650282264253919e-05,
512
+ "loss": 0.0,
513
+ "step": 30000
514
+ },
515
+ {
516
+ "epoch": 0.51,
517
+ "eval_stsb_spearman": 0.776464481420812,
518
+ "step": 30000
519
+ },
520
+ {
521
+ "epoch": 0.52,
522
+ "learning_rate": 1.4394453635324818e-05,
523
+ "loss": 0.0,
524
+ "step": 30500
525
+ },
526
+ {
527
+ "epoch": 0.53,
528
+ "learning_rate": 1.4138625006395718e-05,
529
+ "loss": 0.0,
530
+ "step": 31000
531
+ },
532
+ {
533
+ "epoch": 0.53,
534
+ "eval_stsb_spearman": 0.7731371111367188,
535
+ "step": 31000
536
+ },
537
+ {
538
+ "epoch": 0.54,
539
+ "learning_rate": 1.3882796377466614e-05,
540
+ "loss": 0.0,
541
+ "step": 31500
542
+ },
543
+ {
544
+ "epoch": 0.55,
545
+ "learning_rate": 1.3626967748537513e-05,
546
+ "loss": 0.0,
547
+ "step": 32000
548
+ },
549
+ {
550
+ "epoch": 0.55,
551
+ "eval_stsb_spearman": 0.7710878430175343,
552
+ "step": 32000
553
+ },
554
+ {
555
+ "epoch": 0.55,
556
+ "learning_rate": 1.3371139119608411e-05,
557
+ "loss": 0.0,
558
+ "step": 32500
559
+ },
560
+ {
561
+ "epoch": 0.56,
562
+ "learning_rate": 1.311531049067931e-05,
563
+ "loss": 0.0,
564
+ "step": 33000
565
+ },
566
+ {
567
+ "epoch": 0.56,
568
+ "eval_stsb_spearman": 0.7752104766273047,
569
+ "step": 33000
570
+ },
571
+ {
572
+ "epoch": 0.57,
573
+ "learning_rate": 1.285948186175021e-05,
574
+ "loss": 0.0,
575
+ "step": 33500
576
+ },
577
+ {
578
+ "epoch": 0.58,
579
+ "learning_rate": 1.2603653232821108e-05,
580
+ "loss": 0.0,
581
+ "step": 34000
582
+ },
583
+ {
584
+ "epoch": 0.58,
585
+ "eval_stsb_spearman": 0.7749487602603908,
586
+ "step": 34000
587
+ },
588
+ {
589
+ "epoch": 0.59,
590
+ "learning_rate": 1.2347824603892007e-05,
591
+ "loss": 0.0,
592
+ "step": 34500
593
+ },
594
+ {
595
+ "epoch": 0.6,
596
+ "learning_rate": 1.2091995974962905e-05,
597
+ "loss": 0.0,
598
+ "step": 35000
599
+ },
600
+ {
601
+ "epoch": 0.6,
602
+ "eval_stsb_spearman": 0.7769257731613575,
603
+ "step": 35000
604
+ },
605
+ {
606
+ "epoch": 0.61,
607
+ "learning_rate": 1.1836167346033804e-05,
608
+ "loss": 0.0,
609
+ "step": 35500
610
+ },
611
+ {
612
+ "epoch": 0.61,
613
+ "learning_rate": 1.1580338717104704e-05,
614
+ "loss": 0.0,
615
+ "step": 36000
616
+ },
617
+ {
618
+ "epoch": 0.61,
619
+ "eval_stsb_spearman": 0.7756559730014233,
620
+ "step": 36000
621
+ },
622
+ {
623
+ "epoch": 0.62,
624
+ "learning_rate": 1.13245100881756e-05,
625
+ "loss": 0.0,
626
+ "step": 36500
627
+ },
628
+ {
629
+ "epoch": 0.63,
630
+ "learning_rate": 1.10686814592465e-05,
631
+ "loss": 0.0,
632
+ "step": 37000
633
+ },
634
+ {
635
+ "epoch": 0.63,
636
+ "eval_stsb_spearman": 0.7745848587301726,
637
+ "step": 37000
638
+ },
639
+ {
640
+ "epoch": 0.64,
641
+ "learning_rate": 1.0812852830317397e-05,
642
+ "loss": 0.0,
643
+ "step": 37500
644
+ },
645
+ {
646
+ "epoch": 0.65,
647
+ "learning_rate": 1.0557024201388297e-05,
648
+ "loss": 0.0,
649
+ "step": 38000
650
+ },
651
+ {
652
+ "epoch": 0.65,
653
+ "eval_stsb_spearman": 0.7721647051923735,
654
+ "step": 38000
655
+ },
656
+ {
657
+ "epoch": 0.66,
658
+ "learning_rate": 1.0301195572459196e-05,
659
+ "loss": 0.0,
660
+ "step": 38500
661
+ },
662
+ {
663
+ "epoch": 0.67,
664
+ "learning_rate": 1.0045366943530094e-05,
665
+ "loss": 0.0,
666
+ "step": 39000
667
+ },
668
+ {
669
+ "epoch": 0.67,
670
+ "eval_stsb_spearman": 0.7742379627015131,
671
+ "step": 39000
672
+ },
673
+ {
674
+ "epoch": 0.67,
675
+ "learning_rate": 9.789538314600993e-06,
676
+ "loss": 0.0,
677
+ "step": 39500
678
+ },
679
+ {
680
+ "epoch": 0.68,
681
+ "learning_rate": 9.533709685671891e-06,
682
+ "loss": 0.0,
683
+ "step": 40000
684
+ },
685
+ {
686
+ "epoch": 0.68,
687
+ "eval_stsb_spearman": 0.7729155725915814,
688
+ "step": 40000
689
+ },
690
+ {
691
+ "epoch": 0.69,
692
+ "learning_rate": 9.27788105674279e-06,
693
+ "loss": 0.0,
694
+ "step": 40500
695
+ },
696
+ {
697
+ "epoch": 0.7,
698
+ "learning_rate": 9.02205242781369e-06,
699
+ "loss": 0.0,
700
+ "step": 41000
701
+ },
702
+ {
703
+ "epoch": 0.7,
704
+ "eval_stsb_spearman": 0.7731093489515521,
705
+ "step": 41000
706
+ },
707
+ {
708
+ "epoch": 0.71,
709
+ "learning_rate": 8.766223798884586e-06,
710
+ "loss": 0.0,
711
+ "step": 41500
712
+ },
713
+ {
714
+ "epoch": 0.72,
715
+ "learning_rate": 8.510395169955486e-06,
716
+ "loss": 0.0,
717
+ "step": 42000
718
+ },
719
+ {
720
+ "epoch": 0.72,
721
+ "eval_stsb_spearman": 0.776349877698032,
722
+ "step": 42000
723
+ },
724
+ {
725
+ "epoch": 0.72,
726
+ "learning_rate": 8.254566541026384e-06,
727
+ "loss": 0.0,
728
+ "step": 42500
729
+ },
730
+ {
731
+ "epoch": 0.73,
732
+ "learning_rate": 7.998737912097283e-06,
733
+ "loss": 0.0,
734
+ "step": 43000
735
+ },
736
+ {
737
+ "epoch": 0.73,
738
+ "eval_stsb_spearman": 0.7742966440865845,
739
+ "step": 43000
740
+ },
741
+ {
742
+ "epoch": 0.74,
743
+ "learning_rate": 7.742909283168183e-06,
744
+ "loss": 0.0,
745
+ "step": 43500
746
+ },
747
+ {
748
+ "epoch": 0.75,
749
+ "learning_rate": 7.48708065423908e-06,
750
+ "loss": 0.0,
751
+ "step": 44000
752
+ },
753
+ {
754
+ "epoch": 0.75,
755
+ "eval_stsb_spearman": 0.776391111019895,
756
+ "step": 44000
757
+ },
758
+ {
759
+ "epoch": 0.76,
760
+ "learning_rate": 7.23125202530998e-06,
761
+ "loss": 0.0,
762
+ "step": 44500
763
+ },
764
+ {
765
+ "epoch": 0.77,
766
+ "learning_rate": 6.975423396380878e-06,
767
+ "loss": 0.0,
768
+ "step": 45000
769
+ },
770
+ {
771
+ "epoch": 0.77,
772
+ "eval_stsb_spearman": 0.7725475338449702,
773
+ "step": 45000
774
+ },
775
+ {
776
+ "epoch": 0.78,
777
+ "learning_rate": 6.719594767451776e-06,
778
+ "loss": 0.0,
779
+ "step": 45500
780
+ },
781
+ {
782
+ "epoch": 0.78,
783
+ "learning_rate": 6.463766138522675e-06,
784
+ "loss": 0.0,
785
+ "step": 46000
786
+ },
787
+ {
788
+ "epoch": 0.78,
789
+ "eval_stsb_spearman": 0.7748880311631935,
790
+ "step": 46000
791
+ },
792
+ {
793
+ "epoch": 0.79,
794
+ "learning_rate": 6.207937509593574e-06,
795
+ "loss": 0.0,
796
+ "step": 46500
797
+ },
798
+ {
799
+ "epoch": 0.8,
800
+ "learning_rate": 5.952108880664473e-06,
801
+ "loss": 0.0,
802
+ "step": 47000
803
+ },
804
+ {
805
+ "epoch": 0.8,
806
+ "eval_stsb_spearman": 0.7736924744692966,
807
+ "step": 47000
808
+ },
809
+ {
810
+ "epoch": 0.81,
811
+ "learning_rate": 5.696280251735371e-06,
812
+ "loss": 0.0,
813
+ "step": 47500
814
+ },
815
+ {
816
+ "epoch": 0.82,
817
+ "learning_rate": 5.440451622806269e-06,
818
+ "loss": 0.0,
819
+ "step": 48000
820
+ },
821
+ {
822
+ "epoch": 0.82,
823
+ "eval_stsb_spearman": 0.7725938501752172,
824
+ "step": 48000
825
+ },
826
+ {
827
+ "epoch": 0.83,
828
+ "learning_rate": 5.184622993877168e-06,
829
+ "loss": 0.0,
830
+ "step": 48500
831
+ },
832
+ {
833
+ "epoch": 0.84,
834
+ "learning_rate": 4.9287943649480675e-06,
835
+ "loss": 0.0,
836
+ "step": 49000
837
+ },
838
+ {
839
+ "epoch": 0.84,
840
+ "eval_stsb_spearman": 0.7746088551214558,
841
+ "step": 49000
842
+ },
843
+ {
844
+ "epoch": 0.84,
845
+ "learning_rate": 4.672965736018966e-06,
846
+ "loss": 0.0,
847
+ "step": 49500
848
+ },
849
+ {
850
+ "epoch": 0.85,
851
+ "learning_rate": 4.417137107089864e-06,
852
+ "loss": 0.0,
853
+ "step": 50000
854
+ },
855
+ {
856
+ "epoch": 0.85,
857
+ "eval_stsb_spearman": 0.7742809440881452,
858
+ "step": 50000
859
+ },
860
+ {
861
+ "epoch": 0.86,
862
+ "learning_rate": 1.7080654239080384e-05,
863
+ "loss": 0.0001,
864
+ "step": 50500
865
+ },
866
+ {
867
+ "epoch": 0.87,
868
+ "learning_rate": 1.695273992461583e-05,
869
+ "loss": 0.0026,
870
+ "step": 51000
871
+ },
872
+ {
873
+ "epoch": 0.87,
874
+ "eval_stsb_spearman": 0.7683574925666393,
875
+ "step": 51000
876
+ },
877
+ {
878
+ "epoch": 0.88,
879
+ "learning_rate": 1.682482561015128e-05,
880
+ "loss": 0.0023,
881
+ "step": 51500
882
+ },
883
+ {
884
+ "epoch": 0.89,
885
+ "learning_rate": 1.669691129568673e-05,
886
+ "loss": 0.002,
887
+ "step": 52000
888
+ },
889
+ {
890
+ "epoch": 0.89,
891
+ "eval_stsb_spearman": 0.7751801343996144,
892
+ "step": 52000
893
+ },
894
+ {
895
+ "epoch": 0.9,
896
+ "learning_rate": 1.656899698122218e-05,
897
+ "loss": 0.0021,
898
+ "step": 52500
899
+ },
900
+ {
901
+ "epoch": 0.9,
902
+ "learning_rate": 1.644108266675763e-05,
903
+ "loss": 0.002,
904
+ "step": 53000
905
+ },
906
+ {
907
+ "epoch": 0.9,
908
+ "eval_stsb_spearman": 0.7706796437720312,
909
+ "step": 53000
910
+ },
911
+ {
912
+ "epoch": 0.91,
913
+ "learning_rate": 1.631316835229308e-05,
914
+ "loss": 0.002,
915
+ "step": 53500
916
+ },
917
+ {
918
+ "epoch": 0.92,
919
+ "learning_rate": 1.6185254037828525e-05,
920
+ "loss": 0.0018,
921
+ "step": 54000
922
+ },
923
+ {
924
+ "epoch": 0.92,
925
+ "eval_stsb_spearman": 0.7756891246212327,
926
+ "step": 54000
927
+ },
928
+ {
929
+ "epoch": 0.93,
930
+ "learning_rate": 1.6057339723363978e-05,
931
+ "loss": 0.0018,
932
+ "step": 54500
933
+ },
934
+ {
935
+ "epoch": 0.94,
936
+ "learning_rate": 1.5929425408899425e-05,
937
+ "loss": 0.0017,
938
+ "step": 55000
939
+ },
940
+ {
941
+ "epoch": 0.94,
942
+ "eval_stsb_spearman": 0.7752558955538417,
943
+ "step": 55000
944
+ },
945
+ {
946
+ "epoch": 0.95,
947
+ "learning_rate": 1.5801511094434878e-05,
948
+ "loss": 0.002,
949
+ "step": 55500
950
+ },
951
+ {
952
+ "epoch": 0.96,
953
+ "learning_rate": 1.5673596779970324e-05,
954
+ "loss": 0.0019,
955
+ "step": 56000
956
+ },
957
+ {
958
+ "epoch": 0.96,
959
+ "eval_stsb_spearman": 0.780701712591289,
960
+ "step": 56000
961
+ },
962
+ {
963
+ "epoch": 0.96,
964
+ "learning_rate": 1.5545682465505774e-05,
965
+ "loss": 0.0019,
966
+ "step": 56500
967
+ },
968
+ {
969
+ "epoch": 0.97,
970
+ "learning_rate": 1.5417768151041224e-05,
971
+ "loss": 0.0018,
972
+ "step": 57000
973
+ },
974
+ {
975
+ "epoch": 0.97,
976
+ "eval_stsb_spearman": 0.7873629268749875,
977
+ "step": 57000
978
+ },
979
+ {
980
+ "epoch": 0.98,
981
+ "learning_rate": 1.5289853836576673e-05,
982
+ "loss": 0.0019,
983
+ "step": 57500
984
+ },
985
+ {
986
+ "epoch": 0.99,
987
+ "learning_rate": 1.5161939522112121e-05,
988
+ "loss": 0.0017,
989
+ "step": 58000
990
+ },
991
+ {
992
+ "epoch": 0.99,
993
+ "eval_stsb_spearman": 0.7660054024130715,
994
+ "step": 58000
995
+ },
996
+ {
997
+ "epoch": 1.0,
998
+ "learning_rate": 1.503402520764757e-05,
999
+ "loss": 0.0019,
1000
+ "step": 58500
1001
+ },
1002
+ {
1003
+ "epoch": 1.01,
1004
+ "learning_rate": 1.4906110893183021e-05,
1005
+ "loss": 0.0017,
1006
+ "step": 59000
1007
+ },
1008
+ {
1009
+ "epoch": 1.01,
1010
+ "eval_stsb_spearman": 0.7855143130229439,
1011
+ "step": 59000
1012
+ },
1013
+ {
1014
+ "epoch": 1.01,
1015
+ "learning_rate": 1.4778196578718469e-05,
1016
+ "loss": 0.0019,
1017
+ "step": 59500
1018
+ },
1019
+ {
1020
+ "epoch": 1.02,
1021
+ "learning_rate": 1.4650282264253919e-05,
1022
+ "loss": 0.0017,
1023
+ "step": 60000
1024
+ },
1025
+ {
1026
+ "epoch": 1.02,
1027
+ "eval_stsb_spearman": 0.77861030088238,
1028
+ "step": 60000
1029
+ },
1030
+ {
1031
+ "epoch": 1.03,
1032
+ "learning_rate": 1.4522367949789368e-05,
1033
+ "loss": 0.0016,
1034
+ "step": 60500
1035
+ },
1036
+ {
1037
+ "epoch": 1.04,
1038
+ "learning_rate": 1.4394453635324818e-05,
1039
+ "loss": 0.0016,
1040
+ "step": 61000
1041
+ },
1042
+ {
1043
+ "epoch": 1.04,
1044
+ "eval_stsb_spearman": 0.7812534433697589,
1045
+ "step": 61000
1046
+ },
1047
+ {
1048
+ "epoch": 1.05,
1049
+ "learning_rate": 1.4266539320860268e-05,
1050
+ "loss": 0.0017,
1051
+ "step": 61500
1052
+ },
1053
+ {
1054
+ "epoch": 1.06,
1055
+ "learning_rate": 1.4138625006395718e-05,
1056
+ "loss": 0.0017,
1057
+ "step": 62000
1058
+ },
1059
+ {
1060
+ "epoch": 1.06,
1061
+ "eval_stsb_spearman": 0.7736488598869644,
1062
+ "step": 62000
1063
+ },
1064
+ {
1065
+ "epoch": 1.07,
1066
+ "learning_rate": 1.4010710691931166e-05,
1067
+ "loss": 0.0018,
1068
+ "step": 62500
1069
+ },
1070
+ {
1071
+ "epoch": 1.07,
1072
+ "learning_rate": 1.3882796377466614e-05,
1073
+ "loss": 0.0017,
1074
+ "step": 63000
1075
+ },
1076
+ {
1077
+ "epoch": 1.07,
1078
+ "eval_stsb_spearman": 0.7776239203767936,
1079
+ "step": 63000
1080
+ },
1081
+ {
1082
+ "epoch": 1.08,
1083
+ "learning_rate": 1.3754882063002063e-05,
1084
+ "loss": 0.0015,
1085
+ "step": 63500
1086
+ },
1087
+ {
1088
+ "epoch": 1.09,
1089
+ "learning_rate": 1.3626967748537513e-05,
1090
+ "loss": 0.0017,
1091
+ "step": 64000
1092
+ },
1093
+ {
1094
+ "epoch": 1.09,
1095
+ "eval_stsb_spearman": 0.772728744520677,
1096
+ "step": 64000
1097
+ },
1098
+ {
1099
+ "epoch": 1.1,
1100
+ "learning_rate": 1.3499053434072963e-05,
1101
+ "loss": 0.0017,
1102
+ "step": 64500
1103
+ },
1104
+ {
1105
+ "epoch": 1.11,
1106
+ "learning_rate": 1.3371139119608411e-05,
1107
+ "loss": 0.0014,
1108
+ "step": 65000
1109
+ },
1110
+ {
1111
+ "epoch": 1.11,
1112
+ "eval_stsb_spearman": 0.7840145435380373,
1113
+ "step": 65000
1114
+ },
1115
+ {
1116
+ "epoch": 1.12,
1117
+ "learning_rate": 1.324322480514386e-05,
1118
+ "loss": 0.0016,
1119
+ "step": 65500
1120
+ },
1121
+ {
1122
+ "epoch": 1.13,
1123
+ "learning_rate": 1.311531049067931e-05,
1124
+ "loss": 0.0015,
1125
+ "step": 66000
1126
+ },
1127
+ {
1128
+ "epoch": 1.13,
1129
+ "eval_stsb_spearman": 0.7814009954533213,
1130
+ "step": 66000
1131
+ },
1132
+ {
1133
+ "epoch": 1.13,
1134
+ "learning_rate": 1.298739617621476e-05,
1135
+ "loss": 0.0015,
1136
+ "step": 66500
1137
+ },
1138
+ {
1139
+ "epoch": 1.14,
1140
+ "learning_rate": 1.285948186175021e-05,
1141
+ "loss": 0.0015,
1142
+ "step": 67000
1143
+ },
1144
+ {
1145
+ "epoch": 1.14,
1146
+ "eval_stsb_spearman": 0.7801342379468632,
1147
+ "step": 67000
1148
+ },
1149
+ {
1150
+ "epoch": 1.15,
1151
+ "learning_rate": 1.2731567547285658e-05,
1152
+ "loss": 0.0016,
1153
+ "step": 67500
1154
+ },
1155
+ {
1156
+ "epoch": 1.16,
1157
+ "learning_rate": 1.2603653232821108e-05,
1158
+ "loss": 0.0015,
1159
+ "step": 68000
1160
+ },
1161
+ {
1162
+ "epoch": 1.16,
1163
+ "eval_stsb_spearman": 0.778155239600538,
1164
+ "step": 68000
1165
+ },
1166
+ {
1167
+ "epoch": 1.17,
1168
+ "learning_rate": 1.2475738918356557e-05,
1169
+ "loss": 0.0015,
1170
+ "step": 68500
1171
+ },
1172
+ {
1173
+ "epoch": 1.18,
1174
+ "learning_rate": 1.2347824603892007e-05,
1175
+ "loss": 0.0015,
1176
+ "step": 69000
1177
+ },
1178
+ {
1179
+ "epoch": 1.18,
1180
+ "eval_stsb_spearman": 0.7727153538621762,
1181
+ "step": 69000
1182
+ },
1183
+ {
1184
+ "epoch": 1.19,
1185
+ "learning_rate": 1.2219910289427457e-05,
1186
+ "loss": 0.0015,
1187
+ "step": 69500
1188
+ },
1189
+ {
1190
+ "epoch": 1.19,
1191
+ "learning_rate": 1.2091995974962905e-05,
1192
+ "loss": 0.0015,
1193
+ "step": 70000
1194
+ },
1195
+ {
1196
+ "epoch": 1.19,
1197
+ "eval_stsb_spearman": 0.7839607415226314,
1198
+ "step": 70000
1199
+ },
1200
+ {
1201
+ "epoch": 1.2,
1202
+ "learning_rate": 1.1964081660498355e-05,
1203
+ "loss": 0.0014,
1204
+ "step": 70500
1205
+ },
1206
+ {
1207
+ "epoch": 1.21,
1208
+ "learning_rate": 1.1836167346033804e-05,
1209
+ "loss": 0.0014,
1210
+ "step": 71000
1211
+ },
1212
+ {
1213
+ "epoch": 1.21,
1214
+ "eval_stsb_spearman": 0.7737039597499138,
1215
+ "step": 71000
1216
+ },
1217
+ {
1218
+ "epoch": 1.22,
1219
+ "learning_rate": 1.1708253031569254e-05,
1220
+ "loss": 0.0015,
1221
+ "step": 71500
1222
+ },
1223
+ {
1224
+ "epoch": 1.23,
1225
+ "learning_rate": 1.1580338717104704e-05,
1226
+ "loss": 0.0014,
1227
+ "step": 72000
1228
+ },
1229
+ {
1230
+ "epoch": 1.23,
1231
+ "eval_stsb_spearman": 0.7717549383604455,
1232
+ "step": 72000
1233
+ },
1234
+ {
1235
+ "epoch": 1.24,
1236
+ "learning_rate": 1.1452424402640152e-05,
1237
+ "loss": 0.0014,
1238
+ "step": 72500
1239
+ },
1240
+ {
1241
+ "epoch": 1.25,
1242
+ "learning_rate": 1.13245100881756e-05,
1243
+ "loss": 0.0015,
1244
+ "step": 73000
1245
+ },
1246
+ {
1247
+ "epoch": 1.25,
1248
+ "eval_stsb_spearman": 0.7761051944670604,
1249
+ "step": 73000
1250
+ },
1251
+ {
1252
+ "epoch": 1.25,
1253
+ "learning_rate": 1.119659577371105e-05,
1254
+ "loss": 0.0014,
1255
+ "step": 73500
1256
+ },
1257
+ {
1258
+ "epoch": 1.26,
1259
+ "learning_rate": 1.10686814592465e-05,
1260
+ "loss": 0.0014,
1261
+ "step": 74000
1262
+ },
1263
+ {
1264
+ "epoch": 1.26,
1265
+ "eval_stsb_spearman": 0.7814437463546053,
1266
+ "step": 74000
1267
+ },
1268
+ {
1269
+ "epoch": 1.27,
1270
+ "learning_rate": 1.094076714478195e-05,
1271
+ "loss": 0.0014,
1272
+ "step": 74500
1273
+ },
1274
+ {
1275
+ "epoch": 1.28,
1276
+ "learning_rate": 1.0812852830317397e-05,
1277
+ "loss": 0.0012,
1278
+ "step": 75000
1279
+ },
1280
+ {
1281
+ "epoch": 1.28,
1282
+ "eval_stsb_spearman": 0.7769911614134192,
1283
+ "step": 75000
1284
+ },
1285
+ {
1286
+ "epoch": 1.29,
1287
+ "learning_rate": 1.0684938515852847e-05,
1288
+ "loss": 0.0013,
1289
+ "step": 75500
1290
+ },
1291
+ {
1292
+ "epoch": 1.3,
1293
+ "learning_rate": 1.0557024201388297e-05,
1294
+ "loss": 0.0013,
1295
+ "step": 76000
1296
+ },
1297
+ {
1298
+ "epoch": 1.3,
1299
+ "eval_stsb_spearman": 0.7809750612307073,
1300
+ "step": 76000
1301
+ },
1302
+ {
1303
+ "epoch": 1.3,
1304
+ "learning_rate": 1.0429109886923746e-05,
1305
+ "loss": 0.0014,
1306
+ "step": 76500
1307
+ },
1308
+ {
1309
+ "epoch": 1.31,
1310
+ "learning_rate": 1.0301195572459196e-05,
1311
+ "loss": 0.0014,
1312
+ "step": 77000
1313
+ },
1314
+ {
1315
+ "epoch": 1.31,
1316
+ "eval_stsb_spearman": 0.7867736950467644,
1317
+ "step": 77000
1318
+ },
1319
+ {
1320
+ "epoch": 1.32,
1321
+ "learning_rate": 1.0173281257994644e-05,
1322
+ "loss": 0.0014,
1323
+ "step": 77500
1324
+ },
1325
+ {
1326
+ "epoch": 1.33,
1327
+ "learning_rate": 1.0045366943530094e-05,
1328
+ "loss": 0.0012,
1329
+ "step": 78000
1330
+ },
1331
+ {
1332
+ "epoch": 1.33,
1333
+ "eval_stsb_spearman": 0.78602360527425,
1334
+ "step": 78000
1335
+ },
1336
+ {
1337
+ "epoch": 1.34,
1338
+ "learning_rate": 9.917452629065544e-06,
1339
+ "loss": 0.0014,
1340
+ "step": 78500
1341
+ },
1342
+ {
1343
+ "epoch": 1.35,
1344
+ "learning_rate": 9.789538314600993e-06,
1345
+ "loss": 0.0013,
1346
+ "step": 79000
1347
+ },
1348
+ {
1349
+ "epoch": 1.35,
1350
+ "eval_stsb_spearman": 0.7879566030360313,
1351
+ "step": 79000
1352
+ },
1353
+ {
1354
+ "epoch": 1.36,
1355
+ "learning_rate": 9.661624000136443e-06,
1356
+ "loss": 0.0013,
1357
+ "step": 79500
1358
+ },
1359
+ {
1360
+ "epoch": 1.36,
1361
+ "learning_rate": 9.533709685671891e-06,
1362
+ "loss": 0.0012,
1363
+ "step": 80000
1364
+ },
1365
+ {
1366
+ "epoch": 1.36,
1367
+ "eval_stsb_spearman": 0.7851583038178344,
1368
+ "step": 80000
1369
+ },
1370
+ {
1371
+ "epoch": 1.37,
1372
+ "learning_rate": 9.405795371207341e-06,
1373
+ "loss": 0.0013,
1374
+ "step": 80500
1375
+ },
1376
+ {
1377
+ "epoch": 1.38,
1378
+ "learning_rate": 9.27788105674279e-06,
1379
+ "loss": 0.0012,
1380
+ "step": 81000
1381
+ },
1382
+ {
1383
+ "epoch": 1.38,
1384
+ "eval_stsb_spearman": 0.7854350613534488,
1385
+ "step": 81000
1386
+ },
1387
+ {
1388
+ "epoch": 1.39,
1389
+ "learning_rate": 9.14996674227824e-06,
1390
+ "loss": 0.0013,
1391
+ "step": 81500
1392
+ },
1393
+ {
1394
+ "epoch": 1.4,
1395
+ "learning_rate": 9.02205242781369e-06,
1396
+ "loss": 0.0012,
1397
+ "step": 82000
1398
+ },
1399
+ {
1400
+ "epoch": 1.4,
1401
+ "eval_stsb_spearman": 0.7893658570377464,
1402
+ "step": 82000
1403
+ },
1404
+ {
1405
+ "epoch": 1.41,
1406
+ "learning_rate": 8.894138113349137e-06,
1407
+ "loss": 0.0012,
1408
+ "step": 82500
1409
+ },
1410
+ {
1411
+ "epoch": 1.42,
1412
+ "learning_rate": 8.766223798884586e-06,
1413
+ "loss": 0.0013,
1414
+ "step": 83000
1415
+ },
1416
+ {
1417
+ "epoch": 1.42,
1418
+ "eval_stsb_spearman": 0.7884863161927248,
1419
+ "step": 83000
1420
+ },
1421
+ {
1422
+ "epoch": 1.42,
1423
+ "learning_rate": 8.638309484420036e-06,
1424
+ "loss": 0.0012,
1425
+ "step": 83500
1426
+ },
1427
+ {
1428
+ "epoch": 1.43,
1429
+ "learning_rate": 8.510395169955486e-06,
1430
+ "loss": 0.0011,
1431
+ "step": 84000
1432
+ },
1433
+ {
1434
+ "epoch": 1.43,
1435
+ "eval_stsb_spearman": 0.7832187972814217,
1436
+ "step": 84000
1437
+ },
1438
+ {
1439
+ "epoch": 1.44,
1440
+ "learning_rate": 8.382480855490936e-06,
1441
+ "loss": 0.0012,
1442
+ "step": 84500
1443
+ },
1444
+ {
1445
+ "epoch": 1.45,
1446
+ "learning_rate": 8.254566541026384e-06,
1447
+ "loss": 0.0012,
1448
+ "step": 85000
1449
+ },
1450
+ {
1451
+ "epoch": 1.45,
1452
+ "eval_stsb_spearman": 0.7780244483654368,
1453
+ "step": 85000
1454
+ },
1455
+ {
1456
+ "epoch": 1.46,
1457
+ "learning_rate": 8.126652226561833e-06,
1458
+ "loss": 0.0012,
1459
+ "step": 85500
1460
+ },
1461
+ {
1462
+ "epoch": 1.47,
1463
+ "learning_rate": 7.998737912097283e-06,
1464
+ "loss": 0.0011,
1465
+ "step": 86000
1466
+ },
1467
+ {
1468
+ "epoch": 1.47,
1469
+ "eval_stsb_spearman": 0.7798499707740966,
1470
+ "step": 86000
1471
+ },
1472
+ {
1473
+ "epoch": 1.48,
1474
+ "learning_rate": 7.870823597632733e-06,
1475
+ "loss": 0.0011,
1476
+ "step": 86500
1477
+ },
1478
+ {
1479
+ "epoch": 1.48,
1480
+ "learning_rate": 7.742909283168183e-06,
1481
+ "loss": 0.0011,
1482
+ "step": 87000
1483
+ },
1484
+ {
1485
+ "epoch": 1.48,
1486
+ "eval_stsb_spearman": 0.783190494125622,
1487
+ "step": 87000
1488
+ },
1489
+ {
1490
+ "epoch": 1.49,
1491
+ "learning_rate": 7.6149949687036314e-06,
1492
+ "loss": 0.0011,
1493
+ "step": 87500
1494
+ },
1495
+ {
1496
+ "epoch": 1.5,
1497
+ "learning_rate": 7.48708065423908e-06,
1498
+ "loss": 0.0012,
1499
+ "step": 88000
1500
+ },
1501
+ {
1502
+ "epoch": 1.5,
1503
+ "eval_stsb_spearman": 0.7804977213380432,
1504
+ "step": 88000
1505
+ },
1506
+ {
1507
+ "epoch": 1.51,
1508
+ "learning_rate": 7.35916633977453e-06,
1509
+ "loss": 0.001,
1510
+ "step": 88500
1511
+ },
1512
+ {
1513
+ "epoch": 1.52,
1514
+ "learning_rate": 7.23125202530998e-06,
1515
+ "loss": 0.0011,
1516
+ "step": 89000
1517
+ },
1518
+ {
1519
+ "epoch": 1.52,
1520
+ "eval_stsb_spearman": 0.7812487997617589,
1521
+ "step": 89000
1522
+ },
1523
+ {
1524
+ "epoch": 1.53,
1525
+ "learning_rate": 7.103337710845429e-06,
1526
+ "loss": 0.0012,
1527
+ "step": 89500
1528
+ },
1529
+ {
1530
+ "epoch": 1.53,
1531
+ "learning_rate": 6.975423396380878e-06,
1532
+ "loss": 0.001,
1533
+ "step": 90000
1534
+ },
1535
+ {
1536
+ "epoch": 1.53,
1537
+ "eval_stsb_spearman": 0.7816751642438525,
1538
+ "step": 90000
1539
+ },
1540
+ {
1541
+ "epoch": 1.54,
1542
+ "learning_rate": 6.847509081916327e-06,
1543
+ "loss": 0.001,
1544
+ "step": 90500
1545
+ },
1546
+ {
1547
+ "epoch": 1.55,
1548
+ "learning_rate": 6.719594767451776e-06,
1549
+ "loss": 0.0011,
1550
+ "step": 91000
1551
+ },
1552
+ {
1553
+ "epoch": 1.55,
1554
+ "eval_stsb_spearman": 0.7820030275667162,
1555
+ "step": 91000
1556
+ },
1557
+ {
1558
+ "epoch": 1.56,
1559
+ "learning_rate": 6.591680452987226e-06,
1560
+ "loss": 0.0011,
1561
+ "step": 91500
1562
+ },
1563
+ {
1564
+ "epoch": 1.57,
1565
+ "learning_rate": 6.463766138522675e-06,
1566
+ "loss": 0.001,
1567
+ "step": 92000
1568
+ },
1569
+ {
1570
+ "epoch": 1.57,
1571
+ "eval_stsb_spearman": 0.7832784805352376,
1572
+ "step": 92000
1573
+ },
1574
+ {
1575
+ "epoch": 1.58,
1576
+ "learning_rate": 6.335851824058125e-06,
1577
+ "loss": 0.001,
1578
+ "step": 92500
1579
+ },
1580
+ {
1581
+ "epoch": 1.59,
1582
+ "learning_rate": 6.207937509593574e-06,
1583
+ "loss": 0.0011,
1584
+ "step": 93000
1585
+ },
1586
+ {
1587
+ "epoch": 1.59,
1588
+ "eval_stsb_spearman": 0.7833093403145232,
1589
+ "step": 93000
1590
+ },
1591
+ {
1592
+ "epoch": 1.59,
1593
+ "learning_rate": 6.080023195129023e-06,
1594
+ "loss": 0.001,
1595
+ "step": 93500
1596
+ },
1597
+ {
1598
+ "epoch": 1.6,
1599
+ "learning_rate": 5.952108880664473e-06,
1600
+ "loss": 0.001,
1601
+ "step": 94000
1602
+ },
1603
+ {
1604
+ "epoch": 1.6,
1605
+ "eval_stsb_spearman": 0.7829398244489639,
1606
+ "step": 94000
1607
+ },
1608
+ {
1609
+ "epoch": 1.61,
1610
+ "learning_rate": 5.824194566199921e-06,
1611
+ "loss": 0.0009,
1612
+ "step": 94500
1613
+ },
1614
+ {
1615
+ "epoch": 1.62,
1616
+ "learning_rate": 5.696280251735371e-06,
1617
+ "loss": 0.001,
1618
+ "step": 95000
1619
+ },
1620
+ {
1621
+ "epoch": 1.62,
1622
+ "eval_stsb_spearman": 0.7820607890459481,
1623
+ "step": 95000
1624
+ },
1625
+ {
1626
+ "epoch": 1.63,
1627
+ "learning_rate": 5.5683659372708205e-06,
1628
+ "loss": 0.001,
1629
+ "step": 95500
1630
+ },
1631
+ {
1632
+ "epoch": 1.64,
1633
+ "learning_rate": 5.440451622806269e-06,
1634
+ "loss": 0.001,
1635
+ "step": 96000
1636
+ },
1637
+ {
1638
+ "epoch": 1.64,
1639
+ "eval_stsb_spearman": 0.7846063901039803,
1640
+ "step": 96000
1641
+ },
1642
+ {
1643
+ "epoch": 1.65,
1644
+ "learning_rate": 5.312537308341719e-06,
1645
+ "loss": 0.0009,
1646
+ "step": 96500
1647
+ },
1648
+ {
1649
+ "epoch": 1.65,
1650
+ "learning_rate": 5.184622993877168e-06,
1651
+ "loss": 0.0009,
1652
+ "step": 97000
1653
+ },
1654
+ {
1655
+ "epoch": 1.65,
1656
+ "eval_stsb_spearman": 0.7852033791449183,
1657
+ "step": 97000
1658
+ },
1659
+ {
1660
+ "epoch": 1.66,
1661
+ "learning_rate": 5.056708679412618e-06,
1662
+ "loss": 0.001,
1663
+ "step": 97500
1664
+ },
1665
+ {
1666
+ "epoch": 1.67,
1667
+ "learning_rate": 4.9287943649480675e-06,
1668
+ "loss": 0.001,
1669
+ "step": 98000
1670
+ },
1671
+ {
1672
+ "epoch": 1.67,
1673
+ "eval_stsb_spearman": 0.7775529189032535,
1674
+ "step": 98000
1675
+ },
1676
+ {
1677
+ "epoch": 1.68,
1678
+ "learning_rate": 4.800880050483516e-06,
1679
+ "loss": 0.0009,
1680
+ "step": 98500
1681
+ },
1682
+ {
1683
+ "epoch": 1.69,
1684
+ "learning_rate": 4.672965736018966e-06,
1685
+ "loss": 0.0009,
1686
+ "step": 99000
1687
+ },
1688
+ {
1689
+ "epoch": 1.69,
1690
+ "eval_stsb_spearman": 0.7785107169299121,
1691
+ "step": 99000
1692
+ },
1693
+ {
1694
+ "epoch": 1.7,
1695
+ "learning_rate": 4.545051421554414e-06,
1696
+ "loss": 0.001,
1697
+ "step": 99500
1698
+ },
1699
+ {
1700
+ "epoch": 1.71,
1701
+ "learning_rate": 4.417137107089864e-06,
1702
+ "loss": 0.0009,
1703
+ "step": 100000
1704
+ },
1705
+ {
1706
+ "epoch": 1.71,
1707
+ "eval_stsb_spearman": 0.7788225966725707,
1708
+ "step": 100000
1709
+ },
1710
+ {
1711
+ "epoch": 1.71,
1712
+ "learning_rate": 4.289222792625314e-06,
1713
+ "loss": 0.0009,
1714
+ "step": 100500
1715
+ },
1716
+ {
1717
+ "epoch": 1.72,
1718
+ "learning_rate": 4.1613084781607625e-06,
1719
+ "loss": 0.0009,
1720
+ "step": 101000
1721
+ },
1722
+ {
1723
+ "epoch": 1.72,
1724
+ "eval_stsb_spearman": 0.7775520554532559,
1725
+ "step": 101000
1726
+ },
1727
+ {
1728
+ "epoch": 1.73,
1729
+ "learning_rate": 4.033394163696212e-06,
1730
+ "loss": 0.0009,
1731
+ "step": 101500
1732
+ },
1733
+ {
1734
+ "epoch": 1.74,
1735
+ "learning_rate": 3.905479849231661e-06,
1736
+ "loss": 0.0009,
1737
+ "step": 102000
1738
+ },
1739
+ {
1740
+ "epoch": 1.74,
1741
+ "eval_stsb_spearman": 0.7765839741253101,
1742
+ "step": 102000
1743
+ },
1744
+ {
1745
+ "epoch": 1.75,
1746
+ "learning_rate": 3.777565534767111e-06,
1747
+ "loss": 0.0009,
1748
+ "step": 102500
1749
+ },
1750
+ {
1751
+ "epoch": 1.76,
1752
+ "learning_rate": 3.64965122030256e-06,
1753
+ "loss": 0.0009,
1754
+ "step": 103000
1755
+ },
1756
+ {
1757
+ "epoch": 1.76,
1758
+ "eval_stsb_spearman": 0.776046284174713,
1759
+ "step": 103000
1760
+ },
1761
+ {
1762
+ "epoch": 1.77,
1763
+ "learning_rate": 3.521736905838009e-06,
1764
+ "loss": 0.0008,
1765
+ "step": 103500
1766
+ },
1767
+ {
1768
+ "epoch": 1.77,
1769
+ "learning_rate": 3.393822591373459e-06,
1770
+ "loss": 0.0008,
1771
+ "step": 104000
1772
+ },
1773
+ {
1774
+ "epoch": 1.77,
1775
+ "eval_stsb_spearman": 0.7734033738249794,
1776
+ "step": 104000
1777
+ },
1778
+ {
1779
+ "epoch": 1.78,
1780
+ "learning_rate": 3.265908276908908e-06,
1781
+ "loss": 0.0009,
1782
+ "step": 104500
1783
+ },
1784
+ {
1785
+ "epoch": 1.79,
1786
+ "learning_rate": 3.1379939624443575e-06,
1787
+ "loss": 0.0009,
1788
+ "step": 105000
1789
+ },
1790
+ {
1791
+ "epoch": 1.79,
1792
+ "eval_stsb_spearman": 0.7771743629229094,
1793
+ "step": 105000
1794
+ },
1795
+ {
1796
+ "epoch": 1.8,
1797
+ "learning_rate": 3.0100796479798064e-06,
1798
+ "loss": 0.0009,
1799
+ "step": 105500
1800
+ },
1801
+ {
1802
+ "epoch": 1.81,
1803
+ "learning_rate": 2.882165333515256e-06,
1804
+ "loss": 0.0008,
1805
+ "step": 106000
1806
+ },
1807
+ {
1808
+ "epoch": 1.81,
1809
+ "eval_stsb_spearman": 0.7762479721057005,
1810
+ "step": 106000
1811
+ },
1812
+ {
1813
+ "epoch": 1.82,
1814
+ "learning_rate": 2.7542510190507054e-06,
1815
+ "loss": 0.0009,
1816
+ "step": 106500
1817
+ },
1818
+ {
1819
+ "epoch": 1.82,
1820
+ "learning_rate": 2.6263367045861547e-06,
1821
+ "loss": 0.0008,
1822
+ "step": 107000
1823
+ },
1824
+ {
1825
+ "epoch": 1.82,
1826
+ "eval_stsb_spearman": 0.7775759659701185,
1827
+ "step": 107000
1828
+ },
1829
+ {
1830
+ "epoch": 1.83,
1831
+ "learning_rate": 2.498422390121604e-06,
1832
+ "loss": 0.0008,
1833
+ "step": 107500
1834
+ },
1835
+ {
1836
+ "epoch": 1.84,
1837
+ "learning_rate": 2.370508075657053e-06,
1838
+ "loss": 0.0008,
1839
+ "step": 108000
1840
+ },
1841
+ {
1842
+ "epoch": 1.84,
1843
+ "eval_stsb_spearman": 0.7744453475475996,
1844
+ "step": 108000
1845
+ },
1846
+ {
1847
+ "epoch": 1.85,
1848
+ "learning_rate": 2.2425937611925027e-06,
1849
+ "loss": 0.0009,
1850
+ "step": 108500
1851
+ },
1852
+ {
1853
+ "epoch": 1.86,
1854
+ "learning_rate": 2.114679446727952e-06,
1855
+ "loss": 0.0009,
1856
+ "step": 109000
1857
+ },
1858
+ {
1859
+ "epoch": 1.86,
1860
+ "eval_stsb_spearman": 0.776758060348122,
1861
+ "step": 109000
1862
+ },
1863
+ {
1864
+ "epoch": 1.87,
1865
+ "learning_rate": 1.9867651322634013e-06,
1866
+ "loss": 0.0007,
1867
+ "step": 109500
1868
+ },
1869
+ {
1870
+ "epoch": 1.88,
1871
+ "learning_rate": 1.8588508177988504e-06,
1872
+ "loss": 0.0007,
1873
+ "step": 110000
1874
+ },
1875
+ {
1876
+ "epoch": 1.88,
1877
+ "eval_stsb_spearman": 0.7779735283874216,
1878
+ "step": 110000
1879
+ }
1880
+ ],
1881
+ "max_steps": 117266,
1882
+ "num_train_epochs": 2,
1883
+ "total_flos": 0,
1884
+ "trial_name": null,
1885
+ "trial_params": null
1886
+ }
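
The JSON above looks like the tail of a Hugging Face Trainer state file: each `+` entry in `log_history` records either a training step (learning rate, loss every 500 steps) or a periodic STS-B evaluation (`eval_stsb_spearman` every 1000 steps), with `max_steps` 117266 over 2 epochs. A minimal sketch for picking the best-scoring evaluation step out of such a file; the filename `trainer_state.json` and its location are assumptions, not confirmed by this diff:

```python
import json

# Hypothetical path: adjust to wherever this state file sits in the checkout.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the evaluation entries, which carry "eval_stsb_spearman".
evals = [e for e in state["log_history"] if "eval_stsb_spearman" in e]

# Report the step with the highest STS-B Spearman correlation.
best = max(evals, key=lambda e: e["eval_stsb_spearman"])
print(f"best step: {best['step']}  spearman: {best['eval_stsb_spearman']:.4f}")
```

On this log that would surface step 82000 (Spearman ≈ 0.7894) rather than the final step, which is why the periodic eval entries are worth parsing instead of reading only the last record.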
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6af80fd09672dd29b0facf9856807334d2bdc623e79e4001ce1e108d2132487
3
+ size 3579
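
The lines above are only a Git LFS pointer (hash and size), not the file itself. In Transformers checkpoints, `training_args.bin` is conventionally a pickled `TrainingArguments` object saved with `torch.save`; a hedged sketch for inspecting it locally, assuming the real 3579-byte binary has been fetched with `git lfs pull`:

```python
import torch

# Assumes the actual binary was pulled via `git lfs pull`, not just the pointer.
# weights_only=False is needed on recent PyTorch because this is a pickled
# TrainingArguments object, not a tensor file; only load files you trust.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```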
vocab.txt ADDED
The diff for this file is too large to render. See raw diff