KoichiYasuoka committed on
Commit
1337e98
1 Parent(s): 74fd357

model improved

Browse files
Files changed (4) hide show
  1. config.json +158 -139
  2. pytorch_model.bin +2 -2
  3. supar.model +1 -1
  4. tokenizer.json +0 -0
config.json CHANGED
@@ -27,26 +27,26 @@
27
  "15": "B-NOUN",
28
  "16": "B-NOUN+ADP",
29
  "17": "B-NOUN+ADP+NOUN",
30
- "18": "B-NOUN+ADP+VERB",
31
- "19": "B-NOUN+ADV",
32
- "20": "B-NOUN+NOUN",
33
- "21": "B-NOUN+VERB",
34
- "22": "B-NUM",
35
- "23": "B-NUM+NOUN",
36
- "24": "B-PART",
37
- "25": "B-PART+AUX",
38
- "26": "B-PART+NOUN",
39
- "27": "B-PART+VERB",
40
- "28": "B-PRON",
41
- "29": "B-PROPN",
42
- "30": "B-PUNCT",
43
- "31": "B-SCONJ",
44
- "32": "B-SCONJ+ADV",
45
- "33": "B-VERB",
46
- "34": "B-VERB+NOUN",
47
- "35": "B-VERB+PART",
48
- "36": "B-VERB+SCONJ",
49
- "37": "B-VERT",
50
  "38": "CCONJ",
51
  "39": "DET",
52
  "40": "DET+NOUN",
@@ -65,45 +65,47 @@
65
  "53": "I-NOUN",
66
  "54": "I-NOUN+ADP",
67
  "55": "I-NOUN+ADP+NOUN",
68
- "56": "I-NOUN+ADP+VERB",
69
- "57": "I-NOUN+ADV",
70
- "58": "I-NOUN+NOUN",
71
- "59": "I-NOUN+VERB",
72
- "60": "I-NUM",
73
- "61": "I-NUM+NOUN",
74
- "62": "I-PART",
75
- "63": "I-PART+AUX",
76
- "64": "I-PART+NOUN",
77
- "65": "I-PART+VERB",
78
- "66": "I-PRON",
79
- "67": "I-PROPN",
80
- "68": "I-PUNCT",
81
- "69": "I-SCONJ",
82
- "70": "I-SCONJ+ADV",
83
- "71": "I-VERB",
84
- "72": "I-VERB+NOUN",
85
- "73": "I-VERB+PART",
86
- "74": "I-VERB+SCONJ",
87
- "75": "I-VERT",
88
  "76": "INTJ",
89
  "77": "NOUN",
90
  "78": "NOUN+ADP",
91
  "79": "NOUN+NOUN",
92
  "80": "NOUN+VERB",
93
  "81": "NUM",
94
- "82": "PART",
95
- "83": "PART+VERB",
96
- "84": "PROPN",
97
- "85": "PUNCT",
98
- "86": "SCONJ",
99
- "87": "SYM",
100
- "88": "VERB",
101
- "89": "VERB+AUX",
102
- "90": "VERB+NOUN",
103
- "91": "VERB+PART",
104
- "92": "VERB+VERB",
105
- "93": "VERT",
106
- "94": "X"
 
 
107
  },
108
  "initializer_range": 0.02,
109
  "intermediate_size": 3072,
@@ -126,26 +128,26 @@
126
  "B-NOUN": 15,
127
  "B-NOUN+ADP": 16,
128
  "B-NOUN+ADP+NOUN": 17,
129
- "B-NOUN+ADP+VERB": 18,
130
- "B-NOUN+ADV": 19,
131
- "B-NOUN+NOUN": 20,
132
- "B-NOUN+VERB": 21,
133
- "B-NUM": 22,
134
- "B-NUM+NOUN": 23,
135
- "B-PART": 24,
136
- "B-PART+AUX": 25,
137
- "B-PART+NOUN": 26,
138
- "B-PART+VERB": 27,
139
- "B-PRON": 28,
140
- "B-PROPN": 29,
141
- "B-PUNCT": 30,
142
- "B-SCONJ": 31,
143
- "B-SCONJ+ADV": 32,
144
- "B-VERB": 33,
145
- "B-VERB+NOUN": 34,
146
- "B-VERB+PART": 35,
147
- "B-VERB+SCONJ": 36,
148
- "B-VERT": 37,
149
  "CCONJ": 38,
150
  "DET": 39,
151
  "DET+NOUN": 40,
@@ -164,45 +166,47 @@
164
  "I-NOUN": 53,
165
  "I-NOUN+ADP": 54,
166
  "I-NOUN+ADP+NOUN": 55,
167
- "I-NOUN+ADP+VERB": 56,
168
- "I-NOUN+ADV": 57,
169
- "I-NOUN+NOUN": 58,
170
- "I-NOUN+VERB": 59,
171
- "I-NUM": 60,
172
- "I-NUM+NOUN": 61,
173
- "I-PART": 62,
174
- "I-PART+AUX": 63,
175
- "I-PART+NOUN": 64,
176
- "I-PART+VERB": 65,
177
- "I-PRON": 66,
178
- "I-PROPN": 67,
179
- "I-PUNCT": 68,
180
- "I-SCONJ": 69,
181
- "I-SCONJ+ADV": 70,
182
- "I-VERB": 71,
183
- "I-VERB+NOUN": 72,
184
- "I-VERB+PART": 73,
185
- "I-VERB+SCONJ": 74,
186
- "I-VERT": 75,
187
  "INTJ": 76,
188
  "NOUN": 77,
189
  "NOUN+ADP": 78,
190
  "NOUN+NOUN": 79,
191
  "NOUN+VERB": 80,
192
  "NUM": 81,
193
- "PART": 82,
194
- "PART+VERB": 83,
195
- "PROPN": 84,
196
- "PUNCT": 85,
197
- "SCONJ": 86,
198
- "SYM": 87,
199
- "VERB": 88,
200
- "VERB+AUX": 89,
201
- "VERB+NOUN": 90,
202
- "VERB+PART": 91,
203
- "VERB+VERB": 92,
204
- "VERT": 93,
205
- "X": 94
 
 
206
  },
207
  "layer_norm_eps": 1e-07,
208
  "max_position_embeddings": 512,
@@ -294,22 +298,18 @@
294
  "Oro",
295
  "wano"
296
  ],
297
- "Oshmaketa": [
298
- "Oshmake",
299
- "ta"
300
- ],
301
  "Pet-samaketa": [
302
  "Pet-samake",
303
  "ta"
304
  ],
 
 
 
 
305
  "Soita": [
306
  "Soi",
307
  "ta"
308
  ],
309
- "cheppone": [
310
- "cheppo",
311
- "ne"
312
- ],
313
  "keseta": [
314
  "kese",
315
  "ta"
@@ -326,10 +326,6 @@
326
  "oro",
327
  "wano"
328
  ],
329
- "oshmaketa": [
330
- "oshmake",
331
- "ta"
332
- ],
333
  "otta": [
334
  "ot",
335
  "ta"
@@ -338,6 +334,10 @@
338
  "samake",
339
  "ta"
340
  ],
 
 
 
 
341
  "soyta": [
342
  "soy",
343
  "ta"
@@ -363,13 +363,6 @@
363
  "puray"
364
  ]
365
  },
366
- "NOUN+ADP+VERB": {
367
- "soytaarpa": [
368
- "soy",
369
- "ta",
370
- "arpa"
371
- ]
372
- },
373
  "NOUN+ADV": {
374
  "Tambeta ne": [
375
  "Tambe",
@@ -486,19 +479,33 @@
486
  "to"
487
  ]
488
  },
489
- "PART+AUX": {
490
- "chine": [
491
- "chi",
492
- "ne"
 
 
 
 
 
 
493
  ]
494
  },
495
  "PART+NOUN": {
496
  "=anpe": [
497
  "=an",
498
  "pe"
 
 
 
 
499
  ]
500
  },
501
  "PART+VERB": {
 
 
 
 
502
  "ainu-wap": [
503
  "a",
504
  "inu-wap"
@@ -522,6 +529,10 @@
522
  "karapa": [
523
  "k",
524
  "arapa"
 
 
 
 
525
  ]
526
  },
527
  "SCONJ+ADV": {
@@ -531,6 +542,14 @@
531
  ]
532
  },
533
  "VERB+AUX": {
 
 
 
 
 
 
 
 
534
  "sattek": [
535
  "sat",
536
  "tek"
@@ -549,9 +568,13 @@
549
  "an",
550
  "pe"
551
  ],
552
- "anto": [
553
- "an",
554
- "to"
 
 
 
 
555
  ],
556
  "h\u00e9sep\u00e1ha": [
557
  "h\u00e9se",
@@ -561,9 +584,9 @@
561
  "kar",
562
  "i"
563
  ],
564
- "ponchise": [
565
- "pon",
566
- "chise"
567
  ],
568
  "wenpuri": [
569
  "wen",
@@ -582,10 +605,6 @@
582
  "sapash": [
583
  "sap",
584
  "ash"
585
- ],
586
- "shinotash": [
587
- "shinot",
588
- "ash"
589
  ]
590
  },
591
  "VERB+SCONJ": {
 
27
  "15": "B-NOUN",
28
  "16": "B-NOUN+ADP",
29
  "17": "B-NOUN+ADP+NOUN",
30
+ "18": "B-NOUN+ADV",
31
+ "19": "B-NOUN+NOUN",
32
+ "20": "B-NOUN+VERB",
33
+ "21": "B-NUM",
34
+ "22": "B-NUM+NOUN",
35
+ "23": "B-PART",
36
+ "24": "B-PART+NOUN",
37
+ "25": "B-PART+VERB",
38
+ "26": "B-PRON",
39
+ "27": "B-PROPN",
40
+ "28": "B-PUNCT",
41
+ "29": "B-SCONJ",
42
+ "30": "B-SCONJ+ADV",
43
+ "31": "B-VERB",
44
+ "32": "B-VERB+AUX",
45
+ "33": "B-VERB+NOUN",
46
+ "34": "B-VERB+PART",
47
+ "35": "B-VERB+SCONJ",
48
+ "36": "B-VERT",
49
+ "37": "B-X",
50
  "38": "CCONJ",
51
  "39": "DET",
52
  "40": "DET+NOUN",
 
65
  "53": "I-NOUN",
66
  "54": "I-NOUN+ADP",
67
  "55": "I-NOUN+ADP+NOUN",
68
+ "56": "I-NOUN+ADV",
69
+ "57": "I-NOUN+NOUN",
70
+ "58": "I-NOUN+VERB",
71
+ "59": "I-NUM",
72
+ "60": "I-NUM+NOUN",
73
+ "61": "I-PART",
74
+ "62": "I-PART+NOUN",
75
+ "63": "I-PART+VERB",
76
+ "64": "I-PRON",
77
+ "65": "I-PROPN",
78
+ "66": "I-PUNCT",
79
+ "67": "I-SCONJ",
80
+ "68": "I-SCONJ+ADV",
81
+ "69": "I-VERB",
82
+ "70": "I-VERB+AUX",
83
+ "71": "I-VERB+NOUN",
84
+ "72": "I-VERB+PART",
85
+ "73": "I-VERB+SCONJ",
86
+ "74": "I-VERT",
87
+ "75": "I-X",
88
  "76": "INTJ",
89
  "77": "NOUN",
90
  "78": "NOUN+ADP",
91
  "79": "NOUN+NOUN",
92
  "80": "NOUN+VERB",
93
  "81": "NUM",
94
+ "82": "NUM+VERB+NOUN",
95
+ "83": "PART",
96
+ "84": "PART+NOUN",
97
+ "85": "PART+VERB",
98
+ "86": "PROPN",
99
+ "87": "PUNCT",
100
+ "88": "SCONJ",
101
+ "89": "SYM",
102
+ "90": "VERB",
103
+ "91": "VERB+AUX",
104
+ "92": "VERB+NOUN",
105
+ "93": "VERB+PART",
106
+ "94": "VERB+VERB",
107
+ "95": "VERT",
108
+ "96": "X"
109
  },
110
  "initializer_range": 0.02,
111
  "intermediate_size": 3072,
 
128
  "B-NOUN": 15,
129
  "B-NOUN+ADP": 16,
130
  "B-NOUN+ADP+NOUN": 17,
131
+ "B-NOUN+ADV": 18,
132
+ "B-NOUN+NOUN": 19,
133
+ "B-NOUN+VERB": 20,
134
+ "B-NUM": 21,
135
+ "B-NUM+NOUN": 22,
136
+ "B-PART": 23,
137
+ "B-PART+NOUN": 24,
138
+ "B-PART+VERB": 25,
139
+ "B-PRON": 26,
140
+ "B-PROPN": 27,
141
+ "B-PUNCT": 28,
142
+ "B-SCONJ": 29,
143
+ "B-SCONJ+ADV": 30,
144
+ "B-VERB": 31,
145
+ "B-VERB+AUX": 32,
146
+ "B-VERB+NOUN": 33,
147
+ "B-VERB+PART": 34,
148
+ "B-VERB+SCONJ": 35,
149
+ "B-VERT": 36,
150
+ "B-X": 37,
151
  "CCONJ": 38,
152
  "DET": 39,
153
  "DET+NOUN": 40,
 
166
  "I-NOUN": 53,
167
  "I-NOUN+ADP": 54,
168
  "I-NOUN+ADP+NOUN": 55,
169
+ "I-NOUN+ADV": 56,
170
+ "I-NOUN+NOUN": 57,
171
+ "I-NOUN+VERB": 58,
172
+ "I-NUM": 59,
173
+ "I-NUM+NOUN": 60,
174
+ "I-PART": 61,
175
+ "I-PART+NOUN": 62,
176
+ "I-PART+VERB": 63,
177
+ "I-PRON": 64,
178
+ "I-PROPN": 65,
179
+ "I-PUNCT": 66,
180
+ "I-SCONJ": 67,
181
+ "I-SCONJ+ADV": 68,
182
+ "I-VERB": 69,
183
+ "I-VERB+AUX": 70,
184
+ "I-VERB+NOUN": 71,
185
+ "I-VERB+PART": 72,
186
+ "I-VERB+SCONJ": 73,
187
+ "I-VERT": 74,
188
+ "I-X": 75,
189
  "INTJ": 76,
190
  "NOUN": 77,
191
  "NOUN+ADP": 78,
192
  "NOUN+NOUN": 79,
193
  "NOUN+VERB": 80,
194
  "NUM": 81,
195
+ "NUM+VERB+NOUN": 82,
196
+ "PART": 83,
197
+ "PART+NOUN": 84,
198
+ "PART+VERB": 85,
199
+ "PROPN": 86,
200
+ "PUNCT": 87,
201
+ "SCONJ": 88,
202
+ "SYM": 89,
203
+ "VERB": 90,
204
+ "VERB+AUX": 91,
205
+ "VERB+NOUN": 92,
206
+ "VERB+PART": 93,
207
+ "VERB+VERB": 94,
208
+ "VERT": 95,
209
+ "X": 96
210
  },
211
  "layer_norm_eps": 1e-07,
212
  "max_position_embeddings": 512,
 
298
  "Oro",
299
  "wano"
300
  ],
 
 
 
 
301
  "Pet-samaketa": [
302
  "Pet-samake",
303
  "ta"
304
  ],
305
+ "Shoita": [
306
+ "Shoi",
307
+ "ta"
308
+ ],
309
  "Soita": [
310
  "Soi",
311
  "ta"
312
  ],
 
 
 
 
313
  "keseta": [
314
  "kese",
315
  "ta"
 
326
  "oro",
327
  "wano"
328
  ],
 
 
 
 
329
  "otta": [
330
  "ot",
331
  "ta"
 
334
  "samake",
335
  "ta"
336
  ],
337
+ "shoita": [
338
+ "shoi",
339
+ "ta"
340
+ ],
341
  "soyta": [
342
  "soy",
343
  "ta"
 
363
  "puray"
364
  ]
365
  },
 
 
 
 
 
 
 
366
  "NOUN+ADV": {
367
  "Tambeta ne": [
368
  "Tambe",
 
479
  "to"
480
  ]
481
  },
482
+ "NUM+VERB+NOUN": {
483
+ "Shineanto": [
484
+ "Shine",
485
+ "an",
486
+ "to"
487
+ ],
488
+ "sineanto": [
489
+ "sine",
490
+ "an",
491
+ "to"
492
  ]
493
  },
494
  "PART+NOUN": {
495
  "=anpe": [
496
  "=an",
497
  "pe"
498
+ ],
499
+ "shichorpok": [
500
+ "shi",
501
+ "chorpok"
502
  ]
503
  },
504
  "PART+VERB": {
505
+ "Chirushka": [
506
+ "Chi",
507
+ "rushka"
508
+ ],
509
  "ainu-wap": [
510
  "a",
511
  "inu-wap"
 
529
  "karapa": [
530
  "k",
531
  "arapa"
532
+ ],
533
+ "shiokote": [
534
+ "shi",
535
+ "okote"
536
  ]
537
  },
538
  "SCONJ+ADV": {
 
542
  ]
543
  },
544
  "VERB+AUX": {
545
+ "poppeta ashinnangoro": [
546
+ "poppeta ashin",
547
+ "nangoro"
548
+ ],
549
+ "poppetaasinnankor": [
550
+ "poppetaasin",
551
+ "nankor"
552
+ ],
553
  "sattek": [
554
  "sat",
555
  "tek"
 
568
  "an",
569
  "pe"
570
  ],
571
+ "ashbe": [
572
+ "ash",
573
+ "be"
574
+ ],
575
+ "aspe": [
576
+ "as",
577
+ "pe"
578
  ],
579
  "h\u00e9sep\u00e1ha": [
580
  "h\u00e9se",
 
584
  "kar",
585
  "i"
586
  ],
587
+ "ohasiri": [
588
+ "oha",
589
+ "siri"
590
  ],
591
  "wenpuri": [
592
  "wen",
 
605
  "sapash": [
606
  "sap",
607
  "ash"
 
 
 
 
608
  ]
609
  },
610
  "VERB+SCONJ": {
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:130950825f157a277a247ff50be3c172941fa762e5f44a9a209b404021e9ac08
3
- size 416089171
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:540dae77de84baec491ec433af635a0a345f9ceb4a7c0fe9845948fa5181efbf
3
+ size 416095379
supar.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:babbe8b36455e1b5441261b62b7bbc48da3082d0d0980788b0244bddc0f6a04b
3
  size 461045771
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af6a91919dc6e9ba6390a1933911dd5ce8c8c44d9f84e56a26e21cd7e8f84cc
3
  size 461045771
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff