Spaces:
Running
Running
abhaskumarsinha
commited on
Commit
•
4e92eb8
1
Parent(s):
ed4d0ef
added tokenizer
Browse files- tokenizer_.model +3 -0
- tokenizer_.vocab +454 -0
tokenizer_.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac1a673b906bf7bb08eb34b16e79da7fed55cd0773805a8e2a9eb90d67cf319d
|
3 |
+
size 243323
|
tokenizer_.vocab
ADDED
@@ -0,0 +1,454 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<unk> 0
|
2 |
+
<s> 0
|
3 |
+
</s> 0
|
4 |
+
▁ -2.98281
|
5 |
+
s -3.25482
|
6 |
+
, -3.64163
|
7 |
+
▁the -3.83505
|
8 |
+
e -3.92426
|
9 |
+
. -3.98345
|
10 |
+
’ -4.01794
|
11 |
+
t -4.03666
|
12 |
+
n -4.12316
|
13 |
+
▁a -4.17113
|
14 |
+
▁to -4.17459
|
15 |
+
d -4.36959
|
16 |
+
▁in -4.41674
|
17 |
+
y -4.56904
|
18 |
+
ing -4.59878
|
19 |
+
and -4.61393
|
20 |
+
▁she -4.77197
|
21 |
+
r -4.83711
|
22 |
+
ed -4.84531
|
23 |
+
a -4.87495
|
24 |
+
re -4.92487
|
25 |
+
▁of -4.93585
|
26 |
+
u -4.94813
|
27 |
+
ll -5.09773
|
28 |
+
- -5.11614
|
29 |
+
l -5.17304
|
30 |
+
▁it -5.24811
|
31 |
+
er -5.25543
|
32 |
+
▁is -5.26249
|
33 |
+
es -5.27771
|
34 |
+
om -5.28957
|
35 |
+
– -5.35225
|
36 |
+
▁you -5.35663
|
37 |
+
▁b -5.39216
|
38 |
+
o -5.45565
|
39 |
+
▁on -5.46072
|
40 |
+
▁T -5.51677
|
41 |
+
▁be -5.52237
|
42 |
+
▁I -5.56146
|
43 |
+
f -5.56416
|
44 |
+
▁s -5.57973
|
45 |
+
▁m -5.58765
|
46 |
+
▁S -5.60246
|
47 |
+
▁an -5.65461
|
48 |
+
▁do -5.65883
|
49 |
+
▁f -5.66498
|
50 |
+
▁her -5.67479
|
51 |
+
▁w -5.69817
|
52 |
+
an -5.74529
|
53 |
+
▁t -5.78632
|
54 |
+
ar -5.79524
|
55 |
+
O -5.79603
|
56 |
+
m -5.80705
|
57 |
+
▁bu -5.81819
|
58 |
+
▁fr -5.84569
|
59 |
+
E -5.86177
|
60 |
+
▁who -5.866
|
61 |
+
▁that -5.86882
|
62 |
+
▁wi -5.87169
|
63 |
+
ade -5.89909
|
64 |
+
en -5.94578
|
65 |
+
▁c -5.96195
|
66 |
+
▁A -5.97429
|
67 |
+
al -6.00744
|
68 |
+
ver -6.01365
|
69 |
+
▁whe -6.0397
|
70 |
+
▁da -6.04174
|
71 |
+
tion -6.04637
|
72 |
+
un -6.04731
|
73 |
+
as -6.04809
|
74 |
+
st -6.05162
|
75 |
+
in -6.05191
|
76 |
+
nd -6.05331
|
77 |
+
w -6.0684
|
78 |
+
ill -6.08554
|
79 |
+
mos -6.08823
|
80 |
+
▁th -6.09591
|
81 |
+
▁for -6.10153
|
82 |
+
R -6.11177
|
83 |
+
tori -6.11304
|
84 |
+
▁home -6.1136
|
85 |
+
▁over -6.11539
|
86 |
+
▁She -6.11858
|
87 |
+
or -6.12376
|
88 |
+
ity -6.12455
|
89 |
+
ke -6.13723
|
90 |
+
▁fac -6.14681
|
91 |
+
ld -6.15102
|
92 |
+
ou -6.15562
|
93 |
+
th -6.16189
|
94 |
+
▁cl -6.16717
|
95 |
+
▁g -6.18264
|
96 |
+
▁li -6.18711
|
97 |
+
▁O -6.20291
|
98 |
+
▁he -6.22143
|
99 |
+
▁p -6.22893
|
100 |
+
▁no -6.24815
|
101 |
+
g -6.28119
|
102 |
+
ster -6.28227
|
103 |
+
▁n -6.28711
|
104 |
+
thing -6.28912
|
105 |
+
ever -6.2906
|
106 |
+
▁ye -6.30007
|
107 |
+
▁ga -6.30476
|
108 |
+
ch -6.31662
|
109 |
+
i -6.32376
|
110 |
+
▁con -6.34465
|
111 |
+
A -6.35218
|
112 |
+
▁l -6.36969
|
113 |
+
▁h -6.37459
|
114 |
+
ex -6.37778
|
115 |
+
▁co -6.38985
|
116 |
+
on -6.39749
|
117 |
+
▁C -6.4007
|
118 |
+
he -6.40221
|
119 |
+
▁o -6.42004
|
120 |
+
▁i -6.4279
|
121 |
+
le -6.44386
|
122 |
+
▁V -6.44511
|
123 |
+
P -6.44511
|
124 |
+
▁W -6.44511
|
125 |
+
B -6.44511
|
126 |
+
: -6.44511
|
127 |
+
? -6.44511
|
128 |
+
N -6.44511
|
129 |
+
z -6.44511
|
130 |
+
▁Znidarcic -6.44511
|
131 |
+
▁conflict -6.44511
|
132 |
+
ntain -6.44533
|
133 |
+
▁fir -6.44924
|
134 |
+
▁wor -6.45161
|
135 |
+
▁sell -6.45165
|
136 |
+
▁car -6.46197
|
137 |
+
ow -6.4632
|
138 |
+
rough -6.4642
|
139 |
+
▁up -6.47421
|
140 |
+
▁my -6.47499
|
141 |
+
bs -6.4795
|
142 |
+
ike -6.48774
|
143 |
+
," -6.48967
|
144 |
+
ce -6.49495
|
145 |
+
tle -6.49884
|
146 |
+
▁mo -6.50425
|
147 |
+
▁Car -6.51001
|
148 |
+
▁F -6.5113
|
149 |
+
▁bl -6.51221
|
150 |
+
▁dis -6.5152
|
151 |
+
av -6.52911
|
152 |
+
▁la -6.52958
|
153 |
+
king -6.53387
|
154 |
+
ter -6.53917
|
155 |
+
ty -6.54434
|
156 |
+
uth -6.55038
|
157 |
+
dow -6.55423
|
158 |
+
ile -6.5561
|
159 |
+
▁us -6.5635
|
160 |
+
ther -6.58487
|
161 |
+
ack -6.59476
|
162 |
+
▁sh -6.64105
|
163 |
+
ime -6.64708
|
164 |
+
▁ha -6.65268
|
165 |
+
day -6.66877
|
166 |
+
▁we -6.67527
|
167 |
+
▁M -6.68209
|
168 |
+
der -6.68547
|
169 |
+
▁com -6.69811
|
170 |
+
ra -6.70012
|
171 |
+
ri -6.70384
|
172 |
+
▁fa -6.71119
|
173 |
+
ant -6.7162
|
174 |
+
one -6.72907
|
175 |
+
mp -6.7366
|
176 |
+
k -6.74416
|
177 |
+
▁un -6.75502
|
178 |
+
ic -6.75614
|
179 |
+
h -6.7633
|
180 |
+
▁ne -6.77204
|
181 |
+
▁si -6.77776
|
182 |
+
ay -6.78566
|
183 |
+
ru -6.80931
|
184 |
+
▁ca -6.82626
|
185 |
+
ig -6.82691
|
186 |
+
ment -6.85812
|
187 |
+
ho -6.85835
|
188 |
+
ca -6.86862
|
189 |
+
H -6.86869
|
190 |
+
il -6.86978
|
191 |
+
at -6.87155
|
192 |
+
I -6.87438
|
193 |
+
to -6.87499
|
194 |
+
T -6.87566
|
195 |
+
ive -6.87875
|
196 |
+
li -6.88518
|
197 |
+
tche -6.88538
|
198 |
+
ine -6.89179
|
199 |
+
um -6.90013
|
200 |
+
use -6.92237
|
201 |
+
me -6.9249
|
202 |
+
pe -6.92835
|
203 |
+
An -6.93263
|
204 |
+
ut -6.93648
|
205 |
+
arme -6.93797
|
206 |
+
j -6.94511
|
207 |
+
‘ -6.94511
|
208 |
+
▁Uganda -6.94511
|
209 |
+
▁photograph -6.94511
|
210 |
+
▁managed -6.94511
|
211 |
+
▁piano -6.94512
|
212 |
+
▁press -6.94519
|
213 |
+
▁Father -6.94529
|
214 |
+
▁val -6.94561
|
215 |
+
▁spa -6.94591
|
216 |
+
▁dark -6.94593
|
217 |
+
▁winter -6.94611
|
218 |
+
appe -6.94627
|
219 |
+
change -6.94631
|
220 |
+
ious -6.94748
|
221 |
+
▁camp -6.94855
|
222 |
+
ng -6.94956
|
223 |
+
rden -6.94978
|
224 |
+
▁di -6.95159
|
225 |
+
cu -6.95239
|
226 |
+
▁let -6.95528
|
227 |
+
term -6.95542
|
228 |
+
▁sof -6.95617
|
229 |
+
▁", -6.95629
|
230 |
+
ights -6.9572
|
231 |
+
▁some -6.95731
|
232 |
+
place -6.958
|
233 |
+
▁wind -6.96054
|
234 |
+
▁thi -6.96182
|
235 |
+
▁pu -6.96681
|
236 |
+
▁comm -6.96754
|
237 |
+
desc -6.9678
|
238 |
+
▁po -6.96809
|
239 |
+
fe -6.97021
|
240 |
+
ome -6.97134
|
241 |
+
dent -6.97519
|
242 |
+
▁G -6.97692
|
243 |
+
▁L -6.97693
|
244 |
+
▁forg -6.97694
|
245 |
+
▁essay -6.97879
|
246 |
+
▁out -6.97915
|
247 |
+
▁child -6.97955
|
248 |
+
▁per -6.97978
|
249 |
+
▁room -6.98075
|
250 |
+
▁his -6.98661
|
251 |
+
ol -6.98952
|
252 |
+
▁pla -6.99025
|
253 |
+
ances -6.99299
|
254 |
+
ard -6.99428
|
255 |
+
▁dr -7.00139
|
256 |
+
all -7.00655
|
257 |
+
▁H -7.02766
|
258 |
+
▁ar -7.03612
|
259 |
+
▁free -7.04492
|
260 |
+
▁hi -7.04835
|
261 |
+
ts -7.05165
|
262 |
+
ion -7.05786
|
263 |
+
ge -7.06057
|
264 |
+
▁ex -7.06557
|
265 |
+
ve -7.06592
|
266 |
+
ture -7.08079
|
267 |
+
▁gr -7.08256
|
268 |
+
nk -7.12309
|
269 |
+
b -7.12789
|
270 |
+
▁par -7.15886
|
271 |
+
ong -7.17102
|
272 |
+
arm -7.17354
|
273 |
+
▁wit -7.18408
|
274 |
+
▁ge -7.1933
|
275 |
+
▁The -7.19351
|
276 |
+
▁e -7.21379
|
277 |
+
ad -7.21474
|
278 |
+
amil -7.23763
|
279 |
+
si -7.24104
|
280 |
+
ess -7.24262
|
281 |
+
eb -7.25031
|
282 |
+
erve -7.26669
|
283 |
+
▁wh -7.27789
|
284 |
+
▁live -7.28473
|
285 |
+
cho -7.28653
|
286 |
+
▁d -7.29344
|
287 |
+
▁Mo -7.29911
|
288 |
+
te -7.3141
|
289 |
+
tra -7.31663
|
290 |
+
itch -7.31822
|
291 |
+
▁lit -7.3428
|
292 |
+
till -7.35067
|
293 |
+
ind -7.36005
|
294 |
+
men -7.37029
|
295 |
+
p -7.38937
|
296 |
+
c -7.40623
|
297 |
+
▁se -7.42879
|
298 |
+
▁tr -7.43108
|
299 |
+
ation -7.4369
|
300 |
+
mil -7.45181
|
301 |
+
ki -7.45207
|
302 |
+
▁sho -7.47937
|
303 |
+
▁acc -7.48153
|
304 |
+
▁wa -7.48241
|
305 |
+
ti -7.4852
|
306 |
+
ass -7.49028
|
307 |
+
▁st -7.49096
|
308 |
+
se -7.50074
|
309 |
+
▁In -7.53188
|
310 |
+
▁ho -7.54006
|
311 |
+
old -7.54322
|
312 |
+
other -7.56929
|
313 |
+
ors -7.58642
|
314 |
+
are -7.59943
|
315 |
+
mm -7.61377
|
316 |
+
ves -7.63739
|
317 |
+
▁Al -7.63808
|
318 |
+
tin -7.65018
|
319 |
+
eme -7.661
|
320 |
+
ro -7.67242
|
321 |
+
it -7.6764
|
322 |
+
ook -7.68149
|
323 |
+
▁mi -7.70738
|
324 |
+
ct -7.72334
|
325 |
+
ast -7.73492
|
326 |
+
ck -7.73984
|
327 |
+
ure -7.77089
|
328 |
+
us -7.77799
|
329 |
+
▁y -7.78405
|
330 |
+
trac -7.7845
|
331 |
+
id -7.78587
|
332 |
+
ul -7.80499
|
333 |
+
lo -7.8428
|
334 |
+
end -7.8498
|
335 |
+
F -7.85778
|
336 |
+
child -7.86205
|
337 |
+
G -7.86792
|
338 |
+
L -7.86792
|
339 |
+
essay -7.87043
|
340 |
+
room -7.874
|
341 |
+
C -7.87462
|
342 |
+
of -7.87688
|
343 |
+
whi -7.89978
|
344 |
+
iv -7.90342
|
345 |
+
cco -7.90462
|
346 |
+
" -7.90846
|
347 |
+
ent -7.90927
|
348 |
+
up -7.91017
|
349 |
+
lit -7.91248
|
350 |
+
ff -7.91432
|
351 |
+
ma -7.91669
|
352 |
+
ell -7.91905
|
353 |
+
ati -7.92941
|
354 |
+
bo -7.93198
|
355 |
+
bl -7.93333
|
356 |
+
mas -7.93684
|
357 |
+
▁ti -7.9427
|
358 |
+
D -7.94511
|
359 |
+
) -7.94511
|
360 |
+
; -7.94511
|
361 |
+
U -7.94511
|
362 |
+
( -7.94511
|
363 |
+
ugh -7.94528
|
364 |
+
pp -7.95039
|
365 |
+
hoto -7.95116
|
366 |
+
mb -7.95275
|
367 |
+
ivi -7.9545
|
368 |
+
v -7.96254
|
369 |
+
et -7.96412
|
370 |
+
pre -7.96724
|
371 |
+
alm -7.96927
|
372 |
+
amp -7.97751
|
373 |
+
rap -7.98387
|
374 |
+
ark -7.98456
|
375 |
+
ough -7.98494
|
376 |
+
bi -7.98855
|
377 |
+
ug -8.00002
|
378 |
+
lar -8.00274
|
379 |
+
ear -8.02483
|
380 |
+
▁re -8.02483
|
381 |
+
ight -8.02664
|
382 |
+
side -8.04561
|
383 |
+
sel -8.05147
|
384 |
+
▁pi -8.05401
|
385 |
+
rand -8.05666
|
386 |
+
our -8.06317
|
387 |
+
mer -8.07982
|
388 |
+
iss -8.08135
|
389 |
+
oth -8.08572
|
390 |
+
ot -8.08918
|
391 |
+
ga -8.09034
|
392 |
+
ree -8.09464
|
393 |
+
is -8.10217
|
394 |
+
cen -8.10548
|
395 |
+
▁so -8.10812
|
396 |
+
ian -8.11603
|
397 |
+
nter -8.12159
|
398 |
+
lve -8.12475
|
399 |
+
no -8.13398
|
400 |
+
▁ro -8.13407
|
401 |
+
ite -8.13992
|
402 |
+
ur -8.15648
|
403 |
+
eed -8.15933
|
404 |
+
ous -8.16485
|
405 |
+
ath -8.17035
|
406 |
+
int -8.17256
|
407 |
+
ana -8.2029
|
408 |
+
sce -8.22229
|
409 |
+
ir -8.22503
|
410 |
+
▁sa -8.26216
|
411 |
+
nti -8.28612
|
412 |
+
tan -8.28859
|
413 |
+
▁ac -8.30665
|
414 |
+
oo -8.36908
|
415 |
+
ides -8.37012
|
416 |
+
ove -8.41417
|
417 |
+
▁sta -8.43306
|
418 |
+
stra -8.48403
|
419 |
+
ons -8.4944
|
420 |
+
▁wil -8.56631
|
421 |
+
ni -8.60037
|
422 |
+
lle -8.63056
|
423 |
+
und -8.64127
|
424 |
+
lay -8.67965
|
425 |
+
▁bro -8.70233
|
426 |
+
hat -8.72064
|
427 |
+
ame -8.72281
|
428 |
+
comm -8.75146
|
429 |
+
▁cam -8.75814
|
430 |
+
▁cont -8.77115
|
431 |
+
str -8.87536
|
432 |
+
unt -8.89135
|
433 |
+
▁r -8.94977
|
434 |
+
am -8.9529
|
435 |
+
oi -8.95627
|
436 |
+
▁le -8.96696
|
437 |
+
su -8.96796
|
438 |
+
▁pa -9.01054
|
439 |
+
hou -9.06986
|
440 |
+
oda -9.07227
|
441 |
+
mo -9.08273
|
442 |
+
act -9.08564
|
443 |
+
▁Sh -9.09262
|
444 |
+
mili -9.1038
|
445 |
+
▁br -9.14123
|
446 |
+
▁su -9.22683
|
447 |
+
rs -9.27548
|
448 |
+
Z -9.28681
|
449 |
+
W -9.28691
|
450 |
+
V -9.28701
|
451 |
+
M -9.28711
|
452 |
+
x -9.28721
|
453 |
+
S -9.28731
|
454 |
+
ange -9.28731
|