sayakpaul's picture
sayakpaul HF staff
Upload folder using huggingface_hub
127f0be verified
{
"!": 2,
"!</w>": 345,
"\"": 3,
"\"</w>": 344,
"#": 4,
"#</w>": 325,
"$": 5,
"$</w>": 348,
"%": 6,
"%</w>": 351,
"&": 7,
"&</w>": 352,
"'": 8,
"'</w>": 296,
"(": 9,
"(</w>": 318,
")": 10,
")</w>": 330,
"*": 11,
"*</w>": 327,
"+": 12,
"+</w>": 341,
",": 13,
",</w>": 279,
",@</w>": 754,
"-": 14,
"-</w>": 276,
"-@</w>": 439,
".": 15,
".</w>": 253,
".@</w>": 695,
"/": 16,
"/</w>": 350,
"0": 17,
"00</w>": 647,
"0</w>": 216,
"1": 18,
"1</w>": 222,
"2": 19,
"2</w>": 231,
"3": 20,
"3</w>": 243,
"4": 21,
"4</w>": 233,
"5": 22,
"5</w>": 240,
"6": 23,
"6</w>": 226,
"7": 24,
"7</w>": 215,
"8": 25,
"8</w>": 236,
"9": 26,
"9</w>": 242,
":": 27,
":</w>": 353,
";": 28,
";</w>": 317,
"<": 29,
"<</w>": 340,
"<|endoftext|>": 1,
"<|startoftext|>": 0,
"=": 30,
"=</w>": 342,
">": 31,
"></w>": 300,
"?": 32,
"?</w>": 346,
"@": 33,
"@</w>": 320,
"A": 34,
"A</w>": 227,
"B": 35,
"B</w>": 258,
"C": 36,
"C</w>": 239,
"D": 37,
"D</w>": 255,
"E": 38,
"E</w>": 246,
"F": 39,
"F</w>": 213,
"G": 40,
"G</w>": 283,
"H": 41,
"H</w>": 219,
"I": 42,
"I</w>": 237,
"J": 43,
"J</w>": 251,
"K": 44,
"K</w>": 254,
"L": 45,
"L</w>": 218,
"M": 46,
"M</w>": 234,
"N": 47,
"N</w>": 238,
"O": 48,
"O</w>": 265,
"P": 49,
"P</w>": 245,
"Q": 50,
"Q</w>": 309,
"R": 51,
"R</w>": 264,
"S": 52,
"S</w>": 230,
"T": 53,
"T</w>": 235,
"U": 54,
"U</w>": 268,
"V": 55,
"V</w>": 248,
"W": 56,
"W</w>": 274,
"X": 57,
"X</w>": 263,
"Y": 58,
"Y</w>": 310,
"Z": 59,
"Z</w>": 207,
"[": 60,
"[</w>": 270,
"\\": 61,
"\\</w>": 338,
"]": 62,
"]</w>": 289,
"^": 63,
"^</w>": 331,
"_": 64,
"_</w>": 334,
"`": 65,
"`</w>": 347,
"a": 66,
"a</w>": 197,
"ab": 555,
"able</w>": 820,
"ac": 420,
"ace</w>": 806,
"ach": 791,
"ach</w>": 885,
"ack</w>": 670,
"act": 929,
"ad": 508,
"ad</w>": 860,
"ade</w>": 771,
"ag": 511,
"age</w>": 710,
"ain": 568,
"ain</w>": 675,
"ak": 577,
"ake</w>": 882,
"al": 397,
"al</w>": 405,
"all": 664,
"all</w>": 658,
"ally</w>": 588,
"als</w>": 796,
"am": 426,
"am</w>": 817,
"ame</w>": 552,
"ames</w>": 976,
"amp": 800,
"an": 384,
"an</w>": 425,
"ance</w>": 751,
"and": 609,
"and</w>": 780,
"ang": 816,
"ans</w>": 844,
"ant": 837,
"ant</w>": 753,
"any</w>": 766,
"ap": 586,
"ar": 376,
"ar</w>": 579,
"ard</w>": 649,
"ards</w>": 982,
"ary</w>": 611,
"as": 416,
"as</w>": 404,
"ase</w>": 849,
"ased</w>": 814,
"ason</w>": 865,
"ass": 792,
"ast</w>": 661,
"at": 372,
"at</w>": 434,
"ate</w>": 541,
"ated</w>": 543,
"ater</w>": 709,
"ates</w>": 825,
"ath": 730,
"ating</w>": 922,
"ation</w>": 497,
"ational</w>": 933,
"ations</w>": 744,
"att": 903,
"aus": 858,
"av": 681,
"ay": 684,
"ay</w>": 523,
"b": 67,
"b</w>": 212,
"ber</w>": 593,
"c": 68,
"c</w>": 224,
"cc": 960,
"ce</w>": 496,
"ces</w>": 830,
"ch": 520,
"ch</w>": 603,
"ct": 834,
"d": 69,
"d</w>": 196,
"ded</w>": 665,
"der</w>": 690,
"ding</w>": 633,
"ds</w>": 530,
"duc": 671,
"e": 70,
"e</w>": 195,
"ea": 471,
"ear": 596,
"ear</w>": 669,
"ears</w>": 906,
"eb": 852,
"ec": 418,
"ect": 838,
"ect</w>": 964,
"ed": 563,
"ed</w>": 362,
"ee": 941,
"een</w>": 779,
"ef": 840,
"eg": 731,
"el": 407,
"el</w>": 610,
"eld</w>": 973,
"ell": 759,
"ell</w>": 756,
"ely</w>": 719,
"em": 455,
"ember</w>": 777,
"ement</w>": 959,
"emp": 975,
"en": 375,
"en</w>": 427,
"ence</w>": 685,
"ens": 880,
"ent": 478,
"ent</w>": 468,
"ents</w>": 674,
"ep": 545,
"er": 364,
"er</w>": 374,
"eral</w>": 793,
"ere</w>": 481,
"ered</w>": 748,
"eric": 921,
"erm": 861,
"ern": 887,
"ern</w>": 977,
"ers": 598,
"ers</w>": 486,
"ert": 986,
"ery</w>": 805,
"es": 402,
"es</w>": 388,
"ese</w>": 794,
"ess": 678,
"ess</w>": 693,
"est": 606,
"est</w>": 584,
"et": 460,
"et</w>": 594,
"etw": 824,
"etween</w>": 886,
"ev": 493,
"evel": 980,
"ever</w>": 855,
"ew": 687,
"ew</w>": 612,
"ex": 938,
"ey</w>": 713,
"f": 71,
"f</w>": 209,
"fer": 911,
"ff": 587,
"for": 728,
"form": 901,
"fter</w>": 634,
"g": 72,
"g</w>": 214,
"ge</w>": 592,
"h": 73,
"h</w>": 203,
"i": 74,
"i</w>": 205,
"ia</w>": 605,
"ial</w>": 672,
"ian</w>": 638,
"ib": 726,
"ic": 395,
"ic</w>": 510,
"ical</w>": 625,
"ice</w>": 782,
"ich</w>": 561,
"ics</w>": 996,
"id": 463,
"id</w>": 613,
"ide</w>": 739,
"ie</w>": 974,
"ied</w>": 812,
"ies</w>": 516,
"if": 524,
"ig": 444,
"igh": 537,
"ight</w>": 680,
"ik": 775,
"ike</w>": 984,
"il": 406,
"il</w>": 714,
"ile</w>": 721,
"ill": 608,
"ill</w>": 789,
"ily</w>": 950,
"im": 469,
"im</w>": 767,
"ime</w>": 691,
"in": 358,
"in</w>": 501,
"ine</w>": 607,
"ing": 557,
"ing</w>": 383,
"ings</w>": 815,
"ion": 472,
"ion</w>": 408,
"ional</w>": 717,
"ions</w>": 540,
"ip": 733,
"ip</w>": 818,
"ir": 453,
"ir</w>": 554,
"is": 393,
"is</w>": 441,
"ish": 694,
"ish</w>": 654,
"ished</w>": 942,
"ision</w>": 944,
"iss": 876,
"ist": 550,
"ist</w>": 811,
"it": 378,
"it</w>": 746,
"ite</w>": 760,
"ited</w>": 809,
"ition</w>": 797,
"ity</w>": 542,
"iv": 435,
"ive</w>": 549,
"ived</w>": 979,
"iz": 722,
"j": 75,
"j</w>": 288,
"k": 76,
"k</w>": 210,
"ked</w>": 810,
"king</w>": 924,
"ks</w>": 692,
"l": 77,
"l</w>": 201,
"la": 467,
"land</w>": 743,
"ld</w>": 559,
"le": 536,
"le</w>": 465,
"les</w>": 799,
"lud": 718,
"ly</w>": 433,
"m": 78,
"m</w>": 202,
"ment</w>": 701,
"mp": 651,
"n": 79,
"n</w>": 199,
"nd</w>": 369,
"ned</w>": 758,
"ning</w>": 843,
"o": 80,
"o</w>": 198,
"ob": 920,
"oc": 534,
"od": 575,
"og": 604,
"oh": 972,
"oin": 831,
"ol": 428,
"oll": 703,
"ollow": 928,
"olog": 932,
"om": 419,
"om</w>": 883,
"ome</w>": 663,
"on": 382,
"on</w>": 390,
"ond</w>": 872,
"one</w>": 835,
"ong": 850,
"ong</w>": 582,
"oo": 517,
"ood</w>": 927,
"ook</w>": 897,
"op": 531,
"op</w>": 971,
"or": 377,
"or</w>": 424,
"ore</w>": 571,
"ors</w>": 917,
"ort": 768,
"ort</w>": 752,
"ory</w>": 737,
"os": 447,
"ose</w>": 881,
"ost</w>": 646,
"ot": 600,
"ot</w>": 879,
"ou": 392,
"oug": 659,
"ough</w>": 798,
"ould</w>": 640,
"oun": 553,
"ound</w>": 961,
"our": 648,
"our</w>": 772,
"ous</w>": 712,
"out</w>": 683,
"outh</w>": 945,
"ov": 515,
"ow": 461,
"ow</w>": 666,
"own</w>": 657,
"oy": 952,
"p": 81,
"p</w>": 217,
"per": 715,
"ph": 916,
"pp": 518,
"q": 82,
"q</w>": 280,
"qu": 546,
"r": 83,
"r</w>": 204,
"ra": 457,
"ran": 624,
"re": 367,
"ree</w>": 765,
"ren": 790,
"res": 572,
"res</w>": 747,
"ri": 487,
"rib": 804,
"ric": 745,
"rit": 589,
"ro": 385,
"rom</w>": 498,
"rop": 826,
"roug": 803,
"ru": 951,
"ruc": 891,
"ry</w>": 908,
"s": 84,
"s</w>": 206,
"se</w>": 741,
"sh": 795,
"so</w>": 630,
"sp": 992,
"ss": 673,
"st": 519,
"st</w>": 528,
"t": 85,
"t</w>": 208,
"te</w>": 954,
"ted</w>": 489,
"ter": 535,
"ter</w>": 505,
"th": 449,
"th</w>": 488,
"ther</w>": 576,
"ting</w>": 676,
"tion</w>": 570,
"tr": 619,
"ts</w>": 436,
"tt": 720,
"tur": 953,
"ty</w>": 821,
"u": 86,
"u</w>": 229,
"ub": 591,
"ubl": 842,
"uc": 490,
"ud": 538,
"ue</w>": 652,
"ug": 560,
"uil": 930,
"ul": 494,
"um": 532,
"um</w>": 644,
"un": 448,
"und</w>": 828,
"up": 833,
"up</w>": 700,
"ur": 413,
"ure</w>": 635,
"uring</w>": 702,
"ury</w>": 957,
"us": 438,
"us</w>": 622,
"ust</w>": 846,
"ut": 529,
"ut</w>": 527,
"v": 87,
"v</w>": 232,
"ve</w>": 567,
"vi": 866,
"w": 88,
"w</w>": 250,
"way</w>": 970,
"wn</w>": 999,
"x": 89,
"x</w>": 269,
"y": 90,
"y</w>": 211,
"yp": 993,
"z": 91,
"z</w>": 228,
"|": 92,
"|</w>": 304,
"}": 93,
"}</w>": 336,
"~": 94,
"~</w>": 343,
"¡": 95,
"¡</w>": 220,
"¢": 96,
"¢</w>": 306,
"£": 97,
"£</w>": 323,
"¤": 98,
"¤</w>": 292,
"¥": 99,
"¥</w>": 339,
"¦": 100,
"¦</w>": 303,
"§": 101,
"§</w>": 275,
"¨": 102,
"¨</w>": 282,
"©": 103,
"©</w>": 259,
"ª": 104,
"ª</w>": 286,
"«": 105,
"«</w>": 266,
"¬": 106,
"¬</w>": 319,
"®": 107,
"®</w>": 329,
"¯": 108,
"¯</w>": 287,
"°": 109,
"°</w>": 298,
"±": 110,
"±</w>": 200,
"²": 111,
"²</w>": 284,
"³": 112,
"³</w>": 272,
"´": 113,
"´</w>": 307,
"µ": 114,
"µ</w>": 261,
"¶": 115,
"¶</w>": 301,
"·": 116,
"·</w>": 326,
"¸": 117,
"¸</w>": 257,
"¹": 118,
"¹</w>": 241,
"º": 119,
"º</w>": 260,
"»": 120,
"»</w>": 247,
"¼": 121,
"¼</w>": 305,
"½": 122,
"½</w>": 294,
"¾": 123,
"¾</w>": 316,
"¿": 124,
"¿</w>": 271,
"Â": 125,
"Ã": 126,
"Ä": 127,
"Å": 128,
"Æ": 129,
"Ç": 130,
"È": 131,
"É": 132,
"Ê": 133,
"Ë": 134,
"Ì": 135,
"Í": 136,
"Î": 137,
"Ï": 138,
"Ð": 139,
"Ñ": 140,
"Ö": 141,
"×": 142,
"Ø": 143,
"Ù": 144,
"Ü": 145,
"à": 146,
"á": 147,
"â": 148,
"ã": 149,
"ä": 150,
"å": 151,
"æ": 152,
"ç": 153,
"è": 154,
"é": 155,
"ë": 156,
"ì": 157,
"ï": 158,
"Ċ": 159,
"Ċ</w>": 349,
"Ġ": 160,
"Ġ\"</w>": 401,
"Ġ'</w>": 431,
"Ġ(</w>": 475,
"Ġ)</w>": 474,
"Ġ,</w>": 360,
"Ġ.</w>": 365,
"Ġ0": 847,
"Ġ1": 411,
"Ġ18": 769,
"Ġ19": 492,
"Ġ199": 893,
"Ġ1</w>": 778,
"Ġ2": 462,
"Ġ20": 522,
"Ġ200": 620,
"Ġ201": 734,
"Ġ2</w>": 813,
"Ġ3": 735,
"Ġ3</w>": 888,
"Ġ4": 870,
"Ġ5": 907,
"Ġ5</w>": 990,
"Ġ:</w>": 637,
"Ġ;</w>": 615,
"Ġ</w>": 333,
"Ġ=</w>": 399,
"Ġ@": 417,
"Ġ@,@</w>": 755,
"Ġ@-@</w>": 440,
"Ġ@.@</w>": 696,
"ĠA": 409,
"ĠA</w>": 807,
"ĠAl": 716,
"ĠAm": 829,
"ĠAmeric": 958,
"ĠAn": 784,
"ĠAr": 894,
"ĠB": 432,
"ĠC": 410,
"ĠCh": 581,
"ĠCom": 904,
"ĠD": 464,
"ĠE": 500,
"ĠEn": 878,
"ĠF": 470,
"ĠG": 482,
"ĠH": 445,
"ĠHe</w>": 742,
"ĠI": 442,
"ĠI</w>": 827,
"ĠIn": 704,
"ĠIn</w>": 574,
"ĠIt</w>": 774,
"ĠJ": 491,
"ĠK": 548,
"ĠL": 484,
"ĠM": 423,
"ĠMar": 776,
"ĠN": 483,
"ĠO": 504,
"ĠP": 450,
"ĠPar": 967,
"ĠR": 459,
"ĠS": 403,
"ĠSh": 750,
"ĠSt": 590,
"ĠT": 396,
"ĠTh": 414,
"ĠThe</w>": 437,
"ĠThis</w>": 997,
"ĠU": 585,
"ĠUn": 773,
"ĠV": 617,
"ĠW": 479,
"ĠWh": 853,
"ĠY": 757,
"Ġa": 356,
"Ġa</w>": 394,
"Ġab": 653,
"Ġabout</w>": 899,
"Ġac": 583,
"Ġacc": 874,
"Ġad": 656,
"Ġafter</w>": 763,
"Ġag": 725,
"Ġal": 476,
"Ġalb": 991,
"Ġall</w>": 839,
"Ġalso</w>": 641,
"Ġan": 602,
"Ġan</w>": 562,
"Ġand</w>": 381,
"Ġapp": 711,
"Ġar": 507,
"Ġare</w>": 601,
"Ġas</w>": 454,
"Ġass": 947,
"Ġat</w>": 514,
"Ġatt": 788,
"Ġb": 371,
"Ġbe": 499,
"Ġbe</w>": 595,
"Ġbec": 706,
"Ġbeen</w>": 686,
"Ġbeg": 915,
"Ġbetween</w>": 914,
"Ġbo": 819,
"Ġbut</w>": 623,
"Ġby</w>": 473,
"Ġc": 368,
"Ġcent": 823,
"Ġch": 526,
"Ġchar": 822,
"Ġcl": 689,
"Ġcom": 509,
"Ġcomm": 707,
"Ġcomp": 616,
"Ġcon": 477,
"Ġcons": 841,
"Ġcont": 655,
"Ġcre": 931,
"Ġd": 387,
"Ġde": 627,
"Ġdec": 873,
"Ġdef": 965,
"Ġdes": 738,
"Ġdi": 892,
"Ġdis": 708,
"Ġduring</w>": 864,
"Ġe": 421,
"Ġear": 854,
"Ġel": 869,
"Ġen": 662,
"Ġev": 682,
"Ġex": 539,
"Ġexp": 867,
"Ġf": 370,
"Ġfe": 845,
"Ġfil": 913,
"Ġfin": 786,
"Ġfir": 599,
"Ġfirst</w>": 626,
"Ġfl": 877,
"Ġfor": 614,
"Ġfor</w>": 458,
"Ġform": 963,
"Ġfrom</w>": 503,
"Ġg": 430,
"Ġgame</w>": 895,
"Ġgen": 985,
"Ġgro": 890,
"Ġh": 380,
"Ġha": 485,
"Ġhad</w>": 566,
"Ġhas</w>": 679,
"Ġhave</w>": 667,
"Ġhe</w>": 558,
"Ġher</w>": 660,
"Ġhim</w>": 896,
"Ġhis</w>": 512,
"Ġi": 366,
"Ġimp": 909,
"Ġin": 429,
"Ġin</w>": 389,
"Ġinc": 618,
"Ġinclud": 761,
"Ġind": 983,
"Ġint": 628,
"Ġinter": 832,
"Ġinto</w>": 785,
"Ġis</w>": 480,
"Ġit</w>": 533,
"Ġits</w>": 642,
"Ġj": 723,
"Ġk": 564,
"Ġkn": 857,
"Ġl": 398,
"Ġlar": 962,
"Ġlater</w>": 936,
"Ġlea": 868,
"Ġm": 386,
"Ġmade</w>": 949,
"Ġme": 968,
"Ġmore</w>": 802,
"Ġmost</w>": 910,
"Ġmov": 956,
"Ġmus": 966,
"Ġn": 415,
"Ġnew</w>": 989,
"Ġno": 547,
"Ġnor": 978,
"Ġnot</w>": 632,
"Ġnum": 926,
"Ġo": 359,
"Ġof</w>": 373,
"Ġoff": 875,
"Ġon": 551,
"Ġon</w>": 456,
"Ġone</w>": 677,
"Ġonly</w>": 871,
"Ġor": 699,
"Ġor</w>": 645,
"Ġother</w>": 787,
"Ġout</w>": 925,
"Ġov": 729,
"Ġover</w>": 856,
"Ġp": 379,
"Ġpar": 636,
"Ġper": 736,
"Ġpl": 698,
"Ġpla": 697,
"Ġplay": 808,
"Ġpos": 859,
"Ġpr": 889,
"Ġpre": 749,
"Ġpres": 912,
"Ġpro": 506,
"Ġproduc": 934,
"Ġqu": 955,
"Ġr": 521,
"Ġra": 863,
"Ġre": 400,
"Ġrec": 597,
"Ġrecor": 919,
"Ġreg": 943,
"Ġrel": 900,
"Ġrele": 946,
"Ġrem": 848,
"Ġrep": 762,
"Ġres": 650,
"Ġro": 629,
"Ġs": 361,
"Ġsa": 905,
"Ġsc": 732,
"Ġse": 569,
"Ġseason</w>": 948,
"Ġsec": 781,
"Ġser": 740,
"Ġsev": 884,
"Ġsh": 513,
"Ġshe</w>": 862,
"Ġsp": 578,
"Ġspec": 940,
"Ġst": 446,
"Ġstar": 939,
"Ġsu": 770,
"Ġsub": 969,
"Ġsuc": 764,
"Ġsuch</w>": 981,
"Ġt": 354,
"Ġth": 355,
"Ġthan</w>": 918,
"Ġthat</w>": 452,
"Ġthe": 502,
"Ġthe</w>": 357,
"Ġtheir</w>": 621,
"Ġthem</w>": 998,
"Ġthey</w>": 727,
"Ġthis</w>": 705,
"Ġthree</w>": 902,
"Ġthroug": 923,
"Ġtime</w>": 783,
"Ġto</w>": 391,
"Ġtra": 836,
"Ġtw": 639,
"Ġtwo</w>": 688,
"Ġun": 544,
"Ġup</w>": 898,
"Ġus": 668,
"Ġused</w>": 988,
"Ġv": 495,
"Ġw": 363,
"Ġwas</w>": 422,
"Ġwere</w>": 525,
"Ġwh": 443,
"Ġwhen</w>": 851,
"Ġwhere</w>": 995,
"Ġwhich</w>": 573,
"Ġwhile</w>": 935,
"Ġwho</w>": 724,
"Ġwit": 451,
"Ġwith": 994,
"Ġwith</w>": 466,
"Ġwor": 643,
"Ġwould</w>": 801,
"Ġwrit": 937,
"Ġy": 580,
"Ġyear</w>": 987,
"Ġâ": 556,
"ĠâĢ": 565,
"ĠâĢĵ</w>": 631,
"ĠĊ</w>": 412,
"Ģ": 161,
"Ģ</w>": 223,
"ģ": 162,
"ģ</w>": 273,
"Ĥ": 163,
"Ĥ</w>": 262,
"ĥ": 164,
"ĥ</w>": 337,
"Ħ": 165,
"Ħ</w>": 278,
"ħ": 166,
"ħ</w>": 281,
"Ĩ": 167,
"Ĩ</w>": 308,
"ĩ": 168,
"ĩ</w>": 225,
"Ī": 169,
"Ī</w>": 221,
"ī": 170,
"ī</w>": 244,
"Ĭ": 171,
"Ĭ</w>": 315,
"ĭ": 172,
"ĭ</w>": 321,
"Į": 173,
"Į</w>": 324,
"į": 174,
"į</w>": 302,
"İ": 175,
"İ</w>": 249,
"ı": 176,
"ı</w>": 332,
"IJ": 177,
"IJ</w>": 295,
"ij": 178,
"ij</w>": 313,
"Ĵ": 179,
"Ĵ</w>": 328,
"ĵ": 180,
"ĵ</w>": 312,
"Ķ": 181,
"Ķ</w>": 256,
"ķ": 182,
"ķ</w>": 314,
"ĸ": 183,
"ĸ</w>": 277,
"Ĺ": 184,
"Ĺ</w>": 322,
"ĺ": 185,
"ĺ</w>": 285,
"Ļ": 186,
"Ļ</w>": 267,
"ļ": 187,
"ļ</w>": 290,
"Ľ": 188,
"Ľ</w>": 311,
"ľ": 189,
"ľ</w>": 299,
"Ŀ": 190,
"Ŀ</w>": 291,
"ŀ": 191,
"ŀ</w>": 293,
"Ł": 192,
"Ł</w>": 335,
"ł": 193,
"ł</w>": 252,
"Ń": 194,
"Ń</w>": 297
}