goldfish-models
committed on
Commit
•
5e2e23c
1
Parent(s):
a9507f4
Upload prs_arab_5mb tokenizer.
Browse files
- added_tokens.json +1 -0
- special_tokens_map.json +1 -0
- spiece.model +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"[XXXXX241]": 24216, "[XXXXX558]": 24533, "[XXXXX412]": 24387, "[XXXXX468]": 24443, "[XXXXX230]": 24205, "[XXXXX172]": 24147, "[XXXXX46]": 24021, "[XXXXX247]": 24222, "[XXXXX22]": 23997, "[XXXXX309]": 24284, "[XXXXX351]": 24326, "[XXXXX18]": 23993, "[XXXXX232]": 24207, "[XXXXX311]": 24286, "[XXXXX256]": 24231, "[XXXXX250]": 24225, "[XXXXX389]": 24364, "[XXXXX211]": 24186, "[XXXXX179]": 24154, "[XXXXX285]": 24260, "[XXXXX40]": 24015, "[XXXXX395]": 24370, "[XXXXX456]": 24431, "[XXXXX367]": 24342, "[XXXXX589]": 24564, "[XXXXX488]": 24463, "[XXXXX382]": 24357, "[XXXXX328]": 24303, "[XXXXX316]": 24291, "[XXXXX535]": 24510, "[XXXXX90]": 24065, "[XXXXX410]": 24385, "[XXXXX60]": 24035, "[XXXXX431]": 24406, "[XXXXX425]": 24400, "[XXXXX185]": 24160, "[XXXXX258]": 24233, "[XXXXX13]": 23988, "[XXXXX281]": 24256, "[XXXXX360]": 24335, "[XXXXX76]": 24051, "[XXXXX592]": 24567, "[XXXXX387]": 24362, "[XXXXX279]": 24254, "[XXXXX113]": 24088, "[XXXXX428]": 24403, "[XXXXX79]": 24054, "[XXXXX416]": 24391, "[XXXXX509]": 24484, "[XXXXX106]": 24081, "[XXXXX219]": 24194, "[XXXXX42]": 24017, "[XXXXX500]": 24475, "[XXXXX227]": 24202, "[XXXXX449]": 24424, "[XXXXX507]": 24482, "[XXXXX397]": 24372, "[XXXXX204]": 24179, "[XXXXX261]": 24236, "[XXXXX600]": 24575, "[XXXXX220]": 24195, "[XXXXX278]": 24253, "[XXXXX167]": 24142, "[XXXXX30]": 24005, "[XXXXX530]": 24505, "[XXXXX19]": 23994, "[XXXXX234]": 24209, "[XXXXX417]": 24392, "[XXXXX305]": 24280, "[XXXXX568]": 24543, "[XXXXX14]": 23989, "[XXXXX545]": 24520, "[XXXXX582]": 24557, "[XXXXX210]": 24185, "[XXXXX548]": 24523, "[XXXXX155]": 24130, "[XXXXX458]": 24433, "[XXXXX56]": 24031, "[XXXXX439]": 24414, "[XXXXX334]": 24309, "[XXXXX561]": 24536, "[XXXXX409]": 24384, "[XXXXX35]": 24010, "[XXXXX554]": 24529, "[XXXXX587]": 24562, "[XXXXX225]": 24200, "[XXXXX444]": 24419, "[XXXXX532]": 24507, "[XXXXX555]": 24530, "[XXXXX498]": 24473, "[XXXXX271]": 24246, "[XXXXX576]": 24551, "[XXXXX275]": 24250, "[XXXXX392]": 24367, "[XXXXX424]": 24399, "[XXXXX501]": 
24476, "[XXXXX100]": 24075, "[XXXXX528]": 24503, "[XXXXX71]": 24046, "[XXXXX550]": 24525, "[XXXXX114]": 24089, "[XXXXX515]": 24490, "[XXXXX324]": 24299, "[XXXXX312]": 24287, "[XXXXX118]": 24093, "[XXXXX380]": 24355, "[XXXXX364]": 24339, "[XXXXX17]": 23992, "[XXXXX446]": 24421, "[XXXXX236]": 24211, "[XXXXX11]": 23986, "[XXXXX531]": 24506, "[XXXXX131]": 24106, "[XXXXX484]": 24459, "[XXXXX508]": 24483, "[XXXXX570]": 24545, "[XXXXX307]": 24282, "[XXXXX361]": 24336, "[XXXXX7]": 23982, "[XXXXX0]": 23975, "[XXXXX124]": 24099, "[XXXXX341]": 24316, "[XXXXX27]": 24002, "[XXXXX125]": 24100, "[XXXXX84]": 24059, "[XXXXX267]": 24242, "[XXXXX221]": 24196, "[XXXXX145]": 24120, "[XXXXX466]": 24441, "[XXXXX562]": 24537, "[XXXXX207]": 24182, "[XXXXX450]": 24425, "[XXXXX269]": 24244, "[XXXXX371]": 24346, "[XXXXX583]": 24558, "[XXXXX77]": 24052, "[XXXXX470]": 24445, "[XXXXX263]": 24238, "[XXXXX516]": 24491, "[XXXXX482]": 24457, "[XXXXX504]": 24479, "[XXXXX537]": 24512, "[XXXXX248]": 24223, "[XXXXX598]": 24573, "[XXXXX284]": 24259, "[XXXXX514]": 24489, "[XXXXX122]": 24097, "[XXXXX178]": 24153, "[XXXXX442]": 24417, "[XXXXX209]": 24184, "[XXXXX193]": 24168, "[XXXXX573]": 24548, "[XXXXX419]": 24394, "[XXXXX163]": 24138, "[XXXXX543]": 24518, "[XXXXX233]": 24208, "[XXXXX486]": 24461, "[XXXXX346]": 24321, "[XXXXX111]": 24086, "[XXXXX126]": 24101, "[XXXXX34]": 24009, "[XXXXX325]": 24300, "[XXXXX384]": 24359, "[XXXXX110]": 24085, "[XXXXX479]": 24454, "[XXXXX359]": 24334, "[XXXXX66]": 24041, "[XXXXX381]": 24356, "[XXXXX98]": 24073, "[XXXXX563]": 24538, "[XXXXX317]": 24292, "[XXXXX138]": 24113, "[XXXXX154]": 24129, "[XXXXX567]": 24542, "[XXXXX577]": 24552, "[XXXXX391]": 24366, "[XXXXX429]": 24404, "[XXXXX217]": 24192, "[XXXXX299]": 24274, "[XXXXX345]": 24320, "[XXXXX83]": 24058, "[XXXXX464]": 24439, "[XXXXX38]": 24013, "[XXXXX521]": 24496, "[XXXXX16]": 23991, "[XXXXX411]": 24386, "[XXXXX159]": 24134, "[XXXXX469]": 24444, "[XXXXX476]": 24451, "[XXXXX103]": 24078, "[XXXXX575]": 24550, "[XXXXX569]": 
24544, "[XXXXX184]": 24159, "[XXXXX245]": 24220, "[XXXXX335]": 24310, "[XXXXX169]": 24144, "[XXXXX82]": 24057, "[XXXXX129]": 24104, "[XXXXX408]": 24383, "[XXXXX467]": 24442, "[XXXXX593]": 24568, "[XXXXX222]": 24197, "[XXXXX276]": 24251, "[XXXXX536]": 24511, "[XXXXX69]": 24044, "[XXXXX165]": 24140, "[XXXXX315]": 24290, "[XXXXX26]": 24001, "[XXXXX327]": 24302, "[XXXXX452]": 24427, "[XXXXX427]": 24402, "[XXXXX89]": 24064, "[XXXXX323]": 24298, "[XXXXX101]": 24076, "[XXXXX374]": 24349, "[XXXXX293]": 24268, "[XXXXX68]": 24043, "[XXXXX413]": 24388, "[XXXXX295]": 24270, "[XXXXX434]": 24409, "[XXXXX119]": 24094, "[XXXXX180]": 24155, "[XXXXX44]": 24019, "[XXXXX108]": 24083, "[XXXXX472]": 24447, "[XXXXX45]": 24020, "[XXXXX480]": 24455, "[XXXXX465]": 24440, "[XXXXX406]": 24381, "[XXXXX195]": 24170, "[XXXXX29]": 24004, "[XXXXX105]": 24080, "[XXXXX383]": 24358, "[XXXXX369]": 24344, "[XXXXX205]": 24180, "[XXXXX556]": 24531, "[XXXXX404]": 24379, "[XXXXX198]": 24173, "[XXXXX152]": 24127, "[XXXXX529]": 24504, "[XXXXX74]": 24049, "[XXXXX394]": 24369, "[XXXXX186]": 24161, "[XXXXX97]": 24072, "[XXXXX460]": 24435, "[XXXXX120]": 24095, "[XXXXX591]": 24566, "[XXXXX144]": 24119, "[XXXXX175]": 24150, "[XXXXX194]": 24169, "[XXXXX116]": 24091, "[XXXXX426]": 24401, "[XXXXX115]": 24090, "[XXXXX173]": 24148, "[XXXXX237]": 24212, "[XXXXX148]": 24123, "[XXXXX438]": 24413, "[XXXXX338]": 24313, "[XXXXX255]": 24230, "[XXXXX513]": 24488, "[XXXXX523]": 24498, "[XXXXX308]": 24283, "[XXXXX362]": 24337, "[XXXXX191]": 24166, "[XXXXX379]": 24354, "[XXXXX134]": 24109, "[XXXXX252]": 24227, "[XXXXX189]": 24164, "[XXXXX58]": 24033, "[XXXXX336]": 24311, "[XXXXX390]": 24365, "[XXXXX547]": 24522, "[XXXXX560]": 24535, "[XXXXX405]": 24380, "[XXXXX356]": 24331, "[XXXXX41]": 24016, "[XXXXX376]": 24351, "[XXXXX332]": 24307, "[XXXXX136]": 24111, "[XXXXX70]": 24045, "[XXXXX78]": 24053, "[XXXXX306]": 24281, "[XXXXX352]": 24327, "[XXXXX580]": 24555, "[XXXXX187]": 24162, "[XXXXX368]": 24343, "[XXXXX244]": 24219, 
"[XXXXX377]": 24352, "[XXXXX297]": 24272, "[XXXXX454]": 24429, "[XXXXX303]": 24278, "[XXXXX333]": 24308, "[XXXXX239]": 24214, "[XXXXX170]": 24145, "[XXXXX177]": 24152, "[XXXXX143]": 24118, "[XXXXX518]": 24493, "[XXXXX212]": 24187, "[XXXXX590]": 24565, "[XXXXX403]": 24378, "[XXXXX53]": 24028, "[XXXXX28]": 24003, "[XXXXX96]": 24071, "[XXXXX477]": 24452, "[XXXXX414]": 24389, "[XXXXX72]": 24047, "[XXXXX357]": 24332, "[XXXXX423]": 24398, "[XXXXX24]": 23999, "[XXXXX502]": 24477, "[XXXXX401]": 24376, "[XXXXX301]": 24276, "[XXXXX55]": 24030, "[XXXXX147]": 24122, "[XXXXX363]": 24338, "[XXXXX400]": 24375, "[XXXXX399]": 24374, "[XXXXX3]": 23978, "[XXXXX137]": 24112, "[XXXXX594]": 24569, "[XXXXX494]": 24469, "[XXXXX10]": 23985, "[XXXXX164]": 24139, "[XXXXX92]": 24067, "[XXXXX520]": 24495, "[XXXXX206]": 24181, "[XXXXX340]": 24315, "[XXXXX398]": 24373, "[XXXXX213]": 24188, "[XXXXX182]": 24157, "[XXXXX457]": 24432, "[XXXXX88]": 24063, "[XXXXX420]": 24395, "[XXXXX65]": 24040, "[XXXXX141]": 24116, "[XXXXX578]": 24553, "[XXXXX75]": 24050, "[XXXXX168]": 24143, "[XXXXX1]": 23976, "[XXXXX318]": 24293, "[XXXXX54]": 24029, "[XXXXX112]": 24087, "[XXXXX393]": 24368, "[XXXXX265]": 24240, "[XXXXX319]": 24294, "[XXXXX31]": 24006, "[XXXXX533]": 24508, "[XXXXX104]": 24079, "[XXXXX87]": 24062, "[XXXXX354]": 24329, "[XXXXX270]": 24245, "[XXXXX274]": 24249, "[XXXXX9]": 23984, "[XXXXX489]": 24464, "[XXXXX418]": 24393, "[XXXXX257]": 24232, "[XXXXX254]": 24229, "[XXXXX156]": 24131, "[XXXXX596]": 24571, "[XXXXX499]": 24474, "[XXXXX517]": 24492, "[XXXXX385]": 24360, "[XXXXX39]": 24014, "[XXXXX181]": 24156, "[XXXXX584]": 24559, "[XXXXX372]": 24347, "[XXXXX140]": 24115, "[XXXXX599]": 24574, "[XXXXX202]": 24177, "[XXXXX47]": 24022, "[XXXXX396]": 24371, "[XXXXX296]": 24271, "[XXXXX546]": 24521, "[XXXXX188]": 24163, "[XXXXX353]": 24328, "[XXXXX288]": 24263, "[XXXXX441]": 24416, "[XXXXX216]": 24191, "[XXXXX36]": 24011, "[XXXXX435]": 24410, "[XXXXX214]": 24189, "[XXXXX133]": 24108, "[XXXXX161]": 24136, 
"[XXXXX196]": 24171, "[XXXXX63]": 24038, "[XXXXX109]": 24084, "[XXXXX557]": 24532, "[XXXXX551]": 24526, "[XXXXX171]": 24146, "[XXXXX512]": 24487, "[XXXXX597]": 24572, "[XXXXX496]": 24471, "[XXXXX99]": 24074, "[XXXXX81]": 24056, "[XXXXX330]": 24305, "[XXXXX151]": 24126, "[XXXXX355]": 24330, "[XXXXX199]": 24174, "[XXXXX218]": 24193, "[XXXXX483]": 24458, "[XXXXX291]": 24266, "[XXXXX453]": 24428, "[XXXXX373]": 24348, "[XXXXX190]": 24165, "[XXXXX471]": 24446, "[XXXXX337]": 24312, "[XXXXX339]": 24314, "[XXXXX282]": 24257, "[XXXXX8]": 23983, "[XXXXX432]": 24407, "[XXXXX445]": 24420, "[XXXXX298]": 24273, "[XXXXX579]": 24554, "[XXXXX277]": 24252, "[XXXXX52]": 24027, "[XXXXX246]": 24221, "[XXXXX433]": 24408, "[XXXXX421]": 24396, "[XXXXX493]": 24468, "[XXXXX538]": 24513, "[XXXXX485]": 24460, "[XXXXX473]": 24448, "[XXXXX370]": 24345, "[XXXXX94]": 24069, "[XXXXX62]": 24037, "[XXXXX310]": 24285, "[XXXXX272]": 24247, "[XXXXX451]": 24426, "[XXXXX200]": 24175, "[XXXXX142]": 24117, "[XXXXX67]": 24042, "[XXXXX238]": 24213, "[XXXXX290]": 24265, "[XXXXX386]": 24361, "[XXXXX289]": 24264, "[XXXXX540]": 24515, "[XXXXX128]": 24103, "[XXXXX402]": 24377, "[XXXXX586]": 24561, "[XXXXX242]": 24217, "[XXXXX329]": 24304, "[XXXXX231]": 24206, "[XXXXX12]": 23987, "[XXXXX266]": 24241, "[XXXXX15]": 23990, "[XXXXX365]": 24340, "[XXXXX491]": 24466, "[XXXXX534]": 24509, "[XXXXX595]": 24570, "[XXXXX4]": 23979, "[XXXXX552]": 24527, "[XXXXX251]": 24226, "[XXXXX574]": 24549, "[MASK]": 23974, "[XXXXX343]": 24318, "[XXXXX304]": 24279, "[XXXXX264]": 24239, "[XXXXX37]": 24012, "[XXXXX314]": 24289, "[XXXXX2]": 23977, "[XXXXX487]": 24462, "[XXXXX588]": 24563, "[XXXXX287]": 24262, "[XXXXX407]": 24382, "[XXXXX260]": 24235, "[XXXXX490]": 24465, "[XXXXX158]": 24133, "[XXXXX455]": 24430, "[XXXXX123]": 24098, "[XXXXX430]": 24405, "<pad>": 23973, "[XXXXX49]": 24024, "[XXXXX208]": 24183, "[XXXXX463]": 24438, "[XXXXX347]": 24322, "[XXXXX322]": 24297, "[XXXXX375]": 24350, "[XXXXX286]": 24261, "[XXXXX348]": 24323, 
"[XXXXX526]": 24501, "[XXXXX95]": 24070, "[XXXXX21]": 23996, "[XXXXX6]": 23981, "[XXXXX102]": 24077, "[XXXXX294]": 24269, "[XXXXX549]": 24524, "[XXXXX61]": 24036, "[XXXXX437]": 24412, "[XXXXX149]": 24124, "[XXXXX344]": 24319, "[XXXXX229]": 24204, "[XXXXX280]": 24255, "[XXXXX57]": 24032, "[XXXXX448]": 24423, "[XXXXX510]": 24485, "[XXXXX51]": 24026, "[XXXXX571]": 24546, "[XXXXX117]": 24092, "[XXXXX503]": 24478, "[XXXXX349]": 24324, "[XXXXX443]": 24418, "[XXXXX505]": 24480, "[XXXXX223]": 24198, "[XXXXX292]": 24267, "[XXXXX527]": 24502, "[XXXXX162]": 24137, "[XXXXX331]": 24306, "[XXXXX313]": 24288, "[XXXXX302]": 24277, "[XXXXX201]": 24176, "[XXXXX566]": 24541, "[XXXXX146]": 24121, "[XXXXX183]": 24158, "[XXXXX342]": 24317, "[XXXXX259]": 24234, "[XXXXX253]": 24228, "[XXXXX461]": 24436, "[XXXXX544]": 24519, "[XXXXX262]": 24237, "[XXXXX422]": 24397, "[XXXXX539]": 24514, "[XXXXX572]": 24547, "[XXXXX436]": 24411, "[XXXXX130]": 24105, "[XXXXX43]": 24018, "[XXXXX326]": 24301, "[XXXXX197]": 24172, "[XXXXX497]": 24472, "[XXXXX388]": 24363, "[XXXXX564]": 24539, "[XXXXX553]": 24528, "[XXXXX20]": 23995, "[XXXXX73]": 24048, "[XXXXX107]": 24082, "[XXXXX132]": 24107, "[XXXXX240]": 24215, "[XXXXX541]": 24516, "[XXXXX86]": 24061, "[XXXXX447]": 24422, "[XXXXX25]": 24000, "[XXXXX243]": 24218, "[CLS]": 23971, "[XXXXX522]": 24497, "[XXXXX283]": 24258, "[XXXXX462]": 24437, "[XXXXX91]": 24066, "[XXXXX5]": 23980, "[XXXXX366]": 24341, "[XXXXX559]": 24534, "[XXXXX192]": 24167, "[XXXXX459]": 24434, "[XXXXX495]": 24470, "[XXXXX474]": 24449, "[XXXXX160]": 24135, "[XXXXX524]": 24499, "[XXXXX415]": 24390, "[XXXXX228]": 24203, "[XXXXX320]": 24295, "[XXXXX321]": 24296, "[XXXXX80]": 24055, "[XXXXX121]": 24096, "[XXXXX378]": 24353, "[XXXXX150]": 24125, "[XXXXX506]": 24481, "[XXXXX565]": 24540, "[XXXXX93]": 24068, "[XXXXX23]": 23998, "[XXXXX300]": 24275, "[XXXXX166]": 24141, "[XXXXX542]": 24517, "[XXXXX176]": 24151, "[SEP]": 23972, "[XXXXX249]": 24224, "[XXXXX492]": 24467, "[XXXXX32]": 24007, 
"[XXXXX215]": 24190, "[XXXXX59]": 24034, "[XXXXX203]": 24178, "[XXXXX358]": 24333, "[XXXXX139]": 24114, "[XXXXX127]": 24102, "[XXXXX481]": 24456, "[XXXXX475]": 24450, "[XXXXX511]": 24486, "[XXXXX525]": 24500, "[XXXXX273]": 24248, "[XXXXX226]": 24201, "[XXXXX581]": 24556, "[XXXXX235]": 24210, "[XXXXX135]": 24110, "[XXXXX50]": 24025, "[XXXXX268]": 24243, "[XXXXX157]": 24132, "[XXXXX33]": 24008, "[XXXXX64]": 24039, "[XXXXX174]": 24149, "[XXXXX585]": 24560, "[XXXXX153]": 24128, "[XXXXX440]": 24415, "[XXXXX478]": 24453, "[XXXXX350]": 24325, "[XXXXX519]": 24494, "[XXXXX224]": 24199, "[XXXXX85]": 24060, "[XXXXX48]": 24023}
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[XXXXX0]", "[XXXXX1]", "[XXXXX2]", "[XXXXX3]", "[XXXXX4]", "[XXXXX5]", "[XXXXX6]", "[XXXXX7]", "[XXXXX8]", "[XXXXX9]", "[XXXXX10]", "[XXXXX11]", "[XXXXX12]", "[XXXXX13]", "[XXXXX14]", "[XXXXX15]", "[XXXXX16]", "[XXXXX17]", "[XXXXX18]", "[XXXXX19]", "[XXXXX20]", "[XXXXX21]", "[XXXXX22]", "[XXXXX23]", "[XXXXX24]", "[XXXXX25]", "[XXXXX26]", "[XXXXX27]", "[XXXXX28]", "[XXXXX29]", "[XXXXX30]", "[XXXXX31]", "[XXXXX32]", "[XXXXX33]", "[XXXXX34]", "[XXXXX35]", "[XXXXX36]", "[XXXXX37]", "[XXXXX38]", "[XXXXX39]", "[XXXXX40]", "[XXXXX41]", "[XXXXX42]", "[XXXXX43]", "[XXXXX44]", "[XXXXX45]", "[XXXXX46]", "[XXXXX47]", "[XXXXX48]", "[XXXXX49]", "[XXXXX50]", "[XXXXX51]", "[XXXXX52]", "[XXXXX53]", "[XXXXX54]", "[XXXXX55]", "[XXXXX56]", "[XXXXX57]", "[XXXXX58]", "[XXXXX59]", "[XXXXX60]", "[XXXXX61]", "[XXXXX62]", "[XXXXX63]", "[XXXXX64]", "[XXXXX65]", "[XXXXX66]", "[XXXXX67]", "[XXXXX68]", "[XXXXX69]", "[XXXXX70]", "[XXXXX71]", "[XXXXX72]", "[XXXXX73]", "[XXXXX74]", "[XXXXX75]", "[XXXXX76]", "[XXXXX77]", "[XXXXX78]", "[XXXXX79]", "[XXXXX80]", "[XXXXX81]", "[XXXXX82]", "[XXXXX83]", "[XXXXX84]", "[XXXXX85]", "[XXXXX86]", "[XXXXX87]", "[XXXXX88]", "[XXXXX89]", "[XXXXX90]", "[XXXXX91]", "[XXXXX92]", "[XXXXX93]", "[XXXXX94]", "[XXXXX95]", "[XXXXX96]", "[XXXXX97]", "[XXXXX98]", "[XXXXX99]", "[XXXXX100]", "[XXXXX101]", "[XXXXX102]", "[XXXXX103]", "[XXXXX104]", "[XXXXX105]", "[XXXXX106]", "[XXXXX107]", "[XXXXX108]", "[XXXXX109]", "[XXXXX110]", "[XXXXX111]", "[XXXXX112]", "[XXXXX113]", "[XXXXX114]", "[XXXXX115]", "[XXXXX116]", "[XXXXX117]", "[XXXXX118]", "[XXXXX119]", "[XXXXX120]", "[XXXXX121]", "[XXXXX122]", "[XXXXX123]", "[XXXXX124]", "[XXXXX125]", "[XXXXX126]", "[XXXXX127]", "[XXXXX128]", "[XXXXX129]", "[XXXXX130]", 
"[XXXXX131]", "[XXXXX132]", "[XXXXX133]", "[XXXXX134]", "[XXXXX135]", "[XXXXX136]", "[XXXXX137]", "[XXXXX138]", "[XXXXX139]", "[XXXXX140]", "[XXXXX141]", "[XXXXX142]", "[XXXXX143]", "[XXXXX144]", "[XXXXX145]", "[XXXXX146]", "[XXXXX147]", "[XXXXX148]", "[XXXXX149]", "[XXXXX150]", "[XXXXX151]", "[XXXXX152]", "[XXXXX153]", "[XXXXX154]", "[XXXXX155]", "[XXXXX156]", "[XXXXX157]", "[XXXXX158]", "[XXXXX159]", "[XXXXX160]", "[XXXXX161]", "[XXXXX162]", "[XXXXX163]", "[XXXXX164]", "[XXXXX165]", "[XXXXX166]", "[XXXXX167]", "[XXXXX168]", "[XXXXX169]", "[XXXXX170]", "[XXXXX171]", "[XXXXX172]", "[XXXXX173]", "[XXXXX174]", "[XXXXX175]", "[XXXXX176]", "[XXXXX177]", "[XXXXX178]", "[XXXXX179]", "[XXXXX180]", "[XXXXX181]", "[XXXXX182]", "[XXXXX183]", "[XXXXX184]", "[XXXXX185]", "[XXXXX186]", "[XXXXX187]", "[XXXXX188]", "[XXXXX189]", "[XXXXX190]", "[XXXXX191]", "[XXXXX192]", "[XXXXX193]", "[XXXXX194]", "[XXXXX195]", "[XXXXX196]", "[XXXXX197]", "[XXXXX198]", "[XXXXX199]", "[XXXXX200]", "[XXXXX201]", "[XXXXX202]", "[XXXXX203]", "[XXXXX204]", "[XXXXX205]", "[XXXXX206]", "[XXXXX207]", "[XXXXX208]", "[XXXXX209]", "[XXXXX210]", "[XXXXX211]", "[XXXXX212]", "[XXXXX213]", "[XXXXX214]", "[XXXXX215]", "[XXXXX216]", "[XXXXX217]", "[XXXXX218]", "[XXXXX219]", "[XXXXX220]", "[XXXXX221]", "[XXXXX222]", "[XXXXX223]", "[XXXXX224]", "[XXXXX225]", "[XXXXX226]", "[XXXXX227]", "[XXXXX228]", "[XXXXX229]", "[XXXXX230]", "[XXXXX231]", "[XXXXX232]", "[XXXXX233]", "[XXXXX234]", "[XXXXX235]", "[XXXXX236]", "[XXXXX237]", "[XXXXX238]", "[XXXXX239]", "[XXXXX240]", "[XXXXX241]", "[XXXXX242]", "[XXXXX243]", "[XXXXX244]", "[XXXXX245]", "[XXXXX246]", "[XXXXX247]", "[XXXXX248]", "[XXXXX249]", "[XXXXX250]", "[XXXXX251]", "[XXXXX252]", "[XXXXX253]", "[XXXXX254]", "[XXXXX255]", "[XXXXX256]", "[XXXXX257]", "[XXXXX258]", "[XXXXX259]", "[XXXXX260]", "[XXXXX261]", "[XXXXX262]", "[XXXXX263]", "[XXXXX264]", "[XXXXX265]", "[XXXXX266]", "[XXXXX267]", "[XXXXX268]", "[XXXXX269]", "[XXXXX270]", "[XXXXX271]", "[XXXXX272]", 
"[XXXXX273]", "[XXXXX274]", "[XXXXX275]", "[XXXXX276]", "[XXXXX277]", "[XXXXX278]", "[XXXXX279]", "[XXXXX280]", "[XXXXX281]", "[XXXXX282]", "[XXXXX283]", "[XXXXX284]", "[XXXXX285]", "[XXXXX286]", "[XXXXX287]", "[XXXXX288]", "[XXXXX289]", "[XXXXX290]", "[XXXXX291]", "[XXXXX292]", "[XXXXX293]", "[XXXXX294]", "[XXXXX295]", "[XXXXX296]", "[XXXXX297]", "[XXXXX298]", "[XXXXX299]", "[XXXXX300]", "[XXXXX301]", "[XXXXX302]", "[XXXXX303]", "[XXXXX304]", "[XXXXX305]", "[XXXXX306]", "[XXXXX307]", "[XXXXX308]", "[XXXXX309]", "[XXXXX310]", "[XXXXX311]", "[XXXXX312]", "[XXXXX313]", "[XXXXX314]", "[XXXXX315]", "[XXXXX316]", "[XXXXX317]", "[XXXXX318]", "[XXXXX319]", "[XXXXX320]", "[XXXXX321]", "[XXXXX322]", "[XXXXX323]", "[XXXXX324]", "[XXXXX325]", "[XXXXX326]", "[XXXXX327]", "[XXXXX328]", "[XXXXX329]", "[XXXXX330]", "[XXXXX331]", "[XXXXX332]", "[XXXXX333]", "[XXXXX334]", "[XXXXX335]", "[XXXXX336]", "[XXXXX337]", "[XXXXX338]", "[XXXXX339]", "[XXXXX340]", "[XXXXX341]", "[XXXXX342]", "[XXXXX343]", "[XXXXX344]", "[XXXXX345]", "[XXXXX346]", "[XXXXX347]", "[XXXXX348]", "[XXXXX349]", "[XXXXX350]", "[XXXXX351]", "[XXXXX352]", "[XXXXX353]", "[XXXXX354]", "[XXXXX355]", "[XXXXX356]", "[XXXXX357]", "[XXXXX358]", "[XXXXX359]", "[XXXXX360]", "[XXXXX361]", "[XXXXX362]", "[XXXXX363]", "[XXXXX364]", "[XXXXX365]", "[XXXXX366]", "[XXXXX367]", "[XXXXX368]", "[XXXXX369]", "[XXXXX370]", "[XXXXX371]", "[XXXXX372]", "[XXXXX373]", "[XXXXX374]", "[XXXXX375]", "[XXXXX376]", "[XXXXX377]", "[XXXXX378]", "[XXXXX379]", "[XXXXX380]", "[XXXXX381]", "[XXXXX382]", "[XXXXX383]", "[XXXXX384]", "[XXXXX385]", "[XXXXX386]", "[XXXXX387]", "[XXXXX388]", "[XXXXX389]", "[XXXXX390]", "[XXXXX391]", "[XXXXX392]", "[XXXXX393]", "[XXXXX394]", "[XXXXX395]", "[XXXXX396]", "[XXXXX397]", "[XXXXX398]", "[XXXXX399]", "[XXXXX400]", "[XXXXX401]", "[XXXXX402]", "[XXXXX403]", "[XXXXX404]", "[XXXXX405]", "[XXXXX406]", "[XXXXX407]", "[XXXXX408]", "[XXXXX409]", "[XXXXX410]", "[XXXXX411]", "[XXXXX412]", "[XXXXX413]", "[XXXXX414]", 
"[XXXXX415]", "[XXXXX416]", "[XXXXX417]", "[XXXXX418]", "[XXXXX419]", "[XXXXX420]", "[XXXXX421]", "[XXXXX422]", "[XXXXX423]", "[XXXXX424]", "[XXXXX425]", "[XXXXX426]", "[XXXXX427]", "[XXXXX428]", "[XXXXX429]", "[XXXXX430]", "[XXXXX431]", "[XXXXX432]", "[XXXXX433]", "[XXXXX434]", "[XXXXX435]", "[XXXXX436]", "[XXXXX437]", "[XXXXX438]", "[XXXXX439]", "[XXXXX440]", "[XXXXX441]", "[XXXXX442]", "[XXXXX443]", "[XXXXX444]", "[XXXXX445]", "[XXXXX446]", "[XXXXX447]", "[XXXXX448]", "[XXXXX449]", "[XXXXX450]", "[XXXXX451]", "[XXXXX452]", "[XXXXX453]", "[XXXXX454]", "[XXXXX455]", "[XXXXX456]", "[XXXXX457]", "[XXXXX458]", "[XXXXX459]", "[XXXXX460]", "[XXXXX461]", "[XXXXX462]", "[XXXXX463]", "[XXXXX464]", "[XXXXX465]", "[XXXXX466]", "[XXXXX467]", "[XXXXX468]", "[XXXXX469]", "[XXXXX470]", "[XXXXX471]", "[XXXXX472]", "[XXXXX473]", "[XXXXX474]", "[XXXXX475]", "[XXXXX476]", "[XXXXX477]", "[XXXXX478]", "[XXXXX479]", "[XXXXX480]", "[XXXXX481]", "[XXXXX482]", "[XXXXX483]", "[XXXXX484]", "[XXXXX485]", "[XXXXX486]", "[XXXXX487]", "[XXXXX488]", "[XXXXX489]", "[XXXXX490]", "[XXXXX491]", "[XXXXX492]", "[XXXXX493]", "[XXXXX494]", "[XXXXX495]", "[XXXXX496]", "[XXXXX497]", "[XXXXX498]", "[XXXXX499]", "[XXXXX500]", "[XXXXX501]", "[XXXXX502]", "[XXXXX503]", "[XXXXX504]", "[XXXXX505]", "[XXXXX506]", "[XXXXX507]", "[XXXXX508]", "[XXXXX509]", "[XXXXX510]", "[XXXXX511]", "[XXXXX512]", "[XXXXX513]", "[XXXXX514]", "[XXXXX515]", "[XXXXX516]", "[XXXXX517]", "[XXXXX518]", "[XXXXX519]", "[XXXXX520]", "[XXXXX521]", "[XXXXX522]", "[XXXXX523]", "[XXXXX524]", "[XXXXX525]", "[XXXXX526]", "[XXXXX527]", "[XXXXX528]", "[XXXXX529]", "[XXXXX530]", "[XXXXX531]", "[XXXXX532]", "[XXXXX533]", "[XXXXX534]", "[XXXXX535]", "[XXXXX536]", "[XXXXX537]", "[XXXXX538]", "[XXXXX539]", "[XXXXX540]", "[XXXXX541]", "[XXXXX542]", "[XXXXX543]", "[XXXXX544]", "[XXXXX545]", "[XXXXX546]", "[XXXXX547]", "[XXXXX548]", "[XXXXX549]", "[XXXXX550]", "[XXXXX551]", "[XXXXX552]", "[XXXXX553]", "[XXXXX554]", "[XXXXX555]", "[XXXXX556]", 
"[XXXXX557]", "[XXXXX558]", "[XXXXX559]", "[XXXXX560]", "[XXXXX561]", "[XXXXX562]", "[XXXXX563]", "[XXXXX564]", "[XXXXX565]", "[XXXXX566]", "[XXXXX567]", "[XXXXX568]", "[XXXXX569]", "[XXXXX570]", "[XXXXX571]", "[XXXXX572]", "[XXXXX573]", "[XXXXX574]", "[XXXXX575]", "[XXXXX576]", "[XXXXX577]", "[XXXXX578]", "[XXXXX579]", "[XXXXX580]", "[XXXXX581]", "[XXXXX582]", "[XXXXX583]", "[XXXXX584]", "[XXXXX585]", "[XXXXX586]", "[XXXXX587]", "[XXXXX588]", "[XXXXX589]", "[XXXXX590]", "[XXXXX591]", "[XXXXX592]", "[XXXXX593]", "[XXXXX594]", "[XXXXX595]", "[XXXXX596]", "[XXXXX597]", "[XXXXX598]", "[XXXXX599]", "[XXXXX600]"]}
|
spiece.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bca29a9596bce29bea1fa6b80d148f5fde5aa39db54b0b5e904eefe291de30b0
|
3 |
+
size 676265
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "__type": "AddedToken"}, "sp_model_kwargs": {}, "name_or_path": "models/5mb/prs_arab_5mb", "model_input_names": ["input_ids", "attention_mask"], "special_tokens_map_file": "models/5mb/prs_arab_5mb/special_tokens_map.json", "tokenizer_class": "AlbertTokenizer"}
|