goldfish-models committed on
Commit
4e7783c
1 Parent(s): 653fb39

Upload tiv_latn_full tokenizer.

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"[XXXXX75]": 18223, "[XXXXX155]": 18303, "[XXXXX115]": 18263, "[XXXXX280]": 18428, "[XXXXX130]": 18278, "[XXXXX237]": 18385, "[XXXXX71]": 18219, "[XXXXX247]": 18395, "[XXXXX46]": 18194, "[XXXXX190]": 18338, "[XXXXX99]": 18247, "[XXXXX5]": 18153, "[XXXXX37]": 18185, "[XXXXX78]": 18226, "[XXXXX92]": 18240, "[XXXXX76]": 18224, "[XXXXX148]": 18296, "[XXXXX234]": 18382, "[XXXXX246]": 18394, "[XXXXX221]": 18369, "[XXXXX81]": 18229, "[XXXXX32]": 18180, "[XXXXX240]": 18388, "[XXXXX276]": 18424, "[XXXXX30]": 18178, "[XXXXX174]": 18322, "[XXXXX121]": 18269, "[XXXXX212]": 18360, "[XXXXX0]": 18148, "[XXXXX252]": 18400, "[XXXXX172]": 18320, "[XXXXX74]": 18222, "[XXXXX254]": 18402, "[XXXXX279]": 18427, "[XXXXX220]": 18368, "[XXXXX277]": 18425, "[XXXXX208]": 18356, "[XXXXX114]": 18262, "[SEP]": 18145, "[XXXXX160]": 18308, "[XXXXX28]": 18176, "[XXXXX196]": 18344, "[XXXXX243]": 18391, "[XXXXX45]": 18193, "[XXXXX107]": 18255, "[XXXXX124]": 18272, "[XXXXX151]": 18299, "[XXXXX88]": 18236, "[XXXXX170]": 18318, "[XXXXX248]": 18396, "[XXXXX26]": 18174, "[XXXXX229]": 18377, "[XXXXX251]": 18399, "[XXXXX27]": 18175, "[XXXXX169]": 18317, "[XXXXX187]": 18335, "[XXXXX133]": 18281, "[XXXXX53]": 18201, "[XXXXX270]": 18418, "[XXXXX91]": 18239, "[XXXXX7]": 18155, "[XXXXX23]": 18171, "[XXXXX199]": 18347, "[XXXXX244]": 18392, "[XXXXX202]": 18350, "[XXXXX128]": 18276, "[XXXXX267]": 18415, "[XXXXX184]": 18332, "[XXXXX77]": 18225, "[XXXXX129]": 18277, "[XXXXX9]": 18157, "[XXXXX176]": 18324, "[XXXXX68]": 18216, "[XXXXX235]": 18383, "[XXXXX192]": 18340, "[XXXXX273]": 18421, "[XXXXX204]": 18352, "[XXXXX171]": 18319, "[XXXXX79]": 18227, "[XXXXX131]": 18279, "[XXXXX194]": 18342, "[XXXXX200]": 18348, "[XXXXX4]": 18152, "[XXXXX218]": 18366, "[XXXXX39]": 18187, "[XXXXX189]": 18337, "[XXXXX119]": 18267, "[XXXXX282]": 18430, "[XXXXX143]": 18291, "[XXXXX268]": 18416, "[XXXXX89]": 18237, "[XXXXX181]": 18329, "[XXXXX232]": 18380, "[XXXXX159]": 18307, "[XXXXX281]": 18429, "[XXXXX145]": 18293, "[XXXXX219]": 18367, 
"[XXXXX112]": 18260, "[XXXXX6]": 18154, "[XXXXX21]": 18169, "[XXXXX94]": 18242, "[XXXXX55]": 18203, "[XXXXX161]": 18309, "[XXXXX214]": 18362, "[XXXXX12]": 18160, "[XXXXX213]": 18361, "[XXXXX123]": 18271, "[XXXXX138]": 18286, "[XXXXX13]": 18161, "[XXXXX2]": 18150, "[XXXXX31]": 18179, "[XXXXX102]": 18250, "[XXXXX241]": 18389, "[XXXXX70]": 18218, "[XXXXX158]": 18306, "[CLS]": 18144, "[XXXXX104]": 18252, "[XXXXX226]": 18374, "[XXXXX177]": 18325, "[XXXXX203]": 18351, "[XXXXX73]": 18221, "[XXXXX58]": 18206, "[XXXXX142]": 18290, "[XXXXX60]": 18208, "[XXXXX205]": 18353, "[XXXXX1]": 18149, "[XXXXX29]": 18177, "[XXXXX156]": 18304, "[XXXXX43]": 18191, "[XXXXX168]": 18316, "[XXXXX215]": 18363, "[XXXXX182]": 18330, "[XXXXX59]": 18207, "<pad>": 18146, "[XXXXX207]": 18355, "[XXXXX100]": 18248, "[XXXXX82]": 18230, "[XXXXX228]": 18376, "[XXXXX80]": 18228, "[XXXXX250]": 18398, "[XXXXX157]": 18305, "[XXXXX167]": 18315, "[XXXXX238]": 18386, "[XXXXX62]": 18210, "[XXXXX101]": 18249, "[XXXXX95]": 18243, "[XXXXX264]": 18412, "[XXXXX126]": 18274, "[XXXXX150]": 18298, "[XXXXX57]": 18205, "[XXXXX146]": 18294, "[XXXXX110]": 18258, "[XXXXX274]": 18422, "[XXXXX211]": 18359, "[XXXXX275]": 18423, "[XXXXX154]": 18302, "[XXXXX20]": 18168, "[XXXXX197]": 18345, "[XXXXX15]": 18163, "[XXXXX263]": 18411, "[XXXXX152]": 18300, "[XXXXX34]": 18182, "[XXXXX217]": 18365, "[XXXXX41]": 18189, "[XXXXX239]": 18387, "[XXXXX120]": 18268, "[XXXXX50]": 18198, "[XXXXX144]": 18292, "[XXXXX272]": 18420, "[XXXXX175]": 18323, "[XXXXX191]": 18339, "[XXXXX256]": 18404, "[XXXXX64]": 18212, "[XXXXX122]": 18270, "[XXXXX14]": 18162, "[XXXXX231]": 18379, "[XXXXX135]": 18283, "[XXXXX98]": 18246, "[XXXXX227]": 18375, "[XXXXX105]": 18253, "[XXXXX17]": 18165, "[XXXXX24]": 18172, "[XXXXX54]": 18202, "[XXXXX49]": 18197, "[XXXXX173]": 18321, "[XXXXX257]": 18405, "[XXXXX48]": 18196, "[XXXXX265]": 18413, "[XXXXX90]": 18238, "[XXXXX136]": 18284, "[XXXXX84]": 18232, "[XXXXX233]": 18381, "[XXXXX103]": 18251, "[XXXXX83]": 18231, "[XXXXX35]": 
18183, "[XXXXX11]": 18159, "[XXXXX283]": 18431, "[XXXXX266]": 18414, "[XXXXX149]": 18297, "[XXXXX111]": 18259, "[XXXXX236]": 18384, "[XXXXX216]": 18364, "[XXXXX69]": 18217, "[XXXXX188]": 18336, "[XXXXX186]": 18334, "[XXXXX258]": 18406, "[XXXXX223]": 18371, "[XXXXX180]": 18328, "[XXXXX18]": 18166, "[XXXXX179]": 18327, "[XXXXX125]": 18273, "[XXXXX140]": 18288, "[XXXXX47]": 18195, "[XXXXX165]": 18313, "[XXXXX51]": 18199, "[XXXXX61]": 18209, "[XXXXX259]": 18407, "[XXXXX117]": 18265, "[XXXXX16]": 18164, "[MASK]": 18147, "[XXXXX116]": 18264, "[XXXXX113]": 18261, "[XXXXX118]": 18266, "[XXXXX242]": 18390, "[XXXXX127]": 18275, "[XXXXX222]": 18370, "[XXXXX3]": 18151, "[XXXXX33]": 18181, "[XXXXX249]": 18397, "[XXXXX262]": 18410, "[XXXXX163]": 18311, "[XXXXX278]": 18426, "[XXXXX224]": 18372, "[XXXXX193]": 18341, "[XXXXX269]": 18417, "[XXXXX206]": 18354, "[XXXXX106]": 18254, "[XXXXX96]": 18244, "[XXXXX185]": 18333, "[XXXXX10]": 18158, "[XXXXX137]": 18285, "[XXXXX162]": 18310, "[XXXXX260]": 18408, "[XXXXX164]": 18312, "[XXXXX19]": 18167, "[XXXXX25]": 18173, "[XXXXX108]": 18256, "[XXXXX230]": 18378, "[XXXXX195]": 18343, "[XXXXX65]": 18213, "[XXXXX38]": 18186, "[XXXXX245]": 18393, "[XXXXX52]": 18200, "[XXXXX178]": 18326, "[XXXXX141]": 18289, "[XXXXX56]": 18204, "[XXXXX42]": 18190, "[XXXXX86]": 18234, "[XXXXX22]": 18170, "[XXXXX253]": 18401, "[XXXXX87]": 18235, "[XXXXX66]": 18214, "[XXXXX134]": 18282, "[XXXXX63]": 18211, "[XXXXX139]": 18287, "[XXXXX36]": 18184, "[XXXXX225]": 18373, "[XXXXX255]": 18403, "[XXXXX109]": 18257, "[XXXXX147]": 18295, "[XXXXX85]": 18233, "[XXXXX44]": 18192, "[XXXXX201]": 18349, "[XXXXX132]": 18280, "[XXXXX209]": 18357, "[XXXXX153]": 18301, "[XXXXX183]": 18331, "[XXXXX93]": 18241, "[XXXXX72]": 18220, "[XXXXX210]": 18358, "[XXXXX67]": 18215, "[XXXXX198]": 18346, "[XXXXX8]": 18156, "[XXXXX40]": 18188, "[XXXXX271]": 18419, "[XXXXX261]": 18409, "[XXXXX97]": 18245, "[XXXXX166]": 18314}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[XXXXX0]", "[XXXXX1]", "[XXXXX2]", "[XXXXX3]", "[XXXXX4]", "[XXXXX5]", "[XXXXX6]", "[XXXXX7]", "[XXXXX8]", "[XXXXX9]", "[XXXXX10]", "[XXXXX11]", "[XXXXX12]", "[XXXXX13]", "[XXXXX14]", "[XXXXX15]", "[XXXXX16]", "[XXXXX17]", "[XXXXX18]", "[XXXXX19]", "[XXXXX20]", "[XXXXX21]", "[XXXXX22]", "[XXXXX23]", "[XXXXX24]", "[XXXXX25]", "[XXXXX26]", "[XXXXX27]", "[XXXXX28]", "[XXXXX29]", "[XXXXX30]", "[XXXXX31]", "[XXXXX32]", "[XXXXX33]", "[XXXXX34]", "[XXXXX35]", "[XXXXX36]", "[XXXXX37]", "[XXXXX38]", "[XXXXX39]", "[XXXXX40]", "[XXXXX41]", "[XXXXX42]", "[XXXXX43]", "[XXXXX44]", "[XXXXX45]", "[XXXXX46]", "[XXXXX47]", "[XXXXX48]", "[XXXXX49]", "[XXXXX50]", "[XXXXX51]", "[XXXXX52]", "[XXXXX53]", "[XXXXX54]", "[XXXXX55]", "[XXXXX56]", "[XXXXX57]", "[XXXXX58]", "[XXXXX59]", "[XXXXX60]", "[XXXXX61]", "[XXXXX62]", "[XXXXX63]", "[XXXXX64]", "[XXXXX65]", "[XXXXX66]", "[XXXXX67]", "[XXXXX68]", "[XXXXX69]", "[XXXXX70]", "[XXXXX71]", "[XXXXX72]", "[XXXXX73]", "[XXXXX74]", "[XXXXX75]", "[XXXXX76]", "[XXXXX77]", "[XXXXX78]", "[XXXXX79]", "[XXXXX80]", "[XXXXX81]", "[XXXXX82]", "[XXXXX83]", "[XXXXX84]", "[XXXXX85]", "[XXXXX86]", "[XXXXX87]", "[XXXXX88]", "[XXXXX89]", "[XXXXX90]", "[XXXXX91]", "[XXXXX92]", "[XXXXX93]", "[XXXXX94]", "[XXXXX95]", "[XXXXX96]", "[XXXXX97]", "[XXXXX98]", "[XXXXX99]", "[XXXXX100]", "[XXXXX101]", "[XXXXX102]", "[XXXXX103]", "[XXXXX104]", "[XXXXX105]", "[XXXXX106]", "[XXXXX107]", "[XXXXX108]", "[XXXXX109]", "[XXXXX110]", "[XXXXX111]", "[XXXXX112]", "[XXXXX113]", "[XXXXX114]", "[XXXXX115]", "[XXXXX116]", "[XXXXX117]", "[XXXXX118]", "[XXXXX119]", "[XXXXX120]", "[XXXXX121]", "[XXXXX122]", "[XXXXX123]", "[XXXXX124]", "[XXXXX125]", "[XXXXX126]", "[XXXXX127]", "[XXXXX128]", "[XXXXX129]", 
"[XXXXX130]", "[XXXXX131]", "[XXXXX132]", "[XXXXX133]", "[XXXXX134]", "[XXXXX135]", "[XXXXX136]", "[XXXXX137]", "[XXXXX138]", "[XXXXX139]", "[XXXXX140]", "[XXXXX141]", "[XXXXX142]", "[XXXXX143]", "[XXXXX144]", "[XXXXX145]", "[XXXXX146]", "[XXXXX147]", "[XXXXX148]", "[XXXXX149]", "[XXXXX150]", "[XXXXX151]", "[XXXXX152]", "[XXXXX153]", "[XXXXX154]", "[XXXXX155]", "[XXXXX156]", "[XXXXX157]", "[XXXXX158]", "[XXXXX159]", "[XXXXX160]", "[XXXXX161]", "[XXXXX162]", "[XXXXX163]", "[XXXXX164]", "[XXXXX165]", "[XXXXX166]", "[XXXXX167]", "[XXXXX168]", "[XXXXX169]", "[XXXXX170]", "[XXXXX171]", "[XXXXX172]", "[XXXXX173]", "[XXXXX174]", "[XXXXX175]", "[XXXXX176]", "[XXXXX177]", "[XXXXX178]", "[XXXXX179]", "[XXXXX180]", "[XXXXX181]", "[XXXXX182]", "[XXXXX183]", "[XXXXX184]", "[XXXXX185]", "[XXXXX186]", "[XXXXX187]", "[XXXXX188]", "[XXXXX189]", "[XXXXX190]", "[XXXXX191]", "[XXXXX192]", "[XXXXX193]", "[XXXXX194]", "[XXXXX195]", "[XXXXX196]", "[XXXXX197]", "[XXXXX198]", "[XXXXX199]", "[XXXXX200]", "[XXXXX201]", "[XXXXX202]", "[XXXXX203]", "[XXXXX204]", "[XXXXX205]", "[XXXXX206]", "[XXXXX207]", "[XXXXX208]", "[XXXXX209]", "[XXXXX210]", "[XXXXX211]", "[XXXXX212]", "[XXXXX213]", "[XXXXX214]", "[XXXXX215]", "[XXXXX216]", "[XXXXX217]", "[XXXXX218]", "[XXXXX219]", "[XXXXX220]", "[XXXXX221]", "[XXXXX222]", "[XXXXX223]", "[XXXXX224]", "[XXXXX225]", "[XXXXX226]", "[XXXXX227]", "[XXXXX228]", "[XXXXX229]", "[XXXXX230]", "[XXXXX231]", "[XXXXX232]", "[XXXXX233]", "[XXXXX234]", "[XXXXX235]", "[XXXXX236]", "[XXXXX237]", "[XXXXX238]", "[XXXXX239]", "[XXXXX240]", "[XXXXX241]", "[XXXXX242]", "[XXXXX243]", "[XXXXX244]", "[XXXXX245]", "[XXXXX246]", "[XXXXX247]", "[XXXXX248]", "[XXXXX249]", "[XXXXX250]", "[XXXXX251]", "[XXXXX252]", "[XXXXX253]", "[XXXXX254]", "[XXXXX255]", "[XXXXX256]", "[XXXXX257]", "[XXXXX258]", "[XXXXX259]", "[XXXXX260]", "[XXXXX261]", "[XXXXX262]", "[XXXXX263]", "[XXXXX264]", "[XXXXX265]", "[XXXXX266]", "[XXXXX267]", "[XXXXX268]", "[XXXXX269]", "[XXXXX270]", "[XXXXX271]", 
"[XXXXX272]", "[XXXXX273]", "[XXXXX274]", "[XXXXX275]", "[XXXXX276]", "[XXXXX277]", "[XXXXX278]", "[XXXXX279]", "[XXXXX280]", "[XXXXX281]", "[XXXXX282]", "[XXXXX283]"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5bffb36f02f9ea178ed86d57b1c577e4a58476079b51d315f090bab0fa749ab
3
+ size 524474
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "__type": "AddedToken"}, "sp_model_kwargs": {}, "name_or_path": "models/full/tiv_latn_full", "model_input_names": ["input_ids", "attention_mask"], "special_tokens_map_file": "models/full/tiv_latn_full/special_tokens_map.json", "tokenizer_class": "AlbertTokenizer"}