thekop79 commited on
Commit
2d9ae1c
1 Parent(s): e989a4f

Create tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +112 -0
tokenizer_config.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Hugging Face's logo
2
+ Hugging Face
3
+ Search models, datasets, users...
4
+ Models
5
+ Datasets
6
+ Spaces
7
+ Posts
8
+ Docs
9
+ Solutions
10
+ Pricing
11
+
12
+
13
+
14
+
15
+ quicktensor
16
+ /
17
+ dexml_eurlex-4k
18
+
19
+ like
20
+ 0
21
+ Sentence Similarity
22
+ sentence-transformers
23
+ Safetensors
24
+ Transformers
25
+ English
26
+ distilbert
27
+ feature-extraction
28
+ text-embeddings-inference
29
+ Inference Endpoints
30
+
31
+ arxiv:
32
+ 2310.10636
33
+
34
+ License:
35
+ apache-2.0
36
+ Model card
37
+ Files and versions
38
+ Community
39
+ dexml_eurlex-4k
40
+ /
41
+ tokenizer_config.json
42
+
43
+ nilesh2797
44
+ add model
45
+ 174e238
46
+ 5 months ago
47
+ raw
48
+
49
+ Copy download link
50
+ history
51
+ blame
52
+ contribute
53
+ delete
54
+ No virus
55
+
56
+ 1.2 kB
57
+ {
58
+ "added_tokens_decoder": {
59
+ "0": {
60
+ "content": "[PAD]",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "100": {
68
+ "content": "[UNK]",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "101": {
76
+ "content": "[CLS]",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "102": {
84
+ "content": "[SEP]",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "103": {
92
+ "content": "[MASK]",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ }
99
+ },
100
+ "clean_up_tokenization_spaces": true,
101
+ "cls_token": "[CLS]",
102
+ "do_lower_case": true,
103
+ "mask_token": "[MASK]",
104
+ "model_max_length": 512,
105
+ "pad_token": "[PAD]",
106
+ "sep_token": "[SEP]",
107
+ "strip_accents": null,
108
+ "tokenize_chinese_chars": true,
109
+ "tokenizer_class": "DistilBertTokenizer",
110
+ "unk_token": "[UNK]"
111
+ }
112
+