johngiorgi committed on
Commit
d09a156
1 Parent(s): c0ee347

Upload tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +80 -0
tokenizer_config.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "additional_special_tokens": [
4
+ "<background>",
5
+ "</background>",
6
+ "<ref>",
7
+ "</ref>",
8
+ "<sep>",
9
+ "<pop>",
10
+ "</pop>",
11
+ "<int>",
12
+ "</int>",
13
+ "<out>",
14
+ "</out>",
15
+ "<evidence>",
16
+ "</evidence>"
17
+ ],
18
+ "bos_token": {
19
+ "__type": "AddedToken",
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": true,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "cls_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<s>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ },
34
+ "eos_token": {
35
+ "__type": "AddedToken",
36
+ "content": "</s>",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false
41
+ },
42
+ "errors": "replace",
43
+ "mask_token": {
44
+ "__type": "AddedToken",
45
+ "content": "<mask>",
46
+ "lstrip": true,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ },
51
+ "model_max_length": 16384,
52
+ "name_or_path": "allenai/led-base-16384",
53
+ "pad_token": {
54
+ "__type": "AddedToken",
55
+ "content": "<pad>",
56
+ "lstrip": false,
57
+ "normalized": true,
58
+ "rstrip": false,
59
+ "single_word": false
60
+ },
61
+ "sep_token": {
62
+ "__type": "AddedToken",
63
+ "content": "</s>",
64
+ "lstrip": false,
65
+ "normalized": true,
66
+ "rstrip": false,
67
+ "single_word": false
68
+ },
69
+ "special_tokens_map_file": "/Users/johngiorgi/.cache/huggingface/transformers/05da652a7fca41c1c18027c1201e473217bb373e370d1283e3de49d5880cbf0c.cb2244924ab24d706b02fd7fcedaea4531566537687a539ebb94db511fd122a0",
70
+ "tokenizer_class": "LEDTokenizer",
71
+ "trim_offsets": true,
72
+ "unk_token": {
73
+ "__type": "AddedToken",
74
+ "content": "<unk>",
75
+ "lstrip": false,
76
+ "normalized": true,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ }
80
+ }