alext77 committed on
Commit
f91b5b3
1 Parent(s): 4dddb2b

Upload processor

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
preprocessor_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "apply_ocr": true,
3
+ "do_resize": true,
4
+ "image_processor_type": "LayoutLMv2FeatureExtractor",
5
+ "ocr_lang": "chi_sim",
6
+ "processor_class": "LayoutXLMProcessor",
7
+ "resample": 2,
8
+ "size": {
9
+ "height": 224,
10
+ "width": 224
11
+ },
12
+ "tesseract_config": ""
13
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c4147c7d8584d679fcabb10e7c63ac96de865feb8a5e2b6bc9308acf93cacdb
3
+ size 17082953
tokenizer_config.json CHANGED
@@ -6,7 +6,7 @@
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false,
9
- "special": false
10
  },
11
  "1": {
12
  "content": "<pad>",
@@ -14,7 +14,7 @@
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false,
17
- "special": false
18
  },
19
  "2": {
20
  "content": "</s>",
@@ -22,7 +22,7 @@
22
  "normalized": false,
23
  "rstrip": false,
24
  "single_word": false,
25
- "special": false
26
  },
27
  "3": {
28
  "content": "<unk>",
@@ -30,15 +30,15 @@
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
- "special": false
34
  },
35
  "250001": {
36
  "content": "<mask>",
37
- "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
- "special": false
42
  }
43
  },
44
  "additional_special_tokens": [],
@@ -63,6 +63,7 @@
63
  0
64
  ],
65
  "pad_token_label": -100,
 
66
  "sep_token": "</s>",
67
  "sep_token_box": [
68
  1000,
@@ -70,8 +71,6 @@
70
  1000,
71
  1000
72
  ],
73
- "sp_model_kwargs": {},
74
  "tokenizer_class": "LayoutXLMTokenizer",
75
- "tokenizer_file": "/root/.cache/huggingface/hub/models--microsoft--layoutxlm-base/snapshots/8e04ebc4d3ba0013cf943b697c0aedf19b06472a/tokenizer.json",
76
  "unk_token": "<unk>"
77
  }
 
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false,
9
+ "special": true
10
  },
11
  "1": {
12
  "content": "<pad>",
 
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false,
17
+ "special": true
18
  },
19
  "2": {
20
  "content": "</s>",
 
22
  "normalized": false,
23
  "rstrip": false,
24
  "single_word": false,
25
+ "special": true
26
  },
27
  "3": {
28
  "content": "<unk>",
 
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
+ "special": true
34
  },
35
  "250001": {
36
  "content": "<mask>",
37
+ "lstrip": true,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
+ "special": true
42
  }
43
  },
44
  "additional_special_tokens": [],
 
63
  0
64
  ],
65
  "pad_token_label": -100,
66
+ "processor_class": "LayoutXLMProcessor",
67
  "sep_token": "</s>",
68
  "sep_token_box": [
69
  1000,
 
71
  1000,
72
  1000
73
  ],
 
74
  "tokenizer_class": "LayoutXLMTokenizer",
 
75
  "unk_token": "<unk>"
76
  }