shreyanshu09
committed on
Commit
•
a6dafe0
1
Parent(s):
18bd282
Delete global_information_extractor
Browse files
- global_information_extractor/added_tokens.json +0 -6
- global_information_extractor/artifacts.ckpt +0 -3
- global_information_extractor/config.json +0 -24
- global_information_extractor/config.yaml +0 -29
- global_information_extractor/hparams.yaml +0 -1
- global_information_extractor/pytorch_model.bin +0 -3
- global_information_extractor/sentencepiece.bpe.model +0 -3
- global_information_extractor/special_tokens_map.json +0 -18
- global_information_extractor/tokenizer_config.json +0 -22
global_information_extractor/added_tokens.json
DELETED
@@ -1,6 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"<s_>": 57525,
|
3 |
-
"<s_iitcdip>": 57523,
|
4 |
-
"<s_synthdog>": 57524,
|
5 |
-
"<sep/>": 57522
|
6 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
global_information_extractor/artifacts.ckpt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:cd1bb7a057d1f06cc1f0d1abaaeb0f9b056c25f95b2c77534329bebfeddc6516
|
3 |
-
size 1608853531
|
|
|
|
|
|
|
|
global_information_extractor/config.json
DELETED
@@ -1,24 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_name_or_path": "naver-clova-ix/donut-base",
|
3 |
-
"align_long_axis": false,
|
4 |
-
"architectures": [
|
5 |
-
"DonutModel"
|
6 |
-
],
|
7 |
-
"decoder_layer": 4,
|
8 |
-
"encoder_layer": [
|
9 |
-
2,
|
10 |
-
2,
|
11 |
-
14,
|
12 |
-
2
|
13 |
-
],
|
14 |
-
"input_size": [
|
15 |
-
960,
|
16 |
-
1280
|
17 |
-
],
|
18 |
-
"max_length": 768,
|
19 |
-
"max_position_embeddings": 768,
|
20 |
-
"model_type": "donut",
|
21 |
-
"torch_dtype": "float32",
|
22 |
-
"transformers_version": "4.21.3",
|
23 |
-
"window_size": 10
|
24 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
global_information_extractor/config.yaml
DELETED
@@ -1,29 +0,0 @@
|
|
1 |
-
resume_from_checkpoint_path: None
|
2 |
-
result_path: './result'
|
3 |
-
pretrained_model_name_or_path: 'naver-clova-ix/donut-base'
|
4 |
-
dataset_name_or_paths:
|
5 |
-
- './dataset/1_final_block_dataset/'
|
6 |
-
sort_json_key: False
|
7 |
-
train_batch_sizes:
|
8 |
-
- 4
|
9 |
-
val_batch_sizes:
|
10 |
-
- 4
|
11 |
-
input_size:
|
12 |
-
- 960
|
13 |
-
- 1280
|
14 |
-
max_length: 768
|
15 |
-
align_long_axis: False
|
16 |
-
num_nodes: 1
|
17 |
-
seed: 2022
|
18 |
-
lr: 3e-05
|
19 |
-
warmup_steps: 300
|
20 |
-
num_training_samples_per_epoch: 1368
|
21 |
-
max_epochs: 30
|
22 |
-
max_steps: -1
|
23 |
-
num_workers: 40
|
24 |
-
val_check_interval: 1.0
|
25 |
-
check_val_every_n_epoch: 1
|
26 |
-
gradient_clip_val: 1.0
|
27 |
-
verbose: True
|
28 |
-
exp_name: 'train_cord'
|
29 |
-
exp_version: '1_block_diagram_2'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
global_information_extractor/hparams.yaml
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{}
|
|
|
|
global_information_extractor/pytorch_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:dffe6f9c3b8085bcbfdbc67e44b9e2912e7974bca4ccc6f09d9f5e8650d74159
|
3 |
-
size 858335171
|
|
|
|
|
|
|
|
global_information_extractor/sentencepiece.bpe.model
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:cb9e3dce4c326195d08fc3dd0f7e2eee1da8595c847bf4c1a9c78b7a82d47e2d
|
3 |
-
size 1296245
|
|
|
|
|
|
|
|
global_information_extractor/special_tokens_map.json
DELETED
@@ -1,18 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"additional_special_tokens": [
|
3 |
-
"<s_>"
|
4 |
-
],
|
5 |
-
"bos_token": "<s>",
|
6 |
-
"cls_token": "<s>",
|
7 |
-
"eos_token": "</s>",
|
8 |
-
"mask_token": {
|
9 |
-
"content": "<mask>",
|
10 |
-
"lstrip": true,
|
11 |
-
"normalized": true,
|
12 |
-
"rstrip": false,
|
13 |
-
"single_word": false
|
14 |
-
},
|
15 |
-
"pad_token": "<pad>",
|
16 |
-
"sep_token": "</s>",
|
17 |
-
"unk_token": "<unk>"
|
18 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
global_information_extractor/tokenizer_config.json
DELETED
@@ -1,22 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bos_token": "<s>",
|
3 |
-
"cls_token": "<s>",
|
4 |
-
"eos_token": "</s>",
|
5 |
-
"from_slow": true,
|
6 |
-
"mask_token": {
|
7 |
-
"__type": "AddedToken",
|
8 |
-
"content": "<mask>",
|
9 |
-
"lstrip": true,
|
10 |
-
"normalized": true,
|
11 |
-
"rstrip": false,
|
12 |
-
"single_word": false
|
13 |
-
},
|
14 |
-
"name_or_path": "naver-clova-ix/donut-base",
|
15 |
-
"pad_token": "<pad>",
|
16 |
-
"processor_class": "DonutProcessor",
|
17 |
-
"sep_token": "</s>",
|
18 |
-
"sp_model_kwargs": {},
|
19 |
-
"special_tokens_map_file": null,
|
20 |
-
"tokenizer_class": "XLMRobertaTokenizer",
|
21 |
-
"unk_token": "<unk>"
|
22 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|