jonaskg ysharma HF staff committed on
Commit
09e80e3
0 Parent(s):

Duplicate from ysharma/nougat

Browse files

Co-authored-by: yuvraj sharma <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ output/nougat.pdf filter=lfs diff=lfs merge=lfs -text
37
+ input/nougat.pdf filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Nougat
3
+ emoji: 👁📄
4
+ colorFrom: pink
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 3.41.2
8
+ app_file: app.py
9
+ license: mit
10
+ duplicated_from: ysharma/nougat
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import subprocess
3
+ import uuid
4
+ import os
5
+ import requests
6
+ import re
7
+
8
+
9
def get_pdf(pdf_link, timeout=60):
    """Download the PDF at ``pdf_link`` into the local ``input`` directory.

    Args:
        pdf_link: URL of the PDF to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled host cannot block the caller forever.

    Returns:
        The relative path (``input/downloaded_paper_<hex>.pdf``) chosen for
        the download. The path is returned even when the server answers with
        a non-200 status; in that case nothing is written and a failure
        message is printed.

    Raises:
        requests.RequestException: If the request itself fails (invalid URL,
            connection error, timeout).
    """
    # Generate a unique filename so separate downloads never collide.
    unique_filename = f"input/downloaded_paper_{uuid.uuid4().hex}.pdf"

    # Make sure the target directory exists before writing into it.
    os.makedirs(os.path.dirname(unique_filename), exist_ok=True)

    # Send a GET request to the PDF link.
    response = requests.get(pdf_link, timeout=timeout)

    if response.status_code == 200:
        # Save the PDF content to a local file.
        with open(unique_filename, 'wb') as pdf_file:
            pdf_file.write(response.content)
        print("PDF downloaded successfully.")
    else:
        print("Failed to download the PDF.")
    return unique_filename
24
+
25
+
26
def nougat_ocr(file_name):
    """Run the ``nougat`` command-line tool on a PDF.

    The CLI is invoked with ``--out output``, the local ``nougat``
    checkpoint directory and ``--markdown``; callers read the result from
    the ``output`` folder afterwards.

    Args:
        file_name: Path of the PDF passed to the CLI.

    Returns:
        None. A non-zero exit status is reported on stdout together with the
        captured stderr instead of being silently discarded.
    """
    cli_command = [
        'nougat',
        '--out', 'output',
        'pdf', f'{file_name}',
        '--checkpoint', 'nougat',
        '--markdown',
    ]

    # Capture the CLI output so a failure can be surfaced in the logs.
    completed_process = subprocess.run(
        cli_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    if completed_process.returncode != 0:
        print(f"nougat exited with status {completed_process.returncode}")
        print(completed_process.stderr)

    return
44
+
45
+
46
def predict(pdf_file, pdf_link):
    """Convert a PDF (uploaded file or link) to markdown text via nougat.

    Args:
        pdf_file: Uploaded file object exposing a ``.name`` path, or ``None``
            when nothing was uploaded.
        pdf_link: URL of a PDF to download when no file was uploaded. May be
            ``None``, empty or blank, all of which count as "no link".

    Returns:
        The contents of the generated ``output/<stem>.mmd`` file with the
        LaTeX delimiters ``\\( \\)`` / ``\\[ \\]`` switched to ``$`` / ``$$``,
        or a user-facing message when neither input was provided.
    """
    if pdf_file is None:
        # The link can arrive as None (e.g. a cleared textbox) or as blank
        # text; neither is a usable URL.
        if not pdf_link or not pdf_link.strip():
            print("No file is uploaded and No link is provided")
            return "No data provided. Upload a pdf file or provide a pdf link and try again!"
        pdf_link = pdf_link.strip()
        print(f'pdf_link is - {pdf_link}')
        file_name = get_pdf(pdf_link)
        print(f'file_name is - {file_name}')
    else:
        file_name = pdf_file.name
        print(file_name)
        print(os.path.splitext(os.path.basename(file_name))[0])

    # Call nougat
    nougat_ocr(file_name)

    # Strip the directory and the real extension (whatever its length)
    # rather than blindly cutting the last four characters.
    stem = os.path.splitext(os.path.basename(file_name))[0]
    with open(f'output/{stem}.mmd', 'r', encoding='utf-8') as file:
        content = file.read()

    # Switch math delimiters to the dollar-sign form.
    content = (
        content.replace(r'\(', '$')
        .replace(r'\)', '$')
        .replace(r'\[', '$$')
        .replace(r'\]', '$$')
    )
    return content
72
+
73
+
74
+
75
+
76
def nougat_ocr1(file_name):
    """Invoke the ``nougat`` CLI on ``file_name`` and discard its output.

    The command writes into the ``output`` folder using the local ``nougat``
    checkpoint and the ``--markdown`` flag. Nothing is returned.
    """
    print('******* inside nougat_ocr *******')

    # Assemble the argument vector piece by piece.
    command = ['nougat']
    command += ['--out', 'output']
    command += ['pdf', f'{file_name}']
    command += ['--checkpoint', 'nougat']
    command.append('--markdown')

    # stdout and stderr are captured as text, then ignored.
    subprocess.run(command, capture_output=True, text=True)
    return None
90
+
91
+
92
def predict1(pdf_file):
    """Run nougat on an uploaded PDF and return the generated markdown.

    Args:
        pdf_file: Uploaded file object exposing a ``.name`` path.

    Returns:
        The text of ``output/<stem>.mmd``, where ``<stem>`` is the file's
        base name without its final extension.
    """
    print('******* inside predict *******')
    print(f"temporary file - {pdf_file.name}")
    # Drop only the final extension so names containing extra dots
    # ("my.paper.pdf") keep their full stem.
    pdf_name = os.path.splitext(os.path.basename(pdf_file.name))[0]
    print(f"pdf file name - {pdf_name}")

    # Get prediction for a PDF using nougat
    nougat_ocr(pdf_file.name)
    print("BAACCKKK")

    # Open the multimarkdown (.mmd) file for reading
    with open(f'output/{pdf_name}.mmd', 'r', encoding='utf-8') as file:
        content = file.read()

    return content
107
+
108
def process_example(pdf_file, pdf_link):
    """Run ``predict`` on an example and wrap the text in a Gradio update."""
    return gr.update(value=predict(pdf_file, pdf_link))
111
+
112
# Styling for the scrollable OCR output panel (element id "mkd").
css = """
#mkd {
    height: 500px;
    overflow: auto;
    border: 1px solid #ccc;
}
"""

with gr.Blocks(css=css) as demo:
    # Page header; the closing tags are properly terminated so the markup is valid.
    gr.HTML("<h1><center>Nougat: Neural Optical Understanding for Academic Documents</center></h1>")
    gr.HTML("<h3><center>Lukas Blecher et al. <a href='https://arxiv.org/pdf/2308.13418.pdf' target='_blank'>Paper</a>, <a href='https://facebookresearch.github.io/nougat/'>Project</a></center></h3>")

    # Column captions above the two input widgets.
    with gr.Row():
        gr.Markdown('<h4><center>Upload a PDF</center></h4>', scale=1)
        gr.Markdown('<h4><center><i>OR</i></center></h4>', scale=1)
        gr.Markdown('<h4><center>Provide a PDF link</center></h4>', scale=1)

    # Inputs: either an uploaded file or a link to a PDF.
    with gr.Row(equal_height=True):
        pdf_file = gr.File(label='PDF📃', file_count='single', scale=1)
        pdf_link = gr.Textbox(placeholder='Enter an Arxiv link here', label='PDF link🔗🌐', scale=1)

    with gr.Row():
        btn = gr.Button('Run NOUGAT🍫')
        clr = gr.Button('Clear🚿')

    output_headline = gr.Markdown("<h3>PDF converted to markup language through Nougat-OCR👇:</h3>")
    parsed_output = gr.Markdown(elem_id='mkd', value='📃🔤OCR Output')

    btn.click(predict, [pdf_file, pdf_link], parsed_output)
    # Reset the textbox to an empty string (not None) so a later run with
    # no input still sees a string value for the link.
    clr.click(
        lambda: (
            gr.update(value=None),
            gr.update(value=''),
            gr.update(value=None),
        ),
        [],
        [pdf_file, pdf_link, parsed_output],
    )

    gr.Examples(
        [["input/nougat.pdf", ""], [None, "https://arxiv.org/pdf/2308.08316.pdf"]],
        inputs=[pdf_file, pdf_link],
        outputs=parsed_output,
        fn=process_example,
        cache_examples=True,
        label='Click on any Examples below to get Nougat OCR results quickly:',
    )

demo.queue()
demo.launch(debug=True)
input/nougat.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:679be336ce8010d3dc86b9530f0a30d4d5ea2a13153c6f274601b40f4382745b
3
+ size 4133781
nougat/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/fsx-llm/lblecher/checkpoints/nougat/small/20230426_125023",
3
+ "align_long_axis": false,
4
+ "architectures": [
5
+ "NougatModel"
6
+ ],
7
+ "decoder_layer": 4,
8
+ "embed_dim": 128,
9
+ "encoder_layer": [
10
+ 2,
11
+ 2,
12
+ 14,
13
+ 2
14
+ ],
15
+ "hidden_dimension": 1024,
16
+ "input_size": [
17
+ 896,
18
+ 672
19
+ ],
20
+ "max_length": 3584,
21
+ "max_position_embeddings": 3584,
22
+ "model_type": "nougat",
23
+ "num_heads": [
24
+ 4,
25
+ 8,
26
+ 16,
27
+ 32
28
+ ],
29
+ "patch_size": 4,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.25.1",
32
+ "window_size": 7
33
+ }
nougat/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfca8831ad5e81fcd96b66f207934f2aaac4e8996449619515c58267c5e03686
3
+ size 1002629505
nougat/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<unk>"
6
+ }
nougat/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
nougat/tokenizer_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "model_max_length": 1000000000000000019884624838656,
3
+ "tokenizer_class": "PreTrainedTokenizerFast"
4
+ }
output/nougat.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:679be336ce8010d3dc86b9530f0a30d4d5ea2a13153c6f274601b40f4382745b
3
+ size 4133781
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ nougat-ocr