Spaces:

hf-vision
/

nougat-transformers

Runtime error

App Files Files Community

merve HF staff committed on Sep 20, 2023

Commit

1689fdb

•

1 Parent(s): ceeec68

Create app.py

Browse files

Files changed (1) hide show

app.py +125 -0

app.py ADDED Viewed

	@@ -0,0 +1,125 @@

+from huggingface_hub import hf_hub_download
+import re
+from PIL import Image
+import requests
+from nougat.dataset.rasterize import rasterize_paper
+from transformers import NougatProcessor, VisionEncoderDecoderModel
+import torch
+processor = NougatProcessor.from_pretrained("nielsr/nougat")
+model = VisionEncoderDecoderModel.from_pretrained("nielsr/nougat")
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
+def get_pdf(pdf_link):
+  unique_filename = f"{os.getcwd()}/downloaded_paper_{uuid.uuid4().hex}.pdf"
+  response = requests.get(pdf_link)
+  if response.status_code == 200:
+      with open(unique_filename, 'wb') as pdf_file:
+          pdf_file.write(response.content)
+      print("PDF downloaded successfully.")
+  else:
+      print("Failed to download the PDF.")
+  return unique_filename
+def predict(image):
+  # prepare PDF image for the model
+  image = Image.open(image)
+  pixel_values = processor(image, return_tensors="pt").pixel_values
+  # generate transcription (here we only generate 30 tokens)
+  outputs = model.generate(
+      pixel_values.to(device),
+      min_length=1,
+      max_new_tokens=30,
+      bad_words_ids=[[processor.tokenizer.unk_token_id]],
+  )
+  sequence = processor.batch_decode(outputs, skip_special_tokens=True)[0]
+  sequence = processor.post_process_generation(sequence, fix_markdown=False)
+  return sequence
+def inference(pdf_file, pdf_link):
+  if pdf_file is None:
+    if pdf_link == '':
+      print("No file is uploaded and No link is provided")
+      return "No data provided. Upload a pdf file or provide a pdf link and try again!"
+    else:
+      file_name = get_pdf(pdf_link)
+  else:
+    file_name = pdf_file.name
+    pdf_name = pdf_file.name.split('/')[-1].split('.')[0]
+  images = rasterize_paper(file_name, return_pil=True)
+  sequence = ""
+  # infer for every page and concat
+  for image in images:
+    sequence += predict(image)
+  content = sequence.replace(r'\(', '$').replace(r'\)', '$').replace(r'\[', '$$').replace(r'\]', '$$')
+  return content
+import gradio as gr
+import uuid
+import os
+import requests
+import re
+# Styling for the element with id "mkd" (the parsed-output Markdown pane):
+# fixed 500px height, scrollbars on overflow, thin grey border.
+css = """
+  #mkd {
+    height: 500px;
+    overflow: auto;
+    border: 1px solid #ccc;
+  }
+"""
+with gr.Blocks(css=css) as demo:
+  gr.HTML("<h1><center>Nougat: Neural Optical Understanding for Academic Documents 🍫<center><h1>")
+  gr.HTML("<h3><center>Lukas Blecher et al. <a href='https://arxiv.org/pdf/2308.13418.pdf' target='_blank'>Paper</a>, <a href='https://facebookresearch.github.io/nougat/'>Project</a><center></h3>")
+  gr.HTML("<h3><center>This demo is based on transformers implementation of Nougat 🤗<center><h3>")
+  with gr.Row():
+    mkd = gr.Markdown('<h4><center>Upload a PDF</center></h4>',scale=1)
+    mkd = gr.Markdown('<h4><center><i>OR</i></center></h4>',scale=1)
+    mkd = gr.Markdown('<h4><center>Provide a PDF link</center></h4>',scale=1)
+  with gr.Row():
+    mkd = gr.Markdown("Upload a PDF",scale=1)
+    mkd = gr.Markdown('OR',scale=1)
+    mkd = gr.Markdown('Provide a PDF link',scale=1)
+  with gr.Row(equal_height=True):
+    pdf_file = gr.File(label='PDF 📑', file_count='single', scale=1)
+    pdf_link = gr.Textbox(placeholder='Enter an arxiv link here', label='Link to Paper🔗', scale=1)
+  with gr.Row():
+    btn = gr.Button('Run Nougat 🍫')
+    clr = gr.Button('Clear 🧼')
+  output_headline = gr.Markdown("PDF converted to markup language through Nougat-OCR👇")
+  parsed_output = gr.Markdown(elem_id='mkd', value='OCR Output 📝')
+  btn.click(inference, [pdf_file, pdf_link], parsed_output )
+  clr.click(lambda : (gr.update(value=None),
+                      gr.update(value=None),
+                      gr.update(value=None)),
+             [],
+             [pdf_file, pdf_link, parsed_output]
+            )
+demo.queue()
+demo.launch(debug=True)