from huggingface_hub import hf_hub_download import re from PIL import Image import requests from nougat.dataset.rasterize import rasterize_paper from transformers import NougatProcessor, VisionEncoderDecoderModel import torch import gradio as gr import uuid import os import spaces processor = NougatProcessor.from_pretrained("facebook/nougat-small") model = VisionEncoderDecoderModel.from_pretrained("facebook/nougat-small") device = "cuda" if torch.cuda.is_available() else "cpu" model.to(device) def get_pdf(pdf_link): unique_filename = f"{os.getcwd()}/downloaded_paper_{uuid.uuid4().hex}.pdf" response = requests.get(pdf_link) if response.status_code == 200: with open(unique_filename, 'wb') as pdf_file: pdf_file.write(response.content) print("PDF downloaded successfully.") else: print("Failed to download the PDF.") return unique_filename @spaces.GPU def predict(image): # prepare PDF image for the model image = Image.open(image) pixel_values = processor(image, return_tensors="pt").pixel_values # generate transcription (here we only generate 30 tokens) outputs = model.generate( pixel_values.to(device), min_length=1, max_new_tokens=1500, bad_words_ids=[[processor.tokenizer.unk_token_id]], ) page_sequence = processor.batch_decode(outputs, skip_special_tokens=True)[0] page_sequence = processor.post_process_generation(page_sequence, fix_markdown=False) return page_sequence def inference(pdf_file, pdf_link): if pdf_file is None: if pdf_link == '': print("No file is uploaded and No link is provided") return "No data provided. Upload a pdf file or provide a pdf link and try again!" else: file_name = get_pdf(pdf_link) else: file_name = pdf_file.name pdf_name = pdf_file.name.split('/')[-1].split('.')[0] images = rasterize_paper(file_name, return_pil=True) sequence = "" # infer for every page and concat for image in images: sequence += predict(image) content = sequence.replace(r'\(', '$').replace(r'\)', '$').replace(r'\[', '$$').replace(r'\]', '$$') with open(f"{os.getcwd()}/output.md","w+") as f: f.write(content) f.close() return content, f"{os.getcwd()}/output.md" css = """ #mkd { height: 500px; overflow: auto; border: 1px solid #ccc; } """ with gr.Blocks(css=css) as demo: gr.HTML("