# Hugging Face Space (status when captured: Sleeping)
import os
import tempfile

import fitz  # PyMuPDF
import gradio as gr
import numpy as np
from bokeh.embed import file_html
from bokeh.io import export_png
from bokeh.plotting import figure, output_file, save
from bokeh.resources import CDN
from sentence_transformers import SentenceTransformer
# Load the sentence-embedding model once, at module level, so every request
# handled by this process reuses the same instance.
model = SentenceTransformer("all-MiniLM-L6-v2")
def process_pdf(pdf_path):
    """Extract the text of every page of a PDF as a single string.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        The text of all pages, in page order, joined by single spaces.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction raises part-way through.
    with fitz.open(pdf_path) as doc:
        return " ".join(page.get_text() for page in doc)
def create_embeddings(text):
    """Split text into sentence-like chunks and embed each chunk.

    The split is deliberately simple (on "."); swap in a real sentence
    splitter if abbreviations or decimal numbers matter for your documents.

    Args:
        text: The raw text to embed.

    Returns:
        A ``(embeddings, sentences)`` tuple. ``sentences`` is the list of
        non-empty, whitespace-normalised chunks and ``embeddings`` holds one
        row per chunk, in the same order. When ``text`` contains no usable
        chunk, an empty ``(0, 0)`` array and an empty list are returned.
    """
    # Collapse internal whitespace (PDF text is full of hard line breaks) and
    # drop empty chunks: "a. b.".split(".") always ends with "" and embedding
    # blanks only adds meaningless points to the plot.
    sentences = []
    for chunk in text.split("."):
        cleaned = " ".join(chunk.split())
        if cleaned:
            sentences.append(cleaned)

    if not sentences:
        # Nothing to encode; skip the model call entirely.
        return np.empty((0, 0), dtype=np.float32), sentences

    embeddings = model.encode(sentences)
    return embeddings, sentences
def _project_2d(vectors):
    """Project row vectors onto their first two principal components."""
    matrix = np.asarray(vectors, dtype=float)
    if matrix.ndim != 2 or matrix.shape[0] == 0:
        return np.zeros((0, 2))
    centered = matrix - matrix.mean(axis=0)
    _, _, components = np.linalg.svd(centered, full_matrices=False)
    coords = centered @ components[:2].T
    # A single input row yields only one component; pad so callers can always
    # index columns 0 and 1.
    missing = 2 - coords.shape[1]
    if missing > 0:
        coords = np.pad(coords, ((0, 0), (0, missing)))
    return coords


def generate_plot(query, pdf_file, output_path="plot.html"):
    """Render a PDF's sentence embeddings as a standalone Bokeh HTML plot.

    The embeddings are reduced to two dimensions with a NumPy PCA projection
    (a dependency-free stand-in for UMAP, which this file does not import).
    A non-empty query is embedded with the same model and drawn in a
    different colour so it can be located among the document's sentences.

    Args:
        query: Free-text query to plot alongside the sentences; ``None`` or a
            blank string plots the sentences only.
        pdf_file: Path of the PDF to visualise.
        output_path: Where to write the HTML file. Defaults to ``"plot.html"``.

    Returns:
        The path of the saved HTML file (``output_path``).

    Raises:
        ValueError: If no text chunks could be extracted from the PDF.
    """
    # Local import: ColumnDataSource is not among the module-level Bokeh imports.
    from bokeh.models import ColumnDataSource

    text = process_pdf(pdf_file)
    embeddings, sentences = create_embeddings(text)

    labels = [str(sentence) for sentence in sentences]
    if not labels:
        raise ValueError("No text could be extracted from the PDF.")

    vectors = np.asarray(embeddings, dtype=float)
    kinds = ["sentence"] * len(labels)
    colors = ["navy"] * len(labels)

    query_text = (query or "").strip()
    if query_text:
        query_vector = np.asarray(model.encode([query_text]), dtype=float)
        # Project the query together with the sentences so they share axes.
        vectors = np.vstack([vectors, query_vector])
        labels.append(query_text)
        kinds.append("query")
        colors.append("crimson")

    coords = _project_2d(vectors)
    source = ColumnDataSource(
        data={
            "x": coords[:, 0],
            "y": coords[:, 1],
            "text": labels,
            "kind": kinds,
            "point_color": colors,
        }
    )

    p = figure(
        title="PDF sentence embeddings (2-D PCA projection)",
        tooltips=[("kind", "@kind"), ("text", "@text")],
    )
    p.scatter("x", "y", source=source, size=8, color="point_color", alpha=0.7)
    p.xaxis.axis_label = "Component 1"
    p.yaxis.axis_label = "Component 2"

    # Passing filename/resources explicitly avoids mutating Bokeh's global
    # output state via output_file().
    save(p, filename=output_path, resources=CDN, title="PDF Content Visualizer")

    # Alternatively, save as PNG with: export_png(p, filename="plot.png")
    return output_path
def gradio_interface(pdf_file, query):
    """Gradio callback: build the plot for an uploaded PDF and return its HTML.

    Args:
        pdf_file: The uploaded file as handed over by Gradio. Either an object
            exposing the temporary file's path as ``.name`` or a plain path
            string is accepted; ``None`` means nothing was uploaded.
        query: The query text entered by the user.

    Returns:
        The contents of the generated HTML plot as a string, or a short HTML
        message asking for a file when none was uploaded.
    """
    if pdf_file is None:
        # Without this guard the callback dies with AttributeError on `.name`.
        return "<p>Please upload a PDF file.</p>"

    # Accept both a tempfile-like upload object and a bare path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    plot_path = generate_plot(query, pdf_path)

    # Bokeh writes its HTML as UTF-8; read it back the same way instead of
    # relying on the platform's default encoding.
    # NOTE(review): gr.HTML may not execute the <script> tags of a full Bokeh
    # document; if the plot renders blank, wrap the markup in an <iframe> - confirm.
    with open(plot_path, "r", encoding="utf-8") as f:
        return f.read()
    # If returning an image instead, return the plot path directly.
# Set up the Gradio app.
# Components are taken from the top-level namespace (gr.File, gr.Textbox,
# gr.HTML); the legacy gr.inputs / gr.outputs modules are deprecated and no
# longer exist in current Gradio releases.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[gr.File(label="Upload PDF"), gr.Textbox(label="Query")],
    outputs=gr.HTML(label="Visualization"),  # Use gr.Image for image output
    title="PDF Content Visualizer",
    description="Upload a PDF and enter a query to visualize the content.",
)

# Run the app
iface.launch()