SemanticSearch / app.py
umarigan's picture
Create app.py
e932fdf verified
raw
history blame
2.19 kB
import gradio as gr
from sentence_transformers import SentenceTransformer
import fitz # PyMuPDF
import numpy as np
from bokeh.plotting import figure, output_file, save
from bokeh.io import export_png
from bokeh.embed import file_html
from bokeh.resources import CDN
import tempfile
import os
# Load the sentence-embedding model once, at import time, so that every call
# to create_embeddings() reuses this single module-level instance.
model = SentenceTransformer('all-MiniLM-L6-v2')
def process_pdf(pdf_path):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The text of all pages, in page order, joined by single spaces.
    """
    # Open the document as a context manager so it (and its underlying file
    # handle) is closed even if text extraction fails part-way through.
    with fitz.open(pdf_path) as doc:
        return " ".join(page.get_text() for page in doc)
def create_embeddings(text):
    """Split *text* into sentence-like chunks and embed each chunk.

    Args:
        text: The raw text to split and embed.

    Returns:
        A ``(embeddings, sentences)`` tuple: the result of calling
        ``model.encode`` on the chunks, and the list of chunks that was
        passed to it.
    """
    # Simplistic split on "."; consider a real sentence splitter.
    # Fragments are stripped and empty ones dropped, because a plain split
    # always yields a trailing "" after the final period (and whitespace-only
    # pieces between consecutive periods) that would otherwise be embedded
    # as meaningless points.
    sentences = [
        chunk
        for chunk in (piece.strip() for piece in text.split("."))
        if chunk
    ]
    embeddings = model.encode(sentences)
    return embeddings, sentences
def generate_plot(query, pdf_file):
    """Render a PDF's sentence embeddings as a Bokeh scatter plot.

    The embeddings are projected to two dimensions and drawn as one point
    per sentence. When *query* is non-empty, the sentences most similar to
    it (by cosine similarity) are drawn in a different colour.

    Args:
        query: Search text used to highlight related sentences. May be
            empty or ``None``, in which case nothing is highlighted.
        pdf_file: Filesystem path of the PDF to visualise.

    Returns:
        The path of the saved HTML file (``"plot.html"``).

    Raises:
        ValueError: If no embeddable text was produced from the PDF.
    """
    plot_path = "plot.html"
    top_k = 5  # number of best-matching sentences to highlight

    # Process the PDF and create embeddings.
    text = process_pdf(pdf_file)
    embeddings, sentences = create_embeddings(text)

    vectors = np.asarray(embeddings, dtype=float)
    if vectors.ndim != 2 or vectors.shape[0] == 0:
        raise ValueError("No text could be extracted from the PDF.")
    count = vectors.shape[0]

    # 2-D projection via PCA (SVD of the mean-centred embeddings). This is a
    # dependency-free stand-in: UMAP is not imported by this file.
    centered = vectors - vectors.mean(axis=0)
    _, _, components = np.linalg.svd(centered, full_matrices=False)
    coords = centered @ components[:2].T
    if coords.shape[1] < 2:
        # A single sentence yields only one principal component; pad with
        # zeros so there is always an x and a y column.
        coords = np.pad(coords, ((0, 0), (0, 2 - coords.shape[1])))

    # Cosine similarity of every sentence to the query (0.0 when no query).
    scores = np.zeros(count)
    highlighted = set()
    if query and query.strip():
        query_vec = np.asarray(model.encode([query]), dtype=float)[0]
        denom = np.linalg.norm(vectors, axis=1) * np.linalg.norm(query_vec)
        scores = np.divide(
            vectors @ query_vec,
            denom,
            out=np.zeros(count),
            where=denom > 0,  # avoid dividing by zero-norm vectors
        )
        best_first = np.argsort(scores)[::-1]
        highlighted = set(best_first[:top_k].tolist())

    data = {
        "x": coords[:, 0].tolist(),
        "y": coords[:, 1].tolist(),
        "sentence": list(sentences),
        "score": scores.tolist(),
        "point_color": [
            "crimson" if index in highlighted else "steelblue"
            for index in range(count)
        ],
    }

    p = figure(
        title="Sentence embeddings (2-D PCA projection)",
        tooltips=[("sentence", "@sentence"), ("similarity", "@score{0.00}")],
    )
    p.scatter("x", "y", source=data, size=8, color="point_color", alpha=0.8)

    # Save the Bokeh plot as a standalone HTML file and return its path.
    output_file(plot_path)
    save(p)
    return plot_path
def gradio_interface(pdf_file, query):
    """Gradio callback: build the plot for an uploaded PDF and return it as HTML.

    Args:
        pdf_file: The uploaded file as delivered by Gradio. Either an object
            exposing the file's path as ``.name`` or a plain path string is
            accepted; ``None`` means nothing was uploaded.
        query: The query text entered by the user.

    Returns:
        The HTML content of the generated plot, or a short HTML message
        asking for a file when none was uploaded.
    """
    if pdf_file is None:
        # Nothing uploaded yet: show a hint instead of raising AttributeError.
        return "<p>Please upload a PDF file.</p>"

    # Use the object's .name when present, otherwise treat it as the path.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    plot_path = generate_plot(query, pdf_path)

    # Read the saved plot explicitly as UTF-8 (Bokeh writes its HTML in that
    # encoding) rather than relying on the platform's default encoding.
    with open(plot_path, "r", encoding="utf-8") as f:
        return f.read()
# Set up the Gradio app.
# Components come from the top-level namespace (gr.File, gr.Textbox, gr.HTML):
# the older gr.inputs / gr.outputs namespaces are deprecated and no longer
# exist in current Gradio releases.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[gr.File(label="Upload PDF"), gr.Textbox(label="Query")],
    outputs=gr.HTML(label="Visualization"),
    title="PDF Content Visualizer",
    description="Upload a PDF and enter a query to visualize the content.",
)
# Run the app
iface.launch()