Spaces:

umarigan
/

SemanticSearch

Sleeping

App Files Files Community

SemanticSearch / app.py

umarigan

Create app.py

e932fdf verified 8 months ago

raw

history blame

2.19 kB

	import gradio as gr
	from sentence_transformers import SentenceTransformer
	import fitz # PyMuPDF
	import numpy as np
	from bokeh.plotting import figure, output_file, save
	from bokeh.io import export_png
	from bokeh.embed import file_html
	from bokeh.resources import CDN
	import tempfile
	import os

	# Name of the sentence-transformers checkpoint used for every embedding in this app.
	_EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'

	# Instantiate the encoder once at import time so all requests share it.
	model = SentenceTransformer(_EMBEDDING_MODEL_NAME)

	def process_pdf(pdf_path):
	    """Extract the text of every page of a PDF as one string.

	    Args:
	        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

	    Returns:
	        The text of all pages, in page order, joined by single spaces.
	    """
	    # The context manager closes the document even if a page fails to
	    # extract; the original left the file handle open.
	    with fitz.open(pdf_path) as doc:
	        return " ".join(page.get_text() for page in doc)

	def create_embeddings(text):
	    """Split *text* into sentence-like chunks and embed each chunk.

	    The split is a plain ``"."`` split, which is simplistic: abbreviations
	    and decimal numbers are cut as well. Chunks are stripped of surrounding
	    whitespace, and empty chunks (for example the one after a trailing
	    period) are dropped so they are not embedded as meaningless points.

	    Args:
	        text: The raw document text.

	    Returns:
	        A ``(embeddings, sentences)`` tuple, where ``embeddings`` is whatever
	        ``model.encode`` returns for ``sentences`` and ``sentences`` is the
	        list of non-empty chunks in document order.
	    """
	    stripped_chunks = [chunk.strip() for chunk in text.split(".")]
	    sentences = [chunk for chunk in stripped_chunks if chunk]
	    embeddings = model.encode(sentences)
	    return embeddings, sentences

	def _project_to_2d(vectors):
	    """Project the rows of a 2-D array onto their top two principal axes."""
	    centered = vectors - vectors.mean(axis=0, keepdims=True)
	    # Rows of `components` are the principal directions, strongest first.
	    _, _, components = np.linalg.svd(centered, full_matrices=False)
	    coords = centered @ components[:2].T
	    if coords.shape[1] < 2:
	        # Fewer than two components exist (e.g. a single row); pad with zeros
	        # so callers can always read an x and a y column.
	        padding = np.zeros((coords.shape[0], 2 - coords.shape[1]))
	        coords = np.hstack([coords, padding])
	    return coords


	def generate_plot(query, pdf_file):
	    """Render the PDF's sentence embeddings as an interactive Bokeh scatter plot.

	    The original body called ``save(p)`` without ever creating ``p`` and so
	    raised ``NameError`` on every call. This version builds the figure:
	    sentence embeddings (and the query embedding, when a query is given) are
	    reduced to two dimensions with a PCA computed through NumPy's SVD, as a
	    dependency-free stand-in for UMAP. The sentences closest to the query by
	    cosine similarity are highlighted.

	    Args:
	        query: Free-text query; when empty or blank no query point is drawn.
	        pdf_file: Path of the PDF to visualize.

	    Returns:
	        The path of the saved HTML file, ``"plot.html"``.
	    """
	    # Local import: bokeh is already a dependency of this file, but
	    # ColumnDataSource is not among the names imported at the top.
	    from bokeh.models import ColumnDataSource

	    plot_path = "plot.html"
	    top_k = 5  # number of nearest sentences to highlight

	    # Process the PDF and create embeddings
	    text = process_pdf(pdf_file)
	    embeddings, sentences = create_embeddings(text)
	    sentences = list(sentences)
	    vectors = np.asarray(embeddings, dtype=float)

	    has_query = bool(query and query.strip())
	    tooltips = [("text", "@text")]
	    if has_query:
	        tooltips.append(("similarity", "@similarity{0.00}"))

	    p = figure(
	        title="Sentence embeddings (2-D PCA projection)",
	        tools="pan,wheel_zoom,box_zoom,reset,save",
	        tooltips=tooltips,
	    )

	    if vectors.ndim == 2 and vectors.shape[0] > 0:
	        labels = list(sentences)
	        colors = ["steelblue"] * len(sentences)
	        sizes = [8] * len(sentences)
	        similarity = np.zeros(len(sentences))

	        if has_query:
	            query_vector = np.asarray(model.encode([query]), dtype=float)[0]
	            norms = np.linalg.norm(vectors, axis=1) * np.linalg.norm(query_vector)
	            # Guard against zero-length vectors instead of dividing by zero.
	            similarity = np.divide(
	                vectors @ query_vector,
	                norms,
	                out=np.zeros(len(sentences)),
	                where=norms > 0,
	            )
	            for index in np.argsort(similarity)[::-1][:top_k]:
	                colors[index] = "crimson"
	                sizes[index] = 12

	            # Project the query together with the sentences so that they
	            # share one coordinate system.
	            vectors = np.vstack([vectors, query_vector])
	            labels.append(f"Query: {query}")
	            colors.append("gold")
	            sizes.append(18)
	            similarity = np.append(similarity, 1.0)

	        coords = _project_to_2d(vectors)
	        source = ColumnDataSource(
	            data={
	                "x": coords[:, 0],
	                "y": coords[:, 1],
	                "text": labels,
	                "color": colors,
	                "size": sizes,
	                "similarity": similarity,
	            }
	        )
	        p.scatter(
	            "x",
	            "y",
	            source=source,
	            size="size",
	            color="color",
	            line_color="black",
	            alpha=0.75,
	        )
	    else:
	        # No extractable text: still write a valid (empty) plot file.
	        p.title.text = "No text could be extracted from the PDF"

	    # Pass the destination explicitly instead of relying on the global
	    # state that output_file() mutates.
	    save(p, filename=plot_path, resources=CDN, title="PDF Content Visualizer")

	    # Return the path to the saved file
	    return plot_path

	def gradio_interface(pdf_file, query):
	    """Gradio callback: build the plot for an uploaded PDF and return its HTML.

	    Args:
	        pdf_file: The uploaded file as delivered by Gradio. This may be
	            ``None`` when nothing was uploaded, an object exposing the path
	            as ``.name``, or a plain path string.
	        query: The query text entered by the user.

	    Returns:
	        The contents of the generated HTML plot, or a short HTML message
	        asking for an upload when no file was provided.
	    """
	    if pdf_file is None:
	        # Submitting without a file used to crash with AttributeError.
	        return "<p>Please upload a PDF file first.</p>"

	    # Accept both file-like upload objects and plain path strings.
	    pdf_path = getattr(pdf_file, "name", pdf_file)
	    plot_path = generate_plot(query, pdf_path)

	    # Read with an explicit encoding so the result does not depend on the
	    # platform's default locale.
	    with open(plot_path, "r", encoding="utf-8") as f:
	        return f.read()

	# Set up the Gradio app.
	# The components are referenced from the top-level namespace (gr.File,
	# gr.Textbox, gr.HTML) because the gr.inputs / gr.outputs namespaces are
	# not available in current Gradio releases.
	iface = gr.Interface(
	    fn=gradio_interface,
	    inputs=[gr.File(label="Upload PDF"), gr.Textbox(label="Query")],
	    outputs=gr.HTML(label="Visualization"),
	    title="PDF Content Visualizer",
	    description="Upload a PDF and enter a query to visualize the content.",
	)

	# Run the app
	iface.launch()