Spaces:

umarigan
/

SemanticSearch

Sleeping

App Files Files Community

umarigan committed on Mar 26

Commit

e932fdf

•

1 Parent(s): f810f7b

Create app.py

Browse files

Files changed (1) hide show

app.py +70 -0

app.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import gradio as gr
+from sentence_transformers import SentenceTransformer
+import fitz  # PyMuPDF
+import numpy as np
+from bokeh.plotting import figure, output_file, save
+from bokeh.io import export_png
+from bokeh.embed import file_html
+from bokeh.resources import CDN
+import tempfile
+import os
+# Load the sentence-embedding model once at module level; create_embeddings()
+# reuses this shared instance on every call instead of reloading it.
+model = SentenceTransformer('all-MiniLM-L6-v2')
+def process_pdf(pdf_path):
+    # Open the PDF
+    doc = fitz.open(pdf_path)
+    texts = []
+    for page in doc:
+        texts.append(page.get_text())
+    return " ".join(texts)
+def create_embeddings(text):
+    # Split the text into sentences/chunks and generate embeddings
+    # This is a placeholder for your actual text splitting and embedding code
+    sentences = text.split(".")  # Simplistic split, consider using a better sentence splitter
+    embeddings = model.encode(sentences)
+    return embeddings, sentences
+def generate_plot(query, pdf_file):
+    # Process the PDF and create embeddings
+    text = process_pdf(pdf_file)
+    embeddings, sentences = create_embeddings(text)
+    # Here, you'll integrate the UMAP and Bokeh visualization code you have,
+    # and then save the Bokeh plot to a file.
+    # For simplicity, let's assume it's saved to 'plot.html'
+    output_file("plot.html")
+    # Your Bokeh plot creation code here...
+    save(p)  # Assuming 'p' is your Bokeh figure
+    # Alternatively, you can save as PNG
+    # export_png(p, filename="plot.png")
+    # Return the path to the saved file
+    return "plot.html"  # or "plot.png"
+def gradio_interface(pdf_file, query):
+    plot_path = generate_plot(query, pdf_file.name)
+    # If returning HTML file
+    with open(plot_path, "r") as f:
+        html_content = f.read()
+    return html_content
+    # If returning an image
+    # return plot_path
+# Set up the Gradio app
+iface = gr.Interface(
+    fn=gradio_interface,
+    inputs=[gr.inputs.File(label="Upload PDF"), gr.inputs.Textbox(label="Query")],
+    outputs=gr.outputs.HTML(label="Visualization"),  # Use gr.outputs.Image for image output
+    title="PDF Content Visualizer",
+    description="Upload a PDF and enter a query to visualize the content."
+)
+# Run the app
+iface.launch()