umarigan committed on
Commit
0cfdb4e
1 Parent(s): 8315f3e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -21
app.py CHANGED
@@ -32,7 +32,7 @@ def create_embeddings(text):
32
  print("Embeddings created successfully.")
33
  return embeddings, sentences
34
 
35
- def generate_plot(query, pdf_file):
36
  logging.info("Generating plot.")
37
  # Generate embeddings for the query
38
  query_embedding = model.encode([query])[0]
@@ -79,36 +79,55 @@ def generate_plot(query, pdf_file):
79
  save(p)
80
  logging.info("Plot saved to file.")
81
  return temp_file.name
 
 
82
 
83
- def gradio_interface_o(pdf_file, query):
84
- logging.info("Gradio interface called.")
85
- plot_path = generate_plot(query, pdf_file)
86
- with open(plot_path, "r") as f:
87
- html_content = f.read()
88
- logging.info("Returning HTML content.")
89
- return html_content
90
- def gradio_interface(pdf_file, query):
91
- logging.info("Gradio interface called.")
92
- plot_path = generate_plot(query, pdf_file)
 
 
 
93
 
94
- # Assuming 'plot_path' can be accessed via a URL 'plot_url'
95
- plot_url = f"http://yourserver.com/path/to/{os.path.basename(plot_path)}"
96
- logging.info(f"Plot URL: {plot_url}")
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
- # Return an iframe pointing to the plot URL
99
- iframe_html = f'<iframe src="{plot_url}" width="100%" height="700"></iframe>'
100
- logging.info("Returning iframe HTML content.")
101
- return iframe_html
102
 
103
- # Update the outputs to gr.HTML() if it's not already
 
 
 
 
104
  iface = gr.Interface(
105
  fn=gradio_interface,
106
  inputs=[gr.File(label="Upload PDF"), gr.Textbox(label="Query")],
107
- outputs=gr.HTML(),
108
  title="PDF Content Visualizer",
109
  description="Upload a PDF and enter a query to visualize the content."
110
  )
111
 
112
-
113
  if __name__ == "__main__":
114
  iface.launch()
 
32
  print("Embeddings created successfully.")
33
  return embeddings, sentences
34
 
35
+ def generate_plot_bokeh(query, pdf_file):
36
  logging.info("Generating plot.")
37
  # Generate embeddings for the query
38
  query_embedding = model.encode([query])[0]
 
79
  save(p)
80
  logging.info("Plot saved to file.")
81
  return temp_file.name
82
+ import plotly.express as px
83
+ import plotly.graph_objects as go
84
 
85
def generate_plotly_figure(query, pdf_file):
    """Build a 2-D UMAP scatter of a PDF's sentences, highlighting query matches.

    Args:
        query: Free-text query; embedded with the module-level ``model``.
        pdf_file: Uploaded file from the Gradio ``gr.File`` input. Either an
            object exposing the on-disk path as ``.name`` or the path itself.

    Returns:
        A ``plotly.graph_objects.Figure`` with one marker per sentence (the
        five most query-similar in red, the rest in blue) plus a marker for
        the query itself.

    Raises:
        ValueError: If no file was supplied or no sentences were extracted.
    """
    logging.info("Generating plot with Plotly.")

    # Fail early with a readable message instead of an AttributeError on
    # ``None.name`` when the user submits without uploading a file.
    if pdf_file is None:
        raise ValueError("No PDF file was provided.")
    # NOTE(review): some Gradio versions hand over a plain path string rather
    # than a file-like object; fall back to the value itself in that case.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Generate embeddings for the query
    query_embedding = model.encode([query])[0]

    # Process the PDF and create embeddings
    text = process_pdf(pdf_path)
    embeddings, sentences = create_embeddings(text)
    if not sentences:
        raise ValueError("No sentences could be extracted from the PDF.")

    # Project sentences and query together so they share one 2-D space;
    # the query is the last row.
    all_embeddings = np.vstack([embeddings, query_embedding])
    logging.info("Data prepared for UMAP.")

    # UMAP transformation
    umap_transform = umap.UMAP(n_neighbors=15, min_dist=0.0, n_components=2, random_state=42)
    umap_embeddings = umap_transform.fit_transform(all_embeddings)
    logging.info("UMAP transformation completed.")

    # Rank sentences by cosine similarity to the query (higher = closer) and
    # keep the best few; a set makes the per-point membership test O(1).
    top_k = 5  # Adjust the number as needed
    similarities = cosine_similarity([query_embedding], embeddings)[0]
    closest_indices = set(similarities.argsort()[-top_k:].tolist())

    # Prepare data for plotting
    colors = ['red' if i in closest_indices else 'blue' for i in range(len(sentences))]
    sentence_points = umap_embeddings[:-1]
    query_point = umap_embeddings[-1]

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=sentence_points[:, 0], y=sentence_points[:, 1], mode='markers',
                             marker=dict(color=colors), text=sentences, name="Sentences"))
    # The query was projected alongside the sentences; draw it so the user can
    # see where it sits relative to the highlighted neighbours.
    fig.add_trace(go.Scatter(x=[query_point[0]], y=[query_point[1]], mode='markers',
                             marker=dict(color='green', size=12, symbol='star'),
                             text=[query], name="Query"))

    fig.update_layout(title="UMAP Projection of Sentences", xaxis_title="UMAP 1", yaxis_title="UMAP 2")

    logging.info("Plotly figure created successfully.")
    return fig
 
 
118
 
119
def gradio_interface(pdf_file, query):
    """Gradio callback: turn an uploaded PDF and a query into a Plotly figure.

    Note the argument order is swapped when delegating, because
    ``generate_plotly_figure`` takes the query first.
    """
    logging.info("Gradio interface called.")
    figure = generate_plotly_figure(query=query, pdf_file=pdf_file)
    logging.info("Returning Plotly figure.")
    return figure
124
# Wire the callback into a two-input Gradio UI; gr.Plot is used as the output
# component for the Plotly figure returned by gradio_interface.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.File(label="Upload PDF"),
        gr.Textbox(label="Query"),
    ],
    outputs=gr.Plot(),
    title="PDF Content Visualizer",
    description="Upload a PDF and enter a query to visualize the content.",
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()