Shreyas094 committed
Commit d52f389
1 Parent(s): 978efd2

Update app.py

Files changed (1)
  1. app.py +36 -13
app.py CHANGED
@@ -66,17 +66,30 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
 
+# Add this at the beginning of your script, after imports
+DOCUMENTS_FILE = "uploaded_documents.json"
+
+def load_documents():
+    if os.path.exists(DOCUMENTS_FILE):
+        with open(DOCUMENTS_FILE, "r") as f:
+            return json.load(f)
+    return []
+
+def save_documents(documents):
+    with open(DOCUMENTS_FILE, "w") as f:
+        json.dump(documents, f)
+
+# Replace the global uploaded_documents with this
+uploaded_documents = load_documents()
+
+# Modify the update_vectors function
 def update_vectors(files, parser):
     global uploaded_documents
     logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
 
     if not files:
         logging.warning("No files provided for update_vectors")
-        return "Please upload at least one PDF file.", gr.CheckboxGroup(
-            choices=[doc["name"] for doc in uploaded_documents],
-            value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
-            label="Select documents to query"
-        )
+        return "Please upload at least one PDF file.", display_documents()
 
     embed = get_embeddings()
     total_chunks = 0
@@ -89,7 +102,6 @@ def update_vectors(files, parser):
         logging.info(f"Loaded {len(data)} chunks from {file.name}")
         all_data.extend(data)
         total_chunks += len(data)
-        # Append new documents instead of replacing
         if not any(doc["name"] == file.name for doc in uploaded_documents):
             uploaded_documents.append({"name": file.name, "selected": True})
             logging.info(f"Added new document to uploaded_documents: {file.name}")
@@ -110,12 +122,11 @@ def update_vectors(files, parser):
 
     database.save_local("faiss_database")
     logging.info("FAISS database saved")
-
-    return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
-        choices=[doc["name"] for doc in uploaded_documents],
-        value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
-        label="Select documents to query"
-    )
+
+    # Save the updated list of documents
+    save_documents(uploaded_documents)
+
+    return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
 
 def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
     print(f"Starting generate_chunked_response with {num_calls} calls")
@@ -528,6 +539,12 @@ def display_documents():
         label="Select documents to query"
     )
 
+# Add this new function
+def refresh_documents():
+    global uploaded_documents
+    uploaded_documents = load_documents()
+    return display_documents()
+
 def initial_conversation():
     return [
         (None, "Welcome! I'm your AI assistant for web search and PDF analysis. Here's how you can use me:\n\n"
@@ -539,7 +556,7 @@ def initial_conversation():
     ]
 
 # Define the checkbox outside the demo block
-document_selector = gr.CheckboxGroup(label="Select documents to query")
+document_selector = display_documents()
 
 use_web_search = gr.Checkbox(label="Use Web Search", value=True)
 
@@ -603,6 +620,7 @@ with demo:
     file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
     parser_dropdown = gr.Dropdown(choices=["pypdf", "llamaparse"], label="Select PDF Parser", value="llamaparse")
    update_button = gr.Button("Upload Document")
+    refresh_button = gr.Button("Refresh Document List")
 
     update_output = gr.Textbox(label="Update Status")
 
@@ -610,6 +628,11 @@ with demo:
     update_button.click(update_vectors,
                         inputs=[file_input, parser_dropdown],
                         outputs=[update_output, document_selector])
+
+    # Add the refresh button functionality
+    refresh_button.click(refresh_documents,
+                         inputs=[],
+                         outputs=[document_selector])
 
     gr.Markdown(
     """