Shreyas094
committed on
Commit
•
c89450b
1
Parent(s):
d52f389
Update app.py
Browse files
app.py
CHANGED
@@ -15,6 +15,7 @@ from langchain_core.documents import Document
|
|
15 |
from huggingface_hub import InferenceClient
|
16 |
import inspect
|
17 |
import logging
|
|
|
18 |
|
19 |
|
20 |
# Set up basic configuration for logging
|
@@ -99,6 +100,9 @@ def update_vectors(files, parser):
|
|
99 |
logging.info(f"Processing file: {file.name}")
|
100 |
try:
|
101 |
data = load_document(file, parser)
|
|
|
|
|
|
|
102 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
103 |
all_data.extend(data)
|
104 |
total_chunks += len(data)
|
@@ -112,22 +116,69 @@ def update_vectors(files, parser):
|
|
112 |
|
113 |
logging.info(f"Total chunks processed: {total_chunks}")
|
114 |
|
115 |
-
if
|
116 |
-
logging.
|
117 |
-
|
118 |
-
database.add_documents(all_data)
|
119 |
-
else:
|
120 |
-
logging.info("Creating new FAISS database")
|
121 |
-
database = FAISS.from_documents(all_data, embed)
|
122 |
|
123 |
-
|
124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
# Save the updated list of documents
|
127 |
save_documents(uploaded_documents)
|
128 |
|
129 |
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
|
130 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
|
132 |
print(f"Starting generate_chunked_response with {num_calls} calls")
|
133 |
full_response = ""
|
@@ -536,7 +587,7 @@ def display_documents():
|
|
536 |
return gr.CheckboxGroup(
|
537 |
choices=[doc["name"] for doc in uploaded_documents],
|
538 |
value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
|
539 |
-
label="Select documents to query"
|
540 |
)
|
541 |
|
542 |
# Add this new function
|
@@ -623,6 +674,7 @@ with demo:
|
|
623 |
refresh_button = gr.Button("Refresh Document List")
|
624 |
|
625 |
update_output = gr.Textbox(label="Update Status")
|
|
|
626 |
|
627 |
# Update both the output text and the document selector
|
628 |
update_button.click(update_vectors,
|
@@ -633,6 +685,11 @@ with demo:
|
|
633 |
refresh_button.click(refresh_documents,
|
634 |
inputs=[],
|
635 |
outputs=[document_selector])
|
|
|
|
|
|
|
|
|
|
|
636 |
|
637 |
gr.Markdown(
|
638 |
"""
|
|
|
15 |
from huggingface_hub import InferenceClient
|
16 |
import inspect
|
17 |
import logging
|
18 |
+
import shutil
|
19 |
|
20 |
|
21 |
# Set up basic configuration for logging
|
|
|
100 |
logging.info(f"Processing file: {file.name}")
|
101 |
try:
|
102 |
data = load_document(file, parser)
|
103 |
+
if not data:
|
104 |
+
logging.warning(f"No chunks loaded from {file.name}")
|
105 |
+
continue
|
106 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
107 |
all_data.extend(data)
|
108 |
total_chunks += len(data)
|
|
|
116 |
|
117 |
logging.info(f"Total chunks processed: {total_chunks}")
|
118 |
|
119 |
+
if not all_data:
|
120 |
+
logging.warning("No valid data extracted from uploaded files")
|
121 |
+
return "No valid data could be extracted from the uploaded files. Please check the file contents and try again.", display_documents()
|
|
|
|
|
|
|
|
|
122 |
|
123 |
+
try:
|
124 |
+
if os.path.exists("faiss_database"):
|
125 |
+
logging.info("Updating existing FAISS database")
|
126 |
+
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
127 |
+
database.add_documents(all_data)
|
128 |
+
else:
|
129 |
+
logging.info("Creating new FAISS database")
|
130 |
+
database = FAISS.from_documents(all_data, embed)
|
131 |
+
|
132 |
+
database.save_local("faiss_database")
|
133 |
+
logging.info("FAISS database saved")
|
134 |
+
except Exception as e:
|
135 |
+
logging.error(f"Error updating FAISS database: {str(e)}")
|
136 |
+
return f"Error updating vector store: {str(e)}", display_documents()
|
137 |
|
138 |
# Save the updated list of documents
|
139 |
save_documents(uploaded_documents)
|
140 |
|
141 |
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
|
142 |
|
143 |
+
def delete_documents(selected_docs):
    """Remove the selected documents from the FAISS store and the document list.

    Args:
        selected_docs: Names of the documents to delete. Each name is compared
            against the ``source`` metadata of the chunks held in the store.

    Returns:
        A ``(status_message, document_selector)`` tuple; the second item is
        whatever ``display_documents()`` returns.
    """
    global uploaded_documents

    if not selected_docs:
        return "No documents selected for deletion.", display_documents()

    # Loading a missing index raises, so report it instead of failing the callback.
    if not os.path.exists("faiss_database"):
        logging.warning("No FAISS database found; nothing to delete")
        return "No vector store found. Nothing to delete.", display_documents()

    docs_to_keep = []
    deleted_docs = []  # unique document names, in first-seen order
    try:
        embed = get_embeddings()
        database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)

        stored_docs = list(database.docstore._dict.values())
        for doc in stored_docs:
            source = doc.metadata.get("source")
            if source not in selected_docs:
                docs_to_keep.append(doc)
            elif source not in deleted_docs:
                # A document is stored as many chunks; record its name only once
                # so the status message does not repeat it per chunk.
                deleted_docs.append(source)

        # Print debugging information
        logging.info(f"Total documents before deletion: {len(stored_docs)}")
        logging.info(f"Documents to keep: {len(docs_to_keep)}")
        logging.info(f"Documents to delete: {len(deleted_docs)}")

        if deleted_docs:
            if not docs_to_keep:
                # If all documents are deleted, remove the FAISS database directory
                shutil.rmtree("faiss_database")
                logging.info("All documents deleted. Removed FAISS database directory.")
            else:
                # Create new FAISS index with remaining documents
                # NOTE(review): this re-embeds every kept chunk; deleting by
                # docstore id may be cheaper - confirm against the installed
                # LangChain version before switching.
                new_database = FAISS.from_documents(docs_to_keep, embed)
                new_database.save_local("faiss_database")
                logging.info(f"Created new FAISS index with {len(docs_to_keep)} documents.")
    except Exception as e:
        # Same boundary handling as the vector-store update path: log the
        # failure and surface it in the status box.
        logging.error(f"Error deleting documents from FAISS database: {str(e)}")
        return f"Error deleting documents: {str(e)}", display_documents()

    if not deleted_docs:
        # Nothing matched, so the index and the document list are left untouched.
        return "None of the selected documents were found in the vector store.", display_documents()

    # Update uploaded_documents list
    uploaded_documents = [doc for doc in uploaded_documents if doc["name"] not in deleted_docs]
    save_documents(uploaded_documents)

    return f"Deleted documents: {', '.join(deleted_docs)}", display_documents()
|
181 |
+
|
182 |
def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
|
183 |
print(f"Starting generate_chunked_response with {num_calls} calls")
|
184 |
full_response = ""
|
|
|
587 |
return gr.CheckboxGroup(
|
588 |
choices=[doc["name"] for doc in uploaded_documents],
|
589 |
value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
|
590 |
+
label="Select documents to query or delete"
|
591 |
)
|
592 |
|
593 |
# Add this new function
|
|
|
674 |
refresh_button = gr.Button("Refresh Document List")
|
675 |
|
676 |
update_output = gr.Textbox(label="Update Status")
|
677 |
+
delete_button = gr.Button("Delete Selected Documents")
|
678 |
|
679 |
# Update both the output text and the document selector
|
680 |
update_button.click(update_vectors,
|
|
|
685 |
refresh_button.click(refresh_documents,
|
686 |
inputs=[],
|
687 |
outputs=[document_selector])
|
688 |
+
|
689 |
+
# Add the delete button functionality
|
690 |
+
delete_button.click(delete_documents,
|
691 |
+
inputs=[document_selector],
|
692 |
+
outputs=[update_output, document_selector])
|
693 |
|
694 |
gr.Markdown(
|
695 |
"""
|