Shreyas094
committed on
Commit
•
c1bd83b
1
Parent(s):
fcbb7d1
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,8 @@ from typing import List
|
|
8 |
from pydantic import BaseModel, Field
|
9 |
from tempfile import NamedTemporaryFile
|
10 |
from langchain_community.vectorstores import FAISS
|
|
|
|
|
11 |
from langchain_community.document_loaders import PyPDFLoader
|
12 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
13 |
from llama_parse import LlamaParse
|
@@ -463,6 +465,10 @@ After writing the document, please provide a list of sources used in your respon
|
|
463 |
main_content += chunk
|
464 |
yield main_content, "" # Yield partial main content without sources
|
465 |
|
|
|
|
|
|
|
|
|
466 |
def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
|
467 |
logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
|
468 |
|
@@ -475,25 +481,32 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
|
|
475 |
yield "No documents available. Please upload PDF documents to answer questions."
|
476 |
return
|
477 |
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
# Filter relevant_docs based on selected documents
|
484 |
-
filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
|
485 |
-
logging.info(f"Number of filtered documents: {len(filtered_docs)}")
|
486 |
|
|
|
|
|
487 |
if not filtered_docs:
|
488 |
-
logging.warning(f"No
|
489 |
yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
|
490 |
return
|
491 |
|
492 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
493 |
logging.info(f"Document source: {doc.metadata['source']}")
|
494 |
logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
|
495 |
|
496 |
-
context_str = "\n".join([doc.page_content for doc in
|
497 |
logging.info(f"Total context length: {len(context_str)}")
|
498 |
|
499 |
if model == "@cf/meta/llama-3.1-8b-instruct":
|
|
|
8 |
from pydantic import BaseModel, Field
|
9 |
from tempfile import NamedTemporaryFile
|
10 |
from langchain_community.vectorstores import FAISS
|
11 |
+
from langchain_core.vectorstores import VectorStore
|
12 |
+
from langchain_core.documents import Document
|
13 |
from langchain_community.document_loaders import PyPDFLoader
|
14 |
from langchain_community.embeddings import HuggingFaceEmbeddings
|
15 |
from llama_parse import LlamaParse
|
|
|
465 |
main_content += chunk
|
466 |
yield main_content, "" # Yield partial main content without sources
|
467 |
|
468 |
+
from langchain_community.vectorstores import FAISS
|
469 |
+
from langchain_core.vectorstores import VectorStore
|
470 |
+
from langchain_core.documents import Document
|
471 |
+
|
472 |
def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
|
473 |
logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
|
474 |
|
|
|
481 |
yield "No documents available. Please upload PDF documents to answer questions."
|
482 |
return
|
483 |
|
484 |
+
# Pre-filter the documents
|
485 |
+
filtered_docs = []
|
486 |
+
for doc_id, doc in database.docstore._dict.items():
|
487 |
+
if isinstance(doc, Document) and doc.metadata.get("source") in selected_docs:
|
488 |
+
filtered_docs.append(doc)
|
|
|
|
|
|
|
489 |
|
490 |
+
logging.info(f"Number of documents after pre-filtering: {len(filtered_docs)}")
|
491 |
+
|
492 |
if not filtered_docs:
|
493 |
+
logging.warning(f"No documents found for the selected sources: {selected_docs}")
|
494 |
yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
|
495 |
return
|
496 |
|
497 |
+
# Create a new FAISS index with only the selected documents
|
498 |
+
filtered_db = FAISS.from_documents(filtered_docs, embed)
|
499 |
+
|
500 |
+
retriever = filtered_db.as_retriever(search_kwargs={"k": 10})
|
501 |
+
logging.info(f"Retrieving relevant documents for query: {query}")
|
502 |
+
relevant_docs = retriever.get_relevant_documents(query)
|
503 |
+
logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
|
504 |
+
|
505 |
+
for doc in relevant_docs:
|
506 |
logging.info(f"Document source: {doc.metadata['source']}")
|
507 |
logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
|
508 |
|
509 |
+
context_str = "\n".join([doc.page_content for doc in relevant_docs])
|
510 |
logging.info(f"Total context length: {len(context_str)}")
|
511 |
|
512 |
if model == "@cf/meta/llama-3.1-8b-instruct":
|