Shreyas094 committed on
Commit
c1bd83b
1 Parent(s): fcbb7d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -11
app.py CHANGED
@@ -8,6 +8,8 @@ from typing import List
8
  from pydantic import BaseModel, Field
9
  from tempfile import NamedTemporaryFile
10
  from langchain_community.vectorstores import FAISS
 
 
11
  from langchain_community.document_loaders import PyPDFLoader
12
  from langchain_community.embeddings import HuggingFaceEmbeddings
13
  from llama_parse import LlamaParse
@@ -463,6 +465,10 @@ After writing the document, please provide a list of sources used in your respon
463
  main_content += chunk
464
  yield main_content, "" # Yield partial main content without sources
465
 
 
 
 
 
466
  def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
467
  logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
468
 
@@ -475,25 +481,32 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
475
  yield "No documents available. Please upload PDF documents to answer questions."
476
  return
477
 
478
- retriever = database.as_retriever()
479
- logging.info(f"Retrieving relevant documents for query: {query}")
480
- relevant_docs = retriever.get_relevant_documents(query)
481
- logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
482
-
483
- # Filter relevant_docs based on selected documents
484
- filtered_docs = [doc for doc in relevant_docs if doc.metadata["source"] in selected_docs]
485
- logging.info(f"Number of filtered documents: {len(filtered_docs)}")
486
 
 
 
487
  if not filtered_docs:
488
- logging.warning(f"No relevant information found in the selected documents: {selected_docs}")
489
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
490
  return
491
 
492
- for doc in filtered_docs:
 
 
 
 
 
 
 
 
493
  logging.info(f"Document source: {doc.metadata['source']}")
494
  logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
495
 
496
- context_str = "\n".join([doc.page_content for doc in filtered_docs])
497
  logging.info(f"Total context length: {len(context_str)}")
498
 
499
  if model == "@cf/meta/llama-3.1-8b-instruct":
 
8
  from pydantic import BaseModel, Field
9
  from tempfile import NamedTemporaryFile
10
  from langchain_community.vectorstores import FAISS
11
+ from langchain_core.vectorstores import VectorStore
12
+ from langchain_core.documents import Document
13
  from langchain_community.document_loaders import PyPDFLoader
14
  from langchain_community.embeddings import HuggingFaceEmbeddings
15
  from llama_parse import LlamaParse
 
465
  main_content += chunk
466
  yield main_content, "" # Yield partial main content without sources
467
 
468
+ from langchain_community.vectorstores import FAISS
469
+ from langchain_core.vectorstores import VectorStore
470
+ from langchain_core.documents import Document
471
+
472
  def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
473
  logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
474
 
 
481
  yield "No documents available. Please upload PDF documents to answer questions."
482
  return
483
 
484
+ # Pre-filter the documents
485
+ filtered_docs = []
486
+ for doc_id, doc in database.docstore._dict.items():
487
+ if isinstance(doc, Document) and doc.metadata.get("source") in selected_docs:
488
+ filtered_docs.append(doc)
 
 
 
489
 
490
+ logging.info(f"Number of documents after pre-filtering: {len(filtered_docs)}")
491
+
492
  if not filtered_docs:
493
+ logging.warning(f"No documents found for the selected sources: {selected_docs}")
494
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
495
  return
496
 
497
+ # Create a new FAISS index with only the selected documents
498
+ filtered_db = FAISS.from_documents(filtered_docs, embed)
499
+
500
+ retriever = filtered_db.as_retriever(search_kwargs={"k": 10})
501
+ logging.info(f"Retrieving relevant documents for query: {query}")
502
+ relevant_docs = retriever.get_relevant_documents(query)
503
+ logging.info(f"Number of relevant documents retrieved: {len(relevant_docs)}")
504
+
505
+ for doc in relevant_docs:
506
  logging.info(f"Document source: {doc.metadata['source']}")
507
  logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
508
 
509
+ context_str = "\n".join([doc.page_content for doc in relevant_docs])
510
  logging.info(f"Total context length: {len(context_str)}")
511
 
512
  if model == "@cf/meta/llama-3.1-8b-instruct":