Harikrishna Dev committed on
Commit
16e69bb
1 Parent(s): 7d26e2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -7,6 +7,7 @@ from langchain.document_loaders.csv_loader import CSVLoader
7
  from langchain_community.document_loaders import PyPDFLoader
8
  from langchain.vectorstores import FAISS
9
  from langchain.prompts import load_prompt
 
10
  from streamlit import session_state as ss
11
  import firebase_admin
12
  from firebase_admin import credentials
@@ -90,8 +91,13 @@ if os.path.exists(faiss_index):
90
  vectors = FAISS.load_local(faiss_index, embeddings, allow_dangerous_deserialization=True)
91
  else:
92
  # Load data from PDF and CSV sources
 
 
 
 
 
93
  pdf_loader = PyPDFLoader(pdf_source)
94
- pdf_data = pdf_loader.load_and_split()
95
  csv_loader = CSVLoader(file_path=data_source, encoding="utf-8")
96
  csv_data = csv_loader.load()
97
  data = pdf_data + csv_data
 
7
  from langchain_community.document_loaders import PyPDFLoader
8
  from langchain.vectorstores import FAISS
9
  from langchain.prompts import load_prompt
10
+ from langchain.text_splitter import CharacterTextSplitter
11
  from streamlit import session_state as ss
12
  import firebase_admin
13
  from firebase_admin import credentials
 
91
  vectors = FAISS.load_local(faiss_index, embeddings, allow_dangerous_deserialization=True)
92
  else:
93
  # Load data from PDF and CSV sources
94
+ text_splitter = CharacterTextSplitter(
95
+ separator="\n",
96
+ chunk_size=400,
97
+ chunk_overlap=40
98
+ )
99
  pdf_loader = PyPDFLoader(pdf_source)
100
+ pdf_data = pdf_loader.load_and_split(text_splitter=text_splitter)
101
  csv_loader = CSVLoader(file_path=data_source, encoding="utf-8")
102
  csv_data = csv_loader.load()
103
  data = pdf_data + csv_data