Spaces:
Running
Running
Harikrishna Dev
committed on
Commit
•
16e69bb
1
Parent(s):
7d26e2e
Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ from langchain.document_loaders.csv_loader import CSVLoader
|
|
7 |
from langchain_community.document_loaders import PyPDFLoader
|
8 |
from langchain.vectorstores import FAISS
|
9 |
from langchain.prompts import load_prompt
|
|
|
10 |
from streamlit import session_state as ss
|
11 |
import firebase_admin
|
12 |
from firebase_admin import credentials
|
@@ -90,8 +91,13 @@ if os.path.exists(faiss_index):
|
|
90 |
vectors = FAISS.load_local(faiss_index, embeddings, allow_dangerous_deserialization=True)
|
91 |
else:
|
92 |
# Load data from PDF and CSV sources
|
|
|
|
|
|
|
|
|
|
|
93 |
pdf_loader = PyPDFLoader(pdf_source)
|
94 |
-
pdf_data = pdf_loader.load_and_split()
|
95 |
csv_loader = CSVLoader(file_path=data_source, encoding="utf-8")
|
96 |
csv_data = csv_loader.load()
|
97 |
data = pdf_data + csv_data
|
|
|
7 |
from langchain_community.document_loaders import PyPDFLoader
|
8 |
from langchain.vectorstores import FAISS
|
9 |
from langchain.prompts import load_prompt
|
10 |
+
from langchain.text_splitter import CharacterTextSplitter
|
11 |
from streamlit import session_state as ss
|
12 |
import firebase_admin
|
13 |
from firebase_admin import credentials
|
|
|
91 |
vectors = FAISS.load_local(faiss_index, embeddings, allow_dangerous_deserialization=True)
|
92 |
else:
|
93 |
# Load data from PDF and CSV sources
|
94 |
+
text_splitter = CharacterTextSplitter(
|
95 |
+
separator="\n",
|
96 |
+
chunk_size=400,
|
97 |
+
chunk_overlap=40
|
98 |
+
)
|
99 |
pdf_loader = PyPDFLoader(pdf_source)
|
100 |
+
pdf_data = pdf_loader.load_and_split(text_splitter=text_splitter)
|
101 |
csv_loader = CSVLoader(file_path=data_source, encoding="utf-8")
|
102 |
csv_data = csv_loader.load()
|
103 |
data = pdf_data + csv_data
|