Recursive character text splitter
Browse files
- app.py +4 -3
- requirements.txt +2 -1
app.py
CHANGED
@@ -12,6 +12,7 @@ from aimakerspace.vectordatabase import VectorDatabase
|
|
12 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
13 |
import chainlit as cl
|
14 |
from PyPDF2 import PdfReader
|
|
|
15 |
|
16 |
system_template = """\
|
17 |
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
|
@@ -48,7 +49,7 @@ class RetrievalAugmentedQAPipeline:
|
|
48 |
|
49 |
return {"response": generate_response(), "context": context_list}
|
50 |
|
51 |
-
text_splitter =
|
52 |
|
53 |
|
54 |
def process_text_file(file: AskFileResponse):
|
@@ -69,8 +70,8 @@ def process_text_file(file: AskFileResponse):
|
|
69 |
documents = pdf_loader.load()
|
70 |
else:
|
71 |
raise ValueError("Provide a .txt or .pdf file")
|
72 |
-
texts = text_splitter.split_texts(documents)
|
73 |
-
|
74 |
return texts
|
75 |
|
76 |
|
|
|
12 |
from aimakerspace.openai_utils.chatmodel import ChatOpenAI
|
13 |
import chainlit as cl
|
14 |
from PyPDF2 import PdfReader
|
15 |
+
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
16 |
|
17 |
system_template = """\
|
18 |
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
|
|
|
49 |
|
50 |
return {"response": generate_response(), "context": context_list}
|
51 |
|
52 |
+
text_splitter = RecursiveCharacterTextSplitter()
|
53 |
|
54 |
|
55 |
def process_text_file(file: AskFileResponse):
|
|
|
70 |
documents = pdf_loader.load()
|
71 |
else:
|
72 |
raise ValueError("Provide a .txt or .pdf file")
|
73 |
+
# texts = text_splitter.split_texts(documents)
|
74 |
+
texts = [x.page_content for x in text_splitter.transform_documents(documents)]
|
75 |
return texts
|
76 |
|
77 |
|
requirements.txt
CHANGED
@@ -3,4 +3,5 @@ chainlit==0.7.700
|
|
3 |
openai
|
4 |
PyPDF2
|
5 |
langchain-community
|
6 |
-
pypdf
|
|
|
|
3 |
openai
|
4 |
PyPDF2
|
5 |
langchain-community
|
6 |
+
pypdf
|
7 |
+
langchain-text-splitters
|