acpotts committed on
Commit
99cea4d
1 Parent(s): d9ff27e

Recursive character text splitter

Browse files
Files changed (2) hide show
  1. app.py +4 -3
  2. requirements.txt +2 -1
app.py CHANGED
@@ -12,6 +12,7 @@ from aimakerspace.vectordatabase import VectorDatabase
12
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
13
  import chainlit as cl
14
  from PyPDF2 import PdfReader
 
15
 
16
  system_template = """\
17
  Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
@@ -48,7 +49,7 @@ class RetrievalAugmentedQAPipeline:
48
 
49
  return {"response": generate_response(), "context": context_list}
50
 
51
- text_splitter = CharacterTextSplitter()
52
 
53
 
54
  def process_text_file(file: AskFileResponse):
@@ -69,8 +70,8 @@ def process_text_file(file: AskFileResponse):
69
  documents = pdf_loader.load()
70
  else:
71
  raise ValueError("Provide a .txt or .pdf file")
72
- texts = text_splitter.split_texts(documents)
73
-
74
  return texts
75
 
76
 
 
12
  from aimakerspace.openai_utils.chatmodel import ChatOpenAI
13
  import chainlit as cl
14
  from PyPDF2 import PdfReader
15
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
16
 
17
  system_template = """\
18
  Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
 
49
 
50
  return {"response": generate_response(), "context": context_list}
51
 
52
+ text_splitter = RecursiveCharacterTextSplitter()
53
 
54
 
55
  def process_text_file(file: AskFileResponse):
 
70
  documents = pdf_loader.load()
71
  else:
72
  raise ValueError("Provide a .txt or .pdf file")
73
+ # texts = text_splitter.split_texts(documents)
74
+ texts = [x.page_content for x in text_splitter.transform_documents(documents)]
75
  return texts
76
 
77
 
requirements.txt CHANGED
@@ -3,4 +3,5 @@ chainlit==0.7.700
3
  openai
4
  PyPDF2
5
  langchain-community
6
- pypdf
 
 
3
  openai
4
  PyPDF2
5
  langchain-community
6
+ pypdf
7
+ langchain-text-splitters