PCFISH committed on
Commit f3a69bb • 1 Parent(s): fb78073

Update app.py

Files changed (1): app.py +6 -27
app.py CHANGED
@@ -11,7 +11,6 @@ from langchain.chains import ConversationalRetrievalChain
 from htmlTemplates import css, bot_template, user_template
 from langchain.llms import HuggingFaceHub, LlamaCpp, CTransformers  # For loading transformer models.
 from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
-from io import TextIOWrapper
 import tempfile  # Library for creating temporary files.
 import os
 
@@ -57,33 +56,13 @@ def get_json_file(docs):
 # Function that processes documents and splits them into text chunks.
 def get_text_chunks(documents):
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000,
-        chunk_overlap=200,
-        length_function=len
+        chunk_size=1000,  # Specifies the chunk size.
+        chunk_overlap=200,  # Specifies the overlap between chunks.
+        length_function=len  # Specifies the function used to measure text length.
     )
 
-    text_chunks = []
-
-    for doc in documents:
-        if isinstance(doc, str):
-            # If the document is a string, treat it as plain text
-            text_chunks.append(doc)
-        elif hasattr(doc, 'page_content'):
-            # If the document has a 'page_content' attribute, use it
-            text_chunks.append(doc.page_content)
-        else:
-            # Handle other types of documents as needed
-            # For example, if it's a list of strings, concatenate them
-            if isinstance(doc, list) and all(isinstance(item, str) for item in doc):
-                text_chunks.append(' '.join(doc))
-            else:
-                # Handle other cases based on the actual structure of your documents
-                raise ValueError(f"Unsupported document type: {type(doc)}")
-
-    # Split the text chunks
-    text_chunks = text_splitter.split_documents(text_chunks)
-
-    return text_chunks
+    documents = text_splitter.split_documents(documents)  # Splits the documents into chunks.
+    return documents  # Returns the split chunks.
 
 
 # Function that creates a vector store from the text chunks.
@@ -183,4 +162,4 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
+    main()
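
For reference, the simplified get_text_chunks delegates splitting entirely to RecursiveCharacterTextSplitter.split_documents, which operates on LangChain Document objects (it reads each item's page_content attribute) rather than on raw strings as the removed branching logic assumed. Below is a minimal usage sketch under that assumption; the standalone-script framing and the "sample.pdf" path are illustrative, not part of the commit:

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load a PDF into a list of LangChain Document objects (one per page).
# "sample.pdf" is a hypothetical path for illustration.
documents = PyPDFLoader("sample.pdf").load()

# Same splitter configuration as the commit: 1000-character chunks
# with 200 characters of overlap, measured with len().
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)

# split_documents returns a new list of smaller Document objects,
# preserving each source document's metadata on every chunk.
chunks = text_splitter.split_documents(documents)
print(f"{len(documents)} pages -> {len(chunks)} chunks")

This also explains the bug the commit removes: passing plain strings to split_documents, as the old code effectively did, fails when page_content is accessed; split_text is the string-oriented counterpart for that case.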