jet-taekyo committed
Commit 86aaf41
1 Parent(s): 45fec1b

modify text-splitting process
app.py CHANGED
@@ -6,6 +6,8 @@ from langchain_core.vectorstores import VectorStoreRetriever
 from langchain_openai import ChatOpenAI
 from chainlit.types import AskFileResponse
 from langchain_openai.embeddings import OpenAIEmbeddings
+from langchain_core.runnables import Runnable
+from langchain_core.documents import Document
 
 # Libraries to be used
 from langchain_community.document_loaders.text import TextLoader
@@ -15,7 +17,8 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain_wrappers.langchain_chat_models import MyChatOpenAI
 from langchain_wrappers.langchain_embedding_models import MyOpenAIEmbeddings
 from langchain_qdrant import QdrantVectorStore
-from langchain_core.runnables import RunnablePassthrough, RunnableParallel
+from langchain_core.runnables import RunnablePassthrough, RunnableParallel, Runnable
+from rag_prompts import system_msg, user_msg
 import chainlit as cl
 from dotenv import load_dotenv
 
@@ -24,26 +27,12 @@ from langchain.globals import set_llm_cache, get_llm_cache
 from langchain_community.cache import InMemoryCache
 set_llm_cache(InMemoryCache())
 
-system_template = """\
-Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer.\
-
-Context:
-{context}
-"""
-human_template = """\
-Question:
-{question}
-"""
-system_msg = ('system', system_template)
-user_msg = ('human', human_template)
-
-text_splitter = RecursiveCharacterTextSplitter()
-
 
+# Load the environment variables
 load_dotenv()
 
-### RAG chain
-def Get_RAG_pipeline(retriever: VectorStoreRetriever, llm: ChatOpenAI):
+# RAG chain
+def Get_RAG_pipeline(retriever: VectorStoreRetriever, llm: ChatOpenAI) -> Runnable:
 
     retriever = retriever.with_config({'run_name': 'RAG: Retriever'})
     prompt = ChatPromptTemplate([system_msg, user_msg]).with_config({'run_name': 'RAG Step2: Prompt (Augmented)'})
@@ -66,25 +55,41 @@ def Get_RAG_pipeline(retriever: VectorStoreRetriever, llm: ChatOpenAI):
 
     return RAG_chain
 
-
-def process_text_file(file: AskFileResponse):
+# Split documents
+def process_text_file(file: AskFileResponse) -> List[Document]:
     import tempfile
 
-    if file.name.endswith('.pdf'):
-        print("PDF file detected")
-        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".pdf") as temp_file:
-            temp_file_path = temp_file.name
-            with open(temp_file_path, "wb") as f:
-                f.write(file.content)
-            document_loader = PyPDFLoader(temp_file_path)
-    elif file.name.endswith('.txt'):
-        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as temp_file:
-            temp_file_path = temp_file.name
-            with open(temp_file_path, "wb") as f:
-                f.write(file.content)
-            document_loader = TextLoader(temp_file_path, autodetect_encoding=True)
+    if file.name.endswith('.txt'):
+        suffix = '.txt'
+        base_loader = TextLoader
+    elif file.name.endswith('.pdf'):
+        suffix = '.pdf'
+        base_loader = PyPDFLoader
+
+    with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=suffix) as temp_file:
+        temp_file_path = temp_file.name
+        with open(temp_file_path, 'wb') as f:
+            f.write(file.content)
+        document_loader = base_loader(temp_file_path)
+
+
+
+
+    # if file.name.endswith('.pdf'):
+    #     with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".pdf") as temp_file:
+    #         temp_file_path = temp_file.name
+    #         with open(temp_file_path, "wb") as f:
+    #             f.write(file.content)
+    #         document_loader = PyPDFLoader(temp_file_path)
+    # elif file.name.endswith('.txt'):
+    #     with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as temp_file:
+    #         temp_file_path = temp_file.name
+    #         with open(temp_file_path, "wb") as f:
+    #             f.write(file.content)
+    #         document_loader = TextLoader(temp_file_path, autodetect_encoding=True)
 
     documents = document_loader.load()
+    text_splitter = RecursiveCharacterTextSplitter()
     splitted_documents = [x.page_content for x in text_splitter.transform_documents(documents)]
 
     return splitted_documents
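
For reference, a minimal sketch of the reworked loading-and-splitting flow as a standalone function. Assumptions not in the commit: the `from typing import List` import that the new `List[Document]` annotation needs, an explicit `else` branch (the committed code leaves `suffix` and `base_loader` unbound for other extensions), canonical `langchain_community` / `langchain_text_splitters` import paths, and illustrative `chunk_size`/`chunk_overlap` values where the commit keeps the splitter's defaults.

# Sketch only -- not the committed code. Import paths and chunk sizes are assumptions.
import tempfile
from typing import List

from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter


def split_uploaded_file(name: str, content: bytes) -> List[str]:
    # Pick a loader class from the file extension, as the commit now does.
    if name.endswith('.txt'):
        suffix, base_loader = '.txt', TextLoader
    elif name.endswith('.pdf'):
        suffix, base_loader = '.pdf', PyPDFLoader
    else:
        # Added here; the committed code has no fallback branch.
        raise ValueError(f'Unsupported file type: {name}')

    # Write the upload to disk so a path-based loader can read it back.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
        temp_file.write(content)
        temp_file_path = temp_file.name

    documents = base_loader(temp_file_path).load()
    # The splitter is now built per call; chunk parameters here are illustrative.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return [doc.page_content for doc in text_splitter.transform_documents(documents)]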
langchain_wrappers/langchain_chat_models.py CHANGED
@@ -4,10 +4,10 @@ from typing import Optional
 from langchain_openai import ChatOpenAI
 
 
-# import inspect
-# load_dotenv(os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) , '.env'))
+import inspect
+load_dotenv(os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) , '.env'))
 
-load_dotenv()
+# load_dotenv()
 class MyChatOpenAI:
     @classmethod
     def from_model(
langchain_wrappers/langchain_embedding_models.py CHANGED
@@ -3,10 +3,10 @@ from dotenv import load_dotenv
 from typing import Optional
 from langchain_openai.embeddings import OpenAIEmbeddings
 
-# import inspect
-# load_dotenv(os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) , '.env'))
+import inspect
+load_dotenv(os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) , '.env'))
 
-load_dotenv()
+# load_dotenv()
 class MyOpenAIEmbeddings:
     @classmethod
     def from_model(
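
Both wrapper modules trade the cwd-dependent `load_dotenv()` for resolving `.env` next to the module file itself. A minimal sketch of the same idea using `pathlib` instead of `inspect` (a hypothetical simplification, not what the commit ships):

# Sketch: module-relative .env loading without inspect. Hypothetical alternative.
from pathlib import Path

from dotenv import load_dotenv

# __file__ identifies this module, so the .env beside it is found no matter
# which working directory the app is launched from.
load_dotenv(Path(__file__).resolve().parent / '.env')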
rag_prompts.py ADDED
@@ -0,0 +1,14 @@
+system_template = """\
+Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer.\
+
+Context:
+{context}
+"""
+system_msg = ('system', system_template)
+
+
+human_template = """\
+Question:
+{question}
+"""
+user_msg = ('human', human_template)
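
The exported `(role, template)` tuples are what app.py now feeds to `ChatPromptTemplate`. A short usage sketch; the context and question strings are illustrative placeholders:

# Sketch of how the exported prompt tuples are consumed; values are placeholders.
from langchain_core.prompts import ChatPromptTemplate

from rag_prompts import system_msg, user_msg

prompt = ChatPromptTemplate([system_msg, user_msg])
messages = prompt.format_messages(
    context='Paris is the capital of France.',
    question='What is the capital of France?',
)
# messages is [SystemMessage(...), HumanMessage(...)], ready for a chat model.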