Spaces:
Sleeping
Sleeping
jet-taekyo
committed on
Commit
•
86aaf41
1
Parent(s):
45fec1b
modify text-splitting process
Browse files
- app.py +38 -33
- langchain_wrappers/langchain_chat_models.py +3 -3
- langchain_wrappers/langchain_embedding_models.py +3 -3
- rag_prompts.py +14 -0
app.py
CHANGED
@@ -6,6 +6,8 @@ from langchain_core.vectorstores import VectorStoreRetriever
|
|
6 |
from langchain_openai import ChatOpenAI
|
7 |
from chainlit.types import AskFileResponse
|
8 |
from langchain_openai.embeddings import OpenAIEmbeddings
|
|
|
|
|
9 |
|
10 |
# Libraries to be used
|
11 |
from langchain_community.document_loaders.text import TextLoader
|
@@ -15,7 +17,8 @@ from langchain_core.prompts import ChatPromptTemplate
|
|
15 |
from langchain_wrappers.langchain_chat_models import MyChatOpenAI
|
16 |
from langchain_wrappers.langchain_embedding_models import MyOpenAIEmbeddings
|
17 |
from langchain_qdrant import QdrantVectorStore
|
18 |
-
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
|
|
|
19 |
import chainlit as cl
|
20 |
from dotenv import load_dotenv
|
21 |
|
@@ -24,26 +27,12 @@ from langchain.globals import set_llm_cache, get_llm_cache
|
|
24 |
from langchain_community.cache import InMemoryCache
|
25 |
set_llm_cache(InMemoryCache())
|
26 |
|
27 |
-
system_template = """\
|
28 |
-
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer.\
|
29 |
-
|
30 |
-
Context:
|
31 |
-
{context}
|
32 |
-
"""
|
33 |
-
human_template = """\
|
34 |
-
Question:
|
35 |
-
{question}
|
36 |
-
"""
|
37 |
-
system_msg = ('system', system_template)
|
38 |
-
user_msg = ('human', human_template)
|
39 |
-
|
40 |
-
text_splitter = RecursiveCharacterTextSplitter()
|
41 |
-
|
42 |
|
|
|
43 |
load_dotenv()
|
44 |
|
45 |
-
|
46 |
-
def Get_RAG_pipeline(retriever: VectorStoreRetriever, llm: ChatOpenAI):
|
47 |
|
48 |
retriever = retriever.with_config({'run_name': 'RAG: Retriever'})
|
49 |
prompt = ChatPromptTemplate([system_msg, user_msg]).with_config({'run_name': 'RAG Step2: Prompt (Augmented)'})
|
@@ -66,25 +55,41 @@ def Get_RAG_pipeline(retriever: VectorStoreRetriever, llm: ChatOpenAI):
|
|
66 |
|
67 |
return RAG_chain
|
68 |
|
69 |
-
|
70 |
-
def process_text_file(file: AskFileResponse):
|
71 |
import tempfile
|
72 |
|
73 |
-
if file.name.endswith('.
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
documents = document_loader.load()
|
|
|
88 |
splitted_documents = [x.page_content for x in text_splitter.transform_documents(documents)]
|
89 |
|
90 |
return splitted_documents
|
|
|
6 |
from langchain_openai import ChatOpenAI
|
7 |
from chainlit.types import AskFileResponse
|
8 |
from langchain_openai.embeddings import OpenAIEmbeddings
|
9 |
+
from langchain_core.runnables import Runnable
|
10 |
+
from langchain_core.documents import Document
|
11 |
|
12 |
# Libraries to be used
|
13 |
from langchain_community.document_loaders.text import TextLoader
|
|
|
17 |
from langchain_wrappers.langchain_chat_models import MyChatOpenAI
|
18 |
from langchain_wrappers.langchain_embedding_models import MyOpenAIEmbeddings
|
19 |
from langchain_qdrant import QdrantVectorStore
|
20 |
+
from langchain_core.runnables import RunnablePassthrough, RunnableParallel, Runnable
|
21 |
+
from rag_prompts import system_msg, user_msg
|
22 |
import chainlit as cl
|
23 |
from dotenv import load_dotenv
|
24 |
|
|
|
27 |
from langchain_community.cache import InMemoryCache
|
28 |
set_llm_cache(InMemoryCache())
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
+
# Load the environment variables
|
32 |
load_dotenv()
|
33 |
|
34 |
+
# RAG chain
|
35 |
+
def Get_RAG_pipeline(retriever: VectorStoreRetriever, llm: ChatOpenAI)-> Runnable:
|
36 |
|
37 |
retriever = retriever.with_config({'run_name': 'RAG: Retriever'})
|
38 |
prompt = ChatPromptTemplate([system_msg, user_msg]).with_config({'run_name': 'RAG Step2: Prompt (Augmented)'})
|
|
|
55 |
|
56 |
return RAG_chain
|
57 |
|
58 |
+
# Split documents
|
59 |
+
def process_text_file(file: AskFileResponse)-> List[Document]:
|
60 |
import tempfile
|
61 |
|
62 |
+
if file.name.endswith('.txt'):
|
63 |
+
suffix = '.txt'
|
64 |
+
base_loader = TextLoader
|
65 |
+
elif file.name.endswith('.pdf'):
|
66 |
+
suffix = '.pdf'
|
67 |
+
base_loader = PyPDFLoader
|
68 |
+
|
69 |
+
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix=suffix) as temp_file:
|
70 |
+
temp_file_path = temp_file.name
|
71 |
+
with open(temp_file_path, 'wb') as f:
|
72 |
+
f.write(file.content)
|
73 |
+
document_loader = base_loader(temp_file_path)
|
74 |
+
|
75 |
+
|
76 |
+
|
77 |
+
|
78 |
+
# if file.name.endswith('.pdf'):
|
79 |
+
# with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".pdf") as temp_file:
|
80 |
+
# temp_file_path = temp_file.name
|
81 |
+
# with open(temp_file_path, "wb") as f:
|
82 |
+
# f.write(file.content)
|
83 |
+
# document_loader = PyPDFLoader(temp_file_path)
|
84 |
+
# elif file.name.endswith('.txt'):
|
85 |
+
# with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as temp_file:
|
86 |
+
# temp_file_path = temp_file.name
|
87 |
+
# with open(temp_file_path, "wb") as f:
|
88 |
+
# f.write(file.content)
|
89 |
+
# document_loader = TextLoader(temp_file_path, autodetect_encoding=True)
|
90 |
|
91 |
documents = document_loader.load()
|
92 |
+
text_splitter = RecursiveCharacterTextSplitter()
|
93 |
splitted_documents = [x.page_content for x in text_splitter.transform_documents(documents)]
|
94 |
|
95 |
return splitted_documents
|
langchain_wrappers/langchain_chat_models.py
CHANGED
@@ -4,10 +4,10 @@ from typing import Optional
|
|
4 |
from langchain_openai import ChatOpenAI
|
5 |
|
6 |
|
7 |
-
|
8 |
-
|
9 |
|
10 |
-
load_dotenv()
|
11 |
class MyChatOpenAI:
|
12 |
@classmethod
|
13 |
def from_model(
|
|
|
4 |
from langchain_openai import ChatOpenAI
|
5 |
|
6 |
|
7 |
+
import inspect
|
8 |
+
load_dotenv(os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) , '.env'))
|
9 |
|
10 |
+
# load_dotenv()
|
11 |
class MyChatOpenAI:
|
12 |
@classmethod
|
13 |
def from_model(
|
langchain_wrappers/langchain_embedding_models.py
CHANGED
@@ -3,10 +3,10 @@ from dotenv import load_dotenv
|
|
3 |
from typing import Optional
|
4 |
from langchain_openai.embeddings import OpenAIEmbeddings
|
5 |
|
6 |
-
|
7 |
-
|
8 |
|
9 |
-
load_dotenv()
|
10 |
class MyOpenAIEmbeddings:
|
11 |
@classmethod
|
12 |
def from_model(
|
|
|
3 |
from typing import Optional
|
4 |
from langchain_openai.embeddings import OpenAIEmbeddings
|
5 |
|
6 |
+
import inspect
|
7 |
+
load_dotenv(os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) , '.env'))
|
8 |
|
9 |
+
# load_dotenv()
|
10 |
class MyOpenAIEmbeddings:
|
11 |
@classmethod
|
12 |
def from_model(
|
rag_prompts.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
system_template = """\
|
2 |
+
Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer.\
|
3 |
+
|
4 |
+
Context:
|
5 |
+
{context}
|
6 |
+
"""
|
7 |
+
system_msg = ('system', system_template)
|
8 |
+
|
9 |
+
|
10 |
+
human_template = """\
|
11 |
+
Question:
|
12 |
+
{question}
|
13 |
+
"""
|
14 |
+
user_msg = ('human', human_template)
|