import os

# For type hints
from typing import List
from langchain_core.vectorstores import VectorStoreRetriever
from langchain_openai import ChatOpenAI
from chainlit.types import AskFileResponse
from langchain_openai.embeddings import OpenAIEmbeddings

# Libraries to be used
from langchain_community.document_loaders.text import TextLoader
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_wrappers.langchain_chat_models import MyChatOpenAI
from langchain_wrappers.langchain_embedding_models import MyOpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
import chainlit as cl
from dotenv import load_dotenv

# Cache identical LLM calls in memory to avoid repeated API requests
from langchain.globals import set_llm_cache
from langchain_community.cache import InMemoryCache
set_llm_cache(InMemoryCache())
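# Note: InMemoryCache only deduplicates exact repeat prompts within this process;
# restarting the app clears the cache.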

system_template = """\
Use the following context to answer the user's question. If you cannot find the answer in the context, say you don't know the answer.

Context:
{context}
"""
human_template = """\
Question:
{question}
"""
system_msg = ('system', system_template)
user_msg = ('human', human_template)

# Split documents into chunks using the splitter's default chunk size and overlap
text_splitter = RecursiveCharacterTextSplitter()


load_dotenv()

### RAG chain
def get_rag_pipeline(retriever: VectorStoreRetriever, llm: ChatOpenAI):
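    """Compose the RAG chain: retrieve relevant docs, build the context, fill the prompt, generate."""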
    
    retriever = retriever.with_config({'run_name': 'RAG: Retriever'})
    prompt = ChatPromptTemplate([system_msg, user_msg]).with_config({'run_name': 'RAG Step2: Prompt (Augmented)'})
    llm = llm.with_config({'run_name': 'RAG Step3: LLM (Generation)'})
    
    def get_context(relevant_docs: List) -> str:
        # Concatenate the retrieved documents' contents into a single context string
        return "".join(doc.page_content + "\n" for doc in relevant_docs)
    

    RAG_chain = RunnableParallel(
        relevant_docs = retriever,
        question = lambda x: x  # pass the user's question through unchanged
    ).with_config({'run_name':'RAG Step1-1: Get relevant docs (Retrieval)'}) | RunnablePassthrough.assign(
        context = lambda x: get_context(x['relevant_docs'])
    ).with_config({'run_name':'RAG Step1-2: Get context (Retrieval)'}) | prompt | llm
    RAG_chain = RAG_chain.with_config({'run_name':'RAG pipeline'})

    return RAG_chain


def process_text_file(file: AskFileResponse):
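    """Write the uploaded file to a temp path, load it with the matching loader, and split it into text chunks."""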
    import tempfile

    # Persist the upload to a temp file so a LangChain loader can read it from disk
    if file.name.endswith('.pdf'):
        suffix = ".pdf"
    elif file.name.endswith('.txt'):
        suffix = ".txt"
    else:
        raise ValueError(f"Unsupported file type: {file.name}")

    with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=suffix) as temp_file:
        temp_file.write(file.content)
        temp_file_path = temp_file.name

    if suffix == ".pdf":
        document_loader = PyPDFLoader(temp_file_path)
    else:
        document_loader = TextLoader(temp_file_path, autodetect_encoding=True)
       
    documents = document_loader.load()
    splitted_documents = [x.page_content for x in text_splitter.transform_documents(documents)]
   
    return splitted_documents


@cl.on_chat_start
async def on_chat_start():
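    """Prompt the user for a document, index it into an in-memory Qdrant store, and build the RAG chain."""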
    files = None

    # Wait for the user to upload a file
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a text or PDF file to begin!",
            accept=["text/plain", "application/pdf"],
            max_size_mb=5,
            timeout=180,
        ).send()


    file = files[0] 
    msg = cl.Message(
        content=f"Processing `{file.name}`...", disable_human_feedback=True
    )
    await msg.send()

    # load the file
    texts = process_text_file(file)

    print(f"Processing {len(texts)} text chunks")

    # Create an in-memory Qdrant vector store from the text chunks
    vector_db = await QdrantVectorStore.afrom_texts(
        texts, MyOpenAIEmbeddings.from_model('small'), location=":memory:", collection_name="texts"
    )
    

    # Build the RAG chain
    RAG_chain = get_rag_pipeline(
        retriever=vector_db.as_retriever(search_kwargs={'k': 3}),
        llm=MyChatOpenAI.from_model()
    )
    
    # Let the user know that the system is ready
    msg.content = f"Processing `{file.name}` done ({len(texts)} chunks in total). You can now ask questions!"
    await msg.update()

    cl.user_session.set("chain", RAG_chain)


@cl.on_message
async def main(message: cl.Message):
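    """Stream the RAG chain's answer for each incoming user message."""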
    # Mirror the LangChain project name into the LangSmith env var, if it is set
    langchain_project = os.getenv('LANGCHAIN_PROJECT')
    if langchain_project is not None:
        os.environ['LANGSMITH_PROJECT'] = langchain_project

    chain = cl.user_session.get("chain")

    msg = cl.Message(content="")
    async for stream_resp in chain.astream(message.content):
        await msg.stream_token(stream_resp.content)

    await msg.send()
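
# To launch the app locally (assuming this file is saved as app.py):
#   chainlit run app.py -w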