import os
import tempfile

# For type hints
from typing import List
from chainlit.types import AskFileResponse
from langchain_core.vectorstores import VectorStoreRetriever
from langchain_openai import ChatOpenAI

# Document loading and splitting
from langchain_community.document_loaders.text import TextLoader
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chain building blocks
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_qdrant import QdrantVectorStore

# Project-local wrappers around the OpenAI chat/embedding models
from langchain_wrappers.langchain_chat_models import MyChatOpenAI
from langchain_wrappers.langchain_embedding_models import MyOpenAIEmbeddings

import chainlit as cl
from dotenv import load_dotenv

# Cache: memoize LLM responses in process memory, so identical prompts within
# one run skip the API call. The cache is lost when the process restarts.
from langchain.globals import set_llm_cache
from langchain_community.cache import InMemoryCache

set_llm_cache(InMemoryCache())
system_template = """\
Use the following context to answer a user's question. If you cannot find the answer in the context, say you don't know the answer.

Context:
{context}
"""
human_template = """\
Question:
{question}
"""
system_msg = ('system', system_template)
user_msg = ('human', human_template)
# Character-based splitter with the library's default chunk size/overlap
text_splitter = RecursiveCharacterTextSplitter()

# Load environment variables from a local .env file
load_dotenv()
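# A minimal .env for this app (values here are placeholders, not real keys):
#   OPENAI_API_KEY=sk-...
#   LANGCHAIN_PROJECT=my-rag-project  # optional; mirrored into LANGSMITH_PROJECT below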
### RAG chain
def Get_RAG_pipeline(retriever: VectorStoreRetriever, llm: ChatOpenAI):
    """Assemble the retrieve -> augment -> generate chain, with run names for tracing."""
    retriever = retriever.with_config({'run_name': 'RAG: Retriever'})
    prompt = ChatPromptTemplate([system_msg, user_msg]).with_config({'run_name': 'RAG Step2: Prompt (Augmented)'})
    llm = llm.with_config({'run_name': 'RAG Step3: LLM (Generation)'})

    def get_context(relevant_docs: List) -> str:
        # Concatenate the retrieved chunks into a single context string
        return "".join(doc.page_content + "\n" for doc in relevant_docs)

    RAG_chain = RunnableParallel(
        relevant_docs=retriever,
        question=lambda x: x
    ).with_config({'run_name': 'RAG Step1-1: Get relevant docs (Retrieval)'}) | RunnablePassthrough.assign(
        context=lambda x: get_context(x['relevant_docs'])
    ).with_config({'run_name': 'RAG Step1-2: Get context (Retrieval)'}) | prompt | llm
    RAG_chain = RAG_chain.with_config({'run_name': 'RAG pipeline'})
    return RAG_chain
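# Data flow through the assembled chain, for reference:
#   "question" (str)
#     -> RunnableParallel            => {'relevant_docs': [...], 'question': str}
#     -> RunnablePassthrough.assign  => {..., 'context': str}
#     -> prompt                      => formatted chat messages
#     -> llm                         => AIMessage chunks (streamed token by token)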
def process_text_file(file: AskFileResponse):
    """Persist the uploaded file to a temp path, load it, and split it into text chunks."""
    if file.name.endswith('.pdf'):
        print("PDF file detected")
        # Write the uploaded bytes to a temporary .pdf file so PyPDFLoader can read it
        with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".pdf") as temp_file:
            temp_file.write(file.content)
            temp_file_path = temp_file.name
        document_loader = PyPDFLoader(temp_file_path)
    elif file.name.endswith('.txt'):
        with tempfile.NamedTemporaryFile(mode="wb", delete=False, suffix=".txt") as temp_file:
            temp_file.write(file.content)
            temp_file_path = temp_file.name
        document_loader = TextLoader(temp_file_path, autodetect_encoding=True)
    else:
        raise ValueError(f"Unsupported file type: {file.name}")
    documents = document_loader.load()
    # transform_documents returns Document objects; keep only their text
    split_texts = [x.page_content for x in text_splitter.transform_documents(documents)]
    return split_texts
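# Chainlit lifecycle: on_chat_start runs once when a new chat session opens;
# on_message runs for every user message within that session.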
@cl.on_chat_start
async def on_chat_start():
    files = None
    # Wait for the user to upload a file
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a text (.txt) or PDF file to begin!",
            accept=["text/plain", "application/pdf"],
            max_size_mb=5,
            timeout=180,
        ).send()
    file = files[0]
    msg = cl.Message(
        content=f"Processing `{file.name}`...", disable_human_feedback=True
    )
    await msg.send()

    # Load and split the file
    texts = process_text_file(file)
    print(f"Processing {len(texts)} text chunks")

    # Embed the chunks into an in-memory Qdrant vector store
    vector_db = await QdrantVectorStore.afrom_texts(
        texts, MyOpenAIEmbeddings.from_model('small'), location=":memory:", collection_name="texts"
    )

    # Build the RAG chain over the top-3 retrieved chunks
    RAG_chain = Get_RAG_pipeline(
        retriever=vector_db.as_retriever(search_kwargs={'k': 3}),
        llm=MyChatOpenAI.from_model()
    )

    # Let the user know that the system is ready
    msg.content = f"Processing `{file.name}` done ({len(texts)} chunks in total). You can now ask questions!"
    await msg.update()

    # Keep the chain in the user session so on_message can reuse it
    cl.user_session.set("chain", RAG_chain)
@cl.on_message
async def main(message: cl.Message):
    # Mirror LANGCHAIN_PROJECT into LANGSMITH_PROJECT for tracing, if it is set
    # (assigning None to os.environ would raise a TypeError)
    langchain_project = os.getenv('LANGCHAIN_PROJECT')
    if langchain_project:
        os.environ['LANGSMITH_PROJECT'] = langchain_project
    chain = cl.user_session.get("chain")
    msg = cl.Message(content="")
    # Stream the LLM's answer token by token into the Chainlit message
    async for stream_resp in chain.astream(message.content):
        await msg.stream_token(stream_resp.content)
    await msg.send()
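# To run locally (assuming this file is saved as app.py; adjust the name if different):
#   chainlit run app.py -w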