|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain_community.document_loaders import TextLoader |
|
from langchain.schema.runnable import RunnablePassthrough |
|
from langchain.schema.output_parser import StrOutputParser |
|
from langchain_pinecone import PineconeVectorStore |
|
from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate |
|
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings |
|
from dotenv import load_dotenv, find_dotenv |
|
import os |
|
from pinecone import Pinecone, PodSpec |
|
|
|
# Pull API credentials (GEMINI_API_KEY, PINECONE_API_KEY) from the nearest
# .env file into the process environment before any client below is built.
load_dotenv(find_dotenv())
|
|
|
class ChatbotMemory():
    """Retrieval-augmented Gemini chat chain with conversational memory.

    NOTE(review): every statement in this class body executes at
    class-definition (i.e. import) time — it reads ``dataset.txt``, calls the
    Pinecone and Google Generative AI APIs, and stores the results as *class
    attributes*. Consumers are expected to use ``ChatbotMemory.rag_chain``
    directly; no instance state is involved.

    Pipeline: contextualize the question using chat history (if any) ->
    retrieve matching chunks from Pinecone -> answer with Gemini using the
    sales-assistant prompt below.
    """

    # --- Knowledge base ingestion -------------------------------------------
    # Loads the raw text corpus; assumes 'dataset.txt' is in the current
    # working directory — TODO confirm against how this module is launched.
    loader = TextLoader('dataset.txt', autodetect_encoding=True)

    documents = loader.load()

    # 512-char chunks with 103-char overlap so sentences spanning a boundary
    # still appear whole in at least one chunk.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=103)

    docs = text_splitter.split_documents(documents)

    # --- Embeddings ---------------------------------------------------------
    # Gemini embedding model; used both to index documents and to embed
    # queries at retrieval time.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001", task_type="retrieval_query", google_api_key=os.getenv("GEMINI_API_KEY"))

    # --- Vector store -------------------------------------------------------
    pinecone = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")
    )

    index_name = "gdscsou-chatbot"

    # First run: create the index (dimension=768 must match embedding-001's
    # output size) and upsert the chunked documents. Later runs: attach to
    # the already-populated index without re-ingesting.
    # NOTE(review): PodSpec/"gcp-starter" is a legacy Pinecone pod
    # environment — verify it is still supported on this account.
    if index_name not in pinecone.list_indexes().names():
        pinecone.create_index(name=index_name, metric="cosine", dimension=768, spec=PodSpec(environment="gcp-starter"))
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    # --- Answering LLM ------------------------------------------------------
    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    def contextualized_question(input: dict):
        """Routing step for the RAG chain (coerced to a RunnableLambda).

        If ``input`` carries a non-empty "chat_history", return a runnable
        chain that rewrites the latest question into a standalone one; the
        outer chain then invokes it with the same input. Otherwise return
        the raw question string, which flows straight into the retriever.
        """
        if input.get("chat_history"):
            # NOTE: names bound in a class body are NOT visible inside
            # functions nested in that body (class scope is not an enclosing
            # scope), so the class-level ``llm`` cannot be referenced here —
            # the model and prompt are rebuilt locally on every call instead.
            llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

            contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""

            contextualize_q_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", contextualize_q_system_prompt),
                    MessagesPlaceholder(variable_name="chat_history"),
                    ("human", "{question}"),
                ]
            )

            # Chain: prompt -> Gemini -> plain string (the rewritten question).
            contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()
            return contextualize_q_chain
        else:
            # No history: the question already stands on its own.
            return input["question"]

    # --- Answer prompt ------------------------------------------------------
    # System persona + guardrails for the final answer; expects {context}
    # (retrieved chunks) and {question} to be filled in by the chain.
    template = """
INSTRUCTION: Act as an AI assistant and sales and marketing expert for Katha - A Tale of Weaves, a brand that bring exclusive collection of sustainable handloom sarees that tells a story of its heritage, this is conversation \
to a customer who came to visit site. Use the CONTEXT to answer in a helpful manner to the QUESTION. \
Don't forget you are an AI assistant and sales and marketing expert for Katha - A Tale of Weaves. \
If you don't know any ANSWER, say you don't know \
Always follow general guardrails before generating any response. \
Always try to keep the conversation in context to Katha - A Tale of Weaves. Keep your replies short \
compassionate and informative.\
Give the answer from the CONTEXT\
You should help user to get his query solved and also try to increase engagement for Katha - A Tale of Weaves and also promoting Katha - A Tale of Weaves.\

CONTEXT: {context}
QUESTION: {question}
ANSWER:
"""

    # Requires "chat_history" to be present in every invocation input (an
    # empty list is fine when there is no history yet).
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", template),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )

    # --- Assembled RAG chain ------------------------------------------------
    # assign() adds "context": the (possibly rewritten) question is piped into
    # the Pinecone retriever; the original input keys pass through unchanged.
    # Invoke with {"question": ..., "chat_history": [...]}.
    rag_chain = (
        RunnablePassthrough.assign(
            context=contextualized_question | docsearch.as_retriever()
        )
        | prompt
        | llm
    )