from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv, find_dotenv
import os
from pinecone import Pinecone, PodSpec

load_dotenv(find_dotenv())


class ChatbotMemory:
    # Load the knowledge base and split it into overlapping chunks for retrieval.
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=103)
    docs = text_splitter.split_documents(documents)

    # Gemini embeddings (768-dimensional, matching the index below).
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_query",
        google_api_key=os.getenv("GEMINI_API_KEY"))

    # Create and populate the Pinecone index on first run; reuse it afterwards.
    pinecone = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
    index_name = "gdscsou-chatbot"
    if index_name not in pinecone.list_indexes().names():
        pinecone.create_index(name=index_name, metric="cosine", dimension=768,
                              spec=PodSpec(environment="gcp-starter"))
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    def contextualized_question(input: dict):
        # With chat history, rewrite the follow-up into a standalone question.
        # Returning a Runnable here is fine: LangChain invokes it with the same
        # input. Without history, pass the question through unchanged.
        if input.get("chat_history"):
            contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
            contextualize_q_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", contextualize_q_system_prompt),
                    MessagesPlaceholder(variable_name="chat_history"),
                    ("human", "{question}"),
                ]
            )
            # Reuse the class-level LLM instead of constructing a new client per call.
            contextualize_q_chain = contextualize_q_prompt | ChatbotMemory.llm | StrOutputParser()
            return contextualize_q_chain
        else:
            return input["question"]

    template = """
INSTRUCTION:
Act as an AI assistant and sales and marketing expert for Katha - A Tale of Weaves, \
a brand that brings an exclusive collection of sustainable handloom sarees, each telling \
a story of its heritage. This is a conversation with a customer visiting the site. \
Use the CONTEXT to answer the QUESTION in a helpful manner. \
Don't forget you are an AI assistant and sales and marketing expert for Katha - A Tale of Weaves. \
If you don't know an ANSWER, say you don't know. \
Always follow general guardrails before generating any response. \
Always keep the conversation in the context of Katha - A Tale of Weaves. \
Keep your replies short, compassionate, and informative. \
Give the answer from the CONTEXT. \
Help the user resolve their query while increasing engagement with, and promoting, Katha - A Tale of Weaves.

CONTEXT:
{context}

QUESTION:
{question}

ANSWER:
"""
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", template),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )

    # RAG chain: condense the question (if history exists), retrieve matching
    # chunks as context, then answer with the sales-assistant prompt above.
    rag_chain = (
        RunnablePassthrough.assign(
            context=contextualized_question | docsearch.as_retriever()
        )
        | prompt
        | llm
    )
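

# --- Usage sketch (illustrative; not part of the original module) ---
# A minimal example of invoking the chain with and without history. The
# question strings are hypothetical; the dict keys match the prompt
# variables above ("question", "chat_history"), and the message classes
# come from langchain_core.
if __name__ == "__main__":
    from langchain_core.messages import AIMessage, HumanMessage

    chat_history = []
    question = "Do you ship handloom sarees internationally?"  # illustrative
    answer = ChatbotMemory.rag_chain.invoke(
        {"question": question, "chat_history": chat_history}
    )
    print(answer)

    # Carry the exchange forward so follow-ups can be condensed into
    # standalone questions by contextualized_question.
    chat_history.extend([HumanMessage(content=question), AIMessage(content=answer)])
    follow_up = "How long does that usually take?"  # illustrative
    print(ChatbotMemory.rag_chain.invoke(
        {"question": follow_up, "chat_history": chat_history}
    ))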