File size: 3,992 Bytes
1dcf0d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv, find_dotenv
import os
from pinecone import Pinecone, PodSpec

# Load environment variables from the nearest .env file before anything below
# reads them — the class body consumes GEMINI_API_KEY and PINECONE_API_KEY.
load_dotenv(find_dotenv())

class ChatbotMemory():
    """RAG chatbot over a Pinecone index with chat-history-aware retrieval.

    NOTE(review): every statement below runs at class-definition time, so
    merely importing this module loads ``dataset.txt``, builds embeddings,
    and connects to Pinecone (network + file side effects).  All attributes
    are class-level and therefore shared by every importer — presumably
    intentional (one chain per process); confirm with callers.
    """

    # Load the knowledge base and split it into overlapping chunks so each
    # chunk fits the embedding model's context while retaining continuity
    # across chunk boundaries (103-char overlap).
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=103)
    docs = text_splitter.split_documents(documents)

    # Gemini embedding model; task_type="retrieval_query" is used for both
    # indexing and querying here — NOTE(review): indexing usually wants
    # "retrieval_document"; confirm this is deliberate.
    embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001", task_type="retrieval_query", google_api_key=os.getenv("GEMINI_API_KEY"))

    pinecone = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")
    )

    index_name = "gdscsou-chatbot"

    # First run: create the index and embed+upsert all chunks.
    # Subsequent runs: attach to the existing index without re-embedding.
    # dimension=768 — assumed to match embedding-001's output size; TODO confirm.
    if index_name not in pinecone.list_indexes().names():
        pinecone.create_index(name=index_name, metric="cosine", dimension=768, spec=PodSpec(environment="gcp-starter"))
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
         docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)


    # LLM used both for question reformulation (below) and the final answer.
    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    def contextualized_question(input: dict):
        """Route the incoming payload to a standalone question.

        If ``input`` has a non-empty ``"chat_history"``, return a runnable
        (prompt | llm | parser) that rewrites the latest question into one
        understandable without the history; otherwise return the raw
        ``input["question"]`` string unchanged.

        NOTE(review): intentionally has no ``self`` — it is piped as a plain
        function into ``rag_chain`` below (LangChain coerces it to a
        runnable), never called as a bound method.  The parameter name
        shadows the builtin ``input``.
        """
        if input.get("chat_history"):
            # Class-scope attributes (the class-level `llm` above) are NOT
            # visible inside function bodies defined in class scope, so the
            # model must be instantiated again here.
            llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

            contextualize_q_system_prompt = """Given a chat history and the latest user question \
            which might reference context in the chat history, formulate a standalone question \
            which can be understood without the chat history. Do NOT answer the question, \
            just reformulate it if needed and otherwise return it as is."""

            contextualize_q_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", contextualize_q_system_prompt),
                    MessagesPlaceholder(variable_name="chat_history"),
                    ("human", "{question}"),
                ]
            )

            # Return the chain itself; the enclosing rag_chain invokes it
            # with the same input dict to produce the reformulated question.
            contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()
            return contextualize_q_chain
        else:
            return input["question"]
    
    # Answer-generation system prompt; {context} is filled with retrieved
    # chunks and {question} with the (possibly reformulated) user question.
    template = """
    INSTRUCTION: Act as an AI assistant and sales and marketing expert for Katha - A Tale of Weaves, a brand that bring exclusive collection of sustainable handloom sarees that tells a story of its heritage, this is conversation \
    to a customer who came to visit site. Use the CONTEXT to answer in a helpful manner to the QUESTION. \
    Don't forget you are an AI assistant and sales and marketing expert for Katha - A Tale of Weaves. \
    If you don't know any ANSWER, say you don't know \
    Always follow general guardrails before generating any response. \
    Always try to keep the conversation in context to Katha - A Tale of Weaves. Keep your replies short \
    compassionate and informative.\
    Give the answer from the CONTEXT\
    You should help user to get his query solved and also try to increase engagement for Katha - A Tale of Weaves and also promoting Katha - A Tale of Weaves.\
        
    CONTEXT: {context}
    QUESTION: {question}
    ANSWER:
    """

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", template),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )

    
    # Full RAG pipeline: reformulate the question (when history exists),
    # retrieve matching chunks as `context`, then prompt the LLM.
    # Expects an input dict with "question" and "chat_history" keys.
    rag_chain = (
        RunnablePassthrough.assign(
            context=contextualized_question | docsearch.as_retriever() 
        )
        | prompt
        | llm
    )