katha-cb / chatbot.py
rohanshaw's picture
Upload 6 files
1dcf0d1 verified
raw
history blame
2.81 kB
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv, find_dotenv
import os
from pinecone import Pinecone, PodSpec
# Load variables from a discovered .env file into the process environment
# before GEMINI_API_KEY / PINECONE_API_KEY are read further down.
load_dotenv(dotenv_path=find_dotenv())
class Chatbot:
    """Retrieval-augmented chatbot for Katha - A Tale of Weaves.

    Everything is built in the class body, i.e. once, when the class
    statement is executed: ``dataset.txt`` is loaded and split, the Pinecone
    index is created and populated if it does not exist yet (otherwise the
    existing index is reused), and the retrieval chain is assembled.

    The finished chain is exposed as the class attribute ``rag_chain``; the
    intermediate objects (``docs``, ``embeddings``, ``docsearch``, ``llm``,
    ``prompt``, ...) remain available as class attributes as well.
    """

    # --- Source documents -------------------------------------------------
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=256, chunk_overlap=4)
    docs = text_splitter.split_documents(documents)

    # --- Embeddings -------------------------------------------------------
    # NOTE(review): task_type="retrieval_query" is set on the shared
    # embeddings object, so it presumably applies to document embeddings at
    # indexing time too. Left unchanged because switching it would no longer
    # match vectors already stored in the index - confirm before changing.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_query",
        google_api_key=os.getenv("GEMINI_API_KEY"),
    )

    # --- Vector store -----------------------------------------------------
    pinecone = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

    index_name = "gdscsou-chatbot"

    if index_name not in pinecone.list_indexes().names():
        # First run: create the index (dimension=768 is passed to match the
        # embedding model used above) and upload the split documents.
        pinecone.create_index(
            name=index_name,
            metric="cosine",
            dimension=768,
            spec=PodSpec(environment="gcp-starter"),
        )
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        # Index already exists: attach to it without re-uploading anything.
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    # --- LLM and prompt ---------------------------------------------------
    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    # The instruction paragraph is assembled from adjacent string literals so
    # that every sentence boundary carries an explicit separator. The earlier
    # backslash-continued literal had no whitespace before some of its line
    # continuations, which fused sentences together and glued the "CONTEXT:"
    # label onto the end of the instructions.
    template = (
        "\n"
        "INSTRUCTION: Act as an AI assistant and sales and marketing expert for Katha - A Tale of Weaves, "
        "a brand that bring exclusive collection of sustainable handloom sarees that tells a story of its "
        "heritage, this is conversation "
        "to a customer who came to visit site. Use the CONTEXT to answer in a helpful manner to the QUESTION. "
        "Don't forget you are an AI assistant and sales and marketing expert for Katha - A Tale of Weaves. "
        "If you don't know any ANSWER, say you don't know. "
        "Always follow general guardrails before generating any response. "
        "Always try to keep the conversation in context to Katha - A Tale of Weaves. Keep your replies short "
        "compassionate and informative. "
        "Give the answer from the CONTEXT. "
        "You should help user to get his query solved and also try to increase engagement for "
        "Katha - A Tale of Weaves and also promoting Katha - A Tale of Weaves.\n"
        "CONTEXT: {context}\n"
        "QUESTION: {question}\n"
        "ANSWER:\n"
    )

    prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )

    # --- Retrieval chain --------------------------------------------------
    # The chain input is passed through unchanged as "question" and is also
    # handed to the retriever, whose output fills the "context" slot.
    rag_chain = (
        {"context": docsearch.as_retriever(), "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )