|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain_community.document_loaders import TextLoader |
|
from langchain.schema.runnable import RunnablePassthrough |
|
from langchain.schema.output_parser import StrOutputParser |
|
from langchain_pinecone import PineconeVectorStore |
|
from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate |
|
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings |
|
from dotenv import load_dotenv, find_dotenv |
|
import os |
|
from pinecone import Pinecone, PodSpec |
|
|
|
# Pull API credentials (GEMINI_API_KEY, PINECONE_API_KEY) from the nearest
# .env file into the process environment before any client below is built.
load_dotenv(find_dotenv())
|
|
|
class ChatbotMemory():
    """Retrieval-augmented Gemini chat chain with conversational memory.

    NOTE(review): every statement in this class body executes at
    class-definition (i.e. import) time — it reads ``dataset.txt``, calls the
    Pinecone and Google Generative AI APIs, and stores the results as *class
    attributes*. Consumers are expected to use ``ChatbotMemory.rag_chain``
    directly; no instance state is involved.

    Pipeline: contextualize the question using chat history (if any) ->
    retrieve matching chunks from Pinecone -> answer with Gemini using the
    sales-assistant prompt below.
    """

    # --- Knowledge base ingestion -------------------------------------------
    # Loads the raw text corpus; assumes 'dataset.txt' is in the current
    # working directory — TODO confirm against how this module is launched.
    loader = TextLoader('dataset.txt', autodetect_encoding=True)

    documents = loader.load()

    # 512-char chunks with 103-char overlap so sentences spanning a boundary
    # still appear whole in at least one chunk.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=103)

    docs = text_splitter.split_documents(documents)

    # --- Embeddings ---------------------------------------------------------
    # Gemini embedding model; used both to index documents and to embed
    # queries at retrieval time.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001", task_type="retrieval_query", google_api_key=os.getenv("GEMINI_API_KEY"))

    # --- Vector store -------------------------------------------------------
    pinecone = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")
    )

    index_name = "gdscsou-chatbot"

    # First run: create the index (dimension=768 must match embedding-001's
    # output size) and upsert the chunked documents. Later runs: attach to
    # the already-populated index without re-ingesting.
    # NOTE(review): PodSpec/"gcp-starter" is a legacy Pinecone pod
    # environment — verify it is still supported on this account.
    if index_name not in pinecone.list_indexes().names():
        pinecone.create_index(name=index_name, metric="cosine", dimension=768, spec=PodSpec(environment="gcp-starter"))
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    # --- Answering LLM ------------------------------------------------------
    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    def contextualized_question(input: dict):
        """Routing step for the RAG chain (coerced to a RunnableLambda).

        If ``input`` carries a non-empty "chat_history", return a runnable
        chain that rewrites the latest question into a standalone one; the
        outer chain then invokes it with the same input. Otherwise return
        the raw question string, which flows straight into the retriever.
        """
        if input.get("chat_history"):
            # NOTE: names bound in a class body are NOT visible inside
            # functions nested in that body (class scope is not an enclosing
            # scope), so the class-level ``llm`` cannot be referenced here —
            # the model and prompt are rebuilt locally on every call instead.
            llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

            contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""

            contextualize_q_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", contextualize_q_system_prompt),
                    MessagesPlaceholder(variable_name="chat_history"),
                    ("human", "{question}"),
                ]
            )

            # Chain: prompt -> Gemini -> plain string (the rewritten question).
            contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()
            return contextualize_q_chain
        else:
            # No history: the question already stands on its own.
            return input["question"]

    # --- Answer prompt ------------------------------------------------------
    # System persona + guardrails for the final answer; expects {context}
    # (retrieved chunks) and {question} to be filled in by the chain.
    template = """
INSTRUCTION: Act as an AI assistant and sales and marketing expert for Katha - A Tale of Weaves, a brand that bring exclusive collection of sustainable handloom sarees that tells a story of its heritage, this is conversation \
to a customer who came to visit site. Use the CONTEXT to answer in a helpful manner to the QUESTION. \
Don't forget you are an AI assistant and sales and marketing expert for Katha - A Tale of Weaves. \
If you don't know any ANSWER, say you don't know \
Always follow general guardrails before generating any response. \
Always try to keep the conversation in context to Katha - A Tale of Weaves. Keep your replies short \
compassionate and informative.\
Give the answer from the CONTEXT\
You should help user to get his query solved and also try to increase engagement for Katha - A Tale of Weaves and also promoting Katha - A Tale of Weaves.\

CONTEXT: {context}
QUESTION: {question}
ANSWER:
"""

    # Requires "chat_history" to be present in every invocation input (an
    # empty list is fine when there is no history yet).
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", template),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )

    # --- Assembled RAG chain ------------------------------------------------
    # assign() adds "context": the (possibly rewritten) question is piped into
    # the Pinecone retriever; the original input keys pass through unchanged.
    # Invoke with {"question": ..., "chat_history": [...]}.
    rag_chain = (
        RunnablePassthrough.assign(
            context=contextualized_question | docsearch.as_retriever()
        )
        | prompt
        | llm
    )