rohanshaw committed on
Commit
1dcf0d1
1 Parent(s): e96971d

Upload 6 files

Files changed (6)
  1. Dockerfile +13 -0
  2. app.py +75 -0
  3. chatbot.py +69 -0
  4. chatbotmemory.py +92 -0
  5. dataset.txt +74 -0
  6. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:latest
+
+ WORKDIR /
+
+ COPY ./requirements.txt .
+
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY . .
+
+ EXPOSE 7860
+
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
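
The container serves the FastAPI app with uvicorn on port 7860. For local testing without Docker, the same server can be started from Python; a minimal sketch mirroring the CMD line above, assuming uvicorn is installed and the project root is the working directory:

import uvicorn

if __name__ == "__main__":
    # Serve the FastAPI instance defined in app.py on the same host/port as the container.
    uvicorn.run("app:app", host="0.0.0.0", port=7860)
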
app.py ADDED
@@ -0,0 +1,75 @@
+ from fastapi import FastAPI
+
+ from fastapi.middleware.cors import CORSMiddleware
+
+ from chatbot import Chatbot
+
+ from chatbotmemory import ChatbotMemory
+
+ import logging
+
+ from langchain_core.messages import AIMessage, HumanMessage
+
+
+ app = FastAPI()
+
+ # Add logging
+
+ logging.basicConfig(level=logging.INFO)
+
+ logger = logging.getLogger(__name__)
+
+ formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+
+ handler = logging.StreamHandler()
+
+ handler.setFormatter(formatter)
+
+ logger.addHandler(handler)
+
+ # Add CORS
+
+ origins = ["*"]
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=origins,
+     allow_credentials=True,
+     allow_methods=["GET", "POST", "PUT", "DELETE"],
+     allow_headers=["*"],
+ )
+
+ bot1 = Chatbot()
+ bot2 = ChatbotMemory()
+
+ @app.get("/")
+
+ def read_root():
+
+     return {
+
+         "message": "API running successfully",
+
+         "endpoints": [
+
+             "/chat/v1/",
+
+             "/chat/v2/",
+
+         ]
+
+     }
+
+ @app.post("/chat/v1/")
+ def chat(q: str):
+     logger.info(q)
+     answer = bot1.rag_chain.invoke(q)
+     return {"answer": answer}
+
+ @app.post("/chat/v2/")
+ def chatMemory(q: str):
+     chat_history = []
+     logger.info(q)
+     ai_msg = bot2.rag_chain.invoke({"question": q, "chat_history": chat_history})
+     chat_history.extend([HumanMessage(content=q), ai_msg])
+     return {"answer": ai_msg}
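
With the server running, the two endpoints can be exercised from any HTTP client. A minimal sketch using requests, assuming a local deployment on port 7860; since q is declared as a plain str, FastAPI reads it from the query string:

import requests

BASE_URL = "http://localhost:7860"  # assumed local deployment

# Stateless RAG endpoint
r = requests.post(f"{BASE_URL}/chat/v1/", params={"q": "Which fabrics do your sarees use?"})
print(r.json()["answer"])

# History-aware endpoint (note: the handler above starts each request with an empty chat_history)
r = requests.post(f"{BASE_URL}/chat/v2/", params={"q": "Do you deliver to Bangalore?"})
print(r.json()["answer"])
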
chatbot.py ADDED
@@ -0,0 +1,69 @@
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain_community.document_loaders import TextLoader
+ from langchain.schema.runnable import RunnablePassthrough
+ from langchain.schema.output_parser import StrOutputParser
+ from langchain_pinecone import PineconeVectorStore
+ from langchain.prompts import PromptTemplate
+ from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
+ from dotenv import load_dotenv, find_dotenv
+ import os
+ from pinecone import Pinecone, PodSpec
+
+ load_dotenv(find_dotenv())
+
+ class Chatbot():
+
+     loader = TextLoader('dataset.txt', autodetect_encoding=True)
+     documents = loader.load()
+     text_splitter = CharacterTextSplitter(chunk_size=256, chunk_overlap=4)
+     docs = text_splitter.split_documents(documents)
+
+     embeddings = GoogleGenerativeAIEmbeddings(
+         model="models/embedding-001", task_type="retrieval_query", google_api_key=os.getenv("GEMINI_API_KEY")
+     )
+
+     pinecone = Pinecone(
+         api_key=os.environ.get("PINECONE_API_KEY")
+         # host='gcp-starter'
+     )
+
+     index_name = "gdscsou-chatbot"
+
+     if index_name not in pinecone.list_indexes().names():
+         pinecone.create_index(name=index_name, metric="cosine", dimension=768, spec=PodSpec(environment="gcp-starter"))
+         docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
+     else:
+         docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
+
+
+     llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))
+
+     template = """
+     INSTRUCTION: Act as an AI assistant and sales and marketing expert for Katha - A Tale of Weaves, a brand offering an exclusive collection of sustainable handloom sarees, each telling a story of its heritage. This is a conversation \
+     with a customer visiting the site. Use the CONTEXT to answer the QUESTION in a helpful manner. \
+     Don't forget you are an AI assistant and sales and marketing expert for Katha - A Tale of Weaves. \
+     If you don't know an ANSWER, say you don't know. \
+     Always follow general guardrails before generating any response. \
+     Always keep the conversation in the context of Katha - A Tale of Weaves. Keep your replies short, \
+     compassionate and informative. \
+     Give the answer from the CONTEXT. \
+     You should help the user resolve their query while increasing engagement with and promoting Katha - A Tale of Weaves. \
+
+     CONTEXT: {context}
+     QUESTION: {question}
+     ANSWER:
+     """
+
+     prompt = PromptTemplate(
+         template=template,
+         input_variables=["context", "question"]
+     )
+
+
+     rag_chain = (
+         {"context": docsearch.as_retriever(), "question": RunnablePassthrough()}
+         | prompt
+         | llm
+         | StrOutputParser()
+     )
+
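
For a quick smoke test outside the API, the chain in chatbot.py can be invoked on its own. A minimal sketch, assuming GEMINI_API_KEY and PINECONE_API_KEY are available (for example via a .env file) and dataset.txt sits in the working directory:

from chatbot import Chatbot

# Instantiating the class loads dataset.txt, builds or reuses the Pinecone index, and wires up the chain.
bot = Chatbot()
print(bot.rag_chain.invoke("What fabrics are your sarees made of?"))
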
chatbotmemory.py ADDED
@@ -0,0 +1,92 @@
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_community.document_loaders import TextLoader
+ from langchain.schema.runnable import RunnablePassthrough
+ from langchain.schema.output_parser import StrOutputParser
+ from langchain_pinecone import PineconeVectorStore
+ from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate
+ from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
+ from dotenv import load_dotenv, find_dotenv
+ import os
+ from pinecone import Pinecone, PodSpec
+
+ load_dotenv(find_dotenv())
+
+ class ChatbotMemory():
+
+     loader = TextLoader('dataset.txt', autodetect_encoding=True)
+     documents = loader.load()
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=103)
+     docs = text_splitter.split_documents(documents)
+
+     embeddings = GoogleGenerativeAIEmbeddings(
+         model="models/embedding-001", task_type="retrieval_query", google_api_key=os.getenv("GEMINI_API_KEY"))
+
+     pinecone = Pinecone(
+         api_key=os.environ.get("PINECONE_API_KEY")
+     )
+
+     index_name = "gdscsou-chatbot"
+
+     if index_name not in pinecone.list_indexes().names():
+         pinecone.create_index(name=index_name, metric="cosine", dimension=768, spec=PodSpec(environment="gcp-starter"))
+         docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
+     else:
+         docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
+
+
+     llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))
+
+     def contextualized_question(input: dict):
+         if input.get("chat_history"):
+             llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))
+
+             contextualize_q_system_prompt = """Given a chat history and the latest user question \
+             which might reference context in the chat history, formulate a standalone question \
+             which can be understood without the chat history. Do NOT answer the question, \
+             just reformulate it if needed and otherwise return it as is."""
+
+             contextualize_q_prompt = ChatPromptTemplate.from_messages(
+                 [
+                     ("system", contextualize_q_system_prompt),
+                     MessagesPlaceholder(variable_name="chat_history"),
+                     ("human", "{question}"),
+                 ]
+             )
+
+             contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()
+             return contextualize_q_chain
+         else:
+             return input["question"]
+
+     template = """
+     INSTRUCTION: Act as an AI assistant and sales and marketing expert for Katha - A Tale of Weaves, a brand offering an exclusive collection of sustainable handloom sarees, each telling a story of its heritage. This is a conversation \
+     with a customer visiting the site. Use the CONTEXT to answer the QUESTION in a helpful manner. \
+     Don't forget you are an AI assistant and sales and marketing expert for Katha - A Tale of Weaves. \
+     If you don't know an ANSWER, say you don't know. \
+     Always follow general guardrails before generating any response. \
+     Always keep the conversation in the context of Katha - A Tale of Weaves. Keep your replies short, \
+     compassionate and informative. \
+     Give the answer from the CONTEXT. \
+     You should help the user resolve their query while increasing engagement with and promoting Katha - A Tale of Weaves. \
+
+     CONTEXT: {context}
+     QUESTION: {question}
+     ANSWER:
+     """
+
+     prompt = ChatPromptTemplate.from_messages(
+         [
+             ("system", template),
+             MessagesPlaceholder(variable_name="chat_history"),
+             ("human", "{question}"),
+         ]
+     )
+
+
+     rag_chain = (
+         RunnablePassthrough.assign(
+             context=contextualized_question | docsearch.as_retriever()
+         )
+         | prompt
+         | llm
+     )
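
Unlike chatbot.py, this chain expects a dict carrying both the question and the running chat_history, and it rewrites follow-up questions into standalone ones before retrieval. A minimal sketch of carrying history across turns, under the same environment assumptions as chatbot.py:

from chatbotmemory import ChatbotMemory
from langchain_core.messages import AIMessage, HumanMessage

bot = ChatbotMemory()
chat_history = []

q1 = "Do you ship to Bangalore?"
a1 = bot.rag_chain.invoke({"question": q1, "chat_history": chat_history})
chat_history.extend([HumanMessage(content=q1), AIMessage(content=a1)])

# The follow-up relies on chat_history to resolve "they".
q2 = "How long do they usually take to arrive?"
print(bot.rag_chain.invoke({"question": q2, "chat_history": chat_history}))
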
dataset.txt ADDED
@@ -0,0 +1,74 @@
+ Brand Name : Katha - A Tale of Weaves
+ Facebook Page Link : https://www.facebook.com/profile.php?id=61556532028838 | 1.4K likes • 1.4K followers
+ Instagram Page Link : https://www.instagram.com/kathatale/ | 23 posts . 81 followers
+
+ Intro -
+ Our brand aims to bring an exclusive collection of sustainable handloom sarees that tell a story of their heritage || DM 7439678095 for order || No COD
+
+
+ Page · Clothing (Brand)
+
+ Address : Bidhannagar, Kolkata, India, West Bengal
+
+ Phone Number : 074396 78095
+
+ Email Address : [email protected]
+
+ Whatsapp : https://wa.me/message/GZ2F5P6CGRZIP1
+
+ Delivery · Online booking · In-store pickup…
+
+ Service Area : Kolkata, West Bengal, India · Navi Mumbai, Maharashtra, India · New Delhi, Delhi, India · Noida, Uttar Pradesh, India · Hyderabad, Telangana, India · Bangalore, Karnataka, India
+
+
+ Reviews :
+ 1. Bipasa Sengupta recommends Katha - A Tale of Weaves.
+ - March 14 | Exclusive quality and designs. Highly satisfied with the sarees. Prompt one-day delivery. Please try ... All the best.
+
+
+ Products :
+ 1. Payel De in our “Cotton Tissue Flower” saree, which sparkles with delicate jamdani work, creating a dazzling effect on the fabric with a stunning pattern of floral and geometric motifs.
+ Available Colours: Red, Sea Green, Dark Pink and Black.
+ Blouse Piece: Yes.
+ Fabric: 100% Pure Handloom Cotton
+
+ 2. Check out our new Summer collection 🌼
+ Actress Payel Mukherjee is looking gorgeous in our 'Purple Panache Saree' & 'Crimson Polka Saree', crafted with pure handloom cotton.
+
+ 3. Actress Payel De is looking gorgeous and divine in our “Doodh e Alta” saree.
+ The “Doodh e Alta” handloom cotton saree is a traditional Bengali saree known for its iconic red-bordered white design, which is deeply rooted in the culture and tradition of Bengal. It celebrates femininity with its elegant combination of scarlet and cream.
+ Blouse Piece: Yes.
+ Fabric: 100% Pure Handloom Cotton
+
+ 4. The season of celebrations with our exclusive collection of Diamond Motif Soft Cotton Jamdani Sarees.
+ Each saree is crafted from 100% pure handspun cotton, ensuring a blend of comfort and sustainability.
+ BP: Yes
+ Fabric: 100% pure handspun cotton
+
+ 5. Elegance Woven in Silk: Discover the timeless allure of Pure Raw Silk Sarees. These sarees are renowned for their intricate designs and vibrant colours, achieved through a traditional Ikkat weaving style in the border and pallu.
+ Blouse Piece: Yes
+ Fabric: Silkmark Certified Raw Silk
+
+ 6. The season whispers tales of celebrations, promising memorable moments and cherished gatherings with our exclusive collection of Handwoven Soft, Lightweight Pure Cotton Sarees.
+ Each saree is crafted from 100% pure handloom cotton, ensuring a blend of comfort and sustainability.
+ BP: Yes
+ Fabric: 100% pure handloom cotton
+
+ 7. Embrace the warmth of the season and the joy of upcoming festivals with our exclusive collection of Handwoven “Eco-chic Elegance” Cotton Sarees. Each saree is crafted from 100% pure handloom cotton, ensuring a blend of comfort and sustainability.
+ BP: Yes
+ Fabric: 100% pure handloom cotton
+
+ 8. Our Luxe design exclusive cotton handwoven sarees. Each thread tells a story, and the collection is a canvas of artistry.
+ These handwoven sarees, crafted with love, are light, airy, easy to drape and perfect for any occasion.
+ Blouse Piece: Yes
+ Fabric: 100% Handloom Cotton
+
+ 9. Launching our Poila Baishakh special Dhonekhali sarees. These sarees are lightweight and handwoven, characterized by their fine cotton texture and unique ‘khejur chori’ design on the pallu.
+ Fabric: 100 count pure cotton by cotton
+ Blouse Piece - No
+
+ 10. Launching our new MAHAPAR saree collection, a tribute to the enduring legacy and meticulous skill of the weavers, offering a piece of heritage that is both luxurious and comfortable.
+ Blouse Piece: Yes
+ Fabric: 100% Handspun Cotton
+
+ website link : katha.lumaticai.com
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ langchain==0.1.6
+ langchain-community==0.0.19
+ langchain-core==0.1.23
+ pinecone-client
+ python-dotenv
+ fastapi
+ langchain_google_genai
+ langchain-pinecone
+ chardet
+ uvicorn