Thalapathy Vijay committed on
Commit
a0cdb9e
β€’
1 Parent(s): e16b5e8

Upload 5 files

Browse files
Files changed (5) hide show
  1. Support Chat Bot For Website.PNG +0 -0
  2. app.py +90 -0
  3. constants.py +3 -0
  4. requirements.txt +5 -0
  5. utils.py +72 -0
Support Chat Bot For Website.PNG ADDED
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from utils import *
3
+ import constants
4
+
5
+ # Creating Session State Variable
6
+ if 'HuggingFace_API_Key' not in st.session_state:
7
+ st.session_state['HuggingFace_API_Key'] =''
8
+ if 'Pinecone_API_Key' not in st.session_state:
9
+ st.session_state['Pinecone_API_Key'] =''
10
+
11
+
12
+ #
13
+ st.title('πŸ€– AI Assistance For Website')
14
+
15
+ #********SIDE BAR Funtionality started*******
16
+
17
+ # Sidebar to capture the API keys
18
+ st.sidebar.title("πŸ˜ŽπŸ—οΈ")
19
+ st.session_state['HuggingFace_API_Key']= st.sidebar.text_input("What's your HuggingFace API key?",type="password")
20
+ st.session_state['Pinecone_API_Key']= st.sidebar.text_input("What's your Pinecone API key?",type="password")
21
+
22
+ load_button = st.sidebar.button("Load data to Pinecone", key="load_button")
23
+
24
+ #If the bove button is clicked, pushing the data to Pinecone...
25
+ if load_button:
26
+ #Proceed only if API keys are provided
27
+ if st.session_state['HuggingFace_API_Key'] !="" and st.session_state['Pinecone_API_Key']!="" :
28
+
29
+ #Fetch data from site
30
+ site_data=get_website_data(constants.WEBSITE_URL)
31
+ st.write("Data pull done...")
32
+
33
+ #Split data into chunks
34
+ chunks_data=split_data(site_data)
35
+ st.write("Spliting data done...")
36
+
37
+ #Creating embeddings instance
38
+ embeddings=create_embeddings()
39
+ st.write("Embeddings instance creation done...")
40
+
41
+ #Push data to Pinecone
42
+ push_to_pinecone(st.session_state['Pinecone_API_Key'],constants.PINECONE_ENVIRONMENT,constants.PINECONE_INDEX,embeddings,chunks_data)
43
+ st.write("Pushing data to Pinecone done...")
44
+
45
+ st.sidebar.success("Data pushed to Pinecone successfully!")
46
+ else:
47
+ st.sidebar.error("Ooopssss!!! Please provide API keys.....")
48
+
49
+ #********SIDE BAR Funtionality ended*******
50
+
51
+ #Captures User Inputs
52
+ prompt = st.text_input('How can I help you my friend ❓',key="prompt") # The box for the text prompt
53
+ document_count = st.slider('No.Of links to return πŸ”— - (0 LOW || 5 HIGH)', 0, 5, 2,step=1)
54
+
55
+ submit = st.button("Search")
56
+
57
+
58
+ if submit:
59
+ #Proceed only if API keys are provided
60
+ if st.session_state['HuggingFace_API_Key'] !="" and st.session_state['Pinecone_API_Key']!="" :
61
+
62
+ #Creating embeddings instance
63
+ embeddings=create_embeddings()
64
+ st.write("Embeddings instance creation done...")
65
+
66
+ #Pull index data from Pinecone
67
+ index=pull_from_pinecone(st.session_state['Pinecone_API_Key'],constants.PINECONE_ENVIRONMENT,constants.PINECONE_INDEX,embeddings)
68
+ st.write("Pinecone index retrieval done...")
69
+
70
+ #Fetch relavant documents from Pinecone index
71
+ relavant_docs=get_similar_docs(index,prompt,document_count)
72
+ #st.write(relavant_docs)
73
+
74
+ #Displaying search results
75
+ st.success("Please find the search results :")
76
+ #Displaying search results
77
+ st.write("search results list....")
78
+ for document in relavant_docs:
79
+
80
+ st.write("πŸ‘‰**Result : "+ str(relavant_docs.index(document)+1)+"**")
81
+ st.write("**Info**: "+document.page_content)
82
+ st.write("**Link**: "+ document.metadata['source'])
83
+
84
+
85
+
86
+ else:
87
+ st.sidebar.error("Ooopssss!!! Please provide API keys.....")
88
+
89
+
90
+
constants.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Configuration for the website support chat bot.

# Sitemap whose pages are scraped and indexed.
WEBSITE_URL = "https://guru-25.github.io/chatbot/sitemap.xml"

# Pinecone environment and index that hold the page embeddings.
PINECONE_ENVIRONMENT = "gcp-starter"
PINECONE_INDEX = "chatbot"
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
langchain
pinecone-client
openai
tiktoken
nest_asyncio
streamlit
sentence-transformers
utils.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
2
+ from langchain.vectorstores import Pinecone
3
+ from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
4
+ import pinecone
5
+ import asyncio
6
+ from langchain.document_loaders.sitemap import SitemapLoader
7
+
8
+
9
#Function to fetch data from website
#https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/sitemap
def get_website_data(sitemap_url):
    """Load every page listed in *sitemap_url* and return the documents."""
    # SitemapLoader scrapes asynchronously; give this thread its own
    # event loop (Streamlit's script thread may not have one installed).
    event_loop = asyncio.new_event_loop()
    asyncio.set_event_loop(event_loop)

    sitemap_loader = SitemapLoader(sitemap_url)
    return sitemap_loader.load()
22
+
23
#Function to split data into smaller chunks
def split_data(docs):
    """Chunk *docs* into ~1000-character pieces with a 200-character overlap."""
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_documents(docs)
34
+
35
#Function to create embeddings instance
def create_embeddings():
    """Return a SentenceTransformer embeddings instance (all-MiniLM-L6-v2)."""
    return SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
40
+
41
#Function to push data to Pinecone
def push_to_pinecone(pinecone_apikey,pinecone_environment,pinecone_index_name,embeddings,docs):
    """Embed *docs* and upsert them into the named Pinecone index.

    Returns the LangChain Pinecone vector store wrapping the index.
    """
    pinecone.init(api_key=pinecone_apikey, environment=pinecone_environment)
    return Pinecone.from_documents(docs, embeddings, index_name=pinecone_index_name)
52
+
53
#Function to pull index data from Pinecone
def pull_from_pinecone(pinecone_apikey,pinecone_environment,pinecone_index_name,embeddings):
    """Open a handle to an existing Pinecone index, wrapped with *embeddings*."""
    pinecone.init(
        api_key=pinecone_apikey,
        environment=pinecone_environment,
    )
    return Pinecone.from_existing_index(pinecone_index_name, embeddings)
65
+
66
#This function will help us in fetching the top relevent documents from our vector store - Pinecone Index
def get_similar_docs(index,query,k=2):
    """Return the *k* documents in *index* most similar to *query*."""
    return index.similarity_search(query, k=k)
71
+
72
+