ANeuronI committed
Commit
1842d77
1 Parent(s): 38dde5a
Files changed (6)
  1. .streamlit/secrets.toml +2 -0
  2. FINALAPP.py +206 -0
  3. README.md +33 -0
  4. _init_.py +1 -0
  5. requirements.txt +12 -0
  6. tools.py +44 -0
.streamlit/secrets.toml ADDED
@@ -0,0 +1,2 @@
+ GOOGLE_API_KEY = "AIzaSyDhICh8yNoZ2dgtmuC-bw8byX_7ELvaHIc"
+ GOOGLE_CSE_ID = "05aad8a0821c14286"
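
These two keys are what LangChain's built-in `google-search` tool reads from the environment. A minimal standalone check (a sketch, assuming the keys above are exported as environment variables and google-api-python-client is installed):

    from langchain_community.utilities import GoogleSearchAPIWrapper

    # GoogleSearchAPIWrapper picks up GOOGLE_API_KEY and GOOGLE_CSE_ID
    # from the environment
    search = GoogleSearchAPIWrapper()
    print(search.run("latest Streamlit release"))
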
FINALAPP.py ADDED
@@ -0,0 +1,206 @@
+ import os
+ import tempfile
+ import streamlit as st
+ from dotenv import load_dotenv
+ from pdfminer.high_level import extract_text
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores import FAISS
+ from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
+ from langchain.retrievers.multi_query import MultiQueryRetriever
+ from langchain.prompts import PromptTemplate
+ from langchain_groq import ChatGroq
+ from langchain.agents import initialize_agent, load_tools
+
+ # Load .streamlit/secrets.toml into the environment if present, checking
+ # the project directory first and then the user-level directory. Simple
+ # KEY = "value" pairs are dotenv-compatible.
+ for secrets_path in (os.path.join(os.getcwd(), ".streamlit", "secrets.toml"),
+                      os.path.join(os.path.expanduser("~"), ".streamlit", "secrets.toml")):
+     if os.path.exists(secrets_path):
+         load_dotenv(secrets_path)
+         break
+
+ # Extract raw text from each uploaded PDF via a temporary file on disk
+ def extract_text_from_pdfs(docs):
+     text = ""
+     for doc in docs:
+         tmp_file_path = None
+         try:
+             with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+                 tmp_file.write(doc.getbuffer())
+                 tmp_file_path = tmp_file.name
+
+             text += extract_text(tmp_file_path)
+
+         except Exception as e:
+             st.error(f"Error processing {doc.name}: {e}")
+         finally:
+             # Guard: tmp_file_path stays unset if NamedTemporaryFile itself failed
+             if tmp_file_path and os.path.exists(tmp_file_path):
+                 os.remove(tmp_file_path)
+
+     return text
+
+ # Split raw text into overlapping chunks for embedding
+ def get_text_chunks(raw_text):
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
+     chunks = text_splitter.split_text(raw_text)
+     return chunks
+
+ # Embed the chunks with a BGE model and index them in FAISS
+ def create_faiss_index(text_chunks):
+     model_name = "BAAI/bge-small-en"
+     model_kwargs = {"device": "cpu"}
+     encode_kwargs = {"normalize_embeddings": True}
+     embeddings = HuggingFaceBgeEmbeddings(model_name=model_name,
+                                           model_kwargs=model_kwargs,
+                                           encode_kwargs=encode_kwargs)
+
+     vector_store = FAISS.from_texts(text_chunks, embeddings)
+     return vector_store
+
+ # Build a conversational RAG chain with multi-query retrieval
+ def get_conversation_chain(vector_store, groq_api_key):
+     llm = ChatGroq(
+         temperature=0.7,
+         model="llama3-70b-8192",
+         api_key=groq_api_key,
+         streaming=True,
+         verbose=True
+     )
+
+     memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+
+     prompt_template = PromptTemplate(
+         input_variables=["question"],
+         template="""You are an AI language model assistant. Your task is to generate 3
+ different versions of the given user question to retrieve relevant documents from
+ a vector database. By generating multiple perspectives on the user question, your
+ goal is to help the user overcome some of the limitations of the distance-based
+ similarity search. Provide these alternative questions separated by newlines.
+ Original question: {question}""",
+     )
+
+     # MultiQueryRetriever rewrites the question into several variants
+     # (three, per the prompt above) and merges the retrieved chunks
+     retriever = MultiQueryRetriever.from_llm(
+         retriever=vector_store.as_retriever(),
+         llm=llm,
+         prompt=prompt_template
+     )
+
+     conversation_chain = ConversationalRetrievalChain.from_llm(
+         llm=llm,
+         retriever=retriever,
+         memory=memory
+     )
+     return conversation_chain, llm
+
+ # Build a ReAct agent with the custom summarizer tool plus math and
+ # Google-search tools, used when the documents cannot answer a question
+ def get_web_agent(groq_api_key):
+     llm = ChatGroq(
+         temperature=0.7,
+         model="llama3-70b-8192",
+         api_key=groq_api_key,
+         streaming=True,
+         verbose=True
+     )
+     # Imported lazily: tools.py reads GROQ_API_KEY at import time and
+     # raises if the key has not been provided yet
+     from tools import summarizer_tool
+     tools = [summarizer_tool]
+     tools.extend(load_tools(["llm-math", "google-search"], llm=llm))
+
+     memory = ConversationBufferMemory(memory_key="chat_history")
+     agent = initialize_agent(
+         agent='zero-shot-react-description',
+         tools=tools,
+         llm=llm,
+         verbose=True,
+         max_iterations=10,
+         memory=memory,
+         handle_parsing_errors=True
+     )
+     return agent
+
+ # Main Streamlit app
+ def main():
+     # set_page_config must be the first Streamlit call of the run
+     st.set_page_config(page_title="Multi Model Agent", page_icon=":books:")
+
+     if "conversation" not in st.session_state:
+         st.session_state.conversation = None
+         st.session_state.chat_history = []
+         st.session_state.vector_store = None
+
+     st.markdown("<h2 style='text-align: center;'>AI Agent 🤖</h2>", unsafe_allow_html=True)
+
+     with st.sidebar:
+         st.markdown('📖 API_KEYS [REPO](https://github.com/ANeuronI/RAG-AGENT)')
+         st.title("📤 Upload PDF")
+         docs = st.file_uploader(" ", type=["pdf"], accept_multiple_files=True)
+
+         file_details = []
+
+         if docs is not None:
+             for doc in docs:
+                 file_details.append({"FileName": doc.name})
+
+         with st.expander("Uploaded Files"):
+             if file_details:
+                 for details in file_details:
+                     st.write(f"File Name: {details['FileName']}")
+
+         st.subheader("Start Model 🧠")
+
+         groq_api_key = os.getenv("GROQ_API_KEY")
+         if groq_api_key:
+             st.success('Groq API key already provided!', icon='✅')
+         else:
+             groq_api_key = st.text_input('Enter Groq API key:', type='password', key='groq_api_key')
+             if groq_api_key and groq_api_key.startswith('gsk_') and len(groq_api_key) == 56:
+                 os.environ['GROQ_API_KEY'] = groq_api_key
+                 st.success('Groq API key provided!', icon='✅')
+             else:
+                 st.warning('Please enter a valid Groq API key!', icon='⚠️')
+
+         if st.button("Start Inference", key="start_inference") and docs:
+             with st.spinner("Processing..."):
+                 raw_text = extract_text_from_pdfs(docs)
+                 if raw_text:
+                     text_chunks = get_text_chunks(raw_text)
+                     vector_store = create_faiss_index(text_chunks)
+                     st.session_state.vector_store = vector_store
+                     st.write("FAISS Vector Store created successfully.")
+
+                     st.session_state.conversation, llm = get_conversation_chain(vector_store, groq_api_key)
+                     st.session_state.llm = llm
+                     st.session_state.web_agent = get_web_agent(groq_api_key)
+                 else:
+                     st.error("No text extracted from the documents.")
+
+     if st.session_state.conversation:
+         # Replay the stored chat history on every rerun
+         for message in st.session_state.chat_history:
+             role = "user" if message['role'] == 'user' else "assistant"
+             with st.chat_message(role):
+                 st.write(message["content"])
+
+         # text_input can return an empty string, so test for falsiness, not None
+         input_disabled = not groq_api_key
+
+         if prompt := st.chat_input("Ask your question here...", disabled=input_disabled):
+             st.session_state.chat_history.append({"role": "user", "content": prompt})
+             with st.chat_message("user"):
+                 st.write(prompt)
+
+             with st.chat_message("assistant"):
+                 with st.spinner("Thinking..."):
+                     response = st.session_state.conversation({"question": prompt})
+                     # Fall back to the web agent when retrieval cannot answer
+                     if "answer" in response and "I don't know" not in response["answer"]:
+                         st.session_state.chat_history.append({"role": "assistant", "content": response['answer']})
+                         st.write(response['answer'])
+                     else:
+                         with st.spinner("Searching the web..."):
+                             response = st.session_state.web_agent.run(prompt)
+                             st.session_state.chat_history.append({"role": "assistant", "content": response})
+                             st.write(response)
+
+ if __name__ == '__main__':
+     main()
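
The chain returned by get_conversation_chain is called with a dict and returns a dict; a minimal sketch of that contract (assuming a vector_store and a valid groq_api_key already exist):

    chain, llm = get_conversation_chain(vector_store, groq_api_key)
    result = chain({"question": "What does the document say about chunking?"})
    print(result["answer"])  # ConversationBufferMemory tracks chat_history across calls
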
README.md CHANGED
@@ -11,3 +11,36 @@ license: apache-2.0
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+
+ # Multi-Model LLM Agent
+
+ Welcome to the Multi-Model LLM Agent repository! This repository hosts two versions of our language model agent, each offering capabilities tailored to different needs.
+
+ ## Version 1: RAG Agent
+ - Basic web search functionality
+ - Implementation of RAG (Retrieval-Augmented Generation)
+
+ [Explore Version 1 (RAG Agent)](https://multi-model-rag-agent.streamlit.app/)
+
+ ## Version 2: Multi-Model Agent (Final Version)
+ - Advanced web search capabilities, including website scraping
+ - Enhanced RAG model with memory for retaining context across conversations
+ - Multi-query retrieval for handling complex information needs
+
+ [Explore Version 2 (Multi-Model Agent)](https://)
+
+ ## How to Use This Model
+
+ ### Obtain API Keys
+ Before using the Multi-Model Agent, obtain API keys from the following providers:
+
+ - **GROQ API keys:** obtain from [GROQ](https://console.groq.com/keys)
+ - **Replicate API keys:** obtain from [Replicate](https://replicate.com/meta/meta-llama-3-70b-instruct)
+
+ ### Integration Instructions
+ 1. **API key setup:** Insert your API keys into the designated configuration (see the sketch below).
+ 2. **Usage guide:** Refer to our documentation for integrating the API keys and using the Multi-Model Agent effectively.
+
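+ As a minimal sketch (assuming, as in `FINALAPP.py`, that keys live in `.streamlit/secrets.toml` or the environment), the app reads them like this:
+
+ ```python
+ import os
+ from dotenv import load_dotenv
+
+ # Plain KEY = "value" pairs in secrets.toml are dotenv-compatible
+ load_dotenv(os.path.join(".streamlit", "secrets.toml"))
+
+ groq_api_key = os.getenv("GROQ_API_KEY")      # consumed by ChatGroq
+ google_api_key = os.getenv("GOOGLE_API_KEY")  # consumed by the google-search tool
+ ```
+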
+ ## Notes
+ - **Security:** Keep your API keys secure and adhere to the terms of service of each provider.
_init_.py ADDED
@@ -0,0 +1 @@
+ # _init_.py
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ streamlit==1.36.0
+ pdfminer.six==20221105
+ langchain
+ langchain-community
+ faiss-cpu==1.7.3
+ langchain-groq
+ python-dotenv==1.0.0
+ langchain-huggingface
+ wikipedia
+ replicate
+ numexpr
+ google-api-python-client>=2.100.0
tools.py ADDED
@@ -0,0 +1,44 @@
+ from langchain_groq import ChatGroq
+ from langchain.chains import LLMChain
+ from langchain.prompts import PromptTemplate
+ from langchain.tools import Tool
+ import os
+
+ # Summarizer tool backed by a smaller Groq-hosted model
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+
+ if not GROQ_API_KEY:
+     raise ValueError("GROQ_API_KEY environment variable is not set.")
+
+ # Initialize ChatGroq for summarization
+ summarizer_llm = ChatGroq(
+     temperature=0.7,
+     model="llama3-8b-8192",
+     api_key=GROQ_API_KEY,
+     streaming=True,
+     verbose=True
+ )
+
+ # Define a prompt template for summarization
+ summarization_prompt = PromptTemplate(
+     input_variables=["text"],
+     template="Summarize the following content: {text}"
+ )
+
+ # Create the summarization chain
+ summarization_chain = LLMChain(
+     llm=summarizer_llm,
+     prompt=summarization_prompt
+ )
+
+ # Define the summarizer tool
+ def summarize_content_tool(text: str) -> str:
+     return summarization_chain.run(text=text)
+
+ summarizer_tool = Tool(
+     name="summarizer",
+     description="Summarizes content using a language model.",
+     func=summarize_content_tool
+ )
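
A quick standalone check of the summarizer tool (a sketch, assuming GROQ_API_KEY is set before the import, since tools.py validates it at import time):

    from tools import summarizer_tool

    # Tool.run dispatches to summarize_content_tool
    print(summarizer_tool.run("LangChain agents choose between tools based on their descriptions."))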