Zwea Htet committed
Commit b83dc9c
1 Parent(s): 3455cec

updated llama index demo

app.py CHANGED
@@ -1,13 +1,9 @@
-# https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
-
 import os
-
 import openai
 import requests
 import streamlit as st
 
 from utils.util import *
-
 from langchain.memory import ConversationBufferMemory
 
 SAVE_DIR = "uploaded_files"
@@ -17,30 +13,24 @@ os.makedirs(SAVE_DIR, exist_ok=True)
 def init_session_state():
     if "openai_api_key" not in st.session_state:
         st.session_state.openai_api_key = ""
-
     if "uploaded_files" not in st.session_state:
         st.session_state.uploaded_files = os.listdir(SAVE_DIR)
+    if "huggingface_token" not in st.session_state:
+        st.session_state.huggingface_token = ""
 
 
 init_session_state()
 
 st.set_page_config(page_title="RegBotBeta", page_icon="📜🤖")
-
 st.title("Welcome to RegBotBeta2.0")
-st.header("Powered by `LlamaIndex🦙`, `Langchain🦜🔗 ` and `OpenAI API`")
+st.header("Powered by `LlamaIndex🦙`, `Langchain🦜🔗` and `OpenAI API`")
 
 
-def init_session_state():
-    if "huggingface_token" not in st.session_state:
-        st.session_state.huggingface_token = ""
-
-
-init_session_state()
-
 uploaded_files = st.file_uploader(
     "Upload Files",
     accept_multiple_files=True,
     type=["pdf", "docx", "txt", "csv"],
+    label_visibility="hidden",
 )
 
 if uploaded_files:
@@ -48,14 +38,27 @@ if uploaded_files:
         if file not in st.session_state.uploaded_files:
             # add the file to session state
             st.session_state.uploaded_files.append(file.name)
-
             # save the file to the sample_data directory
             with open(os.path.join(SAVE_DIR, file.name), "wb") as f:
                 f.write(file.getbuffer())
-
     st.success("File(s) uploaded successfully!")
 
+
+def delete_file(filename):
+    """Delete file from session state and local filesystem."""
+    if filename in st.session_state.uploaded_files and os.path.exists(
+        os.path.join(SAVE_DIR, filename)
+    ):
+        st.session_state.uploaded_files.remove(filename)
+        os.remove(os.path.join(SAVE_DIR, filename))
+        st.success(f"Deleted {filename}!")
+        st.rerun()
+
+
 if st.session_state.uploaded_files:
     st.write("Uploaded Files:")
-    for i, filename in enumerate(st.session_state.uploaded_files, start=1):
-        st.write(f"{i}. {filename}")
+    for index, filename in enumerate(st.session_state.uploaded_files):
+        col1, col2 = st.columns([4, 1])
+        col1.write(filename)
+        if col2.button("Delete", key=f"delete_{index}"):
+            delete_file(filename)
models/llamaCustom.py CHANGED
@@ -54,7 +54,7 @@ Use the following example format for your answer:
 Answer:
 The answer to the user question.
 Reference:
-The list of references to the specific sections of the documents that support your answer.
+The list of references (such as page number, title, chapter, section) to the specific sections of the documents that support your answer.
 [END_FORMAT]
 """
 
@@ -184,9 +184,13 @@ class LlamaCustom:
 
     def get_response(self, query_str: str, chat_history: List[ChatMessage]):
         # https://docs.llamaindex.ai/en/stable/module_guides/deploying/chat_engines/
+        # https://docs.llamaindex.ai/en/stable/examples/query_engine/citation_query_engine/
+        # https://docs.llamaindex.ai/en/stable/examples/query_engine/knowledge_graph_rag_query_engine/
         query_engine = self.index.as_query_engine(
-            text_qa_template=PromptTemplate(QUERY_ENGINE_QA_TEMPLATE),
-            refine_template=PromptTemplate(QUERY_ENGINE_REFINE_TEMPLATE),
+            text_qa_template=PromptTemplate(QUERY_ENGINE_QA_TEMPLATE + ANSWER_FORMAT),
+            refine_template=PromptTemplate(
+                QUERY_ENGINE_REFINE_TEMPLATE
+            ),  # passing ANSWER_FORMAT here will not give the desired output; need to use the output parser from LlamaIndex?
             verbose=self.verbose,
         )
         # chat_engine = self.index.as_chat_engine(
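Note: the open question in the refine_template comment (how to enforce ANSWER_FORMAT on the refine step) is what LlamaIndex's output-parser support addresses. A minimal sketch, not part of this commit, using the LangchainOutputParser bridge; the "answer"/"reference" schema names are hypothetical, mirroring the prompt format above:

from langchain.output_parsers import ResponseSchema, StructuredOutputParser
from llama_index.core.output_parsers import LangchainOutputParser
from llama_index.llms.openai import OpenAI

# hypothetical schemas mirroring the Answer:/Reference: format above
response_schemas = [
    ResponseSchema(name="answer", description="The answer to the user question."),
    ResponseSchema(
        name="reference",
        description="Page numbers, titles, chapters or sections that support the answer.",
    ),
]
output_parser = LangchainOutputParser(
    StructuredOutputParser.from_response_schemas(response_schemas)
)

# attaching the parser to the LLM should inject the format instructions into
# both the text_qa and refine prompts and parse the model output on the way back
llm = OpenAI(output_parser=output_parser)
query_engine = index.as_query_engine(llm=llm)  # index stands in for self.index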
models/vector_database.py ADDED
@@ -0,0 +1,34 @@
+from pinecone import Pinecone, ServerlessSpec
+from llama_index.vector_stores.pinecone import PineconeVectorStore
+from dotenv import load_dotenv
+
+import os
+
+load_dotenv()
+
+# Pinecone Vector Database
+pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
+pc_index_name = "llama-integration-pinecone"
+# pc_index_name = "openai-embeddings"
+pc_indexes = pc.list_indexes()
+
+# Check if the index already exists
+def index_exists(index_name):
+    for index in pc_indexes:
+        if index["name"] == index_name:
+            return True
+    return False
+
+# Create the index if it doesn't exist
+if not index_exists(pc_index_name):
+    pc.create_index(
+        name=pc_index_name,
+        dimension=1536,
+        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
+    )
+
+# Initialize your index
+pinecone_index = pc.Index(pc_index_name)
+
+# Define the vector store
+pinecone_vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
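The dimension=1536 setting matches the output size of OpenAI's text-embedding-ada-002, consistent with the switch to openai_embed_model below. A minimal usage sketch, not part of this commit, wiring the store into an index; it mirrors the commented-out get_pinecone_index() in pages/llama_custom_demo.py, and "uploaded_files/example.pdf" is a hypothetical file in SAVE_DIR:

from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex

from models.vector_database import pinecone_vector_store

# load one uploaded document (hypothetical filename)
reader = SimpleDirectoryReader(input_files=["uploaded_files/example.pdf"])
docs = reader.load_data(show_progress=True)

# route the index's vector storage to Pinecone instead of the local vectorStores dir
storage_context = StorageContext.from_defaults(vector_store=pinecone_vector_store)
index = VectorStoreIndex.from_documents(
    documents=docs, show_progress=True, storage_context=storage_context
)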
pages/llama_custom_demo.py CHANGED
@@ -7,6 +7,8 @@ from typing import List
 from models.llms import load_llm, integrated_llms
 from models.embeddings import hf_embed_model, openai_embed_model
 from models.llamaCustom import LlamaCustom
+
+# from models.vector_database import pinecone_vector_store
 from utils.chatbox import show_previous_messages, show_chat_input
 from utils.util import validate_openai_api_key
 
@@ -30,7 +32,8 @@ VECTOR_STORE_DIR = "vectorStores"
 HF_REPO_ID = "zhtet/RegBotBeta"
 
 # global
-Settings.embed_model = hf_embed_model
+# Settings.embed_model = hf_embed_model
+Settings.embed_model = openai_embed_model
 
 # huggingface api
 hf_api = HfApi()
@@ -62,9 +65,10 @@ def init_session_state():
 
 
 # @st.cache_resource
-def index_docs(
+def get_index(
     filename: str,
 ) -> VectorStoreIndex:
+    """This function loads the index from storage if it exists, otherwise it creates a new index from the document."""
     try:
         index_path = pathlib.Path(f"{VECTOR_STORE_DIR}/{filename.replace('.', '_')}")
         if pathlib.Path.exists(index_path):
@@ -89,6 +93,23 @@ def index_docs(
     return index
 
 
+# def get_pinecone_index(filename: str) -> VectorStoreIndex:
+#     """This function loads the index from Pinecone if it exists, otherwise it creates a new index from the document."""
+#     reader = SimpleDirectoryReader(input_files=[f"{SAVE_DIR}/{filename}"])
+#     docs = reader.load_data(show_progress=True)
+#     storage_context = StorageContext.from_defaults(vector_store=pinecone_vector_store)
+#     index = VectorStoreIndex.from_documents(
+#         documents=docs, show_progress=True, storage_context=storage_context
+#     )
+
+#     return index
+
+
+def get_chroma_index(filename: str) -> VectorStoreIndex:
+    """This function loads the index from Chroma if it exists, otherwise it creates a new index from the document."""
+    pass
+
+
 def check_api_key(model_name: str, source: str):
     if source.startswith("openai"):
         if not st.session_state.openai_api_key:
@@ -164,6 +185,13 @@ with tab1:
         label="Choose a file to chat with: ", options=os.listdir(SAVE_DIR)
     )
 
+    if st.button("Clear all api keys"):
+        st.session_state.openai_api_key = ""
+        st.session_state.replicate_api_token = ""
+        st.session_state.hf_token = ""
+        st.success("All API keys cleared!")
+        st.rerun()
+
     if st.button("Submit", key="submit", help="Submit the form"):
         with st.status("Loading ...", expanded=True) as status:
             try:
@@ -176,7 +204,8 @@ with tab1:
                 Settings.llm = llama_llm
 
                 st.write("Processing Data ...")
-                index = index_docs(selected_file)
+                index = get_index(selected_file)
+                # index = get_pinecone_index(selected_file)
 
                 st.write("Finishing Up ...")
                 llama_custom = LlamaCustom(model_name=selected_llm_name, index=index)
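The new get_chroma_index() is still a stub. A minimal sketch of what its body might look like, not part of this commit, assuming chromadb and llama-index-vector-stores-chroma are installed (neither is in requirements.txt yet) and using a hypothetical local "./chroma_db" path:

import chromadb
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore


def get_chroma_index(filename: str) -> VectorStoreIndex:
    """Build an index backed by a persistent local Chroma collection."""
    db = chromadb.PersistentClient(path="./chroma_db")  # hypothetical path
    # one collection per file; dots replaced to keep the name Chroma-safe
    collection = db.get_or_create_collection(filename.replace(".", "_"))
    vector_store = ChromaVectorStore(chroma_collection=collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    reader = SimpleDirectoryReader(input_files=[f"{SAVE_DIR}/{filename}"])
    docs = reader.load_data(show_progress=True)
    return VectorStoreIndex.from_documents(
        documents=docs, storage_context=storage_context, show_progress=True
    )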
requirements.txt CHANGED
@@ -7,11 +7,13 @@ langchain_pinecone
 openai
 faiss-cpu
 python-dotenv
-streamlit==1.29.0
+streamlit>=1.24.0
 huggingface_hub<0.21.0
 pypdf
 llama-index-llms-huggingface>=0.1.4
 llama-index-embeddings-langchain>=0.1.2
+llama-index-vector-stores-pinecone
+pinecone-client>=3.0.0
 replicate>=0.25.1
 llama-index-llms-replicate
 sentence-transformers>=2.6.1