datawithsuman
committed on
Commit
•
a09734b
1
Parent(s):
682c36d
Update app.py
Browse files
app.py
CHANGED
@@ -14,6 +14,7 @@ from llama_index.retrievers.bm25 import BM25Retriever
|
|
14 |
from llama_index.core.retrievers import BaseRetriever
|
15 |
from llama_index.core.node_parser import SentenceSplitter
|
16 |
from llama_index.embeddings.openai import OpenAIEmbedding
|
|
|
17 |
from llmlingua import PromptCompressor
|
18 |
from rouge_score import rouge_scorer
|
19 |
from semantic_text_similarity.models import WebBertSimilarity
|
@@ -27,6 +28,13 @@ nest_asyncio.apply()
|
|
27 |
# openai.api_key = key
|
28 |
# os.environ["OPENAI_API_KEY"] = key
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
# Streamlit UI
|
31 |
st.title("Prompt Optimization for a Policy Bot")
|
32 |
|
@@ -38,6 +46,7 @@ if uploaded_files:
|
|
38 |
f.write(uploaded_file.getbuffer())
|
39 |
reader = SimpleDirectoryReader(input_files=[f"./data/{uploaded_file.name}"])
|
40 |
documents = reader.load_data()
|
|
|
41 |
st.success("File uploaded...")
|
42 |
|
43 |
# # Indexing
|
@@ -103,7 +112,8 @@ if uploaded_files:
|
|
103 |
hybrid_retriever = HybridRetriever(vector_retriever, bm25_retriever)
|
104 |
|
105 |
# Generation
|
106 |
-
model = "gpt-3.5-turbo"
|
|
|
107 |
|
108 |
# def get_context(query):
|
109 |
# contexts = kg_retriever.retrieve(query)
|
@@ -145,6 +155,10 @@ if uploaded_files:
|
|
145 |
with st.chat_message(message["role"]):
|
146 |
st.markdown(message["content"])
|
147 |
|
|
|
|
|
|
|
|
|
148 |
# Accept user input
|
149 |
if prompt := st.chat_input("Enter your query:"):
|
150 |
st.success("Fetching info...")
|
@@ -158,6 +172,11 @@ if uploaded_files:
|
|
158 |
context_list = get_context(prompt)
|
159 |
context = " ".join(context_list)
|
160 |
|
|
|
|
|
|
|
|
|
|
|
161 |
|
162 |
# Original prompt response
|
163 |
full_prompt = "\n\n".join([context + prompt])
|
@@ -168,66 +187,66 @@ if uploaded_files:
|
|
168 |
with st.chat_message("assistant"):
|
169 |
st.markdown(orig_res[3])
|
170 |
|
171 |
-
# Compressed Response
|
172 |
-
st.session_state.messages.append({"role": "assistant", "content": "Generating Optimized prompt response..."})
|
173 |
-
st.success("Generating Optimized prompt response...")
|
174 |
-
|
175 |
-
llm_lingua = PromptCompressor(
|
176 |
-
model_name="microsoft/llmlingua-2-xlm-roberta-large-meetingbank",
|
177 |
-
use_llmlingua2=True, device_map="cpu"
|
178 |
-
)
|
179 |
-
|
180 |
-
def prompt_compression(context, rate=0.5):
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
compressed_context = prompt_compression(context)
|
189 |
-
full_opt_prompt = "\n\n".join([compressed_context['compressed_prompt'] + prompt])
|
190 |
-
compressed_res = res(full_opt_prompt)
|
191 |
-
st.session_state.messages.append({"role": "assistant", "content": compressed_res[3]})
|
192 |
-
with st.chat_message("assistant"):
|
193 |
-
|
194 |
-
|
195 |
-
# Save token summary and evaluation details to session state
|
196 |
-
scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
|
197 |
-
scores = scorer.score(compressed_res[3],orig_res[3])
|
198 |
-
webert_model = WebBertSimilarity(device='cpu')
|
199 |
-
similarity_score = webert_model.predict([(compressed_res[3], orig_res[3])])[0] / 5 * 100
|
200 |
|
201 |
|
202 |
-
# Display token summary
|
203 |
-
st.session_state.messages.append({"role": "assistant", "content": "Token Length Summary..."})
|
204 |
-
st.success('Token Length Summary...')
|
205 |
-
st.session_state.messages.append({"role": "assistant", "content": f"Original Prompt has {orig_res[0]} tokens"})
|
206 |
-
st.write(f"Original Prompt has {orig_res[0]} tokens")
|
207 |
-
st.session_state.messages.append({"role": "assistant", "content": f"Optimized Prompt has {compressed_res[0]} tokens"})
|
208 |
-
st.write(f"Optimized Prompt has {compressed_res[0]} tokens")
|
209 |
-
|
210 |
-
st.session_state.messages.append({"role": "assistant", "content": "Comparing Original and Optimized Prompt Response..."})
|
211 |
-
st.success("Comparing Original and Optimized Prompt Response...")
|
212 |
-
st.session_state.messages.append({"role": "assistant", "content": f"Rouge Score : {scores['rougeL'].fmeasure * 100}"})
|
213 |
-
st.write(f"Rouge Score : {scores['rougeL'].fmeasure * 100}")
|
214 |
-
st.session_state.messages.append({"role": "assistant", "content": f"Semantic Text Similarity Score : {similarity_score}"})
|
215 |
-
st.write(f"Semantic Text Similarity Score : {similarity_score}")
|
216 |
-
|
217 |
-
st.write(" ")
|
218 |
-
# origin_tokens = compressed_context['origin_tokens']
|
219 |
-
# compressed_tokens = compressed_context['compressed_tokens']
|
220 |
-
origin_tokens = orig_res[0]
|
221 |
-
compressed_tokens = compressed_res[0]
|
222 |
-
gpt_saving = (origin_tokens - compressed_tokens) * 0.06 / 1000
|
223 |
-
claude_saving = (origin_tokens - compressed_tokens) * 0.015 / 1000
|
224 |
-
mistral_saving = (origin_tokens - compressed_tokens) * 0.004 / 1000
|
225 |
-
# st.session_state.messages.append({"role": "assistant", "content": f"""The optimized prompt has saved ${gpt_saving:.4f} in GPT4, ${mistral_saving:.4f} in Mistral"""})
|
226 |
-
# st.success(f"""The optimized prompt has saved ${gpt_saving:.4f} in GPT4, ${mistral_saving:.4f} in Mistral""")
|
227 |
-
st.session_state.messages.append({"role": "assistant", "content": f"The optimized prompt has ${gpt_saving:.4f} saved in GPT-4."})
|
228 |
-
st.success(f"The optimized prompt has ${gpt_saving:.4f} saved in GPT-4.")
|
229 |
-
|
230 |
-
st.success("Downloading Optimized Prompt...")
|
231 |
-
st.download_button(label = "Download Optimized Prompt",
|
232 |
-
|
233 |
|
|
|
14 |
from llama_index.core.retrievers import BaseRetriever
|
15 |
from llama_index.core.node_parser import SentenceSplitter
|
16 |
from llama_index.embeddings.openai import OpenAIEmbedding
|
17 |
+
from llama_index.llms.mistralai import MistralAI
|
18 |
from llmlingua import PromptCompressor
|
19 |
from rouge_score import rouge_scorer
|
20 |
from semantic_text_similarity.models import WebBertSimilarity
|
|
|
28 |
# openai.api_key = key
|
29 |
# os.environ["OPENAI_API_KEY"] = key
|
30 |
|
31 |
+
# key = os.getenv('MISTRAL_API_KEY')
|
32 |
+
# os.environ["MISTRAL_API_KEY"] = key
|
33 |
+
|
34 |
+
# Anthropic credentials
|
35 |
+
key = os.getenv('CLAUDE_API_KEY')
|
36 |
+
os.environ["ANTHROPIC_API_KEY"] = key
|
37 |
+
|
38 |
# Streamlit UI
|
39 |
st.title("Prompt Optimization for a Policy Bot")
|
40 |
|
|
|
46 |
f.write(uploaded_file.getbuffer())
|
47 |
reader = SimpleDirectoryReader(input_files=[f"./data/{uploaded_file.name}"])
|
48 |
documents = reader.load_data()
|
49 |
+
st.write(documents)
|
50 |
st.success("File uploaded...")
|
51 |
|
52 |
# # Indexing
|
|
|
112 |
hybrid_retriever = HybridRetriever(vector_retriever, bm25_retriever)
|
113 |
|
114 |
# Generation
|
115 |
+
# model = "gpt-3.5-turbo"
|
116 |
+
model = "claude-3-opus-20240229"
|
117 |
|
118 |
# def get_context(query):
|
119 |
# contexts = kg_retriever.retrieve(query)
|
|
|
155 |
with st.chat_message(message["role"]):
|
156 |
st.markdown(message["content"])
|
157 |
|
158 |
+
# Summarize
|
159 |
+
full_prompt = "\n\n".join([context + prompt])
|
160 |
+
orig_res = res(full_prompt)
|
161 |
+
|
162 |
# Accept user input
|
163 |
if prompt := st.chat_input("Enter your query:"):
|
164 |
st.success("Fetching info...")
|
|
|
172 |
context_list = get_context(prompt)
|
173 |
context = " ".join(context_list)
|
174 |
|
175 |
+
# # Summarize
|
176 |
+
# full_prompt = "\n\n".join([context + prompt])
|
177 |
+
# orig_res = res(full_prompt)
|
178 |
+
|
179 |
+
|
180 |
|
181 |
# Original prompt response
|
182 |
full_prompt = "\n\n".join([context + prompt])
|
|
|
187 |
with st.chat_message("assistant"):
|
188 |
st.markdown(orig_res[3])
|
189 |
|
190 |
+
# # Compressed Response
|
191 |
+
# st.session_state.messages.append({"role": "assistant", "content": "Generating Optimized prompt response..."})
|
192 |
+
# st.success("Generating Optimized prompt response...")
|
193 |
+
|
194 |
+
# llm_lingua = PromptCompressor(
|
195 |
+
# model_name="microsoft/llmlingua-2-xlm-roberta-large-meetingbank",
|
196 |
+
# use_llmlingua2=True, device_map="cpu"
|
197 |
+
# )
|
198 |
+
|
199 |
+
# def prompt_compression(context, rate=0.5):
|
200 |
+
# compressed_context = llm_lingua.compress_prompt(
|
201 |
+
# context,
|
202 |
+
# rate=rate,
|
203 |
+
# force_tokens=["!", ".", "?", "\n"],
|
204 |
+
# drop_consecutive=True,
|
205 |
+
# )
|
206 |
+
# return compressed_context
|
207 |
+
# compressed_context = prompt_compression(context)
|
208 |
+
# full_opt_prompt = "\n\n".join([compressed_context['compressed_prompt'] + prompt])
|
209 |
+
# compressed_res = res(full_opt_prompt)
|
210 |
+
# st.session_state.messages.append({"role": "assistant", "content": compressed_res[3]})
|
211 |
+
# with st.chat_message("assistant"):
|
212 |
+
# st.markdown(compressed_res[3])
|
213 |
+
|
214 |
+
# # Save token summary and evaluation details to session state
|
215 |
+
# scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
|
216 |
+
# scores = scorer.score(compressed_res[3],orig_res[3])
|
217 |
+
# webert_model = WebBertSimilarity(device='cpu')
|
218 |
+
# similarity_score = webert_model.predict([(compressed_res[3], orig_res[3])])[0] / 5 * 100
|
219 |
|
220 |
|
221 |
+
# # Display token summary
|
222 |
+
# st.session_state.messages.append({"role": "assistant", "content": "Token Length Summary..."})
|
223 |
+
# st.success('Token Length Summary...')
|
224 |
+
# st.session_state.messages.append({"role": "assistant", "content": f"Original Prompt has {orig_res[0]} tokens"})
|
225 |
+
# st.write(f"Original Prompt has {orig_res[0]} tokens")
|
226 |
+
# st.session_state.messages.append({"role": "assistant", "content": f"Optimized Prompt has {compressed_res[0]} tokens"})
|
227 |
+
# st.write(f"Optimized Prompt has {compressed_res[0]} tokens")
|
228 |
+
|
229 |
+
# st.session_state.messages.append({"role": "assistant", "content": "Comparing Original and Optimized Prompt Response..."})
|
230 |
+
# st.success("Comparing Original and Optimized Prompt Response...")
|
231 |
+
# st.session_state.messages.append({"role": "assistant", "content": f"Rouge Score : {scores['rougeL'].fmeasure * 100}"})
|
232 |
+
# st.write(f"Rouge Score : {scores['rougeL'].fmeasure * 100}")
|
233 |
+
# st.session_state.messages.append({"role": "assistant", "content": f"Semantic Text Similarity Score : {similarity_score}"})
|
234 |
+
# st.write(f"Semantic Text Similarity Score : {similarity_score}")
|
235 |
+
|
236 |
+
# st.write(" ")
|
237 |
+
# # origin_tokens = compressed_context['origin_tokens']
|
238 |
+
# # compressed_tokens = compressed_context['compressed_tokens']
|
239 |
+
# origin_tokens = orig_res[0]
|
240 |
+
# compressed_tokens = compressed_res[0]
|
241 |
+
# gpt_saving = (origin_tokens - compressed_tokens) * 0.06 / 1000
|
242 |
+
# claude_saving = (origin_tokens - compressed_tokens) * 0.015 / 1000
|
243 |
+
# mistral_saving = (origin_tokens - compressed_tokens) * 0.004 / 1000
|
244 |
+
# # st.session_state.messages.append({"role": "assistant", "content": f"""The optimized prompt has saved ${gpt_saving:.4f} in GPT4, ${mistral_saving:.4f} in Mistral"""})
|
245 |
+
# # st.success(f"""The optimized prompt has saved ${gpt_saving:.4f} in GPT4, ${mistral_saving:.4f} in Mistral""")
|
246 |
+
# st.session_state.messages.append({"role": "assistant", "content": f"The optimized prompt has ${gpt_saving:.4f} saved in GPT-4."})
|
247 |
+
# st.success(f"The optimized prompt has ${gpt_saving:.4f} saved in GPT-4.")
|
248 |
+
|
249 |
+
# st.success("Downloading Optimized Prompt...")
|
250 |
+
# st.download_button(label = "Download Optimized Prompt",
|
251 |
+
# data = full_opt_prompt, file_name='./data/optimized_prompt.txt')
|
252 |
|