Corrected major bug
Browse files- app.py +149 -64
- climateqa/chains.py +42 -5
app.py
CHANGED
@@ -20,7 +20,8 @@ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
|
20 |
|
21 |
# ClimateQ&A imports
|
22 |
from climateqa.llm import get_llm
|
23 |
-
from climateqa.chains import
|
|
|
24 |
from climateqa.vectorstore import get_pinecone_vectorstore
|
25 |
from climateqa.retriever import ClimateQARetriever
|
26 |
from climateqa.prompts import audience_prompts
|
@@ -142,36 +143,49 @@ vectorstore = get_pinecone_vectorstore(embeddings_function)
|
|
142 |
|
143 |
from threading import Thread
|
144 |
|
|
|
145 |
|
146 |
-
def answer_user(
|
147 |
-
return
|
148 |
|
149 |
-
def
|
|
|
150 |
|
|
|
151 |
|
152 |
-
|
|
|
|
|
153 |
|
154 |
llm_reformulation = get_llm(max_tokens = 512,temperature = 0.0,verbose = True,streaming = False)
|
155 |
-
llm_streaming = get_llm(max_tokens = 1024,temperature = 0.0,verbose = True,streaming = True,
|
156 |
-
callbacks=[StreamingGradioCallbackHandler(Q),StreamingStdOutCallbackHandler()],
|
157 |
-
)
|
158 |
-
|
159 |
retriever = ClimateQARetriever(vectorstore=vectorstore,sources = sources,k_summary = 3,k_total = 10)
|
160 |
-
|
161 |
|
|
|
|
|
|
|
|
|
162 |
|
163 |
-
|
164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
|
166 |
-
# if len(message) <= 2:
|
167 |
-
# complete_response = "**⚠️ No relevant passages found in the climate science reports (IPCC and IPBES), you may want to ask a more specific question (specifying your question on climate and biodiversity issues).**"
|
168 |
-
# history[-1][1] += "\n\n" + complete_response
|
169 |
-
# return "", history, ""
|
170 |
|
171 |
-
|
172 |
-
response = chain({"query":query,"audience":audience})
|
173 |
-
Q.put(response)
|
174 |
-
Q.put(job_done)
|
175 |
|
176 |
if audience == "Children":
|
177 |
audience_prompt = audience_prompts["children"]
|
@@ -182,6 +196,57 @@ def answer_bot(message,history,audience,sources):
|
|
182 |
else:
|
183 |
audience_prompt = audience_prompts["experts"]
|
184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
# history_langchain_format = []
|
186 |
# for human, ai in history:
|
187 |
# history_langchain_format.append(HumanMessage(content=human))
|
@@ -190,41 +255,42 @@ def answer_bot(message,history,audience,sources):
|
|
190 |
# for next_token, content in stream(message):
|
191 |
# yield(content)
|
192 |
|
193 |
-
thread = Thread(target=threaded_chain, kwargs={"query":message,"audience":audience_prompt})
|
194 |
-
thread.start()
|
195 |
-
|
196 |
-
history[-1][1] = ""
|
197 |
-
while True:
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
|
|
228 |
|
229 |
#---------------------------------------------------------------------------
|
230 |
# ClimateQ&A core functions
|
@@ -375,6 +441,8 @@ def log_on_azure(file, logs, share_client):
|
|
375 |
file_client.upload_file(str(logs))
|
376 |
|
377 |
|
|
|
|
|
378 |
|
379 |
|
380 |
|
@@ -419,7 +487,9 @@ with gr.Blocks(title="🌍 Climate Q&A", css="style.css", theme=theme) as demo:
|
|
419 |
show_copy_button=True,show_label = False,elem_id="chatbot",layout = "panel",avatar_images = ("assets/logo4.png",None))
|
420 |
|
421 |
# bot.like(vote,None,None)
|
422 |
-
|
|
|
|
|
423 |
with gr.Row(elem_id = "input-message"):
|
424 |
textbox=gr.Textbox(placeholder="Ask me anything here!",show_label=False,scale=1,lines = 1,interactive = True)
|
425 |
# submit_button = gr.Button(">",scale = 1,elem_id = "submit-button")
|
@@ -472,12 +542,14 @@ with gr.Blocks(title="🌍 Climate Q&A", css="style.css", theme=theme) as demo:
|
|
472 |
)
|
473 |
|
474 |
with gr.Tab("📚 Citations",elem_id = "tab-citations"):
|
475 |
-
sources_textbox = gr.
|
|
|
476 |
|
477 |
with gr.Tab("⚙️ Configuration",elem_id = "tab-config"):
|
478 |
|
479 |
gr.Markdown("Reminder: You can talk in any language, ClimateQ&A is multi-lingual!")
|
480 |
|
|
|
481 |
dropdown_sources = gr.CheckboxGroup(
|
482 |
["IPCC", "IPBES"],
|
483 |
label="Select reports",
|
@@ -492,14 +564,27 @@ with gr.Blocks(title="🌍 Climate Q&A", css="style.css", theme=theme) as demo:
|
|
492 |
interactive=True,
|
493 |
)
|
494 |
|
|
|
|
|
|
|
|
|
495 |
|
496 |
# textbox.submit(predict_climateqa,[textbox,bot],[None,bot,sources_textbox])
|
497 |
-
|
498 |
-
|
499 |
-
)
|
500 |
-
|
501 |
-
|
502 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
503 |
# submit_button.click(answer_user, [textbox, bot], [textbox, bot], queue=True).then(
|
504 |
# answer_bot, [textbox,bot,dropdown_audience,dropdown_sources], [textbox,bot,sources_textbox]
|
505 |
# )
|
@@ -688,6 +773,6 @@ Or around 2 to 4 times more than a typical Google search.
|
|
688 |
"""
|
689 |
)
|
690 |
|
691 |
-
demo.queue(concurrency_count=
|
692 |
|
693 |
demo.launch()
|
|
|
20 |
|
21 |
# ClimateQ&A imports
|
22 |
from climateqa.llm import get_llm
|
23 |
+
from climateqa.chains import load_qa_chain_with_docs,load_qa_chain_with_text
|
24 |
+
from climateqa.chains import load_reformulation_chain
|
25 |
from climateqa.vectorstore import get_pinecone_vectorstore
|
26 |
from climateqa.retriever import ClimateQARetriever
|
27 |
from climateqa.prompts import audience_prompts
|
|
|
143 |
|
144 |
from threading import Thread
|
145 |
|
146 |
+
import json
|
147 |
|
148 |
+
def answer_user(query,query_example,history):
    """Echo the typed query into the chat with a placeholder bot reply.

    Args:
        query: Text the user typed.
        query_example: Unused here; kept so the signature stays parallel
            with ``answer_user_example``.
        history: Chat history as a list of ``[user, bot]`` pairs, or
            ``None`` when there is no previous exchange.

    Returns:
        A ``(query, new_history)`` tuple. ``new_history`` is a new list that
        ends with ``[query, ". . ."]``; the list passed in is not mutated.
    """
    # Treat a missing history as an empty conversation instead of crashing
    # on ``None + list``.
    previous_turns = history if history is not None else []
    return query, previous_turns + [[query, ". . ."]]
|
150 |
|
151 |
+
def answer_user_example(query,query_example,history):
    """Echo the selected example question into the chat with a placeholder reply.

    Args:
        query: Unused here; kept so the signature stays parallel with
            ``answer_user``.
        query_example: The example question that was selected.
        history: Chat history as a list of ``[user, bot]`` pairs, or
            ``None`` when there is no previous exchange.

    Returns:
        A ``(query_example, new_history)`` tuple. ``new_history`` is a new
        list that ends with ``[query_example, ". . ."]``; the list passed in
        is not mutated.
    """
    # Treat a missing history as an empty conversation instead of crashing
    # on ``None + list``.
    previous_turns = history if history is not None else []
    return query_example, previous_turns + [[query_example, ". . ."]]
|
153 |
|
154 |
+
def fetch_sources(query,sources):
    """Reformulate the user query and retrieve the passages used to answer it.

    Args:
        query: Raw user message.
        sources: Names of the report collections to search. When empty or
            ``None`` the search falls back to ``["IPCC"]``.

    Returns:
        A 5-tuple ``("", citations_text, docs_text, question, language)``:

        * the first element is always the empty string;
        * ``citations_text`` is the concatenation of ``make_html_source``
          for every retrieved document (numbered from 1), or a warning
          message when nothing was retrieved;
        * ``docs_text`` is the documents' ``page_content`` joined by blank
          lines, or ``""`` when nothing was retrieved;
        * ``question`` and ``language`` are the ``"question"`` and
          ``"language"`` entries of the reformulation chain output.
    """
    # Prepare default values
    if not sources:
        sources = ["IPCC"]

    llm_reformulation = get_llm(max_tokens = 512,temperature = 0.0,verbose = True,streaming = False)
    retriever = ClimateQARetriever(vectorstore=vectorstore,sources = sources,k_summary = 3,k_total = 10)
    reformulation_chain = load_reformulation_chain(llm_reformulation)

    # Reformulate the query; the chain output also carries the language.
    output_reformulation = reformulation_chain({"query":query})
    question = output_reformulation["question"]
    language = output_reformulation["language"]

    # Retrieve docs using the reformulated question, not the raw query.
    docs = retriever.get_relevant_documents(question)

    if docs:
        # Citations are rendered now so they can be shown before the answer.
        citations_text = "".join(make_html_source(d, i) for i, d in enumerate(docs, 1))
        docs_text = "\n\n".join(d.page_content for d in docs)
    else:
        citations_text = "**⚠️ No relevant passages found in the climate science reports (IPCC and IPBES), you may want to ask a more specific question (specifying your question on climate and biodiversity issues).**"
        docs_text = ""

    return "",citations_text,docs_text,question,language
|
186 |
|
|
|
|
|
|
|
|
|
187 |
|
188 |
+
def answer_bot(query,history,docs,question,language,audience):
|
|
|
|
|
|
|
189 |
|
190 |
if audience == "Children":
|
191 |
audience_prompt = audience_prompts["children"]
|
|
|
196 |
else:
|
197 |
audience_prompt = audience_prompts["experts"]
|
198 |
|
199 |
+
# Prepare Queue for streaming LLMs
|
200 |
+
Q = SimpleQueue()
|
201 |
+
|
202 |
+
llm_streaming = get_llm(max_tokens = 1024,temperature = 0.0,verbose = True,streaming = True,
|
203 |
+
callbacks=[StreamingGradioCallbackHandler(Q),StreamingStdOutCallbackHandler()],
|
204 |
+
)
|
205 |
+
|
206 |
+
qa_chain = load_qa_chain_with_text(llm_streaming)
|
207 |
+
|
208 |
+
def threaded_chain(question,audience,language,docs):
    """Run the QA chain and push its result onto the streaming queue.

    Intended as a worker-thread target. The chain output is put on ``Q``,
    followed by the ``job_done`` sentinel. The sentinel is sent even when
    the chain raises, because the consumer blocks on ``Q.get`` until it
    receives it.

    Args:
        question: Question passed to the chain as ``"question"``.
        audience: Audience prompt passed to the chain as ``"audience"``.
        language: Answer language passed to the chain as ``"language"``.
        docs: Passage text passed to the chain as ``"summaries"``.
    """
    try:
        response = qa_chain({"question":question,"audience":audience,"language":language,"summaries":docs})
        Q.put(response)
    except Exception as e:
        # Best-effort: report the failure without crashing the worker thread.
        print(e)
    finally:
        # Always signal completion so the consumer loop cannot block forever.
        Q.put(job_done)
|
215 |
+
|
216 |
+
history[-1][1] = ""
|
217 |
+
|
218 |
+
textbox=gr.Textbox(placeholder=". . .",show_label=False,scale=1,lines = 1,interactive = False)
|
219 |
+
|
220 |
+
|
221 |
+
if len(docs) > 0:
|
222 |
+
|
223 |
+
# Start thread for streaming
|
224 |
+
thread = Thread(
|
225 |
+
target=threaded_chain,
|
226 |
+
kwargs={"question":question,"audience":audience_prompt,"language":language,"docs":docs}
|
227 |
+
)
|
228 |
+
thread.start()
|
229 |
+
|
230 |
+
while True:
|
231 |
+
next_item = Q.get(block=True) # Blocks until an input is available
|
232 |
+
|
233 |
+
if next_item is job_done:
|
234 |
+
break
|
235 |
+
elif isinstance(next_item, str):
|
236 |
+
new_paragraph = history[-1][1] + next_item
|
237 |
+
new_paragraph = parse_output_llm_with_sources(new_paragraph)
|
238 |
+
history[-1][1] = new_paragraph
|
239 |
+
yield textbox,history
|
240 |
+
else:
|
241 |
+
pass
|
242 |
+
thread.join()
|
243 |
+
else:
|
244 |
+
complete_response = "**⚠️ No relevant passages found in the climate science reports (IPCC and IPBES), you may want to ask a more specific question (specifying your question on climate and biodiversity issues).**"
|
245 |
+
history[-1][1] += complete_response
|
246 |
+
yield "",history
|
247 |
+
|
248 |
+
|
249 |
+
|
250 |
# history_langchain_format = []
|
251 |
# for human, ai in history:
|
252 |
# history_langchain_format.append(HumanMessage(content=human))
|
|
|
255 |
# for next_token, content in stream(message):
|
256 |
# yield(content)
|
257 |
|
258 |
+
# thread = Thread(target=threaded_chain, kwargs={"query":message,"audience":audience_prompt})
|
259 |
+
# thread.start()
|
260 |
+
|
261 |
+
# history[-1][1] = ""
|
262 |
+
# while True:
|
263 |
+
# next_item = Q.get(block=True) # Blocks until an input is available
|
264 |
+
|
265 |
+
# print(type(next_item))
|
266 |
+
# if next_item is job_done:
|
267 |
+
# continue
|
268 |
+
|
269 |
+
# elif isinstance(next_item, dict): # assuming LLMResult is a dictionary
|
270 |
+
# response = next_item
|
271 |
+
# if "source_documents" in response and len(response["source_documents"]) > 0:
|
272 |
+
# sources_text = []
|
273 |
+
# for i, d in enumerate(response["source_documents"], 1):
|
274 |
+
# sources_text.append(make_html_source(d, i))
|
275 |
+
# sources_text = "\n\n".join([f"Query used for retrieval:\n{response['question']}"] + sources_text)
|
276 |
+
# # history[-1][1] += next_item["answer"]
|
277 |
+
# # history[-1][1] += "\n\n" + sources_text
|
278 |
+
# yield "", history, sources_text
|
279 |
+
|
280 |
+
# else:
|
281 |
+
# sources_text = "⚠️ No relevant passages found in the scientific reports (IPCC and IPBES)"
|
282 |
+
# complete_response = "**⚠️ No relevant passages found in the climate science reports (IPCC and IPBES), you may want to ask a more specific question (specifying your question on climate and biodiversity issues).**"
|
283 |
+
# history[-1][1] += "\n\n" + complete_response
|
284 |
+
# yield "", history, sources_text
|
285 |
+
# break
|
286 |
+
|
287 |
+
# elif isinstance(next_item, str):
|
288 |
+
# new_paragraph = history[-1][1] + next_item
|
289 |
+
# new_paragraph = parse_output_llm_with_sources(new_paragraph)
|
290 |
+
# history[-1][1] = new_paragraph
|
291 |
+
# yield "", history, ""
|
292 |
+
|
293 |
+
# thread.join()
|
294 |
|
295 |
#---------------------------------------------------------------------------
|
296 |
# ClimateQ&A core functions
|
|
|
441 |
file_client.upload_file(str(logs))
|
442 |
|
443 |
|
444 |
+
def disable_component():
    """Return a Gradio update that switches a component to non-interactive."""
    frozen_state = gr.update(interactive = False)
    return frozen_state
|
446 |
|
447 |
|
448 |
|
|
|
487 |
show_copy_button=True,show_label = False,elem_id="chatbot",layout = "panel",avatar_images = ("assets/logo4.png",None))
|
488 |
|
489 |
# bot.like(vote,None,None)
|
490 |
+
|
491 |
+
|
492 |
+
|
493 |
with gr.Row(elem_id = "input-message"):
|
494 |
textbox=gr.Textbox(placeholder="Ask me anything here!",show_label=False,scale=1,lines = 1,interactive = True)
|
495 |
# submit_button = gr.Button(">",scale = 1,elem_id = "submit-button")
|
|
|
542 |
)
|
543 |
|
544 |
with gr.Tab("📚 Citations",elem_id = "tab-citations"):
|
545 |
+
sources_textbox = gr.HTML(show_label=False, elem_id="sources-textbox")
|
546 |
+
docs_textbox = gr.State("")
|
547 |
|
548 |
with gr.Tab("⚙️ Configuration",elem_id = "tab-config"):
|
549 |
|
550 |
gr.Markdown("Reminder: You can talk in any language, ClimateQ&A is multi-lingual!")
|
551 |
|
552 |
+
|
553 |
dropdown_sources = gr.CheckboxGroup(
|
554 |
["IPCC", "IPBES"],
|
555 |
label="Select reports",
|
|
|
564 |
interactive=True,
|
565 |
)
|
566 |
|
567 |
+
output_query = gr.Textbox(label="Query used for retrieval",show_label = True,elem_id = "reformulated-query",lines = 2,interactive = False)
|
568 |
+
output_language = gr.Textbox(label="Language",show_label = True,elem_id = "language",lines = 1,interactive = False)
|
569 |
+
|
570 |
+
|
571 |
|
572 |
# textbox.submit(predict_climateqa,[textbox,bot],[None,bot,sources_textbox])
|
573 |
+
(textbox
|
574 |
+
.submit(answer_user, [textbox,examples_hidden, bot], [textbox, bot],queue = False)
|
575 |
+
.then(disable_component, [examples_questions], [examples_questions],queue = False)
|
576 |
+
.success(fetch_sources,[textbox,dropdown_sources], [textbox,sources_textbox,docs_textbox,output_query,output_language])
|
577 |
+
.success(answer_bot, [textbox,bot,docs_textbox,output_query,output_language,dropdown_audience], [textbox,bot],queue = True)
|
578 |
+
.success(lambda x : textbox,[textbox],[textbox])
|
579 |
+
)
|
580 |
+
|
581 |
+
(examples_hidden
|
582 |
+
.change(answer_user_example, [textbox,examples_hidden, bot], [textbox, bot],queue = False)
|
583 |
+
.then(disable_component, [examples_questions], [examples_questions],queue = False)
|
584 |
+
.success(fetch_sources,[textbox,dropdown_sources], [textbox,sources_textbox,docs_textbox,output_query,output_language])
|
585 |
+
.success(answer_bot, [textbox,bot,docs_textbox,output_query,output_language,dropdown_audience], [textbox,bot],queue=True)
|
586 |
+
.success(lambda x : textbox,[textbox],[textbox])
|
587 |
+
)
|
588 |
# submit_button.click(answer_user, [textbox, bot], [textbox, bot], queue=True).then(
|
589 |
# answer_bot, [textbox,bot,dropdown_audience,dropdown_sources], [textbox,bot,sources_textbox]
|
590 |
# )
|
|
|
773 |
"""
|
774 |
)
|
775 |
|
776 |
+
demo.queue(concurrency_count=16)
|
777 |
|
778 |
demo.launch()
|
climateqa/chains.py
CHANGED
@@ -3,7 +3,7 @@
|
|
3 |
import json
|
4 |
|
5 |
from langchain import PromptTemplate, LLMChain
|
6 |
-
from langchain.chains import RetrievalQAWithSourcesChain
|
7 |
from langchain.chains import TransformChain, SequentialChain
|
8 |
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
9 |
|
@@ -37,11 +37,48 @@ def load_reformulation_chain(llm):
|
|
37 |
return reformulation_chain
|
38 |
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
def load_answer_chain(retriever,llm):
|
43 |
prompt = PromptTemplate(template=answer_prompt, input_variables=["summaries", "question","audience","language"])
|
44 |
qa_chain = load_qa_with_sources_chain(llm, chain_type="stuff",prompt = prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
# This could be improved by providing a document prompt to avoid modifying page_content in the docs
|
47 |
# See here https://github.com/langchain-ai/langchain/issues/3523
|
@@ -59,7 +96,7 @@ def load_answer_chain(retriever,llm):
|
|
59 |
def load_climateqa_chain(retriever,llm_reformulation,llm_answer):
|
60 |
|
61 |
reformulation_chain = load_reformulation_chain(llm_reformulation)
|
62 |
-
answer_chain =
|
63 |
|
64 |
climateqa_chain = SequentialChain(
|
65 |
chains = [reformulation_chain,answer_chain],
|
|
|
3 |
import json
|
4 |
|
5 |
from langchain import PromptTemplate, LLMChain
|
6 |
+
from langchain.chains import RetrievalQAWithSourcesChain,QAWithSourcesChain
|
7 |
from langchain.chains import TransformChain, SequentialChain
|
8 |
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
|
9 |
|
|
|
37 |
return reformulation_chain
|
38 |
|
39 |
|
40 |
+
def load_combine_documents_chain(llm):
    """Build the "stuff" QA-with-sources chain driven by the answer prompt.

    The prompt takes ``summaries``, ``question``, ``audience`` and
    ``language`` as input variables.
    """
    answer_template = PromptTemplate(
        template=answer_prompt,
        input_variables=["summaries", "question","audience","language"],
    )
    return load_qa_with_sources_chain(llm, chain_type="stuff",prompt = answer_template)
|
44 |
+
|
45 |
+
def load_qa_chain_with_docs(llm):
    """Build a QA chain that answers from documents supplied by the caller.

    Use it when the documents have already been retrieved. Expected call
    shape::

        output = chain({
            "question": query,
            "audience": "experts climate scientists",
            "docs": docs,
            "language": "English",
        })
    """
    chain_settings = {
        # Key under which the caller passes the documents.
        "input_docs_key": "docs",
        "combine_documents_chain": load_combine_documents_chain(llm),
        "return_source_documents": True,
    }
    return QAWithSourcesChain(**chain_settings)
|
68 |
+
|
69 |
+
|
70 |
+
def load_qa_chain_with_text(llm):
    """Build a plain ``LLMChain`` over the answer prompt.

    The chain expects ``question``, ``audience``, ``language`` and
    ``summaries`` inputs.
    """
    prompt_inputs = ["question","audience","language","summaries"]
    answer_template = PromptTemplate(template = answer_prompt, input_variables = prompt_inputs)
    return LLMChain(llm = llm,prompt = answer_template)
|
78 |
+
|
79 |
+
|
80 |
+
def load_qa_chain_with_retriever(retriever,llm):
|
81 |
+
qa_chain = load_combine_documents_chain(llm)
|
82 |
|
83 |
# This could be improved by providing a document prompt to avoid modifying page_content in the docs
|
84 |
# See here https://github.com/langchain-ai/langchain/issues/3523
|
|
|
96 |
def load_climateqa_chain(retriever,llm_reformulation,llm_answer):
|
97 |
|
98 |
reformulation_chain = load_reformulation_chain(llm_reformulation)
|
99 |
+
answer_chain = load_qa_chain_with_retriever(retriever,llm_answer)
|
100 |
|
101 |
climateqa_chain = SequentialChain(
|
102 |
chains = [reformulation_chain,answer_chain],
|