Safety check on non-sourced answers + theme
Browse files
- .gitignore +4 -0
- app.py +80 -45
- requirements.txt +2 -1
- style.css +2 -7
.gitignore
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
__pycache__/app.cpython-38.pyc
|
3 |
+
__pycache__/app.cpython-39.pyc
|
4 |
+
__pycache__/utils.cpython-38.pyc
|
app.py
CHANGED
@@ -13,6 +13,21 @@ import numpy as np
|
|
13 |
from datetime import datetime
|
14 |
from azure.storage.fileshare import ShareServiceClient
|
15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
system_template = {"role": "system", "content": os.environ["content"]}
|
18 |
|
@@ -44,10 +59,14 @@ credential = {
|
|
44 |
"account_name": os.environ["account_name"],
|
45 |
}
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
51 |
user_id = create_user_id(10)
|
52 |
|
53 |
|
@@ -55,7 +74,7 @@ def chat(
|
|
55 |
user_id: str,
|
56 |
query: str,
|
57 |
history: list = [system_template],
|
58 |
-
report_type: str = "
|
59 |
threshold: float = 0.555,
|
60 |
) -> tuple:
|
61 |
"""retrieve relevant documents in the document store then query gpt-turbo
|
@@ -81,7 +100,7 @@ def chat(
|
|
81 |
|
82 |
messages = history + [{"role": "user", "content": query}]
|
83 |
sources = "\n\n".join(
|
84 |
-
f"doc {i}: {d.meta['file_name']} page {d.meta['page_number']}\n{d.content}"
|
85 |
for i, d in enumerate(docs, 1)
|
86 |
if d.score > threshold
|
87 |
)
|
@@ -91,43 +110,54 @@ def chat(
|
|
91 |
{"role": "system", "content": f"{os.environ['sources']}\n\n{sources}"}
|
92 |
)
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
|
102 |
-
if sources:
|
103 |
complete_response = ""
|
104 |
messages.pop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
else:
|
106 |
-
sources = "No
|
107 |
-
complete_response = "**⚠️ No relevant passages found in the climate science reports,
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
"prompt": query,
|
115 |
-
"retrived": sources,
|
116 |
-
"report_type": report_type,
|
117 |
-
"prompt_eng": messages[0],
|
118 |
-
"answer": messages[-1]["content"],
|
119 |
-
"time": timestamp,
|
120 |
-
}
|
121 |
-
log_on_azure(file, logs, share_client)
|
122 |
-
|
123 |
-
for chunk in response:
|
124 |
-
if (
|
125 |
-
chunk_message := chunk["choices"][0].get("text")
|
126 |
-
) and chunk_message != "<|im_end|>":
|
127 |
-
complete_response += chunk_message
|
128 |
-
messages[-1]["content"] = complete_response
|
129 |
-
gradio_format = make_pairs([a["content"] for a in messages[1:]])
|
130 |
-
yield gradio_format, messages, sources
|
131 |
|
132 |
|
133 |
def save_feedback(feed: str, user_id):
|
@@ -152,7 +182,7 @@ def log_on_azure(file, logs, share_client):
|
|
152 |
file_client.upload_file(str(logs))
|
153 |
|
154 |
|
155 |
-
with gr.Blocks(title="🌍 Climate Q&A", css="style.css") as demo:
|
156 |
|
157 |
user_id_state = gr.State([user_id])
|
158 |
|
@@ -166,15 +196,20 @@ with gr.Blocks(title="🌍 Climate Q&A", css="style.css") as demo:
|
|
166 |
gr.Markdown(
|
167 |
"""
|
168 |
<p><b>Climate change and environmental disruptions have become some of the most pressing challenges facing our planet today</b>. As global temperatures rise and ecosystems suffer, it is essential for individuals to understand the gravity of the situation in order to make informed decisions and advocate for appropriate policy changes.</p>
|
169 |
-
<p>However, comprehending the vast and complex scientific information can be daunting, as the scientific consensus references, such as <b>the Intergovernmental Panel on Climate Change (IPCC) reports, span thousands of pages</b
|
170 |
<div class="tip-box">
|
171 |
<div class="tip-box-title">
|
172 |
<span class="light-bulb" role="img" aria-label="Light Bulb">💡</span>
|
173 |
How does ClimateQ&A work?
|
174 |
</div>
|
175 |
-
ClimateQ&A harnesses modern OCR techniques to parse and preprocess IPCC reports. By leveraging state-of-the-art question-answering algorithms, <i>ClimateQ&A is able to sift through the extensive collection of climate scientific reports and identify relevant passages in response to user inquiries</i>. Furthermore, the integration of the ChatGPT API allows ClimateQ&A to present complex data in a user-friendly manner, summarizing key points and facilitating communication of climate science to a wider audience.
|
176 |
</div>
|
177 |
|
|
|
|
|
|
|
|
|
|
|
178 |
"""
|
179 |
)
|
180 |
|
@@ -186,7 +221,7 @@ ClimateQ&A harnesses modern OCR techniques to parse and preprocess IPCC reports.
|
|
186 |
|
187 |
with gr.Row():
|
188 |
with gr.Column(scale=2):
|
189 |
-
chatbot = gr.Chatbot(elem_id="chatbot")
|
190 |
state = gr.State([system_template])
|
191 |
|
192 |
with gr.Row():
|
@@ -252,7 +287,7 @@ ClimateQ&A harnesses modern OCR techniques to parse and preprocess IPCC reports.
|
|
252 |
state,
|
253 |
gr.inputs.Dropdown(
|
254 |
["IPCC only", "All available"],
|
255 |
-
default="
|
256 |
label="Select reports",
|
257 |
),
|
258 |
],
|
@@ -379,7 +414,7 @@ If you have any questions or feature requests, please feel free to reach us out
|
|
379 |
## 💻 Developers
|
380 |
For developers, the methodology used is detailed below :
|
381 |
|
382 |
-
-
|
383 |
- Use Haystack to compute semantically representative embeddings for each paragraph using a sentence transformers model (https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1).
|
384 |
- Store all the embeddings in a FAISS Flat index.
|
385 |
- Reformulate each user query to be as specific as possible and compute its embedding.
|
|
|
13 |
from datetime import datetime
|
14 |
from azure.storage.fileshare import ShareServiceClient
|
15 |
|
16 |
+
from dotenv import load_dotenv
|
17 |
+
|
18 |
+
# Load the environment variables from the .env file
|
19 |
+
load_dotenv()
|
20 |
+
print(os.environ)
|
21 |
+
|
22 |
+
# for key in ["CONTENT","API_KEY","SOURCES","RESSOURCE_ENDPOINT"]:
|
23 |
+
# os.environ[key.lower()] = os.environ[key]
|
24 |
+
|
25 |
+
|
26 |
+
theme = gr.themes.Soft(
|
27 |
+
primary_hue="sky",
|
28 |
+
font=[gr.themes.GoogleFont('Inter'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
|
29 |
+
)
|
30 |
+
|
31 |
|
32 |
system_template = {"role": "system", "content": os.environ["content"]}
|
33 |
|
|
|
59 |
"account_name": os.environ["account_name"],
|
60 |
}
|
61 |
|
62 |
+
try:
|
63 |
+
account_url = os.environ["account_url"]
|
64 |
+
file_share_name = "climategpt"
|
65 |
+
service = ShareServiceClient(account_url=account_url, credential=credential)
|
66 |
+
share_client = service.get_share_client(file_share_name)
|
67 |
+
except:
|
68 |
+
print("Skipped logging")
|
69 |
+
|
70 |
user_id = create_user_id(10)
|
71 |
|
72 |
|
|
|
74 |
user_id: str,
|
75 |
query: str,
|
76 |
history: list = [system_template],
|
77 |
+
report_type: str = "IPCC only",
|
78 |
threshold: float = 0.555,
|
79 |
) -> tuple:
|
80 |
"""retrieve relevant documents in the document store then query gpt-turbo
|
|
|
100 |
|
101 |
messages = history + [{"role": "user", "content": query}]
|
102 |
sources = "\n\n".join(
|
103 |
+
f"📃 doc {i}: {d.meta['file_name']} page {d.meta['page_number']}\n{d.content}"
|
104 |
for i, d in enumerate(docs, 1)
|
105 |
if d.score > threshold
|
106 |
)
|
|
|
110 |
{"role": "system", "content": f"{os.environ['sources']}\n\n{sources}"}
|
111 |
)
|
112 |
|
113 |
+
response = openai.Completion.create(
|
114 |
+
engine="climateGPT",
|
115 |
+
prompt=to_completion(messages),
|
116 |
+
temperature=0.2,
|
117 |
+
stream=True,
|
118 |
+
max_tokens=1024,
|
119 |
+
)
|
120 |
|
|
|
121 |
complete_response = ""
|
122 |
messages.pop()
|
123 |
+
|
124 |
+
messages.append({"role": "assistant", "content": complete_response})
|
125 |
+
timestamp = str(datetime.now().timestamp())
|
126 |
+
file = user_id[0] + timestamp + ".json"
|
127 |
+
logs = {
|
128 |
+
"user_id": user_id[0],
|
129 |
+
"prompt": query,
|
130 |
+
"retrived": sources,
|
131 |
+
"report_type": report_type,
|
132 |
+
"prompt_eng": messages[0],
|
133 |
+
"answer": messages[-1]["content"],
|
134 |
+
"time": timestamp,
|
135 |
+
}
|
136 |
+
try:
|
137 |
+
log_on_azure(file, logs, share_client)
|
138 |
+
except:
|
139 |
+
pass
|
140 |
+
|
141 |
+
|
142 |
+
for chunk in response:
|
143 |
+
if (
|
144 |
+
chunk_message := chunk["choices"][0].get("text")
|
145 |
+
) and chunk_message != "<|im_end|>":
|
146 |
+
complete_response += chunk_message
|
147 |
+
messages[-1]["content"] = complete_response
|
148 |
+
gradio_format = make_pairs([a["content"] for a in messages[1:]])
|
149 |
+
yield gradio_format, messages, sources
|
150 |
+
|
151 |
+
|
152 |
else:
|
153 |
+
sources = "⚠️ No relevant passages found in the climate science reports"
|
154 |
+
complete_response = "**⚠️ No relevant passages found in the climate science reports, you may want to ask a more specific question (specifying your question on climate issues).**"
|
155 |
+
|
156 |
+
messages.append({"role": "assistant", "content": complete_response})
|
157 |
+
|
158 |
+
gradio_format = make_pairs([a["content"] for a in messages[1:]])
|
159 |
+
yield gradio_format, messages, sources
|
160 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
|
162 |
|
163 |
def save_feedback(feed: str, user_id):
|
|
|
182 |
file_client.upload_file(str(logs))
|
183 |
|
184 |
|
185 |
+
with gr.Blocks(title="🌍 Climate Q&A", css="style.css",theme = theme) as demo:
|
186 |
|
187 |
user_id_state = gr.State([user_id])
|
188 |
|
|
|
196 |
gr.Markdown(
|
197 |
"""
|
198 |
<p><b>Climate change and environmental disruptions have become some of the most pressing challenges facing our planet today</b>. As global temperatures rise and ecosystems suffer, it is essential for individuals to understand the gravity of the situation in order to make informed decisions and advocate for appropriate policy changes.</p>
|
199 |
+
<p>However, comprehending the vast and complex scientific information can be daunting, as the scientific consensus references, such as <b>the Intergovernmental Panel on Climate Change (IPCC) reports, span thousands of pages</b>. To bridge this gap and make climate science more accessible, we introduce <b>ClimateQ&A as a tool to distill expert-level knowledge into easily digestible insights about climate science.</b></p>
|
200 |
<div class="tip-box">
|
201 |
<div class="tip-box-title">
|
202 |
<span class="light-bulb" role="img" aria-label="Light Bulb">💡</span>
|
203 |
How does ClimateQ&A work?
|
204 |
</div>
|
205 |
+
ClimateQ&A harnesses modern OCR techniques to parse and preprocess IPCC reports. By leveraging state-of-the-art question-answering algorithms, <i>ClimateQ&A is able to sift through the extensive collection of climate scientific reports and identify relevant passages in response to user inquiries</i>. Furthermore, the integration of the ChatGPT API allows ClimateQ&A to present complex data in a user-friendly manner, summarizing key points and facilitating communication of climate science to a wider audience.
|
206 |
</div>
|
207 |
|
208 |
+
<div class="warning-box">
|
209 |
+
Version 0.2-beta - This tool is under active development
|
210 |
+
</div>
|
211 |
+
|
212 |
+
|
213 |
"""
|
214 |
)
|
215 |
|
|
|
221 |
|
222 |
with gr.Row():
|
223 |
with gr.Column(scale=2):
|
224 |
+
chatbot = gr.Chatbot(elem_id="chatbot",label = "ClimateQ&A chatbot")
|
225 |
state = gr.State([system_template])
|
226 |
|
227 |
with gr.Row():
|
|
|
287 |
state,
|
288 |
gr.inputs.Dropdown(
|
289 |
["IPCC only", "All available"],
|
290 |
+
default="IPCC only",
|
291 |
label="Select reports",
|
292 |
),
|
293 |
],
|
|
|
414 |
## 💻 Developers
|
415 |
For developers, the methodology used is detailed below :
|
416 |
|
417 |
+
- Extract individual paragraphs from scientific reports (e.g., IPCC, IPBES) using OCR techniques and open-source algorithms
|
418 |
- Use Haystack to compute semantically representative embeddings for each paragraph using a sentence transformers model (https://huggingface.co/sentence-transformers/multi-qa-mpnet-base-dot-v1).
|
419 |
- Store all the embeddings in a FAISS Flat index.
|
420 |
- Reformulate each user query to be as specific as possible and compute its embedding.
|
requirements.txt
CHANGED
@@ -2,4 +2,5 @@ faiss-cpu==1.7.2
|
|
2 |
farm-haystack==1.14.0
|
3 |
gradio==3.22.1
|
4 |
openai==0.27.0
|
5 |
-
azure-storage-file-share==12.11.1
|
|
|
|
2 |
farm-haystack==1.14.0
|
3 |
gradio==3.22.1
|
4 |
openai==0.27.0
|
5 |
+
azure-storage-file-share==12.11.1
|
6 |
+
python-dotenv==1.0.0
|
style.css
CHANGED
@@ -11,8 +11,8 @@
|
|
11 |
|
12 |
|
13 |
.tip-box {
|
14 |
-
background-color: #
|
15 |
-
border: 1px solid #
|
16 |
border-radius: 4px;
|
17 |
margin-top:20px;
|
18 |
padding: 15px 20px;
|
@@ -40,11 +40,6 @@
|
|
40 |
display:none;
|
41 |
}
|
42 |
|
43 |
-
.message.user{
|
44 |
-
border-color:#53bcd4 !important;
|
45 |
-
background-color: #daf1f6 !important;
|
46 |
-
}
|
47 |
-
|
48 |
.message{
|
49 |
font-size:14px !important;
|
50 |
}
|
|
|
11 |
|
12 |
|
13 |
.tip-box {
|
14 |
+
background-color: #f0f9ff;
|
15 |
+
border: 1px solid #80d4fa;
|
16 |
border-radius: 4px;
|
17 |
margin-top:20px;
|
18 |
padding: 15px 20px;
|
|
|
40 |
display:none;
|
41 |
}
|
42 |
|
|
|
|
|
|
|
|
|
|
|
43 |
.message{
|
44 |
font-size:14px !important;
|
45 |
}
|