import re
import time

import streamlit as st
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts.prompt import PromptTemplate
from langchain.vectorstores import FAISS

# Load the e5-large-v2 embedding model (runs locally on CPU).
model_name = "intfloat/e5-large-v2"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": False}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
# Load the prebuilt FAISS index of IPCC report chunks.
db = FAISS.load_local("IPCC_index_e5_1000_all", embeddings)
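# The index is assumed to have been built offline with the same embedding
# model. A minimal sketch (the loader, PDF filename, and chunk size of 1000
# are assumptions inferred from the index name):
#   from langchain.document_loaders import PyPDFLoader
#   from langchain.text_splitter import RecursiveCharacterTextSplitter
#   pages = PyPDFLoader("IPCC_AR6_WGI_FullReport.pdf").load()
#   chunks = RecursiveCharacterTextSplitter(chunk_size=1000).split_documents(pages)
#   FAISS.from_documents(chunks, embeddings).save_local("IPCC_index_e5_1000_all")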
# Default follow-up suggestions, overwritten once the model returns its own.
question1 = "Why does temperature increase?"
question2 = "What evidence do we have of climate change?"
question3 = "What is the link between health and climate change?"

def click_button(button_text):
    """Handle a click on one of the suggested-question widgets."""
    if prompt := button_text:
        if not openai_api_key:
            st.info("Please add your OpenAI API key to continue.")
            st.stop()
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.chat_message("user").write(prompt)
        with st.spinner("Thinking..."):
            result = generate_response(prompt)
        result_r = result["result"]
        # Display the assistant response in a chat message container.
        with st.chat_message("assistant"):
            message_placeholder = st.empty()
            full_response = ""
            # Simulate a streaming response with a short delay per word.
            for chunk in result_r.split():
                full_response += chunk + " "
                time.sleep(0.05)
                # Add a blinking cursor to simulate typing.
                message_placeholder.write(full_response + "▌")
            message_placeholder.write(result_r)
        # Add the assistant response to the chat history.
        st.session_state.messages.append({"role": "assistant", "content": result_r})
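
# NOTE: click_button mirrors the chat-input handler below. The suggested
# questions are currently rendered with st.code (copy-paste style), so this
# handler is defined but not wired to any widget.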

def generate_response(input_text):
    """Retrieve the top IPCC chunks for the query and run the QA chain."""
    docs = db.similarity_search(input_text, k=5)
    json1 = docs[0].metadata
    json2 = docs[1].metadata
    json3 = docs[2].metadata
    json4 = docs[3].metadata
    json5 = docs[4].metadata
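    # Each metadata dict is expected to carry the keys used in the prompt:
    #   {"source": <PDF name>, "page": <page number>, "wg": <working group, e.g. "wg1">}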
    climate_TEMPLATE = """\
You are a professor of climate change, tasked with answering any question \
about climate change. Take a deep breath and think step by step.

{question}

Generate a comprehensive, informative answer of 100 words or less for a \
general audience, based solely on the provided search results (hyperlink and \
source). You must only use information from the provided search results. Use \
an unbiased and journalistic tone. Combine the search results into a coherent \
answer. Do not repeat text. Only use relevant results that answer the \
question accurately. List these sources at the end of your answer in a \
section named "Sources". After the "Sources" section, make sure to provide \
three follow-up questions in a section named "Next Questions".

Format your answer in markdown.

If there is nothing in the context relevant to the question at hand, just say \
"Hmm, I'm not sure." Don't try to make up an answer.

Anything between the following `context` html blocks is retrieved from a \
knowledge bank, not part of the conversation with the user.

<context>
{context}
</context>

Anything between the following `sources` html blocks is the source and \
hyperlink you should use and list in the "Sources" section.

<sources>
[{source1} page {page1}](https://www.ipcc.ch/report/ar6/{wg1}/downloads/report/{source1}.pdf#page={page1})
[{source2} page {page2}](https://www.ipcc.ch/report/ar6/{wg2}/downloads/report/{source2}.pdf#page={page2})
[{source3} page {page3}](https://www.ipcc.ch/report/ar6/{wg3}/downloads/report/{source3}.pdf#page={page3})
[{source4} page {page4}](https://www.ipcc.ch/report/ar6/{wg4}/downloads/report/{source4}.pdf#page={page4})
[{source5} page {page5}](https://www.ipcc.ch/report/ar6/{wg5}/downloads/report/{source5}.pdf#page={page5})
</sources>

REMEMBER: If there is no relevant information within the context, just say \
"Hmm, I'm not sure." Don't try to make up an answer. Anything between the \
preceding `context` html blocks is retrieved from a knowledge bank, not part \
of the conversation with the user.\
"""
    climate_PROMPT = PromptTemplate(
        input_variables=["question", "context"],
        partial_variables={
            "source1": json1["source"], "source2": json2["source"],
            "source3": json3["source"], "source4": json4["source"],
            "source5": json5["source"],
            "page1": json1["page"], "page2": json2["page"],
            "page3": json3["page"], "page4": json4["page"],
            "page5": json5["page"],
            "wg1": json1["wg"], "wg2": json2["wg"], "wg3": json3["wg"],
            "wg4": json4["wg"], "wg5": json5["wg"],
        },
        template=climate_TEMPLATE,
    )
    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo-16k",
        temperature=0.05,
        max_tokens=2500,
        openai_api_key=openai_api_key,
    )
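    # A low temperature keeps the answer close to the retrieved passages.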
    # Define the retriever over the same FAISS index.
    retriever = db.as_retriever(search_kwargs={"k": 5})
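    # NOTE: retrieval effectively runs twice per query (similarity_search above
    # for the source links, and again inside the chain); with the same query
    # and k=5 both calls should return the same chunks, keeping links consistent.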
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        chain_type="stuff",  # alternatives: "map_reduce", "refine", "map_rerank"
        return_source_documents=True,
        verbose=True,
        chain_type_kwargs={"prompt": climate_PROMPT},
    )
    return qa_chain({"query": input_text})

with st.sidebar:
    openai_api_key = st.text_input("OpenAI API Key", key="chatbot_api_key", type="password")
    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
    st.markdown("## 🌍 Welcome to ClimateChat! 🌍")
    st.markdown(
        "ClimateChat harnesses the latest [IPCC reports](https://www.ipcc.ch/report/sixth-assessment-report-cycle/) "
        "and the power of Large Language Models to answer your questions about climate change. "
        "When you interact with ClimateChat, you will not only receive clear answers: each response "
        "comes with sources and hyperlinks for further exploration and verification. "
        "Our objective is to make climate change information accessible, understandable, "
        "and actionable for everyone, everywhere."
    )
st.title("💬🌍🌡️ ClimateChat")
st.caption("💬 A climate change chatbot powered by an OpenAI LLM and IPCC documents")
if "messages" not in st.session_state:
st.session_state["messages"] = [{"role": "assistant", "content": "Any question about the climate change? Here are some examples:"}]
# Replay the conversation history.
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])
if prompt := st.chat_input():
    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            result = generate_response(prompt)
        result_r = result["result"]
        message_placeholder = st.empty()
        full_response = ""
        # Simulate a streaming response with a short delay per word.
        for chunk in result_r.split():
            full_response += chunk + " "
            time.sleep(0.05)
            # Add a blinking cursor to simulate typing.
            message_placeholder.write(full_response + "▌")
        message_placeholder.write(result_r)
    # Parse up to three follow-up questions from the "Next Questions" section.
    if re.search(r'Next Questions?:', result_r, flags=re.IGNORECASE):
        questions_text = re.split(r'Next Questions?:', result_r, flags=re.IGNORECASE)[-1]
        # Match numbered questions such as "1. How ...?"
        pattern = r'\d+\.\s*([^?]+\?)'
        matches = re.findall(pattern, questions_text)
        if len(matches) >= 3:
            question1 = matches[0].strip()
            question2 = matches[1].strip()
            question3 = matches[2].strip()
    # Add the assistant response to the chat history.
    st.session_state.messages.append({"role": "assistant", "content": result_r})
# Render the current suggested questions in three columns.
button_col1, button_col2, button_col3 = st.columns([5, 5, 5])
button_col1.code(question1, language=None)
button_col2.code(question2, language=None)
button_col3.code(question3, language=None)
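
# A minimal sketch of how to launch the app (the filename is an assumption):
#   pip install streamlit langchain faiss-cpu sentence-transformers openai
#   streamlit run climatechat_app.py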