import re
import time

import streamlit as st
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts.prompt import PromptTemplate
from langchain.vectorstores import FAISS
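# Note: these imports follow the pre-0.1.0 LangChain package layout; on newer
# releases, ChatOpenAI lives in langchain_openai, and the embeddings and
# vector stores in langchain_community.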


# Load the e5-large-v2 embedding model on CPU (without normalizing embeddings)
model_name = "intfloat/e5-large-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
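# Note: the e5 model card recommends prefixing inputs with "query: " /
# "passage: "; this app embeds raw text, which works but may slightly
# reduce retrieval quality.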

# Load the local FAISS index of IPCC report chunks
db = FAISS.load_local("IPCC_index_e5_1000_all", embeddings)
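# The index directory is assumed to hold a FAISS index built offline from
# chunked IPCC AR6 reports with this same e5 embedding model. A minimal
# sketch of how such an index could be built (ipcc_documents is a
# hypothetical list of LangChain Documents):
#
#     from langchain.text_splitter import RecursiveCharacterTextSplitter
#     splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
#     chunks = splitter.split_documents(ipcc_documents)
#     FAISS.from_documents(chunks, embeddings).save_local("IPCC_index_e5_1000_all")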


# Default suggested questions, replaced by the model's follow-up questions
# after each answer
question1 = 'Why does temperature increase?'
question2 = 'What evidence do we have of climate change?'
question3 = 'What is the link between health and climate change?'

def click_button(button_text):
    # Treat the text of a clicked suggested-question button as a user prompt
    # (mirrors the main chat handler below; not currently wired to a button)
    prompt = button_text
    if prompt:
        if not openai_api_key:
            st.info("Please add your OpenAI API key to continue.")
            st.stop()

        st.session_state.messages.append({"role": "user", "content": prompt})
        st.chat_message("user").write(prompt)
        with st.spinner("Thinking..."):
            result = generate_response(prompt)
            result_r = result["result"]

            # Display the assistant response in a chat message container
            with st.chat_message("assistant"):
                message_placeholder = st.empty()
                full_response = ""
                # Simulate a streaming response with a short delay per word
                for chunk in result_r.split():
                    full_response += chunk + " "
                    time.sleep(0.05)
                    # Add a blinking cursor to simulate typing
                    message_placeholder.write(full_response + "▌")
                message_placeholder.write(result_r)
            # Add the assistant response to the chat history
            st.session_state.messages.append({"role": "assistant", "content": result_r})

def generate_response(input_text):
    # Retrieve the five most similar IPCC chunks and collect their metadata
    # for the citation links in the prompt
    docs = db.similarity_search(input_text, k=5)

    json1 = docs[0].metadata
    json2 = docs[1].metadata
    json3 = docs[2].metadata
    json4 = docs[3].metadata
    json5 = docs[4].metadata

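    # Each metadata dict is assumed to carry "source", "page", and "wg" keys,
    # which are interpolated into the citation hyperlinks in the prompt below.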

    climate_TEMPLATE = """\
    You are a professor in climate change, tasked with answering any question \
    about climate change. Take a deep breath and think step by step.

    {question}

    Generate a comprehensive and informative answer of 100 words or less for the \
    given question, aimed at a general audience, based solely on the provided search \
    results (hyperlink and source). You must only use information from the provided \
    search results. Use an unbiased and journalistic tone. Combine search results \
    together into a coherent answer. Do not repeat text. Only use relevant results \
    that answer the question accurately. List these sources at the end of your \
    answer in a section named "source". After the "source" section, make sure to \
    provide three follow-up questions in a section named "Next Questions:".

    Format your answer in Markdown.

    If there is nothing in the context relevant to the question at hand, just say "Hmm, \
    I'm not sure." Don't try to make up an answer.

    Anything between the following `context` html blocks is retrieved from a knowledge \
    bank, not part of the conversation with the user.

    <context>
        {context}
    </context>

    Anything between the following `sources` html blocks is the source and hyperlink \
    you should use and list in the "source" section.
    <sources>
        [{source1} page {page1}](https://www.ipcc.ch/report/ar6/{wg1}/downloads/report/{source1}.pdf#page={page1})
        [{source2} page {page2}](https://www.ipcc.ch/report/ar6/{wg2}/downloads/report/{source2}.pdf#page={page2})
        [{source3} page {page3}](https://www.ipcc.ch/report/ar6/{wg3}/downloads/report/{source3}.pdf#page={page3})
        [{source4} page {page4}](https://www.ipcc.ch/report/ar6/{wg4}/downloads/report/{source4}.pdf#page={page4})
        [{source5} page {page5}](https://www.ipcc.ch/report/ar6/{wg5}/downloads/report/{source5}.pdf#page={page5})
    </sources>

    REMEMBER: If there is no relevant information within the context, just say "Hmm, I'm \
    not sure." Don't try to make up an answer. Anything between the preceding 'context' \
    html blocks is retrieved from a knowledge bank, not part of the conversation with \
    the user.
    """
    climate_PROMPT = PromptTemplate(
        input_variables=["question", "context"],
        partial_variables={
            "source1": json1["source"], "source2": json2["source"], "source3": json3["source"],
            "source4": json4["source"], "source5": json5["source"],
            "page1": json1["page"], "page2": json2["page"], "page3": json3["page"],
            "page4": json4["page"], "page5": json5["page"],
            "wg1": json1["wg"], "wg2": json2["wg"], "wg3": json3["wg"],
            "wg4": json4["wg"], "wg5": json5["wg"],
        },
        template=climate_TEMPLATE,
    )


    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo-16k",
        temperature=0.05,
        max_tokens=2500,
        openai_api_key=openai_api_key
    )

    # Define the retriever over the same FAISS index (k=5 chunks per query)
    retriever = db.as_retriever(search_kwargs={"k": 5})

    # "stuff" packs all retrieved chunks into a single prompt
    # (alternatives: "map_reduce", "refine", "map_rerank")
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        chain_type="stuff",
        return_source_documents=True,
        verbose=True,
        chain_type_kwargs={"prompt": climate_PROMPT}
    )

    return qa_chain({'query': input_text})
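# With return_source_documents=True, the chain's output dict contains "query",
# "result", and "source_documents"; only "result" is used below.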


with st.sidebar:
    openai_api_key = st.text_input("OpenAI API Key", key="chatbot_api_key", type="password")
    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
    st.markdown("## 🌍 Welcome to ClimateChat! 🌍")
    st.markdown(
        "ClimateChat harnesses the latest [IPCC reports](https://www.ipcc.ch/report/sixth-assessment-report-cycle/) "
        "and the power of Large Language Models to answer your questions about climate change. When you interact "
        "with ClimateChat, not only will you receive clear answers, but each response is coupled with sources and "
        "hyperlinks for further exploration and verification. Our objective is to make climate change information "
        "accessible, understandable, and actionable for everyone, everywhere."
    )

st.title("💬🌍🌡️ ClimateChat")
st.caption("💬 A climate change chatbot powered by an OpenAI LLM and IPCC documents")


if "messages" not in st.session_state:
    st.session_state["messages"] = [{"role": "assistant", "content": "Any question about the climate change? Here are some examples:"}]

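# Streamlit reruns this script on every interaction, so replay the stored
# chat history each time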
for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])




if prompt := st.chat_input():
    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()

    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)

    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            result = generate_response(prompt)
            result_r = result["result"]
        message_placeholder = st.empty()
        full_response = ""
        # Simulate a streaming response with a short delay per word
        for chunk in result_r.split():
            full_response += chunk + " "
            time.sleep(0.05)
            # Add a blinking cursor to simulate typing
            message_placeholder.write(full_response + "▌")
        message_placeholder.write(result_r)

        # If the answer ends with a "Next Questions:" section, parse out the
        # numbered follow-up questions and use them as the new suggestions
        if re.search(r'Next Questions?:', result_r, flags=re.IGNORECASE):
            questions_text = re.split(r'Next Questions?:', result_r, flags=re.IGNORECASE)[-1]

            # Match numbered questions such as "1. How ...?"
            pattern = r'\d+\.\s*([^?]+\?)'
            matches = re.findall(pattern, questions_text)

            # Guard against the model returning fewer than three questions
            if len(matches) >= 3:
                question1 = matches[0].strip()
                question2 = matches[1].strip()
                question3 = matches[2].strip()

    # Add the assistant response to the chat history
    st.session_state.messages.append({"role": "assistant", "content": result_r})


# Render the current suggested questions as copyable code blocks
# (the three columns are laid out for buttons but are currently unused)
button_col1, button_col2, button_col3 = st.columns([5, 5, 5])
st.code(question1, language=None)
st.code(question2, language=None)
st.code(question3, language=None)