added privacy statement, minor cosmetics on the key information, NER written without abbreviation
streamlit_app.py  (+27 -15)
@@ -48,11 +48,13 @@ if "messages" not in st.session_state:
 if 'ner_processing' not in st.session_state:
     st.session_state['ner_processing'] = False
 
+if 'uploaded' not in st.session_state:
+    st.session_state['uploaded'] = False
 
 def new_file():
     st.session_state['loaded_embeddings'] = None
     st.session_state['doc_id'] = None
-
+    st.session_state['uploaded'] = True
 
 # @st.cache_resource
 def init_qa(model):
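The new `uploaded` flag follows Streamlit's usual session-state idiom: the script re-runs top to bottom on every interaction, so per-session values must be initialized behind a guard, and the `on_change` callback fires before the re-run. A minimal, self-contained sketch of that pattern (widgets trimmed to the essentials):

```python
import streamlit as st

# st.session_state survives re-runs, so initialize flags only once per session.
if 'uploaded' not in st.session_state:
    st.session_state['uploaded'] = False

def new_file():
    # on_change callback: runs when the uploader's value changes, before the
    # script re-runs, so widgets built afterwards already see the new flag.
    st.session_state['uploaded'] = True

st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file)
st.write("Uploaded:", st.session_state['uploaded'])
```

This is why the model radio below can be disabled with `... or st.session_state['uploaded']`: once a file has been uploaded in the session, the flag stays `True`.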
@@ -128,11 +130,15 @@ def play_old_messages():
         else:
             st.write(message['content'])
 
+
 # is_api_key_provided = st.session_state['api_key']
 
 with st.sidebar:
+    st.markdown(
+        ":warning: Do not upload sensitive data. We **temporarily** store text from the uploaded PDF documents solely for the purpose of processing your request, and we **do not assume responsibility** for any subsequent use or handling of the data submitted to third parties LLMs.")
+
     st.session_state['model'] = model = st.radio(
-        "Model
+        "Model",
         ("chatgpt-3.5-turbo", "mistral-7b-instruct-v0.1"),  # , "llama-2-70b-chat"),
         index=1,
         captions=[
@@ -140,15 +146,17 @@ with st.sidebar:
             "Mistral-7B-Instruct-V0.1 + Sentence BERT (embeddings)"
             # "LLama2-70B-Chat + Sentence BERT (embeddings)",
         ],
-        help="Select the model you want to use.",
-        disabled=st.session_state['doc_id'] is not None)
+        help="Select the LLM model and embeddings you want to use.",
+        disabled=st.session_state['doc_id'] is not None or st.session_state['uploaded'])
 
     if model == 'mistral-7b-instruct-v0.1' or model == 'llama-2-70b-chat':
-
-
-
-
-
+        if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
+            api_key = st.text_input('Huggingface API Key', type="password")
+
+            st.markdown(
+                "Get it for [Open AI](https://platform.openai.com/account/api-keys) or [Huggingface](https://huggingface.co/docs/hub/security-tokens)")
+        else:
+            api_key = os.environ['HUGGINGFACEHUB_API_TOKEN']
 
         if api_key:
             # st.session_state['api_key'] = is_api_key_provided = True
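Both key-handling branches added in this commit use the same environment-first lookup: read the token from the process environment when it is set (for example as a Space secret), and only prompt the user otherwise. Distilled to a runnable sketch:

```python
import os
import streamlit as st

# Prefer a token from the environment; only prompt the user when it is absent.
if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
    api_key = st.text_input('Huggingface API Key', type="password")
else:
    api_key = os.environ['HUGGINGFACEHUB_API_TOKEN']

if api_key:
    st.success("API key available; the QA pipeline can be initialized.")
```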
@@ -159,10 +167,13 @@ with st.sidebar:
             st.session_state['rqa'][model] = init_qa(model)
 
     elif model == 'chatgpt-3.5-turbo':
-
-
-
-
+        if 'OPENAI_API_KEY' not in os.environ:
+            api_key = st.text_input('OpenAI API Key', type="password")
+            st.markdown(
+                "Get it for [Open AI](https://platform.openai.com/account/api-keys) or [Huggingface](https://huggingface.co/docs/hub/security-tokens)")
+        else:
+            api_key = os.environ['OPENAI_API_KEY']
+
         if api_key:
             # st.session_state['api_key'] = is_api_key_provided = True
             with st.spinner("Preparing environment"):
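The `chatgpt-3.5-turbo` branch mirrors the Hugging Face one almost line for line. A hypothetical refactor (no `get_api_key` helper exists in the app) could collapse the duplication:

```python
import os
import streamlit as st

def get_api_key(env_var: str, label: str) -> str:
    # Hypothetical helper, not part of this commit: environment-first lookup
    # with an interactive prompt as the fallback.
    if env_var in os.environ:
        return os.environ[env_var]
    return st.text_input(label, type="password")

api_key = get_api_key('OPENAI_API_KEY', 'OpenAI API Key')
```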
@@ -177,7 +188,8 @@ st.title("📝 Scientific Document Insight Q&A")
 st.subheader("Upload a scientific article in PDF, ask questions, get insights.")
 
 uploaded_file = st.file_uploader("Upload an article", type=("pdf", "txt"), on_change=new_file,
-                                 disabled=st.session_state['model'] is not None and st.session_state['model'] not in st.session_state['api_keys'],
+                                 disabled=st.session_state['model'] is not None and st.session_state['model'] not in
+                                 st.session_state['api_keys'],
                                  help="The full-text is extracted using Grobid. ")
 
 question = st.chat_input(
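The reflowed `disabled=` expression keeps the uploader off until the selected model has a key registered. Assuming `st.session_state['api_keys']` is a model-name-to-key mapping populated elsewhere in the app (it is referenced but not defined in this diff), the gate behaves like:

```python
import streamlit as st

# Assumption: 'api_keys' maps a model name to its key once one is provided;
# the dict is filled outside this diff.
if 'api_keys' not in st.session_state:
    st.session_state['api_keys'] = {}
if 'model' not in st.session_state:
    st.session_state['model'] = 'mistral-7b-instruct-v0.1'

model = st.session_state['model']
no_key_yet = model is not None and model not in st.session_state['api_keys']
st.file_uploader("Upload an article", type=("pdf", "txt"), disabled=no_key_yet)
```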
@@ -198,7 +210,7 @@ with st.sidebar:
                    help="Number of chunks to consider when answering a question",
                    disabled=not uploaded_file)
 
-    st.session_state['ner_processing'] = st.checkbox("NER processing on LLM response")
+    st.session_state['ner_processing'] = st.checkbox("Named Entities Recognition (NER) processing on LLM response")
     st.markdown(
         '**NER on LLM responses**: The responses from the LLMs are post-processed to extract <span style="color:orange">physical quantities, measurements</span> and <span style="color:green">materials</span> mentions.',
         unsafe_allow_html=True)
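The last hunk only spells out the NER abbreviation in the checkbox label, but it shows the typical shape of checkbox-gated post-processing; `highlight_entities` below is a hypothetical stand-in for the app's actual NER step:

```python
import streamlit as st

def highlight_entities(text: str) -> str:
    # Hypothetical stand-in for the app's NER post-processing, which marks up
    # physical quantities, measurements and materials in LLM responses.
    return text.replace("1064 °C", ":orange[1064 °C]")

ner_on = st.checkbox("Named Entities Recognition (NER) processing on LLM response")

response = "Gold melts at 1064 °C."
st.markdown(highlight_entities(response) if ner_on else response)
```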