Rahkakavee Baskaran committed on
Commit
69cd746
1 Parent(s): ea3bd45

update tensors and add text to prediction

Browse files
Files changed (2) hide show
  1. app.py +60 -15
  2. corpus_embeddings.pt +2 -2
app.py CHANGED
@@ -2,7 +2,6 @@ from collections import Counter
2
  import pandas as pd
3
  import streamlit as st
4
  import json
5
- from plotly import express as px
6
  from safetensors import safe_open
7
  from semantic_search import predict
8
  from sentence_transformers import SentenceTransformer
@@ -154,7 +153,7 @@ model = SentenceTransformer(
154
 
155
  st.set_page_config(layout="wide")
156
 
157
- st.title("Musterdatenkatalog")
158
 
159
  st.markdown(
160
  """
@@ -168,13 +167,24 @@ st.markdown(
168
  )
169
 
170
  st.markdown(
171
- '<p class="font">This demo showcases the algorithm of Musterdatenkatalog (MDK) of the Bertelsmann Stiftung. The MDK is a taxonomy of Open Data in municipalities in Germany. It is intended to help municipalities in Germany, as well as data analysts and journalists, to get an overview of the topics and the extent to which cities have already published data sets.</p>',
 
 
 
 
 
 
172
  unsafe_allow_html=True,
173
  )
174
 
175
 
176
  st.markdown(
177
- '<p class="font"> For more details checkout the <a href=https://www.bertelsmann-stiftung.de/de/unsere-projekte/smart-country/musterdatenkatalog> Musterdatenkatalog.</p>',
 
 
 
 
 
178
  unsafe_allow_html=True,
179
  )
180
 
@@ -184,11 +194,14 @@ col1.metric("Datensätze", len(data))
184
  col2.metric("Themen", len(theme_counts))
185
  col3.metric("Bezeichnungen", len(labels_counts))
186
 
187
- st.title("Taxonomy")
188
 
189
- st.plotly_chart(fig)
190
-
191
- st.title("Predict a Dataset")
 
 
 
192
 
193
  st.markdown(
194
  """
@@ -218,9 +231,23 @@ st.markdown(
218
 
219
  col1, col2 = st.columns([1.2, 1])
220
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
  with col2:
223
- st.subheader("Example Input Dataset Names")
 
 
 
224
  examples = [
225
  "Spielplätze",
226
  "Berliner Weihnachtsmärkte 2022",
@@ -235,15 +262,28 @@ with col2:
235
 
236
 
237
  with col1:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  if "query" not in st.session_state:
239
- query = st.text_input(
240
- "Enter dataset name",
241
- )
242
  if "query" in st.session_state and st.session_state.query in examples:
243
- query = st.text_input("Enter dataset name", value=st.session_state.query)
244
  if "query" in st.session_state and st.session_state.query not in examples:
245
  del st.session_state["query"]
246
- query = st.text_input("Enter dataset name")
247
 
248
  top_k = st.select_slider("Top Results", options=[1, 2, 3, 4, 5], value=1)
249
 
@@ -257,4 +297,9 @@ with col1:
257
 
258
  if st.button("Predict"):
259
  for prediction in predictions:
260
- st.write(prediction)
 
 
 
 
 
 
2
  import pandas as pd
3
  import streamlit as st
4
  import json
 
5
  from safetensors import safe_open
6
  from semantic_search import predict
7
  from sentence_transformers import SentenceTransformer
 
153
 
154
  st.set_page_config(layout="wide")
155
 
156
+ st.title("Musterdatenkatalog (MDK)")
157
 
158
  st.markdown(
159
  """
 
167
  )
168
 
169
  st.markdown(
170
+ """
171
+ <style>
172
+ .prediction {
173
+ font-size:10px !important;
174
+ }
175
+ </style>
176
+ """,
177
  unsafe_allow_html=True,
178
  )
179
 
180
 
181
  st.markdown(
182
+ '<p class="font">This demo showcases the algorithm of Musterdatenkatalog (MDK) of the Bertelsmann Stiftung. The MDK is a taxonomy of Open Data in municipalities in Germany. It is intended to help municipalities in Germany, as well as data analysts and journalists, to get an overview of the topics and the extent to which cities have already published data sets.</p>',
183
+ unsafe_allow_html=True,
184
+ )
185
+
186
+ st.markdown(
187
+ '<p class="font"> For more details checkout the <a href=https://www.bertelsmann-stiftung.de/de/unsere-projekte/smart-country/musterdatenkatalog> Musterdatenkatalog </a>.</p>',
188
  unsafe_allow_html=True,
189
  )
190
 
 
194
  col2.metric("Themen", len(theme_counts))
195
  col3.metric("Bezeichnungen", len(labels_counts))
196
 
197
+ st.header("Explore the MDK-Classifier")
198
 
199
+ st.markdown(
200
+ '<p class="font"> This section allows you to predict a label from the MDK Taxonomy for a title of a dataset from municipalities. You can either enter your own dataset title or click on one of the examples. Checkout also <a href=https://www.govdata.de/> GOVDATA </a> for more dataset title examples. \
201
+ \
202
+ If you click on predict, the model will predict the most likely label for the dataset title. You can also change the number of labels that should be predicted. For example, if you change the Top Results to 3, the model will predict the 3 most likely labels for the dataset title in descending order. </p>',
203
+ unsafe_allow_html=True,
204
+ )
205
 
206
  st.markdown(
207
  """
 
231
 
232
  col1, col2 = st.columns([1.2, 1])
233
 
234
+ st.markdown(
235
+ """
236
+ <style>
237
+ .example {
238
+ font-size:24px !important;
239
+ }
240
+ </style>
241
+ """,
242
+ unsafe_allow_html=True,
243
+ )
244
+
245
 
246
  with col2:
247
+ st.markdown(
248
+ '<p class="example">Example Titles of Datasets</p>',
249
+ unsafe_allow_html=True,
250
+ )
251
  examples = [
252
  "Spielplätze",
253
  "Berliner Weihnachtsmärkte 2022",
 
262
 
263
 
264
  with col1:
265
+ tabs_font_css = """
266
+ <style>
267
+ div[class*="stTextInput"] label p {
268
+ font-size: 2px;
269
+ }
270
+ </style>
271
+ """
272
+
273
+ st.write(tabs_font_css, unsafe_allow_html=True)
274
+
275
+ st.markdown(
276
+ '<p class="example">Enter a dataset title</p>',
277
+ unsafe_allow_html=True,
278
+ )
279
+
280
  if "query" not in st.session_state:
281
+ query = st.text_input("")
 
 
282
  if "query" in st.session_state and st.session_state.query in examples:
283
+ query = st.text_input("Enter a dataset title", value=st.session_state.query)
284
  if "query" in st.session_state and st.session_state.query not in examples:
285
  del st.session_state["query"]
286
+ query = st.text_input("Enter a dataset title")
287
 
288
  top_k = st.select_slider("Top Results", options=[1, 2, 3, 4, 5], value=1)
289
 
 
297
 
298
  if st.button("Predict"):
299
  for prediction in predictions:
300
+ st.markdown(f'<p class="font"> {prediction} <p>', unsafe_allow_html=True)
301
+
302
+
303
+ st.header("Musterdatenkatalog Taxonomy")
304
+
305
+ st.plotly_chart(fig)
corpus_embeddings.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26b84a249335502cd14ad3ea5b7ce5523266b7e6dddfd5110ba6fdd5cd41828a
3
- size 746592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64143d425585aed670f2556432cb5c38d721a1902f75ffb8e57102e46ea00aaf
3
+ size 743520