Spaces:

and-effect
/

Musterdatenkatalog

Sleeping

App Files Files Community

Rahkakavee Baskaran committed on May 25, 2023

Commit

69cd746

•

1 Parent(s): ea3bd45

update tensors and add text to prediction

Browse files

Files changed (2) hide show

app.py +60 -15
corpus_embeddings.pt +2 -2

app.py CHANGED Viewed

@@ -2,7 +2,6 @@ from collections import Counter
 import pandas as pd
 import streamlit as st
 import json
-from plotly import express as px
 from safetensors import safe_open
 from semantic_search import predict
 from sentence_transformers import SentenceTransformer
@@ -154,7 +153,7 @@ model = SentenceTransformer(
 st.set_page_config(layout="wide")
-st.title("Musterdatenkatalog")
 st.markdown(
     """
@@ -168,13 +167,24 @@ st.markdown(
 )
 st.markdown(
-    '<p class="font">This demo showcases the algorithm of Musterdatenkatalog (MDK) of the Bertelsmann Stiftung. The MDK is a taxonomy of Open Data in municipalities in Germany. It is intended to help municipalities in Germany, as well as data analysts and journalists, to get an overview of the topics and the extent to which cities have already published data sets.</p>',
     unsafe_allow_html=True,
 )
 st.markdown(
-    '<p class="font"> For more details checkout the <a href=https://www.bertelsmann-stiftung.de/de/unsere-projekte/smart-country/musterdatenkatalog> Musterdatenkatalog.</p>',
     unsafe_allow_html=True,
 )
@@ -184,11 +194,14 @@ col1.metric("Datensätze", len(data))
 col2.metric("Themen", len(theme_counts))
 col3.metric("Bezeichnungen", len(labels_counts))
-st.title("Taxonomy")
-st.plotly_chart(fig)
-st.title("Predict a Dataset")
 st.markdown(
     """
@@ -218,9 +231,23 @@ st.markdown(
 col1, col2 = st.columns([1.2, 1])
 with col2:
-    st.subheader("Example Input Dataset Names")
     examples = [
         "Spielplätze",
         "Berliner Weihnachtsmärkte 2022",
@@ -235,15 +262,28 @@ with col2:
 with col1:
     if "query" not in st.session_state:
-        query = st.text_input(
-            "Enter dataset name",
-        )
     if "query" in st.session_state and st.session_state.query in examples:
-        query = st.text_input("Enter dataset name", value=st.session_state.query)
     if "query" in st.session_state and st.session_state.query not in examples:
         del st.session_state["query"]
-        query = st.text_input("Enter dataset name")
     top_k = st.select_slider("Top Results", options=[1, 2, 3, 4, 5], value=1)
@@ -257,4 +297,9 @@ with col1:
     if st.button("Predict"):
         for prediction in predictions:
-            st.write(prediction)

 import pandas as pd
 import streamlit as st
 import json
 from safetensors import safe_open
 from semantic_search import predict
 from sentence_transformers import SentenceTransformer
 st.set_page_config(layout="wide")
+st.title("Musterdatenkatalog (MDK)")
 st.markdown(
     """
 )
 st.markdown(
+    """
+<style>
+.prediction {
+    font-size:10px !important;
+}
+</style>
+""",
     unsafe_allow_html=True,
 )
 st.markdown(
+    '<p class="font">This demo showcases the algorithm of Musterdatenkatalog (MDK) of the Bertelsmann Stiftung. The MDK is a taxonomy of Open Data in municipalities in Germany. It is intended to help municipalities in Germany, as well as data analysts and journalists, to get an overview of the topics and the extent to which cities have already published data sets.</p>',
+    unsafe_allow_html=True,
+)
+st.markdown(
+    '<p class="font"> For more details checkout the <a href=https://www.bertelsmann-stiftung.de/de/unsere-projekte/smart-country/musterdatenkatalog> Musterdatenkatalog </a>.</p>',
     unsafe_allow_html=True,
 )
 col2.metric("Themen", len(theme_counts))
 col3.metric("Bezeichnungen", len(labels_counts))
+st.header("Explore the MDK-Classifier")
+st.markdown(
+    '<p class="font"> This section allows you to predict a label from the MDK Taxonomy for a title of a dataset from municipalities. You can either enter your own dataset title or click on one of the examples. Checkout also <a href=https://www.govdata.de/> GOVDATA </a> for more dataset title examples. \
+    \
+    If you click on predict, the model will predict the most likely label for the dataset title. You can also change the number of labels that should be predicted. For example, if you change the Top Results to 3, the model will predict the 3 most likely labels for the dataset title in descending order. </p>',
+    unsafe_allow_html=True,
+)
 st.markdown(
     """
 col1, col2 = st.columns([1.2, 1])
+st.markdown(
+    """
+<style>
+.example {
+    font-size:24px !important;
+}
+</style>
+""",
+    unsafe_allow_html=True,
+)
 with col2:
+    st.markdown(
+        '<p class="example">Example Titles of Datasets</p>',
+        unsafe_allow_html=True,
+    )
     examples = [
         "Spielplätze",
         "Berliner Weihnachtsmärkte 2022",
 with col1:
+    tabs_font_css = """
+    <style>
+    div[class*="stTextInput"] label p {
+    font-size: 2px;
+    }
+    </style>
+    """
+    st.write(tabs_font_css, unsafe_allow_html=True)
+    st.markdown(
+        '<p class="example">Enter a dataset title</p>',
+        unsafe_allow_html=True,
+    )
     if "query" not in st.session_state:
+        query = st.text_input("")
     if "query" in st.session_state and st.session_state.query in examples:
+        query = st.text_input("Enter a dataset title", value=st.session_state.query)
     if "query" in st.session_state and st.session_state.query not in examples:
         del st.session_state["query"]
+        query = st.text_input("Enter a dataset title")
     top_k = st.select_slider("Top Results", options=[1, 2, 3, 4, 5], value=1)
     if st.button("Predict"):
         for prediction in predictions:
+            st.markdown(f'<p class="font"> {prediction} <p>', unsafe_allow_html=True)
+st.header("Musterdatenkatalog Taxonomy")
+st.plotly_chart(fig)

corpus_embeddings.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26b84a249335502cd14ad3ea5b7ce5523266b7e6dddfd5110ba6fdd5cd41828a
-size 746592

 version https://git-lfs.github.com/spec/v1
+oid sha256:64143d425585aed670f2556432cb5c38d721a1902f75ffb8e57102e46ea00aaf
+size 743520