import datasets
import glob
import json
import pandas as pd
import streamlit as st
import sys
import textwrap

from thermostat import load
from thermostat.data.thermostat_configs import builder_configs

nlp = datasets

HTML_WRAPPER = """<div>{}</div>"""
# HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
#                   margin-bottom: 2.5rem">{}</div>"""

MAX_SIZE = 40000000000

if len(sys.argv) > 1:
    path_to_datasets = sys.argv[1]
else:
    path_to_datasets = None

# Hack to extend the width of the main pane.
def _max_width_():
    max_width_str = f"max-width: 1000px;"
    st.markdown(
        f"""
    <style>
    .reportview-container .main .block-container{{
        {max_width_str}
    }}
    th {{
        text-align: left;
        font-size: 110%;
    }}
    tr:hover {{
        background-color: #ffff99;
    }}
    </style>
    """,
        unsafe_allow_html=True,
    )


_max_width_()
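
# Map a datasets Features object to a plain, displayable structure: class labels
# become their label names, values their dtype, and sequences a nested dict.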
def render_features(features):
    if isinstance(features, dict):
        return {k: render_features(v) for k, v in features.items()}
    if isinstance(features, nlp.features.ClassLabel):
        return features.names
    if isinstance(features, nlp.features.Value):
        return features.dtype
    if isinstance(features, nlp.features.Sequence):
        return {"[]": render_features(features.feature)}
    return features
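
# Restore app state from the URL query parameters so that a specific dataset
# and configuration can be linked to directly; "glue" is the fallback dataset.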
app_state = st.experimental_get_query_params()
start = True
loaded = True
INITIAL_SELECTION = ""
app_state.setdefault("dataset", "glue")
if len(app_state.get("dataset", [])) == 1:
    app_state["dataset"] = app_state["dataset"][0]
    INITIAL_SELECTION = app_state["dataset"]
# print(INITIAL_SELECTION)

if start:
    # Logo and sidebar decoration.
    st.sidebar.markdown(
        """<center>
    <a href="https://github.com/DFKI-NLP/thermostat">
    </a>
    </center>""",
        unsafe_allow_html=True,
    )
    st.sidebar.image("logo.png", width=300)
    st.sidebar.markdown(
        "<center><h2><a href='https://github.com/DFKI-NLP/thermostat'>github/DFKI-NLP/thermostat</a></h2></center>",
        unsafe_allow_html=True,
    )
    st.sidebar.markdown(
        """
        <center>
            <a target="_blank" href="https://huggingface.co/docs/datasets/">datasets Docs</a>
        </center>""",
        unsafe_allow_html=True,
    )
    st.sidebar.subheader("")

    # Interaction with the nlp library.
    # @st.cache
    def get_confs():
        """ Get the list of confs for a dataset. """
        confs = builder_configs
        if confs and len(confs) > 1:
            return confs
        else:
            return []

    # @st.cache(allow_output_mutation=True)
    def get(conf):
        """ Get a dataset from name and conf. """
        ds = load(conf, cache_dir=path_to_datasets)
        return ds, False

    # Dataset select box.
    datasets = []
    selection = None
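
    # If a local path was given on the command line, list dataset folders from
    # disk; otherwise fall back to the hub listing of the datasets library.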
    if path_to_datasets is None:
        list_of_datasets = nlp.list_datasets(with_community_datasets=False)
    else:
        list_of_datasets = sorted(glob.glob(path_to_datasets + "*"))
    for i, dataset in enumerate(list_of_datasets):
        dataset = dataset.split("/")[-1]
        if INITIAL_SELECTION and dataset == INITIAL_SELECTION:
            selection = i
        datasets.append(dataset)

    st.experimental_set_query_params(**app_state)

    # Side bar configurations.
    configs = get_confs()
    conf_avail = len(configs) > 0
    conf_option = None
    if conf_avail:
        start = 0
        for i, conf in enumerate(configs):
            if conf.name == app_state.get("config", None):
                start = i
        conf_option = st.sidebar.selectbox(
            "Thermostat configuration", configs, index=start, format_func=lambda a: a.name
        )
        app_state["config"] = conf_option.name
    else:
        if "config" in app_state:
            del app_state["config"]
    st.experimental_set_query_params(**app_state)

    dts, fail = get(str(conf_option.name) if conf_option else None)
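
    # `get` always returns fail=False at the moment, so the branch below only
    # serves as a safeguard for oversized or manual-download datasets.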
    # Main panel setup.
    if fail:
        st.markdown(
            "Dataset is too large to browse or requires manual download. Check it out in the datasets library! \n\n "
            "Size: "
            + str(dts.info.size_in_bytes)
            + "\n\n Instructions: "
            + str(dts.manual_download_instructions)
        )
    else:
        d = dts
        keys = list(d[0].__dict__.keys())

        st.header(
            "Thermostat configuration: "
            + (conf_option.name if conf_option else "")
        )

        st.markdown(
            "*Homepage*: "
            + d.info.homepage
        )

        md = """
        %s
        """ % (
            d.info.description.replace("\\", " ")
        )
        st.markdown(md)

        step = 50
        offset = st.sidebar.number_input(
            "Offset (Size: %d)" % len(d),
            min_value=0,
            max_value=int(len(d)) - step,
            value=0,
            step=step,
        )

        citation = None  # st.sidebar.checkbox("Show Citations", False)
        table = not st.sidebar.checkbox("Show List View", False)
        show_features = st.sidebar.checkbox("Show Features", True)
        show_atts = st.sidebar.checkbox("Show Attribution Scores", False)
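
        # The citation block below is prepared but only rendered if the
        # (currently disabled) "Show Citations" checkbox is restored.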
md = """ | |
``` | |
%s | |
``` | |
""" % ( | |
d.info.citation.replace("\\", "").replace("}", " }").replace("{", "{ "), | |
) | |
if citation: | |
st.markdown(md) | |
# st.text("Features:") | |
#if show_features: | |
# on_keys = st.multiselect("Features", keys, keys) | |
# #st.write(render_features(d.features)) | |
#else: | |
on_keys = keys | |
# Remove some keys | |
on_keys = [k for k in on_keys if k in ['predictions', 'true_label', 'predicted_label']] | |
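
        # Render either the plain list view or the default table view with one
        # attribution heatmap per example.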
        if not table:
            # Full view.
            for item in range(offset, offset + step):
                st.text(" ")
                st.text(" ---- #" + str(item))
                st.text(" ")
                # Use st to write out.
                for k in on_keys:
                    v = getattr(d[item], k)
                    st.subheader(k)
                    if isinstance(v, str):
                        out = v
                        st.text(textwrap.fill(out, width=120))
                    elif (
                        isinstance(v, bool)
                        or isinstance(v, int)
                        or isinstance(v, float)
                    ):
                        st.text(v)
                    else:
                        st.write(v)
        else:
            # Table view. Use pandas.
            df, heatmap_htmls = [], []
            for item in range(offset, offset + step):
                df_item = {}
                df_item["_number"] = item
                for k in on_keys:
                    v = getattr(d[item], k)
                    # Remove [PAD] tokens (id/score 0) from attributions and input_ids.
                    if k in ['attributions', 'input_ids']:
                        v = [vi for vi in v if vi != 0]
                    if isinstance(v, str):
                        out = v
                        df_item[k] = textwrap.fill(out, width=50)
                    elif (
                        isinstance(v, bool)
                        or isinstance(v, int)
                        or isinstance(v, float)
                    ):
                        df_item[k] = v
                    else:
                        out = json.dumps(v, indent=2, sort_keys=True)
                        df_item[k] = out

                # Add heatmap viz.
                html = getattr(d[item], 'heatmap').render(labels=show_atts)
                html = html.replace("\n", " ")
                heatmap_htmls.append(HTML_WRAPPER.format(html))

                df.append(df_item)
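
            # One DataFrame row per example; the rendered heatmaps are written
            # out separately with st.write above each table row further below.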
            df2 = df
            df = pd.DataFrame(df).set_index("_number")

            def hover(hover_color="#ffff99"):
                return dict(
                    selector="tr:hover",
                    props=[("background-color", "%s" % hover_color)],
                )

            styles = [
                hover(),
                dict(
                    selector="th",
                    props=[("font-size", "150%"), ("text-align", "center")],
                ),
                dict(selector="caption", props=[("caption-side", "bottom")]),
            ]

            # Table view. Use pandas styling.
            style = df.style.set_properties(
                **{"text-align": "left", "white-space": "pre"}
            ).set_table_styles([dict(selector="th", props=[("text-align", "left")])])
            style = style.set_table_styles(styles)  # Setting the style appears to be broken for streamlit+pandas.

            for i, heatmap_html in enumerate(heatmap_htmls):
                # heatmap_html is already wrapped in HTML_WRAPPER above.
                st.write(heatmap_html, unsafe_allow_html=True)
                st.table(df.iloc[[i]])
                st.markdown(""" --- """)

    # Additional dataset installation and sidebar properties.
    md = """
    ### Code

    ```python
    !pip install thermostat_datasets
    from thermostat import load
    dataset = load('%s')
    ```
    """ % (conf_option.name if conf_option else "")
    st.sidebar.markdown(md)