Spaces:
Sleeping
Sleeping
Added attribution viz for ITA
Browse files
app.py
CHANGED
@@ -1,12 +1,15 @@
|
|
1 |
-
from datasets import load_dataset
|
2 |
import streamlit as st
|
|
|
3 |
|
4 |
st.set_page_config(layout="wide")
|
5 |
|
6 |
dataset = load_dataset("GroNLP/divemt")
|
|
|
7 |
df = dataset["train"].to_pandas()
|
8 |
unique_src = df[["item_id", "src_text"]].drop_duplicates(subset="item_id")
|
9 |
langs = list(df["lang_id"].unique())
|
|
|
10 |
|
11 |
st.title("DivEMT Explorer π π")
|
12 |
st.markdown("""
|
@@ -81,4 +84,15 @@ for lang in langs:
|
|
81 |
else:
|
82 |
st.text("MT : N/A\nPE : N/A\nEVAL: N/A\n")
|
83 |
st.markdown(f"<b>Metadata</b>:", unsafe_allow_html=True)
|
84 |
-
st.json({k:v for k,v in dic.items() if k not in ["src_text", "mt_text", "tgt_text", "aligned_edit"]}, expanded=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datasets import load_dataset, DownloadManager
|
2 |
import streamlit as st
|
3 |
+
from inseq import FeatureAttributionOutput
|
4 |
|
5 |
st.set_page_config(layout="wide")
|
6 |
|
7 |
dataset = load_dataset("GroNLP/divemt")
|
8 |
+
attribution_path = "https://huggingface.co/datasets/inseq/divemt_attributions/resolve/main/divemt-attributions/it/{idx}_it_gradl2_{setting}_{sentence_type}.json.gz"
|
9 |
df = dataset["train"].to_pandas()
|
10 |
unique_src = df[["item_id", "src_text"]].drop_duplicates(subset="item_id")
|
11 |
langs = list(df["lang_id"].unique())
|
12 |
+
dl = DownloadManager()
|
13 |
|
14 |
st.title("DivEMT Explorer π π")
|
15 |
st.markdown("""
|
|
|
84 |
else:
|
85 |
st.text("MT : N/A\nPE : N/A\nEVAL: N/A\n")
|
86 |
st.markdown(f"<b>Metadata</b>:", unsafe_allow_html=True)
|
87 |
+
st.json({k:v for k,v in dic.items() if k not in ["src_text", "mt_text", "tgt_text", "aligned_edit"]}, expanded=False)
|
88 |
+
if lang == "ita" and task_name != "From Scratch (HT)":
|
89 |
+
setting = "pe1" if task_name == "Google PE (PE1)" else "pe2"
|
90 |
+
st.markdown(f"<b>Attributions</b>:", unsafe_allow_html=True)
|
91 |
+
st.text("Click on checkboxes to show/hide the respective attributions computed with mBART 1-to-50.")
|
92 |
+
for sentence_type in ["mt", "pe", "diff"]:
|
93 |
+
url = attribution_path.format(idx=item_id, setting=setting, sentence_type=sentence_type)
|
94 |
+
file_path = dl.download(url)
|
95 |
+
attr = FeatureAttributionOutput.load(file_path, decompress=True)
|
96 |
+
if st.checkbox(sentence_type.upper(), key=f"{lang}_{task_name}_{sentence_type}"):
|
97 |
+
st.markdown(f"{attr.show(return_html=True, display=False, do_aggregation=False)}", unsafe_allow_html=True)
|
98 |
+
|