gsarti committed on
Commit
74105b6
•
1 Parent(s): c08f926

Added attribution viz for ITA

Browse files
Files changed (1) hide show
  1. app.py +16 -2
app.py CHANGED
@@ -1,12 +1,15 @@
1
- from datasets import load_dataset
2
  import streamlit as st
 
3
 
4
  st.set_page_config(layout="wide")
5
 
6
  dataset = load_dataset("GroNLP/divemt")
 
7
  df = dataset["train"].to_pandas()
8
  unique_src = df[["item_id", "src_text"]].drop_duplicates(subset="item_id")
9
  langs = list(df["lang_id"].unique())
 
10
 
11
  st.title("DivEMT Explorer 🔍 🌍")
12
  st.markdown("""
@@ -81,4 +84,15 @@ for lang in langs:
81
  else:
82
  st.text("MT : N/A\nPE : N/A\nEVAL: N/A\n")
83
  st.markdown(f"<b>Metadata</b>:", unsafe_allow_html=True)
84
- st.json({k:v for k,v in dic.items() if k not in ["src_text", "mt_text", "tgt_text", "aligned_edit"]}, expanded=False)
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset, DownloadManager
2
  import streamlit as st
3
+ from inseq import FeatureAttributionOutput
4
 
5
  st.set_page_config(layout="wide")
6
 
7
  dataset = load_dataset("GroNLP/divemt")
8
+ attribution_path = "https://huggingface.co/datasets/inseq/divemt_attributions/resolve/main/divemt-attributions/it/{idx}_it_gradl2_{setting}_{sentence_type}.json.gz"
9
  df = dataset["train"].to_pandas()
10
  unique_src = df[["item_id", "src_text"]].drop_duplicates(subset="item_id")
11
  langs = list(df["lang_id"].unique())
12
+ dl = DownloadManager()
13
 
14
  st.title("DivEMT Explorer 🔍 🌍")
15
  st.markdown("""
 
84
  else:
85
  st.text("MT : N/A\nPE : N/A\nEVAL: N/A\n")
86
  st.markdown(f"<b>Metadata</b>:", unsafe_allow_html=True)
87
+ st.json({k:v for k,v in dic.items() if k not in ["src_text", "mt_text", "tgt_text", "aligned_edit"]}, expanded=False)
88
+ if lang == "ita" and task_name != "From Scratch (HT)":
89
+ setting = "pe1" if task_name == "Google PE (PE1)" else "pe2"
90
+ st.markdown(f"<b>Attributions</b>:", unsafe_allow_html=True)
91
+ st.text("Click on checkboxes to show/hide the respective attributions computed with mBART 1-to-50.")
92
+ for sentence_type in ["mt", "pe", "diff"]:
93
+ url = attribution_path.format(idx=item_id, setting=setting, sentence_type=sentence_type)
94
+ file_path = dl.download(url)
95
+ attr = FeatureAttributionOutput.load(file_path, decompress=True)
96
+ if st.checkbox(sentence_type.upper(), key=f"{lang}_{task_name}_{sentence_type}"):
97
+ st.markdown(f"{attr.show(return_html=True, display=False, do_aggregation=False)}", unsafe_allow_html=True)
98
+