Spaces:
Runtime error
Runtime error
PereLluis13
committed on
Commit
•
da80bd2
1
Parent(s):
e02be2a
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,10 @@ import torch
|
|
7 |
|
8 |
def load_tok_and_data(lan):
|
9 |
st_time = time()
|
10 |
-
tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large",
|
|
|
|
|
|
|
11 |
dataset = load_dataset('Babelscape/SREDFM', lan, split="validation", streaming=True)
|
12 |
dataset = [example for example in dataset.take(1001)]
|
13 |
return (tokenizer, dataset)
|
@@ -59,7 +62,7 @@ def extract_triplets_typed(text):
|
|
59 |
triplets.append({'head': subject.strip(), 'head_type': subject_type, 'type': relation.strip(),'tail': object_.strip(), 'tail_type': object_type})
|
60 |
return triplets
|
61 |
|
62 |
-
st.markdown("""This is a demo for the
|
63 |
|
64 |
model = load_model()
|
65 |
|
@@ -105,7 +108,11 @@ st.write(text)
|
|
105 |
|
106 |
if not agree:
|
107 |
st.title('Silver output')
|
108 |
-
|
|
|
|
|
|
|
|
|
109 |
|
110 |
st.title('Prediction text')
|
111 |
decoded_preds = [text.replace('<s>', '').replace('</s>', '').replace('<pad>', '') for text in decoded_preds]
|
|
|
7 |
|
8 |
def load_tok_and_data(lan):
|
9 |
st_time = time()
|
10 |
+
tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", tgt_lang="tp_XX")
|
11 |
+
tokenizer._src_lang = _Tokens[lan]
|
12 |
+
tokenizer.cur_lang_code_id = tokenizer.convert_tokens_to_ids(_Tokens[lan])
|
13 |
+
tokenizer.set_src_lang_special_tokens(_Tokens[lan])
|
14 |
dataset = load_dataset('Babelscape/SREDFM', lan, split="validation", streaming=True)
|
15 |
dataset = [example for example in dataset.take(1001)]
|
16 |
return (tokenizer, dataset)
|
|
|
62 |
triplets.append({'head': subject.strip(), 'head_type': subject_type, 'type': relation.strip(),'tail': object_.strip(), 'tail_type': object_type})
|
63 |
return triplets
|
64 |
|
65 |
+
st.markdown("""This is a demo for the ACL 2023 paper [RED<sup>FM</sup>: a Filtered and Multilingual Relation Extraction Dataset](https://arxiv.org/abs/2306.09802). The pre-trained model is able to extract triplets for up to 400 relation types from Wikidata or be used in downstream Relation Extraction task by fine-tuning. Find the model card [here](https://huggingface.co/Babelscape/mrebel-large). Read more about it in the [paper](https://arxiv.org/abs/2306.09802) and in the original [repository](https://github.com/Babelscape/rebel#REDFM).""")
|
66 |
|
67 |
model = load_model()
|
68 |
|
|
|
108 |
|
109 |
if not agree:
|
110 |
st.title('Silver output')
|
111 |
+
entities = dataset[dataset_example]['entities']
|
112 |
+
relations =[]
|
113 |
+
for trip in dataset[dataset_example]['relations']:
|
114 |
+
relations.append({'subject': entities[trip['subject']], 'predicate': trip['predicate'], 'object': entities[trip['object']]})
|
115 |
+
st.write(relations)
|
116 |
|
117 |
st.title('Prediction text')
|
118 |
decoded_preds = [text.replace('<s>', '').replace('</s>', '').replace('<pad>', '') for text in decoded_preds]
|