PereLluis13 committed
Commit • 4e6bd61
1 Parent(s): da80bd2

Update app.py
app.py
CHANGED
```diff
@@ -11,7 +11,7 @@ def load_tok_and_data(lan):
     tokenizer._src_lang = _Tokens[lan]
     tokenizer.cur_lang_code_id = tokenizer.convert_tokens_to_ids(_Tokens[lan])
     tokenizer.set_src_lang_special_tokens(_Tokens[lan])
-    dataset = load_dataset('Babelscape/SREDFM', lan, split="
+    dataset = load_dataset('Babelscape/SREDFM', lan, split="test", streaming=True)
     dataset = [example for example in dataset.take(1001)]
     return (tokenizer, dataset)
 
```
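The updated call loads the Babelscape/SREDFM test split in streaming mode, so the list comprehension that follows only materializes the first 1001 examples instead of downloading the whole split. A minimal standalone sketch of the same pattern, assuming the chosen language config and its test split are accessible via the datasets library:

```python
# Sketch of the streaming pattern used in load_tok_and_data (not the full app code).
# load_dataset(..., streaming=True) returns an IterableDataset; .take(n) yields only
# the first n examples, so nothing beyond them needs to be downloaded.
from datasets import load_dataset

lan = "en"  # any of the language configs offered in the selectbox
stream = load_dataset("Babelscape/SREDFM", lan, split="test", streaming=True)
examples = [example for example in stream.take(1001)]
print(len(examples))
```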
```diff
@@ -62,7 +62,7 @@ def extract_triplets_typed(text):
         triplets.append({'head': subject.strip(), 'head_type': subject_type, 'type': relation.strip(),'tail': object_.strip(), 'tail_type': object_type})
     return triplets
 
-st.markdown("""This is a demo for the ACL 2023 paper [RED
+st.markdown("""This is a demo for the ACL 2023 paper [RED$^{\rm FM}$: a Filtered and Multilingual Relation Extraction Dataset](https://arxiv.org/abs/2306.09802). The pre-trained model is able to extract triplets for up to 400 relation types from Wikidata or be used in downstream Relation Extraction task by fine-tuning. Find the model card [here](https://huggingface.co/Babelscape/mrebel-large). Read more about it in the [paper](https://arxiv.org/abs/2306.09802) and in the original [repository](https://github.com/Babelscape/rebel#REDFM).""")
 
 model = load_model()
 
```
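The restored blurb links the model card that backs the demo, Babelscape/mrebel-large. The body of the load_model() helper called just below is not part of this diff; a hypothetical minimal version, assuming the Space loads that checkpoint with transformers and caches it across Streamlit reruns (the real helper may differ):

```python
# Hypothetical sketch of a load_model() helper; the actual implementation in
# app.py is not shown in this diff and may use different options.
import streamlit as st
from transformers import AutoModelForSeq2SeqLM

@st.cache_resource  # keep a single model instance across Streamlit reruns
def load_model():
    # Checkpoint named in the demo text's model card link.
    return AutoModelForSeq2SeqLM.from_pretrained("Babelscape/mrebel-large")
```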
```diff
@@ -70,7 +70,7 @@ lan = st.selectbox(
     'Select a Language',
     ('ar', 'ca', 'de', 'el', 'en', 'es', 'fr', 'hi', 'it', 'ja', 'ko', 'nl', 'pl', 'pt', 'ru', 'sv', 'vi', 'zh'), index=1)
 
-_Tokens = {'en': 'en_XX', 'de': 'de_DE', 'ca': 'ca_XX', 'ar': 'ar_AR', 'el': 'el_EL', 'it': 'it_IT', 'ja': 'ja_XX', 'ko': 'ko_KR', 'hi': 'hi_IN', 'pt': 'pt_XX', 'ru': 'ru_RU', 'pl': 'pl_PL', 'zh': 'zh_CN', 'fr': 'fr_XX', 'vi': 'vi_VN', 'sv':'sv_SE'}
+_Tokens = {'en': 'en_XX', 'de': 'de_DE', 'ca': 'ca_XX', 'ar': 'ar_AR', 'el': 'el_EL', 'es': 'es_XX', 'it': 'it_IT', 'ja': 'ja_XX', 'ko': 'ko_KR', 'hi': 'hi_IN', 'pt': 'pt_XX', 'ru': 'ru_RU', 'pl': 'pl_PL', 'zh': 'zh_CN', 'fr': 'fr_XX', 'vi': 'vi_VN', 'sv':'sv_SE'}
 
 tokenizer, dataset = load_tok_and_data(lan)
 
```
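The only change here adds the missing 'es': 'es_XX' entry. Since 'es' is offered in the selectbox above and load_tok_and_data(lan) looks up _Tokens[lan], selecting Spanish would previously have failed with a KeyError. A toy illustration (trimmed dictionary, not the app's code):

```python
# Toy illustration of the bug the added 'es' entry fixes; the dictionary is
# trimmed and the names mirror the app only for readability.
_Tokens = {'en': 'en_XX', 'de': 'de_DE'}    # old dict: no 'es' key
lan = 'es'                                  # user picks Spanish in the selectbox

try:
    src_code = _Tokens[lan]                 # before the fix: raises KeyError
except KeyError:
    src_code = None

_Tokens['es'] = 'es_XX'                     # the entry added in this commit
assert _Tokens[lan] == 'es_XX'              # after the fix the lookup succeeds
```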