Update README.md
Browse files
README.md
CHANGED
@@ -20,3 +20,20 @@ This model was trained to translate multiple sentences at once, compared to one
|
|
20 |
It will occasionally combine sentences or add an extra sentence.
|
21 |
|
22 |
This is the same model as the one provided on CLARIN: https://repository.clarin.is/repository/xmlui/handle/20.500.12537/278
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
It will occasionally combine sentences or add an extra sentence.
|
21 |
|
22 |
This is the same model as the one provided on CLARIN: https://repository.clarin.is/repository/xmlui/handle/20.500.12537/278
|
23 |
+
|
24 |
+
You can use the following example to get started (note that it is necessary to alter the `decoder_start_token_id` of the model):
|
25 |
+
|
26 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
27 |
+
import torch
|
28 |
+
|
29 |
+
device = torch.cuda.current_device() if torch.cuda.is_available() else -1
|
30 |
+
|
31 |
+
tokenizer = AutoTokenizer.from_pretrained("mideind/nmt-doc-en-is-2022-10",src_lang="en_XX",tgt_lang="is_IS")
|
32 |
+
|
33 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("mideind/nmt-doc-en-is-2022-10")
|
34 |
+
model.config.decoder_start_token_id = 2
|
35 |
+
|
36 |
+
translate = pipeline("translation_XX_to_YY",model=model,tokenizer=tokenizer,device=device,src_lang="en_XX",tgt_lang="is_IS")
|
37 |
+
|
38 |
+
target_seq = translate("I am using a translation model to translate text from English to Icelandic.",src_lang="en_XX",tgt_lang="is_IS",max_length=128)
|
39 |
+
print(target_seq[0]['translation_text'].strip('YY '))
|