Update README.md
README.md CHANGED
@@ -22,24 +22,16 @@ from transformers import RobertaTokenizerFast, RobertaForMaskedLM
 import torch
 # Load the tokenizer
 tokenizer = RobertaTokenizerFast.from_pretrained('minhtoan/roberta-masked-lm-vietnamese-nom')
-
 # Load the model
 model = RobertaForMaskedLM.from_pretrained('minhtoan/roberta-masked-lm-vietnamese-nom')
 
-
-
-
-# Tokenize the input sentence
-input_tokens = tokenizer.encode(input_sentence, return_tensors='pt')
-
-# Generate predictions
-with torch.no_grad():
-    outputs = model(input_tokens)
-    predictions = outputs.logits.argmax(dim=-1)
+text = '<mask>如㗂䳽𠖤戈'
+inputs = tokenizer(text, return_tensors="pt")
+mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
 
-
-
-print("Predicted word:",
+logits = model(**inputs).logits
+mask_token_logits = logits[0, mask_token_index, :]
+print("Predicted word:", tokenizer.decode(mask_token_logits[0].argmax()))
 ~~~~
 
 
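For quick verification, the updated snippet from the diff above can be assembled into a self-contained script. This is a minimal sketch, assuming the model and tokenizer download successfully from the Hub and that `<mask>` is the tokenizer's mask token; the top-5 listing via `torch.topk` at the end is an illustrative extension, not part of this commit.

~~~~python
import torch
from transformers import RobertaTokenizerFast, RobertaForMaskedLM

# Load the tokenizer and model (as in the updated README)
tokenizer = RobertaTokenizerFast.from_pretrained('minhtoan/roberta-masked-lm-vietnamese-nom')
model = RobertaForMaskedLM.from_pretrained('minhtoan/roberta-masked-lm-vietnamese-nom')

text = '<mask>如㗂䳽𠖤戈'
inputs = tokenizer(text, return_tensors="pt")
# Locate the position(s) of the mask token in the encoded input
mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]

# Inference only: no gradient tracking needed
with torch.no_grad():
    logits = model(**inputs).logits
mask_token_logits = logits[0, mask_token_index, :]

# Single best prediction, as printed in the README
print("Predicted word:", tokenizer.decode(mask_token_logits[0].argmax()))

# Illustrative extension (not in the commit): top-5 candidate tokens
top5 = torch.topk(mask_token_logits[0], k=5)
for token_id in top5.indices:
    print(tokenizer.decode(int(token_id)))
~~~~

Wrapping the forward pass in `torch.no_grad()` mirrors the intent of the removed code and avoids building an autograd graph during inference; the commit's one-liner `model(**inputs).logits` behaves the same apart from that bookkeeping.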