minhtoan committed on
Commit
61c5360
1 Parent(s): 2b86a88

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -14
README.md CHANGED
@@ -22,24 +22,16 @@ from transformers import RobertaTokenizerFast, RobertaForMaskedLM
22
  import torch
23
  # Load the tokenizer
24
  tokenizer = RobertaTokenizerFast.from_pretrained('minhtoan/roberta-masked-lm-vietnamese-nom')
25
-
26
  # Load the model
27
  model = RobertaForMaskedLM.from_pretrained('minhtoan/roberta-masked-lm-vietnamese-nom')
28
 
29
- # Example input sentence with a masked token
30
- input_sentence = '想払𨀐' + '<mask>'
31
-
32
- # Tokenize the input sentence
33
- input_tokens = tokenizer.encode(input_sentence, return_tensors='pt')
34
-
35
- # Generate predictions
36
- with torch.no_grad():
37
- outputs = model(input_tokens)
38
- predictions = outputs.logits.argmax(dim=-1)
39
 
40
- # Decode and print the predicted word
41
- predicted_word = tokenizer.decode(predictions[0, -1].item())
42
- print("Predicted word:", predicted_word)
43
  ~~~~
44
 
45
 
 
22
  import torch
23
  # Load the tokenizer
24
  tokenizer = RobertaTokenizerFast.from_pretrained('minhtoan/roberta-masked-lm-vietnamese-nom')
 
25
  # Load the model
26
  model = RobertaForMaskedLM.from_pretrained('minhtoan/roberta-masked-lm-vietnamese-nom')
27
 
28
+ text = '<mask>如㗂䳽𠖤戈'
29
+ inputs = tokenizer(text, return_tensors="pt")
30
+ mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
 
 
 
 
 
 
 
31
 
32
+ logits = model(**inputs).logits
33
+ mask_token_logits = logits[0, mask_token_index, :]
34
+ print("Predicted word:", tokenizer.decode(mask_token_logits[0].argmax()))
35
  ~~~~
36
 
37