metadata
license: apache-2.0
language:
- en
- ko
tags:
- transliteration
- multilingual
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
_MODEL_CHECKPOINT = "eunsour/en-ko-transliterator"

# Maps checkpoint name -> (model, tokenizer). Loading is expensive, so each
# checkpoint is loaded once and then reused for the lifetime of the process.
_LOADED_PIPELINES = {}


def _load_pipeline(checkpoint: str = _MODEL_CHECKPOINT):
    """Return the ``(model, tokenizer)`` pair for *checkpoint*, loading it once."""
    if checkpoint not in _LOADED_PIPELINES:
        model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
        tokenizer = AutoTokenizer.from_pretrained(
            checkpoint, src_lang="en", tgt_lang="ko"
        )
        _LOADED_PIPELINES[checkpoint] = (model, tokenizer)
    return _LOADED_PIPELINES[checkpoint]


def transliteration(word: str):
    """Transliterate an English word with the en-ko transliterator model.

    The input is tokenized (truncated to at most 48 tokens), passed through
    ``model.generate``, and the generated ids are decoded with special
    tokens removed.

    Args:
        word: The source text to transliterate.

    Returns:
        The list of decoded strings produced by ``tokenizer.batch_decode``
        (one entry per generated sequence).
    """
    # Reuse the cached model/tokenizer instead of re-loading them per call.
    model, tokenizer = _load_pipeline()
    encoded_en = tokenizer(word, truncation=True, max_length=48, return_tensors="pt")
    generated_tokens = model.generate(**encoded_en)
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
# Example call; the comment below shows the expected return value.
transliteration("transformer")
# ['트랜스포머']