eunsour's picture
Update README.md
900b70e
|
raw
history blame
710 Bytes
metadata
---
license: apache-2.0
language:
  - en
  - ko
tags:
  - transliteration
  - multilingual
---
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Loaded (model, tokenizer) pairs keyed by checkpoint name, so that repeated
# calls reuse the already-loaded objects instead of reloading them each time.
_TRANSLITERATOR_CACHE = {}


def _load_transliterator(model_checkpoint):
    """Return the ``(model, tokenizer)`` pair for *model_checkpoint*, loading it on first use."""
    cached = _TRANSLITERATOR_CACHE.get(model_checkpoint)
    if cached is not None:
        return cached

    model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, src_lang="en", tgt_lang="ko")

    _TRANSLITERATOR_CACHE[model_checkpoint] = (model, tokenizer)
    return model, tokenizer


def transliteration(word: str, *, model_checkpoint: str = "eunsour/en-ko-transliterator"):
    """Transliterate *word* with a seq2seq checkpoint (English -> Korean by default).

    The model and tokenizer are loaded once per checkpoint and reused on
    subsequent calls.

    Args:
        word: Text to transliterate. The tokenized input is truncated to at
            most 48 tokens.
        model_checkpoint: Name or path passed to ``from_pretrained`` for both
            the model and the tokenizer. The tokenizer is always created with
            ``src_lang="en"`` and ``tgt_lang="ko"``.

    Returns:
        The list produced by ``tokenizer.batch_decode`` for the generated
        sequences, with special tokens removed.
    """
    model, tokenizer = _load_transliterator(model_checkpoint)

    encoded_en = tokenizer(word, truncation=True, max_length=48, return_tensors="pt")
    generated_tokens = model.generate(**encoded_en)

    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

# Example usage: transliterate a single English word.
transliteration("transformer")
# Expected return value: ['트랜스포머']