from nltk.translate.bleu_score import sentence_bleu,SmoothingFunction | |
from nltk.tokenize import word_tokenize | |
from utils.tokenizer import tokenize | |
import re | |
def is_korean(text): | |
for char in text: | |
if '가' <= char <= '힣': | |
return True | |
return False | |
def simple_score(text1, text2): | |
text1 = re.sub("\n", " ", text1) | |
text2 = re.sub("\n", " ", text2) | |
if is_korean(text1): | |
reference = tokenize(text1) | |
candidate = tokenize(text2) | |
else: | |
reference = word_tokenize(text1.lower()) | |
candidate = word_tokenize(text2.lower()) | |
# base = sentence_bleu([reference], reference) | |
score = sentence_bleu([reference], candidate, smoothing_function=SmoothingFunction().method2) | |
return score | |
if __name__ == "__main__": | |
lang = input('lang(en,ko)>') | |
while True: | |
ref = input("ref: ") | |
cand = input("cand: ") | |
print('score',simple_score(ref, cand, lang)) | |