|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import sys |
|
import asyncio |
|
import websockets |
|
import argparse |
|
import codecs |
|
import csv |
|
|
|
import srx_segmenter |
|
|
|
|
|
from websocket import create_connection |
|
import socket |
|
import time |
|
import asyncio |
|
import websockets |
|
|
|
import config |
|
|
|
from MTUOC_misc import printLOG |
|
from MTUOC_misc import get_IP_info |
|
|
|
from MTUOC_Marian import connect_to_Marian |
|
from MTUOC_Marian import translate_segment_Marian |
|
|
|
from MTUOC_OpenNMT import connect_to_OpenNMT |
|
from MTUOC_OpenNMT import translate_segment_OpenNMT |
|
|
|
from MTUOC_Moses import connect_to_Moses |
|
from MTUOC_Moses import translate_segment_Moses |
|
|
|
from MTUOC_typeMTUOC import start_MTUOC_server |
|
from MTUOC_typeMoses import start_Moses_server |
|
from MTUOC_typeOpenNMT import start_OpenNMT_server |
|
from MTUOC_typeNMTWizard import start_NMTWizard_server |
|
from MTUOC_typeModernMT import start_ModernMT_server |
|
|
|
|
|
|
|
|
|
from MTUOC_tags import TagRestorer |
|
|
|
|
|
|
|
import yaml |
|
from yaml import load, dump |
|
try: |
|
from yaml import CLoader as Loader, CDumper as Dumper |
|
except ImportError: |
|
from yaml import Loader, Dumper |
|
|
|
def startMTEngine():
    """Run the MT engine start command stored in ``config``.

    The command string ``config.startMTEngineCommand`` is first reported
    through ``printLOG`` (called with 3 as its first argument) and then passed
    to ``os.system``.  The status returned by ``os.system`` is discarded.
    """
    engine_command = config.startMTEngineCommand
    printLOG(3, "START MT ENGINE:", engine_command)
    os.system(engine_command)
|
|
|
|
|
|
|
|
|
# Command-line interface: an optional configuration file plus the server
# port and server type options.
parser = argparse.ArgumentParser(
    description='MTUOC-server. With no arguments the config-server.yaml file will be used.')
parser.add_argument('-c', '--config', action="store", dest="config",
                    help='The server configuration file to be used.', required=False)
parser.add_argument('-p', '--port', action="store", dest="port", type=int,
                    help='The MTUOC server port.', required=False)
parser.add_argument('-t', '--type', action="store", dest="type",
                    help='The MTUOC server type.', required=False)

args = parser.parse_args()

# Fall back to the default configuration file when -c/--config is not given.
configfile = args.config or "config-server.yaml"

# Read the whole YAML document and release the file handle immediately
# instead of keeping it open for the lifetime of the server process.
# NOTE(review): yaml.FullLoader is kept as in the original; the configuration
# file is assumed to be trusted, operator-provided input.
with open(configfile, 'r', encoding="utf-8") as stream:
    configYAML = yaml.load(stream, Loader=yaml.FullLoader)
|
|
|
# Copy the general server settings from the YAML document into the shared
# ``config`` module.
config.MTUOCServer_MTengine = configYAML["MTEngine"]["MTengine"]
config.MTUOCServer_type = configYAML["MTUOCServer"]["type"]
config.MTUOCServer_port = configYAML["MTUOCServer"]["port"]

# The -p/--port and -t/--type options are declared by the argument parser;
# when given on the command line they take precedence over the YAML values
# (previously they were parsed and then ignored).
if args.port is not None:
    config.MTUOCServer_port = args.port
if args.type is not None:
    config.MTUOCServer_type = args.type

config.tag_restoration = configYAML["MTUOCServer"]["restore_tags"]
config.fix_xml = configYAML["MTUOCServer"]["fix_xml"]

config.MTUOCServer_URLs = configYAML["MTUOCServer"]["URLs"]
config.MTUOCServer_EMAILs = configYAML["MTUOCServer"]["EMAILs"]

config.code_URLs = configYAML["MTUOCServer"]["code_URLs"]
config.code_EMAILs = configYAML["MTUOCServer"]["code_EMAILs"]

config.min_chars_segment = configYAML["MTUOCServer"]["min_chars_segment"]
# NOTE(review): unlike its neighbours this value is kept in a module-level
# variable and not stored on ``config`` -- confirm that this is intended.
translation_selection_strategy = configYAML["MTUOCServer"]["translation_selection_strategy"]

# Input segmentation settings; the SRX file is parsed once at start-up.
config.segment_input = configYAML["Preprocess"]["segment_input"]
config.SRXfile = configYAML["Preprocess"]["SRXfile"]
config.SRXlang = configYAML["Preprocess"]["SRXlang"]

config.rules = srx_segmenter.parse(config.SRXfile)
|
|
|
def _build_tokenizer(tokenizer_name, language):
    """Create the tokenizer object described by *tokenizer_name*.

    Args:
        tokenizer_name: ``None`` (no tokenizer), the string ``"Moses"``, or
            the path of a Python file that defines a ``Tokenizer`` class.
            A missing ``.py`` suffix is appended to the path.
        language: language code handed to ``MosesTokenizer``; only used for
            the ``"Moses"`` case.

    Returns:
        A ``(tokenizer, kind)`` tuple where *kind* is ``"Moses"``, ``"MTUOC"``
        or ``None`` when no tokenizer is configured.
    """
    if tokenizer_name is None:
        return None, None
    if tokenizer_name == "Moses":
        # Imported lazily so the package is only needed when it is used.
        import mosestokenizer
        return mosestokenizer.MosesTokenizer(language), "Moses"
    import importlib.util
    # The original code referenced undefined names here (NameError); the
    # suffix has to be appended to the configured file name itself.
    if tokenizer_name.endswith(".py"):
        module_path = tokenizer_name
    else:
        module_path = tokenizer_name + ".py"
    spec = importlib.util.spec_from_file_location('', module_path)
    tokenizer_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(tokenizer_module)
    return tokenizer_module.Tokenizer(), "MTUOC"


config.sllang = configYAML["Preprocess"]["sl_lang"]
config.tllang = configYAML["Preprocess"]["tl_lang"]
config.tokenize_SL = configYAML["Preprocess"]["tokenize_SL"]
config.tokenizerSL = configYAML["Preprocess"]["sl_tokenizer"]
# Keep the raw configured value: it is passed to the Truecaser further down,
# after config.tokenizerSL has been replaced by a tokenizer object.
tokenizerSLfile = config.tokenizerSL
config.tokenizerTL = configYAML["Preprocess"]["tl_tokenizer"]

# The literal string "None" in the YAML file means "no tokenizer".
if config.tokenizerSL == "None":
    config.tokenizerSL = None
if config.tokenizerTL == "None":
    config.tokenizerTL = None

# Replace the configured names by tokenizer objects.  The *Type attributes
# are only assigned when a tokenizer is actually configured.
config.tokenizerSL, _tokenizer_sl_kind = _build_tokenizer(config.tokenizerSL, config.sllang)
if _tokenizer_sl_kind is not None:
    config.tokenizerSLType = _tokenizer_sl_kind

config.tokenizerTL, _tokenizer_tl_kind = _build_tokenizer(config.tokenizerTL, config.tllang)
if _tokenizer_tl_kind is not None:
    # The original only recorded "Moses" for the target side; "MTUOC" is now
    # recorded as well so both sides are handled consistently.
    config.tokenizerTLType = _tokenizer_tl_kind
|
|
|
|
|
|
|
# Truecasing: read the settings and build the Truecaser only when a model is
# configured.
_truecase_cfg = configYAML["Preprocess"]
config.tcmodel = _truecase_cfg["tcmodel"]
config.truecase = _truecase_cfg["truecase"]

# The literal string "None" in the YAML file means "no truecasing model".
if config.tcmodel == "None":
    config.tcmodel = None

if not config.tcmodel:
    config.truecaser = None
else:
    from MTUOC_truecaser import Truecaser
    # tokenizerSLfile is the source tokenizer value exactly as configured.
    config.truecaser = Truecaser(tokenizer=tokenizerSLfile, tc_model=config.tcmodel)
|
|
|
def _load_change_rows(file_names):
    """Collect the rows of every ';'-delimited CSV file in *file_names*.

    Args:
        file_names: list of file names as produced by splitting the
            configured value on ';'.  When the first entry is the string
            ``"None"`` no file is read.

    Returns:
        A list with the rows (lists of strings) of all files, in order.

    Raises:
        OSError: if one of the named files cannot be opened.
    """
    rows = []
    if file_names[0] == "None":
        return rows
    for file_name in file_names:
        file_name = file_name.strip()
        if not file_name:
            # An empty entry (e.g. from a trailing ';') names no file.
            continue
        # newline="" is what the csv module expects; the explicit encoding
        # avoids depending on the platform default.
        with open(file_name, encoding="utf-8", newline="") as csvfile:
            rows.extend(csv.reader(csvfile, delimiter=';', quotechar='"'))
    return rows


# NOTE(review): these keys are read from the top level of the YAML document,
# not from a section -- confirm against the configuration file layout.
config.unescape_html_input = configYAML["unescape_html_input"]
config.escape_html_input = configYAML["escape_html_input"]

config.checkistranslatable = configYAML["MTUOCServer"]["checkistranslatable"]

# Each setting is a ';'-separated list of CSV files; the rows of all files
# are accumulated into one list per setting.
config.change_input_files = configYAML["change_input_files"].split(";")
config.changes_input = _load_change_rows(config.change_input_files)

config.change_output_files = configYAML["change_output_files"].split(";")
config.changes_output = _load_change_rows(config.change_output_files)

config.change_translation_files = configYAML["change_translation_files"].split(";")
config.changes_translation = _load_change_rows(config.change_translation_files)
|
|
|
|
|
config.tagrestorer = TagRestorer()

# SentencePiece settings.
config.sentencepiece = configYAML["Preprocess"]["sentencepiece"]
config.spmodelSL = configYAML["Preprocess"]["sp_model_SL"]
config.spmodelTL = configYAML["Preprocess"]["sp_model_TL"]
config.sp_splitter = configYAML["Preprocess"]["sp_splitter"]

# BPE settings; the BPE object is only built when BPE is enabled.
config.BPE = configYAML["Preprocess"]["BPE"]
config.bpecodes = configYAML["Preprocess"]["bpecodes"]
config.bpe_joiner = configYAML["Preprocess"]["bpe_joiner"]
if config.BPE:
    from subword_nmt import apply_bpe
    # The codes file is read while the BPE object is constructed, so the
    # handle is closed right afterwards instead of being leaked.
    with open(config.bpecodes, encoding="utf-8") as _bpe_codes_file:
        config.bpeobject = apply_bpe.BPE(_bpe_codes_file)

# Sentence boundary annotation settings.
config.bos_annotate = configYAML["Preprocess"]["bos_annotate"]
config.bos_symbol = configYAML["Preprocess"]["bos_symbol"]
config.eos_annotate = configYAML["Preprocess"]["eos_annotate"]
config.eos_symbol = configYAML["Preprocess"]["eos_symbol"]

# Number handling settings.
config.pre_replace_NUMs = configYAML["Preprocess"]["replaceNUMs"]
config.code_NUMs = configYAML["Preprocess"]["code_NUMs"]
config.pre_split_NUMs = configYAML["Preprocess"]["splitNUMs"]

if config.sentencepiece:
    import sentencepiece as spm
    # Only the source-side processor receives the BOS/EOS annotation flags.
    config.spSL = spm.SentencePieceProcessor(
        model_file=config.spmodelSL, out_type=str,
        add_bos=config.bos_annotate, add_eos=config.eos_annotate)
    config.spTL = spm.SentencePieceProcessor(model_file=config.spmodelTL, out_type=str)
|
|
|
|
|
# The MTEngine connection settings are only read when the configured engine
# is Marian, Moses or OpenNMT.
if config.MTUOCServer_MTengine in ("Marian", "Moses", "OpenNMT"):
    _engine_cfg = configYAML["MTEngine"]
    config.startMTEngineV = _engine_cfg["startMTEngine"]
    config.startMTEngineCommand = _engine_cfg["startCommand"]
    config.MTEngineIP = _engine_cfg["IP"]
    config.MTEnginePort = _engine_cfg["port"]
    config.min_len_factor = _engine_cfg["min_len_factor"]
|
|
|
# Google Translate settings, read only when that engine is selected.
if config.MTUOCServer_MTengine == "GoogleTranslate":
    # ``translateGoogle`` was used without being imported anywhere in the
    # visible file, which raised NameError; import it lazily here, as the
    # file already does for its other optional packages.
    from google.cloud import translate as translateGoogle

    config.Google_sllang = configYAML["GoogleTranslate"]["sllang"]
    config.Google_tllang = configYAML["GoogleTranslate"]["tllang"]
    config.Google_glossary = configYAML["GoogleTranslate"]["glossary"]
    # The literal string "None" in the YAML file means "no glossary".
    if config.Google_glossary == "None":
        config.Google_glossary = None

    config.Google_project_id = configYAML["GoogleTranslate"]["project_id"]
    config.Google_location = configYAML["GoogleTranslate"]["location"]
    config.Google_jsonfile = configYAML["GoogleTranslate"]["jsonfile"]
    # The credentials path is exported before the client is created.
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = config.Google_jsonfile
    # The client was previously constructed twice in a row; once is enough.
    config.client = translateGoogle.TranslationServiceClient()
    config.parent = "projects/" + config.Google_project_id + "/locations/" + config.Google_location
|
|
|
# DeepL settings, read only when that engine is selected.
if config.MTUOCServer_MTengine == "DeepL":
    # ``deepl`` was used without being imported anywhere in the visible file,
    # which raised NameError; import it lazily here, as the file already does
    # for its other optional packages.
    import deepl

    config.DeepL_API_key = configYAML["DeepL"]["API_key"]
    config.DeepL_sl_lang = configYAML["DeepL"]["sllang"]
    config.DeepL_tl_lang = configYAML["DeepL"]["tllang"]

    config.DeepL_formality = configYAML["DeepL"]["formality"]

    config.DeepL_split_sentences = configYAML["DeepL"]["split_sentences"]

    config.DeepL_glossary = configYAML["DeepL"]["glossary"]
    # The literal string "None" in the YAML file means "no glossary".
    if config.DeepL_glossary == "None":
        config.DeepL_glossary = None
    config.DeepLtranslator = deepl.Translator(config.DeepL_API_key)
|
|
|
# Lucy settings, read only when that engine is selected.
if config.MTUOCServer_MTengine == "Lucy":
    import ast
    import xmltodict
    import requests

    # Every option is stored on ``config`` under its YAML name prefixed with
    # "Lucy_" (e.g. "url" becomes config.Lucy_url).
    _lucy_cfg = configYAML["Lucy"]
    for _lucy_option in ("url", "TRANSLATION_DIRECTION", "MARK_UNKNOWNS",
                         "MARK_ALTERNATIVES", "MARK_COMPOUNDS", "CHARSET"):
        setattr(config, "Lucy_" + _lucy_option, _lucy_cfg[_lucy_option])
|
|
|
# Logging settings.
config.verbosity_level = int(configYAML["MTUOCServer"]["verbosity_level"])
config.log_file = configYAML["MTUOCServer"]["log_file"]

config.MTUOCServer_ONMT_url_root = configYAML["MTUOCServer"]["ONMT_url_root"]

# config.log_file is turned from a file name into a boolean flag; when a
# file is configured it is opened in append mode and kept on
# config.sortidalog.  The handle is intentionally left open here
# (presumably the logging helper writes to it -- verify against printLOG).
if config.log_file == "None":
    config.log_file = False
else:
    config.sortidalog = codecs.open(config.log_file, "a", encoding="utf-8")
    config.log_file = True

# config.startMTEngineV is only assigned in this file for the Marian, Moses
# and OpenNMT engines, so read it defensively: a missing attribute counts as
# "do not start an engine" instead of raising AttributeError.
if (getattr(config, "startMTEngineV", False)
        and config.MTUOCServer_MTengine not in ["GoogleTranslate", "DeepL", "Lucy"]):
    startMTEngine()
|
|
|
# Connect to the configured MT engine.
# NOTE(review): connect_to_OpenNMT is imported at the top of the file but is
# never called for the "OpenNMT" engine -- confirm whether that is intended.
if config.MTUOCServer_MTengine == "Marian":
    connect_to_Marian()
elif config.MTUOCServer_MTengine == "Moses":
    config.proxyMoses = connect_to_Moses()

# Start the server flavour selected by config.MTUOCServer_type.
if config.MTUOCServer_type == "MTUOC":
    start_MTUOC_server()
elif config.MTUOCServer_type == "Moses":
    start_Moses_server()
elif config.MTUOCServer_type == "OpenNMT":
    start_OpenNMT_server()
elif config.MTUOCServer_type == "NMTWizard":
    start_NMTWizard_server()
elif config.MTUOCServer_type == "ModernMT":
    start_ModernMT_server()
else:
    # Previously an unknown type fell through silently and the script ended
    # without starting any server; report the misconfiguration instead.
    sys.exit("Unknown MTUOC server type: {!r}".format(config.MTUOCServer_type))
|
|