Spaces:
Runtime error
Runtime error
"""Detect language via polyglot and fastlid.""" | |
# pylint: disable= | |
from typing import Any, Callable, List, Optional | |
from polyglot.text import Detector | |
import polyglot.detect.base | |
from polyglot.detect.base import UnknownLanguage | |
from fastlid import fastlid | |
from logzero import logger | |
polyglot.detect.base.logger.setLevel("ERROR") | |
def with_func_attrs(**attrs: Any) -> Callable: | |
"""Define func_attrs.""" | |
def with_attrs(fct: Callable) -> Callable: | |
for key, val in attrs.items(): | |
setattr(fct, key, val) | |
return fct | |
return with_attrs | |
# @with_func_attrs(set_languages=None) | |
# def detect(text: str) -> str: | |
def detect(text: str, set_languages: Optional[List[str]] = None) -> str: | |
"""Detect language via polyglot and fastlid.""" | |
# if not text.strip(): return "en" | |
try: | |
_ = [(elm.code[:2], elm.confidence) for elm in Detector(text).languages] | |
detect.lang_conf = _ | |
lang, conf = _[0] | |
except UnknownLanguage: | |
if set_languages is None: | |
def_lang = "en" | |
else: | |
# def_lang = set_languages[-1] | |
def_lang = set_languages[0] | |
logger.warning(" UnknownLanguage exception: probably snippet too short, setting to %s", def_lang) | |
lang, conf = def_lang, 0 | |
except Exception as exc: | |
logger.error(exc) | |
lang, conf = "en", 0 | |
del conf | |
# if set_languages is None, | |
# trust polyglot.text.Detector | |
if set_languages is None: | |
return lang | |
# set_languages is set | |
if not isinstance(set_languages, (list, tuple)): | |
logger.warning("set_languages (%s) ought to be a list/tuple") | |
if lang in set_languages: | |
return lang | |
# lang not in set_languages, use fastlid | |
fastlid.set_languages = set_languages | |
lang, _ = fastlid(text) | |
return lang | |