import os import platform from ctypes import CDLL, POINTER, c_bool, c_char_p, c_float, c_int, c_long from ctypes.util import find_library from dataclasses import dataclass from enum import Enum, auto from pathlib import Path from typing import List, Optional import numpy as np class OldCoreError(Exception): """古いコアが使用されている場合に発生するエラー""" class CoreError(Exception): """コア呼び出しで発生したエラー""" def load_runtime_lib(runtime_dirs: List[Path]): if platform.system() == "Windows": # DirectML.dllはonnxruntimeと互換性のないWindows標準搭載のものを優先して読み込むことがあるため、明示的に読み込む # 参考 1. https://github.com/microsoft/onnxruntime/issues/3360 # 参考 2. https://tadaoyamaoka.hatenablog.com/entry/2020/06/07/113616 lib_file_names = [ "torch_cpu.dll", "torch_cuda.dll", "DirectML.dll", "onnxruntime.dll", ] lib_names = ["torch_cpu", "torch_cuda", "onnxruntime"] elif platform.system() == "Linux": lib_file_names = ["libtorch.so", "libonnxruntime.so"] lib_names = ["torch", "onnxruntime"] elif platform.system() == "Darwin": lib_file_names = ["libonnxruntime.dylib"] lib_names = ["onnxruntime"] else: raise RuntimeError("不明なOSです") for lib_path in runtime_dirs: for file_name in lib_file_names: try: CDLL(str((lib_path / file_name).resolve(strict=True))) except OSError: pass for lib_name in lib_names: try: CDLL(find_library(lib_name)) except (OSError, TypeError): pass class GPUType(Enum): # NONEはCPUしか対応していないことを示す NONE = auto() CUDA = auto() DIRECT_ML = auto() @dataclass(frozen=True) class CoreInfo: name: str platform: str arch: str core_type: str gpu_type: GPUType # version 0.12 より前のコアの情報 CORE_INFOS = [ # Windows CoreInfo( name="core.dll", platform="Windows", arch="x64", core_type="libtorch", gpu_type=GPUType.CUDA, ), CoreInfo( name="core_cpu.dll", platform="Windows", arch="x64", core_type="libtorch", gpu_type=GPUType.NONE, ), CoreInfo( name="core_gpu_x64_nvidia.dll", platform="Windows", arch="x64", core_type="onnxruntime", gpu_type=GPUType.CUDA, ), CoreInfo( name="core_gpu_x64_directml.dll", platform="Windows", arch="x64", core_type="onnxruntime", gpu_type=GPUType.DIRECT_ML, ), CoreInfo( name="core_cpu_x64.dll", platform="Windows", arch="x64", core_type="onnxruntime", gpu_type=GPUType.NONE, ), CoreInfo( name="core_cpu_x86.dll", platform="Windows", arch="x86", core_type="onnxruntime", gpu_type=GPUType.NONE, ), CoreInfo( name="core_gpu_x86_directml.dll", platform="Windows", arch="x86", core_type="onnxruntime", gpu_type=GPUType.DIRECT_ML, ), CoreInfo( name="core_cpu_arm.dll", platform="Windows", arch="armv7l", core_type="onnxruntime", gpu_type=GPUType.NONE, ), CoreInfo( name="core_gpu_arm_directml.dll", platform="Windows", arch="armv7l", core_type="onnxruntime", gpu_type=GPUType.DIRECT_ML, ), CoreInfo( name="core_cpu_arm64.dll", platform="Windows", arch="aarch64", core_type="onnxruntime", gpu_type=GPUType.NONE, ), CoreInfo( name="core_gpu_arm64_directml.dll", platform="Windows", arch="aarch64", core_type="onnxruntime", gpu_type=GPUType.DIRECT_ML, ), # Linux CoreInfo( name="libcore.so", platform="Linux", arch="x64", core_type="libtorch", gpu_type=GPUType.CUDA, ), CoreInfo( name="libcore_cpu.so", platform="Linux", arch="x64", core_type="libtorch", gpu_type=GPUType.NONE, ), CoreInfo( name="libcore_gpu_x64_nvidia.so", platform="Linux", arch="x64", core_type="onnxruntime", gpu_type=GPUType.CUDA, ), CoreInfo( name="libcore_cpu_x64.so", platform="Linux", arch="x64", core_type="onnxruntime", gpu_type=GPUType.NONE, ), CoreInfo( name="libcore_cpu_armhf.so", platform="Linux", arch="armv7l", core_type="onnxruntime", gpu_type=GPUType.NONE, ), CoreInfo( name="libcore_cpu_arm64.so", platform="Linux", arch="aarch64", core_type="onnxruntime", gpu_type=GPUType.NONE, ), # macOS CoreInfo( name="libcore_cpu_universal2.dylib", platform="Darwin", arch="universal", core_type="onnxruntime", gpu_type=GPUType.NONE, ), ] # version 0.12 以降のコアの名前の辞書 # - version 0.12, 0.13 のコアの名前: core # - version 0.14 からのコアの名前: voicevox_core CORENAME_DICT = { "Windows": ("voicevox_core.dll", "core.dll"), "Linux": ("libvoicevox_core.so", "libcore.so"), "Darwin": ("libvoicevox_core.dylib", "libcore.dylib"), } def find_version_0_12_core_or_later(core_dir: Path) -> Optional[str]: """ core_dir で指定したディレクトリにあるコアライブラリが Version 0.12 以降である場合、 見つかった共有ライブラリの名前を返す。 Version 0.12 以降と判定する条件は、 - core_dir に metas.json が存在しない - コアライブラリの名前が CORENAME_DICT の定義に従っている の両方が真のときである。 cf. https://github.com/VOICEVOX/voicevox_engine/issues/385 """ if (core_dir / "metas.json").exists(): return None for core_name in CORENAME_DICT[platform.system()]: if (core_dir / core_name).is_file(): return core_name return None def get_arch_name() -> Optional[str]: """ platform.machine() が特定のアーキテクチャ上で複数パターンの文字列を返し得るので、 一意な文字列に変換する サポート外のアーキテクチャである場合、None を返す """ machine = platform.machine() if machine == "x86_64" or machine == "x64" or machine == "AMD64": return "x64" elif machine == "i386" or machine == "x86": return "x86" elif machine == "arm64": return "aarch64" elif machine in ["armv7l", "aarch64"]: return machine else: return None def get_core_name( arch_name: str, platform_name: str, model_type: str, gpu_type: GPUType, ) -> Optional[str]: if platform_name == "Darwin": if gpu_type == GPUType.NONE and (arch_name == "x64" or arch_name == "aarch64"): arch_name = "universal" else: return None for core_info in CORE_INFOS: if ( core_info.platform == platform_name and core_info.arch == arch_name and core_info.core_type == model_type and core_info.gpu_type == gpu_type ): return core_info.name return None def get_suitable_core_name( model_type: str, gpu_type: GPUType, ) -> Optional[str]: arch_name = get_arch_name() if arch_name is None: return None platform_name = platform.system() return get_core_name(arch_name, platform_name, model_type, gpu_type) def check_core_type(core_dir: Path) -> Optional[str]: # libtorch版はDirectML未対応なので、ここでは`gpu_type=GPUType.DIRECT_ML`は入れない libtorch_core_names = [ get_suitable_core_name("libtorch", gpu_type=GPUType.CUDA), get_suitable_core_name("libtorch", gpu_type=GPUType.NONE), ] onnxruntime_core_names = [ get_suitable_core_name("onnxruntime", gpu_type=GPUType.CUDA), get_suitable_core_name("onnxruntime", gpu_type=GPUType.DIRECT_ML), get_suitable_core_name("onnxruntime", gpu_type=GPUType.NONE), ] if any([(core_dir / name).is_file() for name in libtorch_core_names if name]): return "libtorch" elif any([(core_dir / name).is_file() for name in onnxruntime_core_names if name]): return "onnxruntime" else: return None def load_core(core_dir: Path, use_gpu: bool) -> CDLL: core_name = find_version_0_12_core_or_later(core_dir) if core_name: try: # NOTE: CDLL クラスのコンストラクタの引数 name には文字列を渡す必要がある。 # Windows 環境では PathLike オブジェクトを引数として渡すと初期化に失敗する。 return CDLL(str((core_dir / core_name).resolve(strict=True))) except OSError as err: raise RuntimeError(f"コアの読み込みに失敗しました:{err}") model_type = check_core_type(core_dir) if model_type is None: raise RuntimeError("コアが見つかりません") if use_gpu or model_type == "onnxruntime": core_name = get_suitable_core_name(model_type, gpu_type=GPUType.CUDA) if core_name: try: return CDLL(str((core_dir / core_name).resolve(strict=True))) except OSError: pass core_name = get_suitable_core_name(model_type, gpu_type=GPUType.DIRECT_ML) if core_name: try: return CDLL(str((core_dir / core_name).resolve(strict=True))) except OSError: pass core_name = get_suitable_core_name(model_type, gpu_type=GPUType.NONE) if core_name: try: return CDLL(str((core_dir / core_name).resolve(strict=True))) except OSError as err: if model_type == "libtorch": core_name = get_suitable_core_name(model_type, gpu_type=GPUType.CUDA) if core_name: try: return CDLL(str((core_dir / core_name).resolve(strict=True))) except OSError as err_: err = err_ raise RuntimeError(f"コアの読み込みに失敗しました:{err}") else: raise RuntimeError(f"このコンピュータのアーキテクチャ {platform.machine()} で利用可能なコアがありません") class CoreWrapper: def __init__( self, use_gpu: bool, core_dir: Path, cpu_num_threads: int = 0, load_all_models: bool = False, ) -> None: self.core = load_core(core_dir, use_gpu) self.core.initialize.restype = c_bool self.core.metas.restype = c_char_p self.core.yukarin_s_forward.restype = c_bool self.core.yukarin_sa_forward.restype = c_bool self.core.decode_forward.restype = c_bool self.core.last_error_message.restype = c_char_p self.exist_supported_devices = False self.exist_finalize = False exist_cpu_num_threads = False self.exist_load_model = False self.exist_is_model_loaded = False is_version_0_12_core_or_later = ( find_version_0_12_core_or_later(core_dir) is not None ) if is_version_0_12_core_or_later: model_type = "onnxruntime" self.exist_load_model = True self.exist_is_model_loaded = True self.core.load_model.argtypes = (c_long,) self.core.load_model.restype = c_bool self.core.is_model_loaded.argtypes = (c_long,) self.core.is_model_loaded.restype = c_bool else: model_type = check_core_type(core_dir) assert model_type is not None if model_type == "onnxruntime": self.core.supported_devices.restype = c_char_p self.core.finalize.restype = None self.exist_supported_devices = True self.exist_finalize = True exist_cpu_num_threads = True self.core.yukarin_s_forward.argtypes = ( c_int, POINTER(c_long), POINTER(c_long), POINTER(c_float), ) self.core.yukarin_sa_forward.argtypes = ( c_int, POINTER(c_long), POINTER(c_long), POINTER(c_long), POINTER(c_long), POINTER(c_long), POINTER(c_long), POINTER(c_long), POINTER(c_float), ) self.core.decode_forward.argtypes = ( c_int, c_int, POINTER(c_float), POINTER(c_float), POINTER(c_long), POINTER(c_float), ) cwd = os.getcwd() os.chdir(core_dir) try: if is_version_0_12_core_or_later: self.assert_core_success( self.core.initialize(use_gpu, cpu_num_threads, load_all_models) ) elif exist_cpu_num_threads: self.assert_core_success( self.core.initialize(".", use_gpu, cpu_num_threads) ) else: self.assert_core_success(self.core.initialize(".", use_gpu)) finally: os.chdir(cwd) def metas(self) -> str: return self.core.metas().decode("utf-8") def yukarin_s_forward( self, length: int, phoneme_list: np.ndarray, speaker_id: np.ndarray, ) -> np.ndarray: output = np.zeros((length,), dtype=np.float32) self.assert_core_success( self.core.yukarin_s_forward( c_int(length), phoneme_list.ctypes.data_as(POINTER(c_long)), speaker_id.ctypes.data_as(POINTER(c_long)), output.ctypes.data_as(POINTER(c_float)), ) ) return output def yukarin_sa_forward( self, length: int, vowel_phoneme_list: np.ndarray, consonant_phoneme_list: np.ndarray, start_accent_list: np.ndarray, end_accent_list: np.ndarray, start_accent_phrase_list: np.ndarray, end_accent_phrase_list: np.ndarray, speaker_id: np.ndarray, ) -> np.ndarray: output = np.empty( ( len(speaker_id), length, ), dtype=np.float32, ) self.assert_core_success( self.core.yukarin_sa_forward( c_int(length), vowel_phoneme_list.ctypes.data_as(POINTER(c_long)), consonant_phoneme_list.ctypes.data_as(POINTER(c_long)), start_accent_list.ctypes.data_as(POINTER(c_long)), end_accent_list.ctypes.data_as(POINTER(c_long)), start_accent_phrase_list.ctypes.data_as(POINTER(c_long)), end_accent_phrase_list.ctypes.data_as(POINTER(c_long)), speaker_id.ctypes.data_as(POINTER(c_long)), output.ctypes.data_as(POINTER(c_float)), ) ) return output def decode_forward( self, length: int, phoneme_size: int, f0: np.ndarray, phoneme: np.ndarray, speaker_id: np.ndarray, ) -> np.ndarray: output = np.empty((length * 256,), dtype=np.float32) self.assert_core_success( self.core.decode_forward( c_int(length), c_int(phoneme_size), f0.ctypes.data_as(POINTER(c_float)), phoneme.ctypes.data_as(POINTER(c_float)), speaker_id.ctypes.data_as(POINTER(c_long)), output.ctypes.data_as(POINTER(c_float)), ) ) return output def supported_devices(self) -> str: if self.exist_supported_devices: return self.core.supported_devices().decode("utf-8") raise OldCoreError def finalize(self) -> None: if self.exist_finalize: self.core.finalize() return raise OldCoreError def load_model(self, speaker_id: int) -> None: if self.exist_load_model: self.assert_core_success(self.core.load_model(c_long(speaker_id))) raise OldCoreError def is_model_loaded(self, speaker_id: int) -> bool: if self.exist_is_model_loaded: return self.core.is_model_loaded(c_long(speaker_id)) raise OldCoreError def assert_core_success(self, result: bool) -> None: if not result: raise CoreError( self.core.last_error_message().decode("utf-8", "backslashreplace") )