Spaces:

skytnt
/

moe-tts

Running on CPU Upgrade

App Files Community

skytnt committed on Aug 25, 2022

Commit

57a4371

•

1 Parent(s): efe10fa

use numba jit instead of cython

Browse files

Files changed (6) hide show

MoeGoe.py +0 -119
app.py +0 -4
monotonic_align/__init__.py +17 -15
monotonic_align/core.py +36 -0
monotonic_align/core.pyx +0 -42
monotonic_align/setup.py +0 -9

MoeGoe.py DELETED Viewed

@@ -1,119 +0,0 @@
-import sys
-from torch import no_grad, LongTensor
-import logging
-logging.getLogger('numba').setLevel(logging.WARNING)
-import commons
-import utils
-from models import SynthesizerTrn
-from text import text_to_sequence
-from mel_processing import spectrogram_torch
-from scipy.io.wavfile import write
-def get_text(text, hps):
-    text_norm = text_to_sequence(text, hps_ms.symbols, hps.data.text_cleaners)
-    if hps.data.add_blank:
-        text_norm = commons.intersperse(text_norm, 0)
-    text_norm = LongTensor(text_norm)
-    return text_norm
-def ask_if_continue():
-    while True:
-        answer = input('Continue? (y/n): ')
-        if answer == 'y':
-            break
-        elif answer == 'n':
-            sys.exit(0)
-def print_speakers(speakers):
-    print('ID\tSpeaker')
-    for id, name in enumerate(speakers):
-        print(str(id) + '\t' + name)
-def get_speaker_id(message):
-    speaker_id = input(message)
-    try:
-        speaker_id = int(speaker_id)
-    except:
-        print(str(speaker_id) + ' is not a valid ID!')
-        sys.exit(1)
-    return speaker_id
-if __name__ == '__main__':
-    model = input('Path of a VITS model: ')
-    config = input('Path of a config file: ')
-    try:
-        hps_ms = utils.get_hparams_from_file(config)
-        net_g_ms = SynthesizerTrn(
-            len(hps_ms.symbols),
-            hps_ms.data.filter_length // 2 + 1,
-            hps_ms.train.segment_size // hps_ms.data.hop_length,
-            n_speakers=hps_ms.data.n_speakers,
-            **hps_ms.model)
-        _ = net_g_ms.eval()
-        _ = utils.load_checkpoint(model, net_g_ms, None)
-    except:
-        print('Failed to load!')
-        sys.exit(1)
-    while True:
-        choice = input('TTS or VC? (t/v):')
-        if choice == 't':
-            text = input('Text to read: ')
-            try:
-                stn_tst = get_text(text, hps_ms)
-            except:
-                print('Invalid text!')
-                sys.exit(1)
-            print_speakers(hps_ms.speakers)
-            speaker_id = get_speaker_id('Speaker ID: ')
-            out_path = input('Path to save: ')
-            try:
-                with no_grad():
-                    x_tst = stn_tst.unsqueeze(0)
-                    x_tst_lengths = LongTensor([stn_tst.size(0)])
-                    sid = LongTensor([speaker_id])
-                    audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][0,0].data.cpu().float().numpy()
-                write(out_path, hps_ms.data.sampling_rate, audio)
-            except:
-                print('Failed to generate!')
-                sys.exit(1)
-            print('Successfully saved!')
-            ask_if_continue()
-        elif choice == 'v':
-            wav_path = input('Path of a WAV file (22050 Hz, 16 bits, 1 channel) to convert:\n')
-            print_speakers(hps_ms.speakers)
-            audio, sampling_rate = utils.load_wav_to_torch(wav_path)
-            originnal_id = get_speaker_id('Original speaker ID: ')
-            target_id = get_speaker_id('Target speaker ID: ')
-            out_path = input('Path to save: ')
-            y = audio / hps_ms.data.max_wav_value
-            y = y.unsqueeze(0)
-            spec = spectrogram_torch(y, hps_ms.data.filter_length,
-                hps_ms.data.sampling_rate, hps_ms.data.hop_length, hps_ms.data.win_length,
-                center=False)
-            spec_lengths = LongTensor([spec.size(-1)])
-            sid_src = LongTensor([originnal_id])
-            try:
-                with no_grad():
-                    sid_tgt = LongTensor([target_id])
-                    audio = net_g_ms.voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt)[0][0,0].data.cpu().float().numpy()
-                write(out_path, hps_ms.data.sampling_rate, audio)
-            except:
-                print('Failed to generate!')
-                sys.exit(1)
-            print('Successfully saved!')
-            ask_if_continue()

app.py CHANGED Viewed

@@ -1,8 +1,4 @@
 import json
-import os
-os.system('cd monotonic_align && python setup.py build_ext --inplace && cd ..')
 import librosa
 import numpy as np
 import torch

 import json
 import librosa
 import numpy as np
 import torch

monotonic_align/__init__.py CHANGED Viewed

@@ -1,19 +1,21 @@
-import numpy as np
-import torch
-from .monotonic_align.core import maximum_path_c
 def maximum_path(neg_cent, mask):
-  """ Cython optimized version.
-  neg_cent: [b, t_t, t_s]
-  mask: [b, t_t, t_s]
-  """
-  device = neg_cent.device
-  dtype = neg_cent.dtype
-  neg_cent = neg_cent.data.cpu().numpy().astype(np.float32)
-  path = np.zeros(neg_cent.shape, dtype=np.int32)
-  t_t_max = mask.sum(1)[:, 0].data.cpu().numpy().astype(np.int32)
-  t_s_max = mask.sum(2)[:, 0].data.cpu().numpy().astype(np.int32)
-  maximum_path_c(path, neg_cent, t_t_max, t_s_max)
-  return torch.from_numpy(path).to(device=device, dtype=dtype)

+from numpy import zeros, int32, float32
+from torch import from_numpy
+from .core import maximum_path_jit
 def maximum_path(neg_cent, mask):
+    """ numba optimized version.
+    neg_cent: [b, t_t, t_s]
+    mask: [b, t_t, t_s]
+    """
+    device = neg_cent.device
+    dtype = neg_cent.dtype
+    neg_cent = neg_cent.data.cpu().numpy().astype(float32)
+    path = zeros(neg_cent.shape, dtype=int32)
+    t_t_max = mask.sum(1)[:, 0].data.cpu().numpy().astype(int32)
+    t_s_max = mask.sum(2)[:, 0].data.cpu().numpy().astype(int32)
+    maximum_path_jit(path, neg_cent, t_t_max, t_s_max)
+    return from_numpy(path).to(device=device, dtype=dtype)

monotonic_align/core.py ADDED Viewed

@@ -0,0 +1,36 @@

+import numba
+@numba.jit(numba.void(numba.int32[:, :, ::1], numba.float32[:, :, ::1], numba.int32[::1], numba.int32[::1]),
+           nopython=True, nogil=True)
+def maximum_path_jit(paths, values, t_ys, t_xs):
+    b = paths.shape[0]
+    max_neg_val = -1e9
+    for i in range(int(b)):
+        path = paths[i]
+        value = values[i]
+        t_y = t_ys[i]
+        t_x = t_xs[i]
+        v_prev = v_cur = 0.0
+        index = t_x - 1
+        for y in range(t_y):
+            for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
+                if x == y:
+                    v_cur = max_neg_val
+                else:
+                    v_cur = value[y - 1, x]
+                if x == 0:
+                    if y == 0:
+                        v_prev = 0.
+                    else:
+                        v_prev = max_neg_val
+                else:
+                    v_prev = value[y - 1, x - 1]
+                value[y, x] += max(v_prev, v_cur)
+        for y in range(t_y - 1, -1, -1):
+            path[y, index] = 1
+            if index != 0 and (index == y or value[y - 1, index] < value[y - 1, index - 1]):
+                index = index - 1

monotonic_align/core.pyx DELETED Viewed

@@ -1,42 +0,0 @@
-cimport cython
-from cython.parallel import prange
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil:
-  cdef int x
-  cdef int y
-  cdef float v_prev
-  cdef float v_cur
-  cdef float tmp
-  cdef int index = t_x - 1
-  for y in range(t_y):
-    for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
-      if x == y:
-        v_cur = max_neg_val
-      else:
-        v_cur = value[y-1, x]
-      if x == 0:
-        if y == 0:
-          v_prev = 0.
-        else:
-          v_prev = max_neg_val
-      else:
-        v_prev = value[y-1, x-1]
-      value[y, x] += max(v_prev, v_cur)
-  for y in range(t_y - 1, -1, -1):
-    path[y, index] = 1
-    if index != 0 and (index == y or value[y-1, index] < value[y-1, index-1]):
-      index = index - 1
-@cython.boundscheck(False)
-@cython.wraparound(False)
-cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_ys, int[::1] t_xs) nogil:
-  cdef int b = paths.shape[0]
-  cdef int i
-  for i in prange(b, nogil=True):
-    maximum_path_each(paths[i], values[i], t_ys[i], t_xs[i])

monotonic_align/setup.py DELETED Viewed

@@ -1,9 +0,0 @@
-from distutils.core import setup
-from Cython.Build import cythonize
-import numpy
-setup(
-  name = 'monotonic_align',
-  ext_modules = cythonize("core.pyx"),
-  include_dirs=[numpy.get_include()]
-)