Spaces: Build error
Update app.py
app.py CHANGED
@@ -1,23 +1,18 @@
+import gradio as gr
 import os
+import random
+import IPython.display as ipd
+
+import matplotlib.pyplot as plt
+%matplotlib inline
+
 import json
-import math
 import torch
-from torch import nn
-from torch.nn import functional as F
-from torch.utils.data import DataLoader
-from scipy.io.wavfile import write
 import numpy as np
-
-import gradio as gr
-import IPython.display as ipd
-
-import commons
-import utils
-from data_utils import TextAudioSpeakerLoader, TextAudioSpeakerCollate
 from models import SynthesizerTrn
 from text.symbols import symbols
 from text import text_to_sequence
-
+from scipy.io.wavfile import write
 
 def get_text(text, hps):
     text_norm = text_to_sequence(text, hps.data.text_cleaners)
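Note on this hunk: `%matplotlib inline` is IPython/Jupyter magic, not Python, so it raises a SyntaxError the moment app.py runs as a plain script, which by itself would keep the Space in "Build error". A minimal sketch of the same imports without the magic (assuming matplotlib is wanted at all; nothing visible in the diff uses plt):

# Sketch: notebook magic removed. On a headless Space, select the Agg
# backend explicitly instead of relying on "%matplotlib inline".
import matplotlib
matplotlib.use("Agg")  # off-screen rendering; a Space has no display server
import matplotlib.pyplot as plt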
@@ -26,7 +21,6 @@ def get_text(text, hps):
     text_norm = torch.LongTensor(text_norm)
     return text_norm
 
-
 def get_text_byroma(text, hps):
     text_norm = []
     for i in text:
@@ -36,26 +30,22 @@ def get_text_byroma(text, hps):
     text_norm = torch.LongTensor(text_norm)
     return text_norm
 
-
 hps = utils.get_hparams_from_file("./configs/leo.json")
 net_g = SynthesizerTrn(
     len(symbols),
     hps.data.filter_length // 2 + 1,
     hps.train.segment_size // hps.data.hop_length,
     n_speakers=hps.data.n_speakers,
-    **hps.model
-)
+    **hps.model)
 _ = net_g.eval()
-
 _ = utils.load_checkpoint("logs/leo/G_4000.pth", net_g, None)
 
 # Root directory for randomly sampled emotion reference audio
 random_emotion_root = "wavs"
 emotion_dict = json.load(open("configs/leo.json", "r"))
 
-
 def tts(txt, emotion, roma=False, length_scale=1):
-    """emotion
+    """emotion is the path of a reference emotion audio file, or "random_sample" (drawn at random)"""
     if roma:
         stn_tst = get_text_byroma(txt, hps)
     else:
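Note on this hunk: the module-level code still calls `utils.get_hparams_from_file` and `utils.load_checkpoint`, but the first hunk removed `import utils`, so the rebuilt file dies with a NameError at startup. A minimal sketch of the import the code shown here still depends on (module name as used in the repo):

# Sketch: import that the first hunk dropped but this hunk still needs;
# without it, "utils" is undefined when app.py starts.
import utils

hps = utils.get_hparams_from_file("./configs/leo.json")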
@@ -82,21 +72,22 @@ def tts(txt, emotion, roma=False, length_scale=1):
     audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=0.667, noise_scale_w=0.8, length_scale=1.2, emo=emo)[0][0,0].data.float().numpy()
     ipd.display(ipd.Audio(audio, rate=hps.data.sampling_rate, normalize=False))
 
-
-def
-tts(
-
-
-
-
-inputs
-
-
-
-
-
-
-)
-
-
+# Define the GUI inputs and outputs
+def generate_audio(txt, emotion):
+    tts(txt, emotion)
+    return "Audio Generated"
+
+inputs = [
+    gr.inputs.Textbox(lines=2, label="Text Input"),
+    gr.inputs.Radio(["random_sample", "wavs/vo_bm_main2_07_20_0048.wav"], label="Emotion Reference"),
+]
+
+# Create the GUI
+title = "Emotion TTS"
+description = "Enter the text and select the emotion reference to generate synthesized speech."
+outputs = gr.outputs.Textbox(label="Audio Output")
+examples = [["Hello, how are you?", "random_sample"]]
+gr_interface = gr.Interface(fn=generate_audio, inputs=inputs, outputs=outputs, title=title, description=description, examples=examples)
+
+# Launch the GUI
+gr_interface.launch()
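Note: as committed, `tts` only plays the waveform through `ipd.display`, which does nothing outside a notebook, and `generate_audio` hands Gradio the placeholder string "Audio Generated". A sketch of one way to surface the audio in the interface, assuming `tts` is refactored to return the numpy waveform (a hypothetical change, not part of this commit); in the Gradio generation used here, `gr.outputs.Audio(type="numpy")` accepts a `(sample_rate, waveform)` tuple:

# Sketch, not the committed code: assumes tts() is changed to `return audio`
# instead of ipd.display(...), so the waveform reaches the browser.
def generate_audio(txt, emotion):
    audio = tts(txt, emotion)                # hypothetical: tts() returns the np.ndarray
    return hps.data.sampling_rate, audio     # (sample_rate, waveform) tuple

outputs = gr.outputs.Audio(type="numpy", label="Audio Output")
gr_interface = gr.Interface(fn=generate_audio, inputs=inputs, outputs=outputs,
                            title=title, description=description, examples=examples)
gr_interface.launch()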
|