Mahiruoshi committed
Commit: d1de440
1 Parent(s): de3b6be
Update app.py

app.py CHANGED
@@ -18,6 +18,188 @@ from text.symbols import symbols
 from text import text_to_sequence
 import unicodedata
 from scipy.io.wavfile import write
+import openai
+
+def get_text(text, hps):
+    text_norm = text_to_sequence(text, hps.data.text_cleaners)
+    if hps.data.add_blank:
+        text_norm = commons.intersperse(text_norm, 0)
+    text_norm = torch.LongTensor(text_norm)
+    return text_norm
+
+def get_label(text, label):
+    if f'[{label}]' in text:
+        return True, text.replace(f'[{label}]', '')
+    else:
+        return False, text
+
+def selection(speaker):
+    if speaker == "高咲侑(误)":
+        spk = 0
+        return spk
+
+    elif speaker == "歩夢":
+        spk = 1
+        return spk
+
+    elif speaker == "かすみ":
+        spk = 2
+        return spk
+
+    elif speaker == "しずく":
+        spk = 3
+        return spk
+
+    elif speaker == "果林":
+        spk = 4
+        return spk
+
+    elif speaker == "愛":
+        spk = 5
+        return spk
+
+    elif speaker == "彼方":
+        spk = 6
+        return spk
+
+    elif speaker == "せつ菜":
+        spk = 7
+        return spk
+    elif speaker == "エマ":
+        spk = 8
+        return spk
+    elif speaker == "璃奈":
+        spk = 9
+        return spk
+    elif speaker == "栞子":
+        spk = 10
+        return spk
+    elif speaker == "ランジュ":
+        spk = 11
+        return spk
+    elif speaker == "ミア":
+        spk = 12
+        return spk
+    elif speaker == "三色绘恋1":
+        spk = 13
+        return spk
+    elif speaker == "三色绘恋2":
+        spk = 15
+        return spk
+    elif speaker == "派蒙":
+        spk = 16
+        return spk
+def friend_chat(text,key,call_name,tts_input3):
+    call_name = call_name
+    openai.api_key = key
+    identity = tts_input3
+    start_sequence = '\n'+str(call_name)+':'
+    restart_sequence = "\nYou: "
+    all_text = identity + restart_sequence
+    if 1 == 1:
+        prompt0 = text  # current prompt
+        if text == 'quit':
+            return prompt0
+        prompt = identity + prompt0 + start_sequence
+
+        response = openai.Completion.create(
+            model="text-davinci-003",
+            prompt=prompt,
+            temperature=0.5,
+            max_tokens=1000,
+            top_p=1.0,
+            frequency_penalty=0.5,
+            presence_penalty=0.0,
+            stop=["\nYou:"]
+        )
+        return response['choices'][0]['text'].strip()
+def is_japanese(string):
+    for ch in string:
+        if ord(ch) > 0x3040 and ord(ch) < 0x30FF:
+            return True
+    return False
+def sle(language,text,tts_input2,call_name,tts_input3):
+    if language == "中文":
+        tts_input1 = "[ZH]" + text.replace('\n','。').replace(' ',',') + "[ZH]"
+        return tts_input1
+    if language == "对话":
+        text = friend_chat(text,tts_input2,call_name,tts_input3).replace('\n','。').replace(' ',',')
+        text = f"[JA]{text}[JA]" if is_japanese(text) else f"[ZH]{text}[ZH]"
+        return text
+    elif language == "日文":
+        tts_input1 = "[JA]" + text.replace('\n','。').replace(' ',',') + "[JA]"
+        return tts_input1
+def infer(language,text,tts_input2,tts_input3,speaker_id,n_scale= 0.667,n_scale_w = 0.8, l_scale = 1 ):
+    speaker_name = speaker_id
+    speaker_id = int(selection(speaker_id))
+    stn_tst = get_text(sle(language,text,tts_input2,speaker_name,tts_input3), hps_ms)
+    with torch.no_grad():
+        x_tst = stn_tst.unsqueeze(0).to(dev)
+        x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
+        sid = torch.LongTensor([speaker_id]).to(dev)
+        t1 = time.time()
+        audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=n_scale, noise_scale_w=n_scale_w, length_scale=l_scale)[0][0,0].data.cpu().float().numpy()
+        t2 = time.time()
+        spending_time = "推理时间:"+str(t2-t1)+"s"
+        print(spending_time)
+    return (hps_ms.data.sampling_rate, audio)
+lan = ["中文","日文","对话"]
+idols = ["高咲侑(误)","歩夢","かすみ","しずく","果林","愛","彼方","せつ菜","璃奈","栞子","エマ","ランジュ","ミア","三色绘恋1","三色绘恋2","派蒙"]
+
+
+dev = torch.device("cpu")
+hps_ms = utils.get_hparams_from_file("config.json")
+net_g_ms = SynthesizerTrn(
+    len(symbols),
+    hps_ms.data.filter_length // 2 + 1,
+    hps_ms.train.segment_size // hps_ms.data.hop_length,
+    n_speakers=hps_ms.data.n_speakers,
+    **hps_ms.model).to(dev)
+_ = net_g_ms.eval()
+
+_ = utils.load_checkpoint("G_1049000.pth", net_g_ms, None)
+
+app = gr.Blocks()
+
+with app:
+    with gr.Tabs():
+
+        with gr.TabItem("Basic"):
+
+            tts_input1 = gr.TextArea(label="输入你的文本", value="一次審査、二次審査、それぞれの欄に記入をお願いします。")
+            tts_input2 = gr.TextArea(label="如需使用openai,输入你的openai-key", value="官网")
+            tts_input3 = gr.TextArea(label="写上你给她的设定", value="恶魔系学妹。")
+            language = gr.Dropdown(label="选择合成方式",choices=lan, value="对话", interactive=True)
+            para_input1 = gr.Slider(minimum= 0.01,maximum=1.0,label="更改噪声比例", value=0.667)
+            para_input2 = gr.Slider(minimum= 0.01,maximum=1.0,label="更改噪声偏差", value=0.8)
+            para_input3 = gr.Slider(minimum= 0.1,maximum=10,label="更改时间比例", value=1)
+            tts_submit = gr.Button("Generate", variant="primary")
+            speaker1 = gr.Dropdown(label="选择说话人",choices=idols, value="かすみ", interactive=True)
+            tts_output2 = gr.Audio(label="Output")
+            tts_submit.click(infer, [language,tts_input1,tts_input2,tts_input3,speaker1,para_input1,para_input2,para_input3], [tts_output2])
+#app.launch(share=True)
+app.launch()
+'''
+import time
+import matplotlib.pyplot as plt
+import IPython.display as ipd
+import re
+import os
+import json
+import math
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torch.utils.data import DataLoader
+import gradio as gr
+import commons
+import utils
+from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
+from models import SynthesizerTrn
+from text.symbols import symbols
+from text import text_to_sequence
+import unicodedata
+from scipy.io.wavfile import write
 def get_text(text, hps):
     text_norm = text_to_sequence(text, hps.data.text_cleaners)
     if hps.data.add_blank:
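
Note: `friend_chat` above uses the legacy `openai.Completion` endpoint and the `text-davinci-003` model, both since deprecated by OpenAI. For reference, a minimal sketch of the equivalent request against the openai>=1.0 SDK; the model name and message framing are illustrative assumptions, not part of this commit:

# Sketch only (not part of this commit): same request via openai>=1.0.
from openai import OpenAI

def friend_chat_v1(text, key, call_name, identity):
    client = OpenAI(api_key=key)
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",  # assumed stand-in for text-davinci-003
        messages=[
            {"role": "system", "content": f"{identity}\nReply in character as {call_name}."},
            {"role": "user", "content": text},
        ],
        temperature=0.5,
        max_tokens=1000,
        stop=["\nYou:"],  # same stop sequence as the original call
    )
    return response.choices[0].message.content.strip()
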
@@ -142,4 +324,5 @@ with app:
             tts_output3 = gr.Image(label = "Model")
             tts_submit.click(infer, [language,tts_input1,speaker1,para_input1,para_input2,para_input3], [tts_output2,tts_output3])
 #app.launch(share=True)
-app.launch()
+app.launch()
+'''
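
Note on `selection`: the `elif` chain is a name-to-id table (id 14 is unused; "三色绘恋2" maps to 15). A table-driven sketch of the same mapping, with one labeled assumption: the original implicitly returns None for an unknown name, which makes `int(selection(...))` raise, whereas the fallback to 0 below is an addition, not part of the commit:

# Sketch only: table-driven replacement for the elif chain in selection().
SPEAKER_IDS = {
    "高咲侑(误)": 0, "歩夢": 1, "かすみ": 2, "しずく": 3,
    "果林": 4, "愛": 5, "彼方": 6, "せつ菜": 7,
    "エマ": 8, "璃奈": 9, "栞子": 10, "ランジュ": 11,
    "ミア": 12, "三色绘恋1": 13, "三色绘恋2": 15, "派蒙": 16,
}

def selection(speaker):
    # Assumption: default to speaker 0 instead of returning None for unknown names.
    return SPEAKER_IDS.get(speaker, 0)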
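
Note on `is_japanese`: the `0x3040 < ord(ch) < 0x30FF` test flags hiragana and katakana only, so kanji-only input is routed to the `[ZH]` branch of `sle`. Since `unicodedata` is already imported at the top of the file, an equivalent, more explicit sketch:

# Sketch only: name-based kana check with the already-imported unicodedata.
import unicodedata

def is_japanese(string):
    # True if any character is hiragana or katakana; kanji-only text still
    # falls through to the [ZH] branch, matching the original behavior.
    return any(
        "HIRAGANA" in unicodedata.name(ch, "") or "KATAKANA" in unicodedata.name(ch, "")
        for ch in string
    )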
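
For a quick smoke test without launching the UI, `infer` can be called directly once the checkpoint has loaded; "日文" mode skips the OpenAI call entirely. A hypothetical usage sketch:

# Sketch only: drive the handler without Gradio ("日文" avoids the OpenAI path).
sr, audio = infer("日文", "こんにちは。", "", "", "かすみ", 0.667, 0.8, 1.0)
write("sample.wav", sr, audio)  # scipy.io.wavfile.write is already imported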