Mahiruoshi committed
Commit 7d4fd9a
Parent: d220bdb

Update app.py

Files changed (1): app.py (+1 −148)
app.py CHANGED
@@ -116,7 +116,7 @@ idols = ["高咲侑","歩夢","かすみ","しずく","果林","愛","彼方","
 
 
 
-dev = torch.device("cuda:0")
+dev = torch.device("cpu")
 hps_ms = utils.get_hparams_from_file("config.json")
 net_g_ms = SynthesizerTrn(
     len(symbols),
@@ -131,153 +131,6 @@ app = gr.Blocks()
 
 
 
-with app:
-    with gr.Tabs():
-
-        with gr.TabItem("Basic"):
-
-            tts_input1 = gr.TextArea(label="旧模型", value="一次審査、二次審査、それぞれの欄に記入をお願いします。")
-            language = gr.Dropdown(label="选择语言", choices=lan, value="日文", interactive=True)
-            para_input1 = gr.Slider(minimum=0.01, maximum=1.0, label="更改噪声比例", value=0.667)
-            para_input2 = gr.Slider(minimum=0.01, maximum=1.0, label="更改噪声偏差", value=0.8)
-            para_input3 = gr.Slider(minimum=0.1, maximum=10, label="更改时间比例", value=1)
-            tts_submit = gr.Button("Generate", variant="primary")
-            speaker1 = gr.Dropdown(label="选择说话人", choices=idols, value="かすみ", interactive=True)
-            tts_output2 = gr.Audio(label="Output")
-            tts_submit.click(infer, [language, tts_input1, speaker1, para_input1, para_input2, para_input3], [tts_output2])
-#app.launch(share=True)
-app.launch()
-'''
-import time
-import matplotlib.pyplot as plt
-import IPython.display as ipd
-import re
-import os
-import json
-import math
-import torch
-from torch import nn
-from torch.nn import functional as F
-from torch.utils.data import DataLoader
-import gradio as gr
-import commons
-import utils
-from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
-from models import SynthesizerTrn
-from text.symbols import symbols
-from text import text_to_sequence
-import unicodedata
-from scipy.io.wavfile import write
-def get_text(text, hps):
-    text_norm = text_to_sequence(text, hps.data.text_cleaners)
-    if hps.data.add_blank:
-        text_norm = commons.intersperse(text_norm, 0)
-    text_norm = torch.LongTensor(text_norm)
-    return text_norm
-
-
-def get_label(text, label):
-    if f'[{label}]' in text:
-        return True, text.replace(f'[{label}]', '')
-    else:
-        return False, text
-
-
-
-
-
-def selection(speaker):
-    if speaker == "高咲侑":
-        spk = 0
-        return spk
-
-    elif speaker == "歩夢":
-        spk = 1
-        return spk
-
-    elif speaker == "かすみ":
-        spk = 2
-        return spk
-
-    elif speaker == "しずく":
-        spk = 3
-        return spk
-
-    elif speaker == "果林":
-        spk = 4
-        return spk
-
-    elif speaker == "愛":
-        spk = 5
-        return spk
-
-    elif speaker == "彼方":
-        spk = 6
-        return spk
-
-    elif speaker == "せつ菜":
-        spk = 7
-        return spk
-    elif speaker == "エマ":
-        spk = 8
-        return spk
-    elif speaker == "璃奈":
-        spk = 9
-        return spk
-    elif speaker == "栞子":
-        spk = 10
-        return spk
-    elif speaker == "ランジュ":
-        spk = 11
-        return spk
-    elif speaker == "ミア":
-        spk = 12
-        return spk
-    elif speaker == "派蒙":
-        spk = 16
-        return spk
-
-def sle(language, tts_input0):
-    if language == "中文":
-        tts_input1 = "[ZH]" + tts_input0.replace('\n','。').replace(' ',',') + "[ZH]"
-        return tts_input1
-    if language == "英文":
-        tts_input1 = "[EN]" + tts_input0.replace('\n','.').replace(' ',',') + "[EN]"
-        return tts_input1
-    elif language == "日文":
-        tts_input1 = "[JA]" + tts_input0.replace('\n','。').replace(' ',',') + "[JA]"
-        return tts_input1
-def infer(language, text, speaker_id, n_scale=0.667, n_scale_w=0.8, l_scale=1):
-    speaker_id = int(selection(speaker_id))
-    answer = sle(language, text)
-    stn_tst = get_text(answer, hps_ms)
-    with torch.no_grad():
-        x_tst = stn_tst.unsqueeze(0).to(dev)
-        x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
-        sid = torch.LongTensor([speaker_id]).to(dev)
-        t1 = time.time()
-        audio = net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=n_scale, noise_scale_w=n_scale_w, length_scale=l_scale)[0][0,0].data.cpu().float().numpy()
-        t2 = time.time()
-        spending_time = "推理时间:" + str(t2 - t1) + "s"
-        image = '1.png'
-        print(spending_time)
-        return (hps_ms.data.sampling_rate, audio), image
-lan = ["中文","日文","英文"]
-idols = ["高咲侑","歩夢","かすみ","しずく","果林","愛","彼方","せつ菜","璃奈","栞子","エマ","ランジュ","ミア","派蒙"]
-dev = torch.device("cpu")
-hps_ms = utils.get_hparams_from_file("config.json")
-net_g_ms = SynthesizerTrn(
-    len(symbols),
-    hps_ms.data.filter_length // 2 + 1,
-    hps_ms.train.segment_size // hps_ms.data.hop_length,
-    n_speakers=hps_ms.data.n_speakers,
-    **hps_ms.model).to(dev)
-_ = net_g_ms.eval()
-
-_ = utils.load_checkpoint("G_842000.pth", net_g_ms, None)
-
-app = gr.Blocks()
-
 with app:
     with gr.Tabs():
 
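The functional change is the device switch from "cuda:0" to "cpu" in the first hunk; the second hunk only deletes dead code. On hosts where a GPU may or may not be present, a common device-agnostic idiom (a sketch, not part of this commit) is:

import torch

# A minimal sketch, not part of this commit: use the GPU when CUDA is
# available and fall back to the CPU otherwise, instead of hard-coding either.
dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")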
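Much of the deleted bulk is the selection() helper, which maps each speaker name to a model ID through a long elif chain. A table-driven sketch of the same mapping (hypothetical, not in the file) would be:

# Hypothetical rewrite of the deleted selection() elif chain as a lookup table.
# IDs are taken from the deleted code; note 派蒙 maps to 16, skipping 13-15.
SPEAKER_IDS = {
    "高咲侑": 0, "歩夢": 1, "かすみ": 2, "しずく": 3, "果林": 4,
    "愛": 5, "彼方": 6, "せつ菜": 7, "エマ": 8, "璃奈": 9,
    "栞子": 10, "ランジュ": 11, "ミア": 12, "派蒙": 16,
}

def selection(speaker):
    # Raises KeyError for unknown names, where the elif chain returned None.
    return SPEAKER_IDS[speaker]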