update
inference/tts/gradio/gradio_settings.yaml
CHANGED
@@ -1,12 +1,12 @@
-title: 'NATSpeech/
+title: 'NATSpeech/PortaSpeech'
 description: |
-  Gradio demo for NATSpeech/
+  Gradio demo for NATSpeech/PortaSpeech. To use it, simply add your audio, or click one of the examples to load them. Note: This space is running on CPU, inference times will be higher.
 article: |
-  Link to <a href='https://github.com/NATSpeech/NATSpeech/blob/main/docs/
+  Link to <a href='https://github.com/NATSpeech/NATSpeech/blob/main/docs/portaspeech.md' style='color:blue;' target='_blank\'>Github REPO</a>
 example_inputs:
 - |-
   the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.
 - |-
   produced the block books, which were the immediate predecessors of the true printed book,
-inference_cls: inference.tts.
-exp_name:
+inference_cls: inference.tts.ps_flow.PortaSpeechFlowInfer
+exp_name: ps_normal_exp
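The two keys changed at the bottom of this file, inference_cls and exp_name, are what point the Gradio demo at the PortaSpeech flow inference class and its experiment directory. As a rough sketch of how such a settings file could be resolved (assuming PyYAML and the repo on the Python path; the helper names below are illustrative, not the Space's actual launcher code):

import importlib

import yaml


def load_settings(path='inference/tts/gradio/gradio_settings.yaml'):
    # Parse the YAML settings shown in the diff above.
    with open(path) as f:
        return yaml.safe_load(f)


def resolve_inference_cls(dotted_path):
    # Split 'inference.tts.ps_flow.PortaSpeechFlowInfer' into module and class
    # name, then import the module and fetch the class object from it.
    module_name, cls_name = dotted_path.rsplit('.', 1)
    return getattr(importlib.import_module(module_name), cls_name)


if __name__ == '__main__':
    settings = load_settings()
    infer_cls = resolve_inference_cls(settings['inference_cls'])
    print(infer_cls, settings['exp_name'])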
inference/tts/ps_flow.py
CHANGED
@@ -10,8 +10,9 @@ class PortaSpeechFlowInfer(BaseTTSInfer):
         ph_dict_size = len(self.ph_encoder)
         word_dict_size = len(self.word_encoder)
         model = PortaSpeechFlow(ph_dict_size, word_dict_size, self.hparams)
-        model.eval()
         load_ckpt(model, hparams['work_dir'], 'model')
+        model.post_flow.store_inverse()
+        model.eval()
         return model

     def forward_model(self, inp):
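The reordering matters because store_inverse() is only useful once the trained weights are in place: it caches the inverses used by the post-flow's reverse pass so they are not recomputed on every inference call. A minimal sketch of that pattern with a toy invertible 1x1 convolution (an illustration of the general Glow-style idea, not the repo's post_flow implementation):

import torch
import torch.nn as nn


class ToyInvConv(nn.Module):
    # Illustrative invertible 1x1 convolution over channels of a [B, C, T] tensor.
    def __init__(self, channels):
        super().__init__()
        # Orthogonal initialization keeps the mixing matrix invertible.
        self.weight = nn.Parameter(torch.linalg.qr(torch.randn(channels, channels))[0])
        self.weight_inv = None

    def store_inverse(self):
        # Cache the inverse of the *current* weights so reverse passes skip a
        # matrix inversion per call; must run after the checkpoint is loaded.
        self.weight_inv = torch.inverse(self.weight.detach())

    def forward(self, x, reverse=False):
        if reverse:
            w = self.weight_inv if self.weight_inv is not None else torch.inverse(self.weight)
        else:
            w = self.weight
        return torch.einsum('oc,bct->bot', w, x)


flow = ToyInvConv(4)
# Pretend the checkpoint was loaded here, *then* cache inverses and switch to
# eval mode, mirroring the ordering introduced in the diff above.
flow.store_inverse()
flow.eval()
x = torch.randn(2, 4, 10)
y = flow(x)
x_rec = flow(y, reverse=True)
print(torch.allclose(x, x_rec, atol=1e-5))  # True: reverse undoes forward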
modules/tts/portaspeech/fvae.py
CHANGED
@@ -125,7 +125,7 @@ class FVAE(nn.Module):
             return z_q, loss_kl, z_p, m_q, logs_q
         else:
             latent_shape = [cond_sqz.shape[0], self.latent_size, cond_sqz.shape[2]]
-            z_p =
+            z_p = torch.randn(latent_shape).to(cond.device) * noise_scale
             if self.use_prior_flow:
                 z_p = self.prior_flow(z_p, 1, cond_sqz, reverse=True)
             return z_p
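The restored line draws the latent from a standard Gaussian with the batch size and squeezed time length of the conditioning tensor, moves it to the conditioning device, and scales it by noise_scale before the optional prior flow runs in reverse. A standalone sketch of that inference branch (tensor sizes and the default noise_scale are made up for illustration; the real module calls its own prior_flow):

import torch


def sample_prior_latent(cond_sqz, latent_size, noise_scale=0.8, prior_flow=None):
    # Stand-in for FVAE's inference branch: Gaussian latent matching the
    # condition's batch and squeezed time axes, on the same device, tempered
    # by noise_scale.
    latent_shape = [cond_sqz.shape[0], latent_size, cond_sqz.shape[2]]
    z_p = torch.randn(latent_shape).to(cond_sqz.device) * noise_scale
    if prior_flow is not None:
        # The real module runs its normalizing flow in reverse here.
        z_p = prior_flow(z_p, 1, cond_sqz, reverse=True)
    return z_p


cond_sqz = torch.randn(2, 192, 50)  # hypothetical [B, hidden, T_sqz] condition
z_p = sample_prior_latent(cond_sqz, latent_size=16)
print(z_p.shape)  # torch.Size([2, 16, 50])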
modules/tts/portaspeech/portaspeech_flow.py
CHANGED
@@ -70,6 +70,6 @@ class PortaSpeechFlow(PortaSpeech):
             ret['postflow'] = None
         else:
             nonpadding = torch.ones_like(x_recon[:, :1, :])
-            z_post =
+            z_post = torch.randn(x_recon.shape).to(g.device) * self.hparams['noise_scale']
             x_recon, _ = self.post_flow(z_post, nonpadding, g, reverse=True)
             ret['mel_out'] = x_recon.transpose(1, 2)
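Analogously, the post-flow's inference branch samples noise shaped like the coarse mel prediction, keeps it on the conditioning tensor's device, and tempers it with the configured noise_scale before running the flow in reverse. A self-contained sketch (the dummy post-flow and tensor sizes are placeholders, not the actual module):

import torch


def run_post_flow(x_recon, g, post_flow, noise_scale=0.8):
    # Sample Gaussian noise shaped like the coarse mel, on g's device, scale it
    # by noise_scale, and let the post-flow map it to a refined mel in reverse.
    nonpadding = torch.ones_like(x_recon[:, :1, :])
    z_post = torch.randn(x_recon.shape).to(g.device) * noise_scale
    x_refined, _ = post_flow(z_post, nonpadding, g, reverse=True)
    return x_refined


def dummy_post_flow(z, nonpadding, g, reverse=False):
    # Hypothetical stand-in that just masks and returns its input.
    return z * nonpadding, None


x_recon = torch.randn(2, 80, 120)  # [B, n_mels, T] coarse decoder output
g = torch.randn(2, 192, 120)       # conditioning features
mel = run_post_flow(x_recon, g, dummy_post_flow)
print(mel.shape)  # torch.Size([2, 80, 120])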