RayeRen committed on
Commit
37bf4d9
2 Parent(s): d5e28e1 e75aa39

Merge branch 'main' into ps

Browse files
inference/tts/ps_flow.py CHANGED
@@ -10,8 +10,9 @@ class PortaSpeechFlowInfer(BaseTTSInfer):
10
  ph_dict_size = len(self.ph_encoder)
11
  word_dict_size = len(self.word_encoder)
12
  model = PortaSpeechFlow(ph_dict_size, word_dict_size, self.hparams)
13
- model.eval()
14
  load_ckpt(model, hparams['work_dir'], 'model')
 
 
15
  return model
16
 
17
  def forward_model(self, inp):
 
10
  ph_dict_size = len(self.ph_encoder)
11
  word_dict_size = len(self.word_encoder)
12
  model = PortaSpeechFlow(ph_dict_size, word_dict_size, self.hparams)
 
13
  load_ckpt(model, hparams['work_dir'], 'model')
14
+ model.post_flow.store_inverse()
15
+ model.eval()
16
  return model
17
 
18
  def forward_model(self, inp):
modules/tts/portaspeech/fvae.py CHANGED
@@ -125,7 +125,7 @@ class FVAE(nn.Module):
125
  return z_q, loss_kl, z_p, m_q, logs_q
126
  else:
127
  latent_shape = [cond_sqz.shape[0], self.latent_size, cond_sqz.shape[2]]
128
- z_p = self.prior_dist.sample(latent_shape).to(cond.device) * noise_scale
129
  if self.use_prior_flow:
130
  z_p = self.prior_flow(z_p, 1, cond_sqz, reverse=True)
131
  return z_p
 
125
  return z_q, loss_kl, z_p, m_q, logs_q
126
  else:
127
  latent_shape = [cond_sqz.shape[0], self.latent_size, cond_sqz.shape[2]]
128
+ z_p = torch.randn(latent_shape).to(cond.device) * noise_scale
129
  if self.use_prior_flow:
130
  z_p = self.prior_flow(z_p, 1, cond_sqz, reverse=True)
131
  return z_p
modules/tts/portaspeech/portaspeech_flow.py CHANGED
@@ -70,6 +70,6 @@ class PortaSpeechFlow(PortaSpeech):
70
  ret['postflow'] = None
71
  else:
72
  nonpadding = torch.ones_like(x_recon[:, :1, :])
73
- z_post = prior_dist.sample(x_recon.shape).to(g.device) * self.hparams['noise_scale']
74
  x_recon, _ = self.post_flow(z_post, nonpadding, g, reverse=True)
75
  ret['mel_out'] = x_recon.transpose(1, 2)
 
70
  ret['postflow'] = None
71
  else:
72
  nonpadding = torch.ones_like(x_recon[:, :1, :])
73
+ z_post = torch.randn(x_recon.shape).to(g.device) * self.hparams['noise_scale']
74
  x_recon, _ = self.post_flow(z_post, nonpadding, g, reverse=True)
75
  ret['mel_out'] = x_recon.transpose(1, 2)