Florian Lux committed on
Commit
7234e3c
•
1 Parent(s): 34649ed

revert adding multispeaker demo until further investigated

Browse files
Files changed (1) hide show
  1. app.py +2 -6
app.py CHANGED
@@ -6,7 +6,6 @@ import numpy as np
6
  import torch
7
 
8
  from InferenceInterfaces.Meta_FastSpeech2 import Meta_FastSpeech2
9
- from Preprocessing.ProsodicConditionExtractor import ProsodicConditionExtractor
10
 
11
 
12
  def float2pcm(sig, dtype='int16'):
@@ -37,7 +36,7 @@ class TTS_Interface:
37
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
38
  self.model = Meta_FastSpeech2(device=self.device)
39
 
40
- def read(self, prompt, language, audio):
41
  language_id_lookup = {
42
  "English" : "en",
43
  "German" : "de",
@@ -50,8 +49,6 @@ class TTS_Interface:
50
  "French" : "fr"
51
  }
52
  self.model.set_language(language_id_lookup[language])
53
- if audio is not None:
54
- self.model.default_utterance_embedding = ProsodicConditionExtractor(sr=audio[0]).extract_condition_from_reference_wave(audio[1]).to(self.device)
55
  wav = self.model(prompt)
56
  return 48000, float2pcm(wav.cpu().numpy())
57
 
@@ -69,8 +66,7 @@ iface = gr.Interface(fn=meta_model.read,
69
  'Russian',
70
  'Hungarian',
71
  'Dutch',
72
- 'French'], type="value", default='English', label="Language Selection"),
73
- gr.inputs.Audio(source="microphone", optional=True, label="[optional] Make the TTS imitate your Voice")],
74
  outputs=gr.outputs.Audio(type="numpy", label=None),
75
  layout="vertical",
76
  title="IMS Toucan Multilingual Multispeaker Demo",
 
6
  import torch
7
 
8
  from InferenceInterfaces.Meta_FastSpeech2 import Meta_FastSpeech2
 
9
 
10
 
11
  def float2pcm(sig, dtype='int16'):
 
36
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
37
  self.model = Meta_FastSpeech2(device=self.device)
38
 
39
+ def read(self, prompt, language):
40
  language_id_lookup = {
41
  "English" : "en",
42
  "German" : "de",
 
49
  "French" : "fr"
50
  }
51
  self.model.set_language(language_id_lookup[language])
 
 
52
  wav = self.model(prompt)
53
  return 48000, float2pcm(wav.cpu().numpy())
54
 
 
66
  'Russian',
67
  'Hungarian',
68
  'Dutch',
69
+ 'French'], type="value", default='English', label="Language Selection")],
 
70
  outputs=gr.outputs.Audio(type="numpy", label=None),
71
  layout="vertical",
72
  title="IMS Toucan Multilingual Multispeaker Demo",