burraco135 committed on
Commit
3c9c00a
1 Parent(s): f46e354

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -20
app.py CHANGED
@@ -11,8 +11,6 @@ model = SpeechT5ForTextToSpeech.from_pretrained(checkpoint)
11
  vocoder = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
12
 
13
  def predict(text, speaker):
14
- if len(text.strip()) == 0:
15
- return (16000, np.zeros(0).astype(np.int16))
16
 
17
  inputs = processor(text=text, return_tensors="pt")
18
 
@@ -20,24 +18,6 @@ def predict(text, speaker):
20
  input_ids = inputs["input_ids"]
21
  input_ids = input_ids[..., :model.config.max_text_positions]
22
 
23
- if speaker == "Surprise Me!":
24
- # load one of the provided speaker embeddings at random
25
- idx = np.random.randint(len(speaker_embeddings))
26
- key = list(speaker_embeddings.keys())[idx]
27
- speaker_embedding = np.load(speaker_embeddings[key])
28
-
29
- # randomly shuffle the elements
30
- np.random.shuffle(speaker_embedding)
31
-
32
- # randomly flip half the values
33
- x = (np.random.rand(512) >= 0.5) * 1.0
34
- x[x == 0] = -1.0
35
- speaker_embedding *= x
36
-
37
- #speaker_embedding = np.random.rand(512).astype(np.float32) * 0.3 - 0.15
38
- else:
39
- speaker_embedding = np.load(speaker_embeddings[speaker[:3]])
40
-
41
  speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
42
 
43
  speech = model.generate_speech(input_ids, speaker_embedding, vocoder=vocoder)
 
11
  vocoder = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
12
 
13
  def predict(text, speaker):
 
 
14
 
15
  inputs = processor(text=text, return_tensors="pt")
16
 
 
18
  input_ids = inputs["input_ids"]
19
  input_ids = input_ids[..., :model.config.max_text_positions]
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)
22
 
23
  speech = model.generate_speech(input_ids, speaker_embedding, vocoder=vocoder)