burraco135 committed on
Commit
f46e354
1 Parent(s): 12097f6

Create test.py

Browse files
Files changed (1) hide show
  1. test.py +49 -0
test.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import librosa
3
+ import numpy as np
4
+ import torch
5
+
6
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
7
+
8
# Checkpoint identifier shared by the text processor and the acoustic model.
checkpoint = "burraco135/speecht5_finetuned_voxpopuli_it"

# Both objects are restored from the same checkpoint so that the tokenizer
# and the text-to-speech model stay in sync.
processor = SpeechT5Processor.from_pretrained(checkpoint)
model = SpeechT5ForTextToSpeech.from_pretrained(checkpoint)

# HiFi-GAN vocoder restored from its own checkpoint.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# Location of the saved speaker embedding (.npy). This is only the path: the
# array itself is not loaded here.
# NOTE(review): absolute path on one developer's Windows machine — it will not
# resolve on any other host; confirm where the file should live when deployed.
speaker_embeddings = r"C:\Users\ester\OneDrive\Documenti\VSCode\Hugging Face\tts\speaker_embeddings.npy"
14
+
15
def predict(text, speaker=None):
    """Synthesize speech for ``text`` and return it as numpy audio.

    Args:
        text: Sentence to read aloud. Empty or whitespace-only input yields
            an empty clip without invoking the model.
        speaker: Accepted so callers that pass a speaker label keep working.
            It is ignored: the module-level ``speaker_embeddings`` is used
            for every request.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``sample_rate`` is 16000 and
        ``samples`` is an ``np.int16`` numpy array.
    """
    # If the text is empty, return an empty 16 kHz clip.
    if not text or not text.strip():
        return (16000, np.zeros(0).astype(np.int16))

    # Preprocess the text into token ids and keep them within the number of
    # text positions the model is configured for.
    inputs = processor(text=text, return_tensors="pt")
    input_ids = inputs["input_ids"][..., : model.config.max_text_positions]

    # The module-level ``speaker_embeddings`` may be the path of a .npy file
    # rather than the array itself; load it if so, then make sure the model
    # receives a float tensor with a leading batch dimension.
    embedding = speaker_embeddings
    if isinstance(embedding, str):
        embedding = np.load(embedding)
    embedding = torch.as_tensor(embedding, dtype=torch.float32)
    if embedding.dim() == 1:
        embedding = embedding.unsqueeze(0)

    # Use the vocoder loaded once at module level instead of instantiating a
    # new one on every request.
    speech = model.generate_speech(input_ids, embedding, vocoder=vocoder)

    # Convert the float waveform tensor to 16-bit PCM in a numpy array; clip
    # first so out-of-range samples cannot wrap around in the int16 cast.
    waveform = speech.detach().cpu().numpy()
    samples = (np.clip(waveform, -1.0, 1.0) * 32767).astype(np.int16)
    return (16000, samples)
26
+
27
+
28
# Heading shown at the top of the demo page.
title = "SpeechT5: Speech Synthesis"

# Sample prompts offered in the UI; each row is [text, speaker label].
# NOTE(review): rows carry a speaker label, but only one input component is
# declared on the interface below — confirm whether a speaker selector is
# intended.
examples = [
    ["It is not in the stars to hold our destiny but in ourselves.", "BDL (male)"],
    ["The octopus and Oliver went to the opera in October.", "CLB (female)"],
    ["She sells seashells by the seashore. I saw a kitten eating chicken in the kitchen.", "RMS (male)"],
    ["Brisk brave brigadiers brandished broad bright blades, blunderbusses, and bludgeons—balancing them badly.", "SLT (female)"],
    ["A synonym for cinnamon is a cinnamon synonym.", "BDL (male)"],
    ["How much wood would a woodchuck chuck if a woodchuck could chuck wood? He would chuck, he would, as much as he could, and chuck as much wood as a woodchuck would if a woodchuck could chuck wood.", "CLB (female)"],
]

# Build the interface first, then start serving it: one text box in, one
# numpy-typed audio player out, backed by ``predict``.
demo = gr.Interface(
    fn=predict,
    inputs=[gr.Text(label="Input Text")],
    outputs=[gr.Audio(label="Generated Speech", type="numpy")],
    title=title,
    examples=examples,
)
demo.launch()