barghavani committed on
Commit
92d8e4f
1 Parent(s): d384ec2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py CHANGED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import gradio as gr
4
+ from TTS.utils.synthesizer import Synthesizer
5
+ from huggingface_hub import hf_hub_download
6
+
7
# Catalogue of available checkpoints. Each row is
# [model name, checkpoint file, config file, repository path, speakers file].
# NOTE(review): the repository path in this row carries a "/resolve/main/"
# suffix, unlike the plain repo id used for the actual download -- confirm
# which form is intended before wiring this table into the download step.
MODEL_INFO = [
    ["vits-multispeaker-495586", "best_model_495586.pth", "config.json", "saillab/vits_multi_cv_15_validated_dataset/resolve/main/", "speakers.pth"],
]

# Names offered in the UI, derived from MODEL_INFO so the two cannot drift apart.
MODEL_NAMES = [info[0] for info in MODEL_INFO]

# Maximum number of input characters accepted for synthesis.
MAX_TXT_LEN = 400

# Access token read from the environment; None when the variable is unset.
TOKEN = os.getenv('HUGGING_FACE_HUB_TOKEN')
22
+
23
+ # # Download models
24
+ # for model_name, model_file, config_file, repo_name in MODEL_INFO:
25
+ # os.makedirs(model_name, exist_ok=True)
26
+ # print(f"|> Downloading: {model_name}")
27
+
28
+ # # Use hf_hub_download to download models from private Hugging Face repositories
29
+ # hf_hub_download(repo_id=repo_name, filename=model_file, use_auth_token=TOKEN)
30
+ # hf_hub_download(repo_id=repo_name, filename=config_file, use_auth_token=TOKEN)
31
+
32
# Repository and checkpoint to fetch from the Hugging Face Hub.
repo_name = "saillab/vits_multi_cv_15_validated_dataset"
filename = "best_model_495586.pth"

# hf_hub_download returns the local path of each fetched file; TOKEN is passed
# along so the request is authenticated when a token is configured.
model_file = hf_hub_download(
    repo_id=repo_name,
    filename=filename,
    use_auth_token=TOKEN,
)
config_file = hf_hub_download(
    repo_id=repo_name,
    filename="config.json",
    use_auth_token=TOKEN,
)
37
+
38
+
39
# Lazily-created Synthesizer shared by every request; see _get_synthesizer().
_SYNTHESIZER = None


def _get_synthesizer():
    """Return the shared Synthesizer, building it on first use."""
    global _SYNTHESIZER
    if _SYNTHESIZER is None:
        # Built once instead of per request: constructing it loads the
        # checkpoint. If construction raises, nothing is cached and the next
        # call tries again.
        _SYNTHESIZER = Synthesizer(model_file, config_file)
    return _SYNTHESIZER


def synthesize(text: str, model_name: str) -> str:
    """Synthesize speech for *text* and return the path of the WAV file.

    Args:
        text: Text to speak. Anything beyond MAX_TXT_LEN characters is dropped.
        model_name: Model selected in the UI. Currently unused: only the
            checkpoint referenced by ``model_file``/``config_file`` is loaded.

    Returns:
        Path of a temporary ``.wav`` file holding the audio. The file is
        created with ``delete=False``, so it is not removed automatically.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cut off as it exceeded the {MAX_TXT_LEN} character limit.")

    synthesizer = _get_synthesizer()
    wavs = synthesizer.tts(text)

    # delete=False keeps the file on disk after the handle closes so the
    # returned path stays valid for the caller.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name
54
+
55
+
56
# Sentence pre-filled in the textbox and reused as the single example row.
_SAMPLE_TEXT = "زین همرهان سست عناصر، دلم گرفت."
# Model selected by default in the radio group and in the example row.
_DEFAULT_MODEL = MODEL_NAMES[0]

# Input and output components, built up front so the Interface call stays short.
_text_input = gr.Textbox(label="Enter Text to Synthesize:", value=_SAMPLE_TEXT)
_model_input = gr.Radio(label="Pick a Model", choices=MODEL_NAMES, value=_DEFAULT_MODEL)
_audio_output = gr.Audio(label="Output", type='filepath')

# Web UI: text and model choice in, path of the synthesized audio file out.
iface = gr.Interface(
    fn=synthesize,
    inputs=[_text_input, _model_input],
    outputs=_audio_output,
    examples=[[_SAMPLE_TEXT, _DEFAULT_MODEL]],
    title='Persian TTS Playground',
    description="Persian text to speech model demo",
    article="",
    live=False,
)

iface.launch()