Kurdish_TTS

Runtime error

App Files Files Community

barghavani committed on Dec 7, 2023

Commit

2ccb399

•

1 Parent(s): 57ff557

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -69

app.py CHANGED Viewed

@@ -1,73 +1,150 @@
-import os
-import tempfile
 import gradio as gr
 from TTS.api import TTS
-from TTS.utils.synthesizer import Synthesizer
-from huggingface_hub import hf_hub_download
-import json
-# Define constants
-MODEL_INFO = [
-    ["Xtts Persian","best_model_110880.pth","config.json","saillab/xtts_v2_fa"],
-]
-# Extract model names from MODEL_INFO
-MODEL_NAMES = [info[0] for info in MODEL_INFO]
-MAX_TXT_LEN = 400
-TOKEN = os.getenv('HUGGING_FACE_HUB_TOKEN')
-model_files = {}
-config_files = {}
-# Create a dictionary to store synthesizer objects for each model
-synthesizers = {}
-# Download models and initialize synthesizers
-for info in MODEL_INFO:
-    model_name, model_file, config_file, repo_name = info[:4]
-    print(f"|> Downloading: {model_name}")
-    # Download model and config files
-    model_files[model_name] = hf_hub_download(repo_id=repo_name, filename=model_file, use_auth_token=TOKEN)
-    config_files[model_name] = hf_hub_download(repo_id=repo_name, filename=config_file, use_auth_token=TOKEN)
-    # Initialize synthesizer for the model
-    synthesizer = Synthesizer(
-        tts_checkpoint=model_files[model_name],
-        tts_config_path=config_files[model_name],
-        use_cuda=False
-    )
-    synthesizers[model_name] = synthesizer
-def synthesize(text: str, model_name: str) -> str:
-    if len(text) > MAX_TXT_LEN:
-        text = text[:MAX_TXT_LEN]
-        print(f"Input text was cut off as it exceeded the {MAX_TXT_LEN} character limit.")
-    synthesizer = synthesizers[model_name]
-    if synthesizer is None:
-        raise NameError("Model not found")
-    wavs = synthesizer.tts(text)
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        synthesizer.save_wav(wavs, fp)
-        return fp.name
-iface = gr.Interface(
-    fn=synthesize,
-    inputs=[
-        gr.Textbox(label="Enter Text to Synthesize:", value="زین همرهان سست عناصر، دلم گرفت."),
-        gr.Radio(label="Pick a Model", choices=MODEL_NAMES, value=MODEL_NAMES[0], type="value"),
-    ],
-    outputs=gr.Audio(label="Output", type='filepath'),
-    examples=[["زین همرهان سست عناصر، دلم گرفت.", MODEL_NAMES[0]]],  # Example should include a speaker name for multispeaker models
-    title='Persian TTS Playground',
-    description="",
-    article="",
-    live=False
 )
-iface.launch()

+import sys
+import io, os, stat
+import subprocess
+import random
+from zipfile import ZipFile
+import uuid
+import time
+import torch
+import torchaudio
+os.environ["COQUI_TOS_AGREED"] = "1"
+import langid
+import base64
+import csv
+from io import StringIO
+import datetime
 import gradio as gr
+from scipy.io.wavfile import write
+from pydub import AudioSegment
 from TTS.api import TTS
+from TTS.tts.configs.xtts_config import XttsConfig
+from TTS.tts.models.xtts import Xtts
+from TTS.utils.generic_utils import get_user_data_dir
+HF_TOKEN = os.environ.get("HUGGING_FACE_HUB_TOKEN")
+from huggingface_hub import HfApi
+api = HfApi(token=HF_TOKEN)
+repo_id = "saillab/xtts-streaming"
+print("Export newer ffmpeg binary for denoise filter")
+ZipFile("ffmpeg.zip").extractall()
+print("Make ffmpeg binary executable")
+st = os.stat('ffmpeg')
+os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
+print("Downloading if not downloaded Coqui XTTS V1.1")
+from TTS.utils.manage import ModelManager
+model_name = "saillab/xtts_v2_fa"
+ModelManager().download_model(model_name)
+model_path = os.path.join(get_user_data_dir("tts"), model_name.replace("/", "--"))
+print("XTTS downloaded")
+config = XttsConfig()
+config.load_json(os.path.join(model_path, "config.json"))
+model = Xtts.init_from_config(config)
+model.load_checkpoint(
+    config,
+    checkpoint_path=os.path.join(model_path, "model.pth"),
+    vocab_path=os.path.join(model_path, "vocab.json"),
+    eval=True,
+    use_deepspeed=True
 )
+model.cuda()
+supported_languages=["fa"]
+title = "XTTS Persian"
+description = """
+<div>
+<a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>
+<a style='display:inline-block' href='https://discord.gg/5eXr5seRrv'><img src='https://discord.com/api/guilds/1037326658807533628/widget.png?style=shield' /></a>
+<a href="https://huggingface.co/spaces/coqui/xtts-streaming?duplicate=true">
+<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
+</div>
+<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=0d00920c-8cc9-4bf3-90f2-a615797e5f59" />
+<a href="https://huggingface.co/coqui/XTTS-v1">XTTS</a> is a Voice generation model that lets you clone voices into different languages by using just a quick 6-second audio clip.
+<br/>
+XTTS is built on previous research, like Tortoise, with additional architectural innovations and training to make cross-language voice cloning and multilingual speech generation possible.
+<br/>
+This is the same model that powers our creator application <a href="https://coqui.ai">Coqui Studio</a> as well as the <a href="https://docs.coqui.ai">Coqui API</a>. In production we apply modifications to make low-latency streaming possible.
+<br/>
+Leave a star on the Github <a href="https://github.com/UNHSAILLab/Persian-TTS">🐸TTS</a>, where our open-source inference and training code lives.
+<br/>
+<p>For faster inference without waiting in the queue, you should duplicate this space and upgrade to GPU via the settings.
+<br/>
+</p>
+<p>Language Selectors:
+Persian: fa
+</p>
+<p> Notice: Autoplay may not work on mobile, if you see black waveform image on mobile click it your Audio is there</p>
+<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=8946ef36-c454-4a8e-a9c9-8a8dd735fabd" />
+"""
+article = """
+<div style='margin:20px auto;'>
+<p>By using this demo you agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml</p>
+<p>We collect data only for error cases for improvement.</p>
+</div>
+"""
+gr.Interface(
+    fn=predict,
+    inputs=[
+        gr.Textbox(
+            label="Text Prompt",
+            info="One or two sentences at a time is better. Up to 200 text characters.",
+            value="Hi there, I'm your new voice clone. Try your best to upload quality audio",
+        ),
+        gr.Dropdown(
+            label="Language",
+            info="Select an output language for the synthesised speech",
+            choices=supported_languages,
+            max_choices=1,
+            value=supported_languages[0],
+        ),
+        gr.Audio(
+            label="Reference Audio",
+            info="Click on the ✎ button to upload your own target speaker audio",
+            type="filepath",
+            value="examples/female.wav",
+        ),
+        gr.Audio(source="microphone",
+                 type="filepath",
+                 info="Use your microphone to record audio",
+                 label="Use Microphone for Reference"),
+        gr.Checkbox(label="Use Microphone",
+                    value=False,
+                    info="Notice: Microphone input may not work properly under traffic",),
+        gr.Checkbox(label="Cleanup Reference Voice",
+                    value=False,
+                    info="This check can improve output if your microphone or reference voice is noisy",
+                    ),
+        gr.Checkbox(label="Do not use language auto-detect",
+                    value=False,
+                    info="Check to disable language auto-detection",),
+        gr.Checkbox(
+            label="Agree",
+            value=False,
+            info="I agree to the terms of the Coqui Public Model License at https://coqui.ai/cpml",
+        ),
+    ],
+    outputs=[
+        gr.Video(label="Waveform Visual"),
+        gr.Audio(label="Synthesised Audio", streaming=True, autoplay=True),
+        gr.Text(label="Metrics"),
+        gr.Audio(label="Reference Audio Used"),
+    ],
+    title=title,
+    description=description,
+    article=article,
+    examples=[],
+    cache_examples=False,
+).queue().launch(debug=True, show_api=True)