Kurdish_TTS

Runtime error

App Files Community

barghavani committed on Dec 7, 2023

Commit

c687704

•

1 Parent(s): eb9806c

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -115

app.py CHANGED Viewed

@@ -8,32 +8,7 @@ import json
 # Define constants
 MODEL_INFO = [
-    #["vits checkpoint 57000", "checkpoint_57000.pth", "config.json", "mhrahmani/persian-tts-vits-0"],
-    # ["VITS Grapheme Multispeaker CV15(reduct)(best at 17864)", "best_model_17864.pth", "config.json",
-     # "saillab/persian-tts-cv15-reduct-grapheme-multispeaker"],
-    #["Single speaker (best)VITS Grapheme Azure (61000)", "checkpoint_61000.pth", "config.json", "saillab/persian-tts-azure-grapheme-60K"],
-    #["VITS Grapheme ARM24 Fine-Tuned on 1 (66651)", "best_model_66651.pth", "config.json","saillab/persian-tts-grapheme-arm24-finetuned-on1"],
-    #["Single speaker female best VITS Grapheme CV-Azure_male-Azure_female","best_model_15397.pth","config.json","saillab/female_cv_azure_male_azure_female","speakers1.pth"],
-    ["Xtts Persian","checkpoint_361721.pth","config.json","saillab/xtts_v2_fa"],
-    #["Multi Speaker Vits Grapheme CV+Azure in one set ","best_model_358320.pth","config.json","saillab/Multi_Speaker_Cv_plus_Azure_female_in_one_set","speakers.pth"],
-    #["Multispeaker VITS Grapheme  CV15(reduct)(22000)", "checkpoint_22000.pth", "config.json", "saillab/persian-tts-cv15-reduct-grapheme-multispeaker", "speakers.pth"],
-    #["Multispeaker VITS Grapheme  CV15(reduct)(26000)", "checkpoint_25000.pth", "config.json", "saillab/persian-tts-cv15-reduct-grapheme-multispeaker", "speakers.pth"],
-    #["Multispeaker VITS Grapheme  CV15(90K)", "best_model_56960.pth", "config.json", "saillab/multi_speaker", "speakers.pth"],
-    #["Single speaker female best VITS Grapheme CV-Azure_male-Azure_female","best_model_15397.pth","config.json","saillab/female_cv_azure_male_azure_female","speakers.pth"],
-    # ["VITS Grapheme Azure (best at 15934)", "best_model_15934.pth", "config.json",
-     # "saillab/persian-tts-azure-grapheme-60K"],
-    #["Single speaker VITS Grapheme ARM24 Fine-Tuned on 1 (66651)", "best_model_66651.pth", "config.json","saillab/persian-tts-grapheme-arm24-finetuned-on1"],
-    #["Single speaker VITS Grapheme ARM24 Fine-Tuned on 1 (120000)", "checkpoint_120000.pth", "config.json","saillab/persian-tts-grapheme-arm24-finetuned-on1"],
-    # ... Add other models similarly
 ]
 # Extract model names from MODEL_INFO
@@ -44,37 +19,13 @@ TOKEN = os.getenv('HUGGING_FACE_HUB_TOKEN')
 model_files = {}
 config_files = {}
-speaker_files = {}
 # Create a dictionary to store synthesizer objects for each model
 synthesizers = {}
-def update_config_speakers_file_recursive(config_dict, speakers_path):
-    """Recursively update speakers_file keys in a dictionary."""
-    if "speakers_file" in config_dict:
-        config_dict["speakers_file"] = speakers_path
-    for key, value in config_dict.items():
-        if isinstance(value, dict):
-            update_config_speakers_file_recursive(value, speakers_path)
-def update_config_speakers_file(config_path, speakers_path):
-    """Update the config.json file to point to the correct speakers.pth file."""
-    # Load the existing config
-    with open(config_path, 'r') as f:
-        config = json.load(f)
-    # Modify the speakers_file entry
-    update_config_speakers_file_recursive(config, speakers_path)
-    # Save the modified config
-    with open(config_path, 'w') as f:
-        json.dump(config, f, indent=4)
 # Download models and initialize synthesizers
 for info in MODEL_INFO:
     model_name, model_file, config_file, repo_name = info[:4]
-    speaker_file = info[4] if len(info) == 5 else None  # Check if speakers.pth is defined for the model
     print(f"|> Downloading: {model_name}")
@@ -82,94 +33,41 @@ for info in MODEL_INFO:
     model_files[model_name] = hf_hub_download(repo_id=repo_name, filename=model_file, use_auth_token=TOKEN)
     config_files[model_name] = hf_hub_download(repo_id=repo_name, filename=config_file, use_auth_token=TOKEN)
-    # Download speakers.pth if it exists
-    if speaker_file:
-        speaker_files[model_name] = hf_hub_download(repo_id=repo_name, filename=speaker_file, use_auth_token=TOKEN)
-        update_config_speakers_file(config_files[model_name], speaker_files[model_name])  # Update the config file
-        print(speaker_files[model_name])
-        # Initialize synthesizer for the model
-        synthesizer = Synthesizer(
-            tts_checkpoint=model_files[model_name],
-            tts_config_path=config_files[model_name],
-            tts_speakers_file=speaker_files[model_name],  # Pass the speakers.pth file if it exists
-            use_cuda=False  # Assuming you don't want to use GPU, adjust if needed
-            )
-    elif speaker_file is None:
-        # Initialize synthesizer for the model
-        synthesizer = Synthesizer(
-            tts_checkpoint=model_files[model_name],
-            tts_config_path=config_files[model_name],
-            # tts_speakers_file=speaker_files.get(model_name, None),  # Pass the speakers.pth file if it exists
-            use_cuda=False  # Assuming you don't want to use GPU, adjust if needed
-        )
     synthesizers[model_name] = synthesizer
-#def synthesize(text: str, model_name: str, speaker_name="speaker-0") -> str:
-def synthesize(text: str, model_name: str, speaker_name=None) -> str:
-    """Synthesize speech using the selected model."""
     if len(text) > MAX_TXT_LEN:
         text = text[:MAX_TXT_LEN]
         print(f"Input text was cut off as it exceeded the {MAX_TXT_LEN} character limit.")
-    # Use the synthesizer object for the selected model
     synthesizer = synthesizers[model_name]
     if synthesizer is None:
         raise NameError("Model not found")
-    if synthesizer.tts_speakers_file is "":
-        wavs = synthesizer.tts(text)
-    elif synthesizer.tts_speakers_file is not "":
-        if speaker_name == "":
-            #wavs = synthesizer.tts(text, speaker_name="speaker-0") ## should change, better if gradio conditions are figure out.
-            wavs = synthesizer.tts(text, speaker_name=None)
-        else:
-            wavs = synthesizer.tts(text, speaker_name=speaker_name)
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         synthesizer.save_wav(wavs, fp)
         return fp.name
-# Callback function to update UI based on the selected model
-def update_options(model_name):
-    synthesizer = synthesizers[model_name]
-    # if synthesizer.tts.is_multi_speaker:
-    if model_name is MODEL_NAMES[1]:
-        speakers = synthesizer.tts_model.speaker_manager.speaker_names
-        # return options for the dropdown
-        return speakers
-    else:
-        # return empty options if not multi-speaker
-        return []
-# Create Gradio interface
 iface = gr.Interface(
     fn=synthesize,
     inputs=[
         gr.Textbox(label="Enter Text to Synthesize:", value="زین همرهان سست عناصر، دلم گرفت."),
         gr.Radio(label="Pick a Model", choices=MODEL_NAMES, value=MODEL_NAMES[0], type="value"),
-        #gr.Dropdown(label="Select Speaker", choices=update_options(MODEL_NAMES[1]), type="value", default="speaker-0")
-        gr.Dropdown(label="Select Speaker", choices=update_options(MODEL_NAMES[1]), type="value", default=None)
     ],
     outputs=gr.Audio(label="Output", type='filepath'),
-    examples=[["زین همرهان سست عناصر، دلم گرفت.", MODEL_NAMES[0], ""]],  # Example should include a speaker name for multispeaker models
     title='Persian TTS Playground',
-    description="""
-    ### Persian text to speech model demo.
-    #### Pick a speaker for MultiSpeaker models. (for single speaker go for speaker-0)
-    """,
     article="",
     live=False
 )
-iface.launch()

 # Define constants
 MODEL_INFO = [
+    ["Xtts Persian","best_model_110880.pth","config.json","saillab/xtts_v2_fa"],
 ]
 # Extract model names from MODEL_INFO
 model_files = {}
 config_files = {}
 # Create a dictionary to store synthesizer objects for each model
 synthesizers = {}
 # Download models and initialize synthesizers
 for info in MODEL_INFO:
     model_name, model_file, config_file, repo_name = info[:4]
     print(f"|> Downloading: {model_name}")
     model_files[model_name] = hf_hub_download(repo_id=repo_name, filename=model_file, use_auth_token=TOKEN)
     config_files[model_name] = hf_hub_download(repo_id=repo_name, filename=config_file, use_auth_token=TOKEN)
+    # Initialize synthesizer for the model
+    synthesizer = Synthesizer(
+        tts_checkpoint=model_files[model_name],
+        tts_config_path=config_files[model_name],
+        use_cuda=False
+    )
     synthesizers[model_name] = synthesizer
+def synthesize(text: str, model_name: str) -> str:
     if len(text) > MAX_TXT_LEN:
         text = text[:MAX_TXT_LEN]
         print(f"Input text was cut off as it exceeded the {MAX_TXT_LEN} character limit.")
     synthesizer = synthesizers[model_name]
     if synthesizer is None:
         raise NameError("Model not found")
+    wavs = synthesizer.tts(text)
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
         synthesizer.save_wav(wavs, fp)
         return fp.name
 iface = gr.Interface(
     fn=synthesize,
     inputs=[
         gr.Textbox(label="Enter Text to Synthesize:", value="زین همرهان سست عناصر، دلم گرفت."),
         gr.Radio(label="Pick a Model", choices=MODEL_NAMES, value=MODEL_NAMES[0], type="value"),
     ],
     outputs=gr.Audio(label="Output", type='filepath'),
+    examples=[["زین همرهان سست عناصر، دلم گرفت.", MODEL_NAMES[0]]],  # Example should include a speaker name for multispeaker models
     title='Persian TTS Playground',
+    description="",
     article="",
     live=False
 )
+iface.launch()