import os
import tempfile
import gradio as gr
from TTS.api import TTS
from TTS.utils.synthesizer import Synthesizer
from huggingface_hub import hf_hub_download
import json
import glob


# Model registry. Each row is laid out as
#   [display name, checkpoint filename, config filename, Hub repo id]
# and may carry a fifth item naming a speakers file in the same repo
# (the download loop below reads it as info[4]).
MODEL_INFO = [
    [
        "Persian XTTS",
        "checkpoint_30000.pth",
        "config.json",
        "saillab/xtts_v2_fa_revision1",
    ],
]

# Input text longer than this many characters is truncated by synthesize().
MAX_TXT_LEN = 400

# Hub access token taken from the environment; None when the variable is unset.
TOKEN = os.environ.get("HUGGING_FACE_HUB_TOKEN")

# Local paths of the downloaded files, each keyed by model display name.
model_files, config_files, speaker_files = {}, {}, {}

# One ready-to-use synthesizer object per model, keyed by model display name.
synthesizers = {}

def update_config_speakers_file_recursive(config_dict, speakers_path):
    """Point every ``speakers_file`` entry in a nested config at *speakers_path*.

    The structure is modified in place. Dictionaries are visited at any depth,
    including dictionaries stored inside lists (which is how JSON arrays of
    objects are decoded), so a ``speakers_file`` key nested under a list is
    rewritten as well.

    Args:
        config_dict: Decoded configuration (normally a ``dict``) to update.
        speakers_path: Value assigned to each ``speakers_file`` key found.

    Returns:
        None. The change is visible through *config_dict* itself.
    """
    # Explicit work list instead of recursion: no recursion-depth limit and a
    # single place that decides which container types are descended into.
    pending = [config_dict]
    while pending:
        node = pending.pop()
        if isinstance(node, dict):
            if "speakers_file" in node:
                node["speakers_file"] = speakers_path
            pending.extend(node.values())
        elif isinstance(node, list):
            pending.extend(node)

def update_config_speakers_file(config_path, speakers_path):
    """Rewrite the JSON config at *config_path* so it points at *speakers_path*.

    The file is loaded, every ``speakers_file`` entry is replaced via
    ``update_config_speakers_file_recursive``, and the result is written back
    to the same path with 4-space indentation.

    Args:
        config_path: Path of the JSON config file to modify in place.
        speakers_path: Path stored in the config's ``speakers_file`` entries.

    Raises:
        OSError: If the file cannot be read or written.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; pass the encoding explicitly so reading does not
    # depend on the platform's locale default.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    # Modify the speakers_file entries in the decoded structure.
    update_config_speakers_file_recursive(config, speakers_path)

    # NOTE(review): this overwrites the file in place; the caller in this
    # script passes a path returned by hf_hub_download, so the downloaded
    # copy itself is altered -- confirm that is acceptable.
    with open(config_path, 'w', encoding='utf-8') as f:
        json.dump(config, f, indent=4)

# Download models and initialize synthesizers
for info in MODEL_INFO:
    model_name, model_file, config_file, repo_name = info[:4]
    # The speakers file is the optional fifth field. Accept rows that carry
    # additional trailing fields instead of silently ignoring their speaker.
    speaker_file = info[4] if len(info) > 4 else None

    print(f"|> Downloading: {model_name}")

    # Download model and config files
    model_files[model_name] = hf_hub_download(repo_id=repo_name, filename=model_file, use_auth_token=TOKEN)
    config_files[model_name] = hf_hub_download(repo_id=repo_name, filename=config_file, use_auth_token=TOKEN)

    # Arguments shared by every synthesizer; the speakers file is added only
    # when the model declares one.
    synthesizer_kwargs = {
        "tts_checkpoint": model_files[model_name],
        "tts_config_path": config_files[model_name],
        "use_cuda": False,  # Assuming you don't want to use GPU, adjust if needed
    }

    # Any falsy speaker entry (None or "") means "no speakers file", so a
    # synthesizer is always built for the current model and never left
    # unbound or carried over from the previous iteration.
    if speaker_file:
        speaker_files[model_name] = hf_hub_download(repo_id=repo_name, filename=speaker_file, use_auth_token=TOKEN)
        # Make the downloaded config reference the downloaded speakers file.
        update_config_speakers_file(config_files[model_name], speaker_files[model_name])
        print(speaker_files[model_name])
        synthesizer_kwargs["tts_speakers_file"] = speaker_files[model_name]

    # Initialize synthesizer for the model
    synthesizer = Synthesizer(**synthesizer_kwargs)
    synthesizers[model_name] = synthesizer


def synthesize(text: str, model_name: str, speaker_name=None) -> str:
    """Synthesize speech with the selected model and return a WAV file path.

    Args:
        text: Text to speak. Anything beyond ``MAX_TXT_LEN`` characters is
            dropped and a notice is printed.
        model_name: Key of the model in ``synthesizers``.
        speaker_name: Speaker to use when the model was loaded with a
            speakers file. ``None`` or ``""`` means no explicit speaker.

    Returns:
        Path of a temporary ``.wav`` file holding the audio. The file is
        created with ``delete=False``, so it is not removed automatically.

    Raises:
        NameError: If no synthesizer is registered under *model_name*.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cut off as it exceeded the {MAX_TXT_LEN} character limit.")

    # .get() so an unknown model reaches the intended "Model not found" error
    # instead of failing earlier with a KeyError from the dict lookup.
    synthesizer = synthesizers.get(model_name)
    if synthesizer is None:
        raise NameError("Model not found")

    # Compare by value/truthiness: identity checks against a "" literal are
    # implementation-dependent and emit a SyntaxWarning.
    if not synthesizer.tts_speakers_file:
        wavs = synthesizer.tts(text)
    else:
        # An empty selection from the UI is passed on as "no speaker".
        chosen_speaker = None if speaker_name == "" else speaker_name
        wavs = synthesizer.tts(text, speaker_name=chosen_speaker)

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name

# Display names of the loaded models, in load order (the keys of
# `synthesizers`); used for the model picker and the example row.
MODEL_NAMES = list(synthesizers)


# Callback function to update UI based on the selected model
def update_options(model_name):
    """Return the speaker names available for *model_name*.

    Args:
        model_name: Key of the model in ``synthesizers``.

    Returns:
        A list of speaker names when the loaded model exposes a speaker
        manager with ``speaker_names``; otherwise an empty list.

    Raises:
        KeyError: If no synthesizer is registered under *model_name*.
    """
    synthesizer = synthesizers[model_name]
    # Decide from the model itself rather than from its position in the model
    # list, so this works for any number and order of configured models.
    tts_model = getattr(synthesizer, "tts_model", None)
    speaker_manager = getattr(tts_model, "speaker_manager", None)
    if speaker_manager is None:
        # return empty options if not multi-speaker
        return []
    # return options for the dropdown
    return list(getattr(speaker_manager, "speaker_names", None) or [])


# Nothing in this script re-populates the dropdown when another model is
# picked, so offer every speaker known to any loaded model (first-seen order,
# duplicates removed).
speaker_choices = list(dict.fromkeys(
    speaker for name in MODEL_NAMES for speaker in update_options(name)
))

# Create Gradio interface
iface = gr.Interface(
    fn=synthesize,
    inputs=[
        gr.Textbox(label="Enter Text to Synthesize:", value="زین همرهان سست عناصر، دلم گرفت."),
        gr.Radio(label="Pick a Model", choices=MODEL_NAMES, value=MODEL_NAMES[0], type="value"),
        # `value=` matches the keyword used by the other components here.
        gr.Dropdown(label="Select Speaker", choices=speaker_choices, type="value", value=None)
    ],
    outputs=gr.Audio(label="Output", type='filepath'),
    examples=[["زین همرهان سست عناصر، دلم گرفت.", MODEL_NAMES[0], ""]],  # Example should include a speaker name for multispeaker models
    title='Persian TTS Playground',
    description="""
    ### Persian text to speech model demo. 
    
        
    #### Pick a speaker for MultiSpeaker models. (for single speaker go for speaker-0)
    """,
    article="",
    live=False
)

iface.launch()