import gradio as gr
import tensorflow as tf
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import json
from huggingface_hub import from_pretrained_keras, hf_hub_download

# Function to convert sequences back to strings
def sequence_to_text(sequence, tokenizer):
    """Decode a sequence of token indices into a single string.

    Each index is looked up in the inverse of ``tokenizer.word_index``; the
    matching tokens are concatenated with no separator, and indices that have
    no entry in the mapping contribute nothing to the result.
    """
    index_to_token = {index: token for token, index in tokenizer.word_index.items()}
    return ''.join(index_to_token.get(index, '') for index in sequence)

# Hugging Face Hub repository that hosts both the model and its tokenizers.
repo_id = "Bajiyo/Malayalam_transliteration"

# Load the model from Hugging Face repository
model = from_pretrained_keras(repo_id)

# Load tokenizers
source_tokenizer_path = hf_hub_download(repo_id=repo_id, filename="source_tokenizer.json")
target_tokenizer_path = hf_hub_download(repo_id=repo_id, filename="target_tokenizer.json")


def _tokenizer_json_string(tokenizer_data):
    """Return *tokenizer_data* as the JSON string ``tokenizer_from_json`` parses.

    ``json.load`` yields a ``str`` when the file stores the tokenizer config as
    a JSON-encoded string, and a ``dict`` when it stores the config object
    directly; the latter is serialized back so both layouts are accepted.
    """
    if isinstance(tokenizer_data, str):
        return tokenizer_data
    return json.dumps(tokenizer_data)


# Read as UTF-8 explicitly so non-ASCII vocabulary entries do not depend on
# the platform's default locale encoding.
with open(source_tokenizer_path, encoding="utf-8") as f:
    source_tokenizer_data = json.load(f)
source_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(
    _tokenizer_json_string(source_tokenizer_data)
)

with open(target_tokenizer_path, encoding="utf-8") as f:
    target_tokenizer_data = json.load(f)
target_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(
    _tokenizer_json_string(target_tokenizer_data)
)

max_seq_length = 100  # Set the maximum sequence length (adjust if necessary)

def transliterate(input_text):
    """Transliterate *input_text* with the loaded sequence model.

    The text is encoded with ``source_tokenizer``, post-padded to
    ``max_seq_length`` and passed to ``model.predict``. The highest-scoring
    index along the last axis of the prediction is then decoded through
    ``target_tokenizer`` by ``sequence_to_text``.

    Args:
        input_text: Text entered by the user.

    Returns:
        The decoded prediction, or an empty string when ``input_text`` is
        ``None``, empty, or contains only whitespace.
    """
    # Nothing to transliterate: skip inference instead of running the model
    # on blank input and decoding whatever it happens to predict.
    if input_text is None or not input_text.strip():
        return ""

    input_sequence = source_tokenizer.texts_to_sequences([input_text])
    input_padded = pad_sequences(input_sequence, maxlen=max_seq_length, padding='post')
    prediction = model.predict(input_padded)
    # Only one text was submitted, so take the first (and only) batch entry.
    # NOTE(review): presumably prediction is (batch, positions, vocab) — confirm.
    predicted_sequence = np.argmax(prediction, axis=-1)[0]
    return sequence_to_text(predicted_sequence, target_tokenizer)

# Set up Gradio interface
iface = gr.Interface(
    fn=transliterate,
    # Use the top-level component class: the legacy ``gr.inputs`` namespace is
    # deprecated and no longer available in newer Gradio releases.
    inputs=gr.Textbox(lines=2, placeholder="Enter Malayalam text here..."),
    outputs="text",
    title="Malayalam to English Transliteration",
    description="Enter Malayalam names to get their English transliterations."
)

# Start the web UI only when the file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()