"""Gradio demo that serves the ``Bajiyo/Malayalam_transliteration`` Keras model.

Importing this module downloads the model and both tokenizers from the
Hugging Face Hub and builds the Gradio interface; running it as a script
launches the web UI.
"""

import json

import gradio as gr
import numpy as np
import tensorflow as tf
from huggingface_hub import from_pretrained_keras, hf_hub_download
from keras.preprocessing.sequence import pad_sequences


def sequence_to_text(sequence, tokenizer):
    """Convert a sequence of token indices back into a string.

    Args:
        sequence: Iterable of integer token indices.
        tokenizer: Object exposing a ``word_index`` mapping of token -> index.

    Returns:
        The tokens for each index, concatenated with no separator. Indices
        that are not present in ``tokenizer.word_index`` contribute nothing
        (this is what drops padding positions, assuming the padding value is
        not a real token index).
    """
    index_to_token = {
        index: token for token, index in tokenizer.word_index.items()
    }
    return "".join(index_to_token.get(index, "") for index in sequence)


def _read_tokenizer_json(path):
    """Return the tokenizer configuration stored at *path* as a JSON string.

    ``tokenizer_from_json`` parses a JSON *string*. The file may hold either
    that string JSON-encoded (the layout produced by
    ``json.dumps(tokenizer.to_json())``) or the configuration as a plain JSON
    object; the latter is re-serialised so both layouts load.
    """
    # The vocabulary contains non-ASCII characters, so never rely on the
    # platform's default encoding when reading the file.
    with open(path, encoding="utf-8") as f:
        tokenizer_data = json.load(f)
    if not isinstance(tokenizer_data, str):
        tokenizer_data = json.dumps(tokenizer_data, ensure_ascii=False)
    return tokenizer_data


# Load the model from the Hugging Face repository.
repo_id = "Bajiyo/Malayalam_transliteration"
model = from_pretrained_keras(repo_id)

# Load the tokenizers that ship alongside the model.
source_tokenizer_path = hf_hub_download(repo_id=repo_id, filename="source_tokenizer.json")
target_tokenizer_path = hf_hub_download(repo_id=repo_id, filename="target_tokenizer.json")

source_tokenizer_data = _read_tokenizer_json(source_tokenizer_path)
source_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(source_tokenizer_data)

target_tokenizer_data = _read_tokenizer_json(target_tokenizer_path)
target_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(target_tokenizer_data)

max_seq_length = 100  # Set the maximum sequence length (adjust if necessary)


def transliterate(input_text):
    """Transliterate *input_text* with the loaded model.

    Args:
        input_text: Text entered in the Gradio textbox.

    Returns:
        The decoded model output as a string; ``""`` when the input is
        empty, ``None`` or whitespace-only.
    """
    # Nothing to transliterate: skip inference instead of feeding the model
    # an all-padding sequence.
    if not input_text or not input_text.strip():
        return ""

    input_sequence = source_tokenizer.texts_to_sequences([input_text])
    # NOTE(review): pad_sequences truncates from the *front* by default, so
    # inputs longer than max_seq_length lose their leading tokens. Left as-is
    # to match the original behaviour -- confirm against the training setup.
    input_padded = pad_sequences(input_sequence, maxlen=max_seq_length, padding='post')
    prediction = model.predict(input_padded)
    # Assumes the model emits per-position scores over the target vocabulary;
    # take the best-scoring index at each position of the single batch item.
    predicted_sequence = np.argmax(prediction, axis=-1)[0]
    return sequence_to_text(predicted_sequence, target_tokenizer)


# Set up the Gradio interface.
iface = gr.Interface(
    fn=transliterate,
    # gr.Textbox replaces the legacy gr.inputs.Textbox, which was deprecated
    # in Gradio 3 and removed in Gradio 4.
    inputs=gr.Textbox(lines=2, placeholder="Enter Malayalam text here..."),
    outputs="text",
    title="Malayalam to English Transliteration",
    description="Enter Malayalam names to get their English transliterations.",
)

if __name__ == "__main__":
    iface.launch()