import gradio as gr
import numpy as np
from huggingface_hub import from_pretrained_keras, hf_hub_download
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import tokenizer_from_json

# Load the model from the Hugging Face Hub
model = from_pretrained_keras("Bajiyo/ml-en-transliteration")
# Load tokenizer configurations
source_tokenizer_path = "https://huggingface.co/Bajiyo/ml-en-transliteration/blob/main/source_tokenizer.json"
with open(source_tokenizer_path, "r") as f:
source_tokenizer_config = json.load(f)
target_tokenizer_path = "https://huggingface.co/Bajiyo/ml-en-transliteration/blob/main/target_tokenizer.json"
with open(target_tokenizer_path, "r") as f:
target_tokenizer_config = json.load(f)
# Reconstruct tokenizers
from keras.preprocessing.text import tokenizer_from_json
source_tokenizer = tokenizer_from_json(source_tokenizer_config)
target_tokenizer = tokenizer_from_json(target_tokenizer_config)
# Define the maximum sequence length
max_seq_length = 50
# Function to predict transliteration
def predict_transliteration(input_text):
    # Preprocess the input text into a padded index sequence
    input_sequence = source_tokenizer.texts_to_sequences([input_text])
    input_sequence_padded = pad_sequences(input_sequence, maxlen=max_seq_length, padding="post")

    # Generate predictions
    predicted_sequence = model.predict(input_sequence_padded)

    # Decode the predicted sequence, skipping the padding index 0
    predicted_indices = np.argmax(predicted_sequence, axis=-1)[0]
    predicted_text = "".join(target_tokenizer.index_word[i] for i in predicted_indices if i != 0)
    return predicted_text
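# Example usage for a quick local check (optional; the Malayalam sample word below is only
# an illustrative placeholder):
# print(predict_transliteration("മലയാളം"))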
# Create a Gradio interface
input_textbox = gr.inputs.Textbox(lines=2, label="Enter Malayalam text")
output_textbox = gr.outputs.Textbox(label="Predicted Transliteration")
gr.Interface(fn=predict_transliteration, inputs=input_textbox, outputs=output_textbox, title="Malayalam Transliteration", description="Enter Malayalam text to get its transliteration in English.").launch()
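# When running this file locally (outside a Hugging Face Space), demo.launch(share=True)
# can be used instead to get a temporary public URL.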