Update app.py
app.py CHANGED
@@ -1,53 +1,50 @@
 import gradio as gr
-import
 from keras.preprocessing.sequence import pad_sequences
-
-
-
-model = from_pretrained_keras("Bajiyo/ml-en-transliteration")
-
-# Define URLs for tokenizer files on Hugging Face Hub (replace with actual model identifier if different)
-source_tokenizer_url = f"https://huggingface.co/Bajiyo/ml-en-transliteration/resolve/main/source_tokenizer.json"
-target_tokenizer_url = f"https://huggingface.co/Bajiyo/ml-en-transliteration/resolve/main/target_tokenizer.json"
-
-# Download tokenizer files using cached_download (avoids redundant downloads)
-source_tokenizer_path = cached_download(source_tokenizer_url)
-target_tokenizer_path = cached_download(target_tokenizer_url)

-#
-

-
-

-
-

-
-
-source_tokenizer = tokenizer_from_json(
-target_tokenizer = tokenizer_from_json(target_tokenizer_config)


-#
-max_seq_length = 50

-
-def predict_transliteration(input_text):
-    # Preprocess the input text
     input_sequence = source_tokenizer.texts_to_sequences([input_text])
-
-
-
-
-
-    # Decode the predicted sequence
-    predicted_text = "".join(target_tokenizer.index_word[i] for i in np.argmax(predicted_sequence, axis=-1)[0] if i != 0)
-
     return predicted_text

-#
-
-
-
-
 import gradio as gr
+import tensorflow as tf
 from keras.preprocessing.sequence import pad_sequences
+import numpy as np
+import json
+from huggingface_hub import from_pretrained_keras, hf_hub_download

+# Function to convert sequences back to strings
+def sequence_to_text(sequence, tokenizer):
+    reverse_word_map = dict(map(reversed, tokenizer.word_index.items()))
+    text = ''.join([reverse_word_map.get(i, '') for i in sequence])
+    return text

+# Load the model from Hugging Face repository
+model = from_pretrained_keras("Bajiyo/Malayalam_transliteration")

+# Load tokenizers
+repo_id = "Bajiyo/Malayalam_transliteration"
+source_tokenizer_path = hf_hub_download(repo_id=repo_id, filename="source_tokenizer.json")
+target_tokenizer_path = hf_hub_download(repo_id=repo_id, filename="target_tokenizer.json")

+with open(source_tokenizer_path) as f:
+    source_tokenizer_data = json.load(f)
+source_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(source_tokenizer_data)

+with open(target_tokenizer_path) as f:
+    target_tokenizer_data = json.load(f)
+target_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(target_tokenizer_data)

+max_seq_length = 100  # Set the maximum sequence length (adjust if necessary)

+def transliterate(input_text):
     input_sequence = source_tokenizer.texts_to_sequences([input_text])
+    input_padded = pad_sequences(input_sequence, maxlen=max_seq_length, padding='post')
+    prediction = model.predict(input_padded)
+    predicted_sequence = np.argmax(prediction, axis=-1)[0]
+    predicted_text = sequence_to_text(predicted_sequence, target_tokenizer)
     return predicted_text

+# Set up Gradio interface
+iface = gr.Interface(
+    fn=transliterate,
+    inputs=gr.inputs.Textbox(lines=2, placeholder="Enter Malayalam text here..."),
+    outputs="text",
+    title="Malayalam to English Transliteration",
+    description="Enter Malayalam names to get their English transliterations."
+)
+
+if __name__ == "__main__":
+    iface.launch()
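
A note on the imports in the new version: it mixes the standalone keras.preprocessing.sequence import with tf.keras.preprocessing.text calls. The standalone keras.preprocessing namespace is deprecated in recent Keras releases, and the text submodule no longer ships with Keras 3, so routing both through tf.keras keeps them consistent. A minimal sketch, assuming a TensorFlow release that still bundles the legacy preprocessing modules:

import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json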
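One thing worth flagging in the tokenizer loading: tf.keras.preprocessing.text.tokenizer_from_json expects the serialized JSON string. json.load(f) only yields that string if the files were written with json.dump(tokenizer.to_json(), f); if they contain the raw output of tokenizer.to_json(), json.load(f) returns a dict and tokenizer_from_json raises a TypeError. A minimal sketch for the latter case, reusing the paths downloaded above:

with open(source_tokenizer_path, encoding="utf-8") as f:
    # Pass the raw JSON string; tokenizer_from_json parses it itself.
    source_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(f.read())

with open(target_tokenizer_path, encoding="utf-8") as f:
    target_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(f.read())

The switch from cached_download to hf_hub_download itself matches current huggingface_hub, where cached_download is deprecated.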
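Also, gr.inputs.Textbox belongs to the legacy Gradio component namespace, which was deprecated in Gradio 3 and removed in Gradio 4, so on a Space pulling current gradio the interface block fails before launch. A sketch of the same interface against the current top-level components, with the text unchanged from the diff:

iface = gr.Interface(
    fn=transliterate,
    inputs=gr.Textbox(lines=2, placeholder="Enter Malayalam text here..."),
    outputs="text",
    title="Malayalam to English Transliteration",
    description="Enter Malayalam names to get their English transliterations.",
)

if __name__ == "__main__":
    iface.launch()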
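For a quick check outside the web UI, the prediction path can be exercised directly once the model and tokenizers have loaded. The sample string below is an arbitrary Malayalam name chosen purely for illustration, not something taken from the repository:

if __name__ == "__main__":
    # Hypothetical smoke test before starting the Gradio app.
    print(transliterate("അജയ്"))  # should print the predicted English spelling
    iface.launch()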