Bajiyo committed on
Commit
4755ab1
1 Parent(s): 96273c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -41
app.py CHANGED
@@ -1,53 +1,50 @@
1
  import gradio as gr
2
- import json
3
  from keras.preprocessing.sequence import pad_sequences
4
- from huggingface_hub import cached_download, from_pretrained_keras
5
-
6
- # Load the model from Hugging Face Hub (assuming the model identifier is "Bajiyo/ml-en-transliteration")
7
- model = from_pretrained_keras("Bajiyo/ml-en-transliteration")
8
-
9
- # Define URLs for tokenizer files on Hugging Face Hub (replace with actual model identifier if different)
10
- source_tokenizer_url = f"https://huggingface.co/Bajiyo/ml-en-transliteration/resolve/main/source_tokenizer.json"
11
- target_tokenizer_url = f"https://huggingface.co/Bajiyo/ml-en-transliteration/resolve/main/target_tokenizer.json"
12
-
13
- # Download tokenizer files using cached_download (avoids redundant downloads)
14
- source_tokenizer_path = cached_download(source_tokenizer_url)
15
- target_tokenizer_path = cached_download(target_tokenizer_url)
16
 
17
- # Load tokenizers from downloaded files
18
- from keras.preprocessing.text import tokenizer_from_json
 
 
 
19
 
20
- with open(source_tokenizer_path, "r") as f:
21
- source_tokenizer = tokenizer_from_json(json.load(f))
22
 
23
- with open(target_tokenizer_path, "r") as f:
24
- target_tokenizer = tokenizer_from_json(json.load(f))
 
 
25
 
26
- # Reconstruct tokenizers
27
- from keras.preprocessing.text import tokenizer_from_json
28
- source_tokenizer = tokenizer_from_json(source_tokenizer_config)
29
- target_tokenizer = tokenizer_from_json(target_tokenizer_config)
30
 
 
 
 
31
 
32
- # Define the maximum sequence length
33
- max_seq_length = 50
34
 
35
- # Function to predict transliteration
36
- def predict_transliteration(input_text):
37
- # Preprocess the input text
38
  input_sequence = source_tokenizer.texts_to_sequences([input_text])
39
- input_sequence_padded = pad_sequences(input_sequence, maxlen=max_seq_length, padding='post')
40
-
41
- # Generate predictions
42
- predicted_sequence = model.predict(input_sequence_padded)
43
-
44
- # Decode the predicted sequence
45
- predicted_text = "".join(target_tokenizer.index_word[i] for i in np.argmax(predicted_sequence, axis=-1)[0] if i != 0)
46
-
47
  return predicted_text
48
 
49
- # Create a Gradio interface
50
- input_textbox = gr.inputs.Textbox(lines=2, label="Enter Malayalam text")
51
- output_textbox = gr.outputs.Textbox(label="Predicted Transliteration")
52
-
53
- gr.Interface(fn=predict_transliteration, inputs=input_textbox, outputs=output_textbox, title="Malayalam Transliteration", description="Enter Malayalam text to get its transliteration in English.").launch()
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import tensorflow as tf
3
  from keras.preprocessing.sequence import pad_sequences
4
+ import numpy as np
5
+ import json
6
+ from huggingface_hub import from_pretrained_keras, hf_hub_download
 
 
 
 
 
 
 
 
 
7
 
8
# Convert a sequence of token indices back into a string using the
# tokenizer's vocabulary; indices with no vocabulary entry (e.g. the
# padding index 0) contribute the empty string.
def sequence_to_text(sequence, tokenizer):
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}
    pieces = []
    for token_id in sequence:
        pieces.append(index_to_word.get(token_id, ''))
    return ''.join(pieces)
13
 
14
# Hugging Face Hub repo that hosts both the Keras model and the
# serialized tokenizer JSON files.
repo_id = "Bajiyo/Malayalam_transliteration"

# Load the trained transliteration model from the Hugging Face Hub.
model = from_pretrained_keras(repo_id)

# Download the tokenizer files (hf_hub_download caches them locally,
# so repeated launches do not re-download).
source_tokenizer_path = hf_hub_download(repo_id=repo_id, filename="source_tokenizer.json")
target_tokenizer_path = hf_hub_download(repo_id=repo_id, filename="target_tokenizer.json")

# NOTE: tokenizer_from_json expects the raw JSON *string* (it calls
# json.loads internally). Passing the dict produced by json.load(f),
# as the previous code did, raises a TypeError — so read the file text
# and hand it over unparsed.
with open(source_tokenizer_path, encoding="utf-8") as f:
    source_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(f.read())

with open(target_tokenizer_path, encoding="utf-8") as f:
    target_tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(f.read())

# Maximum padded input length fed to the model — presumably the length
# used at training time; TODO confirm against the training config.
max_seq_length = 100
 
31
 
32
def transliterate(input_text):
    """Transliterate a Malayalam string into English characters.

    Encodes *input_text* with the source tokenizer, pads it to the
    model's expected length, runs one forward pass, and decodes the
    argmax token at every output position via the target tokenizer.
    """
    encoded = source_tokenizer.texts_to_sequences([input_text])
    padded = pad_sequences(encoded, maxlen=max_seq_length, padding='post')
    scores = model.predict(padded)
    # Batch size is 1, so take the first row of argmax indices.
    best_tokens = np.argmax(scores, axis=-1)[0]
    return sequence_to_text(best_tokens, target_tokenizer)
39
 
40
# Set up the Gradio web UI.
# FIX: the gr.inputs.* component namespace was deprecated in Gradio 3.x
# and removed in 4.x; the top-level gr.Textbox component is the
# supported equivalent and accepts the same lines/placeholder kwargs.
iface = gr.Interface(
    fn=transliterate,
    inputs=gr.Textbox(lines=2, placeholder="Enter Malayalam text here..."),
    outputs="text",
    title="Malayalam to English Transliteration",
    description="Enter Malayalam names to get their English transliterations.",
)

if __name__ == "__main__":
    iface.launch()