Spaces:
Sleeping
Sleeping
import gradio as gr | |
import os | |
import pandas as pd | |
import numpy as np | |
import tensorflow as tf | |
from tensorflow.keras.preprocessing.text import Tokenizer | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
import pickle | |
import sys
from tensorflow.keras import preprocessing
# Register tensorflow.keras.preprocessing under the module name
# 'keras.src.preprocessing' so that imports of that name resolve to it.
# NOTE(review): presumably needed so the pickled tokenizers loaded later in
# this file can be unpickled — confirm against how the pickles were created.
sys.modules['keras.src.preprocessing'] = preprocessing
from tensorflow import keras
# Likewise make the top-level module name 'keras' resolve to tensorflow.keras.
sys.modules['keras'] = keras
from huggingface_hub import HfApi | |
# Set your Hugging Face API token in the settings of this space as a secret variable | |
# Authenticate using HfApi | |
# api = HfApi() | |
# api.login(token=os.getenv("HUGGINGFACE_HUB_TOKEN")) | |
from huggingface_hub import login
# Authenticate with the Hugging Face Hub using the token read from the
# HUGGINGFACE_HUB_TOKEN environment variable.
# NOTE(review): os.getenv returns None when the variable is unset — confirm
# how login() behaves in that case in a non-interactive environment.
login(token=os.getenv("HUGGINGFACE_HUB_TOKEN"))
# --------------------------------------------------------------------------------------------------------------------------------------- | |
# Loading the translation model and english and french tokenizers | |
def _load_pickle(path):
    """Open *path* in binary mode and return the unpickled object."""
    with open(path, 'rb') as stream:
        return pickle.load(stream)


# Tokenizers are read from pickle files in the working directory; the
# translation model is read from an HDF5 file next to them.
english_tokenizer = _load_pickle('english_tokenizer.pickle')
french_tokenizer = _load_pickle('french_tokenizer.pickle')
translation_model = tf.keras.models.load_model('model2_v2.h5')
# --------------------------------------------------------------------------------------------------------------------------------------- | |
# Translate sentence function | |
# Sequence lengths: MAX_LEN_EN is the padded length of the English input,
# MAX_LEN_FR is the length of the French target buffer used while decoding.
# NOTE(review): presumably these must match the lengths used at training
# time — confirm against the training code.
MAX_LEN_EN = 15
MAX_LEN_FR = 21
# Number of entries in each tokenizer's word_index.
VOCAB_SIZE_EN = len(english_tokenizer.word_index)
VOCAB_SIZE_FR = len(french_tokenizer.word_index)
# print(f'MAX_LEN_EN: {MAX_LEN_EN}') | |
# print(f'MAX_LEN_FR: {MAX_LEN_FR}') | |
# print(f'VOCAB_SIZE_EN: {VOCAB_SIZE_EN}') | |
# print(f'VOCAB_SIZE_FR: {VOCAB_SIZE_FR}') | |
# function implemented earlier, modified it to be used with gradio. | |
def translate_sentence(sentence, verbose=False):
    """Translate an English sentence to French, one predicted token at a time.

    The sentence is tokenized and post-padded to MAX_LEN_EN. The target
    buffer starts with the start token and is filled step by step with the
    arg-max prediction of the model until token 0 is predicted or
    MAX_LEN_FR positions have been produced.

    Args:
        sentence: English text to translate.
        verbose: Forwarded to ``translation_model.predict`` and, when truthy,
            also prints each sampled token index and word.

    Returns:
        The predicted French words joined by single spaces. An empty string
        is returned for empty, whitespace-only or ``None`` input.
    """
    # Nothing to translate: do not run the model on an all-padding input.
    if not sentence or not sentence.strip():
        return ''

    # Preprocess the input sentence
    sequence = english_tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=MAX_LEN_EN, padding='post')

    # Initialize the target sequence with the start token
    start_token = VOCAB_SIZE_FR
    target_sequence = np.zeros((1, MAX_LEN_FR))
    target_sequence[0, 0] = start_token

    # Collected words of the translation (joined once at the end)
    words = []

    # Step-by-step translation
    for i in range(1, MAX_LEN_FR):
        # Predict the next word given everything decoded so far
        output_tokens = translation_model.predict([padded_sequence, target_sequence], verbose=verbose)

        # Get the most likely next word
        sampled_token_index = int(np.argmax(output_tokens[0, i - 1, :]))
        if verbose:
            print(f'sampled_token_index: {sampled_token_index}')

        if sampled_token_index == 0:  # End token
            break

        # An id the tokenizer does not know would raise KeyError with a plain
        # lookup; leave it out of the text but still feed it back to the model.
        sampled_word = french_tokenizer.index_word.get(sampled_token_index)
        if sampled_word is not None:
            if verbose:
                print(f'sampled_word: {sampled_word}')
            words.append(sampled_word)

        # Update the target sequence
        target_sequence[0, i] = sampled_token_index

    return ' '.join(words)
# Example usage: | |
# english_sentence = "paris is relaxing during december but it is usually chilly in july" | |
# print(english_sentence) | |
# translated_sentence = translate_sentence(english_sentence) | |
# print(translated_sentence) | |
# ---------------------------------------------------------------------------------------------------------------------------------------- | |
# Gradio app | |
from datasets import load_dataset, Dataset | |
# Function to load the dataset from Hugging Face | |
def load_hf_dataset():
    """Load the "Zaherrr/translation_log" dataset and return its "train" split."""
    return load_dataset("Zaherrr/translation_log")["train"]
def update_history_with_status(english, french, history, status):
    """Record one translation in the history and render the whole history.

    Appends ``(english, french, status)`` to ``history`` in place, then builds
    one text line per entry.

    Returns:
        A tuple ``(history_text, history)`` where ``history_text`` holds the
        rendered lines separated by newlines and ``history`` is the same list
        object that was passed in.
    """
    history.append((english, french, status))
    rendered = []
    for source_text, target_text, entry_status in history:
        rendered.append(f"{source_text} ----> {target_text} ({entry_status})")
    return "\n".join(rendered), history
def revert_last_action(history):
    """Undo the most recent Accept/Flag action.

    Removes the last entry from ``history`` (in place) and, when a row index
    was recorded in the module-level ``row_indices`` list, drops that row
    from the "Zaherrr/translation_log" dataset and pushes the result back to
    the Hub.

    Args:
        history: List of ``(english, french, status)`` tuples.

    Returns:
        A tuple ``(history_text, history)``; ``history_text`` is an empty
        string when no entries remain. Calling this with an empty history is
        a no-op instead of an error.
    """
    if history:
        # Revert history
        history.pop()

        # Revert last row in the dataset
        if row_indices:
            last_index = row_indices.pop()
            df = load_hf_dataset().to_pandas()
            # Only rewrite the dataset when the recorded row still exists;
            # dropping a missing label would raise KeyError.
            if last_index in df.index:
                df = df.drop(last_index).reset_index(drop=True)
                updated_dataset = Dataset.from_pandas(df)
                updated_dataset.push_to_hub("Zaherrr/translation_log")

    # Rendered unconditionally so the return value is always defined.
    history_text = "\n".join(f"{inp} ----> {out} ({status})" for inp, out, status in history)
    return history_text, history
# Function to flag data | |
# Append one feedback row to the Hub dataset and remember its index for Revert
def _log_translation(english, french, corrected_french, status):
    record = {"english": english, "french": french, "corrected_french": corrected_french, "status": status}
    df = load_hf_dataset().to_pandas()
    df = pd.concat([df, pd.DataFrame([record])], ignore_index=True)
    Dataset.from_pandas(df).push_to_hub("Zaherrr/translation_log")
    # The new row is the last one; revert_last_action pops this index.
    row_indices.append(len(df) - 1)


# Function to flag data
def flag_action(english, french, corrected_french, flagged_successful, history):
    """Log a flagged translation and add it to the history as "Flagged".

    Args:
        english: Source sentence.
        french: Model translation.
        corrected_french: User-supplied correction, stored in the
            "corrected_french" column.
        flagged_successful: Text stored in the "status" column.
        history: List of ``(english, french, status)`` tuples, extended in place.

    Returns:
        ``(history_text, history)`` as produced by update_history_with_status.
    """
    _log_translation(english, french, corrected_french, flagged_successful)
    return update_history_with_status(english, french, history, "Flagged")


# Function to accept data
def accept_action(english, french, hidden_text, flagged_successful, history):
    """Log an accepted translation and add it to the history as "Accepted".

    Args:
        english: Source sentence.
        french: Model translation.
        hidden_text: Text stored in the "corrected_french" column.
        flagged_successful: Text stored in the "status" column.
        history: List of ``(english, french, status)`` tuples, extended in place.

    Returns:
        ``(history_text, history)`` as produced by update_history_with_status.
    """
    _log_translation(english, french, hidden_text, flagged_successful)
    return update_history_with_status(english, french, history, "Accepted")
# Define the Gradio interface | |
with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("<center><h1>Translate English to French</h1></center>")

    # NOTE(review): the original indentation was lost; the row/column nesting
    # below is a reconstruction — confirm against the intended layout.
    with gr.Row():
        with gr.Column():
            english = gr.Textbox(label="English", placeholder="Input English text here")
            Translate_button = gr.Button(value="Translate", variant="primary")
            # Invisible, non-interactive box passed to accept_action as its third input.
            hidden_text = gr.Textbox(label="Hidden Text", placeholder="Hidden Text", interactive=False, visible=False)
            # Status box: hidden until Accept/Flag/Revert is pressed.
            flagged_successful = gr.Textbox(label="Acceptance Status", placeholder="Flagged Successful", interactive=False, visible=False)
        with gr.Column():
            french = gr.Textbox(label="French", placeholder="Predicted French text here", interactive=False)
            corrected_french = gr.Textbox(label="Corrected French", placeholder="Corrected French translation here")
        with gr.Column():
            with gr.Row():
                accept_button = gr.Button(value="Accept", variant="primary")
                flag_button = gr.Button(value="Flag", variant="secondary")
                revert_button = gr.Button(value="Revert", variant="secondary")

    examples = gr.Examples(
        examples=[
            "paris is relaxing during december but it is usually chilly in july",
            "She is driving the truck",
        ],
        inputs=english,
    )

    gr.Markdown("History:")
    history_block = gr.Textbox(label="History", placeholder="English - French Translation Pairs", interactive=False, lines=5, max_lines=50)
    history = gr.State([])

    # Indices of the rows appended to the Hub dataset by Accept/Flag, so that
    # Revert knows which row to drop. This is a plain module-level list.
    row_indices = []

    # Translate on Enter or button click, then hide the status box again.
    gr.on(
        triggers=[english.submit, Translate_button.click],
        fn=translate_sentence,
        inputs=english,
        outputs=[french],
    ).then(
        fn=lambda: gr.Textbox(visible=False),
        inputs=None,
        outputs=flagged_successful,
    )

    # Flag: show the status first, then log the row (the status text is one
    # of the logged fields, so it has to be set before flag_action runs).
    gr.on(
        triggers=[flag_button.click],
        fn=lambda: gr.Textbox(value="Flagged", visible=True),
        outputs=flagged_successful,
    ).then(
        fn=flag_action,
        inputs=[english, french, corrected_french, flagged_successful, history],
        outputs=[history_block, history],
    )

    # Accept: same two-step sequence as Flag.
    gr.on(
        triggers=[accept_button.click],
        fn=lambda: gr.Textbox(value="Accepted", visible=True),
        outputs=flagged_successful,
    ).then(
        fn=accept_action,
        inputs=[english, french, hidden_text, flagged_successful, history],
        outputs=[history_block, history],
    )

    # Revert: undo the last action, then clear the stale "Flagged"/"Accepted"
    # value so the "Reverted" placeholder is actually displayed.
    gr.on(
        triggers=[revert_button.click],
        fn=revert_last_action,
        inputs=[history],
        outputs=[history_block, history],
    ).then(
        fn=lambda: gr.Textbox(value="", placeholder="Reverted", visible=True),
        outputs=flagged_successful,
    )
# Login credentials are read from the environment so they can be configured
# as secrets instead of being edited in source.
# NOTE(review): the fallback values are the ones previously hard-coded here
# and are visible to anyone who can read this file — set APP_USERNAME and
# APP_PASSWORD for any real deployment.
_auth_username = os.getenv("APP_USERNAME", "username")
_auth_password = os.getenv("APP_PASSWORD", "password123")
demo.launch(share=True, auth=(_auth_username, _auth_password), auth_message="Check your <strong>Login details</strong> sent to your <i>email</i>")