import gradio as gr
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import sys
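# Alias the tensorflow.keras modules under the names used when the tokenizers were pickled,
# so pickle.load can resolve them (the 'keras.src.preprocessing' path below suggests they
# were saved under a different Keras package layout; this is an assumption).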
from tensorflow.keras import preprocessing
sys.modules['keras.src.preprocessing'] = preprocessing
from tensorflow import keras
sys.modules['keras'] = keras
from huggingface_hub import login

# Authenticate with the Hub; set HUGGINGFACE_HUB_TOKEN as a secret variable in this Space's settings
login(token=os.getenv("HUGGINGFACE_HUB_TOKEN"))
# ---------------------------------------------------------------------------------------------------------------------------------------
# Load the translation model and the English and French tokenizers
with open('english_tokenizer.pickle', 'rb') as handle:
    english_tokenizer = pickle.load(handle)

with open('french_tokenizer.pickle', 'rb') as handle:
    french_tokenizer = pickle.load(handle)

translation_model = tf.keras.models.load_model('model2_v2.h5')
# ---------------------------------------------------------------------------------------------------------------------------------------
# Translate sentence function
MAX_LEN_EN = 15
MAX_LEN_FR = 21
VOCAB_SIZE_EN = len(english_tokenizer.word_index)
VOCAB_SIZE_FR = len(french_tokenizer.word_index)
# print(f'MAX_LEN_EN: {MAX_LEN_EN}')
# print(f'MAX_LEN_FR: {MAX_LEN_FR}')
# print(f'VOCAB_SIZE_EN: {VOCAB_SIZE_EN}')
# print(f'VOCAB_SIZE_FR: {VOCAB_SIZE_FR}')
# Function implemented earlier, adapted here for use with Gradio.
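# Greedy decoding: at every step the padded English sequence and the partially filled target
# sequence are fed to the model, and the argmax at position i - 1 becomes token i.
# Index VOCAB_SIZE_FR is used as the start token and index 0 (padding) as the end token;
# this convention is inferred from the code below and assumes the model was trained accordingly.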
def translate_sentence(sentence, verbose=False):
    # Preprocess the input sentence
    sequence = english_tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=MAX_LEN_EN, padding='post')

    # Initialize the target sequence with the start token
    start_token = VOCAB_SIZE_FR  # 344
    target_sequence = np.zeros((1, MAX_LEN_FR))
    target_sequence[0, 0] = start_token

    # Placeholder for the translation
    translation = ''

    # Step-by-step translation
    for i in range(1, MAX_LEN_FR):
        # Predict the next word
        output_tokens = translation_model.predict([padded_sequence, target_sequence], verbose=verbose)
        # Get the most likely next word
        sampled_token_index = np.argmax(output_tokens[0, i - 1, :])
        if verbose:
            print(f'sampled_token_index: {sampled_token_index}')
        if sampled_token_index == 0:  # End token
            break
        sampled_word = french_tokenizer.index_word[sampled_token_index]
        if verbose:
            print(f'sampled_word: {sampled_word}')
        # Append the word to the translation
        translation += ' ' + sampled_word
        # Update the target sequence
        target_sequence[0, i] = sampled_token_index

    return translation.strip()
# Example usage:
# english_sentence = "paris is relaxing during december but it is usually chilly in july"
# print(english_sentence)
# translated_sentence = translate_sentence(english_sentence)
# print(translated_sentence)
# ----------------------------------------------------------------------------------------------------------------------------------------
# Gradio app
from datasets import load_dataset, Dataset
# Function to load the dataset from Hugging Face
def load_hf_dataset():
    dataset = load_dataset("Zaherrr/translation_log")
    return dataset["train"]  # Return the train split of the logging dataset
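
# Each Accept/Flag action appends a row (english, french, corrected_french, status) to the
# Zaherrr/translation_log dataset on the Hub and records its index in row_indices, so that
# Revert can later drop the most recently logged row.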
def update_history_with_status(english, french, history, status):
    history.append((english, french, status))
    history_text = "\n".join([f"{inp} ----> {out} ({status})" for inp, out, status in history])
    return history_text, history
def revert_last_action(history):
    history_text = ""  # Default when there is nothing to revert
    if history:
        # Revert history
        history.pop()
        # Update history block text
        history_text = "\n".join([f"{inp} ----> {out} ({status})" for inp, out, status in history])
        # Revert the last row in the dataset
        if row_indices:
            last_index = row_indices.pop()
            # Remove the last row from the dataset
            dataset = load_hf_dataset()
            df = dataset.to_pandas()
            df = df.drop(last_index).reset_index(drop=True)
            updated_dataset = Dataset.from_pandas(df)
            updated_dataset.push_to_hub("Zaherrr/translation_log")
    return history_text, history
# Function to flag data
def flag_action(english, french, corrected_french, flagged_successful, history):
    data = {"english": english, "french": french, "corrected_french": corrected_french, "status": flagged_successful}
    dataset = load_hf_dataset()
    df = dataset.to_pandas()
    new_df = pd.DataFrame([data])
    df = pd.concat([df, new_df], ignore_index=True)
    updated_dataset = Dataset.from_pandas(df)
    updated_dataset.push_to_hub("Zaherrr/translation_log")
    index = len(df) - 1
    row_indices.append(index)
    return update_history_with_status(english, french, history, "Flagged")
# Function to accept data
def accept_action(english, french, hidden_text, flagged_successful, history):
    data = {"english": english, "french": french, "corrected_french": hidden_text, "status": flagged_successful}
    dataset = load_hf_dataset()
    df = dataset.to_pandas()
    new_df = pd.DataFrame([data])
    df = pd.concat([df, new_df], ignore_index=True)
    updated_dataset = Dataset.from_pandas(df)
    updated_dataset.push_to_hub("Zaherrr/translation_log")
    index = len(df) - 1
    row_indices.append(index)
    return update_history_with_status(english, french, history, "Accepted")
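
# Note: accept_action mirrors flag_action but logs the hidden_text field in place of a
# corrected translation, so accepted and flagged rows share the same dataset schema.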
# Define the Gradio interface
with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("<center><h1>Translate English to French</h1></center>")
    with gr.Row():
        with gr.Column():
            english = gr.Textbox(label="English", placeholder="Input English text here")
            Translate_button = gr.Button(value="Translate", variant="primary")
            hidden_text = gr.Textbox(label="Hidden Text", placeholder="Hidden Text", interactive=False, visible=False)
            flagged_successful = gr.Textbox(label="Acceptance Status", placeholder="Flagged Successful", interactive=False, visible=False)
        with gr.Column():
            french = gr.Textbox(label="French", placeholder="Predicted French text here", interactive=False)
            corrected_french = gr.Textbox(label="Corrected French", placeholder="Corrected French translation here")
    with gr.Column():
        with gr.Row():
            accept_button = gr.Button(value="Accept", variant="primary")
            flag_button = gr.Button(value="Flag", variant="secondary")
            revert_button = gr.Button(value="Revert", variant="secondary")
    examples = gr.Examples(
        examples=[
            "paris is relaxing during december but it is usually chilly in july",
            "She is driving the truck",
        ],
        inputs=english,
    )
    gr.Markdown("History:")
    history_block = gr.Textbox(label="History", placeholder="English - French Translation Pairs", interactive=False, lines=5, max_lines=50)
    history = gr.State([])

    # Track the indices of rows pushed to the Hub dataset so Revert can remove the last one
    row_indices = []
    gr.on(
        triggers=[english.submit, Translate_button.click],
        fn=translate_sentence,
        inputs=english,
        outputs=[french],
    ).then(
        fn=lambda: gr.Textbox(visible=False),
        inputs=None,
        outputs=flagged_successful,
    )

    gr.on(
        triggers=[flag_button.click],
        fn=lambda: gr.Textbox(value="Flagged", visible=True),
        outputs=flagged_successful,
    ).then(
        fn=flag_action,
        inputs=[english, french, corrected_french, flagged_successful, history],
        outputs=[history_block, history],
    )

    gr.on(
        triggers=[accept_button.click],
        fn=lambda: gr.Textbox(value="Accepted", visible=True),
        outputs=flagged_successful,
    ).then(
        fn=accept_action,
        inputs=[english, french, hidden_text, flagged_successful, history],
        outputs=[history_block, history],
    )

    gr.on(
        triggers=[revert_button.click],
        fn=revert_last_action,
        inputs=[history],
        outputs=[history_block, history],
    ).then(
        fn=lambda: gr.Textbox(placeholder="Reverted", visible=True),
        outputs=flagged_successful,
    )
demo.launch(share=True, auth=('username', 'password123'), auth_message="Check your <strong>Login details</strong> sent to your <i>email</i>")
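
# To run locally (a sketch, assuming model2_v2.h5, english_tokenizer.pickle, french_tokenizer.pickle
# and a HUGGINGFACE_HUB_TOKEN environment variable are available):
#   pip install gradio tensorflow datasets huggingface_hub pandas
#   python app.py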