import html
import os
import random
import time

import pandas as pd
import streamlit as st
# Location of the corpus CSV. The path is relative, so it is resolved against
# the working directory the app is started from.
DATA_FILENAME = "Dr-En-space-test.csv"
DATA_PATH = os.path.join("data", DATA_FILENAME)
# Configure the page to use the wide layout.
st.set_page_config(layout="wide")
# Load data
def load_data():
    """Read the corpus CSV at ``DATA_PATH`` and return it as a DataFrame."""
    corpus = pd.read_csv(DATA_PATH)
    return corpus
def save_data(data):
    """Write *data* to the CSV at ``DATA_PATH`` without the index column."""
    data.to_csv(path_or_buf=DATA_PATH, index=False)
def choose_pending_pair(data, current=None):
    """Pick a random (sentence, translation) pair that still needs correcting.

    A row is pending when its ``translated`` flag is True and its
    ``corrected`` flag is False.

    Args:
        data: DataFrame with ``sentence``, ``translation``, ``translated``
            and ``corrected`` columns.
        current: Sentence to avoid picking again. It is only returned when it
            is the sole pending sentence.

    Returns:
        A ``(sentence, translation)`` tuple of scalar values taken from one
        row, or the "No more sentences to be corrected" message in both
        positions when nothing is pending.
    """
    awaiting = data[data["translated"].eq(True) & data["corrected"].eq(False)]
    if awaiting.empty:
        exhausted = "No more sentences to be corrected"
        return exhausted, exhausted

    # Prefer a pair other than the one already shown, so that skipping
    # actually moves on whenever an alternative exists.
    fresh = awaiting[awaiting["sentence"] != current]
    pool = awaiting if fresh.empty else fresh

    # Take both values from the same row so the translation is a scalar that
    # belongs to the chosen sentence (not a Series of matching rows).
    row = pool.iloc[random.randrange(len(pool))]
    return row["sentence"], row["translation"]


def skip_correction():
    """Replace the pair shown for correction with another pending one.

    Reads ``st.session_state.data`` and writes ``orig_sentence`` and
    ``orig_translation`` in ``st.session_state``. Used as the ``on_click``
    callback of the skip button.
    """
    sentence, translation = choose_pending_pair(
        st.session_state.data,
        current=st.session_state.get("orig_sentence"),
    )
    st.session_state.orig_sentence = sentence
    st.session_state.orig_translation = translation
def _render_text(text):
    """Write *text* to the page on its own line, HTML-escaped."""
    # The text comes from the CSV and from user submissions, and raw HTML is
    # enabled for this call, so escape it to keep markup from being injected.
    st.write(
        "\n{}\n".format(html.escape(str(text), quote=False)),
        unsafe_allow_html=True,
    )


def _choose_sentence_to_translate():
    """Store a random untranslated sentence in ``st.session_state.sentence``."""
    frame = st.session_state.data
    pending = frame[frame["translated"].eq(False)]["sentence"].tolist()
    if pending:
        st.session_state.sentence = random.choice(pending)
    else:
        st.session_state.sentence = "No more sentences to translate"


def _choose_pair_to_correct():
    """Store a random translated-but-uncorrected pair in the session state.

    Sets both ``orig_sentence`` and ``orig_translation``; the translation is
    stored as a scalar taken from the same row as the sentence.
    """
    frame = st.session_state.data
    awaiting = frame[frame["translated"].eq(True) & frame["corrected"].eq(False)]
    if awaiting.empty:
        st.session_state.orig_sentence = "No more sentences to be corrected"
        st.session_state.orig_translation = "No more sentences to be corrected"
        return
    row = awaiting.iloc[random.randrange(len(awaiting))]
    st.session_state.orig_sentence = row["sentence"]
    st.session_state.orig_translation = row["translation"]


st.title("Darija Translation Corpus Collection")

# One-time initialisation of the per-session state.
if "data" not in st.session_state:
    st.session_state.data = load_data()
if "sentence" not in st.session_state:
    _choose_sentence_to_translate()
if "orig_translation" not in st.session_state:
    _choose_pair_to_correct()
if "user_translation" not in st.session_state:
    st.session_state.user_translation = ""
# Counters that are part of the text-area keys. Bumping one after a save
# makes the next run create a fresh, empty text area.
if "translation_round" not in st.session_state:
    st.session_state.translation_round = 0
if "correction_round" not in st.session_state:
    st.session_state.correction_round = 0

with st.sidebar:
    st.subheader("About")
    st.markdown("""This app is designed to collect Darija translation corpus.""")

tab1, tab2 = st.tabs(["Translation", "Correction"])

with tab1:
    with st.container():
        st.subheader("Original Text:")
        _render_text(st.session_state.sentence)
    st.subheader("Translation:")
    # A stable key keeps the widget's identity fixed while the user edits;
    # passing the previous input back as `value` on an un-keyed widget could
    # re-create it and lose a follow-up edit.
    st.session_state.user_translation = st.text_area(
        "Enter your translation here:",
        key=f"translation_input_{st.session_state.translation_round}",
    )
    if st.button("💾 Save"):
        if st.session_state.user_translation:
            frame = st.session_state.data
            rows = frame["sentence"] == st.session_state.sentence
            frame.loc[rows, "translation"] = st.session_state.user_translation
            frame.loc[rows, "translated"] = True
            save_data(frame)
            # Reset the input: clear the stored text and switch to a new key.
            st.session_state.user_translation = ""
            st.session_state.translation_round += 1
            st.success("Saved!")
            # Update the sentence for the next iteration.
            _choose_sentence_to_translate()
            # Leave the confirmation visible briefly before rerunning.
            time.sleep(0.5)
            st.rerun()

with tab2:
    with st.container():
        st.subheader("Original Darija Text:")
        _render_text(st.session_state.orig_sentence)
    with st.container():
        st.subheader("Original English Translation:")
        _render_text(st.session_state.orig_translation)
    st.subheader("Corrected Darija Translation:")
    corrected_translation = st.text_area(
        "Enter the corrected Darija translation here:",
        key=f"correction_input_{st.session_state.correction_round}",
    )
    if st.button("💾 Save Translation"):
        if corrected_translation:
            frame = st.session_state.data
            rows = frame["sentence"] == st.session_state.orig_sentence
            frame.loc[rows, "translation"] = corrected_translation
            frame.loc[rows, "correction"] = corrected_translation
            frame.loc[rows, "corrected"] = True
            save_data(frame)
            st.success("Saved!")
            # Update the pair for the next iteration.
            _choose_pair_to_correct()
            # Reset the input by switching the text area to a new key.
            st.session_state.correction_round += 1
            # Rerun so the page shows the newly chosen pair instead of the
            # one that was rendered before the save was processed.
            time.sleep(0.5)
            st.rerun()
    st.button("⏩ Skip to the Next Pair", key="skip_button", on_click=skip_correction)