# Streamlit app: translate typed text or an uploaded PDF/DOCX document from
# English into a selected target language using Helsinki-NLP models served by
# the Hugging Face Inference API. Requires the HF_API_KEY environment variable.
# (A comment header is used instead of a module docstring on purpose.)

import os
import time

import docx
import PyPDF2
import requests
import streamlit as st

#------------------------------------------------------------------------
# Configurations
#------------------------------------------------------------------------

# Streamlit page setup
st.set_page_config(
    page_title="Text Translator",
    page_icon=":speech_balloon:",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items={
        'Get Help': 'mailto:info@mtss.ai',
        'About': "This app is built to support translation tasks"
    }
)

#------------------------------------------------------------------------
# Title
#------------------------------------------------------------------------

# Set the title of the app
st.title("Text Translator")

# Description
st.write(
    "Choose a target language, enter your text or upload a document, "
    "and click **Translate** to get the translated text."
)

#------------------------------------------------------------------------
# Sidebar
#------------------------------------------------------------------------

with st.sidebar:
    # Password input field
    # password = st.text_input("Enter Password:", type="password")

    # Set the desired width in pixels
    image_width = 300

    # Define the path to the image
    # image_path = "MTSSai_logo.png"

    # Display the image
    # st.image(image_path, width=image_width)

    # Set the title
    st.title("MTSS.ai")

    # Toggle for Help and Report a Bug
    with st.expander("Need help and report a bug"):
        # Two trailing spaces before the newline are a Markdown hard line break.
        st.write(
            "**Contact**: Cheyne LeVesseur, PhD  \n"
            "**Email**: info@mtss.ai"
        )

    st.divider()
    st.subheader('User Instructions')

    # Principles text with Markdown formatting
    User_Instructions = (
        "- **Step 1**: Provide either text input or upload a document for translation.\n"
        "- **Step 2**: Click Translate.\n"
        "- **Step 3**: Sit back, relax, and let the magic happen!\n"
    )
    st.markdown(User_Instructions)

#------------------------------------------------------------------------
# Functions
#------------------------------------------------------------------------

# Maximum number of characters sent to the API per request.
# Adjust based on API limitations.
MAX_CHUNK_SIZE = 500

# Upper bound (seconds) for a single API call so a stalled connection cannot
# hang the app forever.
REQUEST_TIMEOUT_SECONDS = 60

# HTTP statuses worth retrying: timeouts, rate limiting and server-side
# failures (503 is what the original code retried explicitly).
RETRYABLE_STATUS_CODES = frozenset({408, 429, 500, 502, 503, 504})

# Language to model mapping
language_model_mapping = {
    "Spanish": "Helsinki-NLP/opus-mt-en-es",
    "Arabic": "Helsinki-NLP/opus-mt-en-ar",
    "Chinese": "Helsinki-NLP/opus-mt-en-zh",
    "Albanian": "Helsinki-NLP/opus-mt-en-sq",
    "French": "Helsinki-NLP/opus-mt-en-fr",
    "German": "Helsinki-NLP/opus-mt-en-de",
    "Japanese": "Helsinki-NLP/opus-mt-en-jap",
    "Italian": "Helsinki-NLP/opus-mt-en-it",
    "Dutch": "Helsinki-NLP/opus-mt-en-nl",
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "Russian": "Helsinki-NLP/opus-mt-en-ru",
    "Indonesian": "Helsinki-NLP/opus-mt-en-id",
    "Greek": "Helsinki-NLP/opus-mt-en-el",
    "Danish": "Helsinki-NLP/opus-mt-en-da",
    "Swedish": "Helsinki-NLP/opus-mt-en-sv",
    "Czech": "Helsinki-NLP/opus-mt-en-cs",
    "Catalan": "Helsinki-NLP/opus-mt-en-ca",
    "Bulgarian": "Helsinki-NLP/opus-mt-en-bg",
    "Estonian": "Helsinki-NLP/opus-mt-en-et",
    "Basque": "Helsinki-NLP/opus-mt-en-eu",
    "Vietnamese": "Helsinki-NLP/opus-mt-en-vi",
    "Finnish": "Helsinki-NLP/opus-mt-en-fi",
    "Hebrew": "Helsinki-NLP/opus-mt-en-he",
    "Azerbaijani": "Helsinki-NLP/opus-mt-en-az",
    "Afrikaans": "Helsinki-NLP/opus-mt-en-af",
    "Armenian": "Helsinki-NLP/opus-mt-en-hy",
    "Hungarian": "Helsinki-NLP/opus-mt-en-hu"
}

# Dropdown for language selection
language = st.selectbox(
    "Select target language",
    list(language_model_mapping)
)

# Input method selection
input_option = st.radio("Select input method:", ("Text Input", "Upload Document"))

input_text = ""


# Functions to extract text from files
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, one page per line group.

    Args:
        pdf_file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The extracted text with a newline appended after each page that
        yielded any text, or ``""`` if extraction failed (the failure is
        reported to the user with ``st.error``).
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        page_texts = (page.extract_text() for page in pdf_reader.pages)
        # Pages without extractable text are skipped.
        return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
    except Exception as e:
        # Deliberately broad: this is a UI boundary, and any parsing failure
        # should become a visible message instead of crashing the app.
        st.error(f"Error extracting text from PDF: {e}")
        return ""


def extract_text_from_docx(docx_file):
    """Return the paragraph text of a Word document, one paragraph per line.

    Args:
        docx_file: A path or binary file-like object accepted by
            ``docx.Document``.

    Returns:
        The text of ``doc.paragraphs`` with a newline after each paragraph,
        or ``""`` if extraction failed (the failure is reported to the user
        with ``st.error``).
    """
    try:
        doc = docx.Document(docx_file)
        return "".join(f"{para.text}\n" for para in doc.paragraphs)
    except Exception as e:
        # Deliberately broad for the same reason as in extract_text_from_pdf.
        st.error(f"Error extracting text from Word document: {e}")
        return ""


# Text area or file uploader based on input method
if input_option == "Text Input":
    input_text = st.text_area("Enter text to translate", height=200)
elif input_option == "Upload Document":
    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "docx"])
    if uploaded_file is not None:
        file_extension = os.path.splitext(uploaded_file.name)[1].lower()
        if file_extension == ".pdf":
            with st.spinner("Extracting text from PDF..."):
                input_text = extract_text_from_pdf(uploaded_file)
        elif file_extension == ".docx":
            with st.spinner("Extracting text from Word document..."):
                input_text = extract_text_from_docx(uploaded_file)
        else:
            st.error("Unsupported file type.")
            input_text = ""


# Function to split text into chunks
def split_text_into_chunks(text, max_chunk_size):
    """Split *text* into consecutive pieces of at most *max_chunk_size* chars.

    A piece is cut just after the last whitespace character that fits, so
    words are not split in the middle; if a window contains no whitespace the
    piece is cut at exactly ``max_chunk_size``. Concatenating the returned
    pieces reproduces *text* unchanged.

    Args:
        text: The string to split.
        max_chunk_size: Maximum length of each piece; must be positive.

    Returns:
        A list of non-empty strings (empty list for empty *text*).

    Raises:
        ValueError: If *max_chunk_size* is not positive.
    """
    if max_chunk_size <= 0:
        raise ValueError("max_chunk_size must be a positive integer")

    chunks = []
    text_length = len(text)
    start = 0
    while start < text_length:
        end = min(start + max_chunk_size, text_length)
        if end < text_length:
            # Look backwards for a whitespace boundary inside the window.
            # The scan stops before `start` so every chunk makes progress.
            boundary = next(
                (i for i in range(end - 1, start, -1) if text[i].isspace()),
                None,
            )
            if boundary is not None:
                end = boundary + 1  # keep the whitespace with this chunk
        chunks.append(text[start:end])
        start = end
    return chunks


def _error_detail(response):
    """Return a short description of a failed API response."""
    fallback = response.reason or "no details provided"
    try:
        body = response.json()
    except ValueError:
        return fallback
    if isinstance(body, dict) and body.get("error"):
        return str(body["error"])
    return fallback


def _extract_translation(result):
    """Return the ``translation_text`` string from a decoded payload, or None.

    Accepts either a non-empty list whose first element is a dict, or a dict.
    """
    candidate = result[0] if isinstance(result, list) and result else result
    if isinstance(candidate, dict):
        translation = candidate.get("translation_text")
        if isinstance(translation, str):
            return translation
    return None


def _read_translation(response):
    """Turn a non-retryable API response into translated text.

    Reports the problem with ``st.error`` and returns ``None`` when the
    response is an HTTP error, is not valid JSON, carries an ``error`` field,
    or does not contain a translation.
    """
    if response.status_code >= 400:
        st.error(
            f"Translation API request failed (HTTP {response.status_code}): "
            f"{_error_detail(response)}"
        )
        return None

    try:
        result = response.json()
    except ValueError:
        st.error("Unexpected response format from the API.")
        return None

    # Handle possible errors from the API
    if isinstance(result, dict) and result.get("error"):
        st.error(f"Error from translation API: {result['error']}")
        return None

    translation = _extract_translation(result)
    if translation is None:
        # Surface the problem instead of inserting a placeholder sentence
        # into the translated output.
        st.error("Unexpected response format from the API.")
    return translation


def _translate_chunk(chunk, chunk_number, api_url, headers, max_retries, backoff_factor):
    """Translate one chunk, retrying transient failures with backoff.

    Network-level errors and the statuses in ``RETRYABLE_STATUS_CODES`` are
    retried up to *max_retries* times, sleeping ``backoff_factor ** attempt``
    seconds between attempts. Any other response is handled immediately.

    Returns:
        The translated text, or ``None`` after reporting the failure with
        ``st.error``.
    """
    payload = {"inputs": chunk}
    for attempt in range(1, max_retries + 1):
        try:
            response = requests.post(
                api_url,
                headers=headers,
                json=payload,
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
        except requests.exceptions.RequestException:
            response = None  # connection error or timeout: retry

        if response is not None and response.status_code not in RETRYABLE_STATUS_CODES:
            return _read_translation(response)

        # No point in waiting when there is no attempt left.
        if attempt < max_retries:
            time.sleep(backoff_factor ** attempt)

    # All retry attempts failed for this chunk
    st.error(f"Failed to translate chunk {chunk_number} after {max_retries} attempts.")
    return None


# Function to perform the translation with retry mechanism
def translate_text(text, target_lang, max_retries=5, backoff_factor=2):
    """Translate *text* into *target_lang* via the Hugging Face Inference API.

    The text is split into chunks of at most ``MAX_CHUNK_SIZE`` characters,
    each chunk is translated separately, and the results are joined with
    single spaces.

    Args:
        text: The text to translate.
        target_lang: A key of ``language_model_mapping``.
        max_retries: Maximum attempts per chunk for transient failures.
        backoff_factor: Base of the exponential delay between attempts.

    Returns:
        The translated text, or ``None`` if the language is unsupported, the
        ``HF_API_KEY`` environment variable is missing, or any chunk could
        not be translated. Every failure is reported with ``st.error``.
    """
    model = language_model_mapping.get(target_lang)
    if not model:
        st.error("Unsupported language selected.")
        return None

    # Retrieve Hugging Face API key from environment variables
    hf_api_key = os.getenv('HF_API_KEY')
    if not hf_api_key:
        st.error("Hugging Face API key not set in environment variables.")
        return None

    api_url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {hf_api_key}"}

    translated_chunks = []
    # Split the text into manageable chunks
    text_chunks = split_text_into_chunks(text, MAX_CHUNK_SIZE)
    for chunk_index, chunk in enumerate(text_chunks):
        if not chunk.strip():
            continue  # nothing to translate in a whitespace-only chunk
        translated_text = _translate_chunk(
            chunk, chunk_index + 1, api_url, headers, max_retries, backoff_factor
        )
        if translated_text is None:
            return None
        translated_chunks.append(translated_text)

    return " ".join(translated_chunks)


# Translate button
if st.button("Translate"):
    if not input_text.strip():
        st.warning("Please enter some text to translate.")
    else:
        with st.spinner("Translation service loading..."):
            translated = translate_text(input_text, language)
        if translated:
            st.subheader("Translated Text:")
            st.write(translated)
        else:
            st.error("Translation failed. Please try again later.")