import streamlit as st
import numpy as np
from transformers import pipeline
import librosa

# Load the pipelines
# (in a real app, consider wrapping these in @st.cache_resource so the models load only once)
asr_pipe = pipeline("automatic-speech-recognition", model="alvanlii/whisper-small-cantonese")
translation_pipe = pipeline("translation", model="raptorkwok/cantonese-chinese-translation")
tts_pipe = pipeline("text-to-speech", model="myshell-ai/MeloTTS-Chinese")

# Streamlit UI
st.title("Cantonese to Chinese Translator")
st.write("Upload your Cantonese audio file (WAV format) below.")

# File upload
uploaded_file = st.file_uploader("Choose a WAV file", type="wav")

if uploaded_file is not None:
    # Load the audio file, resampled to the 16 kHz rate Whisper expects
    audio, sr = librosa.load(uploaded_file, sr=16000)

    # Recognize Cantonese speech; the ASR pipeline accepts a dict of raw samples plus sampling rate
    result = asr_pipe({"raw": audio, "sampling_rate": sr})
    cantonese_text = result["text"]
    st.write(f"Cantonese Text: {cantonese_text}")

    # Translate Cantonese to written Chinese
    chinese_text = translation_pipe(cantonese_text)[0]["translation_text"]
    st.write(f"Chinese Text: {chinese_text}")

    # Convert the Chinese text to speech
    # (the chosen model must be one the transformers text-to-speech pipeline can load)
    tts_output = tts_pipe(chinese_text)

    # Play back the synthesized audio; st.audio needs an explicit sample rate for raw arrays
    speech = np.squeeze(tts_output["audio"])
    st.audio(speech, sample_rate=tts_output["sampling_rate"])
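
# --- Usage note (a sketch; assumes the script above is saved as app.py) ---
# Install the dependencies and launch the app with Streamlit:
#   pip install streamlit transformers torch librosa
#   streamlit run app.py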