"""Streamlit front end for a speech-to-text transcription service.

The user uploads an audio file and/or enters a media URL; on submit the input
is handed to a remotely deployed Modal function and the returned transcription
is rendered on the page.
"""

import base64
import json
import os

import modal
import requests
import streamlit as st
from loguru import logger
from pydub import AudioSegment

# NOTE(review): the access code typed into the UI is never compared against a
# secret; the line below looks like the start of such a check.
# password = os.environ["PASSWORD"]

# Handle to the deployed remote function that performs the transcription.
run_transcription = modal.lookup("ffpub-transcription", "run_transcription")

st.set_page_config(page_title="Speech to Text Transcription App")


@st.cache
def transcribe(url, audio_b64):
    """Call the remote transcription function and return its result.

    Wrapped in ``st.cache`` so Streamlit can reuse a previous result when the
    script is re-run with the same arguments.

    Args:
        url: Media URL to transcribe; ``run()`` passes an empty string when
            the URL field is left blank.
        audio_b64: Base64-encoded (ASCII ``str``) audio data, or ``None`` when
            no file was uploaded.

    Returns:
        Whatever ``run_transcription.call`` returns; ``run()`` expects a
        mapping with a ``"text"`` entry.
    """
    return run_transcription.call(url=url, audio_b64=audio_b64)


def run():
    """Render the page, collect the user's input and show the transcription."""
    password = st.text_input("Zugriffscode (siehe oben)")
    audio_file = st.file_uploader(
        "Datei auswählen", type=[".wav", ".mp3", ".flac", ".m4a", ".ogg"]
    )
    url = st.text_input(
        "URL (e.g. YouTube video, Dropbox file, etc.)",
        value="",
    )
    # Example URL: https://www.youtube.com/watch?v=pLAaQO1iPz0

    # The button stays disabled until at least one input has been provided.
    submit_button = st.button(
        label="Transkribieren", disabled=(not audio_file and not url)
    )

    # Default for the URL-only case. Previously this was reset to None inside
    # the submit branch, which silently threw away every uploaded file.
    audio_b64 = None
    if audio_file is not None:
        st.audio(audio_file)
        # pydub slices are in milliseconds: 60_000 keeps the first minute,
        # None keeps the whole recording.
        # NOTE(review): the trim is applied when an access code IS entered and
        # skipped when the field is empty - this looks inverted; confirm the
        # intended policy before changing it.
        cutoff = None if password == "" else 60_000
        segment = AudioSegment.from_file(audio_file)[:cutoff]
        audio_b64 = base64.b64encode(segment.export().read()).decode("ascii")
    if url:
        st.video(url)

    if submit_button:
        transcription = transcribe(url, audio_b64)
        # Blank lines in the text separate the segments; write each on its own.
        for seg in transcription["text"].split("\n\n"):
            st.write(seg)
        st.json(transcription)


if __name__ == "__main__":
    try:
        run()
    except Exception as e:
        # Top-level boundary: keep the full traceback in the log and show the
        # user a friendly message instead of a stack trace.
        logger.exception(e)
        st.error(
            "Leider ist ein unerwarter Fehler aufgetreten. Ich könnte mir das Problem sofort ansehen, Sie erreichen mich unter alexander.seifert@gmail.com"
        )