Spaces:

ID2223-labs
/

romanian_parliament_transcription

Sleeping

fix

de84263 10 months ago

No virus

1 kB

	from transformers import pipeline
	import gradio as gr
	import time
	from video_downloader import download_video
	from moviepy.editor import AudioFileClip
	import datetime
	import os

	# Speech-recognition pipeline, built once at import time and reused by every
	# call to process_video.
	pipe = pipeline(
	    task="automatic-speech-recognition",
	    model="Artanis1551/whisper_romanian3",
	)


	def process_video(date):
	    """Download the video for ``date`` and transcribe its audio track.

	    Args:
	        date: Date string in ``YYYY-MM-DD`` format.

	    Returns:
	        A ``(video_path, transcription)`` tuple, where ``video_path`` is the
	        value returned by ``download_video`` and ``transcription`` is the
	        ``"text"`` field of the speech-recognition pipeline output.

	    Raises:
	        ValueError: If ``date`` does not match the ``YYYY-MM-DD`` format.
	    """
	    # download_video is called with the compact yyyymmdd form of the date.
	    date = datetime.datetime.strptime(date, "%Y-%m-%d").strftime("%Y%m%d")

	    # NOTE(review): presumably returns the local path of the downloaded
	    # video file -- confirm against video_downloader.
	    video_path = download_video(date)

	    audio_path = f"audio_{date}.wav"
	    try:
	        # Extract the audio track; always close the clip so the underlying
	        # reader is released even if writing the WAV file fails.
	        clip = AudioFileClip(video_path)
	        try:
	            clip.write_audiofile(audio_path)
	        finally:
	            clip.close()

	        # The pipeline accepts a filename directly, so there is no need to
	        # load the whole WAV file into memory first.
	        transcription = pipe(audio_path)["text"]
	    finally:
	        # Remove the temporary audio file on success and on failure alike.
	        if os.path.exists(audio_path):
	            os.remove(audio_path)

	    return video_path, transcription


	# Web UI: a single "date" input wired to process_video, whose two return
	# values are shown in a "video" and a "text" output component respectively.
	iface = gr.Interface(
	    process_video,
	    "date",
	    ["video", "text"],
	    title="Romanian Transcription Test",
	)

	# Start serving the interface.
	iface.launch()