Spaces:

mlnsio
/

videoChat

Sleeping

ns-devel

Added youtube url option

38efda5 10 months ago

1.74 kB

	import functools
	import os
	from pathlib import Path

	import requests
	import torch
	from moviepy.editor import VideoFileClip
	from transformers import pipeline

	from settings import HF_API_URL, DATA_DIR

	def convert_video_to_wav(video_path, output_path):
	    """
	    Converts a video file to a WAV audio file.

	    The audio track of the video is written to ``output_path`` and the
	    clip is always closed afterwards, even if writing fails.

	    Args:
	        video_path (str | os.PathLike): The path of the video file to be
	            converted.
	        output_path (str | os.PathLike): The desired path for the output
	            WAV audio file.

	    Returns:
	        None

	    Raises:
	        ValueError: If the video has no audio track.
	    """
	    # Normalise to plain strings so callers may pass pathlib.Path objects.
	    video_clip = VideoFileClip(os.fspath(video_path))
	    try:
	        audio_clip = video_clip.audio
	        if audio_clip is None:
	            # Without this guard the failure is an opaque AttributeError
	            # on ``None.write_audiofile``.
	            raise ValueError(f"No audio track found in video: {video_path}")
	        audio_clip.write_audiofile(os.fspath(output_path))
	    finally:
	        # Release the resources the clip keeps open until it is closed.
	        video_clip.close()

	def get_transcript1(filepath, timeout=300):
	    """
	    Transcribes a video by posting its audio to the endpoint at HF_API_URL.

	    The audio is stored as ``<DATA_DIR>/<video file stem>.wav``. If that
	    file already exists it is reused; otherwise it is created with
	    ``convert_video_to_wav``. The raw audio bytes are then sent in a POST
	    request authorised with the ``HF_KEY`` environment variable.

	    Args:
	        filepath (str | os.PathLike): Path of the video file to transcribe.
	        timeout (float | tuple): Timeout in seconds forwarded to
	            ``requests.post`` so a stalled connection cannot block forever.

	    Returns:
	        The value stored under the ``"text"`` key of the JSON response.

	    Raises:
	        KeyError: If the ``HF_KEY`` environment variable is not set, or the
	            JSON response has no ``"text"`` key.
	        requests.HTTPError: If the endpoint answers with an error status.
	    """
	    # Read the credential first so a missing key fails before any
	    # expensive audio extraction is attempted.
	    headers = {"Authorization": f"Bearer {os.environ['HF_KEY']}"}

	    audio_file = Path(DATA_DIR).joinpath(Path(filepath).stem + ".wav")
	    print(audio_file)
	    if not audio_file.exists():
	        # The data directory may not exist yet on a fresh deployment.
	        audio_file.parent.mkdir(parents=True, exist_ok=True)
	        convert_video_to_wav(filepath, str(audio_file))

	    response = requests.post(
	        HF_API_URL,
	        headers=headers,
	        data=audio_file.read_bytes(),
	        timeout=timeout,
	    )
	    # Surface HTTP failures directly instead of a confusing
	    # ``KeyError: 'text'`` raised from an error payload.
	    response.raise_for_status()
	    payload = response.json()  # parse the body once and reuse it
	    print(response, payload)
	    return payload["text"]

	@functools.lru_cache(maxsize=1)
	def _load_asr_pipeline():
	    """Builds the whisper-base speech-recognition pipeline once and caches it."""
	    # Use the first CUDA device when torch reports one, otherwise the CPU.
	    device = "cuda:0" if torch.cuda.is_available() else "cpu"
	    return pipeline(
	        "automatic-speech-recognition", model="openai/whisper-base", device=device
	    )


	def get_transcript(url):
	    """
	    Converts an audio input to text with the ``openai/whisper-base`` model.

	    The pipeline is asked for timestamped chunks, but only the text of
	    each chunk is kept; the timestamps are not returned.

	    Args:
	        url: The audio input handed unchanged to the speech-recognition
	            pipeline (presumably a URL or a local file path; confirm
	            against callers).

	    Returns:
	        str: The texts of all returned chunks concatenated in order.
	    """
	    # Reuse the cached pipeline instead of rebuilding the model per call.
	    pipe = _load_asr_pipeline()
	    chunks = pipe(
	        url,
	        max_new_tokens=256,
	        generate_kwargs={"task": "transcribe"},
	        chunk_length_s=30,
	        batch_size=8,
	        return_timestamps=True,
	    )["chunks"]
	    return "".join(chunk["text"] for chunk in chunks)