import os
from pathlib import Path

import requests
import torch
from moviepy.editor import VideoFileClip
from transformers import pipeline

from settings import DATA_DIR, HF_API_URL


def convert_video_to_wav(video_path, output_path):
    """Extract the audio track of a video file and write it as a WAV file.

    Args:
        video_path (str): Path of the video file to be converted.
        output_path (str | Path): Destination path for the output WAV file.

    Returns:
        None
    """
    video_clip = VideoFileClip(video_path)
    try:
        video_clip.audio.write_audiofile(output_path)
    finally:
        # Close the clip so ffmpeg readers / file handles are released
        # (the original leaked them).
        video_clip.close()


def get_transcript1(filepath):
    """Transcribe a media file via the Hugging Face inference API.

    The file is first converted to a WAV cached under ``DATA_DIR`` (keyed by
    the source file's stem) unless that WAV already exists, then the raw
    bytes are POSTed to ``HF_API_URL``.

    Args:
        filepath (str): Path to the source video/audio file.

    Returns:
        str: The transcribed text from the API response's ``"text"`` field.

    Raises:
        KeyError: If the ``HF_KEY`` environment variable is not set, or the
            API response lacks a ``"text"`` field.
    """
    audio_file = Path(DATA_DIR).joinpath(Path(filepath).stem + ".wav")
    print(audio_file)
    if not audio_file.exists():
        convert_video_to_wav(filepath, audio_file)
    # Requires the HF_KEY environment variable to hold the API token.
    headers = {"Authorization": f"Bearer {os.environ['HF_KEY']}"}
    data = audio_file.read_bytes()
    response = requests.post(HF_API_URL, headers=headers, data=data)
    # Parse the body once instead of calling response.json() twice.
    payload = response.json()
    print(response, payload)
    return payload["text"]


def get_transcript(url):
    """Transcribe an audio file locally with Whisper and return its text.

    Args:
        url (str): URL or local path of the audio file to transcribe.

    Returns:
        str: Concatenation of the text of all timestamped chunks.
    """
    # Prefer GPU when available; fall back to CPU.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    pipe = pipeline(
        "automatic-speech-recognition", model="openai/whisper-base", device=device
    )
    chunks = pipe(
        url,
        max_new_tokens=256,
        generate_kwargs={"task": "transcribe"},
        chunk_length_s=30,
        batch_size=8,
        return_timestamps=True,
    )["chunks"]
    # Single join instead of repeated += (avoids quadratic string building).
    return "".join(chunk["text"] for chunk in chunks)