from transformers import pipeline
import gradio as gr
from video_downloader import download_video, download_video1
from moviepy.editor import AudioFileClip
import os
from pydub import AudioSegment
from pydub.silence import split_on_silence

pipe = pipeline("automatic-speech-recognition", model="Sleepyp00/whisper-small-Swedish")
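# The pipeline wraps the whisper-small-Swedish checkpoint and handles audio
# decoding, feature extraction, and text generation. Whisper operates on
# roughly 30-second windows, so long recordings are split into
# silence-delimited chunks in process_video before transcription.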


def process_video(from_date, to_date):
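    # download_video1 comes from the local video_downloader module; it is
    # assumed here to return the filesystem path of the downloaded video for
    # the given date range.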
    video_path = download_video1(from_date, to_date)

    # Extract audio from the video
    audio_path = f"audio_{from_date}_{to_date}.wav"
    audio_clip = AudioFileClip(video_path)
    audio_clip.write_audiofile(audio_path)
    audio_clip.close()

    # Split the audio into chunks
    audio = AudioSegment.from_wav(audio_path)
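    # split_on_silence arguments: min_silence_len is in milliseconds and
    # silence_thresh is in dBFS, so the audio is cut wherever at least 0.5 s
    # stays below -40 dBFS.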
    chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)

    # Transcribe each chunk
    transcription = ""
    for i, chunk in enumerate(chunks):
        chunk_path = f"chunk{i}.wav"
        chunk.export(chunk_path, format="wav")
        with open(chunk_path, "rb") as audio_file:
            chunk_bytes = audio_file.read()
        transcription += pipe(chunk_bytes)["text"] + "\n\n"
        os.remove(chunk_path)

    # Remove the audio file
    os.remove(audio_path)

    return video_path, transcription


# def process_video(date):
#     # Download the video
#     video_path = download_video(date)

#     # Extract audio from the video
#     audio_path = f"audio_{date}.wav"
#     AudioFileClip(video_path).write_audiofile(audio_path)

#     # Split the audio into chunks
#     audio = AudioSegment.from_wav(audio_path)
#     chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)

#     # Transcribe each chunk
#     transcription = ""
#     for i, chunk in enumerate(chunks):
#         chunk.export(f"chunk{i}.wav", format="wav")
#         with open(f"chunk{i}.wav", "rb") as audio_file:
#             audio = audio_file.read()
#         transcription += pipe(audio)["text"] + " "
#         os.remove(f"chunk{i}.wav")

#     # Remove the audio file
#     os.remove(audio_path)

#     return video_path, transcription


iface = gr.Interface(
    fn=process_video,
    # inputs=gr.Textbox(label="Date with format YYYYMMDD"),
    inputs=[
        gr.Textbox(label="From date with format YYYY-MM-DD"),
        gr.Textbox(label="To date with format YYYY-MM-DD"),
    ],
    outputs=[
        gr.Video(),
        gr.Textbox(label="Transcription", lines=1000, max_lines=1000, interactive=True),
    ],
    live=True,
    title="Romanian Transcription Test",
)

iface.launch()
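# live=True above makes Gradio re-run process_video whenever an input changes;
# launch() serves the interface on a local web server (pass share=True for a
# temporary public link when running outside a hosting platform).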