FarhadMadadzade committed on
Commit
292ce47
1 Parent(s): 23d6d67

Trying base model with Romanian

Browse files
Files changed (1) hide show
  1. app.py +38 -29
app.py CHANGED
@@ -9,10 +9,10 @@ import os
9
  from pydub import AudioSegment
10
  from pydub.silence import split_on_silence
11
 
12
- pipe = pipeline("automatic-speech-recognition", model="Sleepyp00/whisper-small-Swedish")
13
 
14
 
15
- def process_video(from_date, to_date):
16
  video_path = download_video1(from_date, to_date)
17
 
18
  # Extract audio from the video
@@ -29,7 +29,7 @@ def process_video(from_date, to_date):
29
  chunk.export(f"chunk{i}.wav", format="wav")
30
  with open(f"chunk{i}.wav", "rb") as audio_file:
31
  audio = audio_file.read()
32
- transcription += pipe(audio)["text"] + "\n\n"
33
  os.remove(f"chunk{i}.wav")
34
 
35
  # Remove the audio file
@@ -38,45 +38,54 @@ def process_video(from_date, to_date):
38
  return video_path, transcription
39
 
40
 
41
- # def process_video(date):
42
- # # Download the video
43
- # video_path = download_video(date)
44
 
45
- # # Extract audio from the video
46
- # audio_path = f"audio_{date}.wav"
47
- # AudioFileClip(video_path).write_audiofile(audio_path)
48
 
49
- # # Split the audio into chunks
50
- # audio = AudioSegment.from_wav(audio_path)
51
- # chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
52
 
53
- # # Transcribe each chunk
54
- # transcription = ""
55
- # for i, chunk in enumerate(chunks):
56
- # chunk.export(f"chunk{i}.wav", format="wav")
57
- # with open(f"chunk{i}.wav", "rb") as audio_file:
58
- # audio = audio_file.read()
59
- # transcription += pipe(audio)["text"] + " "
60
- # os.remove(f"chunk{i}.wav")
 
 
 
61
 
62
- # # Remove the audio file
63
- # os.remove(audio_path)
64
 
65
- # return video_path, transcription
66
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  iface = gr.Interface(
69
  fn=process_video,
70
- # inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
71
- inputs=[
72
- gr.inputs.Textbox(label="From date with format YYYY-MM-DD"),
73
- gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
74
- ],
75
  outputs=[
76
  gr.outputs.Video(),
77
  gr.Textbox(lines=1000, max_lines=1000, interactive=True),
78
  ],
79
- title="Swedish Transcription Test",
80
  )
81
 
82
  iface.launch()
 
9
  from pydub import AudioSegment
10
  from pydub.silence import split_on_silence
11
 
12
+ pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small")
13
 
14
 
15
+ def process_video1(from_date, to_date):
16
  video_path = download_video1(from_date, to_date)
17
 
18
  # Extract audio from the video
 
29
  chunk.export(f"chunk{i}.wav", format="wav")
30
  with open(f"chunk{i}.wav", "rb") as audio_file:
31
  audio = audio_file.read()
32
+ transcription += pipe(audio)["text"] + "\n "
33
  os.remove(f"chunk{i}.wav")
34
 
35
  # Remove the audio file
 
38
  return video_path, transcription
39
 
40
 
41
+ def process_video(date):
42
+ # Download the video
43
+ video_path = download_video(date)
44
 
45
+ # Extract audio from the video
46
+ audio_path = f"audio_{date}.wav"
47
+ AudioFileClip(video_path).write_audiofile(audio_path)
48
 
49
+ # Split the audio into chunks
50
+ audio = AudioSegment.from_wav(audio_path)
51
+ chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)
52
 
53
+ # Transcribe each chunk
54
+ transcription = ""
55
+ for i, chunk in enumerate(chunks):
56
+ chunk.export(f"chunk{i}.wav", format="wav")
57
+ with open(f"chunk{i}.wav", "rb") as audio_file:
58
+ audio = audio_file.read()
59
+ transcription += pipe(audio)["text"] + " "
60
+ os.remove(f"chunk{i}.wav")
61
+
62
+ # Remove the audio file
63
+ os.remove(audio_path)
64
 
65
+ return video_path, transcription
 
66
 
 
67
 
68
+ # iface = gr.Interface(
69
+ # fn=process_video1,
70
+ # inputs=[
71
+ # gr.inputs.Textbox(label="From date with format YYYY-MM-DD"),
72
+ # gr.inputs.Textbox(label="Date with format YYYY-MM-DD"),
73
+ # ],
74
+ # outputs=[
75
+ # gr.outputs.Video(),
76
+ # gr.Textbox(lines=1000, max_lines=1000, interactive=True),
77
+ # ],
78
+ # title="Swedish Transcription Test",
79
+ # )
80
 
81
  iface = gr.Interface(
82
  fn=process_video,
83
+ inputs=gr.inputs.Textbox(label="Date with format YYYYMMDD"),
 
 
 
 
84
  outputs=[
85
  gr.outputs.Video(),
86
  gr.Textbox(lines=1000, max_lines=1000, interactive=True),
87
  ],
88
+ title="Romanian Transcription Test",
89
  )
90
 
91
  iface.launch()