Zeph27 committed on
Commit
b2fc243
1 Parent(s): 262ec8a

array input audio

Browse files
Files changed (2) hide show
  1. app.py +12 -7
  2. packages.txt +1 -0
app.py CHANGED
@@ -6,6 +6,7 @@ import google.generativeai as genai
6
  import re
7
  import torch
8
  from transformers import pipeline
 
9
  import time
10
  import spaces
11
 
@@ -54,14 +55,14 @@ def summarize_transcription(transcription, model, gemini_prompt):
54
  return f"Error summarizing transcription: {str(e)}"
55
 
56
  @spaces.GPU(duration=120)
57
- def process_audio(audio_file, pipe, language):
58
  print("Starting transcription...")
59
  if language:
60
  print(f"Using language: {language}")
61
- transcription = pipe(f"{audio_file}", batch_size=8, generate_kwargs={"task": "transcribe", "language": language}, return_timestamps=True)["text"]
62
  else:
63
  print("No language defined, using default language")
64
- transcription = pipe(f"{audio_file}", batch_size=8, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
65
  return transcription
66
 
67
  def transcribe(youtube_url, audio_file, whisper_model, gemini_api_key, gemini_prompt, gemini_model_variant, language, progress=gr.Progress()):
@@ -72,15 +73,13 @@ def transcribe(youtube_url, audio_file, whisper_model, gemini_api_key, gemini_pr
72
  gemini_api_key = default_gemini_api_key
73
  model = configure_genai(gemini_api_key, gemini_model_variant)
74
 
75
- # device = 0 if torch.cuda.is_available() else "cpu"
76
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
77
  pipe = pipeline(
78
  task="automatic-speech-recognition",
79
  model=whisper_model,
80
  chunk_length_s=30,
81
  device=device,
82
  )
83
- pipe.model = pipe.model.to(device)
84
 
85
  if youtube_url:
86
  progress(0.1, desc="Extracting YouTube ID")
@@ -97,9 +96,15 @@ def transcribe(youtube_url, audio_file, whisper_model, gemini_api_key, gemini_pr
97
  progress(0.2, desc="Reading audio file")
98
  audio_file = f"{audio_file.name}"
99
  print(f"Audio file read: {audio_file}")
 
 
 
 
 
 
100
 
101
  progress(0.4, desc="Starting transcription")
102
- transcription = process_audio(audio_file, pipe, language)
103
 
104
  progress(0.6, desc="Cleaning up")
105
  # Delete the audio file after transcription
 
6
  import re
7
  import torch
8
  from transformers import pipeline
9
+ from transformers.pipelines.audio_utils import ffmpeg_read
10
  import time
11
  import spaces
12
 
 
55
  return f"Error summarizing transcription: {str(e)}"
56
 
57
  @spaces.GPU(duration=120)
58
+ def process_audio(inputs, pipe, language):
59
  print("Starting transcription...")
60
  if language:
61
  print(f"Using language: {language}")
62
+ transcription = pipe(inputs, batch_size=8, generate_kwargs={"task": "transcribe", "language": language}, return_timestamps=True)["text"]
63
  else:
64
  print("No language defined, using default language")
65
+ transcription = pipe(inputs, batch_size=8, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
66
  return transcription
67
 
68
  def transcribe(youtube_url, audio_file, whisper_model, gemini_api_key, gemini_prompt, gemini_model_variant, language, progress=gr.Progress()):
 
73
  gemini_api_key = default_gemini_api_key
74
  model = configure_genai(gemini_api_key, gemini_model_variant)
75
 
76
+ device = 0 if torch.cuda.is_available() else "cpu"
 
77
  pipe = pipeline(
78
  task="automatic-speech-recognition",
79
  model=whisper_model,
80
  chunk_length_s=30,
81
  device=device,
82
  )
 
83
 
84
  if youtube_url:
85
  progress(0.1, desc="Extracting YouTube ID")
 
96
  progress(0.2, desc="Reading audio file")
97
  audio_file = f"{audio_file.name}"
98
  print(f"Audio file read: {audio_file}")
99
+
100
+ with open(audio_file, "rb") as f:
101
+ inputs = f.read()
102
+
103
+ inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
104
+ inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
105
 
106
  progress(0.4, desc="Starting transcription")
107
+ transcription = process_audio(inputs, pipe, language)
108
 
109
  progress(0.6, desc="Cleaning up")
110
  # Delete the audio file after transcription
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg