Zeph27 committed
Commit
e502065
1 Parent(s): 9d8380b

change pipeline code

Files changed (1):
  app.py  +22 -15
app.py CHANGED
@@ -15,6 +15,14 @@ default_gemini_api_key = os.getenv('gemini_api_key')
 
 device = 0 if torch.cuda.is_available() else "cpu"
 
+def load_pipeline(model_name):
+    return pipeline(
+        task="automatic-speech-recognition",
+        model=model_name,
+        chunk_length_s=30,
+        device=device,
+    )
+
 def configure_genai(api_key, model_variant):
     genai.configure(api_key=api_key)
     return genai.GenerativeModel(model_variant)
@@ -57,8 +65,20 @@ def summarize_transcription(transcription, model, gemini_prompt):
         return f"Error summarizing transcription: {str(e)}"
 
 @spaces.GPU(duration=120)
-def process_audio(inputs, pipe, language):
+def process_audio(audio_file, language):
     print("Starting transcription...")
+
+    with open(audio_file, "rb") as f:
+        inputs = f.read()
+
+    inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
+    inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
+
+    if device == 0:
+        pipe = load_pipeline(whisper_model)
+    else:
+        pipe = load_pipeline("openai/whisper-tiny")
+
     if language:
         print(f"Using language: {language}")
         transcription = pipe(inputs, batch_size=8, generate_kwargs={"task": "transcribe", "language": language}, return_timestamps=True)["text"]
@@ -75,13 +95,6 @@ def transcribe(youtube_url, audio_file, whisper_model, gemini_api_key, gemini_pr
         gemini_api_key = default_gemini_api_key
     model = configure_genai(gemini_api_key, gemini_model_variant)
 
-    pipe = pipeline(
-        task="automatic-speech-recognition",
-        model=whisper_model,
-        chunk_length_s=30,
-        device=device,
-    )
-
     if youtube_url:
         progress(0.1, desc="Extracting YouTube ID")
         youtube_id = extract_youtube_id(youtube_url)
@@ -97,15 +110,9 @@ def transcribe(youtube_url, audio_file, whisper_model, gemini_api_key, gemini_pr
         progress(0.2, desc="Reading audio file")
         audio_file = f"{audio_file.name}"
         print(f"Audio file read: {audio_file}")
-
-        with open(audio_file, "rb") as f:
-            inputs = f.read()
-
-        inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
-        inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
 
     progress(0.4, desc="Starting transcription")
-    transcription = process_audio(inputs, pipe, language)
+    transcription = process_audio(audio_file, language)
 
     progress(0.6, desc="Cleaning up")
     # Delete the audio file after transcription
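Net effect of the change: pipeline construction moves out of transcribe() into a load_pipeline() helper, and process_audio() now receives the audio file path, loads the model inside the @spaces.GPU-decorated function (falling back to openai/whisper-tiny when no GPU is available), decodes the file with ffmpeg_read, and runs the transcription. Below is a minimal sketch of that flow; the explicit whisper_model parameter and its default value, and loading the pipeline before its sampling rate is read, are assumptions of the sketch rather than exactly what the committed code does, and the @spaces.GPU decorator is omitted.

# Sketch only -- assumes transformers, torch, and
# transformers.pipelines.audio_utils.ffmpeg_read are installed.
import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read

device = 0 if torch.cuda.is_available() else "cpu"

def load_pipeline(model_name):
    # Build the ASR pipeline on demand instead of once per transcribe() call.
    return pipeline(
        task="automatic-speech-recognition",
        model=model_name,
        chunk_length_s=30,
        device=device,
    )

def process_audio(audio_file, language, whisper_model="openai/whisper-small"):
    # whisper_model and its default are hypothetical here; the committed code
    # reads the name from the surrounding scope instead of a parameter.
    # Use the requested model on GPU, a tiny fallback model on CPU.
    pipe = load_pipeline(whisper_model if device == 0 else "openai/whisper-tiny")

    # Read the raw bytes and decode them at the model's expected sampling rate.
    with open(audio_file, "rb") as f:
        raw = f.read()
    sr = pipe.feature_extractor.sampling_rate
    inputs = {"array": ffmpeg_read(raw, sr), "sampling_rate": sr}

    # Transcribe, optionally pinning the language.
    generate_kwargs = {"task": "transcribe"}
    if language:
        generate_kwargs["language"] = language
    return pipe(inputs, batch_size=8, generate_kwargs=generate_kwargs,
                return_timestamps=True)["text"]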