Zeph27 committed on
Commit
a80039f
1 Parent(s): b72617b

default to large v3

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -82,10 +82,10 @@ def process_audio(audio_file, language, whisper_model):
82
 
83
  if language:
84
  print(f"Using language: {language}")
85
- transcription = pipe(inputs, batch_size=8, generate_kwargs={"task": "transcribe", "language": language}, return_timestamps=True)["text"]
86
  else:
87
  print("No language defined, using default language")
88
- transcription = pipe(inputs, batch_size=8, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
89
 
90
  end_time = time.time()
91
  processing_time = round(end_time - start_time, 2)
@@ -174,7 +174,7 @@ with gr.Blocks(theme='NoCrypt/miku') as demo:
174
  with gr.Row():
175
  youtube_url = gr.Textbox(label="YouTube URL", visible=False, info="Input the full URL of the YouTube video you want to transcribe and summarize. Example: https://www.youtube.com/watch?v=VIDEO_ID")
176
  audio_file = gr.File(label="Upload Audio File", visible=True, file_types=['.wav', '.flac', '.mp3'])
177
- whisper_model = gr.Dropdown(["openai/whisper-tiny", "openai/whisper-base", "openai/whisper-small", "openai/whisper-medium", "openai/whisper-large-v3", "distil-whisper/distil-large-v3"], label="Whisper Model", value="distil-whisper/distil-large-v3", info="Tiny is the fastest model, but it's not the best quality. large-v3 is the best quality, but it's the slowest model.")
178
  gemini_model_variant = gr.Dropdown(["gemini-1.5-flash", "gemini-1.5-pro"], label="Gemini Model Variant", value="gemini-1.5-pro", info="Gemini-1.5-flash is the fastest model, but it's not the best quality. Gemini-1.5-pro is the best quality, but it's slower")
179
  define_language = gr.Checkbox(label="Define Language", value=False, info="If you want to define the language, check this box")
180
  language = gr.Dropdown(["id","en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"], label="Language", value=None, info="Select the language for transcription", visible=False)
 
82
 
83
  if language:
84
  print(f"Using language: {language}")
85
+ transcription = pipe(inputs, batch_size=16, generate_kwargs={"task": "transcribe", "language": language}, return_timestamps=True)["text"]
86
  else:
87
  print("No language defined, using default language")
88
+ transcription = pipe(inputs, batch_size=16, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
89
 
90
  end_time = time.time()
91
  processing_time = round(end_time - start_time, 2)
 
174
  with gr.Row():
175
  youtube_url = gr.Textbox(label="YouTube URL", visible=False, info="Input the full URL of the YouTube video you want to transcribe and summarize. Example: https://www.youtube.com/watch?v=VIDEO_ID")
176
  audio_file = gr.File(label="Upload Audio File", visible=True, file_types=['.wav', '.flac', '.mp3'])
177
+ whisper_model = gr.Dropdown(["openai/whisper-tiny", "openai/whisper-base", "openai/whisper-small", "openai/whisper-medium", "openai/whisper-large-v3"], label="Whisper Model", value="openai/whisper-large-v3", info="Tiny is the fastest model, but it's not the best quality. large-v3 is the best quality, but it's the slowest model.")
178
  gemini_model_variant = gr.Dropdown(["gemini-1.5-flash", "gemini-1.5-pro"], label="Gemini Model Variant", value="gemini-1.5-pro", info="Gemini-1.5-flash is the fastest model, but it's not the best quality. Gemini-1.5-pro is the best quality, but it's slower")
179
  define_language = gr.Checkbox(label="Define Language", value=False, info="If you want to define the language, check this box")
180
  language = gr.Dropdown(["id","en", "es", "fr", "de", "it", "pt", "ru", "ja", "ko", "zh"], label="Language", value=None, info="Select the language for transcription", visible=False)