Spaces:
Sleeping
Sleeping
File size: 2,554 Bytes
92971a2 3d22a7a cd321a3 0019b71 92971a2 a3f2ade 92971a2 40fe5f9 1446129 1eed5b7 9e48b09 92971a2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
import gradio as gr
import soundfile as sf
import tempfile
import shutil
import os
import librosa
import time
import numpy as np
import subprocess
from pywhispercpp.model import Model
# Shared whisper.cpp model used by every transcription request.
# Earlier configurations tried here: 'base.en' (English only) and the
# multilingual 'base' checkpoint; the multilingual 'medium' checkpoint is
# the one currently loaded, with translation turned off.
model = Model(
    "medium",
    n_threads=6,
    models_dir="./Models",
    language="hindi",
    translate=False,
)
def resample_to_16k(audio, orig_sr):
    """Resample ``audio`` from ``orig_sr`` Hz to 16000 Hz via librosa.

    Args:
        audio: Sample array handed straight to ``librosa.resample`` as ``y``.
        orig_sr: Sampling rate of ``audio`` in Hz.

    Returns:
        Whatever ``librosa.resample`` returns for a 16000 Hz target rate.
    """
    return librosa.resample(y=audio, orig_sr=orig_sr, target_sr=16000)
def transcribe(audio):
    """Transcribe one audio input with the module-level whisper model.

    The samples are converted to float32, down-mixed to one channel if
    needed, peak-normalised, resampled to 16 kHz, written to a temporary
    WAV file and passed to ``model.transcribe``.

    Args:
        audio: ``(sample_rate, samples)`` tuple, or ``None`` when the user
            submitted without recording anything.

    Returns:
        A ``(text, seconds)`` tuple: the concatenated text of all returned
        segments and the time spent inside ``model.transcribe``. Empty or
        missing input yields ``("", 0.0)`` without running the model.
    """
    print(type(audio))
    if audio is None:
        # Nothing was recorded; unpacking None would raise a TypeError.
        return "", 0.0

    sr, y = audio
    y = np.asarray(y, dtype=np.float32)
    if y.size == 0:
        # np.max on an empty array raises ValueError; nothing to transcribe.
        return "", 0.0
    if y.ndim > 1:
        # Gradio documents multi-channel numpy audio as (samples, channels).
        # Down-mix to mono so resampling runs along the time axis.
        y = y.mean(axis=1)

    # Peak-normalise, but leave silent input alone: dividing by a zero peak
    # would fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    y_resampled = resample_to_16k(y, sr)

    # Reserve a path only; the handle is closed before writing so the file
    # can be reopened by name on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
        temp_audio_path = temp_audio.name
    try:
        sf.write(temp_audio_path, y_resampled, 16000)
        # perf_counter is monotonic, unlike time.time, so the measured
        # interval cannot be skewed by wall-clock adjustments.
        start_time_py = time.perf_counter()
        py_result = model.transcribe(temp_audio_path, n_threads=6)
        elapsed_py = time.perf_counter() - start_time_py
    finally:
        # delete=False means nobody else cleans this up; remove it so each
        # request does not leave a WAV file behind.
        os.remove(temp_audio_path)

    print("Py_result : ", py_result)
    print("--------------------------")
    print(f"Execution time using py: {elapsed_py} seconds")

    output_text = "".join(segment.text for segment in py_result)
    return output_text, elapsed_py
# Gradio UI: microphone audio goes to `transcribe`; its two return values
# fill the transcription textbox and the timing textbox, in that order.
# The example clips are restricted to the Hindi samples ("only hindi").
# The previous example list also referenced Tamil, Marathi, Nepal, Telugu,
# Malayalam, Gujarati and Bengali clips under ./Samples.
demo = gr.Interface(
    fn=transcribe,
    inputs="microphone",
    outputs=[
        gr.Textbox(label="Py_Transcription"),
        gr.Textbox(label="Time taken for Transcription"),
    ],
    examples=[
        "./Samples/Hindi_1.mp3",
        "./Samples/Hindi_2.mp3",
        "./Samples/Hindi_3.mp3",
        "./Samples/Hindi_4.mp3",
        "./Samples/Hindi_5.mp3",
    ],
)
# Start the web server at import/run time, as the original script does.
demo.launch()