# Hugging Face Hub file-viewer header captured with the source (not program code):
#   yasserrmd's picture
#   Update app.py
#   85e4503 verified
#   raw
#   history blame
#   1.58 kB
import gradio as gr
import torch
from wenet.cli.model import load_model
from huggingface_hub import hf_hub_download
import spaces
# Hugging Face Hub repository that hosts the Reverb ASR artifacts.
REPO_ID = "Revai/reverb-asr"

# Artifacts to fetch, listed in the order they are handed to ``load_model``.
files = ['reverb_asr_v1.jit.zip', 'tk.units.txt']

# Resolve each artifact to a local path (downloads happen in list order).
model_path, units_path = (
    hf_hub_download(repo_id=REPO_ID, filename=name) for name in files
)
downloaded_files = [model_path, units_path]

# Load the model once at import time so every request reuses the same instance.
model = load_model(model_path, units_path)
def process_cat_embs(cat_embs):
    """Parse a comma-separated string of numbers into a 1-D float tensor.

    Args:
        cat_embs: Comma-separated numeric fields, e.g. ``"0.3,0.7"``.

    Returns:
        A ``torch.Tensor`` with one element per field, placed on the CUDA
        device when one is available and on the CPU otherwise.

    Raises:
        ValueError: If any field cannot be converted with ``float()``.
    """
    # "gpu" is not a device type torch accepts (it raises RuntimeError); the
    # GPU backend is spelled "cuda".  Fall back to the CPU so the helper also
    # works on hosts without a GPU.
    # NOTE(review): the device the model itself runs on is not visible from
    # this function - confirm it matches the device chosen here.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    values = [float(field) for field in cat_embs.split(',')]
    return torch.tensor(values).to(device)
@spaces.GPU
def recognition(audio, style=0):
    """Transcribe one audio input and return the text shown in the UI.

    Args:
        audio: The audio value received from the interface; a falsy value
            (nothing provided) short-circuits with an input-error message.
        style: Number forwarded to the model as the pair ``style`` and
            ``1 - style``.

    Returns:
        The transcription with every '▁' replaced by a space, or a
        human-readable error string when there is no input or no text result.
    """
    # Guard: nothing to transcribe.
    if not audio:
        return "Input Error! Please enter one audio!"

    # Encode the style and its complement as the model's conditioning tensor.
    style_tensor = process_cat_embs(f'{style},{1-style}')
    result = model.transcribe(audio, cat_embs=style_tensor)

    # Happy path first: a non-empty result that carries a 'text' entry.
    if result and 'text' in result:
        return result['text'].replace('▁', ' ')
    return "ERROR! No text output! Please try again!"
# Gradio UI Components
# Audio is handed to the callback as a file path.
audio_input = gr.Audio(type="filepath", label='Input audio')
# Slider value is passed to the callback as its ``style`` argument.
style_slider = gr.Slider(
    0,
    1,
    value=0,
    label="Transcription Style",
    info="Adjust between non-verbatim (0) and verbatim (1) transcription",
)
inputs = [audio_input, style_slider]

# Single text box that displays whatever string the callback returns.
output = gr.Textbox(label="Output Text")
# UI and Interface
# Footer HTML rendered beneath the interface.
_ARTICLE_HTML = (
    "<p style='text-align: center'>"
    "<a href='https://rev.com' target='_blank'>Learn more about Rev</a>"
    "</p>"
)

# Collect the interface options in one mapping, then build the Interface
# that wires the input components to ``recognition`` and the output box.
_interface_options = {
    "fn": recognition,
    "inputs": inputs,
    "outputs": output,
    "title": "Reverb ASR Transcription",
    "description": "Supports verbatim and non-verbatim transcription styles.",
    "article": _ARTICLE_HTML,
    "theme": 'huggingface',
}
iface = gr.Interface(**_interface_options)
# Start the app with request queuing enabled.  The ``enable_queue`` keyword
# was deprecated in Gradio 3 and removed from ``launch()`` in Gradio 4, where
# passing it raises TypeError; calling ``queue()`` before ``launch()`` is the
# supported spelling and works on both major versions.
iface.queue().launch()