yasserrmd committed on
Commit
3d435c7
1 Parent(s): 85e4503

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -21
app.py CHANGED
@@ -9,10 +9,10 @@ files = ['reverb_asr_v1.jit.zip', 'tk.units.txt']
9
  downloaded_files = [hf_hub_download(repo_id=REPO_ID, filename=f) for f in files]
10
  model = load_model(downloaded_files[0], downloaded_files[1])
11
 
12
- def process_cat_embs(cat_embs):
13
- device = "gpu"
14
- cat_embs = torch.tensor([float(c) for c in cat_embs.split(',')]).to(device)
15
- return cat_embs
16
 
17
 
18
  @spaces.GPU
@@ -30,23 +30,22 @@ def recognition(audio, style=0):
30
  return text_output
31
 
32
 
33
- # Gradio UI Components
34
- inputs = [
35
- gr.Audio(type="filepath", label='Input audio'),
36
- gr.Slider(0, 1, value=0, label="Transcription Style", info="Adjust between non-verbatim (0) and verbatim (1) transcription")
37
- ]
38
 
39
- output = gr.Textbox(label="Output Text")
 
 
 
 
 
40
 
41
- # UI and Interface
42
  iface = gr.Interface(
43
- fn=recognition,
44
- inputs=inputs,
45
- outputs=output,
46
- title="Reverb ASR Transcription",
47
- description="Supports verbatim and non-verbatim transcription styles.",
48
- article="<p style='text-align: center'><a href='https://rev.com' target='_blank'>Learn more about Rev</a></p>",
49
- theme='huggingface'
50
- )
51
-
52
- iface.launch(enable_queue=True)
 
9
  downloaded_files = [hf_hub_download(repo_id=REPO_ID, filename=f) for f in files]
10
  model = load_model(downloaded_files[0], downloaded_files[1])
11
 
12
+ def process_style_embedding(style):
13
+ device = torch.device("cpu")
14
+ embedding = torch.tensor([style, 1 - style], device=device)
15
+ return embedding
16
 
17
 
18
  @spaces.GPU
 
30
  return text_output
31
 
32
 
 
 
 
 
 
33
 
34
+ audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
35
+ style_slider = gr.Slider(0, 1, value=0, step=0.1, label="Transcription Style",
36
+ info="Adjust the transcription style: 0 (casual) to 1 (formal).")
37
+ output_textbox = gr.Textbox(label="Transcription Output")
38
+
39
+ description = "This tool transcribes audio using a customizable transcription style ranging from casual to formal. Upload or record an audio file to begin."
40
 
 
41
  iface = gr.Interface(
42
+ fn=transcribe_audio,
43
+ inputs=[audio_input, style_slider],
44
+ outputs=output_textbox,
45
+ title="Audio Transcription with Style Control",
46
+ description=description,
47
+ theme="default"
48
+ )
49
+
50
+
51
+ iface.launch()