spookyspaghetti committed on
Commit
964152f
1 Parent(s): 82e9b6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -53
app.py CHANGED
@@ -1,55 +1,3 @@
1
- pip install -U git+https://github.com/PrithivirajDamodaran/Gramformer.git
2
- pip install gradio -q
3
- ## Install dependencies
4
- pip install wget
5
- apt-get install sox libsndfile1 ffmpeg
6
- pip install text-unidecode
7
- pip install matplotlib>=3.3.2
8
-
9
- ## Install NeMo
10
- BRANCH = 'r1.13.0'
11
- python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]
12
-
13
- ## Grab the config we'll use in this example
14
- mkdir configs
15
- wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/config.yaml
16
-
17
- python -m spacy download en_core_web_md
18
- python -m spacy link en_core_web_md en
19
-
20
  import gradio as gr
21
- import time
22
- from nemo.collections.asr.models import ASRModel
23
- import torch
24
- if torch.cuda.is_available():
25
- device = torch.device(f'cuda:0')
26
- asr_model = ASRModel.from_pretrained(model_name='stt_en_citrinet_1024')
27
-
28
- from gramformer import Gramformer
29
- import torch
30
-
31
- def set_seed(seed):
32
- torch.manual_seed(seed)
33
- if torch.cuda.is_available():
34
- torch.cuda.manual_seed_all(seed)
35
-
36
- set_seed(1212)
37
-
38
- gf = Gramformer(models = 1, use_gpu=False) # 1=corrector, 2=detector
39
-
40
- def transcribe(audio):
41
- """Speech to text using Nvidia Nemo"""
42
- text = asr_model.transcribe(paths2audio_files=[audio])[0]
43
- correct = list(gf.correct(text, max_candidates = 1))[0]
44
- return text, correct
45
 
46
- # we need input, output and interface components for gradio
47
- gr.Interface(
48
- fn=transcribe,
49
- inputs=[
50
- gr.components.Audio(type="filepath"),
51
- ],
52
- outputs=[
53
- "textbox",
54
- "textbox"
55
- ]).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ gr.Interface.load("models/nvidia/stt_en_citrinet_1024_gamma_0_25").launch()