Davidsamuel101 committed
Commit 4b76aa5
1 Parent(s): 245d3ad

Add Gradio Codes

Files changed (4)
  1. .gitignore +163 -0
  2. app.py +225 -0
  3. packages.txt +1 -0
  4. requirements.txt +3 -0
.gitignore ADDED
@@ -0,0 +1,163 @@
+ .DS_Store
+ sherpa-ncnn-pruned-transducer-stateless7-streaming-id/
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
app.py ADDED
@@ -0,0 +1,225 @@
+ import librosa
+ import sherpa_ncnn
+ import os
+ import time
+ import gradio as gr
+ import numpy as np
+
+ from functools import lru_cache
+ from pathlib import Path
+ from huggingface_hub import Repository
+
+ AUTH_TOKEN = os.getenv("AUTH_TOKEN")
+
+ language_to_models = {
+     "id": [
+         "bookbot/sherpa-ncnn-pruned-transducer-stateless7-streaming-id",
+     ],
+ }
+
+ language_choices = list(language_to_models.keys())
+
+ streaming_recognizer = None
+
+
+ def recognize(
+     language: str,
+     repo_id: str,
+     decoding_method: str,
+     num_active_paths: int,
+     in_filename: str,
+ ):
+     recognizer = get_pretrained_model(
+         repo_id,
+         decoding_method=decoding_method,
+         num_active_paths=num_active_paths,
+     )
+
+     audio, sr = librosa.load(in_filename, sr=16_000)
+     # Feed the audio in ~0.32 s chunks to mimic streaming decoding.
+     samples_per_read = int(0.32 * sr)
+     recognized_text = ""
+
+     for i in range(0, len(audio), samples_per_read):
+         chunk = audio[i : i + samples_per_read]
+         recognizer.accept_waveform(sr, chunk)
+         transcript = recognizer.text
+         if transcript:
+             recognized_text = transcript
+
+     # Pad with 0.5 s of silence so the final frames are flushed through the model.
+     tail_paddings = np.zeros(int(recognizer.sample_rate * 0.5), dtype=np.float32)
+     recognizer.accept_waveform(recognizer.sample_rate, tail_paddings)
+
+     recognizer.input_finished()
+     transcript = recognizer.text
+     if transcript:
+         recognized_text = transcript
+
+     return recognized_text
+
+
+ def initialize_streaming_model(
+     repo_id: str, decoding_method: str, num_active_paths: int
+ ):
+     # Rebind the module-level recognizer; without `global` this assignment
+     # would only create a local variable and the new model would be discarded.
+     global streaming_recognizer
+     streaming_recognizer = get_pretrained_model(
+         repo_id, decoding_method, num_active_paths
+     )
+     print("Re-initialized model!")
+
+
+ @lru_cache(maxsize=10)
+ def get_pretrained_model(repo_id: str, decoding_method: str, num_active_paths: int):
+     # Cached per (repo_id, decoding_method, num_active_paths), so repeated
+     # requests reuse an already-constructed recognizer.
+     model_name = Path(repo_id.split("/")[-1])
+     _ = Repository(
+         local_dir=model_name,
+         clone_from=repo_id,
+         token=AUTH_TOKEN,
+     )
+
+     return sherpa_ncnn.Recognizer(
+         tokens=str(model_name / "tokens.txt"),
+         encoder_param=str(model_name / "encoder_jit_trace-pnnx.ncnn.param"),
+         encoder_bin=str(model_name / "encoder_jit_trace-pnnx.ncnn.bin"),
+         decoder_param=str(model_name / "decoder_jit_trace-pnnx.ncnn.param"),
+         decoder_bin=str(model_name / "decoder_jit_trace-pnnx.ncnn.bin"),
+         joiner_param=str(model_name / "joiner_jit_trace-pnnx.ncnn.param"),
+         joiner_bin=str(model_name / "joiner_jit_trace-pnnx.ncnn.bin"),
+         num_threads=os.cpu_count(),
+         decoding_method=decoding_method,
+         num_active_paths=num_active_paths,
+         enable_endpoint_detection=True,
+         # Very long thresholds (30 s) effectively keep a single utterance
+         # open for the whole session.
+         rule1_min_trailing_silence=30,
+         rule2_min_trailing_silence=30,
+         rule3_min_utterance_length=30,
+     )
+
+
+ def process_uploaded_file(
+     language: str,
+     repo_id: str,
+     decoding_method: str,
+     num_active_paths: int,
+     in_filename: str,
+ ):
+     return recognize(
+         in_filename=in_filename,
+         language=language,
+         repo_id=repo_id,
+         decoding_method=decoding_method,
+         num_active_paths=num_active_paths,
+     )
+
+
+ def recognize_audio_from_mic(
+     in_filename: str,
+     state: str,
+ ):
+     audio, sr = librosa.load(in_filename, sr=16_000)
+     streaming_recognizer.accept_waveform(sr, audio)
+     time.sleep(0.32)
+     transcript = streaming_recognizer.text
+     if transcript:
+         state = transcript
+     return state, state
+
+
+ def update_model_dropdown(language: str):
+     if language in language_to_models:
+         choices = language_to_models[language]
+         return gr.Dropdown.update(choices=choices, value=choices[0])
+     raise ValueError(f"Unsupported language: {language}")
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# Automatic Speech Recognition with Next-gen Kaldi")
+
+     language_radio = gr.Radio(
+         label="Language", choices=language_choices, value=language_choices[0]
+     )
+     model_dropdown = gr.Dropdown(
+         choices=language_to_models[language_choices[0]],
+         label="Select a model",
+         value=language_to_models[language_choices[0]][0],
+     )
+
+     language_radio.change(
+         update_model_dropdown,
+         inputs=language_radio,
+         outputs=model_dropdown,
+     )
+
+     decoding_method_radio = gr.Radio(
+         label="Decoding method",
+         choices=["greedy_search", "modified_beam_search"],
+         value="greedy_search",
+     )
+
+     num_active_paths_slider = gr.Slider(
+         minimum=1,
+         value=4,
+         step=1,
+         label="Number of active paths for modified_beam_search",
+     )
+
+     with gr.Tab("File Upload"):
+         uploaded_file = gr.Audio(
+             source="upload",  # Choose between "microphone", "upload"
+             type="filepath",
+             label="Upload audio file",
+         )
+         uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
+         with gr.Row():
+             upload_button = gr.Button("Recognize audio")
+             upload_clear_button = gr.ClearButton(
+                 components=[uploaded_file, uploaded_output]
+             )
+
+     with gr.Tab("Real-time Microphone Recognition"):
+         if streaming_recognizer is None:
+             streaming_recognizer = get_pretrained_model(
+                 model_dropdown.value,
+                 decoding_method_radio.value,
+                 num_active_paths_slider.value,
+             )
+             print("Model initialized!")
+
+         model_dropdown.change(
+             fn=initialize_streaming_model,
+             inputs=[
+                 model_dropdown,
+                 decoding_method_radio,
+                 num_active_paths_slider,
+             ],
+         )
+
+         state = gr.State(value="")
+         mic_input_audio = gr.Audio(
+             source="microphone",
+             type="filepath",
+             label="Microphone input",
+         )
+         mic_text_output = gr.Textbox(label="Recognized speech from microphone")
+         mic_input_audio.stream(
+             fn=recognize_audio_from_mic,
+             inputs=[mic_input_audio, state],
+             outputs=[mic_text_output, state],
+             show_progress=False,
+         )
+         with gr.Row():
+             mic_clear_button = gr.ClearButton(
+                 components=[mic_text_output, state]
+             )
+             # Also reset the streaming recognizer so stale context is dropped.
+             mic_clear_button.click(streaming_recognizer.reset)
+
+     upload_button.click(
+         process_uploaded_file,
+         inputs=[
+             language_radio,
+             model_dropdown,
+             decoding_method_radio,
+             num_active_paths_slider,
+             uploaded_file,
+         ],
+         outputs=uploaded_output,
+     )
+
+
+ demo.launch(debug=True)
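For reference (not part of this commit), the recognizer wiring in app.py can be exercised outside Gradio. A minimal offline-decoding sketch, assuming the model repo listed above has already been cloned into the working directory; "test.wav" and num_threads=4 are placeholders:

import librosa
import numpy as np
import sherpa_ncnn

model = "sherpa-ncnn-pruned-transducer-stateless7-streaming-id"  # assumed local clone
recognizer = sherpa_ncnn.Recognizer(
    tokens=f"{model}/tokens.txt",
    encoder_param=f"{model}/encoder_jit_trace-pnnx.ncnn.param",
    encoder_bin=f"{model}/encoder_jit_trace-pnnx.ncnn.bin",
    decoder_param=f"{model}/decoder_jit_trace-pnnx.ncnn.param",
    decoder_bin=f"{model}/decoder_jit_trace-pnnx.ncnn.bin",
    joiner_param=f"{model}/joiner_jit_trace-pnnx.ncnn.param",
    joiner_bin=f"{model}/joiner_jit_trace-pnnx.ncnn.bin",
    num_threads=4,
)

audio, sr = librosa.load("test.wav", sr=16_000)  # resample to the model's 16 kHz
recognizer.accept_waveform(sr, audio)
# 0.5 s of tail padding flushes the last frames, as in recognize() above.
recognizer.accept_waveform(sr, np.zeros(int(sr * 0.5), dtype=np.float32))
recognizer.input_finished()
print(recognizer.text)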
packages.txt ADDED
@@ -0,0 +1 @@
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ sherpa-ncnn
+ soundfile
+ librosa
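requirements.txt covers the Python dependencies, while packages.txt lists system packages for the Space; ffmpeg is presumably included so librosa can decode compressed uploads such as browser microphone recordings. A quick sanity check of the installed stack, with "sample.mp3" as a placeholder file:

import librosa

# Decode any ffmpeg-supported format to 16 kHz mono float32,
# the input format app.py feeds to sherpa_ncnn.
audio, sr = librosa.load("sample.mp3", sr=16_000, mono=True)
print(f"{len(audio)} samples at {sr} Hz")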