Fix conflict
Browse files- add-model-metadata.py +87 -0
- am.mvn +8 -0
- config.yaml +0 -0
- convert-tokens.py +31 -0
- jfk.wav +0 -0
- model.int8.onnx +3 -0
- model.onnx +3 -0
- new_tokens.txt +0 -0
- quantize-model.py +23 -0
- requirements.txt +3 -0
- test-paraformer-onnx.py +107 -0
- tokens.txt +0 -0
add-model-metadata.py
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
|
3 |
+
# Copyright (c) 2023 Xiaomi Corporation
|
4 |
+
# Author: Fangjun Kuang
|
5 |
+
|
6 |
+
from typing import Dict
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
import onnx
|
10 |
+
|
11 |
+
|
12 |
+
def load_cmvn():
    """Parse ``am.mvn`` and return ``(neg_mean, inv_stddev)``.

    Only ``<LearnRateCoef>`` lines carry data; on each one the tokens
    between the leading ``<LearnRateCoef> 0 [`` and the trailing ``]``
    are joined with commas.  The first such line is the negated mean,
    the last subsequent one the inverse standard deviation.  Either
    value stays ``None`` when no matching line is found.
    """
    stats = []
    with open("am.mvn") as f:
        for line in f:
            if line.startswith("<LearnRateCoef>"):
                # Tokens [3:-1] skip "<LearnRateCoef> 0 [" and drop "]".
                stats.append(",".join(line.split()[3:-1]))

    neg_mean = stats[0] if stats else None
    inv_stddev = stats[-1] if len(stats) > 1 else None
    return neg_mean, inv_stddev
|
28 |
+
|
29 |
+
|
30 |
+
def load_lfr_params():
    """Return ``(lfr_window_size, lfr_window_shift)`` read from config.yaml.

    Scans the file line by line, taking the last whitespace-separated
    token of the ``lfr_m`` and ``lfr_n`` lines as integers; scanning
    stops as soon as the ``lfr_n`` line is seen.
    """
    with open("config.yaml") as f:
        for line in f:
            fields = line.split()
            if "lfr_m" in line:
                lfr_m = int(fields[-1])
            elif "lfr_n" in line:
                lfr_n = int(fields[-1])
                break
    # lfr_m frames are stacked per output frame; the window moves by lfr_n.
    return lfr_m, lfr_n
|
41 |
+
|
42 |
+
|
43 |
+
def get_vocab_size():
    """Return the vocabulary size, i.e. the number of lines in tokens.txt."""
    with open("tokens.txt") as f:
        return sum(1 for _ in f)
|
46 |
+
|
47 |
+
|
48 |
+
def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs stored in the model's ``metadata_props``.
    """
    model = onnx.load(filename)
    for key, value in meta_data.items():
        meta = model.metadata_props.add()
        meta.key = key
        meta.value = value

    onnx.save(model, filename)
    # Fix: the original f-string had no placeholder, printing a literal
    # instead of the file actually modified.
    print(f"Updated {filename}")
|
65 |
+
|
66 |
+
|
67 |
+
def main():
    """Gather LFR/CMVN/vocab settings from local files and embed them
    as metadata into ``model.onnx`` (modified in place)."""
    window_size, window_shift = load_lfr_params()
    neg_mean, inv_stddev = load_cmvn()
    vocab_size = get_vocab_size()

    # All metadata values must be strings for ONNX metadata_props.
    meta_data = {
        "lfr_window_size": str(window_size),
        "lfr_window_shift": str(window_shift),
        "neg_mean": neg_mean,
        "inv_stddev": inv_stddev,
        "model_type": "paraformer",
        "version": "1",
        "model_author": "damo",
        "vocab_size": str(vocab_size),
        "comment": "speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
    }
    add_meta_data("model.onnx", meta_data)


if __name__ == "__main__":
    main()
|
am.mvn
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<Nnet>
|
2 |
+
<Splice> 560 560
|
3 |
+
[ 0 ]
|
4 |
+
<AddShift> 560 560
|
5 |
+
<LearnRateCoef> 0 [ -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 
-13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 
-8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 ]
|
6 |
+
<Rescale> 560 560
|
7 |
+
<LearnRateCoef> 0 [ 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 
0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 
0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 ]
|
8 |
+
</Nnet>
|
config.yaml
ADDED
The diff for this file is too large to render.
See raw diff
|
|
convert-tokens.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
import sys
|
3 |
+
from typing import Dict
|
4 |
+
|
5 |
+
|
6 |
+
def load_tokens():
    """Read single-column ``tokens.txt`` into an ``{index: symbol}`` dict.

    Exits immediately if any line already has two columns, which means
    the file was converted previously.
    """
    mapping = dict()
    with open("tokens.txt", encoding="utf-8") as f:
        for idx, line in enumerate(f):
            if len(line.strip().split()) == 2:
                sys.exit("Already converted!\nExiting")

            mapping[idx] = line.strip()
    return mapping
|
17 |
+
|
18 |
+
|
19 |
+
def write_tokens(tokens: Dict[int, str]):
    """Write tokens as two-column ``<symbol> <index>`` lines to new_tokens.txt."""
    lines = [f"{sym} {idx}\n" for idx, sym in tokens.items()]
    with open("new_tokens.txt", "w", encoding="utf-8") as f:
        f.writelines(lines)
|
23 |
+
|
24 |
+
|
25 |
+
def main():
    """Convert single-column tokens.txt into two-column new_tokens.txt."""
    write_tokens(load_tokens())


if __name__ == "__main__":
    main()
|
jfk.wav
ADDED
Binary file (352 kB). View file
|
|
model.int8.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a63ecbd790d027e8554995d7f5d66b7e1629d360a2d1be947263de973c8b913
|
3 |
+
size 241595433
|
model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6e89731feec68c775f8f756acd960e72ef17c8c87feec9c7b112cbeb0bcff75
|
3 |
+
size 868256686
|
new_tokens.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
quantize-model.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
|
3 |
+
import onnx
|
4 |
+
from onnxruntime.quantization import QuantType, quantize_dynamic
|
5 |
+
|
6 |
+
|
7 |
+
def main():
    """Dynamically quantize MatMul weights of model.onnx to uint8,
    writing the result to model.int8.onnx.

    Nodes whose name contains "output" are kept at full precision.
    """
    graph_nodes = [node.name for node in onnx.load("model.onnx").graph.node]
    excluded = [name for name in graph_nodes if "output" in name]
    print(excluded)
    quantize_dynamic(
        model_input="model.onnx",
        model_output="model.int8.onnx",
        op_types_to_quantize=["MatMul"],
        per_channel=True,
        weight_type=QuantType.QUInt8,
        nodes_to_exclude=excluded,
    )


if __name__ == "__main__":
    main()
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
kaldi-native-fbank
|
2 |
+
librosa
|
3 |
+
onnxruntime
|
test-paraformer-onnx.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
|
3 |
+
# Copyright (c) 2023 Xiaomi Corporation
|
4 |
+
# Author: Fangjun Kuang
|
5 |
+
|
6 |
+
import kaldi_native_fbank as knf
|
7 |
+
import librosa
|
8 |
+
import numpy as np
|
9 |
+
import onnxruntime
|
10 |
+
|
11 |
+
|
12 |
+
def load_cmvn():
    """Parse ``am.mvn`` and return ``(neg_mean, inv_std)`` as float32 arrays.

    Only ``<LearnRateCoef>`` lines carry data; tokens [3:-1] of each such
    line (the numbers between ``[`` and ``]``) are converted to floats.
    The first matching line is the negated mean, the last subsequent one
    the inverse standard deviation; either stays ``None`` when absent.
    """
    stats = []
    with open("am.mvn") as f:
        for line in f:
            if line.startswith("<LearnRateCoef>"):
                values = [float(v) for v in line.split()[3:-1]]
                stats.append(np.array(values, dtype=np.float32))

    neg_mean = stats[0] if stats else None
    inv_std = stats[-1] if len(stats) > 1 else None
    return neg_mean, inv_std
|
29 |
+
|
30 |
+
|
31 |
+
def compute_feat():
    """Compute LFR-stacked, CMVN-normalized fbank features for jfk.wav.

    Returns a 2-D float32 array of shape (T, 80 * 7): 80-bin fbank frames
    stacked in groups of 7 (lfr_m) with a shift of 6 frames (lfr_n),
    then normalized as (x + neg_mean) * inv_std using stats from am.mvn.
    """
    sample_rate = 16000
    # NOTE(review): librosa returns float32 samples in [-1, 1]; they are
    # rescaled to 16-bit range below before feature extraction.
    samples, _ = librosa.load("jfk.wav", sr=sample_rate)
    opts = knf.FbankOptions()
    opts.frame_opts.dither = 0  # deterministic output
    opts.frame_opts.snip_edges = False
    opts.frame_opts.samp_freq = sample_rate
    opts.mel_opts.num_bins = 80

    online_fbank = knf.OnlineFbank(opts)
    online_fbank.accept_waveform(sample_rate, (samples * 32768).tolist())
    online_fbank.input_finished()

    # (num_frames, 80) matrix of per-frame fbank features.
    features = np.stack(
        [online_fbank.get_frame(i) for i in range(online_fbank.num_frames_ready)]
    )
    # The as_strided trick below hard-codes C-contiguous float32 layout,
    # so both properties are asserted first.
    assert features.data.contiguous is True
    assert features.dtype == np.float32, features.dtype

    window_size = 7  # lfr_m
    window_shift = 6  # lfr_n

    # Low-frame-rate stacking: view each output row as window_size
    # consecutive frames flattened together, advancing window_shift
    # frames per row.  The byte strides use 4 = float32 itemsize
    # (guaranteed by the dtype assert above).  This is a zero-copy view.
    T = (features.shape[0] - window_size) // window_shift + 1
    features = np.lib.stride_tricks.as_strided(
        features,
        shape=(T, features.shape[1] * window_size),
        strides=((window_shift * features.shape[1]) * 4, 4),
    )
    neg_mean, inv_std = load_cmvn()
    # CMVN: mean is stored negated, stddev stored inverted, so
    # normalization is add-then-multiply.
    features = (features + neg_mean) * inv_std
    return features
|
62 |
+
|
63 |
+
|
64 |
+
# tokens.txt in paraformer has only one column
|
65 |
+
# while it has two columns in sherpa-onnx.
|
66 |
+
# This function can handle tokens.txt from both paraformer and sherpa-onnx
|
67 |
+
def load_tokens():
    """Map token id -> symbol from ``tokens.txt``.

    Works for both the one-column paraformer format and the two-column
    ``<symbol> <id>`` sherpa-onnx format: only the first column is used,
    and ids are assigned by line order.
    """
    id2sym = dict()
    with open("tokens.txt", encoding="utf-8") as f:
        for idx, line in enumerate(f):
            id2sym[idx] = line.strip().split()[0]
    return id2sym
|
75 |
+
|
76 |
+
|
77 |
+
def main():
    """Run model.onnx on features computed from jfk.wav and print the
    greedy-decoded transcript."""
    features = compute_feat()
    features = np.expand_dims(features, axis=0)  # add batch dimension
    features_length = np.array([features.shape[1]], dtype=np.int32)

    session_opts = onnxruntime.SessionOptions()
    session_opts.log_severity_level = 3  # error level
    sess = onnxruntime.InferenceSession("model.onnx", session_opts)

    inputs = {
        "speech": features,
        "speech_lengths": features_length,
    }
    output_names = ["logits"]

    try:
        outputs = sess.run(output_names, input_feed=inputs)
    # Fix: the original caught the undefined name ONNXRuntimeError, so the
    # handler itself raised NameError.  onnxruntime signals inference
    # failure with exceptions from its capi module; Fail is the generic one.
    except onnxruntime.capi.onnxruntime_pybind11_state.Fail:
        print("Input wav is silence or noise")
        return

    log_probs = outputs[0].squeeze(0)
    # Greedy decoding: pick the most probable token per frame.
    y = log_probs.argmax(axis=-1)

    tokens = load_tokens()
    # Ids 0 and 2 are skipped — presumably blank/eos; verify against tokens.txt.
    text = "".join([tokens[i] for i in y if i not in (0, 2)])
    print(text)


if __name__ == "__main__":
    main()
|
tokens.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|