Create my_utils.py
Browse files- lib/my_utils.py +21 -0
lib/my_utils.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import ffmpeg
|
2 |
+
import numpy as np
|
3 |
+
|
4 |
+
|
5 |
+
def load_audio(file: str, sr: int) -> np.ndarray:
    """Decode an audio file into a mono float32 waveform resampled to ``sr``.

    Adapted from
    https://github.com/openai/whisper/blob/main/whisper/audio.py#L26

    This launches an ffmpeg subprocess to decode the audio while down-mixing
    and resampling as necessary. Requires the ffmpeg CLI and the
    ``ffmpeg-python`` package to be installed.

    Args:
        file: Path of the audio file. Leading/trailing spaces, double quotes
            and line breaks (typical of a copy-pasted path) are removed
            before the path is handed to ffmpeg.
        sr: Target sample rate, forwarded to ffmpeg as ``ar``.

    Returns:
        A 1-D ``np.float32`` array built from ffmpeg's raw ``f32le`` output.

    Raises:
        RuntimeError: If cleaning the path or running ffmpeg fails. The
            original exception is attached as ``__cause__`` and, when the
            exception carries captured stderr, that text is appended to the
            message.
    """
    try:
        # Users often paste paths with stray spaces, quotes or a trailing
        # line break; strip any mix of them in one pass so the result does
        # not depend on the order in which they appear.
        file = file.strip(' "\r\n')
        out, _ = (
            ffmpeg.input(file, threads=0)
            .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
            .run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
        )
    except Exception as e:
        detail = str(e)
        # ffmpeg.Error only says "see stderr output for detail"; surface the
        # captured stderr so the failure reason is visible to the caller.
        stderr = getattr(e, "stderr", None)
        if isinstance(stderr, bytes):
            stderr = stderr.decode("utf-8", errors="replace")
        if stderr:
            detail = f"{detail}\n{stderr.strip()}"
        raise RuntimeError(f"Failed to load audio: {detail}") from e

    return np.frombuffer(out, np.float32).flatten()
|