Spaces:
Build error
Build error
Upload 35 files
Browse files- audio_detection/__init__.py +0 -0
- audio_detection/audio_infer/__init__.py +0 -0
- audio_detection/audio_infer/__pycache__/__init__.cpython-38.pyc +0 -0
- audio_detection/audio_infer/metadata/black_list/groundtruth_weak_label_evaluation_set.csv +1350 -0
- audio_detection/audio_infer/metadata/black_list/groundtruth_weak_label_testing_set.csv +606 -0
- audio_detection/audio_infer/metadata/class_labels_indices.csv +528 -0
- audio_detection/audio_infer/pytorch/__pycache__/models.cpython-38.pyc +0 -0
- audio_detection/audio_infer/pytorch/__pycache__/pytorch_utils.cpython-38.pyc +0 -0
- audio_detection/audio_infer/pytorch/evaluate.py +42 -0
- audio_detection/audio_infer/pytorch/finetune_template.py +127 -0
- audio_detection/audio_infer/pytorch/inference.py +206 -0
- audio_detection/audio_infer/pytorch/losses.py +14 -0
- audio_detection/audio_infer/pytorch/main.py +378 -0
- audio_detection/audio_infer/pytorch/models.py +951 -0
- audio_detection/audio_infer/pytorch/pytorch_utils.py +251 -0
- audio_detection/audio_infer/results/YDlWd7Wmdi1E.png +0 -0
- audio_detection/audio_infer/useful_ckpts/audio_detection.pth +3 -0
- audio_detection/audio_infer/utils/__pycache__/config.cpython-38.pyc +0 -0
- audio_detection/audio_infer/utils/config.py +94 -0
- audio_detection/audio_infer/utils/crash.py +12 -0
- audio_detection/audio_infer/utils/create_black_list.py +64 -0
- audio_detection/audio_infer/utils/create_indexes.py +126 -0
- audio_detection/audio_infer/utils/data_generator.py +421 -0
- audio_detection/audio_infer/utils/dataset.py +224 -0
- audio_detection/audio_infer/utils/plot_for_paper.py +565 -0
- audio_detection/audio_infer/utils/plot_statistics.py +0 -0
- audio_detection/audio_infer/utils/utilities.py +172 -0
- audio_detection/target_sound_detection/src/__pycache__/models.cpython-38.pyc +0 -0
- audio_detection/target_sound_detection/src/__pycache__/utils.cpython-38.pyc +0 -0
- audio_detection/target_sound_detection/src/models.py +1288 -0
- audio_detection/target_sound_detection/src/utils.py +353 -0
- audio_detection/target_sound_detection/useful_ckpts/tsd/ref_mel.pth +3 -0
- audio_detection/target_sound_detection/useful_ckpts/tsd/run_config.pth +3 -0
- audio_detection/target_sound_detection/useful_ckpts/tsd/run_model_7_loss=-0.0724.pt +3 -0
- audio_detection/target_sound_detection/useful_ckpts/tsd/text_emb.pth +3 -0
audio_detection/__init__.py
ADDED
File without changes
|
audio_detection/audio_infer/__init__.py
ADDED
File without changes
|
audio_detection/audio_infer/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (171 Bytes). View file
|
|
audio_detection/audio_infer/metadata/black_list/groundtruth_weak_label_evaluation_set.csv
ADDED
@@ -0,0 +1,1350 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
-JMT0mK0Dbg_30.000_40.000.wav 30.000 40.000 Train horn
|
2 |
+
3ACjUf9QpAQ_30.000_40.000.wav 30.000 40.000 Train horn
|
3 |
+
3S2-TODd__k_90.000_100.000.wav 90.000 100.000 Train horn
|
4 |
+
3YJewEC-NWo_30.000_40.000.wav 30.000 40.000 Train horn
|
5 |
+
3jXAh3V2FO8_30.000_40.000.wav 30.000 40.000 Train horn
|
6 |
+
53oq_Otm_XI_30.000_40.000.wav 30.000 40.000 Train horn
|
7 |
+
8IaInXpdd9M_0.000_10.000.wav 0.000 10.000 Train horn
|
8 |
+
8nU1aVscJec_30.000_40.000.wav 30.000 40.000 Train horn
|
9 |
+
9LQEZJPNVpw_30.000_40.000.wav 30.000 40.000 Train horn
|
10 |
+
AHom7lBbtoY_30.000_40.000.wav 30.000 40.000 Train horn
|
11 |
+
Ag_zT74ZGNc_9.000_19.000.wav 9.000 19.000 Train horn
|
12 |
+
BQpa8whzwAE_30.000_40.000.wav 30.000 40.000 Train horn
|
13 |
+
CCX_4cW_SAU_0.000_10.000.wav 0.000 10.000 Train horn
|
14 |
+
CLIdVCUO_Vw_30.000_40.000.wav 30.000 40.000 Train horn
|
15 |
+
D_nXtMgbPNY_30.000_40.000.wav 30.000 40.000 Train horn
|
16 |
+
GFQnh84kNwU_30.000_40.000.wav 30.000 40.000 Train horn
|
17 |
+
I4qODX0fypE_30.000_40.000.wav 30.000 40.000 Train horn
|
18 |
+
IdqEbjujFb8_30.000_40.000.wav 30.000 40.000 Train horn
|
19 |
+
L3a132_uApg_50.000_60.000.wav 50.000 60.000 Train horn
|
20 |
+
LzcNa3HvD7c_30.000_40.000.wav 30.000 40.000 Train horn
|
21 |
+
MCYY8tJsnfY_7.000_17.000.wav 7.000 17.000 Train horn
|
22 |
+
MPSf7dJpV5w_30.000_40.000.wav 30.000 40.000 Train horn
|
23 |
+
NdCr5IDnkxc_30.000_40.000.wav 30.000 40.000 Train horn
|
24 |
+
P54KKbTA_TE_0.000_7.000.wav 0.000 7.000 Train horn
|
25 |
+
PJUy17bXlhc_40.000_50.000.wav 40.000 50.000 Train horn
|
26 |
+
QrAoRSA13bM_30.000_40.000.wav 30.000 40.000 Train horn
|
27 |
+
R_Lpb-51Kl4_30.000_40.000.wav 30.000 40.000 Train horn
|
28 |
+
Rq-22Cycrpg_30.000_40.000.wav 30.000 40.000 Train horn
|
29 |
+
TBjrN1aMRrM_30.000_40.000.wav 30.000 40.000 Train horn
|
30 |
+
XAUtk9lwzU8_30.000_40.000.wav 30.000 40.000 Train horn
|
31 |
+
XW8pSKLyr0o_20.000_30.000.wav 20.000 30.000 Train horn
|
32 |
+
Y10I9JSvJuQ_30.000_40.000.wav 30.000 40.000 Train horn
|
33 |
+
Y_jwEflLthg_190.000_200.000.wav 190.000 200.000 Train horn
|
34 |
+
YilfKdY7w6Y_60.000_70.000.wav 60.000 70.000 Train horn
|
35 |
+
ZcTI8fQgEZE_240.000_250.000.wav 240.000 250.000 Train horn
|
36 |
+
_8MvhMlbwiE_40.000_50.000.wav 40.000 50.000 Train horn
|
37 |
+
_dkeW6lqmq4_30.000_40.000.wav 30.000 40.000 Train horn
|
38 |
+
aXsUHAKbyLs_30.000_40.000.wav 30.000 40.000 Train horn
|
39 |
+
arevYmB0qGg_30.000_40.000.wav 30.000 40.000 Train horn
|
40 |
+
d1o334I5X_k_30.000_40.000.wav 30.000 40.000 Train horn
|
41 |
+
dSzZWgbJ378_30.000_40.000.wav 30.000 40.000 Train horn
|
42 |
+
ePVb5Upev8k_40.000_50.000.wav 40.000 50.000 Train horn
|
43 |
+
g4cA-ifQc70_30.000_40.000.wav 30.000 40.000 Train horn
|
44 |
+
g9JVq7wfDIo_30.000_40.000.wav 30.000 40.000 Train horn
|
45 |
+
gTFCK9TuLOQ_30.000_40.000.wav 30.000 40.000 Train horn
|
46 |
+
hYqzr_rIIAw_30.000_40.000.wav 30.000 40.000 Train horn
|
47 |
+
iZgzRfa-xPQ_30.000_40.000.wav 30.000 40.000 Train horn
|
48 |
+
k8H8rn4NaSM_0.000_10.000.wav 0.000 10.000 Train horn
|
49 |
+
lKQ-I_P7TEM_20.000_30.000.wav 20.000 30.000 Train horn
|
50 |
+
nfY_zkJceDw_30.000_40.000.wav 30.000 40.000 Train horn
|
51 |
+
pW5SI1ZKUpA_30.000_40.000.wav 30.000 40.000 Train horn
|
52 |
+
pxmrmtEnROk_30.000_40.000.wav 30.000 40.000 Train horn
|
53 |
+
q7zzKHFWGkg_30.000_40.000.wav 30.000 40.000 Train horn
|
54 |
+
qu8vVFWKszA_30.000_40.000.wav 30.000 40.000 Train horn
|
55 |
+
stdjjG6Y5IU_30.000_40.000.wav 30.000 40.000 Train horn
|
56 |
+
tdRMxc4UWRk_30.000_40.000.wav 30.000 40.000 Train horn
|
57 |
+
tu-cxDG2mW8_0.000_10.000.wav 0.000 10.000 Train horn
|
58 |
+
txXSE7kgrc8_30.000_40.000.wav 30.000 40.000 Train horn
|
59 |
+
xabrKa79prM_30.000_40.000.wav 30.000 40.000 Train horn
|
60 |
+
yBVxtq9k8Sg_0.000_10.000.wav 0.000 10.000 Train horn
|
61 |
+
-WoudI3gGvk_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
62 |
+
0_gci63CtFY_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
63 |
+
2-h8MRSRvEg_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
64 |
+
3NX4HaOVBoo_240.000_250.000.wav 240.000 250.000 Air horn, truck horn
|
65 |
+
9NPKQDaNCRk_0.000_6.000.wav 0.000 6.000 Air horn, truck horn
|
66 |
+
9ct4w4aYWdc_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
67 |
+
9l9QXgsJSfo_120.000_130.000.wav 120.000 130.000 Air horn, truck horn
|
68 |
+
CN0Bi4MDpA4_20.000_30.000.wav 20.000 30.000 Air horn, truck horn
|
69 |
+
CU2MyVM_B48_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
70 |
+
Cg-DWc9nPfQ_90.000_100.000.wav 90.000 100.000 Air horn, truck horn
|
71 |
+
D62L3husEa0_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
72 |
+
GO2zKyMtBV4_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
73 |
+
Ge_KWS-0098_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
74 |
+
Hk7HqLBHWng_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
75 |
+
IpyingiCwV8_0.000_3.000.wav 0.000 3.000 Air horn, truck horn
|
76 |
+
Isuh9pOuH6I_300.000_310.000.wav 300.000 310.000 Air horn, truck horn
|
77 |
+
IuTfMfzkr5Y_120.000_130.000.wav 120.000 130.000 Air horn, truck horn
|
78 |
+
MFxsgcZZtFs_10.000_20.000.wav 10.000 20.000 Air horn, truck horn
|
79 |
+
N3osL4QmOL8_49.000_59.000.wav 49.000 59.000 Air horn, truck horn
|
80 |
+
NOZsDTFLm7M_0.000_9.000.wav 0.000 9.000 Air horn, truck horn
|
81 |
+
OjVY3oM1jEU_40.000_50.000.wav 40.000 50.000 Air horn, truck horn
|
82 |
+
PNaLTW50fxM_60.000_70.000.wav 60.000 70.000 Air horn, truck horn
|
83 |
+
TYLZuBBu8ms_0.000_10.000.wav 0.000 10.000 Air horn, truck horn
|
84 |
+
UdHR1P_NIbo_110.000_120.000.wav 110.000 120.000 Air horn, truck horn
|
85 |
+
YilfKdY7w6Y_60.000_70.000.wav 60.000 70.000 Air horn, truck horn
|
86 |
+
Yt4ZWNjvJOY_50.000_60.000.wav 50.000 60.000 Air horn, truck horn
|
87 |
+
Z5M3fGT3Xjk_60.000_70.000.wav 60.000 70.000 Air horn, truck horn
|
88 |
+
ZauRsP1uH74_12.000_22.000.wav 12.000 22.000 Air horn, truck horn
|
89 |
+
a_6CZ2JaEuc_0.000_2.000.wav 0.000 2.000 Air horn, truck horn
|
90 |
+
b7m5Kt5U7Vc_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
91 |
+
bIObkrK06rk_15.000_25.000.wav 15.000 25.000 Air horn, truck horn
|
92 |
+
cdrjKqyDrak_420.000_430.000.wav 420.000 430.000 Air horn, truck horn
|
93 |
+
ckSYn557ZyE_20.000_30.000.wav 20.000 30.000 Air horn, truck horn
|
94 |
+
cs-RPPsg_ks_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
95 |
+
ctsq33oUBT8_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
96 |
+
eCFUwyU9ZWA_9.000_19.000.wav 9.000 19.000 Air horn, truck horn
|
97 |
+
ePVb5Upev8k_40.000_50.000.wav 40.000 50.000 Air horn, truck horn
|
98 |
+
fHaQPHCjyfA_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
99 |
+
fOVsAMJ3Yms_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
100 |
+
g4cA-ifQc70_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
101 |
+
gjlo4evwjlE_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
102 |
+
i9VjpIbM3iE_410.000_420.000.wav 410.000 420.000 Air horn, truck horn
|
103 |
+
ieZVo7W3BQ4_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
104 |
+
ii87iO6JboA_10.000_20.000.wav 10.000 20.000 Air horn, truck horn
|
105 |
+
jko48cNdvFA_80.000_90.000.wav 80.000 90.000 Air horn, truck horn
|
106 |
+
kJuvA2zmrnY_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
107 |
+
kUrb38hMwPs_0.000_10.000.wav 0.000 10.000 Air horn, truck horn
|
108 |
+
km_hVyma2vo_0.000_10.000.wav 0.000 10.000 Air horn, truck horn
|
109 |
+
m1e9aOwRiDQ_0.000_9.000.wav 0.000 9.000 Air horn, truck horn
|
110 |
+
mQJcObz1k_E_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
111 |
+
pk75WDyNZKc_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
112 |
+
rhUfN81puDI_0.000_10.000.wav 0.000 10.000 Air horn, truck horn
|
113 |
+
suuYwAifIAQ_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
114 |
+
wDdEZ46B-tM_460.000_470.000.wav 460.000 470.000 Air horn, truck horn
|
115 |
+
wHISHmuP58s_80.000_90.000.wav 80.000 90.000 Air horn, truck horn
|
116 |
+
xwqIKDz1bT4_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
117 |
+
y4Ko6VNiqB0_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
118 |
+
yhcmPrU3QSk_61.000_71.000.wav 61.000 71.000 Air horn, truck horn
|
119 |
+
3FWHjjZGT9U_80.000_90.000.wav 80.000 90.000 Car alarm
|
120 |
+
3YChVhqW42E_130.000_140.000.wav 130.000 140.000 Car alarm
|
121 |
+
3YRkin3bMlQ_170.000_180.000.wav 170.000 180.000 Car alarm
|
122 |
+
4APBvMmKubU_10.000_20.000.wav 10.000 20.000 Car alarm
|
123 |
+
4JDah6Ckr9k_5.000_15.000.wav 5.000 15.000 Car alarm
|
124 |
+
5hL1uGb4sas_30.000_40.000.wav 30.000 40.000 Car alarm
|
125 |
+
969Zfj4IoPk_20.000_30.000.wav 20.000 30.000 Car alarm
|
126 |
+
AyfuBDN3Vdw_40.000_50.000.wav 40.000 50.000 Car alarm
|
127 |
+
B-ZqhRg3km4_60.000_70.000.wav 60.000 70.000 Car alarm
|
128 |
+
BDnwA3AaclE_10.000_20.000.wav 10.000 20.000 Car alarm
|
129 |
+
ES-rjFfuxq4_120.000_130.000.wav 120.000 130.000 Car alarm
|
130 |
+
EWbZq5ruCpg_0.000_10.000.wav 0.000 10.000 Car alarm
|
131 |
+
F50h9HiyC3k_40.000_50.000.wav 40.000 50.000 Car alarm
|
132 |
+
F5AP8kQvogM_30.000_40.000.wav 30.000 40.000 Car alarm
|
133 |
+
FKJuDOAumSk_20.000_30.000.wav 20.000 30.000 Car alarm
|
134 |
+
GmbNjZi4xBw_30.000_40.000.wav 30.000 40.000 Car alarm
|
135 |
+
H7lOMlND9dc_30.000_40.000.wav 30.000 40.000 Car alarm
|
136 |
+
Hu8lxbHYaqg_40.000_50.000.wav 40.000 50.000 Car alarm
|
137 |
+
IziTYkSwq9Q_30.000_40.000.wav 30.000 40.000 Car alarm
|
138 |
+
JcO2TTtiplA_30.000_40.000.wav 30.000 40.000 Car alarm
|
139 |
+
KKx7dWRg8s8_8.000_18.000.wav 8.000 18.000 Car alarm
|
140 |
+
Kf9Kr69mwOA_14.000_24.000.wav 14.000 24.000 Car alarm
|
141 |
+
L535vIV3ED4_40.000_50.000.wav 40.000 50.000 Car alarm
|
142 |
+
LOjT44tFx1A_0.000_10.000.wav 0.000 10.000 Car alarm
|
143 |
+
Mxn2FKuNwiI_20.000_30.000.wav 20.000 30.000 Car alarm
|
144 |
+
Nkqx09b-xyI_70.000_80.000.wav 70.000 80.000 Car alarm
|
145 |
+
QNKo1W1WRbc_22.000_32.000.wav 22.000 32.000 Car alarm
|
146 |
+
R0VxYDfjyAU_60.000_70.000.wav 60.000 70.000 Car alarm
|
147 |
+
TJ58vMpSy1w_30.000_40.000.wav 30.000 40.000 Car alarm
|
148 |
+
ToU1kRagUjY_0.000_10.000.wav 0.000 10.000 Car alarm
|
149 |
+
TrQGIZqrW0s_30.000_40.000.wav 30.000 40.000 Car alarm
|
150 |
+
ULFhHR0OLSE_30.000_40.000.wav 30.000 40.000 Car alarm
|
151 |
+
ULS3ffQkCW4_30.000_40.000.wav 30.000 40.000 Car alarm
|
152 |
+
U_9NuNORYQM_1.000_11.000.wav 1.000 11.000 Car alarm
|
153 |
+
UkCEuwYUW8c_110.000_120.000.wav 110.000 120.000 Car alarm
|
154 |
+
Wak5QxsS-QU_30.000_40.000.wav 30.000 40.000 Car alarm
|
155 |
+
XzE7mp3pVik_0.000_10.000.wav 0.000 10.000 Car alarm
|
156 |
+
Y-4dtrP-RNo_7.000_17.000.wav 7.000 17.000 Car alarm
|
157 |
+
Zltlj0fDeS4_30.000_40.000.wav 30.000 40.000 Car alarm
|
158 |
+
cB1jkzgH2es_150.000_160.000.wav 150.000 160.000 Car alarm
|
159 |
+
eIMjkADTWzA_60.000_70.000.wav 60.000 70.000 Car alarm
|
160 |
+
eL7s5CoW0UA_0.000_7.000.wav 0.000 7.000 Car alarm
|
161 |
+
i9VjpIbM3iE_410.000_420.000.wav 410.000 420.000 Car alarm
|
162 |
+
iWl-5LNURFc_30.000_40.000.wav 30.000 40.000 Car alarm
|
163 |
+
iX34nDCq9NU_10.000_20.000.wav 10.000 20.000 Car alarm
|
164 |
+
ii87iO6JboA_10.000_20.000.wav 10.000 20.000 Car alarm
|
165 |
+
l6_h_YHuTbY_30.000_40.000.wav 30.000 40.000 Car alarm
|
166 |
+
lhedRVb85Fk_30.000_40.000.wav 30.000 40.000 Car alarm
|
167 |
+
monelE7hnwI_20.000_30.000.wav 20.000 30.000 Car alarm
|
168 |
+
o2CmtHNUrXg_30.000_40.000.wav 30.000 40.000 Car alarm
|
169 |
+
pXX6cK4xtiY_11.000_21.000.wav 11.000 21.000 Car alarm
|
170 |
+
stnVta2ip9g_30.000_40.000.wav 30.000 40.000 Car alarm
|
171 |
+
uvuVg9Cl0n0_30.000_40.000.wav 30.000 40.000 Car alarm
|
172 |
+
vF2zXcbADUk_20.000_30.000.wav 20.000 30.000 Car alarm
|
173 |
+
vN7dJyt-nj0_20.000_30.000.wav 20.000 30.000 Car alarm
|
174 |
+
w8Md65mE5Vc_30.000_40.000.wav 30.000 40.000 Car alarm
|
175 |
+
ySqfMcFk5LM_30.000_40.000.wav 30.000 40.000 Car alarm
|
176 |
+
ysNK5RVF3Zw_0.000_10.000.wav 0.000 10.000 Car alarm
|
177 |
+
za8KPcQ0dTw_30.000_40.000.wav 30.000 40.000 Car alarm
|
178 |
+
-2sE5CH8Wb8_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
179 |
+
-fJsZm3YRc0_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
180 |
+
-oSzD8P2BtU_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
181 |
+
-pzwalZ0ub0_5.000_15.000.wav 5.000 15.000 Reversing beeps
|
182 |
+
-t-htrAtNvM_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
183 |
+
-zNEcuo28oE_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
184 |
+
077aWlQn6XI_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
185 |
+
0O-gZoirpRA_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
186 |
+
10aF24rMeu0_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
187 |
+
1P5FFxXLSpY_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
188 |
+
1n_s2Gb5R1Q_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
189 |
+
2HZcxlRs-hg_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
190 |
+
2Jpg_KvJWL0_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
191 |
+
2WTk_j_fivY_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
192 |
+
38F6eeIR-s0_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
193 |
+
3xh2kScw64U_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
194 |
+
4MIHbR4QZhE_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
195 |
+
4Tpy1lsfcSM_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
196 |
+
4XMY2IvVSf0_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
197 |
+
4ep09nZl3LA_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
198 |
+
4t1VqRz4w2g_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
199 |
+
4tKvAMmAUMM_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
200 |
+
5-x2pk3YYAs_11.000_21.000.wav 11.000 21.000 Reversing beeps
|
201 |
+
5DW8WjxxCag_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
202 |
+
5DjZHCumLfs_11.000_21.000.wav 11.000 21.000 Reversing beeps
|
203 |
+
5V0xKS-FGMk_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
204 |
+
5fLzQegwHUg_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
205 |
+
6Y8bKS6KLeE_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
206 |
+
6xEHP-C-ZuU_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
207 |
+
6yyToq9cW9A_60.000_70.000.wav 60.000 70.000 Reversing beeps
|
208 |
+
7Gua0-UrKIw_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
209 |
+
7nglQSmcjAk_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
210 |
+
81DteAPIhoE_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
211 |
+
96a4smrM_30_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
212 |
+
9EsgN-WS2qY_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
213 |
+
9OcAwC8y-eQ_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
214 |
+
9Ti98L4PRCo_17.000_27.000.wav 17.000 27.000 Reversing beeps
|
215 |
+
9yhMtJ50sys_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
216 |
+
A9KMqwqLboE_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
217 |
+
AFwmMFq_xlc_390.000_400.000.wav 390.000 400.000 Reversing beeps
|
218 |
+
AvhBRiwWJU4_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
219 |
+
CL5vkiMs2c0_10.000_20.000.wav 10.000 20.000 Reversing beeps
|
220 |
+
DcU6AzN7imA_210.000_220.000.wav 210.000 220.000 Reversing beeps
|
221 |
+
ISBJKY8hwnM_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
222 |
+
LA5TekLaIPI_10.000_20.000.wav 10.000 20.000 Reversing beeps
|
223 |
+
NqzZbJJl3E4_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
224 |
+
PSt0xAYgf4g_0.000_10.000.wav 0.000 10.000 Reversing beeps
|
225 |
+
Q1CMSV81_ws_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
226 |
+
_gG0KNGD47M_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
227 |
+
ckt7YEGcSoY_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
228 |
+
eIkUuCRE_0U_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
229 |
+
kH6fFjIZkB0_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
230 |
+
mCJ0aqIygWE_24.000_34.000.wav 24.000 34.000 Reversing beeps
|
231 |
+
nFqf1vflJaI_350.000_360.000.wav 350.000 360.000 Reversing beeps
|
232 |
+
nMaSkwx6cHE_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
233 |
+
oHKTmTLEy68_11.000_21.000.wav 11.000 21.000 Reversing beeps
|
234 |
+
saPU2JNoytU_0.000_10.000.wav 0.000 10.000 Reversing beeps
|
235 |
+
tQd0vFueRKs_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
236 |
+
vzP6soELj2Q_0.000_10.000.wav 0.000 10.000 Reversing beeps
|
237 |
+
0x82_HySIVU_30.000_40.000.wav 30.000 40.000 Bicycle
|
238 |
+
1IQdvfm9SDY_30.000_40.000.wav 30.000 40.000 Bicycle
|
239 |
+
1_hGvbEiYAs_30.000_40.000.wav 30.000 40.000 Bicycle
|
240 |
+
26CM8IXODG4_2.000_12.000.wav 2.000 12.000 Bicycle
|
241 |
+
2f7Ad-XpbnY_30.000_40.000.wav 30.000 40.000 Bicycle
|
242 |
+
3-a8i_MEUl8_30.000_40.000.wav 30.000 40.000 Bicycle
|
243 |
+
7KiTXYwaD04_7.000_17.000.wav 7.000 17.000 Bicycle
|
244 |
+
7gkjn-LLInI_30.000_40.000.wav 30.000 40.000 Bicycle
|
245 |
+
84flVacRHUI_21.000_31.000.wav 21.000 31.000 Bicycle
|
246 |
+
9VziOIkNXsE_30.000_40.000.wav 30.000 40.000 Bicycle
|
247 |
+
ANofTuuN0W0_160.000_170.000.wav 160.000 170.000 Bicycle
|
248 |
+
B6n0op0sLPA_30.000_40.000.wav 30.000 40.000 Bicycle
|
249 |
+
D4_zTwsCRds_60.000_70.000.wav 60.000 70.000 Bicycle
|
250 |
+
DEs_Sp9S1Nw_30.000_40.000.wav 30.000 40.000 Bicycle
|
251 |
+
GjsxrMRRdfQ_3.000_13.000.wav 3.000 13.000 Bicycle
|
252 |
+
GkpUU3VX4wQ_30.000_40.000.wav 30.000 40.000 Bicycle
|
253 |
+
H9HNXYxRmv8_30.000_40.000.wav 30.000 40.000 Bicycle
|
254 |
+
HPWRKwrs-rY_370.000_380.000.wav 370.000 380.000 Bicycle
|
255 |
+
HrQxbNO5jXU_6.000_16.000.wav 6.000 16.000 Bicycle
|
256 |
+
IYaEZkAO0LU_30.000_40.000.wav 30.000 40.000 Bicycle
|
257 |
+
Idzfy0XbZRo_7.000_17.000.wav 7.000 17.000 Bicycle
|
258 |
+
Iigfz_GeXVs_30.000_40.000.wav 30.000 40.000 Bicycle
|
259 |
+
JWCtQ_94YoQ_30.000_40.000.wav 30.000 40.000 Bicycle
|
260 |
+
JXmBrD4b4EI_30.000_40.000.wav 30.000 40.000 Bicycle
|
261 |
+
LSZPNwZex9s_30.000_40.000.wav 30.000 40.000 Bicycle
|
262 |
+
M5kwg1kx4q0_30.000_40.000.wav 30.000 40.000 Bicycle
|
263 |
+
NrR1wmCpqAk_12.000_22.000.wav 12.000 22.000 Bicycle
|
264 |
+
O1_Rw2dHb1I_2.000_12.000.wav 2.000 12.000 Bicycle
|
265 |
+
OEN0TySl1Jw_10.000_20.000.wav 10.000 20.000 Bicycle
|
266 |
+
PF7uY9ydMYc_30.000_40.000.wav 30.000 40.000 Bicycle
|
267 |
+
SDl0tWf9Q44_30.000_40.000.wav 30.000 40.000 Bicycle
|
268 |
+
SkXXjcw9sJI_30.000_40.000.wav 30.000 40.000 Bicycle
|
269 |
+
Ssa1m5Mnllw_0.000_9.000.wav 0.000 9.000 Bicycle
|
270 |
+
UB-A1oyNyyg_0.000_6.000.wav 0.000 6.000 Bicycle
|
271 |
+
UqyvFyQthHo_30.000_40.000.wav 30.000 40.000 Bicycle
|
272 |
+
Wg4ik5zZxBc_250.000_260.000.wav 250.000 260.000 Bicycle
|
273 |
+
WvquSD2PcCE_30.000_40.000.wav 30.000 40.000 Bicycle
|
274 |
+
YIJBuXUi64U_30.000_40.000.wav 30.000 40.000 Bicycle
|
275 |
+
aBHdl_TiseI_30.000_40.000.wav 30.000 40.000 Bicycle
|
276 |
+
aeHCq6fFkNo_30.000_40.000.wav 30.000 40.000 Bicycle
|
277 |
+
amKDjVcs1Vg_30.000_40.000.wav 30.000 40.000 Bicycle
|
278 |
+
ehYwty_G2L4_13.000_23.000.wav 13.000 23.000 Bicycle
|
279 |
+
jOlVJv7jAHg_30.000_40.000.wav 30.000 40.000 Bicycle
|
280 |
+
lGFDQ-ZwUfk_30.000_40.000.wav 30.000 40.000 Bicycle
|
281 |
+
lmTHvLGQy3g_50.000_60.000.wav 50.000 60.000 Bicycle
|
282 |
+
nNHW3Uxlb-g_30.000_40.000.wav 30.000 40.000 Bicycle
|
283 |
+
o98R4ruf8kw_30.000_40.000.wav 30.000 40.000 Bicycle
|
284 |
+
oiLHBkHgkAo_0.000_8.000.wav 0.000 8.000 Bicycle
|
285 |
+
qL0ESQcaPhM_30.000_40.000.wav 30.000 40.000 Bicycle
|
286 |
+
qjz5t9M4YCw_30.000_40.000.wav 30.000 40.000 Bicycle
|
287 |
+
qrCWPsqG9vA_30.000_40.000.wav 30.000 40.000 Bicycle
|
288 |
+
r06tmeUDgc8_3.000_13.000.wav 3.000 13.000 Bicycle
|
289 |
+
sAMjMyCdGOc_30.000_40.000.wav 30.000 40.000 Bicycle
|
290 |
+
tKdRlWz-1pg_30.000_40.000.wav 30.000 40.000 Bicycle
|
291 |
+
uNpSMpqlkMA_0.000_10.000.wav 0.000 10.000 Bicycle
|
292 |
+
vOYj9W7Jsxk_8.000_18.000.wav 8.000 18.000 Bicycle
|
293 |
+
xBKrmKdjAIA_0.000_10.000.wav 0.000 10.000 Bicycle
|
294 |
+
xfNeZaw4o3U_17.000_27.000.wav 17.000 27.000 Bicycle
|
295 |
+
xgiJqbhhU3c_30.000_40.000.wav 30.000 40.000 Bicycle
|
296 |
+
0vg9qxNKXOw_30.000_40.000.wav 30.000 40.000 Skateboard
|
297 |
+
10YXuv9Go0E_140.000_150.000.wav 140.000 150.000 Skateboard
|
298 |
+
3-a8i_MEUl8_30.000_40.000.wav 30.000 40.000 Skateboard
|
299 |
+
6kXUG1Zo6VA_0.000_10.000.wav 0.000 10.000 Skateboard
|
300 |
+
84fDGWoRtsU_210.000_220.000.wav 210.000 220.000 Skateboard
|
301 |
+
8kbHA22EWd0_330.000_340.000.wav 330.000 340.000 Skateboard
|
302 |
+
8m-a_6wLTkU_230.000_240.000.wav 230.000 240.000 Skateboard
|
303 |
+
9QwaP-cvdeU_360.000_370.000.wav 360.000 370.000 Skateboard
|
304 |
+
9ZYj5toEbGA_0.000_10.000.wav 0.000 10.000 Skateboard
|
305 |
+
9gkppwB5CXA_30.000_40.000.wav 30.000 40.000 Skateboard
|
306 |
+
9hlXgXWXYXQ_0.000_6.000.wav 0.000 6.000 Skateboard
|
307 |
+
ALxn5-2bVyI_30.000_40.000.wav 30.000 40.000 Skateboard
|
308 |
+
ANPjV_rudog_30.000_40.000.wav 30.000 40.000 Skateboard
|
309 |
+
ATAL-_Dblvg_0.000_7.000.wav 0.000 7.000 Skateboard
|
310 |
+
An-4jPvUT14_60.000_70.000.wav 60.000 70.000 Skateboard
|
311 |
+
BGR0QnX4k6w_30.000_40.000.wav 30.000 40.000 Skateboard
|
312 |
+
BlhUt8AJJO8_30.000_40.000.wav 30.000 40.000 Skateboard
|
313 |
+
CD7INyI79fM_170.000_180.000.wav 170.000 180.000 Skateboard
|
314 |
+
CNcxzB9F-Q8_100.000_110.000.wav 100.000 110.000 Skateboard
|
315 |
+
DqOGYyFVnKk_200.000_210.000.wav 200.000 210.000 Skateboard
|
316 |
+
E0gBwPTHxqE_30.000_40.000.wav 30.000 40.000 Skateboard
|
317 |
+
E3XIdP8kxwg_110.000_120.000.wav 110.000 120.000 Skateboard
|
318 |
+
FQZnQhiM41U_0.000_6.000.wav 0.000 6.000 Skateboard
|
319 |
+
FRwFfq3Tl1g_310.000_320.000.wav 310.000 320.000 Skateboard
|
320 |
+
JJo971B_eDg_30.000_40.000.wav 30.000 40.000 Skateboard
|
321 |
+
KXkxqxoCylc_30.000_40.000.wav 30.000 40.000 Skateboard
|
322 |
+
L4Z7XkS6CtA_30.000_40.000.wav 30.000 40.000 Skateboard
|
323 |
+
LjEqr0Z7xm0_0.000_6.000.wav 0.000 6.000 Skateboard
|
324 |
+
MAbDEeLF4cQ_30.000_40.000.wav 30.000 40.000 Skateboard
|
325 |
+
MUBbiivNYZs_30.000_40.000.wav 30.000 40.000 Skateboard
|
326 |
+
Nq8GyBrTI8Y_30.000_40.000.wav 30.000 40.000 Skateboard
|
327 |
+
PPq9QZmV7jc_25.000_35.000.wav 25.000 35.000 Skateboard
|
328 |
+
PVgL5wFOKMs_30.000_40.000.wav 30.000 40.000 Skateboard
|
329 |
+
Tcq_xAdCMr4_30.000_40.000.wav 30.000 40.000 Skateboard
|
330 |
+
UtZofZjccBs_290.000_300.000.wav 290.000 300.000 Skateboard
|
331 |
+
VZfrDZhI7BU_30.000_40.000.wav 30.000 40.000 Skateboard
|
332 |
+
WxChkRrVOIs_0.000_7.000.wav 0.000 7.000 Skateboard
|
333 |
+
YV0noe1sZAs_150.000_160.000.wav 150.000 160.000 Skateboard
|
334 |
+
YjScrri_F7U_0.000_10.000.wav 0.000 10.000 Skateboard
|
335 |
+
YrGQKTbiG1g_30.000_40.000.wav 30.000 40.000 Skateboard
|
336 |
+
ZM67kt6G-d4_30.000_40.000.wav 30.000 40.000 Skateboard
|
337 |
+
ZaUaqnLdg6k_30.000_40.000.wav 30.000 40.000 Skateboard
|
338 |
+
ZhpkRcAEJzc_3.000_13.000.wav 3.000 13.000 Skateboard
|
339 |
+
_43OOP6UEw0_30.000_40.000.wav 30.000 40.000 Skateboard
|
340 |
+
_6Fyave4jqA_260.000_270.000.wav 260.000 270.000 Skateboard
|
341 |
+
aOoZ0bCoaZw_30.000_40.000.wav 30.000 40.000 Skateboard
|
342 |
+
gV6y9L24wWg_0.000_10.000.wav 0.000 10.000 Skateboard
|
343 |
+
hHb0Eq1I7Fk_0.000_10.000.wav 0.000 10.000 Skateboard
|
344 |
+
lGf_L6i6AZI_20.000_30.000.wav 20.000 30.000 Skateboard
|
345 |
+
leOH87itNWM_30.000_40.000.wav 30.000 40.000 Skateboard
|
346 |
+
mIkW7mWlnXw_30.000_40.000.wav 30.000 40.000 Skateboard
|
347 |
+
qadmKrM0ppo_20.000_30.000.wav 20.000 30.000 Skateboard
|
348 |
+
rLUIHCc4b9A_0.000_7.000.wav 0.000 7.000 Skateboard
|
349 |
+
u3vBJgEVJvk_0.000_10.000.wav 0.000 10.000 Skateboard
|
350 |
+
vHKBrtPDSvA_150.000_160.000.wav 150.000 160.000 Skateboard
|
351 |
+
wWmydRt0Z-w_21.000_31.000.wav 21.000 31.000 Skateboard
|
352 |
+
xeHt-R5ScmI_0.000_10.000.wav 0.000 10.000 Skateboard
|
353 |
+
xqGtIVeeXY4_330.000_340.000.wav 330.000 340.000 Skateboard
|
354 |
+
y_lfY0uzmr0_30.000_40.000.wav 30.000 40.000 Skateboard
|
355 |
+
02Ak1eIyj3M_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
356 |
+
0N0C0Wbe6AI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
357 |
+
2-h8MRSRvEg_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
358 |
+
4APBvMmKubU_10.000_20.000.wav 10.000 20.000 Ambulance (siren)
|
359 |
+
5RgHBmX2HLw_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
360 |
+
6rXgD5JlYxY_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
361 |
+
7eeN-fXbso8_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
|
362 |
+
8Aq2DyLbUBA_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
363 |
+
8qMHvgA9mGw_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
|
364 |
+
9CRb-PToaAM_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
365 |
+
AwFuGITwrms_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
366 |
+
BGp9-Ro5h8Y_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
367 |
+
CDrpqsGqfPo_10.000_20.000.wav 10.000 20.000 Ambulance (siren)
|
368 |
+
Cc7-P0py1Mc_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
369 |
+
Daqv2F6SEmQ_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
370 |
+
F9Dbcxr-lAI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
371 |
+
GORjnSWhZeY_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
372 |
+
GgV0yYogTPI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
373 |
+
H9xQQVv3ElI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
374 |
+
LNQ7fzfdLiY_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
375 |
+
MEUcv-QM0cQ_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
376 |
+
QWVub6-0jX4_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
377 |
+
R8G5Y0HASxY_60.000_70.000.wav 60.000 70.000 Ambulance (siren)
|
378 |
+
RVTKY5KR3ME_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
|
379 |
+
Sm0pPvXPA9U_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
380 |
+
VXI3-DI4xNs_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
381 |
+
W8fIlauyJkk_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
382 |
+
ZlS4vIWQMmE_0.000_10.000.wav 0.000 10.000 Ambulance (siren)
|
383 |
+
ZxlbI2Rj1VY_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
384 |
+
ZyuX_gMFiss_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
385 |
+
bA8mt0JI0Ko_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
386 |
+
bIU0X1v4SF0_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
387 |
+
cHm1cYBAXMI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
388 |
+
cR79KnWpiQA_70.000_80.000.wav 70.000 80.000 Ambulance (siren)
|
389 |
+
dPcw4R5lczw_500.000_510.000.wav 500.000 510.000 Ambulance (siren)
|
390 |
+
epwDz5WBkvc_80.000_90.000.wav 80.000 90.000 Ambulance (siren)
|
391 |
+
fHaQPHCjyfA_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
392 |
+
gw9pYEG2Zb0_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
|
393 |
+
iEX8L_oEbsU_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
394 |
+
iM-U56fTTOQ_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
395 |
+
iSnWMz4FUAg_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
396 |
+
kJuvA2zmrnY_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
397 |
+
kSjvt2Z_pBo_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
398 |
+
ke35yF1LHs4_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
399 |
+
lqGtL8sUo_g_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
400 |
+
mAfPu0meA_Y_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
|
401 |
+
mlS9LLiMIG8_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
402 |
+
oPR7tUEUptk_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
403 |
+
qsHc2X1toLs_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
404 |
+
rCQykaL8Hy4_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
405 |
+
rhUfN81puDI_0.000_10.000.wav 0.000 10.000 Ambulance (siren)
|
406 |
+
s0iddDFzL9s_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
407 |
+
tcKlq7_cOkw_8.000_18.000.wav 8.000 18.000 Ambulance (siren)
|
408 |
+
u3yYpMwG4Us_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
409 |
+
vBXPyBiyJG0_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
410 |
+
vVqUvv1SSu8_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
411 |
+
vYKWnuvq2FI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
412 |
+
ysNK5RVF3Zw_0.000_10.000.wav 0.000 10.000 Ambulance (siren)
|
413 |
+
z4B14tAqJ4w_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
414 |
+
zbiJEml563w_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
|
415 |
+
-HxRz4w60-Y_150.000_160.000.wav 150.000 160.000 Fire engine, fire truck (siren)
|
416 |
+
-_dElQcyJnA_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
417 |
+
0K1mroXg8bs_9.000_19.000.wav 9.000 19.000 Fire engine, fire truck (siren)
|
418 |
+
0SvSNVatkv0_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
419 |
+
2-h8MRSRvEg_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
420 |
+
31WGUPOYS5g_22.000_32.000.wav 22.000 32.000 Fire engine, fire truck (siren)
|
421 |
+
3h3_IZWhX0g_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
422 |
+
4APBvMmKubU_10.000_20.000.wav 10.000 20.000 Fire engine, fire truck (siren)
|
423 |
+
5fjy_2ajEkg_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
424 |
+
6rXgD5JlYxY_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
425 |
+
8Aq2DyLbUBA_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
426 |
+
8DaEd5KbnnA_80.000_90.000.wav 80.000 90.000 Fire engine, fire truck (siren)
|
427 |
+
ARIVxBOc0BQ_40.000_50.000.wav 40.000 50.000 Fire engine, fire truck (siren)
|
428 |
+
AwFuGITwrms_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
429 |
+
Bs2KqqI9F_k_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
430 |
+
Cc7-P0py1Mc_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
431 |
+
D4M3YT75ZrQ_90.000_100.000.wav 90.000 100.000 Fire engine, fire truck (siren)
|
432 |
+
DWXQ_cSUW98_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
433 |
+
Daqv2F6SEmQ_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
434 |
+
DpagxUQwXDo_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
435 |
+
FFSI6Bg2M-Q_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
436 |
+
GORjnSWhZeY_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
437 |
+
GbIuxmaiCOk_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
438 |
+
GgV0yYogTPI_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
439 |
+
H6c8ZDrdUaM_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
440 |
+
H9xQQVv3ElI_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
441 |
+
HQQxGJKg1iM_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
442 |
+
IiCh2H3JtsE_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
443 |
+
InrS4Fdndr4_0.000_10.000.wav 0.000 10.000 Fire engine, fire truck (siren)
|
444 |
+
JpLA7HY9r3Y_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
445 |
+
MEUcv-QM0cQ_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
446 |
+
PCl-q7lCT_U_50.000_60.000.wav 50.000 60.000 Fire engine, fire truck (siren)
|
447 |
+
VXI3-DI4xNs_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
448 |
+
Xggsbzzes3M_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
449 |
+
YbiiaDBU-HI_10.000_20.000.wav 10.000 20.000 Fire engine, fire truck (siren)
|
450 |
+
ZeH6Fc7Y900_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
451 |
+
ZlS4vIWQMmE_0.000_10.000.wav 0.000 10.000 Fire engine, fire truck (siren)
|
452 |
+
bIU0X1v4SF0_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
453 |
+
cHm1cYBAXMI_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
454 |
+
fHaQPHCjyfA_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
455 |
+
iM-U56fTTOQ_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
456 |
+
k2a30--j37Q_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
457 |
+
kJuvA2zmrnY_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
458 |
+
kr8ssbrDDMY_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
459 |
+
pvYwIdGrS90_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
460 |
+
qsHc2X1toLs_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
461 |
+
rCQykaL8Hy4_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
462 |
+
rhUfN81puDI_0.000_10.000.wav 0.000 10.000 Fire engine, fire truck (siren)
|
463 |
+
u08iA12iAmM_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
464 |
+
u9aHjYGbl5o_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
465 |
+
uUiZrgUpw2A_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
466 |
+
vBXPyBiyJG0_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
467 |
+
vVqUvv1SSu8_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
468 |
+
vYKWnuvq2FI_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
469 |
+
wD0P-doqkXo_20.000_30.000.wav 20.000 30.000 Fire engine, fire truck (siren)
|
470 |
+
xbr7x2V6mxk_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
471 |
+
ysNK5RVF3Zw_0.000_10.000.wav 0.000 10.000 Fire engine, fire truck (siren)
|
472 |
+
z4B14tAqJ4w_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
473 |
+
zpzJKMG5iGc_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
474 |
+
02Ak1eIyj3M_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
475 |
+
0CJFt950vOk_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
476 |
+
0phl6nlC-n0_10.000_20.000.wav 10.000 20.000 Civil defense siren
|
477 |
+
1jhbNtCWC9w_50.000_60.000.wav 50.000 60.000 Civil defense siren
|
478 |
+
4Ukj2TTJxHM_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
479 |
+
4XAVaSz_P7c_150.000_160.000.wav 150.000 160.000 Civil defense siren
|
480 |
+
69AIBPnJN5E_0.000_10.000.wav 0.000 10.000 Civil defense siren
|
481 |
+
8DaEd5KbnnA_80.000_90.000.wav 80.000 90.000 Civil defense siren
|
482 |
+
8ILgvaJVPCI_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
483 |
+
9MWHXCLAX8I_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
484 |
+
A5y-aZc0CiM_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
485 |
+
AQCZH4OdNSM_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
486 |
+
AVBUh6qeHrQ_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
487 |
+
BhQPDafekdw_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
488 |
+
CJXNdudcJrs_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
489 |
+
CU2MyVM_B48_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
490 |
+
DdZw0XDv0JI_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
491 |
+
DgWHUawAGnI_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
492 |
+
Do9Dffb6vHA_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
493 |
+
GO2zKyMtBV4_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
494 |
+
GeRgy4of730_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
495 |
+
IIypdzgZAaI_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
496 |
+
JpLA7HY9r3Y_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
497 |
+
JqHJ7015aWM_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
498 |
+
K7a1P4RX_5w_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
499 |
+
KrTocA-I550_190.000_200.000.wav 190.000 200.000 Civil defense siren
|
500 |
+
KumYcZVLOVU_350.000_360.000.wav 350.000 360.000 Civil defense siren
|
501 |
+
L60HS_jbZu0_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
502 |
+
MZ1Yh6mRC-E_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
503 |
+
R8XUrRCFkzs_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
504 |
+
SyWbolNFst4_60.000_70.000.wav 60.000 70.000 Civil defense siren
|
505 |
+
TYLZuBBu8ms_0.000_10.000.wav 0.000 10.000 Civil defense siren
|
506 |
+
Tx6eSkU2lKc_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
507 |
+
VcflBZLflSU_130.000_140.000.wav 130.000 140.000 Civil defense siren
|
508 |
+
WXsTHg_DiYA_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
509 |
+
Wz5ffJxCElQ_10.000_20.000.wav 10.000 20.000 Civil defense siren
|
510 |
+
X2MlmcY8UZU_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
511 |
+
XYLheTmlEYI_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
512 |
+
YyxlD_FwZXM_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
513 |
+
adCuLs-4nmI_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
514 |
+
cPjtrTq3F-I_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
515 |
+
eHDm93tI4Ok_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
516 |
+
etppP5Sdo14_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
517 |
+
fRKxUc1gQBw_50.000_60.000.wav 50.000 60.000 Civil defense siren
|
518 |
+
feIue4LHzfM_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
519 |
+
gr-Yen6Sj_Q_0.000_10.000.wav 0.000 10.000 Civil defense siren
|
520 |
+
hl3Kqi9Wi_g_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
521 |
+
iKca2cbowd4_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
522 |
+
kzFyGWdj6MI_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
523 |
+
m3LGopSVju4_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
524 |
+
ne4IMxs-hMk_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
525 |
+
nuu2iNisoQc_6.000_16.000.wav 6.000 16.000 Civil defense siren
|
526 |
+
oYeql9xE19k_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
527 |
+
rGUrM19BnJ8_110.000_120.000.wav 110.000 120.000 Civil defense siren
|
528 |
+
u08iA12iAmM_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
529 |
+
uCRAnDBXxgI_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
530 |
+
vQG4HZR2KSk_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
531 |
+
vjsG5b2yNzc_190.000_200.000.wav 190.000 200.000 Civil defense siren
|
532 |
+
yO7guxGY-_k_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
533 |
+
-9GUUhB3QV0_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
534 |
+
-HxRz4w60-Y_150.000_160.000.wav 150.000 160.000 Police car (siren)
|
535 |
+
-UBVqmhbT50_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
536 |
+
-_dElQcyJnA_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
537 |
+
0N0C0Wbe6AI_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
538 |
+
0SvSNVatkv0_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
539 |
+
145N68nh4m0_120.000_130.000.wav 120.000 130.000 Police car (siren)
|
540 |
+
2-h8MRSRvEg_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
541 |
+
31WGUPOYS5g_22.000_32.000.wav 22.000 32.000 Police car (siren)
|
542 |
+
5RgHBmX2HLw_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
543 |
+
6rXgD5JlYxY_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
544 |
+
8Aq2DyLbUBA_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
545 |
+
8DaEd5KbnnA_80.000_90.000.wav 80.000 90.000 Police car (siren)
|
546 |
+
8E7okHnCcTA_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
547 |
+
9CRb-PToaAM_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
548 |
+
9OFUd38sBNM_0.000_8.000.wav 0.000 8.000 Police car (siren)
|
549 |
+
AQCZH4OdNSM_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
550 |
+
AwFuGITwrms_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
551 |
+
CDrpqsGqfPo_10.000_20.000.wav 10.000 20.000 Police car (siren)
|
552 |
+
DK_6C29B2zs_14.000_24.000.wav 14.000 24.000 Police car (siren)
|
553 |
+
GORjnSWhZeY_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
554 |
+
GgV0yYogTPI_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
555 |
+
H6c8ZDrdUaM_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
556 |
+
H7lOMlND9dc_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
557 |
+
H9xQQVv3ElI_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
558 |
+
IiCh2H3JtsE_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
559 |
+
InrS4Fdndr4_0.000_10.000.wav 0.000 10.000 Police car (siren)
|
560 |
+
JgDuU9kpHpM_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
561 |
+
JpLA7HY9r3Y_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
562 |
+
LNQ7fzfdLiY_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
563 |
+
PCl-q7lCT_U_50.000_60.000.wav 50.000 60.000 Police car (siren)
|
564 |
+
QWVub6-0jX4_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
565 |
+
Wak5QxsS-QU_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
566 |
+
YbiiaDBU-HI_10.000_20.000.wav 10.000 20.000 Police car (siren)
|
567 |
+
Z34SD-OEpJI_10.000_20.000.wav 10.000 20.000 Police car (siren)
|
568 |
+
ZeH6Fc7Y900_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
569 |
+
ZlS4vIWQMmE_0.000_10.000.wav 0.000 10.000 Police car (siren)
|
570 |
+
ZyuX_gMFiss_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
571 |
+
bIU0X1v4SF0_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
572 |
+
eIMjkADTWzA_60.000_70.000.wav 60.000 70.000 Police car (siren)
|
573 |
+
epwDz5WBkvc_80.000_90.000.wav 80.000 90.000 Police car (siren)
|
574 |
+
fHaQPHCjyfA_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
575 |
+
fNcrlqPrAqM_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
576 |
+
g_DBLppDZAs_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
577 |
+
gw9pYEG2Zb0_20.000_30.000.wav 20.000 30.000 Police car (siren)
|
578 |
+
iEX8L_oEbsU_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
579 |
+
iM-U56fTTOQ_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
580 |
+
kJuvA2zmrnY_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
581 |
+
kSjvt2Z_pBo_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
582 |
+
lqGtL8sUo_g_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
583 |
+
mAfPu0meA_Y_20.000_30.000.wav 20.000 30.000 Police car (siren)
|
584 |
+
mlS9LLiMIG8_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
585 |
+
pzup58Eyhuo_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
586 |
+
rCQykaL8Hy4_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
587 |
+
rhUfN81puDI_0.000_10.000.wav 0.000 10.000 Police car (siren)
|
588 |
+
u08iA12iAmM_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
589 |
+
u3yYpMwG4Us_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
590 |
+
u9aHjYGbl5o_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
591 |
+
uUiZrgUpw2A_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
592 |
+
vYKWnuvq2FI_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
593 |
+
xbr7x2V6mxk_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
594 |
+
z4B14tAqJ4w_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
595 |
+
-FKrYTj_eCU_0.000_10.000.wav 0.000 10.000 Screaming
|
596 |
+
0G50t4FlbIA_60.000_70.000.wav 60.000 70.000 Screaming
|
597 |
+
1LTxZ2aNytc_30.000_40.000.wav 30.000 40.000 Screaming
|
598 |
+
2FEhG1UXb_E_370.000_380.000.wav 370.000 380.000 Screaming
|
599 |
+
45vBbOhzS6g_50.000_60.000.wav 50.000 60.000 Screaming
|
600 |
+
4PYTtp78Ig0_60.000_70.000.wav 60.000 70.000 Screaming
|
601 |
+
5QNq0IEPICQ_30.000_40.000.wav 30.000 40.000 Screaming
|
602 |
+
5YcIJuYQECc_0.000_6.000.wav 0.000 6.000 Screaming
|
603 |
+
5kQF4r03yRI_0.000_6.000.wav 0.000 6.000 Screaming
|
604 |
+
7ARVgI_wx5Y_30.000_40.000.wav 30.000 40.000 Screaming
|
605 |
+
AIFvFuZPr68_30.000_40.000.wav 30.000 40.000 Screaming
|
606 |
+
Aw43FUCkIb8_20.000_30.000.wav 20.000 30.000 Screaming
|
607 |
+
AxM2BofYfPY_30.000_40.000.wav 30.000 40.000 Screaming
|
608 |
+
BFqHyCoypfM_16.000_26.000.wav 16.000 26.000 Screaming
|
609 |
+
Bk_xS_fKCpk_30.000_40.000.wav 30.000 40.000 Screaming
|
610 |
+
C4YMjmJ7tt4_90.000_100.000.wav 90.000 100.000 Screaming
|
611 |
+
CMWoAvgD0A0_9.000_19.000.wav 9.000 19.000 Screaming
|
612 |
+
DZfYFhywhRs_30.000_40.000.wav 30.000 40.000 Screaming
|
613 |
+
ElJFYwRtrH4_30.000_40.000.wav 30.000 40.000 Screaming
|
614 |
+
FcUVtXJMkJs_30.000_40.000.wav 30.000 40.000 Screaming
|
615 |
+
G--718JDmAQ_0.000_10.000.wav 0.000 10.000 Screaming
|
616 |
+
GPJ1uQwmNHk_30.000_40.000.wav 30.000 40.000 Screaming
|
617 |
+
H3vSRzkG82U_30.000_40.000.wav 30.000 40.000 Screaming
|
618 |
+
HS28EUWt8dE_110.000_120.000.wav 110.000 120.000 Screaming
|
619 |
+
KkGTB8ESMCM_0.000_10.000.wav 0.000 10.000 Screaming
|
620 |
+
MQ0YasvMcuQ_1.000_11.000.wav 1.000 11.000 Screaming
|
621 |
+
Msl9dI5yweA_90.000_100.000.wav 90.000 100.000 Screaming
|
622 |
+
Ntn6YvZM3kA_0.000_10.000.wav 0.000 10.000 Screaming
|
623 |
+
NwTHlpXdk4M_30.000_40.000.wav 30.000 40.000 Screaming
|
624 |
+
OHjfSfqa804_0.000_10.000.wav 0.000 10.000 Screaming
|
625 |
+
OzWJuqG2F3Y_30.000_40.000.wav 30.000 40.000 Screaming
|
626 |
+
QDW_uCMnMMU_0.000_8.000.wav 0.000 8.000 Screaming
|
627 |
+
SxI3Lnzzmkw_110.000_120.000.wav 110.000 120.000 Screaming
|
628 |
+
TVvbfuGu9eM_70.000_80.000.wav 70.000 80.000 Screaming
|
629 |
+
YCk9F0Uq3BE_70.000_80.000.wav 70.000 80.000 Screaming
|
630 |
+
Z54pSnNw2iM_30.000_40.000.wav 30.000 40.000 Screaming
|
631 |
+
a59ivTlYoNk_310.000_320.000.wav 310.000 320.000 Screaming
|
632 |
+
auC_LgwFF8g_30.000_40.000.wav 30.000 40.000 Screaming
|
633 |
+
bi8R9JbF2cc_80.000_90.000.wav 80.000 90.000 Screaming
|
634 |
+
cdbYsoEasio_70.000_80.000.wav 70.000 80.000 Screaming
|
635 |
+
dfsvT5xImNg_80.000_90.000.wav 80.000 90.000 Screaming
|
636 |
+
e2AaF6siR1A_540.000_550.000.wav 540.000 550.000 Screaming
|
637 |
+
gB1ytjgpcW4_190.000_200.000.wav 190.000 200.000 Screaming
|
638 |
+
gE-0JxMtUh0_20.000_30.000.wav 20.000 30.000 Screaming
|
639 |
+
hWiGgsuGnzs_100.000_110.000.wav 100.000 110.000 Screaming
|
640 |
+
l-iIfi3SNpw_120.000_130.000.wav 120.000 130.000 Screaming
|
641 |
+
mT-f0lGk-JM_30.000_40.000.wav 30.000 40.000 Screaming
|
642 |
+
nApE_Biu13k_10.000_20.000.wav 10.000 20.000 Screaming
|
643 |
+
nRMmafPUAEU_80.000_90.000.wav 80.000 90.000 Screaming
|
644 |
+
nYAbLuyqPis_30.000_40.000.wav 30.000 40.000 Screaming
|
645 |
+
nlYlNF30bVg_30.000_40.000.wav 30.000 40.000 Screaming
|
646 |
+
sUp-UXzgmrA_0.000_10.000.wav 0.000 10.000 Screaming
|
647 |
+
syIwNMo2TUA_0.000_7.000.wav 0.000 7.000 Screaming
|
648 |
+
uTu0a1wd9-M_21.000_31.000.wav 21.000 31.000 Screaming
|
649 |
+
xVG7dfH5DL0_320.000_330.000.wav 320.000 330.000 Screaming
|
650 |
+
xvAQ44hx3_k_220.000_230.000.wav 220.000 230.000 Screaming
|
651 |
+
yNTkb2zgA_M_70.000_80.000.wav 70.000 80.000 Screaming
|
652 |
+
zCdOEvduBTo_30.000_40.000.wav 30.000 40.000 Screaming
|
653 |
+
zMICvbCJ6zc_550.000_560.000.wav 550.000 560.000 Screaming
|
654 |
+
-0RWZT-miFs_420.000_430.000.wav 420.000 430.000 Car
|
655 |
+
-1pRmoJIGQc_11.000_21.000.wav 11.000 21.000 Car
|
656 |
+
-7eDqv-6AKQ_30.000_40.000.wav 30.000 40.000 Car
|
657 |
+
-CZ1LIc8aos_20.000_30.000.wav 20.000 30.000 Car
|
658 |
+
-HWygXWSNRA_30.000_40.000.wav 30.000 40.000 Car
|
659 |
+
-PVEno65928_30.000_40.000.wav 30.000 40.000 Car
|
660 |
+
-WgJ-M292Yc_30.000_40.000.wav 30.000 40.000 Car
|
661 |
+
0O-gZoirpRA_30.000_40.000.wav 30.000 40.000 Car
|
662 |
+
0QwxnzHf_0E_30.000_40.000.wav 30.000 40.000 Car
|
663 |
+
0bg1nzEVdgY_0.000_10.000.wav 0.000 10.000 Car
|
664 |
+
0lpPdWvg7Eo_0.000_10.000.wav 0.000 10.000 Car
|
665 |
+
11Pn3yJifSQ_4.000_14.000.wav 4.000 14.000 Car
|
666 |
+
1BgqrhbyRFw_30.000_40.000.wav 30.000 40.000 Car
|
667 |
+
1F9zCsJyw6k_430.000_440.000.wav 430.000 440.000 Car
|
668 |
+
1HayoASR-54_80.000_90.000.wav 80.000 90.000 Car
|
669 |
+
1P5FFxXLSpY_30.000_40.000.wav 30.000 40.000 Car
|
670 |
+
1hIg-Lsvc7Q_30.000_40.000.wav 30.000 40.000 Car
|
671 |
+
27m49pmJ8Og_370.000_380.000.wav 370.000 380.000 Car
|
672 |
+
2E_N8lnoVKE_30.000_40.000.wav 30.000 40.000 Car
|
673 |
+
2Fdau5KTEls_30.000_40.000.wav 30.000 40.000 Car
|
674 |
+
2STASUlGAjs_30.000_40.000.wav 30.000 40.000 Car
|
675 |
+
2fi0m8ei_B4_30.000_40.000.wav 30.000 40.000 Car
|
676 |
+
2uMXfAIMeN0_180.000_190.000.wav 180.000 190.000 Car
|
677 |
+
32V2zsK7GME_110.000_120.000.wav 110.000 120.000 Car
|
678 |
+
3YChVhqW42E_130.000_140.000.wav 130.000 140.000 Car
|
679 |
+
3_OLj6XChvM_30.000_40.000.wav 30.000 40.000 Car
|
680 |
+
3hLxPQpmfQo_30.000_40.000.wav 30.000 40.000 Car
|
681 |
+
3mDPQ_CPopw_30.000_40.000.wav 30.000 40.000 Car
|
682 |
+
3mor5mPSYoU_7.000_17.000.wav 7.000 17.000 Car
|
683 |
+
3xh2kScw64U_30.000_40.000.wav 30.000 40.000 Car
|
684 |
+
40s88hEcn5I_170.000_180.000.wav 170.000 180.000 Car
|
685 |
+
42P93B_GzGA_30.000_40.000.wav 30.000 40.000 Car
|
686 |
+
4KZWpXlcpM4_60.000_70.000.wav 60.000 70.000 Car
|
687 |
+
4TshFWSsrn8_290.000_300.000.wav 290.000 300.000 Car
|
688 |
+
4WRgvRI06zc_30.000_40.000.wav 30.000 40.000 Car
|
689 |
+
4aJfQpHt9lY_160.000_170.000.wav 160.000 170.000 Car
|
690 |
+
4hd2CLrzCZs_30.000_40.000.wav 30.000 40.000 Car
|
691 |
+
4zCHl7pRsNY_30.000_40.000.wav 30.000 40.000 Car
|
692 |
+
5RgHBmX2HLw_30.000_40.000.wav 30.000 40.000 Car
|
693 |
+
5oirFKi6Sfo_190.000_200.000.wav 190.000 200.000 Car
|
694 |
+
5vmxFp1r1ZM_30.000_40.000.wav 30.000 40.000 Car
|
695 |
+
5z1rE_l-0Ow_0.000_8.000.wav 0.000 8.000 Car
|
696 |
+
620GoTv5Ic8_30.000_40.000.wav 30.000 40.000 Car
|
697 |
+
6BitLl5Bnxw_30.000_40.000.wav 30.000 40.000 Car
|
698 |
+
6FVA4hqp1Ro_30.000_40.000.wav 30.000 40.000 Car
|
699 |
+
6U942AYlcXA_30.000_40.000.wav 30.000 40.000 Car
|
700 |
+
6b2ZMMrLTz8_5.000_15.000.wav 5.000 15.000 Car
|
701 |
+
6ibh38autyA_30.000_40.000.wav 30.000 40.000 Car
|
702 |
+
6kuESYFcEqw_30.000_40.000.wav 30.000 40.000 Car
|
703 |
+
73cuZZq-J3w_20.000_30.000.wav 20.000 30.000 Car
|
704 |
+
764IcMEMVUk_90.000_100.000.wav 90.000 100.000 Car
|
705 |
+
7NH1WJlSiYI_30.000_40.000.wav 30.000 40.000 Car
|
706 |
+
7lJu9wEsErY_220.000_230.000.wav 220.000 230.000 Car
|
707 |
+
8CqqK9CzuXM_30.000_40.000.wav 30.000 40.000 Car
|
708 |
+
8SYLYWR47EE_30.000_40.000.wav 30.000 40.000 Car
|
709 |
+
8Wk-ZmlsUqY_28.000_38.000.wav 28.000 38.000 Car
|
710 |
+
8q8JrJNAa-Q_30.000_40.000.wav 30.000 40.000 Car
|
711 |
+
8rMlNbKlp_s_0.000_10.000.wav 0.000 10.000 Car
|
712 |
+
8sGJFPr2Nmc_30.000_40.000.wav 30.000 40.000 Car
|
713 |
+
8yRROnG0-lA_30.000_40.000.wav 30.000 40.000 Car
|
714 |
+
9Ti98L4PRCo_17.000_27.000.wav 17.000 27.000 Car
|
715 |
+
9fzAWj5YJ9c_30.000_40.000.wav 30.000 40.000 Car
|
716 |
+
9rq8h4oMJ98_30.000_40.000.wav 30.000 40.000 Car
|
717 |
+
9ye2Fn62xDc_60.000_70.000.wav 60.000 70.000 Car
|
718 |
+
ACGuC6SH4V4_150.000_160.000.wav 150.000 160.000 Car
|
719 |
+
AFz5TIs_Gug_30.000_40.000.wav 30.000 40.000 Car
|
720 |
+
AedlWfHafgw_21.000_31.000.wav 21.000 31.000 Car
|
721 |
+
AlsDSDTiaWI_30.000_40.000.wav 30.000 40.000 Car
|
722 |
+
B3SkK0wuOhY_130.000_140.000.wav 130.000 140.000 Car
|
723 |
+
B9n4a5ciI48_16.000_26.000.wav 16.000 26.000 Car
|
724 |
+
BAekfGvUtFM_30.000_40.000.wav 30.000 40.000 Car
|
725 |
+
BNLOvQbrPdc_290.000_300.000.wav 290.000 300.000 Car
|
726 |
+
BS1fqEDAvh0_330.000_340.000.wav 330.000 340.000 Car
|
727 |
+
Bqx_SZgCzZw_10.000_20.000.wav 10.000 20.000 Car
|
728 |
+
CZB6WXDuM1g_30.000_40.000.wav 30.000 40.000 Car
|
729 |
+
C_pnsyNXphA_30.000_40.000.wav 30.000 40.000 Car
|
730 |
+
Ck5ZjBf1nLM_30.000_40.000.wav 30.000 40.000 Car
|
731 |
+
CqNyeZeHb8Y_30.000_40.000.wav 30.000 40.000 Car
|
732 |
+
Cs1d7Ibk8CA_220.000_230.000.wav 220.000 230.000 Car
|
733 |
+
CuS-ok0xG9g_0.000_10.000.wav 0.000 10.000 Car
|
734 |
+
CuaBHNKycvI_30.000_40.000.wav 30.000 40.000 Car
|
735 |
+
Cwur_jvxMzY_360.000_370.000.wav 360.000 370.000 Car
|
736 |
+
DEGSyVygE98_110.000_120.000.wav 110.000 120.000 Car
|
737 |
+
DLxTYAUifjU_30.000_40.000.wav 30.000 40.000 Car
|
738 |
+
DkKpnvJk9u0_30.000_40.000.wav 30.000 40.000 Car
|
739 |
+
DkVfro9iq80_30.000_40.000.wav 30.000 40.000 Car
|
740 |
+
Dw1q9rBv7oU_30.000_40.000.wav 30.000 40.000 Car
|
741 |
+
E8NgxTz1d90_30.000_40.000.wav 30.000 40.000 Car
|
742 |
+
ExqedxdXuBc_70.000_80.000.wav 70.000 80.000 Car
|
743 |
+
FCxEMSNSEuI_160.000_170.000.wav 160.000 170.000 Car
|
744 |
+
FEoMTMxzn3U_30.000_40.000.wav 30.000 40.000 Car
|
745 |
+
FFSWmryaZ60_30.000_40.000.wav 30.000 40.000 Car
|
746 |
+
FYk2paHPSdg_30.000_40.000.wav 30.000 40.000 Car
|
747 |
+
Fo_FDiZhzDo_30.000_40.000.wav 30.000 40.000 Car
|
748 |
+
GteozUDpJRc_30.000_40.000.wav 30.000 40.000 Car
|
749 |
+
GwBS2NzjAvA_30.000_40.000.wav 30.000 40.000 Car
|
750 |
+
H8d1mZOqb1c_110.000_120.000.wav 110.000 120.000 Car
|
751 |
+
HFF_PpqLQ9w_30.000_40.000.wav 30.000 40.000 Car
|
752 |
+
HHlb-h2Pc7o_30.000_40.000.wav 30.000 40.000 Car
|
753 |
+
Hu8lxbHYaqg_40.000_50.000.wav 40.000 50.000 Car
|
754 |
+
I-HlrcP6Qg4_30.000_40.000.wav 30.000 40.000 Car
|
755 |
+
I7vs2H-Htt8_480.000_490.000.wav 480.000 490.000 Car
|
756 |
+
IblhEF_MiH8_400.000_410.000.wav 400.000 410.000 Car
|
757 |
+
JgXnbgS_XBk_480.000_490.000.wav 480.000 490.000 Car
|
758 |
+
Ju7Kg_H2iZQ_30.000_40.000.wav 30.000 40.000 Car
|
759 |
+
KiCB6pP6EEo_100.000_110.000.wav 100.000 110.000 Car
|
760 |
+
Kwpn3utYEHM_30.000_40.000.wav 30.000 40.000 Car
|
761 |
+
Ky9Kw-0XwAs_30.000_40.000.wav 30.000 40.000 Car
|
762 |
+
KzKDk-UgS54_30.000_40.000.wav 30.000 40.000 Car
|
763 |
+
L1qC8DicAZE_70.000_80.000.wav 70.000 80.000 Car
|
764 |
+
L4N0LOYZrFo_30.000_40.000.wav 30.000 40.000 Car
|
765 |
+
L535vIV3ED4_40.000_50.000.wav 40.000 50.000 Car
|
766 |
+
L9YtOeck3A0_0.000_10.000.wav 0.000 10.000 Car
|
767 |
+
LEtkHiZZugk_30.000_40.000.wav 30.000 40.000 Car
|
768 |
+
LLkNFGrrgUo_30.000_40.000.wav 30.000 40.000 Car
|
769 |
+
LhRNnXaSsCk_30.000_40.000.wav 30.000 40.000 Car
|
770 |
+
M7NvD1WJQ7o_70.000_80.000.wav 70.000 80.000 Car
|
771 |
+
M8BFtmQRHq4_200.000_210.000.wav 200.000 210.000 Car
|
772 |
+
Mxn2FKuNwiI_20.000_30.000.wav 20.000 30.000 Car
|
773 |
+
NMqSBlEq14Q_30.000_40.000.wav 30.000 40.000 Car
|
774 |
+
NoPbk9fy6uw_10.000_20.000.wav 10.000 20.000 Car
|
775 |
+
O36torHptH4_30.000_40.000.wav 30.000 40.000 Car
|
776 |
+
OBwh-KGukE8_30.000_40.000.wav 30.000 40.000 Car
|
777 |
+
Oa2Os8eOUjs_30.000_40.000.wav 30.000 40.000 Car
|
778 |
+
PNaLTW50fxM_60.000_70.000.wav 60.000 70.000 Car
|
779 |
+
PfXdcsW8dJI_540.000_550.000.wav 540.000 550.000 Car
|
780 |
+
QAWuHvVCI6g_30.000_40.000.wav 30.000 40.000 Car
|
781 |
+
QBMDnMRwQCc_70.000_80.000.wav 70.000 80.000 Car
|
782 |
+
QzrS-S7OerE_370.000_380.000.wav 370.000 380.000 Car
|
783 |
+
R0BtkTm_CPI_30.000_40.000.wav 30.000 40.000 Car
|
784 |
+
SEHxfje9Eio_30.000_40.000.wav 30.000 40.000 Car
|
785 |
+
Sb3V17F8xU8_360.000_370.000.wav 360.000 370.000 Car
|
786 |
+
SkbFczIabRY_30.000_40.000.wav 30.000 40.000 Car
|
787 |
+
SqWkV-UQ6CI_30.000_40.000.wav 30.000 40.000 Car
|
788 |
+
TWDytzefXXc_10.000_20.000.wav 10.000 20.000 Car
|
789 |
+
Tv67JhZDAYs_30.000_40.000.wav 30.000 40.000 Car
|
790 |
+
VTwVF3xRSWg_12.000_22.000.wav 12.000 22.000 Car
|
791 |
+
VulCKZgWspc_570.000_580.000.wav 570.000 580.000 Car
|
792 |
+
Vx6mttDHWfo_30.000_40.000.wav 30.000 40.000 Car
|
793 |
+
W11cJ9HZNaY_30.000_40.000.wav 30.000 40.000 Car
|
794 |
+
WLXQgcx8qTI_30.000_40.000.wav 30.000 40.000 Car
|
795 |
+
WMbdMQ7rdFs_30.000_40.000.wav 30.000 40.000 Car
|
796 |
+
WZoQD6cInx8_360.000_370.000.wav 360.000 370.000 Car
|
797 |
+
WffmaOr2p8I_30.000_40.000.wav 30.000 40.000 Car
|
798 |
+
WoynilrteLU_30.000_40.000.wav 30.000 40.000 Car
|
799 |
+
WxrKq0aI0iM_130.000_140.000.wav 130.000 140.000 Car
|
800 |
+
X60eVxecY3I_30.000_40.000.wav 30.000 40.000 Car
|
801 |
+
X8fEzx-fA0U_80.000_90.000.wav 80.000 90.000 Car
|
802 |
+
XVxlZqwWcBI_10.000_20.000.wav 10.000 20.000 Car
|
803 |
+
Xnd8ERrynEo_120.000_130.000.wav 120.000 130.000 Car
|
804 |
+
XqXLI7bDb-I_0.000_7.000.wav 0.000 7.000 Car
|
805 |
+
XyCjByHuDIk_260.000_270.000.wav 260.000 270.000 Car
|
806 |
+
XzE7mp3pVik_0.000_10.000.wav 0.000 10.000 Car
|
807 |
+
Y5e8BW513ww_20.000_30.000.wav 20.000 30.000 Car
|
808 |
+
YJdBwuIn4Ec_30.000_40.000.wav 30.000 40.000 Car
|
809 |
+
YTFJUFWcRns_30.000_40.000.wav 30.000 40.000 Car
|
810 |
+
YY9aConw2QE_0.000_10.000.wav 0.000 10.000 Car
|
811 |
+
Yc_WuISxfLI_30.000_40.000.wav 30.000 40.000 Car
|
812 |
+
Ys_rO2Ieg1U_30.000_40.000.wav 30.000 40.000 Car
|
813 |
+
Z34SD-OEpJI_10.000_20.000.wav 10.000 20.000 Car
|
814 |
+
Z8cigemT5_g_210.000_220.000.wav 210.000 220.000 Car
|
815 |
+
ZJW7ymsioQc_16.000_26.000.wav 16.000 26.000 Car
|
816 |
+
ZY6A9ZDkudg_130.000_140.000.wav 130.000 140.000 Car
|
817 |
+
_Mw9lKigni4_30.000_40.000.wav 30.000 40.000 Car
|
818 |
+
_ZiJA6phEq8_30.000_40.000.wav 30.000 40.000 Car
|
819 |
+
_yU0-fmspFY_210.000_220.000.wav 210.000 220.000 Car
|
820 |
+
a5vTn5286-A_80.000_90.000.wav 80.000 90.000 Car
|
821 |
+
aCX6vJhHO2c_30.000_40.000.wav 30.000 40.000 Car
|
822 |
+
aHEAK0iWqKk_180.000_190.000.wav 180.000 190.000 Car
|
823 |
+
aOVPHKqKjyQ_90.000_100.000.wav 90.000 100.000 Car
|
824 |
+
aUq4glO5ryE_30.000_40.000.wav 30.000 40.000 Car
|
825 |
+
aW3DY8XDrmw_22.000_32.000.wav 22.000 32.000 Car
|
826 |
+
aa4uhPvKviY_30.000_40.000.wav 30.000 40.000 Car
|
827 |
+
akgqVmFFDiY_30.000_40.000.wav 30.000 40.000 Car
|
828 |
+
buOEFwXhoe0_310.000_320.000.wav 310.000 320.000 Car
|
829 |
+
cHCIoXF7moA_30.000_40.000.wav 30.000 40.000 Car
|
830 |
+
cW859JAzVZ0_30.000_40.000.wav 30.000 40.000 Car
|
831 |
+
cbYZQRz09bc_390.000_400.000.wav 390.000 400.000 Car
|
832 |
+
d-do1XZ8f_E_30.000_40.000.wav 30.000 40.000 Car
|
833 |
+
d3gMwtMK6Gs_30.000_40.000.wav 30.000 40.000 Car
|
834 |
+
d6AioJ8CkTc_30.000_40.000.wav 30.000 40.000 Car
|
835 |
+
dAud19zNZyw_190.000_200.000.wav 190.000 200.000 Car
|
836 |
+
dC1TVxwiitc_30.000_40.000.wav 30.000 40.000 Car
|
837 |
+
dFqOBLxhEl8_20.000_30.000.wav 20.000 30.000 Car
|
838 |
+
dSfcznv4KLo_30.000_40.000.wav 30.000 40.000 Car
|
839 |
+
dThSTe35jb0_50.000_60.000.wav 50.000 60.000 Car
|
840 |
+
dfwr8wgZU8M_40.000_50.000.wav 40.000 50.000 Car
|
841 |
+
dmJH84FnQa8_30.000_40.000.wav 30.000 40.000 Car
|
842 |
+
e9xPBfEJni8_230.000_240.000.wav 230.000 240.000 Car
|
843 |
+
eAl9WwRaWUE_30.000_40.000.wav 30.000 40.000 Car
|
844 |
+
eAt6si6k65c_30.000_40.000.wav 30.000 40.000 Car
|
845 |
+
eHiqCLHmoxI_0.000_8.000.wav 0.000 8.000 Car
|
846 |
+
eV5JX81GzqA_150.000_160.000.wav 150.000 160.000 Car
|
847 |
+
er1vQ-nse_g_30.000_40.000.wav 30.000 40.000 Car
|
848 |
+
eyFPHlybqDg_30.000_40.000.wav 30.000 40.000 Car
|
849 |
+
f70nsY7ThBA_220.000_230.000.wav 220.000 230.000 Car
|
850 |
+
fJLCT3xDGxA_30.000_40.000.wav 30.000 40.000 Car
|
851 |
+
fZMPDCNyQxE_30.000_40.000.wav 30.000 40.000 Car
|
852 |
+
f__6chtFRM0_30.000_40.000.wav 30.000 40.000 Car
|
853 |
+
fdDTuo_COG8_90.000_100.000.wav 90.000 100.000 Car
|
854 |
+
gFJjYWXeBn0_30.000_40.000.wav 30.000 40.000 Car
|
855 |
+
g_DBLppDZAs_30.000_40.000.wav 30.000 40.000 Car
|
856 |
+
gaFQgJLQHtU_90.000_100.000.wav 90.000 100.000 Car
|
857 |
+
gc6VlixMHXE_30.000_40.000.wav 30.000 40.000 Car
|
858 |
+
hN1ykzC8kZM_30.000_40.000.wav 30.000 40.000 Car
|
859 |
+
hQ_yyPI46FI_11.000_21.000.wav 11.000 21.000 Car
|
860 |
+
haiMRJEH-Aw_0.000_9.000.wav 0.000 9.000 Car
|
861 |
+
hsC_sT0A4XM_30.000_40.000.wav 30.000 40.000 Car
|
862 |
+
ihQDd1CqFBw_70.000_80.000.wav 70.000 80.000 Car
|
863 |
+
ii87iO6JboA_10.000_20.000.wav 10.000 20.000 Car
|
864 |
+
j2R1zurR39E_30.000_40.000.wav 30.000 40.000 Car
|
865 |
+
j42ETHcp044_0.000_10.000.wav 0.000 10.000 Car
|
866 |
+
j7OEpDiK3IA_30.000_40.000.wav 30.000 40.000 Car
|
867 |
+
jCeUZwd8b2w_0.000_10.000.wav 0.000 10.000 Car
|
868 |
+
jZxusrD28rM_30.000_40.000.wav 30.000 40.000 Car
|
869 |
+
kdDgTDfo9HY_100.000_110.000.wav 100.000 110.000 Car
|
870 |
+
l6_h_YHuTbY_30.000_40.000.wav 30.000 40.000 Car
|
871 |
+
lRrv5m9Xu4k_30.000_40.000.wav 30.000 40.000 Car
|
872 |
+
lb1awXgoyQE_0.000_10.000.wav 0.000 10.000 Car
|
873 |
+
llZBUsAwRWc_30.000_40.000.wav 30.000 40.000 Car
|
874 |
+
lu5teS1j1RQ_0.000_10.000.wav 0.000 10.000 Car
|
875 |
+
mCmjh_EJtb4_30.000_40.000.wav 30.000 40.000 Car
|
876 |
+
nFqf1vflJaI_350.000_360.000.wav 350.000 360.000 Car
|
877 |
+
njodYtK0Hqg_30.000_40.000.wav 30.000 40.000 Car
|
878 |
+
noymXcxyxis_30.000_40.000.wav 30.000 40.000 Car
|
879 |
+
o2CmtHNUrXg_30.000_40.000.wav 30.000 40.000 Car
|
880 |
+
oPJVdi0cqNE_30.000_40.000.wav 30.000 40.000 Car
|
881 |
+
oxJYMzEmtk4_10.000_20.000.wav 10.000 20.000 Car
|
882 |
+
pPnLErF3GOY_30.000_40.000.wav 30.000 40.000 Car
|
883 |
+
pXX6cK4xtiY_11.000_21.000.wav 11.000 21.000 Car
|
884 |
+
qC5M7BAsKOA_0.000_10.000.wav 0.000 10.000 Car
|
885 |
+
qg4WxBm8h_w_510.000_520.000.wav 510.000 520.000 Car
|
886 |
+
qxLdv8u_Ujw_0.000_5.000.wav 0.000 5.000 Car
|
887 |
+
rgeu0Gtf3Es_40.000_50.000.wav 40.000 50.000 Car
|
888 |
+
s3-i5eUpe6c_30.000_40.000.wav 30.000 40.000 Car
|
889 |
+
s5s3aR8Z7I8_350.000_360.000.wav 350.000 360.000 Car
|
890 |
+
syCQldBsAtg_30.000_40.000.wav 30.000 40.000 Car
|
891 |
+
tAfucDIyRiM_30.000_40.000.wav 30.000 40.000 Car
|
892 |
+
teoER4j9H14_290.000_300.000.wav 290.000 300.000 Car
|
893 |
+
uFSkczD2i14_30.000_40.000.wav 30.000 40.000 Car
|
894 |
+
uUyB4q7jgn4_30.000_40.000.wav 30.000 40.000 Car
|
895 |
+
uYqlVTlSgbM_40.000_50.000.wav 40.000 50.000 Car
|
896 |
+
v8Kry1CbTkM_310.000_320.000.wav 310.000 320.000 Car
|
897 |
+
vF2zXcbADUk_20.000_30.000.wav 20.000 30.000 Car
|
898 |
+
vHlqKDR7ggA_30.000_40.000.wav 30.000 40.000 Car
|
899 |
+
vPDXFKcdaS4_0.000_10.000.wav 0.000 10.000 Car
|
900 |
+
vW1nk4o9u5g_30.000_40.000.wav 30.000 40.000 Car
|
901 |
+
vdFYBSlmsXw_30.000_40.000.wav 30.000 40.000 Car
|
902 |
+
vtE1J8HsCUs_30.000_40.000.wav 30.000 40.000 Car
|
903 |
+
w0vy1YvNcOg_30.000_40.000.wav 30.000 40.000 Car
|
904 |
+
wDKrcZ7xLY8_80.000_90.000.wav 80.000 90.000 Car
|
905 |
+
wM-sBzIDzok_30.000_40.000.wav 30.000 40.000 Car
|
906 |
+
wUY4eWJt17w_30.000_40.000.wav 30.000 40.000 Car
|
907 |
+
we66pU0MN1M_30.000_40.000.wav 30.000 40.000 Car
|
908 |
+
wjfMWiYLDWA_30.000_40.000.wav 30.000 40.000 Car
|
909 |
+
wu3-_VKULZU_30.000_40.000.wav 30.000 40.000 Car
|
910 |
+
wwNIm8bgzKc_30.000_40.000.wav 30.000 40.000 Car
|
911 |
+
xqH9TpH6Xy0_0.000_10.000.wav 0.000 10.000 Car
|
912 |
+
xsT5ZJUnBg0_160.000_170.000.wav 160.000 170.000 Car
|
913 |
+
y9DFJEsiTLk_110.000_120.000.wav 110.000 120.000 Car
|
914 |
+
yESwp_fg0Po_70.000_80.000.wav 70.000 80.000 Car
|
915 |
+
yQg3eMb0QKU_30.000_40.000.wav 30.000 40.000 Car
|
916 |
+
yQjnNR7fXKo_50.000_60.000.wav 50.000 60.000 Car
|
917 |
+
zCuKYr_oMlE_60.000_70.000.wav 60.000 70.000 Car
|
918 |
+
zz35Va7tYmA_30.000_40.000.wav 30.000 40.000 Car
|
919 |
+
-CZ1LIc8aos_20.000_30.000.wav 20.000 30.000 Car passing by
|
920 |
+
-WgJ-M292Yc_30.000_40.000.wav 30.000 40.000 Car passing by
|
921 |
+
-iAAxJkoqcM_0.000_6.000.wav 0.000 6.000 Car passing by
|
922 |
+
0mQcGLpc8to_30.000_40.000.wav 30.000 40.000 Car passing by
|
923 |
+
1HtGgZnlKjU_30.000_40.000.wav 30.000 40.000 Car passing by
|
924 |
+
2IsAlhq0XFc_30.000_40.000.wav 30.000 40.000 Car passing by
|
925 |
+
2UvEmetE__I_30.000_40.000.wav 30.000 40.000 Car passing by
|
926 |
+
2oHGIzH_XzA_30.000_40.000.wav 30.000 40.000 Car passing by
|
927 |
+
3mor5mPSYoU_7.000_17.000.wav 7.000 17.000 Car passing by
|
928 |
+
8SYLYWR47EE_30.000_40.000.wav 30.000 40.000 Car passing by
|
929 |
+
8rzhhvS0tGc_30.000_40.000.wav 30.000 40.000 Car passing by
|
930 |
+
8v377AXrgac_30.000_40.000.wav 30.000 40.000 Car passing by
|
931 |
+
9lMtTDKyDEk_30.000_40.000.wav 30.000 40.000 Car passing by
|
932 |
+
BWoL8oKoTFI_30.000_40.000.wav 30.000 40.000 Car passing by
|
933 |
+
BsvD806qNM8_10.000_20.000.wav 10.000 20.000 Car passing by
|
934 |
+
C3LLtToB2zA_30.000_40.000.wav 30.000 40.000 Car passing by
|
935 |
+
Dk6b9dVD0i8_6.000_16.000.wav 6.000 16.000 Car passing by
|
936 |
+
Dw1q9rBv7oU_30.000_40.000.wav 30.000 40.000 Car passing by
|
937 |
+
EqFuY_U0Yz0_30.000_40.000.wav 30.000 40.000 Car passing by
|
938 |
+
FjpOboRcrNc_10.000_20.000.wav 10.000 20.000 Car passing by
|
939 |
+
FjyZV8zIJ0k_30.000_40.000.wav 30.000 40.000 Car passing by
|
940 |
+
Fn7eSPVvgCQ_30.000_40.000.wav 30.000 40.000 Car passing by
|
941 |
+
G6A-sT2DOjY_30.000_40.000.wav 30.000 40.000 Car passing by
|
942 |
+
GBXRuYIvhfM_30.000_40.000.wav 30.000 40.000 Car passing by
|
943 |
+
HDEPd5MIaow_30.000_40.000.wav 30.000 40.000 Car passing by
|
944 |
+
HQQxGJKg1iM_30.000_40.000.wav 30.000 40.000 Car passing by
|
945 |
+
If-V0XO-mpo_30.000_40.000.wav 30.000 40.000 Car passing by
|
946 |
+
JtuNiusRRLk_30.000_40.000.wav 30.000 40.000 Car passing by
|
947 |
+
M8BFtmQRHq4_200.000_210.000.wav 200.000 210.000 Car passing by
|
948 |
+
NKPAwhwZmqs_30.000_40.000.wav 30.000 40.000 Car passing by
|
949 |
+
Oa2Os8eOUjs_30.000_40.000.wav 30.000 40.000 Car passing by
|
950 |
+
QcLfJE-YfJY_30.000_40.000.wav 30.000 40.000 Car passing by
|
951 |
+
SkbFczIabRY_30.000_40.000.wav 30.000 40.000 Car passing by
|
952 |
+
VAiH1LX8guk_17.000_27.000.wav 17.000 27.000 Car passing by
|
953 |
+
Yc_WuISxfLI_30.000_40.000.wav 30.000 40.000 Car passing by
|
954 |
+
Yd10enP9ykM_30.000_40.000.wav 30.000 40.000 Car passing by
|
955 |
+
_HGGCwtyNxM_30.000_40.000.wav 30.000 40.000 Car passing by
|
956 |
+
a2U10_mi5as_30.000_40.000.wav 30.000 40.000 Car passing by
|
957 |
+
aB6FDPKAPus_30.000_40.000.wav 30.000 40.000 Car passing by
|
958 |
+
bDFQWubN4x4_30.000_40.000.wav 30.000 40.000 Car passing by
|
959 |
+
cW859JAzVZ0_30.000_40.000.wav 30.000 40.000 Car passing by
|
960 |
+
dDTvjXXFkDg_30.000_40.000.wav 30.000 40.000 Car passing by
|
961 |
+
dfwr8wgZU8M_40.000_50.000.wav 40.000 50.000 Car passing by
|
962 |
+
fJLCT3xDGxA_30.000_40.000.wav 30.000 40.000 Car passing by
|
963 |
+
gc6VlixMHXE_30.000_40.000.wav 30.000 40.000 Car passing by
|
964 |
+
gd_KjDM4fi8_0.000_10.000.wav 0.000 10.000 Car passing by
|
965 |
+
j7OEpDiK3IA_30.000_40.000.wav 30.000 40.000 Car passing by
|
966 |
+
jZxusrD28rM_30.000_40.000.wav 30.000 40.000 Car passing by
|
967 |
+
llZBUsAwRWc_30.000_40.000.wav 30.000 40.000 Car passing by
|
968 |
+
m_dCO5bBCic_26.000_36.000.wav 26.000 36.000 Car passing by
|
969 |
+
qDQX7Xi3GsQ_30.000_40.000.wav 30.000 40.000 Car passing by
|
970 |
+
qxLdv8u_Ujw_0.000_5.000.wav 0.000 5.000 Car passing by
|
971 |
+
reP-OOWiLWU_30.000_40.000.wav 30.000 40.000 Car passing by
|
972 |
+
s4jG5ZJYCvQ_30.000_40.000.wav 30.000 40.000 Car passing by
|
973 |
+
s5s3aR8Z7I8_350.000_360.000.wav 350.000 360.000 Car passing by
|
974 |
+
uUyB4q7jgn4_30.000_40.000.wav 30.000 40.000 Car passing by
|
975 |
+
vPDXFKcdaS4_0.000_10.000.wav 0.000 10.000 Car passing by
|
976 |
+
wD4QouhX8zo_30.000_40.000.wav 30.000 40.000 Car passing by
|
977 |
+
xqH9TpH6Xy0_0.000_10.000.wav 0.000 10.000 Car passing by
|
978 |
+
zd67ihUZ1u4_25.000_35.000.wav 25.000 35.000 Car passing by
|
979 |
+
-3z5mFRgbxc_30.000_40.000.wav 30.000 40.000 Bus
|
980 |
+
0N9EN0BEjP0_430.000_440.000.wav 430.000 440.000 Bus
|
981 |
+
0lPcHRhXlWk_30.000_40.000.wav 30.000 40.000 Bus
|
982 |
+
1E1evA4T_Tk_30.000_40.000.wav 30.000 40.000 Bus
|
983 |
+
1hIg-Lsvc7Q_30.000_40.000.wav 30.000 40.000 Bus
|
984 |
+
6-yQsEH2WYA_30.000_40.000.wav 30.000 40.000 Bus
|
985 |
+
6Y8wSI1l-Lw_30.000_40.000.wav 30.000 40.000 Bus
|
986 |
+
7T04388Ijk8_30.000_40.000.wav 30.000 40.000 Bus
|
987 |
+
8E7okHnCcTA_30.000_40.000.wav 30.000 40.000 Bus
|
988 |
+
8oEdgb8iXYA_1.000_11.000.wav 1.000 11.000 Bus
|
989 |
+
AdpNSGX2_Pk_10.000_20.000.wav 10.000 20.000 Bus
|
990 |
+
AwJ8orGuOXg_2.000_12.000.wav 2.000 12.000 Bus
|
991 |
+
BS1fqEDAvh0_330.000_340.000.wav 330.000 340.000 Bus
|
992 |
+
CoFbRc1OxFU_9.000_19.000.wav 9.000 19.000 Bus
|
993 |
+
DRqKOlP8BmU_110.000_120.000.wav 110.000 120.000 Bus
|
994 |
+
DYcXvyBFc5w_30.000_40.000.wav 30.000 40.000 Bus
|
995 |
+
DYdalOQnx1Y_30.000_40.000.wav 30.000 40.000 Bus
|
996 |
+
DkwFXd5nYLE_40.000_50.000.wav 40.000 50.000 Bus
|
997 |
+
FBMR3pW9H9o_30.000_40.000.wav 30.000 40.000 Bus
|
998 |
+
FEGa4e6RAlw_30.000_40.000.wav 30.000 40.000 Bus
|
999 |
+
Ge_KWS-0098_30.000_40.000.wav 30.000 40.000 Bus
|
1000 |
+
HxMoMMrA6Eo_30.000_40.000.wav 30.000 40.000 Bus
|
1001 |
+
I7esm6vqqZ4_30.000_40.000.wav 30.000 40.000 Bus
|
1002 |
+
JLj11umr1CE_0.000_10.000.wav 0.000 10.000 Bus
|
1003 |
+
JwAhcHHF2qg_30.000_40.000.wav 30.000 40.000 Bus
|
1004 |
+
LhRNnXaSsCk_30.000_40.000.wav 30.000 40.000 Bus
|
1005 |
+
LzZ_nxuZ8Co_30.000_40.000.wav 30.000 40.000 Bus
|
1006 |
+
LzcNa3HvD7c_30.000_40.000.wav 30.000 40.000 Bus
|
1007 |
+
Nyi9_-u6-w0_30.000_40.000.wav 30.000 40.000 Bus
|
1008 |
+
O_SKumO328I_30.000_40.000.wav 30.000 40.000 Bus
|
1009 |
+
Owg_XU9XmRM_30.000_40.000.wav 30.000 40.000 Bus
|
1010 |
+
P94rcZSuTT8_30.000_40.000.wav 30.000 40.000 Bus
|
1011 |
+
PP741kd2vRM_30.000_40.000.wav 30.000 40.000 Bus
|
1012 |
+
Qna9qrV8_go_30.000_40.000.wav 30.000 40.000 Bus
|
1013 |
+
Qt7FJkuqWPE_30.000_40.000.wav 30.000 40.000 Bus
|
1014 |
+
UcQ7cVukaxY_21.000_31.000.wav 21.000 31.000 Bus
|
1015 |
+
W8fIlauyJkk_30.000_40.000.wav 30.000 40.000 Bus
|
1016 |
+
WDn851XbWTk_30.000_40.000.wav 30.000 40.000 Bus
|
1017 |
+
WvquSD2PcCE_30.000_40.000.wav 30.000 40.000 Bus
|
1018 |
+
a9B_HA3y8WQ_30.000_40.000.wav 30.000 40.000 Bus
|
1019 |
+
cEEoKQ38fHY_30.000_40.000.wav 30.000 40.000 Bus
|
1020 |
+
er1vQ-nse_g_30.000_40.000.wav 30.000 40.000 Bus
|
1021 |
+
fLvM4bbpg6w_0.000_10.000.wav 0.000 10.000 Bus
|
1022 |
+
fOVsAMJ3Yms_30.000_40.000.wav 30.000 40.000 Bus
|
1023 |
+
gxVhAVNjSU0_30.000_40.000.wav 30.000 40.000 Bus
|
1024 |
+
jaSK_t8QP1E_30.000_40.000.wav 30.000 40.000 Bus
|
1025 |
+
ji_YCMygNHQ_8.000_18.000.wav 8.000 18.000 Bus
|
1026 |
+
kNKfoDp0uUw_30.000_40.000.wav 30.000 40.000 Bus
|
1027 |
+
kdDgTDfo9HY_100.000_110.000.wav 100.000 110.000 Bus
|
1028 |
+
lHP0q2sQzPQ_30.000_40.000.wav 30.000 40.000 Bus
|
1029 |
+
mGG8rop4Jig_30.000_40.000.wav 30.000 40.000 Bus
|
1030 |
+
oHKTmTLEy68_11.000_21.000.wav 11.000 21.000 Bus
|
1031 |
+
tAfucDIyRiM_30.000_40.000.wav 30.000 40.000 Bus
|
1032 |
+
tQd0vFueRKs_30.000_40.000.wav 30.000 40.000 Bus
|
1033 |
+
ucICmff0K-Q_30.000_40.000.wav 30.000 40.000 Bus
|
1034 |
+
x-2Abohj8VY_30.000_40.000.wav 30.000 40.000 Bus
|
1035 |
+
xFr2xX6PulQ_70.000_80.000.wav 70.000 80.000 Bus
|
1036 |
+
yfSBqp5IZSM_10.000_20.000.wav 10.000 20.000 Bus
|
1037 |
+
-2sE5CH8Wb8_30.000_40.000.wav 30.000 40.000 Truck
|
1038 |
+
-BY64_p-vtM_30.000_40.000.wav 30.000 40.000 Truck
|
1039 |
+
-fJsZm3YRc0_30.000_40.000.wav 30.000 40.000 Truck
|
1040 |
+
-t-htrAtNvM_30.000_40.000.wav 30.000 40.000 Truck
|
1041 |
+
-zNEcuo28oE_30.000_40.000.wav 30.000 40.000 Truck
|
1042 |
+
01WuUBxFBp4_30.000_40.000.wav 30.000 40.000 Truck
|
1043 |
+
077aWlQn6XI_30.000_40.000.wav 30.000 40.000 Truck
|
1044 |
+
0Ga7T-2e490_17.000_27.000.wav 17.000 27.000 Truck
|
1045 |
+
0N9EN0BEjP0_430.000_440.000.wav 430.000 440.000 Truck
|
1046 |
+
10aF24rMeu0_30.000_40.000.wav 30.000 40.000 Truck
|
1047 |
+
2HZcxlRs-hg_30.000_40.000.wav 30.000 40.000 Truck
|
1048 |
+
2Jpg_KvJWL0_30.000_40.000.wav 30.000 40.000 Truck
|
1049 |
+
2Tmi7EqpGZQ_0.000_10.000.wav 0.000 10.000 Truck
|
1050 |
+
4DlKNmVcoek_20.000_30.000.wav 20.000 30.000 Truck
|
1051 |
+
4MRzQbAIyV4_90.000_100.000.wav 90.000 100.000 Truck
|
1052 |
+
4Tpy1lsfcSM_30.000_40.000.wav 30.000 40.000 Truck
|
1053 |
+
4ep09nZl3LA_30.000_40.000.wav 30.000 40.000 Truck
|
1054 |
+
5DW8WjxxCag_30.000_40.000.wav 30.000 40.000 Truck
|
1055 |
+
5DjZHCumLfs_11.000_21.000.wav 11.000 21.000 Truck
|
1056 |
+
5QP1Tc3XbDc_30.000_40.000.wav 30.000 40.000 Truck
|
1057 |
+
5V0xKS-FGMk_30.000_40.000.wav 30.000 40.000 Truck
|
1058 |
+
5fLzQegwHUg_30.000_40.000.wav 30.000 40.000 Truck
|
1059 |
+
6HL_DKWK-WA_10.000_20.000.wav 10.000 20.000 Truck
|
1060 |
+
6VQGk8IrV-4_30.000_40.000.wav 30.000 40.000 Truck
|
1061 |
+
6Y8bKS6KLeE_30.000_40.000.wav 30.000 40.000 Truck
|
1062 |
+
6xEHP-C-ZuU_30.000_40.000.wav 30.000 40.000 Truck
|
1063 |
+
6yyToq9cW9A_60.000_70.000.wav 60.000 70.000 Truck
|
1064 |
+
7Gua0-UrKIw_30.000_40.000.wav 30.000 40.000 Truck
|
1065 |
+
7nglQSmcjAk_30.000_40.000.wav 30.000 40.000 Truck
|
1066 |
+
81DteAPIhoE_30.000_40.000.wav 30.000 40.000 Truck
|
1067 |
+
84E9i9_ELBs_30.000_40.000.wav 30.000 40.000 Truck
|
1068 |
+
8jblPMBafKE_30.000_40.000.wav 30.000 40.000 Truck
|
1069 |
+
8k17D6qiuqI_30.000_40.000.wav 30.000 40.000 Truck
|
1070 |
+
9EsgN-WS2qY_30.000_40.000.wav 30.000 40.000 Truck
|
1071 |
+
9LJnjmcRcb8_280.000_290.000.wav 280.000 290.000 Truck
|
1072 |
+
9yhMtJ50sys_30.000_40.000.wav 30.000 40.000 Truck
|
1073 |
+
A9KMqwqLboE_30.000_40.000.wav 30.000 40.000 Truck
|
1074 |
+
ARIVxBOc0BQ_40.000_50.000.wav 40.000 50.000 Truck
|
1075 |
+
AwFuGITwrms_30.000_40.000.wav 30.000 40.000 Truck
|
1076 |
+
BQVXzH6YK8g_30.000_40.000.wav 30.000 40.000 Truck
|
1077 |
+
CnYWJp2bknU_50.000_60.000.wav 50.000 60.000 Truck
|
1078 |
+
DRqKOlP8BmU_110.000_120.000.wav 110.000 120.000 Truck
|
1079 |
+
DXlTakKvLzg_30.000_40.000.wav 30.000 40.000 Truck
|
1080 |
+
DkVfro9iq80_30.000_40.000.wav 30.000 40.000 Truck
|
1081 |
+
Dmy4EjohxxU_60.000_70.000.wav 60.000 70.000 Truck
|
1082 |
+
DvMFQ64YwcI_30.000_40.000.wav 30.000 40.000 Truck
|
1083 |
+
FEoMTMxzn3U_30.000_40.000.wav 30.000 40.000 Truck
|
1084 |
+
GTk_6JDmtCY_230.000_240.000.wav 230.000 240.000 Truck
|
1085 |
+
HDEPd5MIaow_30.000_40.000.wav 30.000 40.000 Truck
|
1086 |
+
HQkLVac7z9Q_70.000_80.000.wav 70.000 80.000 Truck
|
1087 |
+
I4VDcVTE4YA_30.000_40.000.wav 30.000 40.000 Truck
|
1088 |
+
IxlvxvG8zOE_110.000_120.000.wav 110.000 120.000 Truck
|
1089 |
+
JLzD44Im1Ec_30.000_40.000.wav 30.000 40.000 Truck
|
1090 |
+
K4Hcb00hTTY_30.000_40.000.wav 30.000 40.000 Truck
|
1091 |
+
L2M3xanqQP8_30.000_40.000.wav 30.000 40.000 Truck
|
1092 |
+
LA5TekLaIPI_10.000_20.000.wav 10.000 20.000 Truck
|
1093 |
+
LhRNnXaSsCk_30.000_40.000.wav 30.000 40.000 Truck
|
1094 |
+
MWTTe0M9vi4_30.000_40.000.wav 30.000 40.000 Truck
|
1095 |
+
Nkqx09b-xyI_70.000_80.000.wav 70.000 80.000 Truck
|
1096 |
+
NqzZbJJl3E4_30.000_40.000.wav 30.000 40.000 Truck
|
1097 |
+
OPd0cz1hRqc_30.000_40.000.wav 30.000 40.000 Truck
|
1098 |
+
PCl-q7lCT_U_50.000_60.000.wav 50.000 60.000 Truck
|
1099 |
+
PNaLTW50fxM_60.000_70.000.wav 60.000 70.000 Truck
|
1100 |
+
PO1eaJ7tQOg_180.000_190.000.wav 180.000 190.000 Truck
|
1101 |
+
PSt0xAYgf4g_0.000_10.000.wav 0.000 10.000 Truck
|
1102 |
+
Pef6g19i5iI_30.000_40.000.wav 30.000 40.000 Truck
|
1103 |
+
Q1CMSV81_ws_30.000_40.000.wav 30.000 40.000 Truck
|
1104 |
+
SiBIYAiIajM_30.000_40.000.wav 30.000 40.000 Truck
|
1105 |
+
T6oYCFRafPs_30.000_40.000.wav 30.000 40.000 Truck
|
1106 |
+
WdubBeFntYQ_460.000_470.000.wav 460.000 470.000 Truck
|
1107 |
+
_ZiJA6phEq8_30.000_40.000.wav 30.000 40.000 Truck
|
1108 |
+
_jfv_ziZWII_60.000_70.000.wav 60.000 70.000 Truck
|
1109 |
+
acvV6yYNc7Y_30.000_40.000.wav 30.000 40.000 Truck
|
1110 |
+
bQSaQ0iX_vk_30.000_40.000.wav 30.000 40.000 Truck
|
1111 |
+
bhxN5w03yS0_30.000_40.000.wav 30.000 40.000 Truck
|
1112 |
+
ckt7YEGcSoY_30.000_40.000.wav 30.000 40.000 Truck
|
1113 |
+
eIkUuCRE_0U_30.000_40.000.wav 30.000 40.000 Truck
|
1114 |
+
gxVhAVNjSU0_30.000_40.000.wav 30.000 40.000 Truck
|
1115 |
+
hDVNQOJCvOk_30.000_40.000.wav 30.000 40.000 Truck
|
1116 |
+
ieZVo7W3BQ4_30.000_40.000.wav 30.000 40.000 Truck
|
1117 |
+
ikmE_kRvDAc_30.000_40.000.wav 30.000 40.000 Truck
|
1118 |
+
jwZTKNsbf58_70.000_80.000.wav 70.000 80.000 Truck
|
1119 |
+
kH6fFjIZkB0_30.000_40.000.wav 30.000 40.000 Truck
|
1120 |
+
kr8ssbrDDMY_30.000_40.000.wav 30.000 40.000 Truck
|
1121 |
+
lp66EaEOOoU_30.000_40.000.wav 30.000 40.000 Truck
|
1122 |
+
n4o1r8Ai66o_30.000_40.000.wav 30.000 40.000 Truck
|
1123 |
+
nDtrUUc2J2U_0.000_10.000.wav 0.000 10.000 Truck
|
1124 |
+
nMaSkwx6cHE_30.000_40.000.wav 30.000 40.000 Truck
|
1125 |
+
p70IcMwsW9M_30.000_40.000.wav 30.000 40.000 Truck
|
1126 |
+
pJ1fore8JbQ_30.000_40.000.wav 30.000 40.000 Truck
|
1127 |
+
pt-J_L-OFI8_0.000_10.000.wav 0.000 10.000 Truck
|
1128 |
+
rdanJP7Usrg_30.000_40.000.wav 30.000 40.000 Truck
|
1129 |
+
srTX18ikXkE_10.000_20.000.wav 10.000 20.000 Truck
|
1130 |
+
tuplsUUDXKw_30.000_40.000.wav 30.000 40.000 Truck
|
1131 |
+
x6vuWsdeS3s_30.000_40.000.wav 30.000 40.000 Truck
|
1132 |
+
xMClk12ouB8_30.000_40.000.wav 30.000 40.000 Truck
|
1133 |
+
ycqDMKTrvLY_30.000_40.000.wav 30.000 40.000 Truck
|
1134 |
+
yk5LqHTtHLo_30.000_40.000.wav 30.000 40.000 Truck
|
1135 |
+
yrscqyUOIlI_30.000_40.000.wav 30.000 40.000 Truck
|
1136 |
+
zM3chsL-B7U_30.000_40.000.wav 30.000 40.000 Truck
|
1137 |
+
06si40RVDco_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1138 |
+
0DzsPL-xElE_20.000_30.000.wav 20.000 30.000 Motorcycle
|
1139 |
+
145N68nh4m0_120.000_130.000.wav 120.000 130.000 Motorcycle
|
1140 |
+
16vw4K9qJnY_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1141 |
+
21QlKF17ipc_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1142 |
+
3LulQoOXNB0_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1143 |
+
45JHcLU57B8_20.000_30.000.wav 20.000 30.000 Motorcycle
|
1144 |
+
4NZkW-XaIa4_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1145 |
+
506I6LfdDuk_50.000_60.000.wav 50.000 60.000 Motorcycle
|
1146 |
+
6MCy1lh4qaw_20.000_30.000.wav 20.000 30.000 Motorcycle
|
1147 |
+
6R8cO4ARzkY_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1148 |
+
6taAP7SFewI_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1149 |
+
7g6aZTBe2xE_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1150 |
+
9HcahqYUVoc_90.000_100.000.wav 90.000 100.000 Motorcycle
|
1151 |
+
9N1iw5Vdim8_20.000_30.000.wav 20.000 30.000 Motorcycle
|
1152 |
+
ANWU9Hiy_5k_40.000_50.000.wav 40.000 50.000 Motorcycle
|
1153 |
+
BTNz6NftP34_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1154 |
+
BxnLAGsByCI_10.000_20.000.wav 10.000 20.000 Motorcycle
|
1155 |
+
CZgx_6XaEkg_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1156 |
+
D3BJuOwltoI_10.000_20.000.wav 10.000 20.000 Motorcycle
|
1157 |
+
FgN9v1jYqjA_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1158 |
+
HQ8eR2lvjSE_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1159 |
+
Mb-GyQEKoEc_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1160 |
+
Pair_NsHdTc_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1161 |
+
UFIBEBkm7ao_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1162 |
+
UWz5OIijWM4_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1163 |
+
WLX3Db60418_20.000_30.000.wav 20.000 30.000 Motorcycle
|
1164 |
+
X5Xs8Y1cJK0_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1165 |
+
ZGf0vrZStwI_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1166 |
+
ZfkO1HlI0zM_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1167 |
+
bhtB2Zgh9Q8_110.000_120.000.wav 110.000 120.000 Motorcycle
|
1168 |
+
d-m8eXCpeDg_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1169 |
+
d21IwtH2oHI_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1170 |
+
dhaKGPCgtfw_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1171 |
+
ee-0JGvEIng_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1172 |
+
epGDNMrsQb8_40.000_50.000.wav 40.000 50.000 Motorcycle
|
1173 |
+
ezUkPETm6cs_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1174 |
+
f724u5z_UDw_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1175 |
+
gGmWm1i6pVo_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1176 |
+
i9VjpIbM3iE_410.000_420.000.wav 410.000 420.000 Motorcycle
|
1177 |
+
iMp8nODaotA_580.000_590.000.wav 580.000 590.000 Motorcycle
|
1178 |
+
lVW2CqsHJ4Y_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1179 |
+
lj7hzmz19-M_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1180 |
+
mX45CiTjf8I_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1181 |
+
mbLiZ_jpgeY_20.000_30.000.wav 20.000 30.000 Motorcycle
|
1182 |
+
owZDBEq6WdU_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1183 |
+
pNMBIqvbyB4_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1184 |
+
po-tnKZAzdg_40.000_50.000.wav 40.000 50.000 Motorcycle
|
1185 |
+
qAQuljp-atA_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1186 |
+
r0Oll28wmXs_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1187 |
+
sAMjMyCdGOc_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1188 |
+
vHlqKDR7ggA_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1189 |
+
wPfv8ifzzyg_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1190 |
+
wyhurCZbKQU_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1191 |
+
xQTPEQDb0Gg_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1192 |
+
xTPmoYwgKf4_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1193 |
+
xXGIKM4daMU_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1194 |
+
xZ8hQliZqhg_160.000_170.000.wav 160.000 170.000 Motorcycle
|
1195 |
+
xuMBy2NoROI_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1196 |
+
z_8yGVO1qws_30.000_40.000.wav 30.000 40.000 Motorcycle
|
1197 |
+
-BaVEk1zS2g_50.000_60.000.wav 50.000 60.000 Train
|
1198 |
+
-Q4fBQ4egrs_0.000_10.000.wav 0.000 10.000 Train
|
1199 |
+
-QxSFr1cYuQ_20.000_30.000.wav 20.000 30.000 Train
|
1200 |
+
-ZdReI9dL6M_530.000_540.000.wav 530.000 540.000 Train
|
1201 |
+
0YIyGEM0yG0_550.000_560.000.wav 550.000 560.000 Train
|
1202 |
+
1Mk2MJDhLJQ_20.000_30.000.wav 20.000 30.000 Train
|
1203 |
+
2nejPPEWqJ8_320.000_330.000.wav 320.000 330.000 Train
|
1204 |
+
3ACjUf9QpAQ_30.000_40.000.wav 30.000 40.000 Train
|
1205 |
+
3RfrTU1p5SA_500.000_510.000.wav 500.000 510.000 Train
|
1206 |
+
3YJewEC-NWo_30.000_40.000.wav 30.000 40.000 Train
|
1207 |
+
3ZZDuYU2HM4_150.000_160.000.wav 150.000 160.000 Train
|
1208 |
+
3fPX1LaGwJo_60.000_70.000.wav 60.000 70.000 Train
|
1209 |
+
4_gyCWuPxRg_170.000_180.000.wav 170.000 180.000 Train
|
1210 |
+
4l4vGrMD4Tw_550.000_560.000.wav 550.000 560.000 Train
|
1211 |
+
4oT0bxldS80_30.000_40.000.wav 30.000 40.000 Train
|
1212 |
+
4t7Mi3pnSA4_210.000_220.000.wav 210.000 220.000 Train
|
1213 |
+
53oq_Otm_XI_30.000_40.000.wav 30.000 40.000 Train
|
1214 |
+
6OgSNQOTw2U_30.000_40.000.wav 30.000 40.000 Train
|
1215 |
+
6_TGlFO0DCk_10.000_20.000.wav 10.000 20.000 Train
|
1216 |
+
7KdSGBzXvz8_420.000_430.000.wav 420.000 430.000 Train
|
1217 |
+
7W_kcu0CJqI_310.000_320.000.wav 310.000 320.000 Train
|
1218 |
+
8IaInXpdd9M_0.000_10.000.wav 0.000 10.000 Train
|
1219 |
+
8nU1aVscJec_30.000_40.000.wav 30.000 40.000 Train
|
1220 |
+
9LQEZJPNVpw_30.000_40.000.wav 30.000 40.000 Train
|
1221 |
+
9NT6gEiqpWA_30.000_40.000.wav 30.000 40.000 Train
|
1222 |
+
AFhll08KM98_30.000_40.000.wav 30.000 40.000 Train
|
1223 |
+
AHom7lBbtoY_30.000_40.000.wav 30.000 40.000 Train
|
1224 |
+
AK0kZUDk294_2.000_12.000.wav 2.000 12.000 Train
|
1225 |
+
AKPC4rEGoyI_30.000_40.000.wav 30.000 40.000 Train
|
1226 |
+
APsvUzw7bWA_60.000_70.000.wav 60.000 70.000 Train
|
1227 |
+
AshwkKUV07s_23.000_33.000.wav 23.000 33.000 Train
|
1228 |
+
BI2Tol64na0_30.000_40.000.wav 30.000 40.000 Train
|
1229 |
+
BmS2NiuT2c0_160.000_170.000.wav 160.000 170.000 Train
|
1230 |
+
CCX_4cW_SAU_0.000_10.000.wav 0.000 10.000 Train
|
1231 |
+
D_nXtMgbPNY_30.000_40.000.wav 30.000 40.000 Train
|
1232 |
+
F-JFxERdA2w_30.000_40.000.wav 30.000 40.000 Train
|
1233 |
+
FoIBRxw0tyE_30.000_40.000.wav 30.000 40.000 Train
|
1234 |
+
G958vjLYBcI_110.000_120.000.wav 110.000 120.000 Train
|
1235 |
+
GFQnh84kNwU_30.000_40.000.wav 30.000 40.000 Train
|
1236 |
+
GKc8PCTen8Q_310.000_320.000.wav 310.000 320.000 Train
|
1237 |
+
I4qODX0fypE_30.000_40.000.wav 30.000 40.000 Train
|
1238 |
+
IIIxN_ziy_I_60.000_70.000.wav 60.000 70.000 Train
|
1239 |
+
IdqEbjujFb8_30.000_40.000.wav 30.000 40.000 Train
|
1240 |
+
K-i81KrH8BQ_30.000_40.000.wav 30.000 40.000 Train
|
1241 |
+
K9pSRLw6FNc_40.000_50.000.wav 40.000 50.000 Train
|
1242 |
+
KPyYUly5xCc_90.000_100.000.wav 90.000 100.000 Train
|
1243 |
+
L3a132_uApg_50.000_60.000.wav 50.000 60.000 Train
|
1244 |
+
LK4b2eJpy24_30.000_40.000.wav 30.000 40.000 Train
|
1245 |
+
LzcNa3HvD7c_30.000_40.000.wav 30.000 40.000 Train
|
1246 |
+
MCYY8tJsnfY_7.000_17.000.wav 7.000 17.000 Train
|
1247 |
+
MDF2vsjm8jU_10.000_20.000.wav 10.000 20.000 Train
|
1248 |
+
MMfiWJVftMA_60.000_70.000.wav 60.000 70.000 Train
|
1249 |
+
MYzVHespZ-E_30.000_40.000.wav 30.000 40.000 Train
|
1250 |
+
Mbe4rlNiM84_0.000_7.000.wav 0.000 7.000 Train
|
1251 |
+
MczH_PWBNeI_360.000_370.000.wav 360.000 370.000 Train
|
1252 |
+
Mfkif49LLc4_30.000_40.000.wav 30.000 40.000 Train
|
1253 |
+
MwSbYICrYj8_290.000_300.000.wav 290.000 300.000 Train
|
1254 |
+
PJUy17bXlhc_40.000_50.000.wav 40.000 50.000 Train
|
1255 |
+
QDTbchu0LrU_30.000_40.000.wav 30.000 40.000 Train
|
1256 |
+
QZJ5WAYIUh8_70.000_80.000.wav 70.000 80.000 Train
|
1257 |
+
QrAoRSA13bM_30.000_40.000.wav 30.000 40.000 Train
|
1258 |
+
RN-_agT8_Cg_0.000_10.000.wav 0.000 10.000 Train
|
1259 |
+
R_Lpb-51Kl4_30.000_40.000.wav 30.000 40.000 Train
|
1260 |
+
Rhvy7V4F95Q_40.000_50.000.wav 40.000 50.000 Train
|
1261 |
+
Rq-22Cycrpg_30.000_40.000.wav 30.000 40.000 Train
|
1262 |
+
RrlgSfQrqQc_20.000_30.000.wav 20.000 30.000 Train
|
1263 |
+
RwBKGPEg6uA_340.000_350.000.wav 340.000 350.000 Train
|
1264 |
+
T73runykdnE_25.000_35.000.wav 25.000 35.000 Train
|
1265 |
+
T8M6W4yOzI4_30.000_40.000.wav 30.000 40.000 Train
|
1266 |
+
Tmm4H6alHCE_30.000_40.000.wav 30.000 40.000 Train
|
1267 |
+
TyTORMEourg_270.000_280.000.wav 270.000 280.000 Train
|
1268 |
+
UQx0EMXtLZA_60.000_70.000.wav 60.000 70.000 Train
|
1269 |
+
UZx7OAgRMRY_90.000_100.000.wav 90.000 100.000 Train
|
1270 |
+
UerX5Bv2hcs_70.000_80.000.wav 70.000 80.000 Train
|
1271 |
+
UxSUGCvpskM_340.000_350.000.wav 340.000 350.000 Train
|
1272 |
+
V2hln47cP78_130.000_140.000.wav 130.000 140.000 Train
|
1273 |
+
VIe_Qkg5RJI_130.000_140.000.wav 130.000 140.000 Train
|
1274 |
+
WDn851XbWTk_30.000_40.000.wav 30.000 40.000 Train
|
1275 |
+
WFdpQCtpBB4_30.000_40.000.wav 30.000 40.000 Train
|
1276 |
+
XAUtk9lwzU8_30.000_40.000.wav 30.000 40.000 Train
|
1277 |
+
XDTlBb3aYqo_30.000_40.000.wav 30.000 40.000 Train
|
1278 |
+
XKvLkIM8dck_40.000_50.000.wav 40.000 50.000 Train
|
1279 |
+
XQbeLJYzY9k_90.000_100.000.wav 90.000 100.000 Train
|
1280 |
+
XW8pSKLyr0o_20.000_30.000.wav 20.000 30.000 Train
|
1281 |
+
XeYiNanFS_M_120.000_130.000.wav 120.000 130.000 Train
|
1282 |
+
Y10I9JSvJuQ_30.000_40.000.wav 30.000 40.000 Train
|
1283 |
+
YDGf-razgyU_250.000_260.000.wav 250.000 260.000 Train
|
1284 |
+
YFD1Qrlskrg_60.000_70.000.wav 60.000 70.000 Train
|
1285 |
+
Y_jwEflLthg_190.000_200.000.wav 190.000 200.000 Train
|
1286 |
+
Y_ynIwm3qm0_370.000_380.000.wav 370.000 380.000 Train
|
1287 |
+
Zy0goYEHPHU_30.000_40.000.wav 30.000 40.000 Train
|
1288 |
+
_dkeW6lqmq4_30.000_40.000.wav 30.000 40.000 Train
|
1289 |
+
aNO2KEXBCOk_30.000_40.000.wav 30.000 40.000 Train
|
1290 |
+
aXsUHAKbyLs_30.000_40.000.wav 30.000 40.000 Train
|
1291 |
+
ahct5yzUtdE_20.000_30.000.wav 20.000 30.000 Train
|
1292 |
+
arevYmB0qGg_30.000_40.000.wav 30.000 40.000 Train
|
1293 |
+
bCGtzspNbNo_30.000_40.000.wav 30.000 40.000 Train
|
1294 |
+
bI6wPI9kAm8_70.000_80.000.wav 70.000 80.000 Train
|
1295 |
+
bpdCMWWiB_0_30.000_40.000.wav 30.000 40.000 Train
|
1296 |
+
cdrjKqyDrak_420.000_430.000.wav 420.000 430.000 Train
|
1297 |
+
d1o334I5X_k_30.000_40.000.wav 30.000 40.000 Train
|
1298 |
+
dSzZWgbJ378_30.000_40.000.wav 30.000 40.000 Train
|
1299 |
+
eRclX9l0F_c_150.000_160.000.wav 150.000 160.000 Train
|
1300 |
+
fOVsAMJ3Yms_30.000_40.000.wav 30.000 40.000 Train
|
1301 |
+
fWVfi9pAh_4_10.000_20.000.wav 10.000 20.000 Train
|
1302 |
+
fztkF47lVQg_0.000_10.000.wav 0.000 10.000 Train
|
1303 |
+
g0ICxHjC9Uc_30.000_40.000.wav 30.000 40.000 Train
|
1304 |
+
g2scd3YVgwQ_30.000_40.000.wav 30.000 40.000 Train
|
1305 |
+
g4cA-ifQc70_30.000_40.000.wav 30.000 40.000 Train
|
1306 |
+
g9JVq7wfDIo_30.000_40.000.wav 30.000 40.000 Train
|
1307 |
+
gKMpowHeyKc_30.000_40.000.wav 30.000 40.000 Train
|
1308 |
+
gTFCK9TuLOQ_30.000_40.000.wav 30.000 40.000 Train
|
1309 |
+
gU0mD2fSh4c_500.000_510.000.wav 500.000 510.000 Train
|
1310 |
+
gkH_Zxasn8o_40.000_50.000.wav 40.000 50.000 Train
|
1311 |
+
gvnM4kK4r70_10.000_20.000.wav 10.000 20.000 Train
|
1312 |
+
hH_M56EnnDk_30.000_40.000.wav 30.000 40.000 Train
|
1313 |
+
hVvtTC9AmNs_30.000_40.000.wav 30.000 40.000 Train
|
1314 |
+
hYqzr_rIIAw_30.000_40.000.wav 30.000 40.000 Train
|
1315 |
+
hdYQzH2E-e4_310.000_320.000.wav 310.000 320.000 Train
|
1316 |
+
iZgzRfa-xPQ_30.000_40.000.wav 30.000 40.000 Train
|
1317 |
+
j9Z63H5hvrQ_0.000_10.000.wav 0.000 10.000 Train
|
1318 |
+
jbW2ew8VMfU_50.000_60.000.wav 50.000 60.000 Train
|
1319 |
+
jlz7r-NSUuA_50.000_60.000.wav 50.000 60.000 Train
|
1320 |
+
k0vRZm7ZnQk_280.000_290.000.wav 280.000 290.000 Train
|
1321 |
+
k8H8rn4NaSM_0.000_10.000.wav 0.000 10.000 Train
|
1322 |
+
kbfkq3TuAe0_470.000_480.000.wav 470.000 480.000 Train
|
1323 |
+
lf1Sblrda3A_560.000_570.000.wav 560.000 570.000 Train
|
1324 |
+
m4DS9-5Gkds_30.000_40.000.wav 30.000 40.000 Train
|
1325 |
+
m5HeCy87QYY_380.000_390.000.wav 380.000 390.000 Train
|
1326 |
+
nKM4MUAsVzg_100.000_110.000.wav 100.000 110.000 Train
|
1327 |
+
nY1gcEMzsWI_10.000_20.000.wav 10.000 20.000 Train
|
1328 |
+
nfY_zkJceDw_30.000_40.000.wav 30.000 40.000 Train
|
1329 |
+
oogrnx-_LBA_60.000_70.000.wav 60.000 70.000 Train
|
1330 |
+
pW5SI1ZKUpA_30.000_40.000.wav 30.000 40.000 Train
|
1331 |
+
pbOZLMrJy0A_0.000_10.000.wav 0.000 10.000 Train
|
1332 |
+
pxmrmtEnROk_30.000_40.000.wav 30.000 40.000 Train
|
1333 |
+
q7zzKHFWGkg_30.000_40.000.wav 30.000 40.000 Train
|
1334 |
+
qu8vVFWKszA_30.000_40.000.wav 30.000 40.000 Train
|
1335 |
+
r6mHSfFkY_8_30.000_40.000.wav 30.000 40.000 Train
|
1336 |
+
rNNPQ9DD4no_30.000_40.000.wav 30.000 40.000 Train
|
1337 |
+
rSrBDAgLUoI_460.000_470.000.wav 460.000 470.000 Train
|
1338 |
+
stdjjG6Y5IU_30.000_40.000.wav 30.000 40.000 Train
|
1339 |
+
t_lFhyZaZR0_150.000_160.000.wav 150.000 160.000 Train
|
1340 |
+
txXSE7kgrc8_30.000_40.000.wav 30.000 40.000 Train
|
1341 |
+
uZfsEDo3elY_20.000_30.000.wav 20.000 30.000 Train
|
1342 |
+
umcnfA9veOw_160.000_170.000.wav 160.000 170.000 Train
|
1343 |
+
uysTr0SfhLI_10.000_20.000.wav 10.000 20.000 Train
|
1344 |
+
wM9wNgY8d4g_150.000_160.000.wav 150.000 160.000 Train
|
1345 |
+
xabrKa79prM_30.000_40.000.wav 30.000 40.000 Train
|
1346 |
+
xshKOSEF_6o_0.000_10.000.wav 0.000 10.000 Train
|
1347 |
+
yBVxtq9k8Sg_0.000_10.000.wav 0.000 10.000 Train
|
1348 |
+
yH1r2Bblluw_240.000_250.000.wav 240.000 250.000 Train
|
1349 |
+
yywGJu6jp8U_30.000_40.000.wav 30.000 40.000 Train
|
1350 |
+
z5uKFGeTtNg_30.000_40.000.wav 30.000 40.000 Train
|
audio_detection/audio_infer/metadata/black_list/groundtruth_weak_label_testing_set.csv
ADDED
@@ -0,0 +1,606 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
-5QrBL6MzLg_60.000_70.000.wav 60.000 70.000 Train horn
|
2 |
+
-E0shPRxAbo_30.000_40.000.wav 30.000 40.000 Train horn
|
3 |
+
-GCwoyCnYsY_0.000_10.000.wav 0.000 10.000 Train horn
|
4 |
+
-Gbohom8C4Q_30.000_40.000.wav 30.000 40.000 Train horn
|
5 |
+
-Qfk_Q2ctBs_30.000_40.000.wav 30.000 40.000 Train horn
|
6 |
+
-Wd1pV7UjWg_60.000_70.000.wav 60.000 70.000 Train horn
|
7 |
+
-Zq22n4OewA_30.000_40.000.wav 30.000 40.000 Train horn
|
8 |
+
-jj2tyuf6-A_80.000_90.000.wav 80.000 90.000 Train horn
|
9 |
+
-nGBPqlRNg4_30.000_40.000.wav 30.000 40.000 Train horn
|
10 |
+
-u9BxBNcrw4_30.000_40.000.wav 30.000 40.000 Train horn
|
11 |
+
-zqW9xCZd80_260.000_270.000.wav 260.000 270.000 Train horn
|
12 |
+
02w3vd_GgF0_390.000_400.000.wav 390.000 400.000 Train horn
|
13 |
+
0HqeYIREv8M_30.000_40.000.wav 30.000 40.000 Train horn
|
14 |
+
0IpYF91Fdt0_80.000_90.000.wav 80.000 90.000 Train horn
|
15 |
+
0NaZejdABG0_90.000_100.000.wav 90.000 100.000 Train horn
|
16 |
+
0RurXUfKyow_4.000_14.000.wav 4.000 14.000 Train horn
|
17 |
+
0_HnD-rW3lI_170.000_180.000.wav 170.000 180.000 Train horn
|
18 |
+
10i60V1RZkQ_210.000_220.000.wav 210.000 220.000 Train horn
|
19 |
+
1FJY5X1iY9I_170.000_180.000.wav 170.000 180.000 Train horn
|
20 |
+
1S5WKCcf-wU_40.000_50.000.wav 40.000 50.000 Train horn
|
21 |
+
1U0Ty6CW6AM_40.000_50.000.wav 40.000 50.000 Train horn
|
22 |
+
1hQLr88iCvg_30.000_40.000.wav 30.000 40.000 Train horn
|
23 |
+
1iUXERALOOs_190.000_200.000.wav 190.000 200.000 Train horn
|
24 |
+
1iWFlLpixKU_5.000_15.000.wav 5.000 15.000 Train horn
|
25 |
+
1oJAVJPX0YY_20.000_30.000.wav 20.000 30.000 Train horn
|
26 |
+
26dNsDuIt9Q_340.000_350.000.wav 340.000 350.000 Train horn
|
27 |
+
2BMHsKLcb7E_90.000_100.000.wav 90.000 100.000 Train horn
|
28 |
+
2RpOd9MJjyQ_10.000_20.000.wav 10.000 20.000 Train horn
|
29 |
+
2U4wSdl10to_200.000_210.000.wav 200.000 210.000 Train horn
|
30 |
+
2aBV6AZt5nk_570.000_580.000.wav 570.000 580.000 Train horn
|
31 |
+
-8baTnilyjs_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
32 |
+
-Gbohom8C4Q_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
33 |
+
-jG26jT3fP8_230.000_240.000.wav 230.000 240.000 Air horn, truck horn
|
34 |
+
-jj2tyuf6-A_80.000_90.000.wav 80.000 90.000 Air horn, truck horn
|
35 |
+
-v7cUxke-f4_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
36 |
+
-yeWlsEpcpA_15.000_25.000.wav 15.000 25.000 Air horn, truck horn
|
37 |
+
04KOunVOkSA_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
38 |
+
08y2LHhxmsM_400.000_410.000.wav 400.000 410.000 Air horn, truck horn
|
39 |
+
0G73yqtBwgE_11.000_21.000.wav 11.000 21.000 Air horn, truck horn
|
40 |
+
0UPY7ws-VFs_10.000_20.000.wav 10.000 20.000 Air horn, truck horn
|
41 |
+
0euD32aKYUs_10.000_20.000.wav 10.000 20.000 Air horn, truck horn
|
42 |
+
1T1i2rny8RU_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
43 |
+
1iRgwn7p0DA_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
44 |
+
1myTsHAIvYc_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
45 |
+
1z0XoG6GEv4_420.000_430.000.wav 420.000 430.000 Air horn, truck horn
|
46 |
+
26dNsDuIt9Q_340.000_350.000.wav 340.000 350.000 Air horn, truck horn
|
47 |
+
2KmSuPb9gwA_24.000_34.000.wav 24.000 34.000 Air horn, truck horn
|
48 |
+
2Vy5NCEkg2I_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
49 |
+
2ZciT0XrifM_0.000_8.000.wav 0.000 8.000 Air horn, truck horn
|
50 |
+
2jOzX06bzuA_16.000_26.000.wav 16.000 26.000 Air horn, truck horn
|
51 |
+
35EOmSMTQ6I_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
52 |
+
3YaLkgUMhAA_110.000_120.000.wav 110.000 120.000 Air horn, truck horn
|
53 |
+
3ntFslTK6hM_90.000_100.000.wav 90.000 100.000 Air horn, truck horn
|
54 |
+
3rGOv4evODE_20.000_30.000.wav 20.000 30.000 Air horn, truck horn
|
55 |
+
42U7xIucU68_20.000_30.000.wav 20.000 30.000 Air horn, truck horn
|
56 |
+
46r7mO2k6zY_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
57 |
+
4EBnb2DN3Yg_13.000_23.000.wav 13.000 23.000 Air horn, truck horn
|
58 |
+
4NTjS5pFfSc_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
59 |
+
4bvfOnX7BIE_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
60 |
+
4l78f9VZ9uE_30.000_40.000.wav 30.000 40.000 Air horn, truck horn
|
61 |
+
-ajCLjpfGKI_83.000_93.000.wav 83.000 93.000 Car alarm
|
62 |
+
-hLSc9aPOms_13.000_23.000.wav 13.000 23.000 Car alarm
|
63 |
+
-rgDWfvxxqw_30.000_40.000.wav 30.000 40.000 Car alarm
|
64 |
+
0C3kqtF76t8_50.000_60.000.wav 50.000 60.000 Car alarm
|
65 |
+
0Hz4R_m0hmI_80.000_90.000.wav 80.000 90.000 Car alarm
|
66 |
+
0ZPafgZftWk_80.000_90.000.wav 80.000 90.000 Car alarm
|
67 |
+
0npLQ4LzD0c_40.000_50.000.wav 40.000 50.000 Car alarm
|
68 |
+
17VuPl9Wxvs_20.000_30.000.wav 20.000 30.000 Car alarm
|
69 |
+
3HxQ83IMyw4_70.000_80.000.wav 70.000 80.000 Car alarm
|
70 |
+
3z05luLEc_Q_0.000_10.000.wav 0.000 10.000 Car alarm
|
71 |
+
4A1Ar1TIXIY_30.000_40.000.wav 30.000 40.000 Car alarm
|
72 |
+
4Kpklmj-ze0_53.000_63.000.wav 53.000 63.000 Car alarm
|
73 |
+
4h01lBkTVQY_18.000_28.000.wav 18.000 28.000 Car alarm
|
74 |
+
5-SzZotiaBU_30.000_40.000.wav 30.000 40.000 Car alarm
|
75 |
+
54PbkldEp9M_30.000_40.000.wav 30.000 40.000 Car alarm
|
76 |
+
5P6YYsMaIH4_30.000_40.000.wav 30.000 40.000 Car alarm
|
77 |
+
5tzTahLHylw_70.000_80.000.wav 70.000 80.000 Car alarm
|
78 |
+
7DC3HtNi4fU_160.000_170.000.wav 160.000 170.000 Car alarm
|
79 |
+
7NJ5TbNEIvA_250.000_260.000.wav 250.000 260.000 Car alarm
|
80 |
+
7NZ0kMj2HSI_54.000_64.000.wav 54.000 64.000 Car alarm
|
81 |
+
7RQpt1_1ZzU_30.000_40.000.wav 30.000 40.000 Car alarm
|
82 |
+
7ee54nr6jG8_30.000_40.000.wav 30.000 40.000 Car alarm
|
83 |
+
8OajsyPSNt8_40.000_50.000.wav 40.000 50.000 Car alarm
|
84 |
+
9fCibkUT_gQ_30.000_40.000.wav 30.000 40.000 Car alarm
|
85 |
+
9fzeD7CeI7Y_110.000_120.000.wav 110.000 120.000 Car alarm
|
86 |
+
9jYv9WuyknA_130.000_140.000.wav 130.000 140.000 Car alarm
|
87 |
+
A-GNszKtjJc_93.000_103.000.wav 93.000 103.000 Car alarm
|
88 |
+
A437a4Y_xag_230.000_240.000.wav 230.000 240.000 Car alarm
|
89 |
+
APMPW2YI-Zk_20.000_30.000.wav 20.000 30.000 Car alarm
|
90 |
+
AR-KmtlXg4Y_70.000_80.000.wav 70.000 80.000 Car alarm
|
91 |
+
-60XojQWWoc_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
92 |
+
-6d-zxMvC5E_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
93 |
+
-6qSMlbJJ58_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
94 |
+
-8OITuFZha8_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
95 |
+
-8n2NqDFRko_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
96 |
+
-AIrHVeCgtM_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
97 |
+
-AVzYvKHwPg_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
98 |
+
-AXDeY-N2_M_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
99 |
+
-B1uzsLG0Dk_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
100 |
+
-BM_EAszxBg_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
101 |
+
-Em3OpyaefM_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
102 |
+
-FWkB2IDMhc_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
103 |
+
-SP7KWmTRUU_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
104 |
+
-h4or05bj_I_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
105 |
+
-oV6dQu5tZo_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
106 |
+
-r8mfjRiHrU_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
107 |
+
-s9kwrRilOY_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
108 |
+
-uMiGr6xvRA_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
109 |
+
-x70B12Mb-8_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
110 |
+
-xYsfYZOI-Y_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
111 |
+
-zxrdL6MlKI_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
112 |
+
03xMfqt4fZI_24.000_34.000.wav 24.000 34.000 Reversing beeps
|
113 |
+
0E4AqW9dmdk_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
114 |
+
0FQo-2xRJ0E_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
115 |
+
0HmiH-wKLB4_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
116 |
+
0KskqFt3DoY_15.000_25.000.wav 15.000 25.000 Reversing beeps
|
117 |
+
0OiPtV9sd_w_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
118 |
+
0P-YGHC5cBU_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
119 |
+
0QKet-tdquc_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
120 |
+
0VnoYVqd-yo_30.000_40.000.wav 30.000 40.000 Reversing beeps
|
121 |
+
-5px8DVPl8A_28.000_38.000.wav 28.000 38.000 Bicycle
|
122 |
+
-D08wyQwDPQ_10.000_20.000.wav 10.000 20.000 Bicycle
|
123 |
+
-F1_Gh78vJ0_30.000_40.000.wav 30.000 40.000 Bicycle
|
124 |
+
-FZQIkX44Pk_10.000_20.000.wav 10.000 20.000 Bicycle
|
125 |
+
-FsvS99nWTc_30.000_40.000.wav 30.000 40.000 Bicycle
|
126 |
+
-Holdef_BZ0_30.000_40.000.wav 30.000 40.000 Bicycle
|
127 |
+
-Inn26beF70_30.000_40.000.wav 30.000 40.000 Bicycle
|
128 |
+
-Jq9HNSs_ns_14.000_24.000.wav 14.000 24.000 Bicycle
|
129 |
+
-KlN_AXMM0Q_30.000_40.000.wav 30.000 40.000 Bicycle
|
130 |
+
-NCcqKWiGus_30.000_40.000.wav 30.000 40.000 Bicycle
|
131 |
+
-NNC_TqWfGw_30.000_40.000.wav 30.000 40.000 Bicycle
|
132 |
+
-OGFiXvmldM_30.000_40.000.wav 30.000 40.000 Bicycle
|
133 |
+
-RFpDUZhN-g_13.000_23.000.wav 13.000 23.000 Bicycle
|
134 |
+
-XUfeRTw3b4_0.000_6.000.wav 0.000 6.000 Bicycle
|
135 |
+
-XoATxJ-Qcg_30.000_40.000.wav 30.000 40.000 Bicycle
|
136 |
+
-bFNxvFwDts_470.000_480.000.wav 470.000 480.000 Bicycle
|
137 |
+
-e5PokL6Cyo_30.000_40.000.wav 30.000 40.000 Bicycle
|
138 |
+
-fNyOf9zIU0_30.000_40.000.wav 30.000 40.000 Bicycle
|
139 |
+
-fhpkRyZL90_30.000_40.000.wav 30.000 40.000 Bicycle
|
140 |
+
-fo3m0hiZbg_30.000_40.000.wav 30.000 40.000 Bicycle
|
141 |
+
-ikJkNwcmkA_27.000_37.000.wav 27.000 37.000 Bicycle
|
142 |
+
-k2nMcxAjWE_30.000_40.000.wav 30.000 40.000 Bicycle
|
143 |
+
-k80ibA-fyw_30.000_40.000.wav 30.000 40.000 Bicycle
|
144 |
+
-lBcEVa_NKw_30.000_40.000.wav 30.000 40.000 Bicycle
|
145 |
+
-mQyAYU_Bd4_50.000_60.000.wav 50.000 60.000 Bicycle
|
146 |
+
-ngrinYHF4c_30.000_40.000.wav 30.000 40.000 Bicycle
|
147 |
+
-nqm_RJ2xj8_40.000_50.000.wav 40.000 50.000 Bicycle
|
148 |
+
-oAw5iTeT1g_40.000_50.000.wav 40.000 50.000 Bicycle
|
149 |
+
-p2EMzpTE38_4.000_14.000.wav 4.000 14.000 Bicycle
|
150 |
+
-qmfWP_yzn4_30.000_40.000.wav 30.000 40.000 Bicycle
|
151 |
+
-0DIFwkUpjQ_50.000_60.000.wav 50.000 60.000 Skateboard
|
152 |
+
-53qltVyjpc_180.000_190.000.wav 180.000 190.000 Skateboard
|
153 |
+
-5y4jb9eUWs_110.000_120.000.wav 110.000 120.000 Skateboard
|
154 |
+
-81kolkG8M0_0.000_8.000.wav 0.000 8.000 Skateboard
|
155 |
+
-9dwTSq6JZg_70.000_80.000.wav 70.000 80.000 Skateboard
|
156 |
+
-9oKZsjjf_0_20.000_30.000.wav 20.000 30.000 Skateboard
|
157 |
+
-AFGfu5zOzQ_30.000_40.000.wav 30.000 40.000 Skateboard
|
158 |
+
-DHGwygUsQc_30.000_40.000.wav 30.000 40.000 Skateboard
|
159 |
+
-DkuTmIs7_Q_30.000_40.000.wav 30.000 40.000 Skateboard
|
160 |
+
-E1E17R7UBA_260.000_270.000.wav 260.000 270.000 Skateboard
|
161 |
+
-E1aIXhB4YU_30.000_40.000.wav 30.000 40.000 Skateboard
|
162 |
+
-McJLXNN3-o_50.000_60.000.wav 50.000 60.000 Skateboard
|
163 |
+
-N7nQ4CXGsY_170.000_180.000.wav 170.000 180.000 Skateboard
|
164 |
+
-O5vrHFRzcY_30.000_40.000.wav 30.000 40.000 Skateboard
|
165 |
+
-Plh9jAN_Eo_0.000_2.000.wav 0.000 2.000 Skateboard
|
166 |
+
-Qd_dXTbgK0_30.000_40.000.wav 30.000 40.000 Skateboard
|
167 |
+
-aVZ-H92M_s_0.000_4.000.wav 0.000 4.000 Skateboard
|
168 |
+
-cd-Zn8qFxU_90.000_100.000.wav 90.000 100.000 Skateboard
|
169 |
+
-esP4loyvjM_60.000_70.000.wav 60.000 70.000 Skateboard
|
170 |
+
-iB3a71aPew_30.000_40.000.wav 30.000 40.000 Skateboard
|
171 |
+
-lZapwtvwlg_0.000_10.000.wav 0.000 10.000 Skateboard
|
172 |
+
-mxMaMJCXL8_180.000_190.000.wav 180.000 190.000 Skateboard
|
173 |
+
-nYGTw9Sypg_20.000_30.000.wav 20.000 30.000 Skateboard
|
174 |
+
-oS19KshdlM_30.000_40.000.wav 30.000 40.000 Skateboard
|
175 |
+
-s6uxc77NWo_40.000_50.000.wav 40.000 50.000 Skateboard
|
176 |
+
-sCrXS2kJlA_30.000_40.000.wav 30.000 40.000 Skateboard
|
177 |
+
-saCvPTdQ7s_30.000_40.000.wav 30.000 40.000 Skateboard
|
178 |
+
-sb-knLiDic_20.000_30.000.wav 20.000 30.000 Skateboard
|
179 |
+
-tSwRvqaKWg_90.000_100.000.wav 90.000 100.000 Skateboard
|
180 |
+
-x_jV34hVq4_30.000_40.000.wav 30.000 40.000 Skateboard
|
181 |
+
--ljM2Kojag_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
182 |
+
-4F1TX-T6T4_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
183 |
+
-7HVWUwyMig_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
184 |
+
-9pUUT-6o8U_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
185 |
+
-Ei2LE71Dfg_20.000_30.000.wav 20.000 30.000 Ambulance (siren)
|
186 |
+
-LGTb-xyjzA_11.000_21.000.wav 11.000 21.000 Ambulance (siren)
|
187 |
+
-Y1qiiugnk8_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
188 |
+
-YsrLG2K1TE_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
189 |
+
-ZeMV790MXE_10.000_20.000.wav 10.000 20.000 Ambulance (siren)
|
190 |
+
-d-T8Y9-TOg_17.000_27.000.wav 17.000 27.000 Ambulance (siren)
|
191 |
+
-dcrL5JLmvo_11.000_21.000.wav 11.000 21.000 Ambulance (siren)
|
192 |
+
-fCSO8SVWZU_6.000_16.000.wav 6.000 16.000 Ambulance (siren)
|
193 |
+
-fGFQTGd2nA_10.000_20.000.wav 10.000 20.000 Ambulance (siren)
|
194 |
+
-hA1yMrEXz0_10.000_20.000.wav 10.000 20.000 Ambulance (siren)
|
195 |
+
-jnQgpHubNI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
196 |
+
-k6p9n9y22Q_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
197 |
+
-kr4SUjnm88_29.000_39.000.wav 29.000 39.000 Ambulance (siren)
|
198 |
+
-lyPnABQhCI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
199 |
+
-od8LQAVgno_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
200 |
+
-pVEgzu95Nc_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
201 |
+
-w-9yF465IY_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
202 |
+
-woquFRnQk8_16.000_26.000.wav 16.000 26.000 Ambulance (siren)
|
203 |
+
-xz75wUCln8_50.000_60.000.wav 50.000 60.000 Ambulance (siren)
|
204 |
+
-yGElLHdkEI_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
205 |
+
-yPSgCn9AWo_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
206 |
+
-z8jsgl3iHE_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
207 |
+
00H_s-krtg8_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
208 |
+
02u3P99INjs_8.000_18.000.wav 8.000 18.000 Ambulance (siren)
|
209 |
+
06RreMb5qbE_0.000_10.000.wav 0.000 10.000 Ambulance (siren)
|
210 |
+
0EPK7Pv_lbE_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
211 |
+
-0Eem_FuIto_15.000_25.000.wav 15.000 25.000 Fire engine, fire truck (siren)
|
212 |
+
-2sT5oBBWWY_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
213 |
+
-45cKZA7Jww_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
214 |
+
-4B435WQvag_20.000_30.000.wav 20.000 30.000 Fire engine, fire truck (siren)
|
215 |
+
-6qhtwdfGOA_23.000_33.000.wav 23.000 33.000 Fire engine, fire truck (siren)
|
216 |
+
-8uyNBFbdFc_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
217 |
+
-Jsu4dbuO4A_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
218 |
+
-KsPTvgJJVE_350.000_360.000.wav 350.000 360.000 Fire engine, fire truck (siren)
|
219 |
+
-PRrNx6_MD0_16.000_26.000.wav 16.000 26.000 Fire engine, fire truck (siren)
|
220 |
+
-QBo1W2w8II_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
221 |
+
-QX-ddNtUvE_24.000_34.000.wav 24.000 34.000 Fire engine, fire truck (siren)
|
222 |
+
-RlUu1el2G4_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
223 |
+
-SkO97C81Ms_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
224 |
+
-T8QHPXfIC4_13.000_23.000.wav 13.000 23.000 Fire engine, fire truck (siren)
|
225 |
+
-USiTjZoh88_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
226 |
+
-X0vNLwH1C0_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
227 |
+
-Z3ByS_RCwI_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
228 |
+
-ZtZOcg3s7M_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
229 |
+
-cOjJ0Nvtlw_23.000_33.000.wav 23.000 33.000 Fire engine, fire truck (siren)
|
230 |
+
-cbYvBBXE6A_12.000_22.000.wav 12.000 22.000 Fire engine, fire truck (siren)
|
231 |
+
-eYUCWGQ_wU_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
232 |
+
-hA1yMrEXz0_10.000_20.000.wav 10.000 20.000 Fire engine, fire truck (siren)
|
233 |
+
-hplTh4SGvs_90.000_100.000.wav 90.000 100.000 Fire engine, fire truck (siren)
|
234 |
+
-nPhg6Eu4b4_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
235 |
+
-oCvKmNbhl0_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
236 |
+
-oEGuMg8hT4_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
237 |
+
-pvaJ4DwtRg_3.000_13.000.wav 3.000 13.000 Fire engine, fire truck (siren)
|
238 |
+
-qKRKDTbt4c_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
239 |
+
-sJn3uUxpH8_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
240 |
+
-sfn1NDHWJI_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
241 |
+
-09rxiqNNEs_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
242 |
+
-3qh-WFUV2U_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
243 |
+
-4JG_Ag99hY_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
244 |
+
-60NmEaP0is_0.000_10.000.wav 0.000 10.000 Civil defense siren
|
245 |
+
-6cTEqIcics_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
246 |
+
-6iVBmb5PZU_40.000_50.000.wav 40.000 50.000 Civil defense siren
|
247 |
+
-6qp8NjWffE_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
248 |
+
-75iY1j3MeY_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
249 |
+
-E3Yju3lrRo_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
250 |
+
-FHSBdx5A3g_40.000_50.000.wav 40.000 50.000 Civil defense siren
|
251 |
+
-JhSzxTdcwY_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
252 |
+
-OtNDK_Hxp8_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
253 |
+
-S3_I0RiG3g_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
254 |
+
-YMXgDKKAwU_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
255 |
+
-c7XoYM-SSY_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
256 |
+
-j8EeIX9ynk_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
257 |
+
-t478yabOQw_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
258 |
+
-uIyMR9luvg_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
259 |
+
-wgP6ua-t4k_40.000_50.000.wav 40.000 50.000 Civil defense siren
|
260 |
+
-zGAb18JxmI_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
261 |
+
03NLMEMi8-I_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
262 |
+
0552YhBdeXo_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
263 |
+
06TM6z3NvuY_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
264 |
+
0CUi0oGUzjU_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
265 |
+
0GpUFFJNFH8_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
266 |
+
0H_WUo2srs0_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
267 |
+
0HvYkBXQ44A_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
268 |
+
0I6Mlp27_gM_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
269 |
+
0JKcTVpby0I_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
270 |
+
0PhU-PIsUMw_40.000_50.000.wav 40.000 50.000 Civil defense siren
|
271 |
+
-122tCXtFhU_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
272 |
+
-1U98XBTyB4_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
273 |
+
-2GlU3e0nTU_170.000_180.000.wav 170.000 180.000 Police car (siren)
|
274 |
+
-6WqJCSmkCw_70.000_80.000.wav 70.000 80.000 Police car (siren)
|
275 |
+
-AF7wp3ezww_140.000_150.000.wav 140.000 150.000 Police car (siren)
|
276 |
+
-AFASmp1fpk_6.000_16.000.wav 6.000 16.000 Police car (siren)
|
277 |
+
-F2lk9A8B8M_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
278 |
+
-GPv09qi9A8_120.000_130.000.wav 120.000 130.000 Police car (siren)
|
279 |
+
-Hi-WpRGUpc_9.000_19.000.wav 9.000 19.000 Police car (siren)
|
280 |
+
-KsPTvgJJVE_350.000_360.000.wav 350.000 360.000 Police car (siren)
|
281 |
+
-MfBpxtGQmE_20.000_30.000.wav 20.000 30.000 Police car (siren)
|
282 |
+
-Pg4vVPs4bE_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
283 |
+
-UCf_-3yzWU_290.000_300.000.wav 290.000 300.000 Police car (siren)
|
284 |
+
-VULyMtKazE_0.000_7.000.wav 0.000 7.000 Police car (siren)
|
285 |
+
-XRiLbb3Syo_2.000_12.000.wav 2.000 12.000 Police car (siren)
|
286 |
+
-XrpzGb6xCU_190.000_200.000.wav 190.000 200.000 Police car (siren)
|
287 |
+
-YsrLG2K1TE_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
288 |
+
-ZtZOcg3s7M_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
289 |
+
-_8fdnv6Crg_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
290 |
+
-az6BooRLxw_40.000_50.000.wav 40.000 50.000 Police car (siren)
|
291 |
+
-bs3c27rEtc_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
292 |
+
-dBTGdL4RFs_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
293 |
+
-gKNRXbpAKs_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
294 |
+
-hA1yMrEXz0_10.000_20.000.wav 10.000 20.000 Police car (siren)
|
295 |
+
-haSUR_IUto_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
296 |
+
-l-DEfDAvNA_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
297 |
+
-lWs7_49gss_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
298 |
+
-lhnhB4rbGw_3.000_13.000.wav 3.000 13.000 Police car (siren)
|
299 |
+
-rkJeBBmiTQ_60.000_70.000.wav 60.000 70.000 Police car (siren)
|
300 |
+
-rs7FPxzc6w_8.000_18.000.wav 8.000 18.000 Police car (siren)
|
301 |
+
-20uudT97E0_30.000_40.000.wav 30.000 40.000 Screaming
|
302 |
+
-3bGlOhRkAo_140.000_150.000.wav 140.000 150.000 Screaming
|
303 |
+
-4pUrlMafww_1.000_11.000.wav 1.000 11.000 Screaming
|
304 |
+
-7R0ybQQAHg_60.000_70.000.wav 60.000 70.000 Screaming
|
305 |
+
-7gojlG6bE4_30.000_40.000.wav 30.000 40.000 Screaming
|
306 |
+
-GI5PbO6j50_30.000_40.000.wav 30.000 40.000 Screaming
|
307 |
+
-MuIRudOtxw_30.000_40.000.wav 30.000 40.000 Screaming
|
308 |
+
-WfQBr42ymw_30.000_40.000.wav 30.000 40.000 Screaming
|
309 |
+
-YOjIgYspsY_30.000_40.000.wav 30.000 40.000 Screaming
|
310 |
+
-g_AcRVFfXU_30.000_40.000.wav 30.000 40.000 Screaming
|
311 |
+
-gb5uvwsRpI_30.000_40.000.wav 30.000 40.000 Screaming
|
312 |
+
-iAwqlQ3TEk_0.000_3.000.wav 0.000 3.000 Screaming
|
313 |
+
-nJoxcmxz5g_30.000_40.000.wav 30.000 40.000 Screaming
|
314 |
+
-pwgypWE-J8_30.000_40.000.wav 30.000 40.000 Screaming
|
315 |
+
-pzasCR0kpc_30.000_40.000.wav 30.000 40.000 Screaming
|
316 |
+
-sUgHKZQKYc_30.000_40.000.wav 30.000 40.000 Screaming
|
317 |
+
-uazzQEmQ7c_0.000_10.000.wav 0.000 10.000 Screaming
|
318 |
+
-vHJU1wDRsY_30.000_40.000.wav 30.000 40.000 Screaming
|
319 |
+
0-RnTXpp8Q0_30.000_40.000.wav 30.000 40.000 Screaming
|
320 |
+
09YQukdYVI4_30.000_40.000.wav 30.000 40.000 Screaming
|
321 |
+
0Ees8KFCUXM_30.000_40.000.wav 30.000 40.000 Screaming
|
322 |
+
0EymGuYWkFk_30.000_40.000.wav 30.000 40.000 Screaming
|
323 |
+
0Nw1OyTsaAo_30.000_40.000.wav 30.000 40.000 Screaming
|
324 |
+
0YnOMAls83g_30.000_40.000.wav 30.000 40.000 Screaming
|
325 |
+
0_gyUQkLCY8_30.000_40.000.wav 30.000 40.000 Screaming
|
326 |
+
0_hnDV2SHBI_7.000_17.000.wav 7.000 17.000 Screaming
|
327 |
+
0cqEaAkbrbI_80.000_90.000.wav 80.000 90.000 Screaming
|
328 |
+
0hC044mDsWA_30.000_40.000.wav 30.000 40.000 Screaming
|
329 |
+
0kQANiakiH0_30.000_40.000.wav 30.000 40.000 Screaming
|
330 |
+
0rVBXpbgO8s_30.000_40.000.wav 30.000 40.000 Screaming
|
331 |
+
---lTs1dxhU_30.000_40.000.wav 30.000 40.000 Car
|
332 |
+
--330hg-Ocw_30.000_40.000.wav 30.000 40.000 Car
|
333 |
+
--8puiAGLhs_30.000_40.000.wav 30.000 40.000 Car
|
334 |
+
--9VR_F7CtY_30.000_40.000.wav 30.000 40.000 Car
|
335 |
+
--F70LWypIg_30.000_40.000.wav 30.000 40.000 Car
|
336 |
+
--P4wuph3Mc_0.000_8.000.wav 0.000 8.000 Car
|
337 |
+
--QvRbvnbUE_30.000_40.000.wav 30.000 40.000 Car
|
338 |
+
--SeOZy3Yik_30.000_40.000.wav 30.000 40.000 Car
|
339 |
+
--Zz7BgxSUg_30.000_40.000.wav 30.000 40.000 Car
|
340 |
+
--e0Vu_ruTc_30.000_40.000.wav 30.000 40.000 Car
|
341 |
+
--iFD6IyQW8_30.000_40.000.wav 30.000 40.000 Car
|
342 |
+
--jGnLqFsQ4_24.000_34.000.wav 24.000 34.000 Car
|
343 |
+
--jc0NAxK8M_30.000_40.000.wav 30.000 40.000 Car
|
344 |
+
--v1WjOJv-w_150.000_160.000.wav 150.000 160.000 Car
|
345 |
+
--xDffQ9Mwo_30.000_40.000.wav 30.000 40.000 Car
|
346 |
+
--yaQA8d1dI_6.000_16.000.wav 6.000 16.000 Car
|
347 |
+
--zLzL0sq3M_30.000_40.000.wav 30.000 40.000 Car
|
348 |
+
-0-jXXldDOU_10.000_20.000.wav 10.000 20.000 Car
|
349 |
+
-03ld83JliM_29.000_39.000.wav 29.000 39.000 Car
|
350 |
+
-0B-egfXU7E_30.000_40.000.wav 30.000 40.000 Car
|
351 |
+
-0Bkyt8iZ1I_8.000_18.000.wav 8.000 18.000 Car
|
352 |
+
-0CIk-OOp7Y_30.000_40.000.wav 30.000 40.000 Car
|
353 |
+
-0CRb8H4hzY_4.000_14.000.wav 4.000 14.000 Car
|
354 |
+
-0CY5NWBHyY_20.000_30.000.wav 20.000 30.000 Car
|
355 |
+
-0HsrVfb5vc_20.000_30.000.wav 20.000 30.000 Car
|
356 |
+
-0I89-H0AFo_26.000_36.000.wav 26.000 36.000 Car
|
357 |
+
-0P6VDQ1YDs_80.000_90.000.wav 80.000 90.000 Car
|
358 |
+
-0PrEsytvc0_30.000_40.000.wav 30.000 40.000 Car
|
359 |
+
-0RqnaXZu_E_30.000_40.000.wav 30.000 40.000 Car
|
360 |
+
-0Yynyhm1AY_14.000_24.000.wav 14.000 24.000 Car
|
361 |
+
---lTs1dxhU_30.000_40.000.wav 30.000 40.000 Car passing by
|
362 |
+
--P4wuph3Mc_0.000_8.000.wav 0.000 8.000 Car passing by
|
363 |
+
--xDffQ9Mwo_30.000_40.000.wav 30.000 40.000 Car passing by
|
364 |
+
--zLzL0sq3M_30.000_40.000.wav 30.000 40.000 Car passing by
|
365 |
+
--zbPxnl27o_20.000_30.000.wav 20.000 30.000 Car passing by
|
366 |
+
-0CRb8H4hzY_4.000_14.000.wav 4.000 14.000 Car passing by
|
367 |
+
-0MnD7jBvkE_0.000_4.000.wav 0.000 4.000 Car passing by
|
368 |
+
-0U3c4PN8sc_30.000_40.000.wav 30.000 40.000 Car passing by
|
369 |
+
-0Yynyhm1AY_14.000_24.000.wav 14.000 24.000 Car passing by
|
370 |
+
-10fWp7Pqs4_30.000_40.000.wav 30.000 40.000 Car passing by
|
371 |
+
-14BFlDzjS4_6.000_16.000.wav 6.000 16.000 Car passing by
|
372 |
+
-15nPYi2v1g_30.000_40.000.wav 30.000 40.000 Car passing by
|
373 |
+
-19pq3HJoBM_30.000_40.000.wav 30.000 40.000 Car passing by
|
374 |
+
-1BrkFLHD74_19.000_29.000.wav 19.000 29.000 Car passing by
|
375 |
+
-1HlfoHZCEE_6.000_16.000.wav 6.000 16.000 Car passing by
|
376 |
+
-1McjOPUzbo_30.000_40.000.wav 30.000 40.000 Car passing by
|
377 |
+
-1sGSNmgiPs_4.000_14.000.wav 4.000 14.000 Car passing by
|
378 |
+
-2-luek6dI8_30.000_40.000.wav 30.000 40.000 Car passing by
|
379 |
+
-21-RfxQscI_30.000_40.000.wav 30.000 40.000 Car passing by
|
380 |
+
-25LkbSjEos_30.000_40.000.wav 30.000 40.000 Car passing by
|
381 |
+
-2LJWaL2PuA_30.000_40.000.wav 30.000 40.000 Car passing by
|
382 |
+
-2ZbvsBSZmY_2.000_12.000.wav 2.000 12.000 Car passing by
|
383 |
+
-2cz2qQDmr4_30.000_40.000.wav 30.000 40.000 Car passing by
|
384 |
+
-31KUAOSg5U_5.000_15.000.wav 5.000 15.000 Car passing by
|
385 |
+
-35qBdzN9ck_30.000_40.000.wav 30.000 40.000 Car passing by
|
386 |
+
-3929cmVE20_30.000_40.000.wav 30.000 40.000 Car passing by
|
387 |
+
-3M-k4nIYIM_30.000_40.000.wav 30.000 40.000 Car passing by
|
388 |
+
-3MNphBfq_0_30.000_40.000.wav 30.000 40.000 Car passing by
|
389 |
+
-3_RSVYKkkk_30.000_40.000.wav 30.000 40.000 Car passing by
|
390 |
+
-3exNVlj92w_30.000_40.000.wav 30.000 40.000 Car passing by
|
391 |
+
--0w1YA1Hm4_30.000_40.000.wav 30.000 40.000 Bus
|
392 |
+
-0_vEaaXndY_11.000_21.000.wav 11.000 21.000 Bus
|
393 |
+
-5GcZwBvBdI_30.000_40.000.wav 30.000 40.000 Bus
|
394 |
+
-5digoPWn6U_8.000_18.000.wav 8.000 18.000 Bus
|
395 |
+
-79l4w4DsYM_30.000_40.000.wav 30.000 40.000 Bus
|
396 |
+
-7B4pbkIEas_30.000_40.000.wav 30.000 40.000 Bus
|
397 |
+
-8YTu7ZGA2w_30.000_40.000.wav 30.000 40.000 Bus
|
398 |
+
-93IM29_8rs_14.000_24.000.wav 14.000 24.000 Bus
|
399 |
+
-9GhPxGkpio_26.000_36.000.wav 26.000 36.000 Bus
|
400 |
+
-9J9xs7LM9Y_25.000_35.000.wav 25.000 35.000 Bus
|
401 |
+
-AY_lZLYJR8_8.000_18.000.wav 8.000 18.000 Bus
|
402 |
+
-AdQBgtN_4E_30.000_40.000.wav 30.000 40.000 Bus
|
403 |
+
-BxfsWlPUPY_30.000_40.000.wav 30.000 40.000 Bus
|
404 |
+
-CgCr8Eknm0_14.000_24.000.wav 14.000 24.000 Bus
|
405 |
+
-CnsvTDIXdE_20.000_30.000.wav 20.000 30.000 Bus
|
406 |
+
-CpMlnGhxEU_0.000_9.000.wav 0.000 9.000 Bus
|
407 |
+
-DP_cv0x_Ng_30.000_40.000.wav 30.000 40.000 Bus
|
408 |
+
-FEXRjcryZE_30.000_40.000.wav 30.000 40.000 Bus
|
409 |
+
-Fp2-w-iLiE_20.000_30.000.wav 20.000 30.000 Bus
|
410 |
+
-GLk6G9U09A_30.000_40.000.wav 30.000 40.000 Bus
|
411 |
+
-Ga9sSkpngg_30.000_40.000.wav 30.000 40.000 Bus
|
412 |
+
-H8V23dZoLo_0.000_10.000.wav 0.000 10.000 Bus
|
413 |
+
-HeQfwKbFzg_30.000_40.000.wav 30.000 40.000 Bus
|
414 |
+
-HzzEuFBiDU_30.000_40.000.wav 30.000 40.000 Bus
|
415 |
+
-I4INTpMKT4_30.000_40.000.wav 30.000 40.000 Bus
|
416 |
+
-II-7qJxKPc_21.000_31.000.wav 21.000 31.000 Bus
|
417 |
+
-LnpzyfTkF8_30.000_40.000.wav 30.000 40.000 Bus
|
418 |
+
-OgRshQfsi8_30.000_40.000.wav 30.000 40.000 Bus
|
419 |
+
-P53lJ1ViWk_30.000_40.000.wav 30.000 40.000 Bus
|
420 |
+
-PvNUvEov4Q_30.000_40.000.wav 30.000 40.000 Bus
|
421 |
+
--12UOziMF0_30.000_40.000.wav 30.000 40.000 Truck
|
422 |
+
--73E04RpiQ_0.000_9.000.wav 0.000 9.000 Truck
|
423 |
+
--J947HxQVM_0.000_9.000.wav 0.000 9.000 Truck
|
424 |
+
--bD1DVKlzQ_30.000_40.000.wav 30.000 40.000 Truck
|
425 |
+
--ivFZu-hlc_30.000_40.000.wav 30.000 40.000 Truck
|
426 |
+
--wuU7kzB5o_30.000_40.000.wav 30.000 40.000 Truck
|
427 |
+
-0B_CYyG5Dg_30.000_40.000.wav 30.000 40.000 Truck
|
428 |
+
-0JqTq_4jaE_40.000_50.000.wav 40.000 50.000 Truck
|
429 |
+
-0MrEZKJ5MQ_30.000_40.000.wav 30.000 40.000 Truck
|
430 |
+
-0awng26xQ8_30.000_40.000.wav 30.000 40.000 Truck
|
431 |
+
-0dq1Vg9rd8_30.000_40.000.wav 30.000 40.000 Truck
|
432 |
+
-0wkq7CUYME_310.000_320.000.wav 310.000 320.000 Truck
|
433 |
+
-14RXdkqYuI_30.000_40.000.wav 30.000 40.000 Truck
|
434 |
+
-1B3CzpiW1M_30.000_40.000.wav 30.000 40.000 Truck
|
435 |
+
-1Q21cZhHDE_30.000_40.000.wav 30.000 40.000 Truck
|
436 |
+
-1ZXXnBXJ6c_8.000_18.000.wav 8.000 18.000 Truck
|
437 |
+
-1s0DWApvT8_30.000_40.000.wav 30.000 40.000 Truck
|
438 |
+
-1s84_2Vn4g_30.000_40.000.wav 30.000 40.000 Truck
|
439 |
+
-26ansJluVo_30.000_40.000.wav 30.000 40.000 Truck
|
440 |
+
-2EscdO0l-A_30.000_40.000.wav 30.000 40.000 Truck
|
441 |
+
-2GlU3e0nTU_170.000_180.000.wav 170.000 180.000 Truck
|
442 |
+
-2NBZUCcvm0_30.000_40.000.wav 30.000 40.000 Truck
|
443 |
+
-2sT5oBBWWY_30.000_40.000.wav 30.000 40.000 Truck
|
444 |
+
-2vmprMUw10_30.000_40.000.wav 30.000 40.000 Truck
|
445 |
+
-2x4TB8VWvE_18.000_28.000.wav 18.000 28.000 Truck
|
446 |
+
-39q4y0tt-g_30.000_40.000.wav 30.000 40.000 Truck
|
447 |
+
-3N5rjPrNCc_190.000_200.000.wav 190.000 200.000 Truck
|
448 |
+
-3NcUIyJtFY_30.000_40.000.wav 30.000 40.000 Truck
|
449 |
+
-3PplV0ErOk_30.000_40.000.wav 30.000 40.000 Truck
|
450 |
+
-3gSkrDKNSA_27.000_37.000.wav 27.000 37.000 Truck
|
451 |
+
--p-rk_HBuU_30.000_40.000.wav 30.000 40.000 Motorcycle
|
452 |
+
-1WK72M4xeg_220.000_230.000.wav 220.000 230.000 Motorcycle
|
453 |
+
-1XfuJcdvfg_30.000_40.000.wav 30.000 40.000 Motorcycle
|
454 |
+
-3XWBAmjmaQ_11.000_21.000.wav 11.000 21.000 Motorcycle
|
455 |
+
-4-87UgJcUw_70.000_80.000.wav 70.000 80.000 Motorcycle
|
456 |
+
-4D3Gkyisyc_30.000_40.000.wav 30.000 40.000 Motorcycle
|
457 |
+
-5k5GyHd2So_4.000_14.000.wav 4.000 14.000 Motorcycle
|
458 |
+
-6A2L1U9b5Y_54.000_64.000.wav 54.000 64.000 Motorcycle
|
459 |
+
-6Yfati1N10_80.000_90.000.wav 80.000 90.000 Motorcycle
|
460 |
+
-7_o_GhpZpM_12.000_22.000.wav 12.000 22.000 Motorcycle
|
461 |
+
-7rZwMK6uSs_70.000_80.000.wav 70.000 80.000 Motorcycle
|
462 |
+
-85f5DKKfSo_30.000_40.000.wav 30.000 40.000 Motorcycle
|
463 |
+
-9Smdrt5zwk_40.000_50.000.wav 40.000 50.000 Motorcycle
|
464 |
+
-9gZLVDKpnE_30.000_40.000.wav 30.000 40.000 Motorcycle
|
465 |
+
-BGebo8V4XY_30.000_40.000.wav 30.000 40.000 Motorcycle
|
466 |
+
-DdiduB5B_w_190.000_200.000.wav 190.000 200.000 Motorcycle
|
467 |
+
-HIPq7T3eFI_11.000_21.000.wav 11.000 21.000 Motorcycle
|
468 |
+
-H_3oEkKe0M_50.000_60.000.wav 50.000 60.000 Motorcycle
|
469 |
+
-HmuMoykRqA_500.000_510.000.wav 500.000 510.000 Motorcycle
|
470 |
+
-IMRE_psvtI_30.000_40.000.wav 30.000 40.000 Motorcycle
|
471 |
+
-Ie4LSPDEF4_6.000_16.000.wav 6.000 16.000 Motorcycle
|
472 |
+
-J0F29UCZiA_70.000_80.000.wav 70.000 80.000 Motorcycle
|
473 |
+
-KFCJ7ydu2E_0.000_10.000.wav 0.000 10.000 Motorcycle
|
474 |
+
-KmDAgYb0Uo_100.000_110.000.wav 100.000 110.000 Motorcycle
|
475 |
+
-P7iW3WzNfc_400.000_410.000.wav 400.000 410.000 Motorcycle
|
476 |
+
-QMAKXzIGx4_10.000_20.000.wav 10.000 20.000 Motorcycle
|
477 |
+
-S-5z2vYtxw_10.000_20.000.wav 10.000 20.000 Motorcycle
|
478 |
+
-SlL0NZh51w_30.000_40.000.wav 30.000 40.000 Motorcycle
|
479 |
+
-US2mpJxbj4_30.000_40.000.wav 30.000 40.000 Motorcycle
|
480 |
+
-VO-C9C0uqY_1.000_11.000.wav 1.000 11.000 Motorcycle
|
481 |
+
--H_-CEB2wA_30.000_40.000.wav 30.000 40.000 Train
|
482 |
+
-1VsFy0eVJs_30.000_40.000.wav 30.000 40.000 Train
|
483 |
+
-1X7kpLnOpM_60.000_70.000.wav 60.000 70.000 Train
|
484 |
+
-3FIglJti0s_30.000_40.000.wav 30.000 40.000 Train
|
485 |
+
-5QrBL6MzLg_60.000_70.000.wav 60.000 70.000 Train
|
486 |
+
-6KOEEiAf9s_19.000_29.000.wav 19.000 29.000 Train
|
487 |
+
-97l_c6PToE_30.000_40.000.wav 30.000 40.000 Train
|
488 |
+
-9S5Z-uciLo_70.000_80.000.wav 70.000 80.000 Train
|
489 |
+
-CkgGfKepO4_140.000_150.000.wav 140.000 150.000 Train
|
490 |
+
-E0shPRxAbo_30.000_40.000.wav 30.000 40.000 Train
|
491 |
+
-Gbohom8C4Q_30.000_40.000.wav 30.000 40.000 Train
|
492 |
+
-JpQivta6MQ_20.000_30.000.wav 20.000 30.000 Train
|
493 |
+
-K9oTZj3mVQ_30.000_40.000.wav 30.000 40.000 Train
|
494 |
+
-KjE40DlSdU_0.000_10.000.wav 0.000 10.000 Train
|
495 |
+
-NrFtZ_xxFU_30.000_40.000.wav 30.000 40.000 Train
|
496 |
+
-PYRamK58Ss_0.000_10.000.wav 0.000 10.000 Train
|
497 |
+
-P_XDJt4p_s_30.000_40.000.wav 30.000 40.000 Train
|
498 |
+
-Pjylzex7oc_350.000_360.000.wav 350.000 360.000 Train
|
499 |
+
-QHuZGmIy_I_30.000_40.000.wav 30.000 40.000 Train
|
500 |
+
-Qfk_Q2ctBs_30.000_40.000.wav 30.000 40.000 Train
|
501 |
+
-RXKRoRPWXg_30.000_40.000.wav 30.000 40.000 Train
|
502 |
+
-VH414svzI0_30.000_40.000.wav 30.000 40.000 Train
|
503 |
+
-WFdYxE-PYI_30.000_40.000.wav 30.000 40.000 Train
|
504 |
+
-Wd1pV7UjWg_60.000_70.000.wav 60.000 70.000 Train
|
505 |
+
-XcC-UlbcRA_30.000_40.000.wav 30.000 40.000 Train
|
506 |
+
-Y2cD8xvCHI_30.000_40.000.wav 30.000 40.000 Train
|
507 |
+
-ZKZkMHe3cY_70.000_80.000.wav 70.000 80.000 Train
|
508 |
+
-Zq22n4OewA_30.000_40.000.wav 30.000 40.000 Train
|
509 |
+
-aZ7XC4LG2A_30.000_40.000.wav 30.000 40.000 Train
|
510 |
+
-abVemAm9HM_430.000_440.000.wav 430.000 440.000 Train
|
511 |
+
1T1i2rny8RU_30.000_40.000.wav 30.000 40.000 Ambulance (siren)
|
512 |
+
7DC3HtNi4fU_160.000_170.000.wav 160.000 170.000 Ambulance (siren)
|
513 |
+
-z8jsgl3iHE_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
514 |
+
00H_s-krtg8_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
515 |
+
0I6Mlp27_gM_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
516 |
+
3YaLkgUMhAA_110.000_120.000.wav 110.000 120.000 Fire engine, fire truck (siren)
|
517 |
+
4l78f9VZ9uE_30.000_40.000.wav 30.000 40.000 Fire engine, fire truck (siren)
|
518 |
+
35EOmSMTQ6I_30.000_40.000.wav 30.000 40.000 Civil defense siren
|
519 |
+
06RreMb5qbE_0.000_10.000.wav 0.000 10.000 Police car (siren)
|
520 |
+
0EPK7Pv_lbE_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
521 |
+
0I6Mlp27_gM_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
522 |
+
17VuPl9Wxvs_20.000_30.000.wav 20.000 30.000 Police car (siren)
|
523 |
+
4A1Ar1TIXIY_30.000_40.000.wav 30.000 40.000 Police car (siren)
|
524 |
+
-10fWp7Pqs4_30.000_40.000.wav 30.000 40.000 Car
|
525 |
+
-122tCXtFhU_30.000_40.000.wav 30.000 40.000 Car
|
526 |
+
-14BFlDzjS4_6.000_16.000.wav 6.000 16.000 Car
|
527 |
+
-1BrkFLHD74_19.000_29.000.wav 19.000 29.000 Car
|
528 |
+
-1HlfoHZCEE_6.000_16.000.wav 6.000 16.000 Car
|
529 |
+
-1McjOPUzbo_30.000_40.000.wav 30.000 40.000 Car
|
530 |
+
-1sGSNmgiPs_4.000_14.000.wav 4.000 14.000 Car
|
531 |
+
-25LkbSjEos_30.000_40.000.wav 30.000 40.000 Car
|
532 |
+
-2GlU3e0nTU_170.000_180.000.wav 170.000 180.000 Car
|
533 |
+
-2LJWaL2PuA_30.000_40.000.wav 30.000 40.000 Car
|
534 |
+
-2ZbvsBSZmY_2.000_12.000.wav 2.000 12.000 Car
|
535 |
+
-2cz2qQDmr4_30.000_40.000.wav 30.000 40.000 Car
|
536 |
+
-31KUAOSg5U_5.000_15.000.wav 5.000 15.000 Car
|
537 |
+
-35qBdzN9ck_30.000_40.000.wav 30.000 40.000 Car
|
538 |
+
-3929cmVE20_30.000_40.000.wav 30.000 40.000 Car
|
539 |
+
-3M-k4nIYIM_30.000_40.000.wav 30.000 40.000 Car
|
540 |
+
-3MNphBfq_0_30.000_40.000.wav 30.000 40.000 Car
|
541 |
+
-3_RSVYKkkk_30.000_40.000.wav 30.000 40.000 Car
|
542 |
+
-AF7wp3ezww_140.000_150.000.wav 140.000 150.000 Car
|
543 |
+
-Pg4vVPs4bE_30.000_40.000.wav 30.000 40.000 Car
|
544 |
+
-VULyMtKazE_0.000_7.000.wav 0.000 7.000 Car
|
545 |
+
-cbYvBBXE6A_12.000_22.000.wav 12.000 22.000 Car
|
546 |
+
06RreMb5qbE_0.000_10.000.wav 0.000 10.000 Car
|
547 |
+
0E4AqW9dmdk_30.000_40.000.wav 30.000 40.000 Car
|
548 |
+
0Hz4R_m0hmI_80.000_90.000.wav 80.000 90.000 Car
|
549 |
+
4Kpklmj-ze0_53.000_63.000.wav 53.000 63.000 Car
|
550 |
+
5tzTahLHylw_70.000_80.000.wav 70.000 80.000 Car
|
551 |
+
7NJ5TbNEIvA_250.000_260.000.wav 250.000 260.000 Car
|
552 |
+
9fCibkUT_gQ_30.000_40.000.wav 30.000 40.000 Car
|
553 |
+
9jYv9WuyknA_130.000_140.000.wav 130.000 140.000 Car
|
554 |
+
-l-DEfDAvNA_30.000_40.000.wav 30.000 40.000 Car passing by
|
555 |
+
9fCibkUT_gQ_30.000_40.000.wav 30.000 40.000 Car passing by
|
556 |
+
-jj2tyuf6-A_80.000_90.000.wav 80.000 90.000 Bus
|
557 |
+
-45cKZA7Jww_30.000_40.000.wav 30.000 40.000 Truck
|
558 |
+
-4B435WQvag_20.000_30.000.wav 20.000 30.000 Truck
|
559 |
+
-60XojQWWoc_30.000_40.000.wav 30.000 40.000 Truck
|
560 |
+
-6qhtwdfGOA_23.000_33.000.wav 23.000 33.000 Truck
|
561 |
+
-8OITuFZha8_30.000_40.000.wav 30.000 40.000 Truck
|
562 |
+
-8n2NqDFRko_30.000_40.000.wav 30.000 40.000 Truck
|
563 |
+
-AIrHVeCgtM_30.000_40.000.wav 30.000 40.000 Truck
|
564 |
+
-AVzYvKHwPg_30.000_40.000.wav 30.000 40.000 Truck
|
565 |
+
-BM_EAszxBg_30.000_40.000.wav 30.000 40.000 Truck
|
566 |
+
-Ei2LE71Dfg_20.000_30.000.wav 20.000 30.000 Truck
|
567 |
+
-FWkB2IDMhc_30.000_40.000.wav 30.000 40.000 Truck
|
568 |
+
-Jsu4dbuO4A_30.000_40.000.wav 30.000 40.000 Truck
|
569 |
+
-PRrNx6_MD0_16.000_26.000.wav 16.000 26.000 Truck
|
570 |
+
-X0vNLwH1C0_30.000_40.000.wav 30.000 40.000 Truck
|
571 |
+
-cbYvBBXE6A_12.000_22.000.wav 12.000 22.000 Truck
|
572 |
+
-oCvKmNbhl0_30.000_40.000.wav 30.000 40.000 Truck
|
573 |
+
-oV6dQu5tZo_30.000_40.000.wav 30.000 40.000 Truck
|
574 |
+
-qKRKDTbt4c_30.000_40.000.wav 30.000 40.000 Truck
|
575 |
+
-r8mfjRiHrU_30.000_40.000.wav 30.000 40.000 Truck
|
576 |
+
-s9kwrRilOY_30.000_40.000.wav 30.000 40.000 Truck
|
577 |
+
-uMiGr6xvRA_30.000_40.000.wav 30.000 40.000 Truck
|
578 |
+
-x70B12Mb-8_30.000_40.000.wav 30.000 40.000 Truck
|
579 |
+
-xYsfYZOI-Y_30.000_40.000.wav 30.000 40.000 Truck
|
580 |
+
-zxrdL6MlKI_30.000_40.000.wav 30.000 40.000 Truck
|
581 |
+
0C3kqtF76t8_50.000_60.000.wav 50.000 60.000 Truck
|
582 |
+
0HmiH-wKLB4_30.000_40.000.wav 30.000 40.000 Truck
|
583 |
+
0KskqFt3DoY_15.000_25.000.wav 15.000 25.000 Truck
|
584 |
+
0OiPtV9sd_w_30.000_40.000.wav 30.000 40.000 Truck
|
585 |
+
0VnoYVqd-yo_30.000_40.000.wav 30.000 40.000 Truck
|
586 |
+
3YaLkgUMhAA_110.000_120.000.wav 110.000 120.000 Truck
|
587 |
+
-nGBPqlRNg4_30.000_40.000.wav 30.000 40.000 Train
|
588 |
+
02w3vd_GgF0_390.000_400.000.wav 390.000 400.000 Train
|
589 |
+
0HqeYIREv8M_30.000_40.000.wav 30.000 40.000 Train
|
590 |
+
0IpYF91Fdt0_80.000_90.000.wav 80.000 90.000 Train
|
591 |
+
0NaZejdABG0_90.000_100.000.wav 90.000 100.000 Train
|
592 |
+
0RurXUfKyow_4.000_14.000.wav 4.000 14.000 Train
|
593 |
+
0_HnD-rW3lI_170.000_180.000.wav 170.000 180.000 Train
|
594 |
+
10i60V1RZkQ_210.000_220.000.wav 210.000 220.000 Train
|
595 |
+
1FJY5X1iY9I_170.000_180.000.wav 170.000 180.000 Train
|
596 |
+
1U0Ty6CW6AM_40.000_50.000.wav 40.000 50.000 Train
|
597 |
+
1hQLr88iCvg_30.000_40.000.wav 30.000 40.000 Train
|
598 |
+
1iUXERALOOs_190.000_200.000.wav 190.000 200.000 Train
|
599 |
+
1iWFlLpixKU_5.000_15.000.wav 5.000 15.000 Train
|
600 |
+
1oJAVJPX0YY_20.000_30.000.wav 20.000 30.000 Train
|
601 |
+
26dNsDuIt9Q_340.000_350.000.wav 340.000 350.000 Train
|
602 |
+
2BMHsKLcb7E_90.000_100.000.wav 90.000 100.000 Train
|
603 |
+
2RpOd9MJjyQ_10.000_20.000.wav 10.000 20.000 Train
|
604 |
+
2U4wSdl10to_200.000_210.000.wav 200.000 210.000 Train
|
605 |
+
2aBV6AZt5nk_570.000_580.000.wav 570.000 580.000 Train
|
606 |
+
3ntFslTK6hM_90.000_100.000.wav 90.000 100.000 Train
|
audio_detection/audio_infer/metadata/class_labels_indices.csv
ADDED
@@ -0,0 +1,528 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
index,mid,display_name
|
2 |
+
0,/m/09x0r,"Speech"
|
3 |
+
1,/m/05zppz,"Male speech, man speaking"
|
4 |
+
2,/m/02zsn,"Female speech, woman speaking"
|
5 |
+
3,/m/0ytgt,"Child speech, kid speaking"
|
6 |
+
4,/m/01h8n0,"Conversation"
|
7 |
+
5,/m/02qldy,"Narration, monologue"
|
8 |
+
6,/m/0261r1,"Babbling"
|
9 |
+
7,/m/0brhx,"Speech synthesizer"
|
10 |
+
8,/m/07p6fty,"Shout"
|
11 |
+
9,/m/07q4ntr,"Bellow"
|
12 |
+
10,/m/07rwj3x,"Whoop"
|
13 |
+
11,/m/07sr1lc,"Yell"
|
14 |
+
12,/m/04gy_2,"Battle cry"
|
15 |
+
13,/t/dd00135,"Children shouting"
|
16 |
+
14,/m/03qc9zr,"Screaming"
|
17 |
+
15,/m/02rtxlg,"Whispering"
|
18 |
+
16,/m/01j3sz,"Laughter"
|
19 |
+
17,/t/dd00001,"Baby laughter"
|
20 |
+
18,/m/07r660_,"Giggle"
|
21 |
+
19,/m/07s04w4,"Snicker"
|
22 |
+
20,/m/07sq110,"Belly laugh"
|
23 |
+
21,/m/07rgt08,"Chuckle, chortle"
|
24 |
+
22,/m/0463cq4,"Crying, sobbing"
|
25 |
+
23,/t/dd00002,"Baby cry, infant cry"
|
26 |
+
24,/m/07qz6j3,"Whimper"
|
27 |
+
25,/m/07qw_06,"Wail, moan"
|
28 |
+
26,/m/07plz5l,"Sigh"
|
29 |
+
27,/m/015lz1,"Singing"
|
30 |
+
28,/m/0l14jd,"Choir"
|
31 |
+
29,/m/01swy6,"Yodeling"
|
32 |
+
30,/m/02bk07,"Chant"
|
33 |
+
31,/m/01c194,"Mantra"
|
34 |
+
32,/t/dd00003,"Male singing"
|
35 |
+
33,/t/dd00004,"Female singing"
|
36 |
+
34,/t/dd00005,"Child singing"
|
37 |
+
35,/t/dd00006,"Synthetic singing"
|
38 |
+
36,/m/06bxc,"Rapping"
|
39 |
+
37,/m/02fxyj,"Humming"
|
40 |
+
38,/m/07s2xch,"Groan"
|
41 |
+
39,/m/07r4k75,"Grunt"
|
42 |
+
40,/m/01w250,"Whistling"
|
43 |
+
41,/m/0lyf6,"Breathing"
|
44 |
+
42,/m/07mzm6,"Wheeze"
|
45 |
+
43,/m/01d3sd,"Snoring"
|
46 |
+
44,/m/07s0dtb,"Gasp"
|
47 |
+
45,/m/07pyy8b,"Pant"
|
48 |
+
46,/m/07q0yl5,"Snort"
|
49 |
+
47,/m/01b_21,"Cough"
|
50 |
+
48,/m/0dl9sf8,"Throat clearing"
|
51 |
+
49,/m/01hsr_,"Sneeze"
|
52 |
+
50,/m/07ppn3j,"Sniff"
|
53 |
+
51,/m/06h7j,"Run"
|
54 |
+
52,/m/07qv_x_,"Shuffle"
|
55 |
+
53,/m/07pbtc8,"Walk, footsteps"
|
56 |
+
54,/m/03cczk,"Chewing, mastication"
|
57 |
+
55,/m/07pdhp0,"Biting"
|
58 |
+
56,/m/0939n_,"Gargling"
|
59 |
+
57,/m/01g90h,"Stomach rumble"
|
60 |
+
58,/m/03q5_w,"Burping, eructation"
|
61 |
+
59,/m/02p3nc,"Hiccup"
|
62 |
+
60,/m/02_nn,"Fart"
|
63 |
+
61,/m/0k65p,"Hands"
|
64 |
+
62,/m/025_jnm,"Finger snapping"
|
65 |
+
63,/m/0l15bq,"Clapping"
|
66 |
+
64,/m/01jg02,"Heart sounds, heartbeat"
|
67 |
+
65,/m/01jg1z,"Heart murmur"
|
68 |
+
66,/m/053hz1,"Cheering"
|
69 |
+
67,/m/028ght,"Applause"
|
70 |
+
68,/m/07rkbfh,"Chatter"
|
71 |
+
69,/m/03qtwd,"Crowd"
|
72 |
+
70,/m/07qfr4h,"Hubbub, speech noise, speech babble"
|
73 |
+
71,/t/dd00013,"Children playing"
|
74 |
+
72,/m/0jbk,"Animal"
|
75 |
+
73,/m/068hy,"Domestic animals, pets"
|
76 |
+
74,/m/0bt9lr,"Dog"
|
77 |
+
75,/m/05tny_,"Bark"
|
78 |
+
76,/m/07r_k2n,"Yip"
|
79 |
+
77,/m/07qf0zm,"Howl"
|
80 |
+
78,/m/07rc7d9,"Bow-wow"
|
81 |
+
79,/m/0ghcn6,"Growling"
|
82 |
+
80,/t/dd00136,"Whimper (dog)"
|
83 |
+
81,/m/01yrx,"Cat"
|
84 |
+
82,/m/02yds9,"Purr"
|
85 |
+
83,/m/07qrkrw,"Meow"
|
86 |
+
84,/m/07rjwbb,"Hiss"
|
87 |
+
85,/m/07r81j2,"Caterwaul"
|
88 |
+
86,/m/0ch8v,"Livestock, farm animals, working animals"
|
89 |
+
87,/m/03k3r,"Horse"
|
90 |
+
88,/m/07rv9rh,"Clip-clop"
|
91 |
+
89,/m/07q5rw0,"Neigh, whinny"
|
92 |
+
90,/m/01xq0k1,"Cattle, bovinae"
|
93 |
+
91,/m/07rpkh9,"Moo"
|
94 |
+
92,/m/0239kh,"Cowbell"
|
95 |
+
93,/m/068zj,"Pig"
|
96 |
+
94,/t/dd00018,"Oink"
|
97 |
+
95,/m/03fwl,"Goat"
|
98 |
+
96,/m/07q0h5t,"Bleat"
|
99 |
+
97,/m/07bgp,"Sheep"
|
100 |
+
98,/m/025rv6n,"Fowl"
|
101 |
+
99,/m/09b5t,"Chicken, rooster"
|
102 |
+
100,/m/07st89h,"Cluck"
|
103 |
+
101,/m/07qn5dc,"Crowing, cock-a-doodle-doo"
|
104 |
+
102,/m/01rd7k,"Turkey"
|
105 |
+
103,/m/07svc2k,"Gobble"
|
106 |
+
104,/m/09ddx,"Duck"
|
107 |
+
105,/m/07qdb04,"Quack"
|
108 |
+
106,/m/0dbvp,"Goose"
|
109 |
+
107,/m/07qwf61,"Honk"
|
110 |
+
108,/m/01280g,"Wild animals"
|
111 |
+
109,/m/0cdnk,"Roaring cats (lions, tigers)"
|
112 |
+
110,/m/04cvmfc,"Roar"
|
113 |
+
111,/m/015p6,"Bird"
|
114 |
+
112,/m/020bb7,"Bird vocalization, bird call, bird song"
|
115 |
+
113,/m/07pggtn,"Chirp, tweet"
|
116 |
+
114,/m/07sx8x_,"Squawk"
|
117 |
+
115,/m/0h0rv,"Pigeon, dove"
|
118 |
+
116,/m/07r_25d,"Coo"
|
119 |
+
117,/m/04s8yn,"Crow"
|
120 |
+
118,/m/07r5c2p,"Caw"
|
121 |
+
119,/m/09d5_,"Owl"
|
122 |
+
120,/m/07r_80w,"Hoot"
|
123 |
+
121,/m/05_wcq,"Bird flight, flapping wings"
|
124 |
+
122,/m/01z5f,"Canidae, dogs, wolves"
|
125 |
+
123,/m/06hps,"Rodents, rats, mice"
|
126 |
+
124,/m/04rmv,"Mouse"
|
127 |
+
125,/m/07r4gkf,"Patter"
|
128 |
+
126,/m/03vt0,"Insect"
|
129 |
+
127,/m/09xqv,"Cricket"
|
130 |
+
128,/m/09f96,"Mosquito"
|
131 |
+
129,/m/0h2mp,"Fly, housefly"
|
132 |
+
130,/m/07pjwq1,"Buzz"
|
133 |
+
131,/m/01h3n,"Bee, wasp, etc."
|
134 |
+
132,/m/09ld4,"Frog"
|
135 |
+
133,/m/07st88b,"Croak"
|
136 |
+
134,/m/078jl,"Snake"
|
137 |
+
135,/m/07qn4z3,"Rattle"
|
138 |
+
136,/m/032n05,"Whale vocalization"
|
139 |
+
137,/m/04rlf,"Music"
|
140 |
+
138,/m/04szw,"Musical instrument"
|
141 |
+
139,/m/0fx80y,"Plucked string instrument"
|
142 |
+
140,/m/0342h,"Guitar"
|
143 |
+
141,/m/02sgy,"Electric guitar"
|
144 |
+
142,/m/018vs,"Bass guitar"
|
145 |
+
143,/m/042v_gx,"Acoustic guitar"
|
146 |
+
144,/m/06w87,"Steel guitar, slide guitar"
|
147 |
+
145,/m/01glhc,"Tapping (guitar technique)"
|
148 |
+
146,/m/07s0s5r,"Strum"
|
149 |
+
147,/m/018j2,"Banjo"
|
150 |
+
148,/m/0jtg0,"Sitar"
|
151 |
+
149,/m/04rzd,"Mandolin"
|
152 |
+
150,/m/01bns_,"Zither"
|
153 |
+
151,/m/07xzm,"Ukulele"
|
154 |
+
152,/m/05148p4,"Keyboard (musical)"
|
155 |
+
153,/m/05r5c,"Piano"
|
156 |
+
154,/m/01s0ps,"Electric piano"
|
157 |
+
155,/m/013y1f,"Organ"
|
158 |
+
156,/m/03xq_f,"Electronic organ"
|
159 |
+
157,/m/03gvt,"Hammond organ"
|
160 |
+
158,/m/0l14qv,"Synthesizer"
|
161 |
+
159,/m/01v1d8,"Sampler"
|
162 |
+
160,/m/03q5t,"Harpsichord"
|
163 |
+
161,/m/0l14md,"Percussion"
|
164 |
+
162,/m/02hnl,"Drum kit"
|
165 |
+
163,/m/0cfdd,"Drum machine"
|
166 |
+
164,/m/026t6,"Drum"
|
167 |
+
165,/m/06rvn,"Snare drum"
|
168 |
+
166,/m/03t3fj,"Rimshot"
|
169 |
+
167,/m/02k_mr,"Drum roll"
|
170 |
+
168,/m/0bm02,"Bass drum"
|
171 |
+
169,/m/011k_j,"Timpani"
|
172 |
+
170,/m/01p970,"Tabla"
|
173 |
+
171,/m/01qbl,"Cymbal"
|
174 |
+
172,/m/03qtq,"Hi-hat"
|
175 |
+
173,/m/01sm1g,"Wood block"
|
176 |
+
174,/m/07brj,"Tambourine"
|
177 |
+
175,/m/05r5wn,"Rattle (instrument)"
|
178 |
+
176,/m/0xzly,"Maraca"
|
179 |
+
177,/m/0mbct,"Gong"
|
180 |
+
178,/m/016622,"Tubular bells"
|
181 |
+
179,/m/0j45pbj,"Mallet percussion"
|
182 |
+
180,/m/0dwsp,"Marimba, xylophone"
|
183 |
+
181,/m/0dwtp,"Glockenspiel"
|
184 |
+
182,/m/0dwt5,"Vibraphone"
|
185 |
+
183,/m/0l156b,"Steelpan"
|
186 |
+
184,/m/05pd6,"Orchestra"
|
187 |
+
185,/m/01kcd,"Brass instrument"
|
188 |
+
186,/m/0319l,"French horn"
|
189 |
+
187,/m/07gql,"Trumpet"
|
190 |
+
188,/m/07c6l,"Trombone"
|
191 |
+
189,/m/0l14_3,"Bowed string instrument"
|
192 |
+
190,/m/02qmj0d,"String section"
|
193 |
+
191,/m/07y_7,"Violin, fiddle"
|
194 |
+
192,/m/0d8_n,"Pizzicato"
|
195 |
+
193,/m/01xqw,"Cello"
|
196 |
+
194,/m/02fsn,"Double bass"
|
197 |
+
195,/m/085jw,"Wind instrument, woodwind instrument"
|
198 |
+
196,/m/0l14j_,"Flute"
|
199 |
+
197,/m/06ncr,"Saxophone"
|
200 |
+
198,/m/01wy6,"Clarinet"
|
201 |
+
199,/m/03m5k,"Harp"
|
202 |
+
200,/m/0395lw,"Bell"
|
203 |
+
201,/m/03w41f,"Church bell"
|
204 |
+
202,/m/027m70_,"Jingle bell"
|
205 |
+
203,/m/0gy1t2s,"Bicycle bell"
|
206 |
+
204,/m/07n_g,"Tuning fork"
|
207 |
+
205,/m/0f8s22,"Chime"
|
208 |
+
206,/m/026fgl,"Wind chime"
|
209 |
+
207,/m/0150b9,"Change ringing (campanology)"
|
210 |
+
208,/m/03qjg,"Harmonica"
|
211 |
+
209,/m/0mkg,"Accordion"
|
212 |
+
210,/m/0192l,"Bagpipes"
|
213 |
+
211,/m/02bxd,"Didgeridoo"
|
214 |
+
212,/m/0l14l2,"Shofar"
|
215 |
+
213,/m/07kc_,"Theremin"
|
216 |
+
214,/m/0l14t7,"Singing bowl"
|
217 |
+
215,/m/01hgjl,"Scratching (performance technique)"
|
218 |
+
216,/m/064t9,"Pop music"
|
219 |
+
217,/m/0glt670,"Hip hop music"
|
220 |
+
218,/m/02cz_7,"Beatboxing"
|
221 |
+
219,/m/06by7,"Rock music"
|
222 |
+
220,/m/03lty,"Heavy metal"
|
223 |
+
221,/m/05r6t,"Punk rock"
|
224 |
+
222,/m/0dls3,"Grunge"
|
225 |
+
223,/m/0dl5d,"Progressive rock"
|
226 |
+
224,/m/07sbbz2,"Rock and roll"
|
227 |
+
225,/m/05w3f,"Psychedelic rock"
|
228 |
+
226,/m/06j6l,"Rhythm and blues"
|
229 |
+
227,/m/0gywn,"Soul music"
|
230 |
+
228,/m/06cqb,"Reggae"
|
231 |
+
229,/m/01lyv,"Country"
|
232 |
+
230,/m/015y_n,"Swing music"
|
233 |
+
231,/m/0gg8l,"Bluegrass"
|
234 |
+
232,/m/02x8m,"Funk"
|
235 |
+
233,/m/02w4v,"Folk music"
|
236 |
+
234,/m/06j64v,"Middle Eastern music"
|
237 |
+
235,/m/03_d0,"Jazz"
|
238 |
+
236,/m/026z9,"Disco"
|
239 |
+
237,/m/0ggq0m,"Classical music"
|
240 |
+
238,/m/05lls,"Opera"
|
241 |
+
239,/m/02lkt,"Electronic music"
|
242 |
+
240,/m/03mb9,"House music"
|
243 |
+
241,/m/07gxw,"Techno"
|
244 |
+
242,/m/07s72n,"Dubstep"
|
245 |
+
243,/m/0283d,"Drum and bass"
|
246 |
+
244,/m/0m0jc,"Electronica"
|
247 |
+
245,/m/08cyft,"Electronic dance music"
|
248 |
+
246,/m/0fd3y,"Ambient music"
|
249 |
+
247,/m/07lnk,"Trance music"
|
250 |
+
248,/m/0g293,"Music of Latin America"
|
251 |
+
249,/m/0ln16,"Salsa music"
|
252 |
+
250,/m/0326g,"Flamenco"
|
253 |
+
251,/m/0155w,"Blues"
|
254 |
+
252,/m/05fw6t,"Music for children"
|
255 |
+
253,/m/02v2lh,"New-age music"
|
256 |
+
254,/m/0y4f8,"Vocal music"
|
257 |
+
255,/m/0z9c,"A capella"
|
258 |
+
256,/m/0164x2,"Music of Africa"
|
259 |
+
257,/m/0145m,"Afrobeat"
|
260 |
+
258,/m/02mscn,"Christian music"
|
261 |
+
259,/m/016cjb,"Gospel music"
|
262 |
+
260,/m/028sqc,"Music of Asia"
|
263 |
+
261,/m/015vgc,"Carnatic music"
|
264 |
+
262,/m/0dq0md,"Music of Bollywood"
|
265 |
+
263,/m/06rqw,"Ska"
|
266 |
+
264,/m/02p0sh1,"Traditional music"
|
267 |
+
265,/m/05rwpb,"Independent music"
|
268 |
+
266,/m/074ft,"Song"
|
269 |
+
267,/m/025td0t,"Background music"
|
270 |
+
268,/m/02cjck,"Theme music"
|
271 |
+
269,/m/03r5q_,"Jingle (music)"
|
272 |
+
270,/m/0l14gg,"Soundtrack music"
|
273 |
+
271,/m/07pkxdp,"Lullaby"
|
274 |
+
272,/m/01z7dr,"Video game music"
|
275 |
+
273,/m/0140xf,"Christmas music"
|
276 |
+
274,/m/0ggx5q,"Dance music"
|
277 |
+
275,/m/04wptg,"Wedding music"
|
278 |
+
276,/t/dd00031,"Happy music"
|
279 |
+
277,/t/dd00032,"Funny music"
|
280 |
+
278,/t/dd00033,"Sad music"
|
281 |
+
279,/t/dd00034,"Tender music"
|
282 |
+
280,/t/dd00035,"Exciting music"
|
283 |
+
281,/t/dd00036,"Angry music"
|
284 |
+
282,/t/dd00037,"Scary music"
|
285 |
+
283,/m/03m9d0z,"Wind"
|
286 |
+
284,/m/09t49,"Rustling leaves"
|
287 |
+
285,/t/dd00092,"Wind noise (microphone)"
|
288 |
+
286,/m/0jb2l,"Thunderstorm"
|
289 |
+
287,/m/0ngt1,"Thunder"
|
290 |
+
288,/m/0838f,"Water"
|
291 |
+
289,/m/06mb1,"Rain"
|
292 |
+
290,/m/07r10fb,"Raindrop"
|
293 |
+
291,/t/dd00038,"Rain on surface"
|
294 |
+
292,/m/0j6m2,"Stream"
|
295 |
+
293,/m/0j2kx,"Waterfall"
|
296 |
+
294,/m/05kq4,"Ocean"
|
297 |
+
295,/m/034srq,"Waves, surf"
|
298 |
+
296,/m/06wzb,"Steam"
|
299 |
+
297,/m/07swgks,"Gurgling"
|
300 |
+
298,/m/02_41,"Fire"
|
301 |
+
299,/m/07pzfmf,"Crackle"
|
302 |
+
300,/m/07yv9,"Vehicle"
|
303 |
+
301,/m/019jd,"Boat, Water vehicle"
|
304 |
+
302,/m/0hsrw,"Sailboat, sailing ship"
|
305 |
+
303,/m/056ks2,"Rowboat, canoe, kayak"
|
306 |
+
304,/m/02rlv9,"Motorboat, speedboat"
|
307 |
+
305,/m/06q74,"Ship"
|
308 |
+
306,/m/012f08,"Motor vehicle (road)"
|
309 |
+
307,/m/0k4j,"Car"
|
310 |
+
308,/m/0912c9,"Vehicle horn, car horn, honking"
|
311 |
+
309,/m/07qv_d5,"Toot"
|
312 |
+
310,/m/02mfyn,"Car alarm"
|
313 |
+
311,/m/04gxbd,"Power windows, electric windows"
|
314 |
+
312,/m/07rknqz,"Skidding"
|
315 |
+
313,/m/0h9mv,"Tire squeal"
|
316 |
+
314,/t/dd00134,"Car passing by"
|
317 |
+
315,/m/0ltv,"Race car, auto racing"
|
318 |
+
316,/m/07r04,"Truck"
|
319 |
+
317,/m/0gvgw0,"Air brake"
|
320 |
+
318,/m/05x_td,"Air horn, truck horn"
|
321 |
+
319,/m/02rhddq,"Reversing beeps"
|
322 |
+
320,/m/03cl9h,"Ice cream truck, ice cream van"
|
323 |
+
321,/m/01bjv,"Bus"
|
324 |
+
322,/m/03j1ly,"Emergency vehicle"
|
325 |
+
323,/m/04qvtq,"Police car (siren)"
|
326 |
+
324,/m/012n7d,"Ambulance (siren)"
|
327 |
+
325,/m/012ndj,"Fire engine, fire truck (siren)"
|
328 |
+
326,/m/04_sv,"Motorcycle"
|
329 |
+
327,/m/0btp2,"Traffic noise, roadway noise"
|
330 |
+
328,/m/06d_3,"Rail transport"
|
331 |
+
329,/m/07jdr,"Train"
|
332 |
+
330,/m/04zmvq,"Train whistle"
|
333 |
+
331,/m/0284vy3,"Train horn"
|
334 |
+
332,/m/01g50p,"Railroad car, train wagon"
|
335 |
+
333,/t/dd00048,"Train wheels squealing"
|
336 |
+
334,/m/0195fx,"Subway, metro, underground"
|
337 |
+
335,/m/0k5j,"Aircraft"
|
338 |
+
336,/m/014yck,"Aircraft engine"
|
339 |
+
337,/m/04229,"Jet engine"
|
340 |
+
338,/m/02l6bg,"Propeller, airscrew"
|
341 |
+
339,/m/09ct_,"Helicopter"
|
342 |
+
340,/m/0cmf2,"Fixed-wing aircraft, airplane"
|
343 |
+
341,/m/0199g,"Bicycle"
|
344 |
+
342,/m/06_fw,"Skateboard"
|
345 |
+
343,/m/02mk9,"Engine"
|
346 |
+
344,/t/dd00065,"Light engine (high frequency)"
|
347 |
+
345,/m/08j51y,"Dental drill, dentist's drill"
|
348 |
+
346,/m/01yg9g,"Lawn mower"
|
349 |
+
347,/m/01j4z9,"Chainsaw"
|
350 |
+
348,/t/dd00066,"Medium engine (mid frequency)"
|
351 |
+
349,/t/dd00067,"Heavy engine (low frequency)"
|
352 |
+
350,/m/01h82_,"Engine knocking"
|
353 |
+
351,/t/dd00130,"Engine starting"
|
354 |
+
352,/m/07pb8fc,"Idling"
|
355 |
+
353,/m/07q2z82,"Accelerating, revving, vroom"
|
356 |
+
354,/m/02dgv,"Door"
|
357 |
+
355,/m/03wwcy,"Doorbell"
|
358 |
+
356,/m/07r67yg,"Ding-dong"
|
359 |
+
357,/m/02y_763,"Sliding door"
|
360 |
+
358,/m/07rjzl8,"Slam"
|
361 |
+
359,/m/07r4wb8,"Knock"
|
362 |
+
360,/m/07qcpgn,"Tap"
|
363 |
+
361,/m/07q6cd_,"Squeak"
|
364 |
+
362,/m/0642b4,"Cupboard open or close"
|
365 |
+
363,/m/0fqfqc,"Drawer open or close"
|
366 |
+
364,/m/04brg2,"Dishes, pots, and pans"
|
367 |
+
365,/m/023pjk,"Cutlery, silverware"
|
368 |
+
366,/m/07pn_8q,"Chopping (food)"
|
369 |
+
367,/m/0dxrf,"Frying (food)"
|
370 |
+
368,/m/0fx9l,"Microwave oven"
|
371 |
+
369,/m/02pjr4,"Blender"
|
372 |
+
370,/m/02jz0l,"Water tap, faucet"
|
373 |
+
371,/m/0130jx,"Sink (filling or washing)"
|
374 |
+
372,/m/03dnzn,"Bathtub (filling or washing)"
|
375 |
+
373,/m/03wvsk,"Hair dryer"
|
376 |
+
374,/m/01jt3m,"Toilet flush"
|
377 |
+
375,/m/012xff,"Toothbrush"
|
378 |
+
376,/m/04fgwm,"Electric toothbrush"
|
379 |
+
377,/m/0d31p,"Vacuum cleaner"
|
380 |
+
378,/m/01s0vc,"Zipper (clothing)"
|
381 |
+
379,/m/03v3yw,"Keys jangling"
|
382 |
+
380,/m/0242l,"Coin (dropping)"
|
383 |
+
381,/m/01lsmm,"Scissors"
|
384 |
+
382,/m/02g901,"Electric shaver, electric razor"
|
385 |
+
383,/m/05rj2,"Shuffling cards"
|
386 |
+
384,/m/0316dw,"Typing"
|
387 |
+
385,/m/0c2wf,"Typewriter"
|
388 |
+
386,/m/01m2v,"Computer keyboard"
|
389 |
+
387,/m/081rb,"Writing"
|
390 |
+
388,/m/07pp_mv,"Alarm"
|
391 |
+
389,/m/07cx4,"Telephone"
|
392 |
+
390,/m/07pp8cl,"Telephone bell ringing"
|
393 |
+
391,/m/01hnzm,"Ringtone"
|
394 |
+
392,/m/02c8p,"Telephone dialing, DTMF"
|
395 |
+
393,/m/015jpf,"Dial tone"
|
396 |
+
394,/m/01z47d,"Busy signal"
|
397 |
+
395,/m/046dlr,"Alarm clock"
|
398 |
+
396,/m/03kmc9,"Siren"
|
399 |
+
397,/m/0dgbq,"Civil defense siren"
|
400 |
+
398,/m/030rvx,"Buzzer"
|
401 |
+
399,/m/01y3hg,"Smoke detector, smoke alarm"
|
402 |
+
400,/m/0c3f7m,"Fire alarm"
|
403 |
+
401,/m/04fq5q,"Foghorn"
|
404 |
+
402,/m/0l156k,"Whistle"
|
405 |
+
403,/m/06hck5,"Steam whistle"
|
406 |
+
404,/t/dd00077,"Mechanisms"
|
407 |
+
405,/m/02bm9n,"Ratchet, pawl"
|
408 |
+
406,/m/01x3z,"Clock"
|
409 |
+
407,/m/07qjznt,"Tick"
|
410 |
+
408,/m/07qjznl,"Tick-tock"
|
411 |
+
409,/m/0l7xg,"Gears"
|
412 |
+
410,/m/05zc1,"Pulleys"
|
413 |
+
411,/m/0llzx,"Sewing machine"
|
414 |
+
412,/m/02x984l,"Mechanical fan"
|
415 |
+
413,/m/025wky1,"Air conditioning"
|
416 |
+
414,/m/024dl,"Cash register"
|
417 |
+
415,/m/01m4t,"Printer"
|
418 |
+
416,/m/0dv5r,"Camera"
|
419 |
+
417,/m/07bjf,"Single-lens reflex camera"
|
420 |
+
418,/m/07k1x,"Tools"
|
421 |
+
419,/m/03l9g,"Hammer"
|
422 |
+
420,/m/03p19w,"Jackhammer"
|
423 |
+
421,/m/01b82r,"Sawing"
|
424 |
+
422,/m/02p01q,"Filing (rasp)"
|
425 |
+
423,/m/023vsd,"Sanding"
|
426 |
+
424,/m/0_ksk,"Power tool"
|
427 |
+
425,/m/01d380,"Drill"
|
428 |
+
426,/m/014zdl,"Explosion"
|
429 |
+
427,/m/032s66,"Gunshot, gunfire"
|
430 |
+
428,/m/04zjc,"Machine gun"
|
431 |
+
429,/m/02z32qm,"Fusillade"
|
432 |
+
430,/m/0_1c,"Artillery fire"
|
433 |
+
431,/m/073cg4,"Cap gun"
|
434 |
+
432,/m/0g6b5,"Fireworks"
|
435 |
+
433,/g/122z_qxw,"Firecracker"
|
436 |
+
434,/m/07qsvvw,"Burst, pop"
|
437 |
+
435,/m/07pxg6y,"Eruption"
|
438 |
+
436,/m/07qqyl4,"Boom"
|
439 |
+
437,/m/083vt,"Wood"
|
440 |
+
438,/m/07pczhz,"Chop"
|
441 |
+
439,/m/07pl1bw,"Splinter"
|
442 |
+
440,/m/07qs1cx,"Crack"
|
443 |
+
441,/m/039jq,"Glass"
|
444 |
+
442,/m/07q7njn,"Chink, clink"
|
445 |
+
443,/m/07rn7sz,"Shatter"
|
446 |
+
444,/m/04k94,"Liquid"
|
447 |
+
445,/m/07rrlb6,"Splash, splatter"
|
448 |
+
446,/m/07p6mqd,"Slosh"
|
449 |
+
447,/m/07qlwh6,"Squish"
|
450 |
+
448,/m/07r5v4s,"Drip"
|
451 |
+
449,/m/07prgkl,"Pour"
|
452 |
+
450,/m/07pqc89,"Trickle, dribble"
|
453 |
+
451,/t/dd00088,"Gush"
|
454 |
+
452,/m/07p7b8y,"Fill (with liquid)"
|
455 |
+
453,/m/07qlf79,"Spray"
|
456 |
+
454,/m/07ptzwd,"Pump (liquid)"
|
457 |
+
455,/m/07ptfmf,"Stir"
|
458 |
+
456,/m/0dv3j,"Boiling"
|
459 |
+
457,/m/0790c,"Sonar"
|
460 |
+
458,/m/0dl83,"Arrow"
|
461 |
+
459,/m/07rqsjt,"Whoosh, swoosh, swish"
|
462 |
+
460,/m/07qnq_y,"Thump, thud"
|
463 |
+
461,/m/07rrh0c,"Thunk"
|
464 |
+
462,/m/0b_fwt,"Electronic tuner"
|
465 |
+
463,/m/02rr_,"Effects unit"
|
466 |
+
464,/m/07m2kt,"Chorus effect"
|
467 |
+
465,/m/018w8,"Basketball bounce"
|
468 |
+
466,/m/07pws3f,"Bang"
|
469 |
+
467,/m/07ryjzk,"Slap, smack"
|
470 |
+
468,/m/07rdhzs,"Whack, thwack"
|
471 |
+
469,/m/07pjjrj,"Smash, crash"
|
472 |
+
470,/m/07pc8lb,"Breaking"
|
473 |
+
471,/m/07pqn27,"Bouncing"
|
474 |
+
472,/m/07rbp7_,"Whip"
|
475 |
+
473,/m/07pyf11,"Flap"
|
476 |
+
474,/m/07qb_dv,"Scratch"
|
477 |
+
475,/m/07qv4k0,"Scrape"
|
478 |
+
476,/m/07pdjhy,"Rub"
|
479 |
+
477,/m/07s8j8t,"Roll"
|
480 |
+
478,/m/07plct2,"Crushing"
|
481 |
+
479,/t/dd00112,"Crumpling, crinkling"
|
482 |
+
480,/m/07qcx4z,"Tearing"
|
483 |
+
481,/m/02fs_r,"Beep, bleep"
|
484 |
+
482,/m/07qwdck,"Ping"
|
485 |
+
483,/m/07phxs1,"Ding"
|
486 |
+
484,/m/07rv4dm,"Clang"
|
487 |
+
485,/m/07s02z0,"Squeal"
|
488 |
+
486,/m/07qh7jl,"Creak"
|
489 |
+
487,/m/07qwyj0,"Rustle"
|
490 |
+
488,/m/07s34ls,"Whir"
|
491 |
+
489,/m/07qmpdm,"Clatter"
|
492 |
+
490,/m/07p9k1k,"Sizzle"
|
493 |
+
491,/m/07qc9xj,"Clicking"
|
494 |
+
492,/m/07rwm0c,"Clickety-clack"
|
495 |
+
493,/m/07phhsh,"Rumble"
|
496 |
+
494,/m/07qyrcz,"Plop"
|
497 |
+
495,/m/07qfgpx,"Jingle, tinkle"
|
498 |
+
496,/m/07rcgpl,"Hum"
|
499 |
+
497,/m/07p78v5,"Zing"
|
500 |
+
498,/t/dd00121,"Boing"
|
501 |
+
499,/m/07s12q4,"Crunch"
|
502 |
+
500,/m/028v0c,"Silence"
|
503 |
+
501,/m/01v_m0,"Sine wave"
|
504 |
+
502,/m/0b9m1,"Harmonic"
|
505 |
+
503,/m/0hdsk,"Chirp tone"
|
506 |
+
504,/m/0c1dj,"Sound effect"
|
507 |
+
505,/m/07pt_g0,"Pulse"
|
508 |
+
506,/t/dd00125,"Inside, small room"
|
509 |
+
507,/t/dd00126,"Inside, large room or hall"
|
510 |
+
508,/t/dd00127,"Inside, public space"
|
511 |
+
509,/t/dd00128,"Outside, urban or manmade"
|
512 |
+
510,/t/dd00129,"Outside, rural or natural"
|
513 |
+
511,/m/01b9nn,"Reverberation"
|
514 |
+
512,/m/01jnbd,"Echo"
|
515 |
+
513,/m/096m7z,"Noise"
|
516 |
+
514,/m/06_y0by,"Environmental noise"
|
517 |
+
515,/m/07rgkc5,"Static"
|
518 |
+
516,/m/06xkwv,"Mains hum"
|
519 |
+
517,/m/0g12c5,"Distortion"
|
520 |
+
518,/m/08p9q4,"Sidetone"
|
521 |
+
519,/m/07szfh9,"Cacophony"
|
522 |
+
520,/m/0chx_,"White noise"
|
523 |
+
521,/m/0cj0r,"Pink noise"
|
524 |
+
522,/m/07p_0gm,"Throbbing"
|
525 |
+
523,/m/01jwx6,"Vibration"
|
526 |
+
524,/m/07c52,"Television"
|
527 |
+
525,/m/06bz3,"Radio"
|
528 |
+
526,/m/07hvw1,"Field recording"
|
audio_detection/audio_infer/pytorch/__pycache__/models.cpython-38.pyc
ADDED
Binary file (24.6 kB). View file
|
|
audio_detection/audio_infer/pytorch/__pycache__/pytorch_utils.cpython-38.pyc
ADDED
Binary file (7.3 kB). View file
|
|
audio_detection/audio_infer/pytorch/evaluate.py
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sklearn import metrics
|
2 |
+
|
3 |
+
from pytorch_utils import forward
|
4 |
+
|
5 |
+
|
6 |
+
class Evaluator(object):
    """Computes per-class evaluation statistics (AP and AUC) for a model."""

    def __init__(self, model):
        """Evaluator.

        Args:
          model: object
        """
        self.model = model

    def evaluate(self, data_loader):
        """Forward evaluation data and calculate statistics.

        Args:
          data_loader: object

        Returns:
          statistics: dict,
              {'average_precision': (classes_num,), 'auc': (classes_num,)}
        """
        # Run the model over the whole loader, keeping targets alongside
        # predictions so sklearn metrics can be computed afterwards.
        outputs = forward(
            model=self.model,
            generator=data_loader,
            return_target=True)

        predictions = outputs['clipwise_output']    # (audios_num, classes_num)
        ground_truth = outputs['target']            # (audios_num, classes_num)

        # average=None yields one score per class rather than a single mean.
        per_class_ap = metrics.average_precision_score(
            ground_truth, predictions, average=None)
        per_class_auc = metrics.roc_auc_score(
            ground_truth, predictions, average=None)

        return {'average_precision': per_class_ap, 'auc': per_class_auc}
|
audio_detection/audio_infer/pytorch/finetune_template.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
sys.path.insert(1, os.path.join(sys.path[0], '../utils'))
|
4 |
+
import numpy as np
|
5 |
+
import argparse
|
6 |
+
import h5py
|
7 |
+
import math
|
8 |
+
import time
|
9 |
+
import logging
|
10 |
+
import matplotlib.pyplot as plt
|
11 |
+
|
12 |
+
import torch
|
13 |
+
torch.backends.cudnn.benchmark=True
|
14 |
+
torch.manual_seed(0)
|
15 |
+
import torch.nn as nn
|
16 |
+
import torch.nn.functional as F
|
17 |
+
import torch.optim as optim
|
18 |
+
import torch.utils.data
|
19 |
+
|
20 |
+
from utilities import get_filename
|
21 |
+
from models import *
|
22 |
+
import config
|
23 |
+
|
24 |
+
|
25 |
+
class Transfer_Cnn14(nn.Module):
    """Transfer-learning wrapper: pretrained Cnn14 backbone + new linear head."""

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num, freeze_base):
        """Classifier for a new task using pretrained Cnn14 as a sub module.
        """
        super(Transfer_Cnn14, self).__init__()
        # The pretrained backbone was trained on the 527 AudioSet classes.
        audioset_classes_num = 527
        self.base = Cnn14(sample_rate, window_size, hop_size, mel_bins, fmin,
            fmax, audioset_classes_num)

        # Transfer to another task layer
        self.fc_transfer = nn.Linear(2048, classes_num, bias=True)

        if freeze_base:
            # Freeze AudioSet pretrained layers
            for p in self.base.parameters():
                p.requires_grad = False

        self.init_weights()

    def init_weights(self):
        # Only the new head needs initialization; the base keeps its weights.
        init_layer(self.fc_transfer)

    def load_from_pretrain(self, pretrained_checkpoint_path):
        # Load backbone weights only; the transfer head stays freshly
        # initialized.
        state = torch.load(pretrained_checkpoint_path)
        self.base.load_state_dict(state['model'])

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, data_length)
        """
        output_dict = self.base(input, mixup_lambda)
        logits = self.fc_transfer(output_dict['embedding'])
        # Overwrite the backbone's clipwise output with the new task's
        # log-softmax scores.
        output_dict['clipwise_output'] = torch.log_softmax(logits, dim=-1)
        return output_dict
|
63 |
+
|
64 |
+
|
65 |
+
def train(args):
    """Build a Transfer_Cnn14-style model from CLI arguments, optionally load
    pretrained backbone weights, and wrap it for (multi-)GPU use.

    NOTE(review): despite the name, this template only constructs and loads
    the model; no training loop is run here.

    Args:
      args: argparse.Namespace with sample_rate, window_size, hop_size,
        mel_bins, fmin, fmax, model_type, pretrained_checkpoint_path,
        freeze_base, cuda.
    """

    # Arguments & parameters
    sample_rate = args.sample_rate
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    pretrained_checkpoint_path = args.pretrained_checkpoint_path
    freeze_base = args.freeze_base
    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'

    classes_num = config.classes_num
    # Only attempt to load weights when a checkpoint path was supplied.
    pretrain = True if pretrained_checkpoint_path else False

    # Model
    # SECURITY: eval() instantiates whatever class name is passed via
    # --model_type. Only run with trusted command-line input.
    Model = eval(model_type)
    model = Model(sample_rate, window_size, hop_size, mel_bins, fmin, fmax,
        classes_num, freeze_base)

    # Load pretrained model
    if pretrain:
        logging.info('Load pretrained model from {}'.format(pretrained_checkpoint_path))
        model.load_from_pretrain(pretrained_checkpoint_path)

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in device:
        model.to(device)

    # NOTE(review): printed even when no pretrained checkpoint was loaded.
    print('Load pretrained model successfully!')
|
100 |
+
|
101 |
+
|
102 |
+
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Example of parser. ')
    subparsers = parser.add_subparsers(dest='mode')

    # Train
    train_parser = subparsers.add_parser('train')
    # All audio front-end settings are required integers.
    for flag in ('--sample_rate', '--window_size', '--hop_size',
            '--mel_bins', '--fmin', '--fmax'):
        train_parser.add_argument(flag, type=int, required=True)
    train_parser.add_argument('--model_type', type=str, required=True)
    train_parser.add_argument('--pretrained_checkpoint_path', type=str)
    train_parser.add_argument('--freeze_base', action='store_true', default=False)
    train_parser.add_argument('--cuda', action='store_true', default=False)

    # Parse arguments
    args = parser.parse_args()
    args.filename = get_filename(__file__)

    if args.mode == 'train':
        train(args)
    else:
        raise Exception('Error argument!')
|
audio_detection/audio_infer/pytorch/inference.py
ADDED
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
sys.path.insert(1, os.path.join(sys.path[0], '../utils'))
|
4 |
+
import numpy as np
|
5 |
+
import argparse
|
6 |
+
import librosa
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
+
import torch
|
9 |
+
|
10 |
+
from utilities import create_folder, get_filename
|
11 |
+
from models import *
|
12 |
+
from pytorch_utils import move_data_to_device
|
13 |
+
import config
|
14 |
+
|
15 |
+
def audio_tagging(args):
    """Inference audio tagging result of an audio clip.

    Loads a checkpointed model, runs one forward pass over the full clip and
    prints the top-10 class probabilities.

    Args:
      args: argparse.Namespace with sample_rate, window_size, hop_size,
        mel_bins, fmin, fmax, model_type, checkpoint_path, audio_path, cuda.

    Returns:
      (clipwise_output, labels): per-class probabilities with shape
      (classes_num,) and the matching label names from config.
    """

    # Arguments & parameters
    sample_rate = args.sample_rate
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    checkpoint_path = args.checkpoint_path
    audio_path = args.audio_path
    device = torch.device('cuda') if args.cuda and torch.cuda.is_available() else torch.device('cpu')

    classes_num = config.classes_num
    labels = config.labels

    # Model
    # SECURITY: eval() instantiates the class named by --model_type.
    # Only run with trusted command-line input.
    Model = eval(model_type)
    model = Model(sample_rate=sample_rate, window_size=window_size,
        hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax,
        classes_num=classes_num)

    # map_location lets a GPU-trained checkpoint load on CPU.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model'])

    # Parallel
    if 'cuda' in str(device):
        model.to(device)
        print('GPU number: {}'.format(torch.cuda.device_count()))
        model = torch.nn.DataParallel(model)
    else:
        print('Using CPU.')

    # Load audio, resampled to the model's rate and mixed down to mono.
    (waveform, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)

    waveform = waveform[None, :]    # (1, audio_length) — batch of one clip
    waveform = move_data_to_device(waveform, device)

    # Forward
    with torch.no_grad():
        model.eval()
        batch_output_dict = model(waveform, None)

    clipwise_output = batch_output_dict['clipwise_output'].data.cpu().numpy()[0]
    """(classes_num,)"""

    # Indices of classes sorted by descending probability.
    sorted_indexes = np.argsort(clipwise_output)[::-1]

    # Print audio tagging top probabilities
    # NOTE(review): assumes at least 10 classes; config.classes_num is
    # presumably >= 10 — confirm against config.
    for k in range(10):
        print('{}: {:.3f}'.format(np.array(labels)[sorted_indexes[k]],
            clipwise_output[sorted_indexes[k]]))

    # Print embedding shape if the model exposes one.
    if 'embedding' in batch_output_dict.keys():
        embedding = batch_output_dict['embedding'].data.cpu().numpy()[0]
        print('embedding: {}'.format(embedding.shape))

    return clipwise_output, labels
|
78 |
+
|
79 |
+
|
80 |
+
def sound_event_detection(args):
    """Inference sound event detection result of an audio clip.

    Runs the model once over the clip, takes the framewise output, and saves
    a two-panel figure (log spectrogram + top-k class activations) under
    ``results/<clip_name>.png``.

    Args:
      args: argparse.Namespace with sample_rate, window_size, hop_size,
        mel_bins, fmin, fmax, model_type, checkpoint_path, audio_path, cuda.

    Returns:
      (framewise_output, labels): per-frame class scores with shape
      (time_steps, classes_num) and the label names from config.
    """

    # Arguments & parameters
    sample_rate = args.sample_rate
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    checkpoint_path = args.checkpoint_path
    audio_path = args.audio_path
    device = torch.device('cuda') if args.cuda and torch.cuda.is_available() else torch.device('cpu')

    classes_num = config.classes_num
    labels = config.labels
    # One model frame per STFT hop.
    frames_per_second = sample_rate // hop_size

    # Paths
    fig_path = os.path.join('results', '{}.png'.format(get_filename(audio_path)))
    create_folder(os.path.dirname(fig_path))

    # Model
    # SECURITY: eval() instantiates the class named by --model_type.
    # Only run with trusted command-line input.
    Model = eval(model_type)
    model = Model(sample_rate=sample_rate, window_size=window_size,
        hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax,
        classes_num=classes_num)

    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model'])

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in str(device):
        model.to(device)

    # Load audio
    (waveform, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)

    waveform = waveform[None, :]    # (1, audio_length)
    waveform = move_data_to_device(waveform, device)

    # Forward
    with torch.no_grad():
        model.eval()
        batch_output_dict = model(waveform, None)

    framewise_output = batch_output_dict['framewise_output'].data.cpu().numpy()[0]
    """(time_steps, classes_num)"""

    print('Sound event detection result (time_steps x classes_num): {}'.format(
        framewise_output.shape))

    # Classes ranked by their peak framewise score across the clip.
    sorted_indexes = np.argsort(np.max(framewise_output, axis=0))[::-1]

    top_k = 10  # Show top results
    top_result_mat = framewise_output[:, sorted_indexes[0 : top_k]]
    """(time_steps, top_k)"""

    # Plot result: spectrogram on top, top-k class activations below.
    stft = librosa.core.stft(y=waveform[0].data.cpu().numpy(), n_fft=window_size,
        hop_length=hop_size, window='hann', center=True)
    frames_num = stft.shape[-1]

    fig, axs = plt.subplots(2, 1, sharex=True, figsize=(10, 4))
    axs[0].matshow(np.log(np.abs(stft)), origin='lower', aspect='auto', cmap='jet')
    axs[0].set_ylabel('Frequency bins')
    axs[0].set_title('Log spectrogram')
    # Scores are presumably in [0, 1]; vmin/vmax pin the color scale.
    axs[1].matshow(top_result_mat.T, origin='upper', aspect='auto', cmap='jet', vmin=0, vmax=1)
    # One x tick per second of audio.
    axs[1].xaxis.set_ticks(np.arange(0, frames_num, frames_per_second))
    axs[1].xaxis.set_ticklabels(np.arange(0, frames_num / frames_per_second))
    axs[1].yaxis.set_ticks(np.arange(0, top_k))
    axs[1].yaxis.set_ticklabels(np.array(labels)[sorted_indexes[0 : top_k]])
    axs[1].yaxis.grid(color='k', linestyle='solid', linewidth=0.3, alpha=0.3)
    axs[1].set_xlabel('Seconds')
    axs[1].xaxis.set_ticks_position('bottom')

    plt.tight_layout()
    plt.savefig(fig_path)
    print('Save sound event detection visualization to {}'.format(fig_path))

    return framewise_output, labels
|
166 |
+
|
167 |
+
|
168 |
+
if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='Example of parser. ')
    subparsers = parser.add_subparsers(dest='mode')

    # Both modes take exactly the same set of options, so build the two
    # subparsers in one loop instead of duplicating every add_argument call.
    for mode_name in ('audio_tagging', 'sound_event_detection'):
        sub = subparsers.add_parser(mode_name)
        sub.add_argument('--sample_rate', type=int, default=32000)
        sub.add_argument('--window_size', type=int, default=1024)
        sub.add_argument('--hop_size', type=int, default=320)
        sub.add_argument('--mel_bins', type=int, default=64)
        sub.add_argument('--fmin', type=int, default=50)
        sub.add_argument('--fmax', type=int, default=14000)
        sub.add_argument('--model_type', type=str, required=True)
        sub.add_argument('--checkpoint_path', type=str, required=True)
        sub.add_argument('--audio_path', type=str, required=True)
        sub.add_argument('--cuda', action='store_true', default=False)

    args = parser.parse_args()

    if args.mode == 'audio_tagging':
        audio_tagging(args)
    elif args.mode == 'sound_event_detection':
        sound_event_detection(args)
    else:
        raise Exception('Error argument!')
|
audio_detection/audio_infer/pytorch/losses.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
|
4 |
+
|
5 |
+
def clip_bce(output_dict, target_dict):
    """Binary crossentropy loss.

    Compares the model's clip-level probabilities against multi-hot targets.
    """
    predictions = output_dict['clipwise_output']
    targets = target_dict['target']
    return F.binary_cross_entropy(predictions, targets)
|
10 |
+
|
11 |
+
|
12 |
+
def get_loss_func(loss_type):
    """Map a loss-type name to its loss function.

    Args:
      loss_type: str, currently only 'clip_bce' is supported.

    Returns:
      Callable taking (output_dict, target_dict) and returning a scalar loss.

    Raises:
      ValueError: if ``loss_type`` is not a recognized loss name.
    """
    if loss_type == 'clip_bce':
        return clip_bce
    # Fail loudly instead of silently returning None, which would otherwise
    # surface later as a confusing "'NoneType' is not callable" at the call
    # site in the training loop.
    raise ValueError('Unknown loss_type: {}'.format(loss_type))
|
audio_detection/audio_infer/pytorch/main.py
ADDED
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
sys.path.insert(1, os.path.join(sys.path[0], '../utils'))
|
4 |
+
import numpy as np
|
5 |
+
import argparse
|
6 |
+
import time
|
7 |
+
import logging
|
8 |
+
|
9 |
+
import torch
|
10 |
+
import torch.nn as nn
|
11 |
+
import torch.nn.functional as F
|
12 |
+
import torch.optim as optim
|
13 |
+
import torch.utils.data
|
14 |
+
|
15 |
+
from utilities import (create_folder, get_filename, create_logging, Mixup,
|
16 |
+
StatisticsContainer)
|
17 |
+
from models import (PVT, PVT2, PVT_lr, PVT_nopretrain, PVT_2layer, Cnn14, Cnn14_no_specaug, Cnn14_no_dropout,
|
18 |
+
Cnn6, Cnn10, ResNet22, ResNet38, ResNet54, Cnn14_emb512, Cnn14_emb128,
|
19 |
+
Cnn14_emb32, MobileNetV1, MobileNetV2, LeeNet11, LeeNet24, DaiNet19,
|
20 |
+
Res1dNet31, Res1dNet51, Wavegram_Cnn14, Wavegram_Logmel_Cnn14,
|
21 |
+
Wavegram_Logmel128_Cnn14, Cnn14_16k, Cnn14_8k, Cnn14_mel32, Cnn14_mel128,
|
22 |
+
Cnn14_mixup_time_domain, Cnn14_DecisionLevelMax, Cnn14_DecisionLevelAtt, Cnn6_Transformer, GLAM, GLAM2, GLAM3, Cnn4, EAT)
|
23 |
+
#from models_test import (PVT_test)
|
24 |
+
#from models1 import (PVT1)
|
25 |
+
#from models_vig import (VIG, VIG2)
|
26 |
+
#from models_vvt import (VVT)
|
27 |
+
#from models2 import (MPVIT, MPVIT2)
|
28 |
+
#from models_reshape import (PVT_reshape, PVT_tscam)
|
29 |
+
#from models_swin import (Swin, Swin_nopretrain)
|
30 |
+
#from models_swin2 import (Swin2)
|
31 |
+
#from models_van import (Van, Van_tiny)
|
32 |
+
#from models_focal import (Focal)
|
33 |
+
#from models_cross import (Cross)
|
34 |
+
#from models_cov import (Cov)
|
35 |
+
#from models_cnn import (Cnn_light)
|
36 |
+
#from models_twins import (Twins)
|
37 |
+
#from models_cmt import (Cmt, Cmt1)
|
38 |
+
#from models_shunted import (Shunted)
|
39 |
+
#from models_quadtree import (Quadtree, Quadtree2, Quadtree_nopretrain)
|
40 |
+
#from models_davit import (Davit_tscam, Davit, Davit_nopretrain)
|
41 |
+
from pytorch_utils import (move_data_to_device, count_parameters, count_flops,
|
42 |
+
do_mixup)
|
43 |
+
from data_generator import (AudioSetDataset, TrainSampler, BalancedTrainSampler,
|
44 |
+
AlternateTrainSampler, EvaluateSampler, collate_fn)
|
45 |
+
from evaluate import Evaluator
|
46 |
+
import config
|
47 |
+
from losses import get_loss_func
|
48 |
+
|
49 |
+
|
50 |
+
def train(args):
    """Train AudioSet tagging model.

    Builds the model named by ``args.model_type``, sets up (possibly class-
    balanced) HDF5-backed data loaders, optionally resumes from a checkpoint,
    then runs the training loop with periodic evaluation, checkpointing and
    LR scheduling every 2000 iterations.

    Args:
      workspace: str
      data_type: 'balanced_train' | 'full_train'
      sample_rate: int
      window_size: int
      hop_size: int
      mel_bins: int
      fmin: int
      fmax: int
      model_type: str
      loss_type: 'clip_bce'
      balanced: 'none' | 'balanced' | 'alternate'
      augmentation: 'none' | 'mixup'
      batch_size: int
      learning_rate: float
      resume_iteration: int
      early_stop: int
      cuda: bool
      filename: str, used to namespace checkpoint/statistics/log paths.
    """

    # Arguments & parameters
    workspace = args.workspace
    data_type = args.data_type
    sample_rate = args.sample_rate
    window_size = args.window_size
    hop_size = args.hop_size
    mel_bins = args.mel_bins
    fmin = args.fmin
    fmax = args.fmax
    model_type = args.model_type
    loss_type = args.loss_type
    balanced = args.balanced
    augmentation = args.augmentation
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    resume_iteration = args.resume_iteration
    early_stop = args.early_stop
    device = torch.device('cuda') if args.cuda and torch.cuda.is_available() else torch.device('cpu')
    filename = args.filename

    num_workers = 8
    clip_samples = config.clip_samples
    classes_num = config.classes_num
    loss_func = get_loss_func(loss_type)

    # Paths
    black_list_csv = None

    train_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
        '{}.h5'.format(data_type))

    eval_bal_indexes_hdf5_path = os.path.join(workspace,
        'hdf5s', 'indexes', 'balanced_train.h5')

    eval_test_indexes_hdf5_path = os.path.join(workspace, 'hdf5s', 'indexes',
        'eval.h5')

    # All artifact paths encode the full hyper-parameter configuration so
    # different runs never collide.
    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'.format(
        sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size))
    create_folder(checkpoints_dir)

    statistics_path = os.path.join(workspace, 'statistics', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'.format(
        sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size),
        'statistics.pkl')
    create_folder(os.path.dirname(statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename,
        'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'.format(
        sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
        'data_type={}'.format(data_type), model_type,
        'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
        'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size))

    create_logging(logs_dir, filemode='w')
    logging.info(args)

    # Collapse the torch.device to a plain string used for .to() below.
    if 'cuda' in str(device):
        logging.info('Using GPU.')
        device = 'cuda'
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')
        device = 'cpu'

    # Model
    # SECURITY: eval() instantiates whatever class name was passed as
    # --model_type; only run with trusted command-line input.
    Model = eval(model_type)
    model = Model(sample_rate=sample_rate, window_size=window_size,
        hop_size=hop_size, mel_bins=mel_bins, fmin=fmin, fmax=fmax,
        classes_num=classes_num)
    total = sum(p.numel() for p in model.parameters())
    print("Total params: %.2fM" % (total/1e6))
    logging.info("Total params: %.2fM" % (total/1e6))
    #params_num = count_parameters(model)
    # flops_num = count_flops(model, clip_samples)
    #logging.info('Parameters num: {}'.format(params_num))
    # logging.info('Flops num: {:.3f} G'.format(flops_num / 1e9))

    # Dataset will be used by DataLoader later. Dataset takes a meta as input
    # and return a waveform and a target.
    dataset = AudioSetDataset(sample_rate=sample_rate)

    # Train sampler
    # NOTE(review): an unexpected `balanced` value would leave Sampler
    # undefined and raise a NameError below.
    if balanced == 'none':
        Sampler = TrainSampler
    elif balanced == 'balanced':
        Sampler = BalancedTrainSampler
    elif balanced == 'alternate':
        Sampler = AlternateTrainSampler

    # Mixup needs two clips per training example, hence the doubled batch.
    train_sampler = Sampler(
        indexes_hdf5_path=train_indexes_hdf5_path,
        batch_size=batch_size * 2 if 'mixup' in augmentation else batch_size,
        black_list_csv=black_list_csv)

    # Evaluate sampler
    eval_bal_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_bal_indexes_hdf5_path, batch_size=batch_size)

    eval_test_sampler = EvaluateSampler(
        indexes_hdf5_path=eval_test_indexes_hdf5_path, batch_size=batch_size)

    # Data loader
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=train_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True)

    eval_bal_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=eval_bal_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True)

    eval_test_loader = torch.utils.data.DataLoader(dataset=dataset,
        batch_sampler=eval_test_sampler, collate_fn=collate_fn,
        num_workers=num_workers, pin_memory=True)
    # Mixup Beta-distribution alpha.
    mix=0.5
    if 'mixup' in augmentation:
        mixup_augmenter = Mixup(mixup_alpha=mix)
        print(mix)
        logging.info(mix)

    # Evaluator
    evaluator = Evaluator(model=model)

    # Statistics
    statistics_container = StatisticsContainer(statistics_path)

    # Optimizer
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.05, amsgrad=True)
    # Plateau scheduler driven by test mAP (mode='max'), stepped every 2000
    # iterations in the loop below.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=4, min_lr=1e-06, verbose=True)
    train_bgn_time = time.time()

    # Resume training
    if resume_iteration > 0:
        resume_checkpoint_path = os.path.join(workspace, 'checkpoints', filename,
            'sample_rate={},window_size={},hop_size={},mel_bins={},fmin={},fmax={}'.format(
                sample_rate, window_size, hop_size, mel_bins, fmin, fmax),
            'data_type={}'.format(data_type), model_type,
            'loss_type={}'.format(loss_type), 'balanced={}'.format(balanced),
            'augmentation={}'.format(augmentation), 'batch_size={}'.format(batch_size),
            '{}_iterations.pth'.format(resume_iteration))

        logging.info('Loading checkpoint {}'.format(resume_checkpoint_path))
        checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(checkpoint['model'])
        train_sampler.load_state_dict(checkpoint['sampler'])
        statistics_container.load_state_dict(resume_iteration)
        iteration = checkpoint['iteration']

    else:
        iteration = 0

    # Parallel
    print('GPU number: {}'.format(torch.cuda.device_count()))
    model = torch.nn.DataParallel(model)

    if 'cuda' in str(device):
        model.to(device)

    # Optimizer/scheduler state must be restored AFTER the model is moved to
    # the device, so optimizer tensors land on the right device.
    if resume_iteration:
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        print(optimizer.state_dict()['param_groups'][0]['lr'])

    time1 = time.time()

    for batch_data_dict in train_loader:
        """batch_data_dict: {
            'audio_name': (batch_size [*2 if mixup],), 
            'waveform': (batch_size [*2 if mixup], clip_samples), 
            'target': (batch_size [*2 if mixup], classes_num), 
            (ifexist) 'mixup_lambda': (batch_size * 2,)}
        """

        # Evaluate
        if (iteration % 2000 == 0 and iteration >= resume_iteration) or (iteration == 0):
            train_fin_time = time.time()

            bal_statistics = evaluator.evaluate(eval_bal_loader)
            test_statistics = evaluator.evaluate(eval_test_loader)

            logging.info('Validate bal mAP: {:.3f}'.format(
                np.mean(bal_statistics['average_precision'])))

            logging.info('Validate test mAP: {:.3f}'.format(
                np.mean(test_statistics['average_precision'])))

            statistics_container.append(iteration, bal_statistics, data_type='bal')
            statistics_container.append(iteration, test_statistics, data_type='test')
            statistics_container.dump()

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'iteration: {}, train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(iteration, train_time, validate_time))

            logging.info('------------------------------------')

            train_bgn_time = time.time()

        # Save model
        if iteration % 2000 == 0:
            # .module: unwrap DataParallel so the saved weights load into a
            # plain (non-parallel) model.
            checkpoint = {
                'iteration': iteration,
                'model': model.module.state_dict(),
                'sampler': train_sampler.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Mixup lambda
        if 'mixup' in augmentation:
            batch_data_dict['mixup_lambda'] = mixup_augmenter.get_lambda(
                batch_size=len(batch_data_dict['waveform']))

        # Move data to device
        for key in batch_data_dict.keys():
            batch_data_dict[key] = move_data_to_device(batch_data_dict[key], device)

        # Forward
        model.train()

        if 'mixup' in augmentation:
            batch_output_dict = model(batch_data_dict['waveform'],
                batch_data_dict['mixup_lambda'])
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            # Targets are mixed with the same lambdas used inside the model.
            batch_target_dict = {'target': do_mixup(batch_data_dict['target'],
                batch_data_dict['mixup_lambda'])}
            """{'target': (batch_size, classes_num)}"""
        else:
            batch_output_dict = model(batch_data_dict['waveform'], None)
            """{'clipwise_output': (batch_size, classes_num), ...}"""

            batch_target_dict = {'target': batch_data_dict['target']}
            """{'target': (batch_size, classes_num)}"""

        # Loss
        loss = loss_func(batch_output_dict, batch_target_dict)
        # Backward
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        if iteration % 10 == 0:
            print(iteration, loss)
            #print('--- Iteration: {}, train time: {:.3f} s / 10 iterations ---'\
            #    .format(iteration, time.time() - time1))
            #time1 = time.time()

        # Scheduler is driven by the test mAP computed in the evaluate branch
        # above; both branches share the same iteration % 2000 == 0 condition,
        # so test_statistics is defined whenever this runs.
        if iteration % 2000 == 0:
            scheduler.step(np.mean(test_statistics['average_precision']))
            print(optimizer.state_dict()['param_groups'][0]['lr'])
            logging.info(optimizer.state_dict()['param_groups'][0]['lr'])

        # Stop learning
        if iteration == early_stop:
            break

        iteration += 1
|
345 |
+
|
346 |
+
|
347 |
+
if __name__ == '__main__':

    # Command-line entry point. Only the 'train' sub-command exists; any
    # other (or missing) mode is rejected below.
    parser = argparse.ArgumentParser(description='Example of parser. ')
    subparsers = parser.add_subparsers(dest='mode')

    parser_train = subparsers.add_parser('train')
    # Paths / data selection.
    parser_train.add_argument('--workspace', type=str, required=True)
    parser_train.add_argument('--data_type', type=str, default='full_train', choices=['balanced_train', 'full_train'])
    # Audio front-end (STFT / log-mel) settings.
    parser_train.add_argument('--sample_rate', type=int, default=32000)
    parser_train.add_argument('--window_size', type=int, default=1024)
    parser_train.add_argument('--hop_size', type=int, default=320)
    parser_train.add_argument('--mel_bins', type=int, default=64)
    parser_train.add_argument('--fmin', type=int, default=50)
    parser_train.add_argument('--fmax', type=int, default=14000)
    # Model / optimization settings.
    parser_train.add_argument('--model_type', type=str, required=True)
    parser_train.add_argument('--loss_type', type=str, default='clip_bce', choices=['clip_bce'])
    parser_train.add_argument('--balanced', type=str, default='balanced', choices=['none', 'balanced', 'alternate'])
    parser_train.add_argument('--augmentation', type=str, default='mixup', choices=['none', 'mixup'])
    parser_train.add_argument('--batch_size', type=int, default=32)
    parser_train.add_argument('--learning_rate', type=float, default=1e-3)
    parser_train.add_argument('--resume_iteration', type=int, default=0)
    parser_train.add_argument('--early_stop', type=int, default=1000000)
    parser_train.add_argument('--cuda', action='store_true', default=False)

    args = parser.parse_args()
    # Record this script's name on args so downstream logging/checkpoint
    # paths can be grouped per entry file.
    args.filename = get_filename(__file__)

    if args.mode == 'train':
        train(args)

    else:
        raise Exception('Error argument!')
|
audio_detection/audio_infer/pytorch/models.py
ADDED
@@ -0,0 +1,951 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.nn.functional as F
|
4 |
+
from torchlibrosa.stft import Spectrogram, LogmelFilterBank
|
5 |
+
from torchlibrosa.augmentation import SpecAugmentation
|
6 |
+
|
7 |
+
from audio_infer.pytorch.pytorch_utils import do_mixup, interpolate, pad_framewise_output
|
8 |
+
import os
|
9 |
+
import sys
|
10 |
+
import math
|
11 |
+
import numpy as np
|
12 |
+
|
13 |
+
import torch
|
14 |
+
import torch.nn as nn
|
15 |
+
import torch.nn.functional as F
|
16 |
+
from torch.nn.parameter import Parameter
|
17 |
+
from torchlibrosa.stft import Spectrogram, LogmelFilterBank
|
18 |
+
from torchlibrosa.augmentation import SpecAugmentation
|
19 |
+
from audio_infer.pytorch.pytorch_utils import do_mixup
|
20 |
+
import torch.utils.checkpoint as checkpoint
|
21 |
+
from timm.models.layers import DropPath, to_2tuple, trunc_normal_
|
22 |
+
import warnings
|
23 |
+
from functools import partial
|
24 |
+
#from mmdet.models.builder import BACKBONES
|
25 |
+
from mmdet.utils import get_root_logger
|
26 |
+
from mmcv.runner import load_checkpoint
|
27 |
+
os.environ['TORCH_HOME'] = '../pretrained_models'
|
28 |
+
from copy import deepcopy
|
29 |
+
from timm.models.helpers import load_pretrained
|
30 |
+
from torch.cuda.amp import autocast
|
31 |
+
from collections import OrderedDict
|
32 |
+
import io
|
33 |
+
import re
|
34 |
+
from mmcv.runner import _load_checkpoint, load_state_dict
|
35 |
+
import mmcv.runner
|
36 |
+
import copy
|
37 |
+
import random
|
38 |
+
from einops import rearrange
|
39 |
+
from einops.layers.torch import Rearrange, Reduce
|
40 |
+
from torch import nn, einsum
|
41 |
+
|
42 |
+
|
43 |
+
def load_checkpoint(model,
                    filename,
                    map_location=None,
                    strict=False,
                    logger=None,
                    revise_keys=[(r'^module\.', '')]):
    """Load a checkpoint from a file or URI and adapt it to this model.

    Beyond plain mmcv checkpoint loading, this variant collapses the
    3-channel (RGB) pretrained patch-embedding kernel to a single input
    channel by summing over the input-channel dim, so ImageNet PVT weights
    can initialize a 1-channel spectrogram model. It also strips
    ``backbone.`` prefixes left by detection-style checkpoints.

    Args:
        model (Module): Module to load the checkpoint into.
        filename (str): Local filepath, URL, ``torchvision://xxx`` or
            ``open-mmlab://xxx`` (see mmcv's ``_load_checkpoint``).
        map_location (str): Same as :func:`torch.load`.
        strict (bool): Whether to allow different params for the model and
            checkpoint.
        logger (:mod:`logging.Logger` or None): Logger for error messages.
        revise_keys (list): (pattern, replacement) regex pairs applied to
            the state_dict keys. Default strips the 'module.' prefix.
            NOTE: this default is a mutable list; it is never mutated here,
            so sharing it across calls is safe.

    Returns:
        dict or OrderedDict: The loaded checkpoint.

    Raises:
        RuntimeError: If the loaded checkpoint is not a dict.
    """
    checkpoint = _load_checkpoint(filename, map_location, logger)

    # Validate BEFORE touching the contents. (Previously this isinstance
    # check ran after indexing the checkpoint, so it could never fire — a
    # non-dict would already have raised TypeError.)
    if not isinstance(checkpoint, dict):
        raise RuntimeError(
            f'No state_dict found in checkpoint file {filename}')

    # Adapt the RGB patch-embedding kernel to 1-channel input. Assumes the
    # key lives at the top level of the checkpoint (as in the released PVT
    # weights) — TODO confirm for wrapped checkpoints.
    checkpoint['patch_embed1.proj.weight'] = torch.nn.Parameter(
        torch.sum(checkpoint['patch_embed1.proj.weight'], dim=1).unsqueeze(1))

    # Get the state_dict from the checkpoint (some checkpoints nest it).
    if 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        state_dict = checkpoint

    # Strip prefixes from state_dict keys, preserving any _metadata the
    # OrderedDict rebuilds would otherwise drop.
    metadata = getattr(state_dict, '_metadata', OrderedDict())
    for p, r in revise_keys:
        state_dict = OrderedDict(
            {re.sub(p, r, k): v
             for k, v in state_dict.items()})
    # Detection checkpoints store backbone weights under 'backbone.'.
    state_dict = OrderedDict({k.replace('backbone.', ''): v for k, v in state_dict.items()})
    # Keep metadata in state_dict.
    state_dict._metadata = metadata

    # Load the (possibly non-strict) state_dict into the model.
    load_state_dict(model, state_dict, strict, logger)
    return checkpoint
|
96 |
+
|
97 |
+
def init_layer(layer):
    """Xavier-uniform init for a Linear/Conv weight; zero its bias if present."""
    nn.init.xavier_uniform_(layer.weight)

    bias = getattr(layer, 'bias', None)
    if bias is not None:
        bias.data.fill_(0.)
|
104 |
+
|
105 |
+
|
106 |
+
def init_bn(bn):
    """Reset a BatchNorm layer to the identity affine transform (weight=1, bias=0)."""
    bn.weight.data.fill_(1.)
    bn.bias.data.fill_(0.)
|
110 |
+
|
111 |
+
|
112 |
+
|
113 |
+
|
114 |
+
class TimeShift(nn.Module):
    """Randomly roll the input along dim 2 (time) during training.

    The shift is drawn from N(mean, std) and truncated to an int on each
    forward call; at eval time the input passes through unchanged.
    """

    def __init__(self, mean, std):
        super().__init__()
        self.mean = mean
        self.std = std

    def forward(self, x):
        if not self.training:
            return x
        offset = int(torch.empty(1).normal_(self.mean, self.std).item())
        return torch.roll(x, offset, dims=2)
|
125 |
+
|
126 |
+
class LinearSoftPool(nn.Module):
    """Linear softmax pooling: weights each frame probability by itself.

    From "A Comparison of Five Multiple Instance Learning Pooling Functions
    for Sound Event Detection with Weak Labeling"
    (https://arxiv.org/abs/1810.09050). Note ``logits`` is accepted for
    interface compatibility but not used.
    """

    def __init__(self, pooldim=1):
        super().__init__()
        self.pooldim = pooldim

    def forward(self, logits, time_decision):
        dim = self.pooldim
        weighted_sum = (time_decision * time_decision).sum(dim)
        return weighted_sum / time_decision.sum(dim)
|
140 |
+
|
141 |
+
class PVT(nn.Module):
    """Sound-event model: log-mel front end + PVTv2 backbone (trained from scratch).

    forward() returns per-frame class probabilities ('framewise_output') and
    a per-clip score ('clipwise_output') obtained by average-pooling the
    frame probabilities over time.
    """

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num):

        super(PVT, self).__init__()

        # STFT / log-mel settings shared by the two extractors below.
        window = 'hann'
        center = True
        pad_mode = 'reflect'
        ref = 1.0
        amin = 1e-10
        top_db = None

        # Spectrogram extractor (frozen, not trained).
        self.spectrogram_extractor = Spectrogram(n_fft=window_size, hop_length=hop_size,
            win_length=window_size, window=window, center=center, pad_mode=pad_mode,
            freeze_parameters=True)

        # Logmel feature extractor (frozen, not trained).
        self.logmel_extractor = LogmelFilterBank(sr=sample_rate, n_fft=window_size,
            n_mels=mel_bins, fmin=fmin, fmax=fmax, ref=ref, amin=amin, top_db=top_db,
            freeze_parameters=True)

        # Training-time augmentations: random roll along time + SpecAugment.
        self.time_shift = TimeShift(0, 10)
        # Spec augmenter
        self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
            freq_drop_width=8, freq_stripes_num=2)

        # BatchNorm over the 64 mel bins (applied with the freq axis swapped
        # into the channel dim — see the transpose pair in forward()).
        self.bn0 = nn.BatchNorm2d(64)
        # PVTv2-B2-sized backbone; tdim=1001 presumably matches the number of
        # spectrogram frames for a full clip — TODO confirm against the data
        # pipeline. No pretrained weights here (URL commented out).
        self.pvt_transformer = PyramidVisionTransformerV2(tdim=1001,
                                fdim=64,
                                patch_size=7,
                                stride=4,
                                in_chans=1,
                                num_classes=classes_num,
                                embed_dims=[64, 128, 320, 512],
                                depths=[3, 4, 6, 3],
                                num_heads=[1, 2, 5, 8],
                                mlp_ratios=[8, 8, 4, 4],
                                qkv_bias=True,
                                qk_scale=None,
                                drop_rate=0.0,
                                drop_path_rate=0.1,
                                sr_ratios=[8, 4, 2, 1],
                                norm_layer=partial(nn.LayerNorm, eps=1e-6),
                                num_stages=4,
                                #pretrained='https://github.com/whai362/PVT/releases/download/v2/pvt_v2_b2.pth'
                                )
        #self.temp_pool = LinearSoftPool()
        # Time-average pooling of frame probabilities -> clip probability.
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.fc_audioset = nn.Linear(512, classes_num, bias=True)

        self.init_weights()

    def init_weights(self):
        init_bn(self.bn0)
        init_layer(self.fc_audioset)

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, times_steps, freq_bins)"""

        # Frame scores are upsampled back by this factor; presumably equals
        # the backbone's total temporal downsampling — TODO confirm.
        interpolate_ratio = 32

        x = self.spectrogram_extractor(input)   # (batch_size, 1, time_steps, freq_bins)
        x = self.logmel_extractor(x)    # (batch_size, 1, time_steps, mel_bins)
        frames_num = x.shape[2]
        # Swap mel bins into the channel dim so bn0 normalizes per-bin,
        # then swap back.
        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        if self.training:
            x = self.time_shift(x)
            x = self.spec_augmenter(x)

        # Mixup on spectrogram
        if self.training and mixup_lambda is not None:
            x = do_mixup(x, mixup_lambda)
        #print(x.shape) #torch.Size([10, 1, 1001, 64])
        x = self.pvt_transformer(x)
        #print(x.shape) #torch.Size([10, 800, 128])
        # Collapse the remaining frequency axis of the backbone features.
        x = torch.mean(x, dim=3)

        # (batch, channels, time) -> (batch, time, channels) for the classifier.
        x = x.transpose(1, 2).contiguous()
        framewise_output = torch.sigmoid(self.fc_audioset(x))
        #clipwise_output = torch.mean(framewise_output, dim=1)
        #clipwise_output = self.temp_pool(x, framewise_output).clamp(1e-7, 1.).squeeze(1)
        # Clip score = time-average of frame probabilities.
        x = framewise_output.transpose(1, 2).contiguous()
        x = self.avgpool(x)
        clipwise_output = torch.flatten(x, 1)
        #print(framewise_output.shape) #torch.Size([10, 100, 17])
        # Upsample frame scores back toward the input frame rate.
        # NOTE(review): frames_num is computed above but no longer used since
        # pad_framewise_output is commented out.
        framewise_output = interpolate(framewise_output, interpolate_ratio)
        #framewise_output = framewise_output[:,:1000,:]
        #framewise_output = pad_framewise_output(framewise_output, frames_num)
        output_dict = {'framewise_output': framewise_output,
                       'clipwise_output': clipwise_output}

        return output_dict
|
238 |
+
|
239 |
+
class PVT2(nn.Module):
    """Variant of PVT using ImageNet-pretrained PVTv2-B2 weights.

    Differs from PVT in forward(): the time-shift augmentation is disabled
    and the clip score is a plain torch.mean over frame probabilities
    instead of AdaptiveAvgPool1d.
    """

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num):

        super(PVT2, self).__init__()

        # STFT / log-mel settings shared by the two extractors below.
        window = 'hann'
        center = True
        pad_mode = 'reflect'
        ref = 1.0
        amin = 1e-10
        top_db = None

        # Spectrogram extractor (frozen, not trained).
        self.spectrogram_extractor = Spectrogram(n_fft=window_size, hop_length=hop_size,
            win_length=window_size, window=window, center=center, pad_mode=pad_mode,
            freeze_parameters=True)

        # Logmel feature extractor (frozen, not trained).
        self.logmel_extractor = LogmelFilterBank(sr=sample_rate, n_fft=window_size,
            n_mels=mel_bins, fmin=fmin, fmax=fmax, ref=ref, amin=amin, top_db=top_db,
            freeze_parameters=True)

        # Defined but unused in forward() (the call is commented out there).
        self.time_shift = TimeShift(0, 10)
        # Spec augmenter
        self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
            freq_drop_width=8, freq_stripes_num=2)

        # Per-mel-bin BatchNorm (see transpose pair in forward()).
        self.bn0 = nn.BatchNorm2d(64)
        # PVTv2-B2 backbone initialized from the released pretrained weights.
        self.pvt_transformer = PyramidVisionTransformerV2(tdim=1001,
                                fdim=64,
                                patch_size=7,
                                stride=4,
                                in_chans=1,
                                num_classes=classes_num,
                                embed_dims=[64, 128, 320, 512],
                                depths=[3, 4, 6, 3],
                                num_heads=[1, 2, 5, 8],
                                mlp_ratios=[8, 8, 4, 4],
                                qkv_bias=True,
                                qk_scale=None,
                                drop_rate=0.0,
                                drop_path_rate=0.1,
                                sr_ratios=[8, 4, 2, 1],
                                norm_layer=partial(nn.LayerNorm, eps=1e-6),
                                num_stages=4,
                                pretrained='https://github.com/whai362/PVT/releases/download/v2/pvt_v2_b2.pth'
                                )
        #self.temp_pool = LinearSoftPool()
        self.fc_audioset = nn.Linear(512, classes_num, bias=True)

        self.init_weights()

    def init_weights(self):
        init_bn(self.bn0)
        init_layer(self.fc_audioset)

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, times_steps, freq_bins)"""

        # Upsampling factor for frame scores; presumably the backbone's
        # total temporal stride — TODO confirm.
        interpolate_ratio = 32

        x = self.spectrogram_extractor(input)   # (batch_size, 1, time_steps, freq_bins)
        x = self.logmel_extractor(x)    # (batch_size, 1, time_steps, mel_bins)
        frames_num = x.shape[2]
        # Per-mel-bin BatchNorm via the transpose trick.
        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        if self.training:
            #x = self.time_shift(x)
            x = self.spec_augmenter(x)

        # Mixup on spectrogram
        if self.training and mixup_lambda is not None:
            x = do_mixup(x, mixup_lambda)
        #print(x.shape) #torch.Size([10, 1, 1001, 64])
        x = self.pvt_transformer(x)
        #print(x.shape) #torch.Size([10, 800, 128])
        # Collapse the remaining frequency axis of the backbone features.
        x = torch.mean(x, dim=3)

        x = x.transpose(1, 2).contiguous()
        framewise_output = torch.sigmoid(self.fc_audioset(x))
        # Clip score = mean of frame probabilities over time.
        clipwise_output = torch.mean(framewise_output, dim=1)
        #clipwise_output = self.temp_pool(x, framewise_output).clamp(1e-7, 1.).squeeze(1)
        #print(framewise_output.shape) #torch.Size([10, 100, 17])
        # NOTE(review): frames_num is unused since pad_framewise_output is
        # commented out.
        framewise_output = interpolate(framewise_output, interpolate_ratio)
        #framewise_output = framewise_output[:,:1000,:]
        #framewise_output = pad_framewise_output(framewise_output, frames_num)
        output_dict = {'framewise_output': framewise_output,
                       'clipwise_output': clipwise_output}

        return output_dict
|
332 |
+
|
333 |
+
class PVT_2layer(nn.Module):
    """Lightweight PVT variant using only the first 2 backbone stages.

    Smaller backbone (embed dims 64/128, depths 3/4), so the classifier is
    128-d and the temporal upsampling factor drops to 8.
    """

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num):

        super(PVT_2layer, self).__init__()

        # STFT / log-mel settings shared by the two extractors below.
        window = 'hann'
        center = True
        pad_mode = 'reflect'
        ref = 1.0
        amin = 1e-10
        top_db = None

        # Spectrogram extractor (frozen, not trained).
        self.spectrogram_extractor = Spectrogram(n_fft=window_size, hop_length=hop_size,
            win_length=window_size, window=window, center=center, pad_mode=pad_mode,
            freeze_parameters=True)

        # Logmel feature extractor (frozen, not trained).
        self.logmel_extractor = LogmelFilterBank(sr=sample_rate, n_fft=window_size,
            n_mels=mel_bins, fmin=fmin, fmax=fmax, ref=ref, amin=amin, top_db=top_db,
            freeze_parameters=True)

        # Training-time augmentations: random roll along time + SpecAugment.
        self.time_shift = TimeShift(0, 10)
        # Spec augmenter
        self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
            freq_drop_width=8, freq_stripes_num=2)

        # Per-mel-bin BatchNorm (see transpose pair in forward()).
        self.bn0 = nn.BatchNorm2d(64)
        # 2-stage PVTv2 backbone, initialized from the B2 pretrained weights
        # (only the matching stages can load — presumably loaded non-strict;
        # TODO confirm in PyramidVisionTransformerV2).
        self.pvt_transformer = PyramidVisionTransformerV2(tdim=1001,
                                fdim=64,
                                patch_size=7,
                                stride=4,
                                in_chans=1,
                                num_classes=classes_num,
                                embed_dims=[64, 128],
                                depths=[3, 4],
                                num_heads=[1, 2],
                                mlp_ratios=[8, 8],
                                qkv_bias=True,
                                qk_scale=None,
                                drop_rate=0.0,
                                drop_path_rate=0.1,
                                sr_ratios=[8, 4],
                                norm_layer=partial(nn.LayerNorm, eps=1e-6),
                                num_stages=2,
                                pretrained='https://github.com/whai362/PVT/releases/download/v2/pvt_v2_b2.pth'
                                )
        #self.temp_pool = LinearSoftPool()
        # Time-average pooling of frame probabilities -> clip probability.
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.fc_audioset = nn.Linear(128, classes_num, bias=True)

        self.init_weights()

    def init_weights(self):
        init_bn(self.bn0)
        init_layer(self.fc_audioset)

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, times_steps, freq_bins)"""

        # Only 2 backbone stages -> temporal stride 8 instead of 32.
        interpolate_ratio = 8

        x = self.spectrogram_extractor(input)   # (batch_size, 1, time_steps, freq_bins)
        x = self.logmel_extractor(x)    # (batch_size, 1, time_steps, mel_bins)
        frames_num = x.shape[2]
        # Per-mel-bin BatchNorm via the transpose trick.
        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        if self.training:
            x = self.time_shift(x)
            x = self.spec_augmenter(x)

        # Mixup on spectrogram
        if self.training and mixup_lambda is not None:
            x = do_mixup(x, mixup_lambda)
        #print(x.shape) #torch.Size([10, 1, 1001, 64])
        x = self.pvt_transformer(x)
        #print(x.shape) #torch.Size([10, 800, 128])
        # Collapse the remaining frequency axis of the backbone features.
        x = torch.mean(x, dim=3)

        x = x.transpose(1, 2).contiguous()
        framewise_output = torch.sigmoid(self.fc_audioset(x))
        #clipwise_output = torch.mean(framewise_output, dim=1)
        #clipwise_output = self.temp_pool(x, framewise_output).clamp(1e-7, 1.).squeeze(1)
        # Clip score = time-average of frame probabilities.
        x = framewise_output.transpose(1, 2).contiguous()
        x = self.avgpool(x)
        clipwise_output = torch.flatten(x, 1)
        #print(framewise_output.shape) #torch.Size([10, 100, 17])
        # NOTE(review): frames_num is unused since pad_framewise_output is
        # commented out.
        framewise_output = interpolate(framewise_output, interpolate_ratio)
        #framewise_output = framewise_output[:,:1000,:]
        #framewise_output = pad_framewise_output(framewise_output, frames_num)
        output_dict = {'framewise_output': framewise_output,
                       'clipwise_output': clipwise_output}

        return output_dict
|
430 |
+
|
431 |
+
class PVT_lr(nn.Module):
    """PVT variant pooling clip scores with LinearSoftPool instead of averaging.

    Uses the pretrained PVTv2-B2 backbone; the clip score is a linear-softmax
    pool of the frame probabilities, clamped to a valid probability range.
    """

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num):

        super(PVT_lr, self).__init__()

        # STFT / log-mel settings shared by the two extractors below.
        window = 'hann'
        center = True
        pad_mode = 'reflect'
        ref = 1.0
        amin = 1e-10
        top_db = None

        # Spectrogram extractor (frozen, not trained).
        self.spectrogram_extractor = Spectrogram(n_fft=window_size, hop_length=hop_size,
            win_length=window_size, window=window, center=center, pad_mode=pad_mode,
            freeze_parameters=True)

        # Logmel feature extractor (frozen, not trained).
        self.logmel_extractor = LogmelFilterBank(sr=sample_rate, n_fft=window_size,
            n_mels=mel_bins, fmin=fmin, fmax=fmax, ref=ref, amin=amin, top_db=top_db,
            freeze_parameters=True)

        # Training-time augmentations: random roll along time + SpecAugment.
        self.time_shift = TimeShift(0, 10)
        # Spec augmenter
        self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
            freq_drop_width=8, freq_stripes_num=2)

        # Per-mel-bin BatchNorm (see transpose pair in forward()).
        self.bn0 = nn.BatchNorm2d(64)
        # PVTv2-B2 backbone initialized from the released pretrained weights.
        self.pvt_transformer = PyramidVisionTransformerV2(tdim=1001,
                                fdim=64,
                                patch_size=7,
                                stride=4,
                                in_chans=1,
                                num_classes=classes_num,
                                embed_dims=[64, 128, 320, 512],
                                depths=[3, 4, 6, 3],
                                num_heads=[1, 2, 5, 8],
                                mlp_ratios=[8, 8, 4, 4],
                                qkv_bias=True,
                                qk_scale=None,
                                drop_rate=0.0,
                                drop_path_rate=0.1,
                                sr_ratios=[8, 4, 2, 1],
                                norm_layer=partial(nn.LayerNorm, eps=1e-6),
                                num_stages=4,
                                pretrained='https://github.com/whai362/PVT/releases/download/v2/pvt_v2_b2.pth'
                                )
        # Linear-softmax temporal pooling for the clip score.
        self.temp_pool = LinearSoftPool()
        self.fc_audioset = nn.Linear(512, classes_num, bias=True)

        self.init_weights()

    def init_weights(self):
        init_bn(self.bn0)
        init_layer(self.fc_audioset)

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, times_steps, freq_bins)"""

        # Upsampling factor for frame scores; presumably the backbone's
        # total temporal stride — TODO confirm.
        interpolate_ratio = 32

        x = self.spectrogram_extractor(input)   # (batch_size, 1, time_steps, freq_bins)
        x = self.logmel_extractor(x)    # (batch_size, 1, time_steps, mel_bins)
        frames_num = x.shape[2]
        # Per-mel-bin BatchNorm via the transpose trick.
        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        if self.training:
            x = self.time_shift(x)
            x = self.spec_augmenter(x)

        # Mixup on spectrogram
        if self.training and mixup_lambda is not None:
            x = do_mixup(x, mixup_lambda)
        #print(x.shape) #torch.Size([10, 1, 1001, 64])
        x = self.pvt_transformer(x)
        #print(x.shape) #torch.Size([10, 800, 128])
        # Collapse the remaining frequency axis of the backbone features.
        x = torch.mean(x, dim=3)

        x = x.transpose(1, 2).contiguous()
        framewise_output = torch.sigmoid(self.fc_audioset(x))
        # Clip score via linear-softmax pooling over time; the first argument
        # (x) is ignored by LinearSoftPool.forward. Clamp keeps the result a
        # valid probability for BCE loss.
        clipwise_output = self.temp_pool(x, framewise_output).clamp(1e-7, 1.).squeeze(1)
        #print(framewise_output.shape) #torch.Size([10, 100, 17])
        # NOTE(review): frames_num is unused since pad_framewise_output is
        # commented out.
        framewise_output = interpolate(framewise_output, interpolate_ratio)
        #framewise_output = framewise_output[:,:1000,:]
        #framewise_output = pad_framewise_output(framewise_output, frames_num)
        output_dict = {'framewise_output': framewise_output,
                       'clipwise_output': clipwise_output}

        return output_dict
|
523 |
+
|
524 |
+
|
525 |
+
class PVT_nopretrain(nn.Module):
    """PVT_lr without pretrained backbone weights.

    Same architecture and LinearSoftPool clip pooling as PVT_lr, but the
    pretrained URL is commented out, and the upsampled frame output is
    truncated to the first 1000 frames.
    """

    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin,
        fmax, classes_num):

        super(PVT_nopretrain, self).__init__()

        # STFT / log-mel settings shared by the two extractors below.
        window = 'hann'
        center = True
        pad_mode = 'reflect'
        ref = 1.0
        amin = 1e-10
        top_db = None

        # Spectrogram extractor (frozen, not trained).
        self.spectrogram_extractor = Spectrogram(n_fft=window_size, hop_length=hop_size,
            win_length=window_size, window=window, center=center, pad_mode=pad_mode,
            freeze_parameters=True)

        # Logmel feature extractor (frozen, not trained).
        self.logmel_extractor = LogmelFilterBank(sr=sample_rate, n_fft=window_size,
            n_mels=mel_bins, fmin=fmin, fmax=fmax, ref=ref, amin=amin, top_db=top_db,
            freeze_parameters=True)

        # Training-time augmentations: random roll along time + SpecAugment.
        self.time_shift = TimeShift(0, 10)
        # Spec augmenter
        self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
            freq_drop_width=8, freq_stripes_num=2)

        # Per-mel-bin BatchNorm (see transpose pair in forward()).
        self.bn0 = nn.BatchNorm2d(64)
        # PVTv2-B2-sized backbone trained from scratch (pretrained URL
        # intentionally commented out).
        self.pvt_transformer = PyramidVisionTransformerV2(tdim=1001,
                                fdim=64,
                                patch_size=7,
                                stride=4,
                                in_chans=1,
                                num_classes=classes_num,
                                embed_dims=[64, 128, 320, 512],
                                depths=[3, 4, 6, 3],
                                num_heads=[1, 2, 5, 8],
                                mlp_ratios=[8, 8, 4, 4],
                                qkv_bias=True,
                                qk_scale=None,
                                drop_rate=0.0,
                                drop_path_rate=0.1,
                                sr_ratios=[8, 4, 2, 1],
                                norm_layer=partial(nn.LayerNorm, eps=1e-6),
                                num_stages=4,
                                #pretrained='https://github.com/whai362/PVT/releases/download/v2/pvt_v2_b2.pth'
                                )
        # Linear-softmax temporal pooling for the clip score.
        self.temp_pool = LinearSoftPool()
        self.fc_audioset = nn.Linear(512, classes_num, bias=True)

        self.init_weights()

    def init_weights(self):
        init_bn(self.bn0)
        init_layer(self.fc_audioset)

    def forward(self, input, mixup_lambda=None):
        """Input: (batch_size, times_steps, freq_bins)"""

        # Upsampling factor for frame scores; presumably the backbone's
        # total temporal stride — TODO confirm.
        interpolate_ratio = 32

        x = self.spectrogram_extractor(input)   # (batch_size, 1, time_steps, freq_bins)
        x = self.logmel_extractor(x)    # (batch_size, 1, time_steps, mel_bins)
        frames_num = x.shape[2]
        # Per-mel-bin BatchNorm via the transpose trick.
        x = x.transpose(1, 3)
        x = self.bn0(x)
        x = x.transpose(1, 3)

        if self.training:
            x = self.time_shift(x)
            x = self.spec_augmenter(x)

        # Mixup on spectrogram
        if self.training and mixup_lambda is not None:
            x = do_mixup(x, mixup_lambda)
        #print(x.shape) #torch.Size([10, 1, 1001, 64])
        x = self.pvt_transformer(x)
        #print(x.shape) #torch.Size([10, 800, 128])
        # Collapse the remaining frequency axis of the backbone features.
        x = torch.mean(x, dim=3)

        x = x.transpose(1, 2).contiguous()
        framewise_output = torch.sigmoid(self.fc_audioset(x))
        # Clip score via linear-softmax pooling over time; the first argument
        # (x) is ignored by LinearSoftPool.forward.
        clipwise_output = self.temp_pool(x, framewise_output).clamp(1e-7, 1.).squeeze(1)
        #print(framewise_output.shape) #torch.Size([10, 100, 17])
        framewise_output = interpolate(framewise_output, interpolate_ratio)
        # Trim the upsampled output to 1000 frames (drops the overshoot from
        # interpolation; presumably matches the label resolution — TODO confirm).
        framewise_output = framewise_output[:,:1000,:]
        #framewise_output = pad_framewise_output(framewise_output, frames_num)
        output_dict = {'framewise_output': framewise_output,
                       'clipwise_output': clipwise_output}

        return output_dict
|
617 |
+
|
618 |
+
|
619 |
+
class Mlp(nn.Module):
    """PVTv2 feed-forward block: fc1 -> depthwise conv -> activation -> fc2.

    When ``linear`` is set, a ReLU follows fc1 (the linear-attention variant
    of PVTv2). ``H``/``W`` carry the spatial layout needed by the depthwise
    convolution.
    """

    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0., linear=False):
        super().__init__()
        hidden_features = hidden_features or in_features
        out_features = out_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.dwconv = DWConv(hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)
        self.linear = linear
        if self.linear:
            self.relu = nn.ReLU()
        self.apply(self._init_weights)

    def _init_weights(self, m):
        # Truncated-normal for linear weights with zero bias, identity
        # LayerNorm, fan-out-scaled normal for conv weights.
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = (m.kernel_size[0] * m.kernel_size[1] * m.out_channels) // m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        out = self.fc1(x)
        if self.linear:
            out = self.relu(out)
        out = self.dwconv(out, H, W)
        out = self.drop(self.act(out))
        out = self.drop(self.fc2(out))
        return out
|
659 |
+
|
660 |
+
|
661 |
+
class Attention(nn.Module):
    """Spatial-reduction attention (SRA) from PVTv2.

    Queries come from the full token sequence; keys/values are computed from a
    spatially reduced copy of the tokens:
      * not `linear`, sr_ratio > 1: strided conv downsampling by `sr_ratio`;
      * not `linear`, sr_ratio == 1: plain multi-head self-attention;
      * `linear`: adaptive-average-pool to 7x7 + 1x1 conv + GELU
        ("linear" complexity variant).
    """
    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., sr_ratio=1, linear=False):
        super().__init__()
        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."

        self.dim = dim
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # Scaled dot-product factor; overridable via qk_scale.
        self.scale = qk_scale or head_dim ** -0.5

        self.q = nn.Linear(dim, dim, bias=qkv_bias)
        # Keys and values are produced by one projection of width 2 * dim.
        self.kv = nn.Linear(dim, dim * 2, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        self.linear = linear
        self.sr_ratio = sr_ratio
        if not linear:
            if sr_ratio > 1:
                # Downsample tokens by sr_ratio in both spatial dims for K/V.
                self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
                self.norm = nn.LayerNorm(dim)
        else:
            # Linear variant: fixed 7x7 pooled K/V regardless of input size.
            self.pool = nn.AdaptiveAvgPool2d(7)
            self.sr = nn.Conv2d(dim, dim, kernel_size=1, stride=1)
            self.norm = nn.LayerNorm(dim)
            self.act = nn.GELU()
        self.apply(self._init_weights)

    def _init_weights(self, m):
        # Same init scheme as the other PVTv2 modules in this file.
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        """x: (B, N, C) with N == H * W; returns (B, N, C)."""
        B, N, C = x.shape
        # (B, N, C) -> (B, num_heads, N, head_dim)
        q = self.q(x).reshape(B, N, self.num_heads, C // self.num_heads).permute(0, 2, 1, 3)

        if not self.linear:
            if self.sr_ratio > 1:
                # Restore 2-D layout, reduce spatially, then project to K/V.
                x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
                x_ = self.sr(x_).reshape(B, C, -1).permute(0, 2, 1)
                x_ = self.norm(x_)
                kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
            else:
                kv = self.kv(x).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        else:
            x_ = x.permute(0, 2, 1).reshape(B, C, H, W)
            x_ = self.sr(self.pool(x_)).reshape(B, C, -1).permute(0, 2, 1)
            x_ = self.norm(x_)
            x_ = self.act(x_)
            kv = self.kv(x_).reshape(B, -1, 2, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        # kv: (2, B, num_heads, N_reduced, head_dim)
        k, v = kv[0], kv[1]

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        # Weighted sum of values, heads re-merged into channel dim.
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)

        return x
|
734 |
+
|
735 |
+
|
736 |
+
class Pooling(nn.Module):
    """Token mixer from PoolFormer: local average pooling minus identity.

    Args:
        pool_size: side length of the square average-pooling window.
    """

    def __init__(self, pool_size=3):
        super().__init__()
        # Shape-preserving pooling; padded positions are excluded from the
        # average (count_include_pad=False) so borders are not biased low.
        self.pool = nn.AvgPool2d(pool_size, stride=1, padding=pool_size // 2,
                                 count_include_pad=False)

    def forward(self, x):
        # Subtracting the input leaves only the locally-mixed residual.
        pooled = self.pool(x)
        return pooled - x
|
748 |
+
|
749 |
+
class Block(nn.Module):
    """One PVTv2 transformer layer: pre-norm SRA attention + pre-norm MLP,
    each wrapped in a residual connection with optional stochastic depth.
    """

    def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
                 drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, sr_ratio=1, linear=False):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim,
            num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
            attn_drop=attn_drop, proj_drop=drop, sr_ratio=sr_ratio, linear=linear)
        #self.norm3 = norm_layer(dim)
        #self.token_mixer = Pooling(pool_size=3)
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop, linear=linear)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        # Same init scheme as the other PVTv2 modules in this file.
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x, H, W):
        """x: (B, N, C) tokens with N == H * W; returns the same shape."""
        x = x + self.drop_path(self.attn(self.norm1(x), H, W))
        x = x + self.drop_path(self.mlp(self.norm2(x), H, W))
        return x
|
787 |
+
|
788 |
+
|
789 |
+
class OverlapPatchEmbed(nn.Module):
    """ Image to Patch Embedding

    Overlapping patch embedding via a strided convolution, followed by
    LayerNorm over the flattened token sequence. `tdim`/`fdim` are the
    expected input height/width (time/frequency for spectrogram inputs).
    """

    def __init__(self, tdim, fdim, patch_size=7, stride=4, in_chans=3, embed_dim=768):
        super().__init__()
        img_size = (tdim, fdim)
        patch_size = to_2tuple(patch_size)

        self.img_size = img_size
        self.patch_size = patch_size
        # Nominal output grid; forward() re-reads the actual H, W from the
        # conv output, so these are informational only.
        self.H, self.W = img_size[0] // stride, img_size[1] // stride
        self.num_patches = self.H * self.W
        # NOTE(review): upstream PVTv2 pads with patch_size // 2 ("same"-ish);
        # this file uses patch_size // 3 — presumably deliberate for the audio
        # input geometry, but worth confirming. Changing it would invalidate
        # the shipped checkpoint, so it is left as-is.
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=stride,
                              padding=(patch_size[0] // 3, patch_size[1] // 3))
        self.norm = nn.LayerNorm(embed_dim)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        # Same init scheme as the other PVTv2 modules in this file.
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def forward(self, x):
        """x: (B, in_chans, H_in, W_in) -> ((B, H*W, embed_dim), H, W)."""
        x = self.proj(x)
        _, _, H, W = x.shape
        # (B, C, H, W) -> (B, H*W, C) token sequence.
        x = x.flatten(2).transpose(1, 2)
        x = self.norm(x)

        return x, H, W
|
830 |
+
|
831 |
+
|
832 |
+
class PyramidVisionTransformerV2(nn.Module):
    """PVTv2 backbone (classification head removed) used as a feature
    extractor for audio spectrograms.

    Each stage = OverlapPatchEmbed + depths[i] Blocks + LayerNorm; the token
    sequence is reshaped back to (B, C, H, W) between stages. Only the first
    `num_stages` (default 2) of the configured 4 stages are built.
    Assumes the input is a (B, in_chans, tdim, fdim) spectrogram-like
    tensor — TODO confirm against the caller.
    """
    def __init__(self, tdim=1001, fdim=64, patch_size=16, stride=4, in_chans=3, num_classes=1000, embed_dims=[64, 128, 256, 512],
                 num_heads=[1, 2, 4, 8], mlp_ratios=[4, 4, 4, 4], qkv_bias=False, qk_scale=None, drop_rate=0.,
                 attn_drop_rate=0., drop_path_rate=0.1, norm_layer=partial(nn.LayerNorm, eps=1e-6), depths=[3, 4, 6, 3],
                 sr_ratios=[8, 4, 2, 1], num_stages=2, linear=False, pretrained=None):
        super().__init__()
        # self.num_classes = num_classes
        self.depths = depths
        self.num_stages = num_stages
        self.linear = linear

        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]  # stochastic depth decay rule
        cur = 0

        for i in range(num_stages):
            # NOTE(review): for i > 0 the fdim argument is computed from tdim
            # (tdim // 2**(i+1)) — looks like a copy-paste slip, but it is
            # harmless: OverlapPatchEmbed.forward() uses the runtime shapes,
            # not these nominal dims.
            patch_embed = OverlapPatchEmbed(tdim=tdim if i == 0 else tdim // (2 ** (i + 1)),
                                            fdim=fdim if i == 0 else tdim // (2 ** (i + 1)),
                                            patch_size=7 if i == 0 else 3,
                                            stride=stride if i == 0 else 2,
                                            in_chans=in_chans if i == 0 else embed_dims[i - 1],
                                            embed_dim=embed_dims[i])
            block = nn.ModuleList([Block(
                dim=embed_dims[i], num_heads=num_heads[i], mlp_ratio=mlp_ratios[i], qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[cur + j], norm_layer=norm_layer,
                sr_ratio=sr_ratios[i], linear=linear)
                for j in range(depths[i])])
            norm = norm_layer(embed_dims[i])
            cur += depths[i]

            # Stages are registered as patch_embed1/block1/norm1, etc.
            setattr(self, f"patch_embed{i + 1}", patch_embed)
            setattr(self, f"block{i + 1}", block)
            setattr(self, f"norm{i + 1}", norm)
        #self.n = nn.Linear(125, 250, bias=True)
        # classification head
        # self.head = nn.Linear(embed_dims[3], num_classes) if num_classes > 0 else nn.Identity()
        self.apply(self._init_weights)
        self.init_weights(pretrained)

    def _init_weights(self, m):
        # Same init scheme as the other PVTv2 modules in this file.
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            fan_out //= m.groups
            m.weight.data.normal_(0, math.sqrt(2.0 / fan_out))
            if m.bias is not None:
                m.bias.data.zero_()

    def init_weights(self, pretrained=None):
        """Optionally load a checkpoint; relies on get_root_logger /
        load_checkpoint being in scope (presumably mmcv — verify imports)."""
        if isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, map_location='cpu', strict=False, logger=logger)

    def freeze_patch_emb(self):
        # Freezes only the first stage's patch embedding.
        self.patch_embed1.requires_grad = False

    @torch.jit.ignore
    def no_weight_decay(self):
        return {'pos_embed1', 'pos_embed2', 'pos_embed3', 'pos_embed4', 'cls_token'}  # has pos_embed may be better

    def get_classifier(self):
        # NOTE(review): self.head is never created (commented out in
        # __init__), so calling this raises AttributeError.
        return self.head

    def reset_classifier(self, num_classes, global_pool=''):
        # NOTE(review): self.embed_dim is never set in __init__; calling this
        # raises AttributeError.
        self.num_classes = num_classes
        self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()

    def forward_features(self, x):
        """Run all stages; returns a (B, C_last, H, W) feature map."""
        B = x.shape[0]

        for i in range(self.num_stages):
            patch_embed = getattr(self, f"patch_embed{i + 1}")
            block = getattr(self, f"block{i + 1}")
            norm = getattr(self, f"norm{i + 1}")
            x, H, W = patch_embed(x)
            #print(x.shape)
            for blk in block:
                x = blk(x, H, W)
            #print(x.shape)
            x = norm(x)
            #if i != self.num_stages - 1:
            # Back to (B, C, H, W) so the next stage's conv embedding works.
            x = x.reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()
            #print(x.shape)
        return x

    def forward(self, x):
        x = self.forward_features(x)
        # x = self.head(x)

        return x
|
928 |
+
|
929 |
+
class DWConv(nn.Module):
    """Depthwise 3x3 convolution applied to a flattened token sequence."""

    def __init__(self, dim=768):
        super(DWConv, self).__init__()
        # groups=dim makes the convolution depthwise: one 3x3 filter per
        # channel, padding 1 keeps the spatial size unchanged.
        self.dwconv = nn.Conv2d(dim, dim, 3, 1, 1, bias=True, groups=dim)

    def forward(self, x, H, W):
        """x: (B, N, C) with N == H * W; returns the same shape."""
        batch, _, channels = x.shape
        # (B, N, C) -> (B, C, H, W): restore the 2-D layout for the conv.
        grid = x.transpose(1, 2).view(batch, channels, H, W)
        grid = self.dwconv(grid)
        # (B, C, H, W) -> (B, N, C): back to a token sequence.
        return grid.flatten(2).transpose(1, 2)
|
941 |
+
|
942 |
+
|
943 |
+
def _conv_filter(state_dict, patch_size=16):
|
944 |
+
""" convert patch embedding weight from manual patchify + linear proj to conv"""
|
945 |
+
out_dict = {}
|
946 |
+
for k, v in state_dict.items():
|
947 |
+
if 'patch_embed.proj.weight' in k:
|
948 |
+
v = v.reshape((v.shape[0], 3, patch_size, patch_size))
|
949 |
+
out_dict[k] = v
|
950 |
+
|
951 |
+
return out_dict
|
audio_detection/audio_infer/pytorch/pytorch_utils.py
ADDED
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import time
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
|
6 |
+
|
7 |
+
def move_data_to_device(x, device):
    """Convert an array-like with a `.dtype` to a tensor on `device`.

    Float dtypes become float32 tensors, int dtypes become int64 tensors.
    Anything else (e.g. bool) is returned unchanged and NOT moved.
    """
    dtype_name = str(x.dtype)
    if 'float' in dtype_name:
        tensor = torch.Tensor(x)
    elif 'int' in dtype_name:
        tensor = torch.LongTensor(x)
    else:
        # Unsupported dtype: hand the input back as-is.
        return x
    return tensor.to(device)
|
16 |
+
|
17 |
+
|
18 |
+
def do_mixup(x, mixup_lambda):
    """Mixup x of even indexes (0, 2, 4, ...) with x of odd indexes
    (1, 3, 5, ...).

    Args:
      x: (batch_size * 2, ...)
      mixup_lambda: (batch_size * 2,)

    Returns:
      out: (batch_size, ...)
    """
    # transpose(0, -1) moves the batch axis last so the per-sample lambdas
    # broadcast; transpose back after combining the pairs.
    even = x[0::2].transpose(0, -1) * mixup_lambda[0::2]
    odd = x[1::2].transpose(0, -1) * mixup_lambda[1::2]
    return (even + odd).transpose(0, -1)
|
32 |
+
|
33 |
+
|
34 |
+
def append_to_dict(dict, key, value):
    """Append `value` to the list stored under `key`, creating the list on
    first use.

    NOTE: the parameter name `dict` shadows the builtin; it is kept only so
    the signature stays backward-compatible with existing callers.
    """
    # setdefault replaces the explicit `key in dict` check + branch with a
    # single lookup.
    dict.setdefault(key, []).append(value)
|
39 |
+
|
40 |
+
|
41 |
+
def forward(model, generator, return_input=False, 
    return_target=False):
    """Forward data to a model.

    Args:
      model: object
      generator: object, yields dicts with at least 'waveform' and
        'audio_name' (optionally 'target')
      return_input: bool, also collect the raw waveforms
      return_target: bool, also collect the targets when present

    Returns:
      output_dict with keys:
        audio_name: (audios_num,)
        clipwise_output: (audios_num, classes_num)
        (if exist) segmentwise_output: (audios_num, segments_num, classes_num)
        (if exist) framewise_output: (audios_num, frames_num, classes_num)
        (optional) waveform: (audios_num, segment_samples)
        (optional) target: (audios_num, classes_num)
    """
    output_dict = {}
    device = next(model.parameters()).device
    time1 = time.time()

    # Forward data to a model in mini-batches.
    # (Fix: removed a leftover `print(n)` that spammed stdout every batch.)
    for n, batch_data_dict in enumerate(generator):
        batch_waveform = move_data_to_device(batch_data_dict['waveform'], device)

        with torch.no_grad():
            model.eval()
            batch_output = model(batch_waveform)

        append_to_dict(output_dict, 'audio_name', batch_data_dict['audio_name'])

        append_to_dict(output_dict, 'clipwise_output', 
            batch_output['clipwise_output'].data.cpu().numpy())

        # Optional model outputs are only collected when the model emits them.
        if 'segmentwise_output' in batch_output.keys():
            append_to_dict(output_dict, 'segmentwise_output', 
                batch_output['segmentwise_output'].data.cpu().numpy())

        if 'framewise_output' in batch_output.keys():
            append_to_dict(output_dict, 'framewise_output', 
                batch_output['framewise_output'].data.cpu().numpy())

        if return_input:
            append_to_dict(output_dict, 'waveform', batch_data_dict['waveform'])

        if return_target:
            if 'target' in batch_data_dict.keys():
                append_to_dict(output_dict, 'target', batch_data_dict['target'])

        # Periodic timing report (every 10 mini-batches).
        if n % 10 == 0:
            print(' --- Inference time: {:.3f} s / 10 iterations ---'.format(
                time.time() - time1))
            time1 = time.time()

    # Concatenate the per-batch pieces into whole-dataset arrays.
    for key in output_dict.keys():
        output_dict[key] = np.concatenate(output_dict[key], axis=0)

    return output_dict
|
101 |
+
|
102 |
+
|
103 |
+
def interpolate(x, ratio):
    """Interpolate data in time domain. This is used to compensate the
    resolution reduction in downsampling of a CNN.

    Args:
      x: (batch_size, time_steps, classes_num)
      ratio: int, ratio to interpolate

    Returns:
      upsampled: (batch_size, time_steps * ratio, classes_num)
    """
    # Each time step is duplicated `ratio` times consecutively — identical
    # to inserting a size-`ratio` axis and flattening it back out.
    return torch.repeat_interleave(x, ratio, dim=1)
|
118 |
+
|
119 |
+
|
120 |
+
def pad_framewise_output(framewise_output, frames_num):
    """Pad framewise_output to the same length as input frames. The pad value
    is the same as the value of the last frame.

    Args:
      framewise_output: (batch_size, frames_num, classes_num)
      frames_num: int, number of frames to pad

    Outputs:
      output: (batch_size, frames_num, classes_num)
    """
    missing = frames_num - framewise_output.shape[1]
    # Repeat the final frame to fill the missing tail.
    last_frame = framewise_output[:, -1:, :]
    padding = last_frame.repeat(1, missing, 1)
    # (batch_size, frames_num, classes_num)
    return torch.cat((framewise_output, padding), dim=1)
|
138 |
+
|
139 |
+
|
140 |
+
def count_parameters(model):
    """Return the number of trainable (requires_grad) parameters in `model`."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
|
142 |
+
|
143 |
+
|
144 |
+
def count_flops(model, audio_length):
    """Count flops. Code modified from others' implementation.

    Registers forward hooks on every leaf module, runs one dummy forward pass
    with a (1, audio_length) input, and sums the per-layer flop estimates.

    Fixes vs. original: removed the duplicated `device = device = ...`
    assignment, and pooling1d_hook now appends to its own list (the old code
    appended to list_pooling2d; the total was still correct since both lists
    are summed, but the per-type breakdown was wrong).
    """
    multiply_adds = True
    list_conv2d=[]
    def conv2d_hook(self, input, output):
        batch_size, input_channels, input_height, input_width = input[0].size()
        output_channels, output_height, output_width = output[0].size()

        # A multiply-add counts as 2 ops when multiply_adds is True.
        kernel_ops = self.kernel_size[0] * self.kernel_size[1] * (self.in_channels / self.groups) * (2 if multiply_adds else 1)
        bias_ops = 1 if self.bias is not None else 0

        params = output_channels * (kernel_ops + bias_ops)
        flops = batch_size * params * output_height * output_width

        list_conv2d.append(flops)

    list_conv1d=[]
    def conv1d_hook(self, input, output):
        batch_size, input_channels, input_length = input[0].size()
        output_channels, output_length = output[0].size()

        kernel_ops = self.kernel_size[0] * (self.in_channels / self.groups) * (2 if multiply_adds else 1)
        bias_ops = 1 if self.bias is not None else 0

        params = output_channels * (kernel_ops + bias_ops)
        flops = batch_size * params * output_length

        list_conv1d.append(flops)

    list_linear=[]
    def linear_hook(self, input, output):
        # Inputs with >2 dims are treated as batch_size 1.
        batch_size = input[0].size(0) if input[0].dim() == 2 else 1

        weight_ops = self.weight.nelement() * (2 if multiply_adds else 1)
        bias_ops = self.bias.nelement()

        flops = batch_size * (weight_ops + bias_ops)
        list_linear.append(flops)

    list_bn=[]
    def bn_hook(self, input, output):
        # Scale + shift: 2 ops per element.
        list_bn.append(input[0].nelement() * 2)

    list_relu=[]
    def relu_hook(self, input, output):
        list_relu.append(input[0].nelement() * 2)

    list_pooling2d=[]
    def pooling2d_hook(self, input, output):
        batch_size, input_channels, input_height, input_width = input[0].size()
        output_channels, output_height, output_width = output[0].size()

        # NOTE(review): assumes self.kernel_size is an int (square window);
        # a tuple kernel_size would need kernel_size[0] * kernel_size[1].
        kernel_ops = self.kernel_size * self.kernel_size
        bias_ops = 0
        params = output_channels * (kernel_ops + bias_ops)
        flops = batch_size * params * output_height * output_width

        list_pooling2d.append(flops)

    list_pooling1d=[]
    def pooling1d_hook(self, input, output):
        batch_size, input_channels, input_length = input[0].size()
        output_channels, output_length = output[0].size()

        kernel_ops = self.kernel_size[0]
        bias_ops = 0

        params = output_channels * (kernel_ops + bias_ops)
        flops = batch_size * params * output_length

        list_pooling1d.append(flops)

    def foo(net):
        """Recursively register the matching hook on every leaf module."""
        childrens = list(net.children())
        if not childrens:
            if isinstance(net, nn.Conv2d):
                net.register_forward_hook(conv2d_hook)
            elif isinstance(net, nn.Conv1d):
                net.register_forward_hook(conv1d_hook)
            elif isinstance(net, nn.Linear):
                net.register_forward_hook(linear_hook)
            elif isinstance(net, nn.BatchNorm2d) or isinstance(net, nn.BatchNorm1d):
                net.register_forward_hook(bn_hook)
            elif isinstance(net, nn.ReLU):
                net.register_forward_hook(relu_hook)
            elif isinstance(net, nn.AvgPool2d) or isinstance(net, nn.MaxPool2d):
                net.register_forward_hook(pooling2d_hook)
            elif isinstance(net, nn.AvgPool1d) or isinstance(net, nn.MaxPool1d):
                net.register_forward_hook(pooling1d_hook)
            else:
                print('Warning: flop of module {} is not counted!'.format(net))
            return
        for c in childrens:
            foo(c)

    # Register hook
    foo(model)

    device = next(model.parameters()).device
    input = torch.rand(1, audio_length).to(device)

    out = model(input)

    total_flops = sum(list_conv2d) + sum(list_conv1d) + sum(list_linear) + \
        sum(list_bn) + sum(list_relu) + sum(list_pooling2d) + sum(list_pooling1d)

    return total_flops
|
audio_detection/audio_infer/results/YDlWd7Wmdi1E.png
ADDED
audio_detection/audio_infer/useful_ckpts/audio_detection.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f909808f17d424dc29063a21953ff2be103489518a4f60a6c649d2e3e7d3e81
|
3 |
+
size 441042195
|
audio_detection/audio_infer/utils/__pycache__/config.cpython-38.pyc
ADDED
Binary file (6.33 kB). View file
|
|
audio_detection/audio_infer/utils/config.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
import csv

sample_rate = 32000
clip_samples = sample_rate * 10 # Audio clips are 10-second

# Load label
# NOTE(review): this file I/O runs at import time and the path is relative to
# the current working directory — importing this module from elsewhere fails
# unless the process is started from the repo root.
with open('./audio_detection/audio_infer/metadata/class_labels_indices.csv', 'r') as f:
    reader = csv.reader(f, delimiter=',')
    lines = list(reader)

labels = []
ids = []    # Each label has a unique id such as "/m/068hy"
# Skip the CSV header row; column 1 is the id, column 2 the display label.
for i1 in range(1, len(lines)):
    id = lines[i1][1]  # shadows builtin `id` (harmless at module scope)
    label = lines[i1][2]
    ids.append(id)
    labels.append(label)

classes_num = len(labels)

# Bidirectional label <-> index and id <-> index lookup tables.
lb_to_ix = {label : i for i, label in enumerate(labels)}
ix_to_lb = {i : label for i, label in enumerate(labels)}

id_to_ix = {id : i for i, id in enumerate(ids)}
ix_to_id = {i : id for i, id in enumerate(ids)}
|
27 |
+
|
28 |
+
full_samples_per_class = np.array([
|
29 |
+
937432, 16344, 7822, 10271, 2043, 14420, 733, 1511,
|
30 |
+
1258, 424, 1751, 704, 369, 590, 1063, 1375,
|
31 |
+
5026, 743, 853, 1648, 714, 1497, 1251, 2139,
|
32 |
+
1093, 133, 224, 39469, 6423, 407, 1559, 4546,
|
33 |
+
6826, 7464, 2468, 549, 4063, 334, 587, 238,
|
34 |
+
1766, 691, 114, 2153, 236, 209, 421, 740,
|
35 |
+
269, 959, 137, 4192, 485, 1515, 655, 274,
|
36 |
+
69, 157, 1128, 807, 1022, 346, 98, 680,
|
37 |
+
890, 352, 4169, 2061, 1753, 9883, 1339, 708,
|
38 |
+
37857, 18504, 12864, 2475, 2182, 757, 3624, 677,
|
39 |
+
1683, 3583, 444, 1780, 2364, 409, 4060, 3097,
|
40 |
+
3143, 502, 723, 600, 230, 852, 1498, 1865,
|
41 |
+
1879, 2429, 5498, 5430, 2139, 1761, 1051, 831,
|
42 |
+
2401, 2258, 1672, 1711, 987, 646, 794, 25061,
|
43 |
+
5792, 4256, 96, 8126, 2740, 752, 513, 554,
|
44 |
+
106, 254, 1592, 556, 331, 615, 2841, 737,
|
45 |
+
265, 1349, 358, 1731, 1115, 295, 1070, 972,
|
46 |
+
174, 937780, 112337, 42509, 49200, 11415, 6092, 13851,
|
47 |
+
2665, 1678, 13344, 2329, 1415, 2244, 1099, 5024,
|
48 |
+
9872, 10948, 4409, 2732, 1211, 1289, 4807, 5136,
|
49 |
+
1867, 16134, 14519, 3086, 19261, 6499, 4273, 2790,
|
50 |
+
8820, 1228, 1575, 4420, 3685, 2019, 664, 324,
|
51 |
+
513, 411, 436, 2997, 5162, 3806, 1389, 899,
|
52 |
+
8088, 7004, 1105, 3633, 2621, 9753, 1082, 26854,
|
53 |
+
3415, 4991, 2129, 5546, 4489, 2850, 1977, 1908,
|
54 |
+
1719, 1106, 1049, 152, 136, 802, 488, 592,
|
55 |
+
2081, 2712, 1665, 1128, 250, 544, 789, 2715,
|
56 |
+
8063, 7056, 2267, 8034, 6092, 3815, 1833, 3277,
|
57 |
+
8813, 2111, 4662, 2678, 2954, 5227, 1472, 2591,
|
58 |
+
3714, 1974, 1795, 4680, 3751, 6585, 2109, 36617,
|
59 |
+
6083, 16264, 17351, 3449, 5034, 3931, 2599, 4134,
|
60 |
+
3892, 2334, 2211, 4516, 2766, 2862, 3422, 1788,
|
61 |
+
2544, 2403, 2892, 4042, 3460, 1516, 1972, 1563,
|
62 |
+
1579, 2776, 1647, 4535, 3921, 1261, 6074, 2922,
|
63 |
+
3068, 1948, 4407, 712, 1294, 1019, 1572, 3764,
|
64 |
+
5218, 975, 1539, 6376, 1606, 6091, 1138, 1169,
|
65 |
+
7925, 3136, 1108, 2677, 2680, 1383, 3144, 2653,
|
66 |
+
1986, 1800, 1308, 1344, 122231, 12977, 2552, 2678,
|
67 |
+
7824, 768, 8587, 39503, 3474, 661, 430, 193,
|
68 |
+
1405, 1442, 3588, 6280, 10515, 785, 710, 305,
|
69 |
+
206, 4990, 5329, 3398, 1771, 3022, 6907, 1523,
|
70 |
+
8588, 12203, 666, 2113, 7916, 434, 1636, 5185,
|
71 |
+
1062, 664, 952, 3490, 2811, 2749, 2848, 15555,
|
72 |
+
363, 117, 1494, 1647, 5886, 4021, 633, 1013,
|
73 |
+
5951, 11343, 2324, 243, 372, 943, 734, 242,
|
74 |
+
3161, 122, 127, 201, 1654, 768, 134, 1467,
|
75 |
+
642, 1148, 2156, 1368, 1176, 302, 1909, 61,
|
76 |
+
223, 1812, 287, 422, 311, 228, 748, 230,
|
77 |
+
1876, 539, 1814, 737, 689, 1140, 591, 943,
|
78 |
+
353, 289, 198, 490, 7938, 1841, 850, 457,
|
79 |
+
814, 146, 551, 728, 1627, 620, 648, 1621,
|
80 |
+
2731, 535, 88, 1736, 736, 328, 293, 3170,
|
81 |
+
344, 384, 7640, 433, 215, 715, 626, 128,
|
82 |
+
3059, 1833, 2069, 3732, 1640, 1508, 836, 567,
|
83 |
+
2837, 1151, 2068, 695, 1494, 3173, 364, 88,
|
84 |
+
188, 740, 677, 273, 1533, 821, 1091, 293,
|
85 |
+
647, 318, 1202, 328, 532, 2847, 526, 721,
|
86 |
+
370, 258, 956, 1269, 1641, 339, 1322, 4485,
|
87 |
+
286, 1874, 277, 757, 1393, 1330, 380, 146,
|
88 |
+
377, 394, 318, 339, 1477, 1886, 101, 1435,
|
89 |
+
284, 1425, 686, 621, 221, 117, 87, 1340,
|
90 |
+
201, 1243, 1222, 651, 1899, 421, 712, 1016,
|
91 |
+
1279, 124, 351, 258, 7043, 368, 666, 162,
|
92 |
+
7664, 137, 70159, 26179, 6321, 32236, 33320, 771,
|
93 |
+
1169, 269, 1103, 444, 364, 2710, 121, 751,
|
94 |
+
1609, 855, 1141, 2287, 1940, 3943, 289])
|
audio_detection/audio_infer/utils/crash.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys

class ExceptionHook:
    """sys.excepthook replacement: on the first uncaught exception, builds an
    IPython FormattedTB that prints a plain-mode traceback and drops into pdb
    (call_pdb=1). The IPython import is deferred until an exception actually
    escapes, so IPython is only required in that case.
    """
    instance = None  # lazily-created ultratb.FormattedTB
    def __call__(self, *args, **kwargs):
        if self.instance is None:
            from IPython.core import ultratb
            self.instance = ultratb.FormattedTB(mode='Plain',
                 color_scheme='Linux', call_pdb=1)
        return self.instance(*args, **kwargs)

# Install the hook as a module-import side effect.
sys.excepthook = ExceptionHook()
|
audio_detection/audio_infer/utils/create_black_list.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import csv
|
3 |
+
import os
|
4 |
+
|
5 |
+
from utilities import create_folder
|
6 |
+
|
7 |
+
|
8 |
+
def dcase2017task4(args):
    """Create black list. Black list is a list of audio ids that will be 
    skipped in training.

    Reads the DCASE 2017 Task 4 weak-label testing and evaluation CSVs and
    writes the union of their unique YouTube ids to
    <workspace>/black_list/dcase2017task4.csv, one id per line.

    Args:
      args: argparse namespace with a `workspace` attribute.
    """
    # Augments & parameters
    workspace = args.workspace

    # Black list from DCASE 2017 Task 4
    test_weak_csv = 'metadata/black_list/groundtruth_weak_label_testing_set.csv'
    evaluation_weak_csv = 'metadata/black_list/groundtruth_weak_label_evaluation_set.csv'

    black_list_csv = os.path.join(workspace, 'black_list', 'dcase2017task4.csv')
    create_folder(os.path.dirname(black_list_csv))

    def get_id_sets(csv_path):
        """Return the unique YouTube ids (first 11 chars of each file name)."""
        with open(csv_path, 'r') as fr:
            reader = csv.reader(fr, delimiter='\t')
            lines = list(reader)

        ids_set = []

        for line in lines:
            # line: ['-5QrBL6MzLg_60.000_70.000.wav', '60.000', '70.000', 'Train horn']
            ids_set.append(line[0][0 : 11])

        ids_set = list(set(ids_set))
        return ids_set

    test_ids_set = get_id_sets(test_weak_csv)
    evaluation_ids_set = get_id_sets(evaluation_weak_csv)

    full_ids_set = test_ids_set + evaluation_ids_set

    # Write black list. `with` guarantees the handle is closed and the data
    # flushed (the original opened the file and never closed it).
    with open(black_list_csv, 'w') as fw:
        for id in full_ids_set:
            fw.write('{}\n'.format(id))

    print('Write black list to {}'.format(black_list_csv))
|
49 |
+
|
50 |
+
|
51 |
+
if __name__ == '__main__':
|
52 |
+
parser = argparse.ArgumentParser(description='')
|
53 |
+
subparsers = parser.add_subparsers(dest='mode')
|
54 |
+
|
55 |
+
parser_dcase2017task4 = subparsers.add_parser('dcase2017task4')
|
56 |
+
parser_dcase2017task4.add_argument('--workspace', type=str, required=True)
|
57 |
+
|
58 |
+
args = parser.parse_args()
|
59 |
+
|
60 |
+
if args.mode == 'dcase2017task4':
|
61 |
+
dcase2017task4(args)
|
62 |
+
|
63 |
+
else:
|
64 |
+
raise Exception('Error argument!')
|
audio_detection/audio_infer/utils/create_indexes.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import argparse
|
3 |
+
import csv
|
4 |
+
import os
|
5 |
+
import glob
|
6 |
+
import datetime
|
7 |
+
import time
|
8 |
+
import logging
|
9 |
+
import h5py
|
10 |
+
import librosa
|
11 |
+
|
12 |
+
from utilities import create_folder, get_sub_filepaths
|
13 |
+
import config
|
14 |
+
|
15 |
+
|
16 |
+
def create_indexes(args):
    """Create indexes for a dataloader to read for training. When users have
    a new task and their own data, they need to create similar indexes. The
    indexes contain meta information of "where to find the data for training".

    Args:
        args.waveforms_hdf5_path: str, path of the packed waveforms hdf5 to index.
        args.indexes_hdf5_path: str, path to write out the indexes hdf5.
    """
    # Arguments & parameters
    waveforms_hdf5_path = args.waveforms_hdf5_path
    indexes_hdf5_path = args.indexes_hdf5_path

    # Paths
    create_folder(os.path.dirname(indexes_hdf5_path))

    with h5py.File(waveforms_hdf5_path, 'r') as hr:
        with h5py.File(indexes_hdf5_path, 'w') as hw:
            audios_num = len(hr['audio_name'])
            hw.create_dataset('audio_name', data=hr['audio_name'][:], dtype='S20')
            # Bug fix: np.bool was deprecated in NumPy 1.20 and removed in
            # 1.24; the builtin bool maps to the same h5py storage type.
            hw.create_dataset('target', data=hr['target'][:], dtype=bool)
            # Every row records which waveforms hdf5 it came from, so partial
            # indexes can later be combined into one file.
            hw.create_dataset('hdf5_path', data=[waveforms_hdf5_path.encode()] * audios_num, dtype='S200')
            hw.create_dataset('index_in_hdf5', data=np.arange(audios_num), dtype=np.int32)

    print('Write to {}'.format(indexes_hdf5_path))
|
38 |
+
|
39 |
+
|
40 |
+
def combine_full_indexes(args):
    """Combine all balanced and unbalanced indexes hdf5s to a single hdf5. This
    combined indexes hdf5 is used for training with full data (~20k balanced
    audio clips + ~1.9m unbalanced audio clips).

    Args:
        args.indexes_hdf5s_dir: str, directory containing partial indexes hdf5s.
        args.full_indexes_hdf5_path: str, path to write out the combined hdf5.
    """
    # Arguments & parameters
    indexes_hdf5s_dir = args.indexes_hdf5s_dir
    full_indexes_hdf5_path = args.full_indexes_hdf5_path

    classes_num = config.classes_num

    # Paths: only combine training splits; skip already-combined ('full_train')
    # and debug ('mini') files.
    paths = get_sub_filepaths(indexes_hdf5s_dir)
    paths = [path for path in paths if (
        'train' in path and 'full_train' not in path and 'mini' not in path)]

    print('Total {} hdf5 to combine.'.format(len(paths)))

    with h5py.File(full_indexes_hdf5_path, 'w') as full_hf:
        # Datasets start empty (maxshape=None) and are resized as each
        # partial hdf5 is appended below.
        full_hf.create_dataset(
            name='audio_name',
            shape=(0,),
            maxshape=(None,),
            dtype='S20')

        # Bug fix: np.bool was removed in NumPy 1.24; builtin bool is the
        # supported equivalent spelling.
        full_hf.create_dataset(
            name='target',
            shape=(0, classes_num),
            maxshape=(None, classes_num),
            dtype=bool)

        full_hf.create_dataset(
            name='hdf5_path',
            shape=(0,),
            maxshape=(None,),
            dtype='S200')

        full_hf.create_dataset(
            name='index_in_hdf5',
            shape=(0,),
            maxshape=(None,),
            dtype=np.int32)

        for path in paths:
            with h5py.File(path, 'r') as part_hf:
                print(path)
                n = len(full_hf['audio_name'][:])
                new_n = n + len(part_hf['audio_name'][:])

                # Grow each dataset and append the partial file's rows.
                full_hf['audio_name'].resize((new_n,))
                full_hf['audio_name'][n : new_n] = part_hf['audio_name'][:]

                full_hf['target'].resize((new_n, classes_num))
                full_hf['target'][n : new_n] = part_hf['target'][:]

                full_hf['hdf5_path'].resize((new_n,))
                full_hf['hdf5_path'][n : new_n] = part_hf['hdf5_path'][:]

                full_hf['index_in_hdf5'].resize((new_n,))
                full_hf['index_in_hdf5'][n : new_n] = part_hf['index_in_hdf5'][:]

    print('Write combined full hdf5 to {}'.format(full_indexes_hdf5_path))
|
103 |
+
|
104 |
+
|
105 |
+
if __name__ == '__main__':
    # CLI with two sub-commands: 'create_indexes' and 'combine_full_indexes'.
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='mode')

    create_parser = subparsers.add_parser('create_indexes')
    create_parser.add_argument('--waveforms_hdf5_path', type=str, required=True, help='Path of packed waveforms hdf5.')
    create_parser.add_argument('--indexes_hdf5_path', type=str, required=True, help='Path to write out indexes hdf5.')

    combine_parser = subparsers.add_parser('combine_full_indexes')
    combine_parser.add_argument('--indexes_hdf5s_dir', type=str, required=True, help='Directory containing indexes hdf5s to be combined.')
    combine_parser.add_argument('--full_indexes_hdf5_path', type=str, required=True, help='Path to write out full indexes hdf5 file.')

    args = parser.parse_args()

    # Dispatch table keeps the mode -> handler mapping in one place.
    handlers = {
        'create_indexes': create_indexes,
        'combine_full_indexes': combine_full_indexes,
    }
    if args.mode not in handlers:
        raise Exception('Incorrect arguments!')
    handlers[args.mode](args)
|
audio_detection/audio_infer/utils/data_generator.py
ADDED
@@ -0,0 +1,421 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import h5py
|
3 |
+
import csv
|
4 |
+
import time
|
5 |
+
import logging
|
6 |
+
|
7 |
+
from utilities import int16_to_float32
|
8 |
+
|
9 |
+
|
10 |
+
def read_black_list(black_list_csv):
    """Read audio names from a black list csv.

    Args:
        black_list_csv: str, path of the black list csv; the first column of
            each row holds an audio id.

    Returns:
        list of str, packed audio file names, e.g. ['Y-5QrBL6MzLg.wav', ...]
    """
    with open(black_list_csv, 'r') as fr:
        rows = [row for row in csv.reader(fr)]

    # Prepend 'Y' and append '.wav' to match the packed audio file names.
    return ['Y{}.wav'.format(row[0]) for row in rows]
|
19 |
+
|
20 |
+
|
21 |
+
class AudioSetDataset(object):
    def __init__(self, sample_rate=32000):
        """Map the meta of an audio clip to its waveform and target. This
        class is used by a DataLoader.

        Args:
            sample_rate: int, one of 32000 / 16000 / 8000.
        """
        self.sample_rate = sample_rate

    def __getitem__(self, meta):
        """Load waveform and target of one audio clip.

        Args:
            meta: {
                'hdf5_path': str,
                'index_in_hdf5': int}

        Returns:
            data_dict: {
                'audio_name': str,
                'waveform': (clip_samples,),
                'target': (classes_num,)}
        """
        with h5py.File(meta['hdf5_path'], 'r') as hf:
            idx = meta['index_in_hdf5']
            audio_name = hf['audio_name'][idx].decode()
            # Waveforms are stored as int16; convert back to float32 then
            # decimate to the requested sample rate.
            waveform = self.resample(int16_to_float32(hf['waveform'][idx]))
            target = hf['target'][idx].astype(np.float32)

        return {'audio_name': audio_name, 'waveform': waveform, 'target': target}

    def resample(self, waveform):
        """Decimate a 32 kHz waveform down to the configured sample rate.

        Args:
            waveform: (clip_samples,)

        Returns:
            (resampled_clip_samples,)
        """
        # Integer decimation only: keep every 2nd / 4th sample.
        if self.sample_rate == 32000:
            return waveform
        if self.sample_rate == 16000:
            return waveform[0 :: 2]
        if self.sample_rate == 8000:
            return waveform[0 :: 4]
        raise Exception('Incorrect sample rate!')
|
72 |
+
|
73 |
+
|
74 |
+
class Base(object):
    def __init__(self, indexes_hdf5_path, batch_size, black_list_csv, random_seed):
        """Base class of train samplers: loads the index metadata and the
        optional black list shared by all sampler subclasses.

        Args:
            indexes_hdf5_path: string
            batch_size: int
            black_list_csv: string
            random_seed: int
        """
        self.batch_size = batch_size
        self.random_state = np.random.RandomState(random_seed)

        # Black list (empty when no csv is given).
        self.black_list_names = \
            read_black_list(black_list_csv) if black_list_csv else []
        logging.info('Black list samples: {}'.format(len(self.black_list_names)))

        # Load the full target matrix up front; timed because it can be large.
        load_time = time.time()
        with h5py.File(indexes_hdf5_path, 'r') as hf:
            self.audio_names = [name.decode() for name in hf['audio_name'][:]]
            self.hdf5_paths = [path.decode() for path in hf['hdf5_path'][:]]
            self.indexes_in_hdf5 = hf['index_in_hdf5'][:]
            self.targets = hf['target'][:].astype(np.float32)

        (self.audios_num, self.classes_num) = self.targets.shape
        logging.info('Training number: {}'.format(self.audios_num))
        logging.info('Load target time: {:.3f} s'.format(time.time() - load_time))
|
107 |
+
|
108 |
+
|
109 |
+
class TrainSampler(Base):
    def __init__(self, indexes_hdf5_path, batch_size, black_list_csv=None,
        random_seed=1234):
        """Uniform random sampler. Generates batch meta for training.

        Args:
            indexes_hdf5_path: string
            batch_size: int
            black_list_csv: string
            random_seed: int
        """
        super(TrainSampler, self).__init__(indexes_hdf5_path, batch_size,
            black_list_csv, random_seed)

        # All training indexes in a single shuffled permutation.
        self.indexes = np.arange(self.audios_num)
        self.random_state.shuffle(self.indexes)

        self.pointer = 0

    def __iter__(self):
        """Infinitely yield batch meta for training.

        Returns:
            batch_meta: e.g.: [
                {'hdf5_path': string, 'index_in_hdf5': int},
                ...]
        """
        while True:
            batch_meta = []
            while len(batch_meta) < self.batch_size:
                index = self.indexes[self.pointer]
                self.pointer += 1

                # End of an epoch: reshuffle and restart from the beginning.
                if self.pointer >= self.audios_num:
                    self.pointer = 0
                    self.random_state.shuffle(self.indexes)

                # Black-listed clips are skipped and do not count toward the batch.
                if self.audio_names[index] not in self.black_list_names:
                    batch_meta.append({
                        'hdf5_path': self.hdf5_paths[index],
                        'index_in_hdf5': self.indexes_in_hdf5[index]})

            yield batch_meta

    def state_dict(self):
        """Snapshot of the sampler position, for checkpointing."""
        return {
            'indexes': self.indexes,
            'pointer': self.pointer}

    def load_state_dict(self, state):
        """Restore a snapshot produced by state_dict()."""
        self.indexes = state['indexes']
        self.pointer = state['pointer']
|
172 |
+
|
173 |
+
|
174 |
+
class BalancedTrainSampler(Base):
    def __init__(self, indexes_hdf5_path, batch_size, black_list_csv=None,
        random_seed=1234):
        """Balanced sampler. Generates batch meta for training with data
        equally sampled from the different sound classes.

        Args:
            indexes_hdf5_path: string
            batch_size: int
            black_list_csv: string
            random_seed: int
        """
        super(BalancedTrainSampler, self).__init__(indexes_hdf5_path,
            batch_size, black_list_csv, random_seed)

        self.samples_num_per_class = np.sum(self.targets, axis=0)
        logging.info('samples_num_per_class: {}'.format(
            self.samples_num_per_class.astype(np.int32)))

        # Per-class training indexes, each list independently shuffled. E.g.:
        # [[0, 11, 12, ...], [3, 4, 15, 16, ...], [7, 8, ...], ...]
        self.indexes_per_class = []
        for k in range(self.classes_num):
            class_indexes = np.where(self.targets[:, k] == 1)[0]
            self.random_state.shuffle(class_indexes)
            self.indexes_per_class.append(class_indexes)

        # Queue of class ids to draw from next; refilled a full shuffled
        # pass at a time.
        self.queue = []
        self.pointers_of_classes = [0] * self.classes_num

    def expand_queue(self, queue):
        """Append one shuffled pass over all class ids to the queue."""
        classes_set = np.arange(self.classes_num).tolist()
        self.random_state.shuffle(classes_set)
        return queue + classes_set

    def __iter__(self):
        """Infinitely yield batch meta for training.

        Returns:
            batch_meta: e.g.: [
                {'hdf5_path': string, 'index_in_hdf5': int},
                ...]
        """
        while True:
            batch_meta = []
            while len(batch_meta) < self.batch_size:
                if not self.queue:
                    self.queue = self.expand_queue(self.queue)

                class_id = self.queue.pop(0)
                pointer = self.pointers_of_classes[class_id]
                self.pointers_of_classes[class_id] += 1
                index = self.indexes_per_class[class_id][pointer]

                # Epoch of this class finished: reshuffle its indexes and
                # reset its pointer.
                if self.pointers_of_classes[class_id] >= self.samples_num_per_class[class_id]:
                    self.pointers_of_classes[class_id] = 0
                    self.random_state.shuffle(self.indexes_per_class[class_id])

                # Black-listed clips are skipped and do not count toward the batch.
                if self.audio_names[index] not in self.black_list_names:
                    batch_meta.append({
                        'hdf5_path': self.hdf5_paths[index],
                        'index_in_hdf5': self.indexes_in_hdf5[index]})

            yield batch_meta

    def state_dict(self):
        """Snapshot of the sampler position, for checkpointing."""
        return {
            'indexes_per_class': self.indexes_per_class,
            'queue': self.queue,
            'pointers_of_classes': self.pointers_of_classes}

    def load_state_dict(self, state):
        """Restore a snapshot produced by state_dict()."""
        self.indexes_per_class = state['indexes_per_class']
        self.queue = state['queue']
        self.pointers_of_classes = state['pointers_of_classes']
|
263 |
+
|
264 |
+
|
265 |
+
class AlternateTrainSampler(Base):
    def __init__(self, indexes_hdf5_path, batch_size, black_list_csv=None,
        random_seed=1234):
        """AlternateSampler is a combination of Sampler and Balanced Sampler.
        AlternateSampler alternately samples data from Sampler and Balanced
        Sampler.

        Args:
            indexes_hdf5_path: string
            batch_size: int
            black_list_csv: string
            random_seed: int
        """
        # NOTE: does not call super().__init__(); all state lives inside the
        # two wrapped samplers, each of which loads the index hdf5 itself.
        self.sampler1 = TrainSampler(indexes_hdf5_path, batch_size,
            black_list_csv, random_seed)

        self.sampler2 = BalancedTrainSampler(indexes_hdf5_path, batch_size,
            black_list_csv, random_seed)

        self.batch_size = batch_size
        self.count = 0

    def __iter__(self):
        """Generate batch meta for training, alternating between the uniform
        sampler (sampler1) and the balanced sampler (sampler2).

        Returns:
            batch_meta: e.g.: [
                {'hdf5_path': string, 'index_in_hdf5': int},
                ...]
        """
        batch_size = self.batch_size

        while True:
            self.count += 1

            # count starts at 0 and is incremented first, so the very first
            # batch (count == 1) comes from the balanced sampler branch below.
            if self.count % 2 == 0:
                batch_meta = []
                i = 0
                while i < batch_size:
                    index = self.sampler1.indexes[self.sampler1.pointer]
                    self.sampler1.pointer += 1

                    # Shuffle indexes and reset pointer
                    if self.sampler1.pointer >= self.sampler1.audios_num:
                        self.sampler1.pointer = 0
                        self.sampler1.random_state.shuffle(self.sampler1.indexes)

                    # If audio in black list then continue
                    if self.sampler1.audio_names[index] in self.sampler1.black_list_names:
                        continue
                    else:
                        batch_meta.append({
                            'hdf5_path': self.sampler1.hdf5_paths[index],
                            'index_in_hdf5': self.sampler1.indexes_in_hdf5[index]})
                        i += 1

            elif self.count % 2 == 1:
                batch_meta = []
                i = 0
                while i < batch_size:
                    if len(self.sampler2.queue) == 0:
                        self.sampler2.queue = self.sampler2.expand_queue(self.sampler2.queue)

                    class_id = self.sampler2.queue.pop(0)
                    pointer = self.sampler2.pointers_of_classes[class_id]
                    self.sampler2.pointers_of_classes[class_id] += 1
                    index = self.sampler2.indexes_per_class[class_id][pointer]

                    # When finish one epoch of a sound class, then shuffle its indexes and reset pointer
                    if self.sampler2.pointers_of_classes[class_id] >= self.sampler2.samples_num_per_class[class_id]:
                        self.sampler2.pointers_of_classes[class_id] = 0
                        self.sampler2.random_state.shuffle(self.sampler2.indexes_per_class[class_id])

                    # If audio in black list then continue
                    if self.sampler2.audio_names[index] in self.sampler2.black_list_names:
                        continue
                    else:
                        batch_meta.append({
                            'hdf5_path': self.sampler2.hdf5_paths[index],
                            'index_in_hdf5': self.sampler2.indexes_in_hdf5[index]})
                        i += 1

            yield batch_meta

    def state_dict(self):
        # Persist both wrapped samplers so training can resume mid-alternation.
        state = {
            'sampler1': self.sampler1.state_dict(),
            'sampler2': self.sampler2.state_dict()}
        return state

    def load_state_dict(self, state):
        self.sampler1.load_state_dict(state['sampler1'])
        self.sampler2.load_state_dict(state['sampler2'])
|
357 |
+
|
358 |
+
|
359 |
+
class EvaluateSampler(object):
    def __init__(self, indexes_hdf5_path, batch_size):
        """Evaluate sampler. Generate batch meta for evaluation.

        Args:
            indexes_hdf5_path: string
            batch_size: int
        """
        self.batch_size = batch_size

        with h5py.File(indexes_hdf5_path, 'r') as hf:
            self.audio_names = [name.decode() for name in hf['audio_name'][:]]
            self.hdf5_paths = [path.decode() for path in hf['hdf5_path'][:]]
            self.indexes_in_hdf5 = hf['index_in_hdf5'][:]
            self.targets = hf['target'][:].astype(np.float32)

        self.audios_num = len(self.audio_names)

    def __iter__(self):
        """Yield evaluation batches in order, without shuffling; the final
        batch may be smaller than batch_size.

        Returns:
            batch_meta: e.g.: [
                {'audio_name': string,
                 'hdf5_path': string,
                 'index_in_hdf5': int,
                 'target': (classes_num,)},
                ...]
        """
        for start in range(0, self.audios_num, self.batch_size):
            stop = min(start + self.batch_size, self.audios_num)
            yield [{
                'audio_name': self.audio_names[index],
                'hdf5_path': self.hdf5_paths[index],
                'index_in_hdf5': self.indexes_in_hdf5[index],
                'target': self.targets[index]}
                for index in range(start, stop)]
|
404 |
+
|
405 |
+
|
406 |
+
def collate_fn(list_data_dict):
    """Collate per-clip data dicts into one dict of batched arrays.

    Args:
        list_data_dict, e.g., [{'audio_name': str, 'waveform': (clip_samples,), ...},
                               {'audio_name': str, 'waveform': (clip_samples,), ...},
                               ...]
    Returns:
        np_data_dict, dict, e.g.,
            {'audio_name': (batch_size,), 'waveform': (batch_size, clip_samples), ...}
    """
    # The keys of the first dict define the batch layout; every entry is
    # stacked along a new leading batch axis.
    return {
        key: np.array([data_dict[key] for data_dict in list_data_dict])
        for key in list_data_dict[0].keys()}
|
audio_detection/audio_infer/utils/dataset.py
ADDED
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import argparse
|
3 |
+
import csv
|
4 |
+
import os
|
5 |
+
import glob
|
6 |
+
import datetime
|
7 |
+
import time
|
8 |
+
import logging
|
9 |
+
import h5py
|
10 |
+
import librosa
|
11 |
+
|
12 |
+
from utilities import (create_folder, get_filename, create_logging,
|
13 |
+
float32_to_int16, pad_or_truncate, read_metadata)
|
14 |
+
import config
|
15 |
+
|
16 |
+
|
17 |
+
def split_unbalanced_csv_to_partial_csvs(args):
    """Split unbalanced csv to part csvs. Each part csv contains up to 50000 ids.

    Args:
        args.unbalanced_csv: str, path of the csv to split.
        args.unbalanced_partial_csvs_dir: str, directory to write part csvs to.
    """
    unbalanced_csv_path = args.unbalanced_csv
    unbalanced_partial_csvs_dir = args.unbalanced_partial_csvs_dir

    create_folder(unbalanced_partial_csvs_dir)

    with open(unbalanced_csv_path, 'r') as f:
        lines = f.readlines()

    lines = lines[3:]   # Remove head info
    audios_num_per_file = 50000

    files_num = int(np.ceil(len(lines) / float(audios_num_per_file)))

    for r in range(files_num):
        start = r * audios_num_per_file
        chunk = lines[start : start + audios_num_per_file]

        out_csv_path = os.path.join(unbalanced_partial_csvs_dir,
            'unbalanced_train_segments_part{:02d}.csv'.format(r))

        with open(out_csv_path, 'w') as f:
            # Keep a three-line header slot so downstream readers can
            # uniformly skip the first three lines.
            f.write('empty\n')
            f.write('empty\n')
            f.write('empty\n')
            f.writelines(chunk)

        print('Write out csv to {}'.format(out_csv_path))
|
49 |
+
|
50 |
+
|
51 |
+
def download_wavs(args):
    """Download videos and extract audio in wav format.

    Args:
        args.csv_path: str, csv listing audio_id, start time and end time.
        args.audios_dir: str, directory to save out extracted wav files.
        args.mini_data: bool, if True only process the first 10 clips.
    """
    # Paths
    csv_path = args.csv_path
    audios_dir = args.audios_dir
    mini_data = args.mini_data

    # Bug fix: the two branches were swapped — mini-data runs logged into the
    # full-dataset directory and vice versa.
    if mini_data:
        logs_dir = '_logs/download_dataset_minidata/{}'.format(get_filename(csv_path))
    else:
        logs_dir = '_logs/download_dataset/{}'.format(get_filename(csv_path))

    create_folder(audios_dir)
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Download log is saved to {}'.format(logs_dir))

    # Read csv
    with open(csv_path, 'r') as f:
        lines = f.readlines()

    lines = lines[3:]   # Remove csv head info

    if mini_data:
        lines = lines[0 : 10]   # Download partial data for debug

    download_time = time.time()

    # Download
    for (n, line) in enumerate(lines):

        items = line.split(', ')
        audio_id = items[0]
        start_time = float(items[1])
        end_time = float(items[2])
        duration = end_time - start_time

        logging.info('{} {} start_time: {:.1f}, end_time: {:.1f}'.format(
            n, audio_id, start_time, end_time))

        # Download full video of whatever format.
        # NOTE(review): commands are built by string interpolation from csv
        # fields and run through the shell — safe only for trusted csv input.
        video_name = os.path.join(audios_dir, '_Y{}.%(ext)s'.format(audio_id))
        os.system("youtube-dl --quiet -o '{}' -x https://www.youtube.com/watch?v={}"\
            .format(video_name, audio_id))

        video_paths = glob.glob(os.path.join(audios_dir, '_Y' + audio_id + '.*'))

        # If download successful
        if len(video_paths) > 0:
            video_path = video_paths[0]   # Choose one video

            # Add 'Y' to the head because some video ids are started with '-'
            # which will cause problem
            audio_path = os.path.join(audios_dir, 'Y' + audio_id + '.wav')

            # Extract audio in wav format (mono, 32 kHz, clipped to the
            # labelled segment).
            os.system("ffmpeg -loglevel panic -i {} -ac 1 -ar 32000 -ss {} -t 00:00:{} {} "\
                .format(video_path,
                str(datetime.timedelta(seconds=start_time)), duration,
                audio_path))

            # Remove downloaded video
            os.system("rm {}".format(video_path))

            logging.info("Download and convert to {}".format(audio_path))

    logging.info('Download finished! Time spent: {:.3f} s'.format(
        time.time() - download_time))

    logging.info('Logs can be viewed in {}'.format(logs_dir))
|
123 |
+
|
124 |
+
|
125 |
+
def pack_waveforms_to_hdf5(args):
    """Pack waveform and target of several audio clips to a single hdf5 file.
    This can speed up loading and training.

    Args:
        args.audios_dir: str, directory of downloaded wav files.
        args.csv_path: str, metadata csv describing the clips.
        args.waveforms_hdf5_path: str, path to save out the packed hdf5.
        args.mini_data: bool, if True only pack the first 10 clips.
    """
    # Arguments & parameters
    audios_dir = args.audios_dir
    csv_path = args.csv_path
    waveforms_hdf5_path = args.waveforms_hdf5_path
    mini_data = args.mini_data

    clip_samples = config.clip_samples
    classes_num = config.classes_num
    sample_rate = config.sample_rate
    id_to_ix = config.id_to_ix

    # Paths
    if mini_data:
        prefix = 'mini_'
        waveforms_hdf5_path += '.mini'
    else:
        prefix = ''

    create_folder(os.path.dirname(waveforms_hdf5_path))

    logs_dir = '_logs/pack_waveforms_to_hdf5/{}{}'.format(prefix, get_filename(csv_path))
    create_folder(logs_dir)
    create_logging(logs_dir, filemode='w')
    logging.info('Write logs to {}'.format(logs_dir))

    # Read csv file
    meta_dict = read_metadata(csv_path, classes_num, id_to_ix)

    if mini_data:
        mini_num = 10
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][0 : mini_num]

    audios_num = len(meta_dict['audio_name'])

    # Pack waveform to hdf5
    total_time = time.time()

    with h5py.File(waveforms_hdf5_path, 'w') as hf:
        hf.create_dataset('audio_name', shape=((audios_num,)), dtype='S20')
        hf.create_dataset('waveform', shape=((audios_num, clip_samples)), dtype=np.int16)
        # Bug fix: np.bool was removed in NumPy 1.24; builtin bool is the
        # supported spelling and maps to the same h5py storage type.
        hf.create_dataset('target', shape=((audios_num, classes_num)), dtype=bool)
        hf.attrs.create('sample_rate', data=sample_rate, dtype=np.int32)

        # Pack waveform & target of several audio clips to a single hdf5 file
        for n in range(audios_num):
            audio_path = os.path.join(audios_dir, meta_dict['audio_name'][n])

            if os.path.isfile(audio_path):
                logging.info('{} {}'.format(n, audio_path))
                (audio, _) = librosa.core.load(audio_path, sr=sample_rate, mono=True)
                # Force a fixed-length clip so every hdf5 row is homogeneous.
                audio = pad_or_truncate(audio, clip_samples)

                hf['audio_name'][n] = meta_dict['audio_name'][n].encode()
                hf['waveform'][n] = float32_to_int16(audio)
                hf['target'][n] = meta_dict['target'][n]
            else:
                # Missing files are logged and skipped; their rows stay zeroed.
                logging.info('{} File does not exist! {}'.format(n, audio_path))

    logging.info('Write to {}'.format(waveforms_hdf5_path))
    logging.info('Pack hdf5 time: {:.3f}'.format(time.time() - total_time))
191 |
+
|
192 |
+
|
193 |
+
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='mode')

    parser_split = subparsers.add_parser('split_unbalanced_csv_to_partial_csvs')
    parser_split.add_argument('--unbalanced_csv', type=str, required=True, help='Path of unbalanced_csv file to read.')
    parser_split.add_argument('--unbalanced_partial_csvs_dir', type=str, required=True, help='Directory to save out split unbalanced partial csv.')

    parser_download_wavs = subparsers.add_parser('download_wavs')
    parser_download_wavs.add_argument('--csv_path', type=str, required=True, help='Path of csv file containing audio info to be downloaded.')
    parser_download_wavs.add_argument('--audios_dir', type=str, required=True, help='Directory to save out downloaded audio.')
    # Bug fix: default was True, which made the store_true flag a no-op,
    # contradicted the help text, and disagreed with the pack_waveforms_to_hdf5
    # parser below.
    parser_download_wavs.add_argument('--mini_data', action='store_true', default=False, help='Set true to only download 10 audios for debugging.')

    parser_pack_wavs = subparsers.add_parser('pack_waveforms_to_hdf5')
    parser_pack_wavs.add_argument('--csv_path', type=str, required=True, help='Path of csv file containing audio info to be downloaded.')
    parser_pack_wavs.add_argument('--audios_dir', type=str, required=True, help='Directory to save out downloaded audio.')
    parser_pack_wavs.add_argument('--waveforms_hdf5_path', type=str, required=True, help='Path to save out packed hdf5.')
    parser_pack_wavs.add_argument('--mini_data', action='store_true', default=False, help='Set true to only download 10 audios for debugging.')

    args = parser.parse_args()

    if args.mode == 'split_unbalanced_csv_to_partial_csvs':
        split_unbalanced_csv_to_partial_csvs(args)

    elif args.mode == 'download_wavs':
        download_wavs(args)

    elif args.mode == 'pack_waveforms_to_hdf5':
        pack_waveforms_to_hdf5(args)

    else:
        raise Exception('Incorrect arguments!')
|
audio_detection/audio_infer/utils/plot_for_paper.py
ADDED
@@ -0,0 +1,565 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import numpy as np
|
4 |
+
import argparse
|
5 |
+
import h5py
|
6 |
+
import time
|
7 |
+
import pickle
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
import csv
|
10 |
+
from sklearn import metrics
|
11 |
+
|
12 |
+
from utilities import (create_folder, get_filename, d_prime)
|
13 |
+
import config
|
14 |
+
|
15 |
+
|
16 |
+
def load_statistics(statistics_path):
    """Load pickled training statistics and return per-iteration mAP curves.

    Args:
      statistics_path: str, path of a pickle produced during training; it maps
        'bal' and 'test' to lists of per-iteration dicts that each contain an
        'average_precision' array of shape (classes_num,).

    Returns:
      (bal_map, test_map): two 1-D arrays of length N (number of logged
      iterations), the class-averaged AP on the balanced-train and evaluation
      subsets respectively.
    """
    with open(statistics_path, 'rb') as f:
        statistics_dict = pickle.load(f)

    curves = []
    for subset in ('bal', 'test'):
        # (N, classes_num) AP matrix, then mean over classes -> (N,)
        ap_matrix = np.array(
            [entry['average_precision'] for entry in statistics_dict[subset]])
        curves.append(np.mean(ap_matrix, axis=-1))

    return curves[0], curves[1]
|
25 |
+
|
26 |
+
|
27 |
+
def crop_label(label):
    """Shorten a class label to at most max_len characters at word boundaries.

    Labels short enough are returned unchanged. Longer labels keep as many
    leading whole words as fit into max_len characters.

    Fixes over the original implementation:
      * the cropped result no longer starts with a stray leading space
        (the original accumulated ' ' + word from an empty string);
      * a label whose first word alone exceeds max_len is hard-truncated
        instead of returning an empty string.

    Args:
      label: str, e.g. an AudioSet class name.

    Returns:
      str, the (possibly cropped) label.
    """
    max_len = 16
    if len(label) <= max_len:
        return label

    kept = []
    for word in label.split(' '):
        candidate = ' '.join(kept + [word])
        if len(candidate) > max_len:
            break
        kept.append(word)

    if not kept:
        # First word already longer than the limit: truncate it outright.
        return label[:max_len]
    return ' '.join(kept)
|
40 |
+
|
41 |
+
|
42 |
+
def add_comma(integer):
    """Format an integer with thousands separators. E.g., 1234567 -> '1,234,567'.

    Fixes over the original implementation, which built the string as
    str(i // 1000) + ',' + str(i % 1000) and therefore dropped zero padding
    (12005 -> '12,5') and inserted only a single comma (1234567 -> '1234,567').

    Args:
      integer: int or anything int() accepts.

    Returns:
      str, the comma-grouped representation.
    """
    return '{:,}'.format(int(integer))
|
50 |
+
|
51 |
+
|
52 |
+
def plot_classwise_iteration_map(args):
    """Plot per-class AP versus training iterations for three groups of classes
    (most frequent, mid-frequency, least frequent by training-sample count)
    and save the figure to results/classwise_iteration_map.pdf.

    Args:
      args: argparse.Namespace (unused; kept for the CLI dispatch interface).
    """

    # Paths
    save_out_path = 'results/classwise_iteration_map.pdf'
    create_folder(os.path.dirname(save_out_path))

    # Load statistics
    # NOTE(review): the pickle is assumed to contain a 'test' list of
    # per-iteration dicts with an 'average_precision' array — confirm it was
    # produced by the matching training run.
    statistics_dict = pickle.load(open('paper_statistics/statistics_sr32000_window1024_hop320_mel64_fmin50_fmax14000_full_train_WavegramLogmelCnn_balanced_mixup_bs32.pkl', 'rb'))

    mAP_mat = np.array([e['average_precision'] for e in statistics_dict['test']])
    mAP_mat = mAP_mat[0 : 300, :]    # 300 * 2000 = 600k iterations
    # Class indexes ordered from most to fewest training samples.
    sorted_indexes = np.argsort(config.full_samples_per_class)[::-1]

    fig, axs = plt.subplots(1, 3, figsize=(20, 5))
    # One panel each for ranks 0-9, 250-259 and 517-526 of the sorted classes.
    ranges = [np.arange(0, 10), np.arange(250, 260), np.arange(517, 527)]
    axs[0].set_ylabel('AP')

    for col in range(0, 3):
        axs[col].set_ylim(0, 1.)
        axs[col].set_xlim(0, 301)
        axs[col].set_xlabel('Iterations')
        axs[col].set_ylabel('AP')
        axs[col].xaxis.set_ticks(np.arange(0, 301, 100))
        # Tick positions count statistics snapshots (every 2000 iterations).
        axs[col].xaxis.set_ticklabels(['0', '200k', '400k', '600k'])
        lines = []
        for _ix in ranges[col]:
            # Legend entry: cropped class name plus its training-sample count.
            _label = crop_label(config.labels[sorted_indexes[_ix]]) + \
                ' ({})'.format(add_comma(config.full_samples_per_class[sorted_indexes[_ix]]))
            line, = axs[col].plot(mAP_mat[:, sorted_indexes[_ix]], label=_label)
            lines.append(line)
        box = axs[col].get_position()
        axs[col].set_position([box.x0, box.y0, box.width * 1., box.height])
        axs[col].legend(handles=lines, bbox_to_anchor=(1., 1.))
        axs[col].yaxis.grid(color='k', linestyle='solid', alpha=0.3, linewidth=0.3)

    plt.tight_layout(pad=4, w_pad=1, h_pad=1)
    plt.savefig(save_out_path)
    print(save_out_path)
|
90 |
+
|
91 |
+
|
92 |
+
def plot_six_figures(args):
    """Plot six mAP-vs-iterations comparison panels (architectures, data and
    augmentation, embedding size, amount of data, sampling rate, mel bins)
    and save them to results/six_figures.pdf.

    Each curve pair shows the faint balanced-train mAP and the solid
    evaluation mAP loaded from a training-statistics pickle.

    Fixes over the original: plt.tight_layout(0, 1, 0) used positional
    arguments that modern matplotlib rejects (now keywords); unused locals
    removed; the 'cnn14 (50% full)' legend casing and two curves that were
    missing an explicit linewidth made consistent with their siblings.

    Args:
      args: argparse.Namespace (unused; kept for the CLI dispatch interface).
    """
    # Arguments & parameters
    max_plot_iteration = 540000
    iterations = np.arange(0, max_plot_iteration, 2000)

    # Paths
    save_out_path = 'results/six_figures.pdf'
    create_folder(os.path.dirname(save_out_path))

    # Plot
    fig, ax = plt.subplots(2, 3, figsize=(14, 7))
    bal_alpha = 0.3     # faint: mAP on the balanced training subset
    test_alpha = 1.0    # solid: mAP on the evaluation set
    linewidth = 1.

    stat_dir = 'paper_statistics/statistics_sr32000_window1024_hop320_'
    # Panels: (axes position, title, extra legend kwargs,
    #          [(statistics file stem, legend label, color), ...]).
    panels = [
        ((0, 0), '(a) Comparison of architectures', {}, [
            ('mel64_fmin50_fmax14000_full_train_WavegramLogmelCnn_balanced_mixup_bs32', 'Wavegram-Logmel-CNN', 'g'),
            ('mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32', 'CNN14', 'r'),
            ('mel64_fmin50_fmax14000_full_train_MobileNetV1_balanced_mixup_bs32', 'MobileNetV1', 'b')]),
        ((0, 1), '(b) Comparison of training data and augmentation', {'fontsize': 8}, [
            ('mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32', 'CNN14,bal,mixup (1.9m)', 'r'),
            ('mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_timedomain_bs32', 'CNN14,bal,mixup-wav (1.9m)', 'y'),
            ('mel64_fmin50_fmax14000_full_train_Cnn14_balanced_nomixup_bs32', 'CNN14,bal,no-mixup (1.9m)', 'g'),
            ('mel64_fmin50_fmax14000_full_train_Cnn14_nobalanced_nomixup_bs32', 'CNN14,no-bal,no-mixup (1.9m)', 'b'),
            ('mel64_fmin50_fmax14000_balanced_train_Cnn14_balanced_mixup_bs32', 'CNN14,bal,mixup (20k)', 'm'),
            ('mel64_fmin50_fmax14000_balanced_train_Cnn14_balanced_nomixup_bs32', 'CNN14,bal,no-mixup (20k)', 'k')]),
        ((0, 2), '(c) Comparison of embedding size', {}, [
            ('mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32', 'CNN14,emb=2048', 'r'),
            ('mel64_fmin50_fmax14000_full_train_Cnn14_emb128_balanced_mixup_bs32', 'CNN14,emb=128', 'g'),
            ('mel64_fmin50_fmax14000_full_train_Cnn14_emb32_balanced_mixup_bs32', 'CNN14,emb=32', 'b')]),
        ((1, 0), '(d) Comparison of amount of training data', {}, [
            ('mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32', 'CNN14 (100% full)', 'r'),
            ('mel64_fmin50_fmax14000_0.8full_train_Cnn14_balanced_mixup_bs32', 'CNN14 (80% full)', 'b'),
            ('mel64_fmin50_fmax14000_0.5full_train_Cnn14_balanced_mixup_bs32', 'CNN14 (50% full)', 'g')]),
        ((1, 1), '(e) Comparison of sampling rate', {}, [
            ('mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32', 'CNN14,32kHz', 'r'),
            ('mel64_fmin50_fmax14000_full_train_Cnn14_16k_balanced_mixup_bs32', 'CNN14,16kHz', 'b'),
            ('mel64_fmin50_fmax14000_full_train_Cnn14_8k_balanced_mixup_bs32', 'CNN14,8kHz', 'g')]),
        ((1, 2), '(f) Comparison of mel bins number', {}, [
            ('mel128_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32', 'CNN14,128-melbins', 'g'),
            ('mel64_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32', 'CNN14,64-melbins', 'r'),
            ('mel32_fmin50_fmax14000_full_train_Cnn14_balanced_mixup_bs32', 'CNN14,32-melbins', 'b')]),
    ]

    for (row, col), title, legend_kwargs, curves in panels:
        lines = []
        for stem, label, color in curves:
            bal_map, test_map = load_statistics(stat_dir + stem + '.pkl')
            ax[row, col].plot(bal_map, color=color, alpha=bal_alpha, linewidth=linewidth)
            line, = ax[row, col].plot(test_map, label=label, color=color,
                alpha=test_alpha, linewidth=linewidth)
            lines.append(line)
        ax[row, col].legend(handles=lines, loc=2, **legend_kwargs)
        ax[row, col].set_title(title)

    # Shared axis cosmetics for all six panels.
    for i in range(2):
        for j in range(3):
            ax[i, j].set_ylim(0, 0.8)
            ax[i, j].set_xlim(0, len(iterations))
            ax[i, j].set_xlabel('Iterations')
            ax[i, j].set_ylabel('mAP')
            # One statistics snapshot every 2000 iterations -> 50 ticks = 100k.
            ax[i, j].xaxis.set_ticks(np.arange(0, len(iterations), 50))
            ax[i, j].xaxis.set_ticklabels(['0', '100k', '200k', '300k', '400k', '500k'])
            ax[i, j].yaxis.set_ticks(np.arange(0, 0.81, 0.05))
            ax[i, j].yaxis.set_ticklabels(['0', '', '0.1', '', '0.2', '', '0.3',
                '', '0.4', '', '0.5', '', '0.6', '', '0.7', '', '0.8'])
            ax[i, j].yaxis.grid(color='k', linestyle='solid', alpha=0.3, linewidth=0.3)
            ax[i, j].xaxis.grid(color='k', linestyle='solid', alpha=0.3, linewidth=0.3)

    # Keyword arguments: positional pad/h_pad/w_pad were removed from
    # matplotlib's tight_layout.
    plt.tight_layout(pad=0, h_pad=1, w_pad=0)
    plt.savefig(save_out_path)
    print('Save figure to {}'.format(save_out_path))
|
296 |
+
|
297 |
+
|
298 |
+
def plot_complexity_map(args):
    """Scatter evaluation mAP against multiply-add count (in millions) for the
    evaluated architectures and save the figure to results/complexity_mAP.pdf.

    Fixes over the original: the dead plt.figure(figsize=(5, 5)) created an
    unused figure so the intended size never applied to the saved one (size
    now passed to subplots); the unused, misleadingly named
    sorted_indexes = np.sort(flops) was removed; plt.tight_layout(0, 0, 0)
    used positional arguments rejected by modern matplotlib; the x label
    'Multi-load_statisticss (million)' was a search-and-replace artifact.

    Args:
      args: argparse.Namespace (unused; kept for the CLI dispatch interface).
    """
    # Paths
    save_out_path = 'results/complexity_mAP.pdf'
    create_folder(os.path.dirname(save_out_path))

    fig, ax = plt.subplots(1, 1, figsize=(5, 5))

    model_types = np.array(['Cnn6', 'Cnn10', 'Cnn14', 'ResNet22', 'ResNet38', 'ResNet54',
        'MobileNetV1', 'MobileNetV2', 'DaiNet', 'LeeNet', 'LeeNet18',
        'Res1dNet30', 'Res1dNet44', 'Wavegram-CNN', 'Wavegram-\nLogmel-CNN'])
    # Multiply-adds (millions) and evaluation mAP, same order as model_types.
    flops = np.array([21.986, 28.166, 42.220, 30.081, 48.962, 54.563, 3.614, 2.810,
        30.395, 4.741, 26.369, 32.688, 61.833, 44.234, 53.510])
    mAPs = np.array([0.343, 0.380, 0.431, 0.430, 0.434, 0.429, 0.389, 0.383, 0.295,
        0.266, 0.336, 0.365, 0.355, 0.389, 0.439])

    ax.scatter(flops, mAPs)

    # Manual (dx, dy) annotation offsets so labels do not overlap the markers.
    shift = [[-5.5, -0.004], [1, -0.004], [-1, -0.014], [-2, 0.006], [-7, 0.006],
        [1, -0.01], [0.5, 0.004], [-1, -0.014], [1, -0.007], [0.8, -0.008],
        [1, -0.007], [1, 0.002], [-6, -0.015], [1, -0.008], [0.8, 0]]

    for i, model_type in enumerate(model_types):
        ax.annotate(model_type, (flops[i] + shift[i][0], mAPs[i] + shift[i][1]))

    # Connect models of the same family with a line.
    ax.plot(flops[[0, 1, 2]], mAPs[[0, 1, 2]])
    ax.plot(flops[[3, 4, 5]], mAPs[[3, 4, 5]])
    ax.plot(flops[[6, 7]], mAPs[[6, 7]])
    ax.plot(flops[[9, 10]], mAPs[[9, 10]])
    ax.plot(flops[[11, 12]], mAPs[[11, 12]])
    ax.plot(flops[[13, 14]], mAPs[[13, 14]])

    ax.set_xlim(0, 70)
    ax.set_ylim(0.2, 0.5)
    ax.set_xlabel('Multi-adds (million)', fontsize=15)
    ax.set_ylabel('mAP', fontsize=15)
    ax.tick_params(axis='x', labelsize=12)
    ax.tick_params(axis='y', labelsize=12)

    plt.tight_layout(pad=0, h_pad=0, w_pad=0)

    plt.savefig(save_out_path)
    print('Write out figure to {}'.format(save_out_path))
|
343 |
+
|
344 |
+
|
345 |
+
def plot_long_fig(args):
    """Plot per-class AP of several systems together with per-class training
    clip counts and label quality as a 4-row figure spanning all classes,
    and save it to results/long_fig.pdf.

    Fixes over the original: plt.tight_layout(0, 0, 0) used positional
    arguments rejected by modern matplotlib (now keywords); the four
    copy-pasted bar/scatter calls are folded into loops.

    Args:
      args: argparse.Namespace (unused; kept for the CLI dispatch interface).
    """
    # Paths
    stats = pickle.load(open('paper_statistics/stats_for_long_fig.pkl', 'rb'))

    save_out_path = 'results/long_fig.pdf'
    create_folder(os.path.dirname(save_out_path))

    # Load meta. Classes are ordered by the pre-computed plotting order.
    N = len(config.labels)
    sorted_indexes = stats['sorted_indexes_for_plot']
    sorted_labels = np.array(config.labels)[sorted_indexes]
    audio_clips_per_class = stats['official_balanced_training_samples'] + stats['official_unbalanced_training_samples']
    audio_clips_per_class = audio_clips_per_class[sorted_indexes]

    # Prepare axes: four rows, each with a log-count left axis (*a) and an
    # AP right axis (*b).
    (ax1a, ax2a, ax3a, ax4a, ax1b, ax2b, ax3b, ax4b) = prepare_plot_long_4_rows(sorted_labels)

    # Plot the number of training samples on every left-hand axis.
    for axa in (ax1a, ax2a, ax3a, ax4a):
        axa.bar(np.arange(N), audio_clips_per_class, alpha=0.3)

    # Load mAP of different systems
    """Average instance system of [1] with an mAP of 0.317.
    [1] Kong, Qiuqiang, Changsong Yu, Yong Xu, Turab Iqbal, Wenwu Wang, and
    Mark D. Plumbley. "Weakly labelled audioset tagging with attention neural
    networks." IEEE/ACM Transactions on Audio, Speech, and Language Processing
    27, no. 11 (2019): 1791-1802."""
    maps_avg_instances = stats['averaging_instance_system_avg_9_probs_from_10000_to_50000_iterations']['eval']['average_precision']
    maps_avg_instances = maps_avg_instances[sorted_indexes]

    # PANNs Cnn14
    maps_panns_cnn14 = stats['panns_cnn14']['eval']['average_precision']
    maps_panns_cnn14 = maps_panns_cnn14[sorted_indexes]

    # PANNs MobileNetV1
    maps_panns_mobilenetv1 = stats['panns_mobilenetv1']['eval']['average_precision']
    maps_panns_mobilenetv1 = maps_panns_mobilenetv1[sorted_indexes]

    # PANNs Wavegram-Logmel-Cnn14
    maps_panns_wavegram_logmel_cnn14 = stats['panns_wavegram_logmel_cnn14']['eval']['average_precision']
    maps_panns_wavegram_logmel_cnn14 = maps_panns_wavegram_logmel_cnn14[sorted_indexes]

    # Plot per-class AP markers for each system.
    _scatter_4_rows(maps_panns_wavegram_logmel_cnn14, ax1b, ax2b, ax3b, ax4b, s=5, c='g')
    _scatter_4_rows(maps_panns_cnn14, ax1b, ax2b, ax3b, ax4b, s=5, c='r')
    _scatter_4_rows(maps_panns_mobilenetv1, ax1b, ax2b, ax3b, ax4b, s=5, c='b')
    _scatter_4_rows(maps_avg_instances, ax1b, ax2b, ax3b, ax4b, s=5, c='k')

    linewidth = 0.7
    line0te = _plot_4_rows(maps_panns_wavegram_logmel_cnn14, ax1b, ax2b, ax3b, ax4b,
        c='g', linewidth=linewidth, label='AP with Wavegram-Logmel-CNN')
    line1te = _plot_4_rows(maps_panns_cnn14, ax1b, ax2b, ax3b, ax4b, c='r',
        linewidth=linewidth, label='AP with CNN14')
    line2te = _plot_4_rows(maps_panns_mobilenetv1, ax1b, ax2b, ax3b, ax4b, c='b',
        linewidth=linewidth, label='AP with MobileNetV1')
    line3te = _plot_4_rows(maps_avg_instances, ax1b, ax2b, ax3b, ax4b, c='k',
        linewidth=linewidth, label='AP with averaging instances (baseline)')

    # Plot label quality. Entries may be None (quality unknown); values of
    # exactly 1 are nudged down to 0.99 so the marker stays inside the axis.
    label_quality = stats['label_quality']
    sorted_label_quality = np.array(label_quality)[sorted_indexes]
    for k in range(len(sorted_label_quality)):
        if sorted_label_quality[k] and sorted_label_quality[k] == 1:
            sorted_label_quality[k] = 0.99

    # NOTE: elementwise '!= None' (not 'is not None') is intentional — it
    # masks the object array entry by entry.
    known = sorted_label_quality != None
    for axb in (ax1b, ax2b, ax3b):
        axb.scatter(np.arange(N)[known], sorted_label_quality[known],
            s=12, c='r', linewidth=0.8, marker='+')
    line_label_quality = ax4b.scatter(np.arange(N)[known],
        sorted_label_quality[known], s=12, c='r', linewidth=0.8, marker='+',
        label='Label quality')

    # Classes with unknown label quality are drawn as dashes at 0.5.
    unknown_x = np.arange(N)[~known]
    for axb in (ax1b, ax2b, ax3b, ax4b):
        axb.scatter(unknown_x, 0.5 * np.ones(len(unknown_x)),
            s=12, c='r', linewidth=0.8, marker='_')

    plt.legend(handles=[line0te, line1te, line2te, line3te, line_label_quality], fontsize=6, loc=1)
    # Keyword arguments: positional pad/h_pad/w_pad were removed from
    # matplotlib's tight_layout.
    plt.tight_layout(pad=0, h_pad=0, w_pad=0)
    plt.savefig(save_out_path)
    print('Save fig to {}'.format(save_out_path))
|
434 |
+
|
435 |
+
|
436 |
+
def prepare_plot_long_4_rows(sorted_lbs):
    """Create a 4-row figure for the long per-class plot.

    Each row shows a contiguous segment of the class axis: a left y-axis on a
    log scale for clip counts and a twinned right y-axis in [0, 1] for AP.
    Class names are drawn as rotated x tick labels.

    Args:
      sorted_lbs: sequence of class-name strings in plotting order.

    Returns:
      (ax1a, ax2a, ax3a, ax4a, ax1b, ax2b, ax3b, ax4b): the four count axes
      followed by the four twinned AP axes.
    """
    N = len(sorted_lbs)

    f, axes_count = plt.subplots(4, 1, sharey=False, facecolor='w', figsize=(10, 10.5))

    fontsize = 5

    K = 132  # classes per row; the last row takes the remainder
    segments = [(0, K), (K, 2 * K), (2 * K, 3 * K), (3 * K, N)]

    # Trim labels to 25 chars and drop a dangling short last word.
    trimmed = []
    for raw in sorted_lbs:
        short = raw[0:25]
        pieces = short.split(' ')
        if len(pieces[-1]) < 3:
            short = ' '.join(pieces[0:-1])
        trimmed.append(short)

    axes_ap = [a.twinx() for a in axes_count]

    for row, (axa, axb) in enumerate(zip(axes_count, axes_ap)):
        lo, hi = segments[row]
        axa.set_xlim(lo, hi)
        axa.grid(which='major', axis='x', linestyle='-', alpha=0.3)
        axa.set_yscale('log')

        axb.set_ylim(0., 1.)
        axb.set_ylabel('Average precision')
        axb.yaxis.grid(color='grey', linestyle='--', alpha=0.5)

        axa.xaxis.set_ticks(np.arange(lo, hi))
        axa.xaxis.set_ticklabels(trimmed[lo:hi], rotation=90, fontsize=fontsize)
        axa.xaxis.tick_bottom()
        axa.set_ylabel("Number of audio clips")

        # Hide the inner spines so adjacent rows read as one continuous axis.
        if row > 0:
            axa.spines['left'].set_visible(False)
            axb.spines['left'].set_visible(False)
        if row < 3:
            axa.spines['right'].set_visible(False)
            axb.spines['right'].set_visible(False)

    plt.subplots_adjust(hspace=0.8)

    return (axes_count[0], axes_count[1], axes_count[2], axes_count[3],
        axes_ap[0], axes_ap[1], axes_ap[2], axes_ap[3])
|
521 |
+
|
522 |
+
|
523 |
+
def _scatter_4_rows(x, ax, ax2, ax3, ax4, s, c, marker='.', alpha=1.):
|
524 |
+
N = len(x)
|
525 |
+
ax.scatter(np.arange(N), x, s=s, c=c, marker=marker, alpha=alpha)
|
526 |
+
ax2.scatter(np.arange(N), x, s=s, c=c, marker=marker, alpha=alpha)
|
527 |
+
ax3.scatter(np.arange(N), x, s=s, c=c, marker=marker, alpha=alpha)
|
528 |
+
ax4.scatter(np.arange(N), x, s=s, c=c, marker=marker, alpha=alpha)
|
529 |
+
|
530 |
+
|
531 |
+
def _plot_4_rows(x, ax, ax2, ax3, ax4, c, linewidth=1.0, alpha=1.0, label=""):
|
532 |
+
N = len(x)
|
533 |
+
ax.plot(x, c=c, linewidth=linewidth, alpha=alpha)
|
534 |
+
ax2.plot(x, c=c, linewidth=linewidth, alpha=alpha)
|
535 |
+
ax3.plot(x, c=c, linewidth=linewidth, alpha=alpha)
|
536 |
+
line, = ax4.plot(x, c=c, linewidth=linewidth, alpha=alpha, label=label)
|
537 |
+
return line
|
538 |
+
|
539 |
+
|
540 |
+
if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='')
    subparsers = parser.add_subparsers(dest='mode')

    # Each plotting mode is a bare sub-command with no extra options.
    for mode_name in ('plot_classwise_iteration_map', 'plot_six_figures',
            'plot_complexity_map', 'plot_long_fig'):
        subparsers.add_parser(mode_name)

    args = parser.parse_args()

    # Dispatch table instead of an if/elif chain; unknown modes still raise.
    dispatch = {
        'plot_classwise_iteration_map': plot_classwise_iteration_map,
        'plot_six_figures': plot_six_figures,
        'plot_complexity_map': plot_complexity_map,
        'plot_long_fig': plot_long_fig,
    }

    handler = dispatch.get(args.mode)
    if handler is None:
        raise Exception('Incorrect argument!')
    handler(args)
|
audio_detection/audio_infer/utils/plot_statistics.py
ADDED
The diff for this file is too large to render.
See raw diff
|
|
audio_detection/audio_infer/utils/utilities.py
ADDED
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
import h5py
|
4 |
+
import soundfile
|
5 |
+
import librosa
|
6 |
+
import numpy as np
|
7 |
+
import pandas as pd
|
8 |
+
from scipy import stats
|
9 |
+
import datetime
|
10 |
+
import pickle
|
11 |
+
|
12 |
+
|
13 |
+
def create_folder(fd):
    """Create directory *fd* (including parents) if it does not exist.

    Uses ``exist_ok=True`` instead of the previous check-then-create pattern,
    which could raise FileExistsError when two workers raced on the check.
    """
    os.makedirs(fd, exist_ok=True)
|
16 |
+
|
17 |
+
|
18 |
+
def get_filename(path):
    """Return the base name of *path* without directory or extension.

    Args:
        path: str, any file path (relative or absolute).

    Returns:
        str, e.g. '/data/Yabc.wav' -> 'Yabc'.
    """
    path = os.path.realpath(path)
    # os.path.basename is portable; splitting on '/' breaks on Windows paths.
    na_ext = os.path.basename(path)
    na = os.path.splitext(na_ext)[0]
    return na
|
23 |
+
|
24 |
+
|
25 |
+
def get_sub_filepaths(folder):
    """Recursively collect the full paths of every file below *folder*."""
    return [
        os.path.join(parent, file_name)
        for parent, _, file_names in os.walk(folder)
        for file_name in file_names
    ]
|
32 |
+
|
33 |
+
|
34 |
+
def create_logging(log_dir, filemode):
    """Configure the root logger to write to a fresh file in *log_dir*.

    Args:
        log_dir: str, directory for the log files (created if missing).
        filemode: str, file mode passed to basicConfig, e.g. 'w' or 'a'.

    Returns:
        The ``logging`` module itself (configured as a side effect).
    """
    create_folder(log_dir)
    i1 = 0

    # Pick the first unused sequential file name: 0000.log, 0001.log, ...
    while os.path.isfile(os.path.join(log_dir, '{:04d}.log'.format(i1))):
        i1 += 1

    log_path = os.path.join(log_dir, '{:04d}.log'.format(i1))
    # NOTE(review): basicConfig is a no-op if the root logger already has
    # handlers, so only the first call in a process takes effect -- confirm
    # this is only called once per run.
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
        datefmt='%a, %d %b %Y %H:%M:%S',
        filename=log_path,
        filemode=filemode)

    # Print to console
    # Mirror INFO-and-above records to the console in addition to the file.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    return logging
|
57 |
+
|
58 |
+
|
59 |
+
def read_metadata(csv_path, classes_num, id_to_ix):
    """Read metadata of AudioSet from a csv file.

    Args:
        csv_path: str, path of an AudioSet segments csv (first 3 lines are
            header and are skipped).
        classes_num: int, total number of classes.
        id_to_ix: dict mapping a label id such as '/m/068hy' to a class index.

    Returns:
        meta_dict: {'audio_name': (audios_num,), 'target': (audios_num, classes_num)}
    """

    with open(csv_path, 'r') as fr:
        lines = fr.readlines()
        lines = lines[3:]   # Remove heads

    audios_num = len(lines)
    # np.bool was removed in NumPy 1.24; the builtin bool is the correct dtype.
    targets = np.zeros((audios_num, classes_num), dtype=bool)
    audio_names = []

    for n, line in enumerate(lines):
        items = line.split(', ')
        """items: ['--4gqARaEJE', '0.000', '10.000', '"/m/068hy,/m/07q6cd_,/m/0bt9lr,/m/0jbk"\n']"""

        audio_name = 'Y{}.wav'.format(items[0])   # Audios are started with an extra 'Y' when downloading
        # The label field is quoted; take the text between the quotes.
        label_ids = items[3].split('"')[1].split(',')

        audio_names.append(audio_name)

        # Target: set a True for every label attached to this clip.
        for id in label_ids:
            ix = id_to_ix[id]
            targets[n, ix] = 1

    meta_dict = {'audio_name': np.array(audio_names), 'target': targets}
    return meta_dict
|
93 |
+
|
94 |
+
|
95 |
+
def float32_to_int16(x):
    """Convert float audio in roughly [-1, 1] to int16 PCM samples."""
    peak = np.max(np.abs(x))
    # Tolerate slight overshoot (e.g. from resampling); anything larger
    # indicates un-normalised input.
    assert peak <= 1.2
    clipped = np.clip(x, -1, 1)
    return (clipped * 32767.).astype(np.int16)
|
99 |
+
|
100 |
+
def int16_to_float32(x):
    """Convert int16 PCM samples back to float32 audio in [-1, 1]."""
    scaled = x / 32767.
    return scaled.astype(np.float32)
|
102 |
+
|
103 |
+
|
104 |
+
def pad_or_truncate(x, audio_length):
    """Pad all audio to specific length."""
    current = len(x)
    if current > audio_length:
        # Too long: keep only the leading audio_length samples.
        return x[:audio_length]
    # Too short (or exact): append zeros up to audio_length.
    padding = np.zeros(audio_length - current)
    return np.concatenate((x, padding), axis=0)
|
110 |
+
|
111 |
+
|
112 |
+
def d_prime(auc):
    """Convert an AUC score to d-prime, assuming unit-variance Gaussians."""
    return stats.norm().ppf(auc) * np.sqrt(2.0)
|
115 |
+
|
116 |
+
|
117 |
+
class Mixup(object):
    """Generates mixup interpolation coefficients.

    Each consecutive pair of batch items receives coefficients ``lam`` and
    ``1 - lam``, where ``lam ~ Beta(mixup_alpha, mixup_alpha)``.
    """

    def __init__(self, mixup_alpha, random_seed=1234):
        """Mixup coefficient generator.
        """
        self.mixup_alpha = mixup_alpha
        self.random_state = np.random.RandomState(random_seed)

    def get_lambda(self, batch_size):
        """Get mixup random coefficients.
        Args:
            batch_size: int
        Returns:
            mixup_lambdas: (batch_size,)
        """
        coefficients = []
        # One Beta draw covers two consecutive batch items.
        for _ in range(0, batch_size, 2):
            lam = self.random_state.beta(self.mixup_alpha, self.mixup_alpha, 1)[0]
            coefficients.extend([lam, 1. - lam])
        return np.array(coefficients)
|
138 |
+
|
139 |
+
|
140 |
+
class StatisticsContainer(object):
    """Accumulates evaluation statistics across training iterations.

    Statistics are kept separately for the balanced-train ('bal') and
    evaluation ('test') splits, can be pickled to disk, and reloaded to
    resume training from a given iteration.
    """

    def __init__(self, statistics_path):
        """Contain statistics of different training iterations.

        Args:
            statistics_path: str, pickle file the statistics are dumped to.
        """
        self.statistics_path = statistics_path

        # Time-stamped backup so an earlier run's dump is never overwritten.
        self.backup_statistics_path = '{}_{}.pkl'.format(
            os.path.splitext(self.statistics_path)[0],
            datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))

        self.statistics_dict = {'bal': [], 'test': []}

    def append(self, iteration, statistics, data_type):
        """Record *statistics* (a dict, mutated to carry the iteration)
        for *iteration* under key 'bal' or 'test'."""
        statistics['iteration'] = iteration
        self.statistics_dict[data_type].append(statistics)

    def dump(self):
        """Pickle the statistics to both the main and the backup path."""
        # Context managers close the handles deterministically; the previous
        # bare open(...) calls leaked them until garbage collection.
        with open(self.statistics_path, 'wb') as f:
            pickle.dump(self.statistics_dict, f)
        with open(self.backup_statistics_path, 'wb') as f:
            pickle.dump(self.statistics_dict, f)
        logging.info(' Dump statistics to {}'.format(self.statistics_path))
        logging.info(' Dump statistics to {}'.format(self.backup_statistics_path))

    def load_state_dict(self, resume_iteration):
        """Reload dumped statistics, keeping only entries whose iteration is
        <= *resume_iteration*."""
        with open(self.statistics_path, 'rb') as f:
            self.statistics_dict = pickle.load(f)

        resume_statistics_dict = {'bal': [], 'test': []}

        for key in self.statistics_dict.keys():
            for statistics in self.statistics_dict[key]:
                if statistics['iteration'] <= resume_iteration:
                    resume_statistics_dict[key].append(statistics)

        self.statistics_dict = resume_statistics_dict
|
audio_detection/target_sound_detection/src/__pycache__/models.cpython-38.pyc
ADDED
Binary file (37.9 kB). View file
|
|
audio_detection/target_sound_detection/src/__pycache__/utils.cpython-38.pyc
ADDED
Binary file (11.1 kB). View file
|
|
audio_detection/target_sound_detection/src/models.py
ADDED
@@ -0,0 +1,1288 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# !/usr/bin/env python
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
# @Time : 2021/3/9 16:33
|
4 |
+
# @Author : dongchao yang
|
5 |
+
# @File : train.py
|
6 |
+
from itertools import zip_longest
|
7 |
+
import numpy as np
|
8 |
+
from scipy import ndimage
|
9 |
+
import torch
|
10 |
+
import torch.nn as nn
|
11 |
+
import torch.nn.functional as F
|
12 |
+
import time
|
13 |
+
from torchlibrosa.augmentation import SpecAugmentation
|
14 |
+
from torchlibrosa.stft import Spectrogram, LogmelFilterBank
|
15 |
+
import math
|
16 |
+
from sklearn.cluster import KMeans
|
17 |
+
import os
|
18 |
+
import time
|
19 |
+
from functools import partial
|
20 |
+
# import timm
|
21 |
+
# from timm.models.layers import DropPath, to_2tuple, trunc_normal_
|
22 |
+
import warnings
|
23 |
+
from functools import partial
|
24 |
+
# from timm.models.registry import register_model
|
25 |
+
# from timm.models.vision_transformer import _cfg
|
26 |
+
# from mmdet.utils import get_root_logger
|
27 |
+
# from mmcv.runner import load_checkpoint
|
28 |
+
# from mmcv.runner import _load_checkpoint, load_state_dict
|
29 |
+
# import mmcv.runner
|
30 |
+
import copy
|
31 |
+
from collections import OrderedDict
|
32 |
+
import io
|
33 |
+
import re
|
34 |
+
# Debug switch; 0 disables any extra diagnostic behaviour in this module.
DEBUG=0
# Closed set of sound-event class names; a label's position in this list is
# used as its class index by the detection models in this module.
event_labels = ['Alarm', 'Alarm_clock', 'Animal', 'Applause', 'Arrow', 'Artillery_fire',
                'Babbling', 'Baby_laughter', 'Bark', 'Basketball_bounce', 'Battle_cry',
                'Bell', 'Bird', 'Bleat', 'Bouncing', 'Breathing', 'Buzz', 'Camera',
                'Cap_gun', 'Car', 'Car_alarm', 'Cat', 'Caw', 'Cheering', 'Child_singing',
                'Choir', 'Chop', 'Chopping_(food)', 'Clapping', 'Clickety-clack', 'Clicking',
                'Clip-clop', 'Cluck', 'Coin_(dropping)', 'Computer_keyboard', 'Conversation',
                'Coo', 'Cough', 'Cowbell', 'Creak', 'Cricket', 'Croak', 'Crow', 'Crowd', 'DTMF',
                'Dog', 'Door', 'Drill', 'Drip', 'Engine', 'Engine_starting', 'Explosion', 'Fart',
                'Female_singing', 'Filing_(rasp)', 'Finger_snapping', 'Fire', 'Fire_alarm', 'Firecracker',
                'Fireworks', 'Frog', 'Gasp', 'Gears', 'Giggle', 'Glass', 'Glass_shatter', 'Gobble', 'Groan',
                'Growling', 'Hammer', 'Hands', 'Hiccup', 'Honk', 'Hoot', 'Howl', 'Human_sounds', 'Human_voice',
                'Insect', 'Laughter', 'Liquid', 'Machine_gun', 'Male_singing', 'Mechanisms', 'Meow', 'Moo',
                'Motorcycle', 'Mouse', 'Music', 'Oink', 'Owl', 'Pant', 'Pant_(dog)', 'Patter', 'Pig', 'Plop',
                'Pour', 'Power_tool', 'Purr', 'Quack', 'Radio', 'Rain_on_surface', 'Rapping', 'Rattle',
                'Reversing_beeps', 'Ringtone', 'Roar', 'Run', 'Rustle', 'Scissors', 'Scrape', 'Scratch',
                'Screaming', 'Sewing_machine', 'Shout', 'Shuffle', 'Shuffling_cards', 'Singing',
                'Single-lens_reflex_camera', 'Siren', 'Skateboard', 'Sniff', 'Snoring', 'Speech',
                'Speech_synthesizer', 'Spray', 'Squeak', 'Squeal', 'Steam', 'Stir', 'Surface_contact',
                'Tap', 'Tap_dance', 'Telephone_bell_ringing', 'Television', 'Tick', 'Tick-tock', 'Tools',
                'Train', 'Train_horn', 'Train_wheels_squealing', 'Truck', 'Turkey', 'Typewriter', 'Typing',
                'Vehicle', 'Video_game_sound', 'Water', 'Whimper_(dog)', 'Whip', 'Whispering', 'Whistle',
                'Whistling', 'Whoop', 'Wind', 'Writing', 'Yip', 'and_pans', 'bird_song', 'bleep', 'clink',
                'cock-a-doodle-doo', 'crinkling', 'dove', 'dribble', 'eructation', 'faucet', 'flapping_wings',
                'footsteps', 'gunfire', 'heartbeat', 'infant_cry', 'kid_speaking', 'man_speaking', 'mastication',
                'mice', 'river', 'rooster', 'silverware', 'skidding', 'smack', 'sobbing', 'speedboat', 'splatter',
                'surf', 'thud', 'thwack', 'toot', 'truck_horn', 'tweet', 'vroom', 'waterfowl', 'woman_speaking']
|
61 |
+
def load_checkpoint(model,
                    filename,
                    map_location=None,
                    strict=False,
                    logger=None,
                    revise_keys=[(r'^module\.', '')]):
    """Load checkpoint from a file or URI.
    Args:
        model (Module): Module to load checkpoint.
        filename (str): Accept local filepath, URL, ``torchvision://xxx``,
            ``open-mmlab://xxx``. Please refer to ``docs/model_zoo.md`` for
            details.
        map_location (str): Same as :func:`torch.load`.
        strict (bool): Whether to allow different params for the model and
            checkpoint.
        logger (:mod:`logging.Logger` or None): The logger for error message.
        revise_keys (list): A list of customized keywords to modify the
            state_dict in checkpoint. Each item is a (pattern, replacement)
            pair of the regular expression operations. Default: strip
            the prefix 'module.' by [(r'^module\\.', '')].
    Returns:
        dict or OrderedDict: The loaded checkpoint.
    """

    # NOTE(review): `_load_checkpoint` and `load_state_dict` come from
    # mmcv.runner, but the mmcv imports at the top of this file are commented
    # out -- as written, calling this function raises NameError. Confirm the
    # intended environment, or restore the imports.
    # NOTE(review): the mutable default for `revise_keys` is only iterated
    # here, never mutated, so it is harmless in practice.
    checkpoint = _load_checkpoint(filename, map_location, logger)
    '''
    new_proj = torch.nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    new_proj.weight = torch.nn.Parameter(torch.sum(checkpoint['patch_embed1.proj.weight'], dim=1).unsqueeze(1))
    checkpoint['patch_embed1.proj.weight'] = new_proj.weight
    new_proj.weight = torch.nn.Parameter(torch.sum(checkpoint['patch_embed1.proj.weight'], dim=2).unsqueeze(2).repeat(1,1,3,1))
    checkpoint['patch_embed1.proj.weight'] = new_proj.weight
    new_proj.weight = torch.nn.Parameter(torch.sum(checkpoint['patch_embed1.proj.weight'], dim=3).unsqueeze(3).repeat(1,1,1,3))
    checkpoint['patch_embed1.proj.weight'] = new_proj.weight
    '''
    # Collapse the pretrained patch-embedding conv from 3 input channels to 1
    # by summing over the channel dimension (spectrogram input is mono).
    new_proj = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(4, 4), padding=(2, 2))
    new_proj.weight = torch.nn.Parameter(torch.sum(checkpoint['patch_embed1.proj.weight'], dim=1).unsqueeze(1))
    checkpoint['patch_embed1.proj.weight'] = new_proj.weight
    # OrderedDict is a subclass of dict
    if not isinstance(checkpoint, dict):
        raise RuntimeError(
            f'No state_dict found in checkpoint file (unknown)')
    # get state_dict from checkpoint
    if 'state_dict' in checkpoint:
        state_dict = checkpoint['state_dict']
    else:
        state_dict = checkpoint

    # strip prefix of state_dict
    metadata = getattr(state_dict, '_metadata', OrderedDict())
    for p, r in revise_keys:
        state_dict = OrderedDict(
            {re.sub(p, r, k): v
             for k, v in state_dict.items()})
    # Also drop any 'backbone.' prefix left by detection-framework wrappers.
    state_dict = OrderedDict({k.replace('backbone.',''):v for k,v in state_dict.items()})
    # Keep metadata in state_dict
    state_dict._metadata = metadata

    # load state_dict
    load_state_dict(model, state_dict, strict, logger)
    return checkpoint
|
121 |
+
|
122 |
+
def init_weights(m):
    """Initialise module parameters in place: Kaiming-normal for conv layers,
    Kaiming-uniform for linear layers, unit weight for BatchNorm2d; biases
    are zeroed whenever present."""
    def _zero_bias(module):
        # Shared bias initialisation for every handled layer type.
        if module.bias is not None:
            nn.init.constant_(module.bias, 0)

    if isinstance(m, (nn.Conv2d, nn.Conv1d)):
        nn.init.kaiming_normal_(m.weight)
        _zero_bias(m)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        _zero_bias(m)
    if isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight)
        _zero_bias(m)
|
135 |
+
def init_layer(layer):
    """Initialize a Linear or Convolutional layer. """
    nn.init.xavier_uniform_(layer.weight)
    bias = getattr(layer, 'bias', None)
    if bias is not None:
        bias.data.fill_(0.)
|
141 |
+
|
142 |
+
|
143 |
+
def init_bn(bn):
    """Initialize a Batchnorm layer. """
    bn.weight.data.fill_(1.)
    bn.bias.data.fill_(0.)
|
147 |
+
|
148 |
+
class MaxPool(nn.Module):
    """Max pooling over the configured dimension of the decision tensor.

    The `logits` argument is accepted only so the call signature matches the
    other pooling modules; it is ignored.
    """

    def __init__(self, pooldim=1):
        super().__init__()
        self.pooldim = pooldim

    def forward(self, logits, decision):
        values, _ = torch.max(decision, dim=self.pooldim)
        return values
|
155 |
+
|
156 |
+
|
157 |
+
class LinearSoftPool(nn.Module):
    """LinearSoftPool
    Linear softmax, takes logits and returns a probability, near to the actual maximum value.
    Taken from the paper:
    A Comparison of Five Multiple Instance Learning Pooling Functions for Sound Event Detection with Weak Labeling
    https://arxiv.org/abs/1810.09050
    """
    def __init__(self, pooldim=1):
        super().__init__()
        self.pooldim = pooldim

    def forward(self, logits, time_decision):
        # Self-weighted average: sum(p^2) / sum(p); epsilon guards div-by-zero.
        numerator = (time_decision ** 2).sum(self.pooldim)
        denominator = time_decision.sum(self.pooldim) + 1e-7
        return numerator / denominator
|
171 |
+
|
172 |
+
class ConvBlock(nn.Module):
    """Two 3x3 conv + BN + ReLU stages followed by configurable 2-D pooling."""

    def __init__(self, in_channels, out_channels):
        super(ConvBlock, self).__init__()

        conv_kwargs = dict(kernel_size=(3, 3), stride=(1, 1),
                           padding=(1, 1), bias=False)
        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=out_channels, **conv_kwargs)
        self.conv2 = nn.Conv2d(in_channels=out_channels,
                               out_channels=out_channels, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.init_weight()

    def init_weight(self):
        for conv in (self.conv1, self.conv2):
            init_layer(conv)
        for bn in (self.bn1, self.bn2):
            init_bn(bn)

    def forward(self, input, pool_size=(2, 2), pool_type='avg'):
        x = F.relu_(self.bn1(self.conv1(input)))
        x = F.relu_(self.bn2(self.conv2(x)))
        if pool_type == 'max':
            return F.max_pool2d(x, kernel_size=pool_size)
        if pool_type == 'avg':
            return F.avg_pool2d(x, kernel_size=pool_size)
        if pool_type == 'avg+max':
            # The sum keeps both smooth context (avg) and sharp peaks (max).
            return (F.avg_pool2d(x, kernel_size=pool_size)
                    + F.max_pool2d(x, kernel_size=pool_size))
        raise Exception('Incorrect argument!')
|
216 |
+
|
217 |
+
class ConvBlock_GLU(nn.Module):
    """Conv + BN followed by a gated linear unit: the sigmoid of the first
    half of the channels gates the second half, so the output carries
    out_channels // 2 channels."""

    def __init__(self, in_channels, out_channels, kernel_size=(3, 3)):
        super(ConvBlock_GLU, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=out_channels,
                               kernel_size=kernel_size, stride=(1, 1),
                               padding=(1, 1), bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.sigmoid = nn.Sigmoid()
        self.init_weight()

    def init_weight(self):
        init_layer(self.conv1)
        init_bn(self.bn1)

    def forward(self, input, pool_size=(2, 2), pool_type='avg'):
        x = self.bn1(self.conv1(input))
        half = x.shape[1] // 2
        gate = self.sigmoid(x[:, :half, :, :])
        value = x[:, half:, :, :]
        x = gate * value
        if pool_type == 'max':
            x = F.max_pool2d(x, kernel_size=pool_size)
        elif pool_type == 'avg':
            x = F.avg_pool2d(x, kernel_size=pool_size)
        elif pool_type == 'avg+max':
            x = F.avg_pool2d(x, kernel_size=pool_size) + F.max_pool2d(x, kernel_size=pool_size)
        elif pool_type in ('None', 'LP'):
            # 'None' skips pooling; 'LP' is reserved (nn.LPPool2d) but not
            # implemented, matching the original behaviour.
            pass
        else:
            raise Exception('Incorrect argument!')
        return x
|
254 |
+
|
255 |
+
class Mul_scale_GLU(nn.Module):
    # Multi-scale GLU front end: three parallel GLU conv branches with 1x1,
    # 3x3 and 5x5 kernels are concatenated channel-wise, then refined by a
    # stack of further GLU blocks (each GLU halves its out_channels).
    def __init__(self):
        super(Mul_scale_GLU,self).__init__()
        self.conv_block1_1 = ConvBlock_GLU(in_channels=1, out_channels=64,kernel_size=(1,1)) # 1*1
        self.conv_block1_2 = ConvBlock_GLU(in_channels=1, out_channels=64,kernel_size=(3,3)) # 3*3
        self.conv_block1_3 = ConvBlock_GLU(in_channels=1, out_channels=64,kernel_size=(5,5)) # 5*5
        self.conv_block2 = ConvBlock_GLU(in_channels=96, out_channels=128*2)
        # self.conv_block3 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock_GLU(in_channels=128, out_channels=128*2)
        self.conv_block4 = ConvBlock_GLU(in_channels=128, out_channels=256*2)
        self.conv_block5 = ConvBlock_GLU(in_channels=256, out_channels=256*2)
        self.conv_block6 = ConvBlock_GLU(in_channels=256, out_channels=512*2)
        self.conv_block7 = ConvBlock_GLU(in_channels=512, out_channels=512*2)
        # Pads one extra row/column so the 5x5 branch's smaller map can be
        # brought back to the 3x3 branch's spatial size.
        self.padding = nn.ReplicationPad2d((0,1,0,1))

    def forward(self, input, fi=None):
        """
        Input: (batch_size, data_length)"""
        # Each front-end branch yields 32 channels (GLU halves 64), so the
        # concatenation below has 96 channels.
        x1 = self.conv_block1_1(input, pool_size=(2, 2), pool_type='avg')
        # NOTE(review): the :500 / :32 crop looks tied to a fixed input of
        # ~1000 frames x 64 mel bins -- confirm before using other shapes.
        x1 = x1[:,:,:500,:32]
        #print('x1 ',x1.shape)
        x2 = self.conv_block1_2(input,pool_size=(2,2),pool_type='avg')
        #print('x2 ',x2.shape)
        x3 = self.conv_block1_3(input,pool_size=(2,2),pool_type='avg')
        x3 = self.padding(x3)
        #print('x3 ',x3.shape)
        # assert 1==2
        x = torch.cat([x1,x2],dim=1)
        x = torch.cat([x,x3],dim=1)
        #print('x ',x.shape)
        # Alternate 'None'/'avg' pooling so each pair of blocks downsamples once.
        x = self.conv_block2(x, pool_size=(2, 2), pool_type='None')
        x = self.conv_block3(x,pool_size=(2,2),pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training) #
        #print('x2,3 ',x.shape)
        x = self.conv_block4(x, pool_size=(2, 4), pool_type='None')
        x = self.conv_block5(x,pool_size=(2,4),pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        #print('x4,5 ',x.shape)

        x = self.conv_block6(x, pool_size=(1, 4), pool_type='None')
        x = self.conv_block7(x, pool_size=(1, 4), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        # print('x6,7 ',x.shape)
        # assert 1==2
        return x
|
300 |
+
|
301 |
+
class Cnn14(nn.Module):
    # PANNs CNN14 trunk. The spectrogram / log-mel / SpecAugment modules are
    # constructed here, but note that forward() consumes an already-extracted
    # feature map and never calls them -- presumably extraction happens
    # upstream in the pipeline (verify against the caller).
    def __init__(self, sample_rate=32000, window_size=1024, hop_size=320, mel_bins=64, fmin=50,
        fmax=14000, classes_num=527):

        super(Cnn14, self).__init__()

        window = 'hann'
        center = True
        pad_mode = 'reflect'
        ref = 1.0
        amin = 1e-10
        top_db = None

        # Spectrogram extractor
        self.spectrogram_extractor = Spectrogram(n_fft=window_size, hop_length=hop_size,
            win_length=window_size, window=window, center=center, pad_mode=pad_mode,
            freeze_parameters=True)

        # Logmel feature extractor
        self.logmel_extractor = LogmelFilterBank(sr=sample_rate, n_fft=window_size,
            n_mels=mel_bins, fmin=fmin, fmax=fmax, ref=ref, amin=amin, top_db=top_db,
            freeze_parameters=True)

        # Spec augmenter
        self.spec_augmenter = SpecAugmentation(time_drop_width=64, time_stripes_num=2,
            freq_drop_width=8, freq_stripes_num=2)

        self.bn0 = nn.BatchNorm2d(64)

        self.conv_block1 = ConvBlock(in_channels=1, out_channels=64)
        self.conv_block2 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)
        self.conv_block5 = ConvBlock(in_channels=512, out_channels=1024)
        self.conv_block6 = ConvBlock(in_channels=1024, out_channels=2048)

        self.fc1 = nn.Linear(2048, 128, bias=True)
        self.fc_audioset = nn.Linear(128, classes_num, bias=True)

        self.init_weight()

    def init_weight(self):
        # Only the linear heads are re-initialised; conv blocks initialise
        # themselves in their own constructors.
        init_layer(self.fc1)
        init_layer(self.fc_audioset)

    def forward(self, input_, mixup_lambda=None):
        """
        Input: (batch_size, data_length)"""
        # Add a channel axis, then run the six conv blocks with dropout;
        # mixup_lambda is currently unused in this forward pass.
        input_ = input_.unsqueeze(1)
        x = self.conv_block1(input_, pool_size=(2, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block2(x, pool_size=(2, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block3(x, pool_size=(2, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block4(x, pool_size=(1, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block5(x, pool_size=(1, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block6(x, pool_size=(1, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        # print(x.shape)
        # x = torch.mean(x, dim=3)
        # Merge (channels, freq) into one feature axis per time step, then
        # project each frame to a 128-d embedding.
        x = x.transpose(1, 2).contiguous().flatten(-2)
        x = self.fc1(x)
        # print(x.shape)
        # assert 1==2
        # (x1,_) = torch.max(x, dim=2)
        # x2 = torch.mean(x, dim=2)
        # x = x1 + x2
        # x = F.dropout(x, p=0.5, training=self.training)
        # x = F.relu_(self.fc1(x))
        # embedding = F.dropout(x, p=0.5, training=self.training)
        return x
|
375 |
+
|
376 |
+
class Cnn10_fi(nn.Module):
    """CNN10 trunk whose four conv blocks can be modulated by FiLM
    parameters: fi[:, 0] provides the per-sample scale (gamma) and
    fi[:, 1] the shift (beta) applied after every block."""

    def __init__(self):
        super(Cnn10_fi, self).__init__()
        self.conv_block1 = ConvBlock(in_channels=1, out_channels=64)
        self.conv_block2 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)

        # self.fc1 = nn.Linear(512, 512, bias=True)
        # self.fc_audioset = nn.Linear(512, classes_num, bias=True)

        # self.init_weight()

    @staticmethod
    def _film(x, fi):
        """Apply the feature-wise affine modulation gamma * x + beta.

        fi is a (batch, 2) tensor; column 0 is gamma, column 1 is beta,
        each broadcast over the (channel, time, freq) axes of x.
        """
        gamma = fi[:, 0].unsqueeze(1).unsqueeze(2).unsqueeze(3).expand_as(x)
        beta = fi[:, 1].unsqueeze(1).unsqueeze(2).unsqueeze(3).expand_as(x)
        return gamma * x + beta

    def forward(self, input, fi=None):
        """
        Input: (batch_size, data_length)"""
        pool_sizes = [(2, 2), (2, 2), (2, 4), (1, 4)]
        blocks = [self.conv_block1, self.conv_block2,
                  self.conv_block3, self.conv_block4]

        x = input
        for block, pool_size in zip(blocks, pool_sizes):
            x = block(x, pool_size=pool_size, pool_type='avg')
            # `is not None` is the correct identity test; `fi != None`
            # invokes tensor comparison and is fragile across torch versions.
            if fi is not None:
                x = self._film(x, fi)
            x = F.dropout(x, p=0.2, training=self.training)
        return x
|
418 |
+
|
419 |
+
class Cnn10_mul_scale(nn.Module):
    # Multi-scale CNN10 variant: three GLU front-end branches (1x1 / 3x3 /
    # 5x5 kernels) are concatenated, then passed through three plain
    # ConvBlocks. `scale` selects how strongly the time axis is downsampled
    # overall (8 / 4 / 2 / other -> 1).
    def __init__(self,scale=8):
        super(Cnn10_mul_scale, self).__init__()
        self.conv_block1_1 = ConvBlock_GLU(in_channels=1, out_channels=64,kernel_size=(1,1))
        self.conv_block1_2 = ConvBlock_GLU(in_channels=1, out_channels=64,kernel_size=(3,3))
        self.conv_block1_3 = ConvBlock_GLU(in_channels=1, out_channels=64,kernel_size=(5,5))
        self.conv_block2 = ConvBlock(in_channels=96, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)
        self.scale = scale
        # Restores the 5x5 branch's spatial size to match the other branches.
        self.padding = nn.ReplicationPad2d((0,1,0,1))

    def forward(self, input, pool_size=(2, 2), pool_type='avg'):
        """
        Input: (batch_size, data_length)"""
        # Per-stage pooling schedule; larger `scale` applies more time pooling.
        if self.scale == 8:
            pool_size1 = (2,2)
            pool_size2 = (2,2)
            pool_size3 = (2,4)
            pool_size4 = (1,4)
        elif self.scale == 4:
            pool_size1 = (2,2)
            pool_size2 = (2,2)
            pool_size3 = (1,4)
            pool_size4 = (1,4)
        elif self.scale == 2:
            pool_size1 = (2,2)
            pool_size2 = (1,2)
            pool_size3 = (1,4)
            pool_size4 = (1,4)
        else:
            pool_size1 = (1,2)
            pool_size2 = (1,2)
            pool_size3 = (1,4)
            pool_size4 = (1,4)
        # print('input ',input.shape)
        x1 = self.conv_block1_1(input, pool_size=pool_size1, pool_type='avg')
        # NOTE(review): the :500 / :32 crop appears tied to a fixed input of
        # ~1000 frames x 64 mel bins -- confirm for other input shapes.
        x1 = x1[:,:,:500,:32]
        #print('x1 ',x1.shape)
        x2 = self.conv_block1_2(input, pool_size=pool_size1, pool_type='avg')
        #print('x2 ',x2.shape)
        x3 = self.conv_block1_3(input, pool_size=pool_size1, pool_type='avg')
        x3 = self.padding(x3)
        #print('x3 ',x3.shape)
        # assert 1==2
        # Truncate all branches to the shortest time length before concat.
        m_i = min(x3.shape[2],min(x1.shape[2],x2.shape[2]))
        #print('m_i ', m_i)
        x = torch.cat([x1[:,:,:m_i,:],x2[:,:, :m_i,:],x3[:,:, :m_i,:]],dim=1)
        # x = torch.cat([x,x3],dim=1)

        # x = self.conv_block1(input, pool_size=pool_size1, pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block2(x, pool_size=pool_size2, pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block3(x, pool_size=pool_size3, pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block4(x, pool_size=pool_size4, pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        return x
|
477 |
+
|
478 |
+
|
479 |
+
class Cnn10(nn.Module):
    """CNN10 front-end; ``scale`` selects the temporal down-sampling factor."""

    # pooling sizes (time, freq) for the four conv blocks, keyed by scale
    _POOLING = {
        8: ((2, 2), (2, 2), (2, 4), (1, 4)),
        4: ((2, 2), (2, 2), (1, 4), (1, 4)),
        2: ((2, 2), (1, 2), (1, 4), (1, 4)),
    }

    def __init__(self, scale=8):
        super(Cnn10, self).__init__()
        self.conv_block1 = ConvBlock(in_channels=1, out_channels=64)
        self.conv_block2 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)
        self.scale = scale

    def forward(self, input, pool_size=(2, 2), pool_type='avg'):
        """
        Input: (batch_size, data_length)"""
        plan = self._POOLING.get(self.scale,
                                 ((1, 2), (1, 2), (1, 4), (1, 4)))
        blocks = (self.conv_block1, self.conv_block2,
                  self.conv_block3, self.conv_block4)
        x = input
        # each conv block is followed by dropout, matching the training recipe
        for block, pool in zip(blocks, plan):
            x = block(x, pool_size=pool, pool_type='avg')
            x = F.dropout(x, p=0.2, training=self.training)
        return x
|
519 |
+
|
520 |
+
class MeanPool(nn.Module):
    """Temporal mean pooling: averages ``decision`` over ``pooldim``.

    ``logits`` is accepted only to match the shared pooling interface
    and is ignored.
    """
    def __init__(self, pooldim=1):
        super().__init__()
        self.pooldim = pooldim

    def forward(self, logits, decision):
        return decision.mean(dim=self.pooldim)
|
527 |
+
|
528 |
+
class ResPool(nn.Module):
    """Residual pooling stub.

    Holds a :class:`LinearSoftPool` but defines no ``forward`` of its own,
    so calling an instance falls through to ``nn.Module`` (which raises).
    Apparently unfinished or only used for its ``linPool`` attribute.
    """
    def __init__(self, pooldim=1):
        super().__init__()
        self.pooldim = pooldim
        self.linPool = LinearSoftPool(pooldim=1)
|
533 |
+
|
534 |
+
class AutoExpPool(nn.Module):
    """Exponential auto-pooling with a learned per-class scale ``alpha``.

    Pools ``logits`` over ``pooldim`` using exp(alpha * decision) weights.
    """
    def __init__(self, outputdim=10, pooldim=1):
        super().__init__()
        self.outputdim = outputdim
        # BUG FIX: torch.full((outputdim,), 1) produces an int64 tensor and
        # nn.Parameter requires a floating-point dtype (raises at runtime).
        self.alpha = nn.Parameter(torch.full((outputdim, ), 1.0))
        self.pooldim = pooldim

    def forward(self, logits, decision):
        """logits/decision: (B, T, C); returns (B, C)."""
        scaled = self.alpha * decision  # \alpha * P(Y|x) in the paper
        return (logits * torch.exp(scaled)).sum(
            self.pooldim) / torch.exp(scaled).sum(self.pooldim)
|
545 |
+
|
546 |
+
|
547 |
+
class SoftPool(nn.Module):
    """Self-weighted softmax pooling.

    Each frame of ``decision`` is weighted by softmax(decision / T) along
    ``pooldim``; ``T`` is a fixed temperature. ``logits`` is unused.
    """
    def __init__(self, T=1, pooldim=1):
        super().__init__()
        self.pooldim = pooldim
        self.T = T  # softmax temperature

    def forward(self, logits, decision):
        weights = torch.softmax(decision / self.T, dim=self.pooldim)
        return (decision * weights).sum(dim=self.pooldim)
|
556 |
+
|
557 |
+
|
558 |
+
class AutoPool(nn.Module):
    """Auto-pooling with a learned per-class temperature ``alpha``.

    Frames of ``decision`` are weighted by softmax(alpha * decision)
    along ``pooldim``; ``logits`` is unused.
    """
    def __init__(self, outputdim=10, pooldim=1):
        super().__init__()
        self.outputdim = outputdim
        self.alpha = nn.Parameter(torch.ones(outputdim))
        self.dim = pooldim

    def forward(self, logits, decision):
        scaled = self.alpha * decision  # \alpha * P(Y|x) in the paper
        weight = torch.softmax(scaled, dim=self.dim)
        return (decision * weight).sum(dim=self.dim)  # -> (B, C)
|
570 |
+
|
571 |
+
|
572 |
+
class ExtAttentionPool(nn.Module):
    """Extended attention pooling over time.

    Attention weights are produced by a zero-initialised linear layer over
    ``logits`` (so the initial attention distribution is uniform).
    """
    def __init__(self, inputdim, outputdim=10, pooldim=1, **kwargs):
        super().__init__()
        self.inputdim = inputdim
        self.outputdim = outputdim
        self.pooldim = pooldim
        self.attention = nn.Linear(inputdim, outputdim)
        # zero init -> softmax starts out uniform
        nn.init.zeros_(self.attention.weight)
        nn.init.zeros_(self.attention.bias)
        self.activ = nn.Softmax(dim=self.pooldim)

    def forward(self, logits, decision):
        # Logits of shape (B, T, D), decision of shape (B, T, C)
        w_x = self.activ(self.attention(logits) / self.outputdim)
        # NOTE(review): the product below broadcasts (B, D, 1, T) against
        # (B, T, C, 1); the shapes only line up when D == T — confirm the
        # intended input shapes with the caller.
        h = (logits.permute(0, 2, 1).contiguous().unsqueeze(-2) *
             w_x.unsqueeze(-1)).flatten(-2).contiguous()
        return torch.sum(h, self.pooldim)
|
589 |
+
|
590 |
+
|
591 |
+
class AttentionPool(nn.Module):
    """Attention pooling: frame weights come from a learned linear
    transform of ``logits``; ``decision`` is then weight-averaged over
    ``pooldim``.
    """
    def __init__(self, inputdim, outputdim=10, pooldim=1, **kwargs):
        super().__init__()
        self.inputdim = inputdim
        self.outputdim = outputdim
        self.pooldim = pooldim
        self.transform = nn.Linear(inputdim, outputdim)
        self.activ = nn.Softmax(dim=self.pooldim)
        self.eps = 1e-7  # guards the division when weights sum to ~0

    def forward(self, logits, decision):
        # Input is (B, T, D); clamp keeps the softmax numerically stable
        weights = self.activ(torch.clamp(self.transform(logits), -15, 15))
        weighted_sum = (decision * weights).sum(self.pooldim)
        norm = weights.sum(self.pooldim) + self.eps
        return weighted_sum / norm
|
610 |
+
|
611 |
+
class Block2D(nn.Module):
    """BatchNorm -> bias-free Conv2d -> LeakyReLU(0.1) building block."""

    def __init__(self, cin, cout, kernel_size=3, padding=1):
        super().__init__()
        self.block = nn.Sequential(
            nn.BatchNorm2d(cin),
            nn.Conv2d(cin, cout,
                      kernel_size=kernel_size,
                      padding=padding,
                      bias=False),  # BN provides the shift, so no conv bias
            nn.LeakyReLU(inplace=True, negative_slope=0.1),
        )

    def forward(self, x):
        return self.block(x)
|
625 |
+
|
626 |
+
class AudioCNN(nn.Module):
    """Four-block CNN audio classifier.

    The conv trunk is pooled (mean over frequency, max over time) into a
    128-d embedding; ``forward`` returns (embedding, class logits) while
    ``extract`` returns the embedding only.
    """
    def __init__(self, classes_num):
        super(AudioCNN, self).__init__()
        self.conv_block1 = ConvBlock(in_channels=1, out_channels=64)
        self.conv_block2 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)
        self.fc1 = nn.Linear(512, 128, bias=True)
        self.fc = nn.Linear(128, classes_num, bias=True)
        self.init_weights()

    def init_weights(self):
        # only the classification head is explicitly re-initialised
        init_layer(self.fc)

    def _encode(self, input):
        """Shared trunk: conv blocks + pooling -> 128-d embedding."""
        x = input[:, None, :, :]  # (B, T, F) -> (B, 1, T, F)
        for block in (self.conv_block1, self.conv_block2,
                      self.conv_block3, self.conv_block4):
            x = block(x, pool_size=(2, 2), pool_type='avg')
        x = torch.mean(x, dim=3)      # average over frequency bins
        (x, _) = torch.max(x, dim=2)  # max over time steps -> (B, 512)
        return self.fc1(x)            # -> (B, 128)

    def forward(self, input):
        '''
        Input: (batch_size, times_steps, freq_bins)'''
        x = self._encode(input)
        return x, self.fc(x)

    def extract(self, input):
        '''Input: (batch_size, times_steps, freq_bins)'''
        return self._encode(input)
|
669 |
+
|
670 |
+
def parse_poolingfunction(poolingfunction_name='mean', **kwargs):
    """parse_poolingfunction
    A helper function to parse any temporal pooling
    Pooling is done on dimension 1

    :param poolingfunction_name: one of 'mean', 'max', 'linear',
        'expalpha', 'soft', 'auto', 'attention' (case-insensitive)
    :param **kwargs: 'inputdim'/'outputdim' forwarded where required
    :raises ValueError: for an unrecognised name (previously fell through
        and silently returned None, crashing later at the call site)
    """
    poolingfunction_name = poolingfunction_name.lower()
    if poolingfunction_name == 'mean':
        return MeanPool(pooldim=1)
    elif poolingfunction_name == 'max':
        return MaxPool(pooldim=1)
    elif poolingfunction_name == 'linear':
        return LinearSoftPool(pooldim=1)
    elif poolingfunction_name == 'expalpha':
        return AutoExpPool(outputdim=kwargs['outputdim'], pooldim=1)
    elif poolingfunction_name == 'soft':
        return SoftPool(pooldim=1)
    elif poolingfunction_name == 'auto':
        return AutoPool(outputdim=kwargs['outputdim'])
    elif poolingfunction_name == 'attention':
        return AttentionPool(inputdim=kwargs['inputdim'],
                             outputdim=kwargs['outputdim'])
    raise ValueError(
        "Unknown pooling function: {!r}".format(poolingfunction_name))
|
694 |
+
class conv1d(nn.Module):
    """1-D convolution + ReLU with TF-style 'VALID'/'SAME' padding names."""

    def __init__(self, nin, nout, kernel_size=3, stride=1, padding='VALID', dilation=1):
        super(conv1d, self).__init__()
        if padding == 'VALID':
            pad = 0
        elif padding == 'SAME':
            # keeps the output length equal to the input length
            # (for odd kernels and stride 1)
            pad = dilation * ((kernel_size - 1) // 2)
        else:
            raise ValueError("Padding Mode Error!")
        self.conv = nn.Conv1d(nin, nout, kernel_size=kernel_size,
                              stride=stride, padding=pad)
        self.act = nn.ReLU()
        self.init_layer(self.conv)

    def init_layer(self, layer, nonlinearity='relu'):
        """Initialize a Linear or Convolutional layer. """
        nn.init.kaiming_normal_(layer.weight, nonlinearity=nonlinearity)
        nn.init.constant_(layer.bias, 0.1)

    def forward(self, x):
        return self.act(self.conv(x))
|
715 |
+
|
716 |
+
class Atten_1(nn.Module):
    """Single-query attention over a context window.

    The frame at index ``context`` acts as the query; every frame supplies
    a key. The attention-weighted mean of the frames is added back to the
    query frame as a residual.
    """
    def __init__(self, input_dim, context=2, dropout_rate=0.2):
        super(Atten_1, self).__init__()
        self._matrix_k = nn.Linear(input_dim, input_dim // 4)
        self._matrix_q = nn.Linear(input_dim, input_dim // 4)
        self.relu = nn.ReLU()
        self.context = context
        self._dropout_layer = nn.Dropout(dropout_rate)
        self.init_layer(self._matrix_k)
        self.init_layer(self._matrix_q)

    def init_layer(self, layer, nonlinearity='leaky_relu'):
        """Initialize a Linear or Convolutional layer. """
        nn.init.kaiming_uniform_(layer.weight, nonlinearity=nonlinearity)
        if getattr(layer, 'bias', None) is not None:
            layer.bias.data.fill_(0.)

    def forward(self, input_x):
        # keys from all frames: (B, T, D/4)
        keys = self._dropout_layer(self.relu(self._matrix_k(input_x)))
        # query from the centre frame only: (B, 1, D/4)
        query = input_x[:, self.context, :][:, None, :]
        query = self._dropout_layer(self.relu(self._matrix_q(query)))
        # scaled dot-product attention scores -> (B, T)
        scores = torch.matmul(
            keys, query.transpose(-2, -1) / math.sqrt(keys.size(-1)))
        scores = scores.squeeze(2)
        alpha = F.softmax(scores, dim=-1)
        # weight every frame and average over time
        expanded = alpha.unsqueeze(2).repeat(1, 1, input_x.shape[2])
        pooled = (expanded * input_x).mean(1)
        # residual connection with the query frame
        return input_x[:, self.context, :] + pooled
|
765 |
+
|
766 |
+
class Fusion(nn.Module):
    """Multiplicative embedding fusion.

    Both inputs are expanded ``n_fac``-fold by 1x1 convolutions, multiplied
    element-wise, then average-pooled along the channel axis back down to
    ``inputdim2`` features.
    """
    def __init__(self, inputdim, inputdim2, n_fac):
        super().__init__()
        self.fuse_layer1 = conv1d(inputdim, inputdim2 * n_fac, 1)
        self.fuse_layer2 = conv1d(inputdim2, inputdim2 * n_fac, 1)
        # pools along the last dimension, undoing the n_fac expansion
        self.avg_pool = nn.AvgPool1d(n_fac, stride=n_fac)

    def forward(self, embedding, mix_embed):
        # conv1d expects (B, C, T): permute in, convolve, permute back
        e = self.fuse_layer1(embedding.permute(0, 2, 1)).permute(0, 2, 1)
        m = self.fuse_layer2(mix_embed.permute(0, 2, 1)).permute(0, 2, 1)
        fused = e * m                 # element-wise product, (B, T, inputdim2*n_fac)
        return self.avg_pool(fused)   # (B, T, inputdim2)
|
785 |
+
|
786 |
+
class CDur_fusion(nn.Module):
    """CDur detector using multiplicative embedding fusion (instead of the
    concat used by :class:`CDur`).

    forward(x, embedding) returns
    (frame-level presence probability, decisions upsampled to input time).
    """
    def __init__(self, inputdim, outputdim, **kwargs):
        super().__init__()
        self.features = nn.Sequential(
            Block2D(1, 32),
            nn.LPPool2d(4, (2, 4)),
            Block2D(32, 128),
            Block2D(128, 128),
            nn.LPPool2d(4, (2, 4)),
            Block2D(128, 128),
            Block2D(128, 128),
            nn.LPPool2d(4, (1, 4)),
            nn.Dropout(0.3),
        )
        # probe the conv stack once to determine its flattened output width
        with torch.no_grad():
            rnn_input_dim = self.features(torch.randn(1, 1, 500, inputdim)).shape
            rnn_input_dim = rnn_input_dim[1] * rnn_input_dim[-1]

        self.gru = nn.GRU(128, 128, bidirectional=True, batch_first=True)
        # BUG FIX: Fusion.__init__ takes (inputdim, inputdim2, n_fac); the
        # original call Fusion(128, 2) was missing an argument and raised
        # TypeError at construction. Both fused streams are 128-dim here,
        # with an expansion factor of 2.
        self.fusion = Fusion(128, 128, 2)
        self.fc = nn.Linear(256, 256)
        self.outputlayer = nn.Linear(256, outputdim)
        self.features.apply(init_weights)
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding):
        batch, time, dim = x.shape
        x = x.unsqueeze(1)                               # (B, 1, T, D)
        x = self.features(x)
        x = x.transpose(1, 2).contiguous().flatten(-2)   # (B, T', 128)
        # broadcast the clip-level embedding along time, then fuse
        embedding = embedding.unsqueeze(1).repeat(1, x.shape[1], 1)
        x = self.fusion(embedding, x)
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
            # BUG FIX: the flag was checked but never set, so
            # flatten_parameters ran on every forward pass
            self._flattened = True
        x, _ = self.gru(x)                               # (B, T', 256)
        x = self.fc(x)
        decision_time = torch.softmax(self.outputlayer(x), dim=2)
        # interpolate frame decisions back to the input time resolution
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)
        return decision_time[:, :, 0], decision_up
|
831 |
+
|
832 |
+
class CDur(nn.Module):
    """CDur duration/detection model.

    CNN features and a per-frame copy of the clip embedding are
    concatenated, run through a BiGRU, and classified per frame;
    decisions are also upsampled back to the input resolution.
    """
    def __init__(self, inputdim, outputdim, time_resolution, **kwargs):
        super().__init__()
        self.features = nn.Sequential(
            Block2D(1, 32),
            nn.LPPool2d(4, (2, 4)),
            Block2D(32, 128),
            Block2D(128, 128),
            nn.LPPool2d(4, (2, 4)),
            Block2D(128, 128),
            Block2D(128, 128),
            nn.LPPool2d(4, (2, 4)),
            nn.Dropout(0.3),
        )
        # probe the conv stack once to determine its flattened output width
        with torch.no_grad():
            probe = self.features(torch.randn(1, 1, 500, inputdim)).shape
            rnn_input_dim = probe[1] * probe[-1]

        self.gru = nn.GRU(256, 256, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(512, 256)
        self.outputlayer = nn.Linear(256, outputdim)
        self.features.apply(init_weights)
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding, one_hot=None):
        batch, time, dim = x.shape
        x = self.features(x.unsqueeze(1))                # (B, 1, T, D) -> conv maps
        x = x.transpose(1, 2).contiguous().flatten(-2)   # (B, T', C*F')
        # tile the clip-level embedding along time, then concatenate
        embedding = embedding.unsqueeze(1).repeat(1, x.shape[1], 1)
        x = torch.cat((x, embedding), dim=2)             # [B, T', feat + emb]
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
        x, _ = self.gru(x)
        x = self.fc(x)
        decision_time = torch.softmax(self.outputlayer(x), dim=2)
        # upsample the frame decisions back to the original time axis
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)
        return decision_time[:, :, 0], decision_up
|
875 |
+
|
876 |
+
class CDur_big(nn.Module):
    """Larger CDur variant: deeper conv trunk (up to 512 channels) and a
    wider BiGRU; otherwise the same concat-embedding pipeline as CDur."""

    def __init__(self, inputdim, outputdim, **kwargs):
        super().__init__()
        self.features = nn.Sequential(
            Block2D(1, 64),
            Block2D(64, 64),
            nn.LPPool2d(4, (2, 2)),
            Block2D(64, 128),
            Block2D(128, 128),
            nn.LPPool2d(4, (2, 2)),
            Block2D(128, 256),
            Block2D(256, 256),
            nn.LPPool2d(4, (2, 4)),
            Block2D(256, 512),
            Block2D(512, 512),
            nn.LPPool2d(4, (1, 4)),
            nn.Dropout(0.3),
        )
        # probe the conv stack to learn its flattened output width
        with torch.no_grad():
            probe = self.features(torch.randn(1, 1, 500, inputdim)).shape
            rnn_input_dim = probe[1] * probe[-1]
        self.gru = nn.GRU(640, 512, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(1024, 256)
        self.outputlayer = nn.Linear(256, outputdim)
        self.features.apply(init_weights)
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding):
        batch, time, dim = x.shape
        x = self.features(x.unsqueeze(1))                # (B, 1, T, D) -> conv maps
        x = x.transpose(1, 2).contiguous().flatten(-2)   # (B, T', 512)
        # tile the clip embedding along time, then concatenate
        embedding = embedding.unsqueeze(1).repeat(1, x.shape[1], 1)
        x = torch.cat((x, embedding), dim=2)
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
        x, _ = self.gru(x)
        x = self.fc(x)
        decision_time = torch.softmax(self.outputlayer(x), dim=2)
        # upsample frame decisions back to the original time axis
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)
        return decision_time[:, :, 0], decision_up
|
921 |
+
|
922 |
+
class CDur_GLU(nn.Module):
    """CDur variant whose front-end is the multi-scale GLU CNN
    (:class:`Mul_scale_GLU`); embedding is concatenated per frame."""

    def __init__(self, inputdim, outputdim, **kwargs):
        super().__init__()
        self.features = Mul_scale_GLU()
        self.gru = nn.GRU(640, 512, 1, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(1024, 256)
        self.outputlayer = nn.Linear(256, outputdim)
        # note: only the output layer is re-initialised; the GLU features
        # keep their own initialisation
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding, one_hot=None):
        batch, time, dim = x.shape
        x = self.features(x.unsqueeze(1))                # (B, 1, T, D) -> conv maps
        x = x.transpose(1, 2).contiguous().flatten(-2)   # (B, T', 512)
        # tile the clip embedding along time, then concatenate
        embedding = embedding.unsqueeze(1).repeat(1, x.shape[1], 1)
        x = torch.cat((x, embedding), dim=2)
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
        x, _ = self.gru(x)
        x = self.fc(x)
        decision_time = torch.softmax(self.outputlayer(x), dim=2)
        # upsample frame decisions back to the original time axis
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)
        return decision_time[:, :, 0], decision_up
|
959 |
+
|
960 |
+
class CDur_CNN14(nn.Module):
    """CDur variant with a Cnn10 front-end; ``time_resolution`` selects the
    CNN's temporal down-sampling scale (125 -> 8x, 250 -> 4x, 500 -> 2x,
    anything else -> no down-sampling)."""

    def __init__(self, inputdim, outputdim, time_resolution, **kwargs):
        super().__init__()
        scale = {125: 8, 250: 4, 500: 2}.get(time_resolution, 0)
        self.features = Cnn10(scale)
        # probe the conv stack to learn its flattened output width
        with torch.no_grad():
            probe = self.features(torch.randn(1, 1, 500, inputdim)).shape
            rnn_input_dim = probe[1] * probe[-1]
        self.gru = nn.GRU(640, 512, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(1024, 256)
        self.outputlayer = nn.Linear(256, outputdim)
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding, one_hot=None):
        batch, time, dim = x.shape
        x = self.features(x.unsqueeze(1))                # (B, 1, T, D) -> conv maps
        x = x.transpose(1, 2).contiguous().flatten(-2)   # (B, T', 512)
        # tile the clip embedding along time, then concatenate
        embedding = embedding.unsqueeze(1).repeat(1, x.shape[1], 1)
        x = torch.cat((x, embedding), dim=2)
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
        x, _ = self.gru(x)
        x = self.fc(x)
        decision_time = torch.softmax(self.outputlayer(x), dim=2)
        # upsample frame decisions back to the original time axis
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)
        return decision_time[:, :, 0], decision_up
|
1005 |
+
|
1006 |
+
class CDur_CNN_mul_scale(nn.Module):
    """CDur variant with the multi-scale Cnn10 front-end;
    ``time_resolution`` picks the CNN's down-sampling scale."""

    def __init__(self, inputdim, outputdim, time_resolution, **kwargs):
        super().__init__()
        scale = {125: 8, 250: 4, 500: 2}.get(time_resolution, 0)
        self.features = Cnn10_mul_scale(scale)
        self.gru = nn.GRU(640, 512, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(1024, 256)
        self.outputlayer = nn.Linear(256, outputdim)
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding, one_hot=None):
        batch, time, dim = x.shape
        x = self.features(x.unsqueeze(1))                # (B, 1, T, D) -> conv maps
        x = x.transpose(1, 2).contiguous().flatten(-2)   # (B, T', 512)
        # tile the clip embedding along time, then concatenate
        embedding = embedding.unsqueeze(1).repeat(1, x.shape[1], 1)
        x = torch.cat((x, embedding), dim=2)
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
        x, _ = self.gru(x)
        x = self.fc(x)
        decision_time = torch.softmax(self.outputlayer(x), dim=2)
        # upsample frame decisions back to the original time axis
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)
        return decision_time[:, :, 0], decision_up
|
1053 |
+
|
1054 |
+
class CDur_CNN_mul_scale_fusion(nn.Module):
    """Multi-scale Cnn10 front-end combined with multiplicative embedding
    fusion (instead of concatenation) before the BiGRU."""

    def __init__(self, inputdim, outputdim, time_resolution, **kwargs):
        super().__init__()
        scale = {125: 8, 250: 4, 500: 2}.get(time_resolution, 0)
        self.features = Cnn10_mul_scale(scale)
        self.gru = nn.GRU(512, 512, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(1024, 256)
        # fuse the 128-d clip embedding with 512-d frame features
        self.fusion = Fusion(128, 512, 2)
        self.outputlayer = nn.Linear(256, outputdim)
        self.outputlayer.apply(init_weights)

    def forward(self, x, embedding, one_hot=None):
        batch, time, dim = x.shape
        x = self.features(x.unsqueeze(1))                # (B, 1, T, D) -> conv maps
        x = x.transpose(1, 2).contiguous().flatten(-2)   # (B, T', 512)
        # tile the clip embedding along time, then fuse multiplicatively
        embedding = embedding.unsqueeze(1).repeat(1, x.shape[1], 1)
        x = self.fusion(embedding, x)
        if not hasattr(self, '_flattened'):
            self.gru.flatten_parameters()
        x, _ = self.gru(x)
        x = self.fc(x)
        decision_time = torch.softmax(self.outputlayer(x), dim=2)
        # upsample frame decisions back to the original time axis
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)
        return decision_time[:, :, 0], decision_up
|
1103 |
+
|
1104 |
+
|
1105 |
+
class RaDur_fusion(nn.Module):
|
1106 |
+
def __init__(self, model_config, inputdim, outputdim, time_resolution, **kwargs):
|
1107 |
+
super().__init__()
|
1108 |
+
self.encoder = Cnn14()
|
1109 |
+
self.detection = CDur_CNN_mul_scale_fusion(inputdim, outputdim, time_resolution)
|
1110 |
+
self.softmax = nn.Softmax(dim=2)
|
1111 |
+
#self.temperature = 5
|
1112 |
+
# if model_config['pre_train']:
|
1113 |
+
# self.encoder.load_state_dict(torch.load(model_config['encoder_path'])['model'])
|
1114 |
+
# self.detection.load_state_dict(torch.load(model_config['CDur_path']))
|
1115 |
+
|
1116 |
+
self.q = nn.Linear(128,128)
|
1117 |
+
self.k = nn.Linear(128,128)
|
1118 |
+
self.q_ee = nn.Linear(128, 128)
|
1119 |
+
self.k_ee = nn.Linear(128, 128)
|
1120 |
+
self.temperature = 11.3 # sqrt(128)
|
1121 |
+
self.att_pool = model_config['att_pool']
|
1122 |
+
self.enhancement = model_config['enhancement']
|
1123 |
+
self.tao = model_config['tao']
|
1124 |
+
self.top = model_config['top']
|
1125 |
+
self.bn = nn.BatchNorm1d(128)
|
1126 |
+
self.EE_fusion = Fusion(128, 128, 4)
|
1127 |
+
|
1128 |
+
def get_w(self,q,k):
|
1129 |
+
q = self.q(q)
|
1130 |
+
k = self.k(k)
|
1131 |
+
q = q.unsqueeze(1)
|
1132 |
+
attn = torch.bmm(q, k.transpose(1, 2))
|
1133 |
+
attn = attn/self.temperature
|
1134 |
+
attn = self.softmax(attn)
|
1135 |
+
return attn
|
1136 |
+
|
1137 |
+
def get_w_ee(self,q,k):
|
1138 |
+
q = self.q_ee(q)
|
1139 |
+
k = self.k_ee(k)
|
1140 |
+
q = q.unsqueeze(1)
|
1141 |
+
attn = torch.bmm(q, k.transpose(1, 2))
|
1142 |
+
attn = attn/self.temperature
|
1143 |
+
attn = self.softmax(attn)
|
1144 |
+
return attn
|
1145 |
+
|
1146 |
+
def attention_pooling(self, embeddings, mean_embedding):
|
1147 |
+
att_pool_w = self.get_w(mean_embedding,embeddings)
|
1148 |
+
embedding = torch.bmm(att_pool_w, embeddings).squeeze(1)
|
1149 |
+
# print(embedding.shape)
|
1150 |
+
# print(att_pool_w.shape)
|
1151 |
+
# print(att_pool_w[0])
|
1152 |
+
# assert 1==2
|
1153 |
+
return embedding
|
1154 |
+
|
1155 |
+
def select_topk_embeddings(self, scores, embeddings, k):
|
1156 |
+
_, idx_DESC = scores.sort(descending=True, dim=1) # 根据分数进行排序
|
1157 |
+
top_k = _[:,:k]
|
1158 |
+
# print('top_k ', top_k)
|
1159 |
+
# top_k = top_k.mean(1)
|
1160 |
+
idx_topk = idx_DESC[:, :k] # 取top_k个
|
1161 |
+
# print('index ', idx_topk)
|
1162 |
+
idx_topk = idx_topk.unsqueeze(2).expand([-1, -1, embeddings.shape[2]])
|
1163 |
+
selected_embeddings = torch.gather(embeddings, 1, idx_topk)
|
1164 |
+
return selected_embeddings,top_k
|
1165 |
+
|
1166 |
+
def sum_with_attention(self, embedding, top_k, selected_embeddings):
|
1167 |
+
# print('embedding ',embedding)
|
1168 |
+
# print('selected_embeddings ',selected_embeddings.shape)
|
1169 |
+
att_1 = self.get_w_ee(embedding, selected_embeddings)
|
1170 |
+
att_1 = att_1.squeeze(1)
|
1171 |
+
#print('att_1 ',att_1.shape)
|
1172 |
+
larger = top_k > self.tao
|
1173 |
+
# print('larger ',larger)
|
1174 |
+
top_k = top_k*larger
|
1175 |
+
# print('top_k ',top_k.shape)
|
1176 |
+
# print('top_k ',top_k)
|
1177 |
+
att_1 = att_1*top_k
|
1178 |
+
#print('att_1 ',att_1.shape)
|
1179 |
+
# assert 1==2
|
1180 |
+
att_2 = att_1.unsqueeze(2).repeat(1,1,128)
|
1181 |
+
Es = selected_embeddings*att_2
|
1182 |
+
return Es
|
1183 |
+
|
1184 |
+
def orcal_EE(self, x, embedding, label):
    """Two-stage detection with event enhancement.

    Stage 1 runs the detector with the raw reference embedding; its frame
    scores pick the top-scoring frames of the mixture, whose embeddings are
    attention-weighted and fused back into the reference embedding.
    Stage 2 re-runs the detector with the enhanced embedding, and the two
    frame-level decisions are blended with a confidence-dependent weight.

    :param x: input feature, (batch, time, dim)
    :param embedding: reference (target sound) embedding, (batch, emb_dim)
    :param label: unused here; kept for interface compatibility
    :returns: blended frame-level decision, (batch, frames, 2)
    """
    batch, time, dim = x.shape

    # Frame-level embeddings of the mixture, batch-normalized over channels.
    mixture_embedding = self.encoder(x) # 8, 125, 128
    mixture_embedding = mixture_embedding.transpose(1,2)
    mixture_embedding = self.bn(mixture_embedding)
    mixture_embedding = mixture_embedding.transpose(1,2)

    # ---- Stage 1: detection with the raw reference embedding ----
    x = x.unsqueeze(1) # (b,1,t,d)
    x = self.detection.features(x) #
    x = x.transpose(1, 2).contiguous().flatten(-2) # flatten the last two dims # (b,125,128)
    embedding_pre = embedding.unsqueeze(1)
    embedding_pre = embedding_pre.repeat(1, x.shape[1], 1)
    f = self.detection.fusion(embedding_pre, x) # the first stage results
    # NOTE: guard flag '_flattened' is never set, so flatten_parameters()
    # runs on every call (harmless, but see forward()).
    if not hasattr(self, '_flattened'):
        self.detection.gru.flatten_parameters()
    f, _ = self.detection.gru(f) # x torch.Size([16, 125, 256])
    f = self.detection.fc(f)
    decision_time = torch.softmax(self.detection.outputlayer(f),dim=2) # x torch.Size([16, 125, 2])

    # Pick the frames the first stage is most confident about (class 0).
    selected_embeddings, top_k = self.select_topk_embeddings(decision_time[:,:,0], mixture_embedding, self.top)

    selected_embeddings = self.sum_with_attention(embedding, top_k, selected_embeddings) # add the weight

    # Fuse the mean of the selected frames with the reference embedding.
    mix_embedding = selected_embeddings.mean(1).unsqueeze(1) #
    mix_embedding = mix_embedding.repeat(1, x.shape[1], 1)
    embedding = embedding.unsqueeze(1)
    embedding = embedding.repeat(1, x.shape[1], 1)
    mix_embedding = self.EE_fusion(mix_embedding, embedding) # fuse via the learned fusion network

    # ---- Stage 2: detection with the enhanced embedding ----
    f_now = self.detection.fusion(mix_embedding, x)
    f_now, _ = self.detection.gru(f_now) # x torch.Size([16, 125, 256])
    f_now = self.detection.fc(f_now)
    decision_time_now = torch.softmax(self.detection.outputlayer(f_now), dim=2) # x torch.Size([16, 125, 2])

    # Blend the two decisions: the higher (thresholded, halved) average
    # top-k confidence, the more weight stage 2 gets.
    top_k = top_k.mean(1) # get avg score,higher score will have more weight
    larger = top_k > self.tao
    top_k = top_k * larger
    top_k = top_k/2.0
    neg_w = top_k.unsqueeze(1).unsqueeze(2)
    neg_w = neg_w.repeat(1, decision_time_now.shape[1], decision_time_now.shape[2])
    pos_w = 1-neg_w
    decision_time_final = decision_time*pos_w + neg_w*decision_time_now
    return decision_time_final
|
1245 |
+
|
1246 |
+
def forward(self, x, ref, label=None):
    """Detect the target sound described by ``ref`` inside the mixture ``x``.

    :param x: mixture features, (batch, time, dim)
    :param ref: reference clip features for the target sound
    :param label: optional labels, forwarded to orcal_EE when enhancement is on
    :returns: (frame decision (B, frames), upsampled decision (B, time, 2), logit)
    """
    batch, time, dim = x.shape
    # Fix: allocate on the input's device instead of hard-coded .cuda(),
    # which crashed on CPU-only machines.
    logit = torch.zeros(1, device=x.device)

    # Build the reference embedding, optionally with attention pooling.
    embeddings = self.encoder(ref)
    mean_embedding = embeddings.mean(1)
    if self.att_pool == True:
        mean_embedding = self.bn(mean_embedding)
        embeddings = embeddings.transpose(1, 2)
        embeddings = self.bn(embeddings)
        embeddings = embeddings.transpose(1, 2)
        embedding = self.attention_pooling(embeddings, mean_embedding)
    else:
        embedding = mean_embedding

    if self.enhancement == True:
        # Event-enhancement path: two-stage decision, then upsample to the
        # original time resolution (e.g. 125 frames -> 501 steps).
        decision_time = self.orcal_EE(x, embedding, label)
        decision_up = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),  # [16, 2, 125]
            time,                           # 501
            mode='linear',
            align_corners=False).transpose(1, 2)
        return decision_time[:, :, 0], decision_up, logit

    # Plain single-stage path.
    x = x.unsqueeze(1)                                # (b,1,t,d)
    x = self.detection.features(x)
    x = x.transpose(1, 2).contiguous().flatten(-2)    # (b,125,128)
    embedding = embedding.unsqueeze(1)
    embedding = embedding.repeat(1, x.shape[1], 1)
    x = self.detection.fusion(embedding, x)
    if not hasattr(self, '_flattened'):
        self.detection.gru.flatten_parameters()
        # Fix: set the guard so parameters are flattened only once; the
        # original never set it, re-flattening on every forward pass.
        self._flattened = True
    x, _ = self.detection.gru(x)                      # x torch.Size([16, 125, 256])
    x = self.detection.fc(x)
    decision_time = torch.softmax(self.detection.outputlayer(x), dim=2)
    decision_up = torch.nn.functional.interpolate(
        decision_time.transpose(1, 2),
        time,        # 501
        mode='linear',
        align_corners=False).transpose(1, 2)          # upsample 125 -> 501 -> (16,501,2)
    return decision_time[:, :, 0], decision_up, logit
|
audio_detection/target_sound_detection/src/utils.py
ADDED
@@ -0,0 +1,353 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# !/usr/bin/env python
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
# @Time : 2021/3/9 16:33
|
4 |
+
# @Author : dongchao yang
|
5 |
+
# @File : train.py
|
6 |
+
|
7 |
+
import collections
import collections.abc
import sys
from pprint import pformat

import numpy as np
import pandas as pd
import scipy
import six
import sklearn.preprocessing as pre
import torch
import tqdm
import yaml
from loguru import logger
from scipy.interpolate import interp1d
|
22 |
+
|
23 |
+
def parse_config_or_kwargs(config_file, **kwargs):
    """parse_config_or_kwargs

    :param config_file: Config file that has parameters, yaml format
    :param **kwargs: Other alternative parameters or overwrites for config
    """
    with open(config_file) as con_read:
        yaml_config = yaml.load(con_read, Loader=yaml.FullLoader)
    # keyword arguments take precedence over values from the file
    return dict(yaml_config, **kwargs)
|
32 |
+
|
33 |
+
|
34 |
+
def find_contiguous_regions(activity_array):
    """Find contiguous True regions in a boolean numpy array.

    Adapted from dcase_util's DecisionEncoder; kept standalone to avoid the
    sndfile import chain that class drags in.

    :param activity_array: 1-d boolean array
    :returns: (n, 2) int array of [onset, offset) index pairs
    """
    # Indices where the value flips (True<->False), shifted to the element
    # after the flip.
    boundaries = np.logical_xor(activity_array[1:], activity_array[:-1]).nonzero()[0] + 1
    if activity_array[0]:
        # Array starts active: region begins at index 0.
        boundaries = np.r_[0, boundaries]
    if activity_array[-1]:
        # Array ends active: region ends at the array length.
        boundaries = np.r_[boundaries, activity_array.size]
    # Pair up onsets and offsets.
    return boundaries.reshape((-1, 2))
|
53 |
+
|
54 |
+
|
55 |
+
def split_train_cv(
        data_frame: pd.DataFrame,
        frac: float = 0.9,
        y=None,  # Only for stratified, computes necessary split
        **kwargs):
    """Split a dataframe into train and cross-validation parts.

    :param data_frame: data to split
    :type data_frame: pd.DataFrame
    :param frac: fraction that goes to the training split
    :type frac: float
    :param y: label matrix, only used by mode='stratified'
    :returns: (train_data, cv_data)
    """
    mode = kwargs.get('mode', None)
    if mode == 'urbansed':  # Filenames are DATA_-1 DATA_-2 etc
        # Group clips that belong to the same original file and split by group
        # so a file never straddles the train/cv boundary.
        data_frame.loc[:, 'id'] = data_frame.groupby(
            data_frame['filename'].str.split('_').apply(
                lambda x: '_'.join(x[:-1]))).ngroup()
        shuffled = np.random.permutation(data_frame['id'].nunique())
        n_train = int(frac * len(shuffled))
        train_data = data_frame[data_frame['id'].isin(shuffled[:n_train])]
        cv_data = data_frame[data_frame['id'].isin(shuffled[n_train:])]
        del train_data['id']
        del cv_data['id']
    elif mode == 'stratified':
        # Multi-label stratified sampling.
        from skmultilearn.model_selection import iterative_train_test_split
        index_train, _, index_cv, _ = iterative_train_test_split(
            data_frame.index.values.reshape(-1, 1), y, test_size=1. - frac)
        train_data = data_frame[data_frame.index.isin(index_train.squeeze())]
        cv_data = data_frame[data_frame.index.isin(index_cv.squeeze())]
    else:
        # Plain random split with a fixed seed for reproducibility.
        train_data = data_frame.sample(frac=frac, random_state=10)
        cv_data = data_frame[~data_frame.index.isin(train_data.index)]
    return train_data, cv_data
|
92 |
+
|
93 |
+
|
94 |
+
|
95 |
+
def pprint_dict(in_dict, outputfun=sys.stdout.write, formatter='yaml'):
    """pprint_dict

    Render a dict and emit it line by line through *outputfun*.

    :param in_dict: dict to print
    :param outputfun: function to use, defaults to sys.stdout
    :param formatter: 'yaml' (yaml.dump) or 'pretty' (pprint.pformat)
    """
    if formatter == 'pretty':
        rendered = pformat(in_dict)
    elif formatter == 'yaml':
        rendered = yaml.dump(in_dict)
    for line in rendered.split('\n'):
        outputfun(line)
|
106 |
+
|
107 |
+
|
108 |
+
def getfile_outlogger(outputfile):
    """Configure loguru: always log to stderr, and additionally to a file
    when *outputfile* is truthy.

    :param outputfile: path of the log file, or None/'' to skip file logging
    :returns: the configured loguru logger
    """
    log_format = "[<green>{time:YYYY-MM-DD HH:mm:ss}</green>] {message}"
    handlers = [{"sink": sys.stderr, "format": log_format}]
    logger.configure(handlers=handlers)
    if outputfile:
        # enqueue=True makes the file sink safe across processes
        logger.add(outputfile, enqueue=True, format=log_format)
    return logger
|
114 |
+
|
115 |
+
# according label, get encoder
|
116 |
+
def train_labelencoder(labels: pd.Series, sparse=True):
    """Fit a MultiLabelBinarizer on a series of raw labels.

    :param labels: pd.Series of raw labels, e.g. 'Speech,Water' strings,
        numpy arrays, or other iterables of label names
    :param sparse: passed to the encoder as sparse_output
    :returns: the fitted encoder
    :raises ValueError: if the label element type is unsupported
    """
    assert isinstance(labels, pd.Series), "Labels need to be series"
    # Fix: use positional access; labels[0] fails on series whose index does
    # not contain 0 (matches encode_labels below).
    instance = labels.iloc[0]
    if isinstance(instance, six.string_types):
        # In case of using non processed strings, e.g., Vaccum, Speech
        label_array = labels.str.split(',').values.tolist()
    elif isinstance(instance, np.ndarray):
        # Encoder does not like to see numpy array
        label_array = [lab.tolist() for lab in labels]
    elif isinstance(instance, collections.abc.Iterable):
        # Fix: collections.Iterable was removed in Python 3.10.
        label_array = labels
    else:
        # Fix: previously fell through and raised NameError on label_array.
        raise ValueError(
            "Unsupported label element type: {}".format(type(instance)))
    encoder = pre.MultiLabelBinarizer(sparse_output=sparse)
    encoder.fit(label_array)
    return encoder
|
137 |
+
|
138 |
+
|
139 |
+
def encode_labels(labels: pd.Series, encoder=None, sparse=True):
    """Encode raw labels into a many-hot matrix.

    :param labels: pd.Series representing the raw labels e.g., Speech, Water
    :param encoder: optional encoder already fitted; a new one is fitted when None
    :param sparse: sparse_output flag for a newly created encoder
    :returns: (encoded labels (many hot), the encoder)
    :raises ValueError: if the label element type is unsupported
    """
    assert isinstance(labels, pd.Series), "Labels need to be series"
    instance = labels.iloc[0]
    if isinstance(instance, six.string_types):
        # In case of using non processed strings, e.g., Vaccum, Speech
        label_array = labels.str.split(',').values.tolist()
    elif isinstance(instance, np.ndarray):
        # Encoder does not like to see numpy array
        label_array = [lab.tolist() for lab in labels]
    elif isinstance(instance, collections.abc.Iterable):
        # Fix: collections.Iterable was removed in Python 3.10.
        label_array = labels
    else:
        # Fix: previously fell through and raised NameError on label_array.
        raise ValueError(
            "Unsupported label element type: {}".format(type(instance)))
    if encoder is None:  # fix: explicit None check instead of truthiness
        encoder = pre.MultiLabelBinarizer(sparse_output=sparse)
        encoder.fit(label_array)
    labels_encoded = encoder.transform(label_array)  # strings -> many-hot
    return labels_encoded, encoder
|
164 |
+
|
165 |
+
# return pd.arrays.SparseArray(
|
166 |
+
# [row.toarray().ravel() for row in labels_encoded]), encoder
|
167 |
+
|
168 |
+
|
169 |
+
def decode_with_timestamps(events, labels: np.array):
    """decode_with_timestamps
    Decodes the predicted label array (2d) into a list of
    [(Labelname, onset, offset), ...]

    :param events: event name(s); indexed per row for batched input
    :param labels: 1-d or 2-d binary activity array
    :type labels: np.array
    """
    if labels.ndim == 2:
        # Batched: decode each row against its matching event name.
        return [_decode_with_timestamps(events[i], labels[i])
                for i in range(labels.shape[0])]
    return _decode_with_timestamps(events, labels)
|
187 |
+
|
188 |
+
|
189 |
+
def median_filter(x, window_size, threshold=0.5):
    """Binarize predictions, then median-filter along the time axis.

    :param x: input prediction array of shape (B, T, C), (B, T), (T, C)
        or (T,). Input is a sequence of probabilities 0 <= x <= 1
    :param window_size: An integer filter window to use
    :param threshold: Binary thresholding threshold
    :raises ValueError: for inputs with unsupported dimensionality
        (previously this fell through and raised NameError on `size`)
    """
    x = binarize(x, threshold=threshold)  # values become 0/1
    if x.ndim == 3:
        size = (1, window_size, 1)
    elif x.ndim == 2 and x.shape[0] == 1:
        # Assume input is class-specific median filtering
        # E.g, Batch x Time [1, 501]
        size = (1, window_size)
    elif x.ndim == 2 and x.shape[0] > 1:
        # Assume input is standard median pooling, class-independent
        # E.g., Time x Class [501, 10]
        size = (window_size, 1)
    elif x.ndim == 1:
        # Generalization: plain time series previously hit a NameError.
        size = (window_size,)
    else:
        raise ValueError("Unsupported input shape {}".format(x.shape))
    return scipy.ndimage.median_filter(x, size=size)
|
208 |
+
|
209 |
+
|
210 |
+
def _decode_with_timestamps(events, labels):
    """Turn one binary activity row into [(event, onset, offset), ...].

    :param events: event name attached to every region
    :param labels: 1-d binary activity array
    """
    regions = find_contiguous_regions(labels)
    # One tuple per contiguous active region.
    return [(events, onset, offset) for onset, offset in regions]
|
221 |
+
|
222 |
+
def inverse_transform_labels(encoder, pred):
    """Map encoded predictions back to label names.

    :param encoder: fitted encoder exposing inverse_transform
    :param pred: 2-d prediction matrix, or 3-d for batched input
    """
    if pred.ndim == 3:
        # Batched: invert each sample separately.
        return [encoder.inverse_transform(sample) for sample in pred]
    return encoder.inverse_transform(pred)
|
227 |
+
|
228 |
+
|
229 |
+
def binarize(pred, threshold=0.5):
    """Threshold probabilities into 0/1, per-sample for batched input.

    :param pred: 2-d prediction matrix, or 3-d (batch-wise) array
    :param threshold: values strictly above become 1
    """
    if pred.ndim == 3:
        # Batch_wise: binarize each sample, then re-stack.
        return np.array(
            [pre.binarize(sample, threshold=threshold) for sample in pred])
    return pre.binarize(pred, threshold=threshold)
|
236 |
+
|
237 |
+
|
238 |
+
def double_threshold(x, high_thres, low_thres, n_connect=1):
    """double_threshold
    Helper to apply hysteresis (double) thresholding to n-dim arrays.

    :param x: input array, up to 3 dims:
        3d (batch, time, dim), 2d (time, dim), 1d (time)
    :param high_thres: high threshold value
    :param low_thres: Low threshold value
    :param n_connect: Distance of <= n clusters will be merged
    """
    assert x.ndim <= 3, "Whoops something went wrong with the input ({}), check if its <= 3 dims".format(
        x.shape)
    # Time axis sits at 1 for 3d input and at 0 for 1d/2d input.
    apply_dim = 1 if x.ndim == 3 else 0
    return np.apply_along_axis(
        lambda arr: _double_threshold(arr, high_thres, low_thres,
                                      n_connect=n_connect),
        axis=apply_dim,
        arr=x)
|
261 |
+
|
262 |
+
|
263 |
+
def _double_threshold(x, high_thres, low_thres, n_connect=1, return_arr=True):
    """_double_threshold
    Hysteresis thresholding over a 1-d score array: keep every region above
    the low threshold that contains at least one sample above the high
    threshold, then merge regions closer than n_connect.

    :param x: input array, needs to be 1d
    :param high_thres: High threshold over the array
    :param low_thres: Low threshold over the array
    :param n_connect: Postprocessing, maximal distance between clusters to connect
    :param return_arr: if True return a 0/1 array of x's size, otherwise the
        kept (onset, offset) pairs
    """
    assert x.ndim == 1, "Input needs to be 1d"
    high_locations = np.where(x > high_thres)[0]  # indices above the high threshold
    candidate_regions = find_contiguous_regions(x > low_thres)
    # Keep candidates that contain at least one high-threshold hit.
    kept = list(
        filter(
            lambda pair:
            ((pair[0] <= high_locations) & (high_locations <= pair[1])).any(),
            candidate_regions))
    kept = connect_(kept, n_connect)  # bridge gaps of <= n_connect frames
    if not return_arr:
        return kept
    zero_one_arr = np.zeros_like(x, dtype=int)
    for onset, offset in kept:
        zero_one_arr[onset:offset] = 1
    return zero_one_arr
|
291 |
+
|
292 |
+
|
293 |
+
def connect_clusters(x, n=1):
    """Apply connect_clusters_ to 1-d input, or along axis -2 for nd input.

    NOTE(review): axis -2 walks down columns for a 2-d (time, class) array —
    verify that orientation matches the callers' expectations.
    """
    if x.ndim == 1:
        return connect_clusters_(x, n)
    if x.ndim >= 2:
        return np.apply_along_axis(lambda a: connect_clusters_(a, n=n), -2, x)
|
298 |
+
|
299 |
+
|
300 |
+
def connect_clusters_(x, n=1):
    """connect_clusters_
    Connects clustered predictions (0,1) in x with range n

    :param x: Input array. zero-one format
    :param n: Number of frames to skip until connection can be made
    """
    assert x.ndim == 1, "input needs to be 1d"
    merged = connect_(find_contiguous_regions(x), n=n)
    zero_one_arr = np.zeros_like(x, dtype=int)
    for onset, offset in merged:
        zero_one_arr[onset:offset] = 1
    return zero_one_arr
|
314 |
+
|
315 |
+
|
316 |
+
def connect_(pairs, n=1):
    """connect_
    Merge adjacent clusters whose gap is <= n.

    :param pairs: Clusters of iterateables e.g., [(1,5),(7,10)]
    :param n: distance between two clusters
    """
    # len() check on purpose: pairs may be a numpy array, where bool() fails.
    if len(pairs) == 0:
        return []
    merged = []
    start_, end_ = pairs[0]
    for cur_item, next_item in zip(pairs, pairs[1:]):
        end_ = next_item[1]
        if next_item[0] - cur_item[1] > n:
            # Gap too wide: close the running cluster, start a new one.
            merged.append((start_, cur_item[1]))
            start_ = next_item[0]
    merged.append((start_, end_))
    return merged
|
336 |
+
|
337 |
+
|
338 |
+
def predictions_to_time(df, ratio):
    """Scale the onset/offset columns from frame units to time.

    Mutates *df* in place and returns it for convenience.

    :param df: dataframe with 'onset' and 'offset' columns
    :param ratio: seconds per frame
    """
    for col in ('onset', 'offset'):
        df[col] = df[col] * ratio
    return df
|
342 |
+
|
343 |
+
def upgrade_resolution(arr, scale):
    """Linearly interpolate *arr* along axis 0 to *scale* times the
    time resolution.

    :param arr: array whose first axis is time
    :param scale: integer upsampling factor
    :returns: array with arr.shape[0] * scale frames (linearly extrapolated
        past the last original frame)
    """
    # Fix: removed a stray debug print of arr.shape that spammed stdout.
    x = np.arange(0, arr.shape[0])
    f = interp1d(x, arr, kind='linear', axis=0, fill_value='extrapolate')
    scale_x = np.arange(0, arr.shape[0], 1 / scale)
    return f(scale_x)
|
350 |
+
# a = [0.1,0.2,0.3,0.8,0.4,0.1,0.3,0.9,0.4]
|
351 |
+
# a = np.array(a)
|
352 |
+
# b = a>0.2
|
353 |
+
# _double_threshold(a,0.7,0.2)
|
audio_detection/target_sound_detection/useful_ckpts/tsd/ref_mel.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e4525ad12621117c3a0fcfe974fd55e51583cd219106bf510438f4bec4edc18
|
3 |
+
size 140604911
|
audio_detection/target_sound_detection/useful_ckpts/tsd/run_config.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1331dab1e4c3ac2bc5850156f2000a95fe333bdf06d08ce9b490550726548ab0
|
3 |
+
size 2479
|
audio_detection/target_sound_detection/useful_ckpts/tsd/run_model_7_loss=-0.0724.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9b44e30c4800462c177806bbd7009953d70d531c873e3791ca9aa85375d524d
|
3 |
+
size 343538489
|
audio_detection/target_sound_detection/useful_ckpts/tsd/text_emb.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de482358747778181e4dc530ec61ae94f53ae0b202ac92e99491fe4ceb3cbb1c
|
3 |
+
size 255398
|