m3hrdadfi committed on
Commit
0ee0f25
1 Parent(s): c204fc6

Initial model

Browse files
README.md ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - audio
4
+ - automatic-speech-recognition
5
+ - audio-classification
6
+ license: apache-2.0
7
+ ---
8
+
9
+ # Music Genre Classification using Wav2Vec 2.0
10
+
11
+
12
+ ## How to use
13
+
14
+ ### Requirements
15
+
16
+ ```bash
17
+ # required packages (the leading "!" is Jupyter/Colab cell syntax; omit it in a plain shell)
18
+ !pip install git+https://github.com/huggingface/datasets.git
19
+ !pip install git+https://github.com/huggingface/transformers.git
20
+ !pip install torchaudio
21
+ !pip install librosa
22
+ ```
23
+
24
+ ### Prediction
25
+
26
+ ```python
27
+ import torch
28
+ import torch.nn as nn
29
+ import torch.nn.functional as F
30
+ import torchaudio
31
+ from transformers import AutoConfig, Wav2Vec2FeatureExtractor
32
+
33
+ import librosa
34
+ import IPython.display as ipd
35
+ import numpy as np
36
+ import pandas as pd
37
+ ```
38
+
39
+ ```python
40
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
41
+ model_name_or_path = "m3hrdadfi/wav2vec2-base-100k-voxpopuli-gtzan-music"
42
+ config = AutoConfig.from_pretrained(model_name_or_path)
43
+ feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name_or_path)
44
+ sampling_rate = feature_extractor.sampling_rate
45
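+ model = Wav2Vec2ForSpeechClassification.from_pretrained(model_name_or_path).to(device)  # custom class from the soxan repository (https://github.com/m3hrdadfi/soxan), not part of transformers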
46
+ ```
47
+
48
+ ```python
49
+ def speech_file_to_array_fn(path, sampling_rate):
50
+     speech_array, _sampling_rate = torchaudio.load(path)
51
+     resampler = torchaudio.transforms.Resample(_sampling_rate, sampling_rate)
52
+     speech = resampler(speech_array).squeeze().numpy()
53
+     return speech
54
+
55
+
56
+ def predict(path, sampling_rate):
57
+     speech = speech_file_to_array_fn(path, sampling_rate)
58
+     features = feature_extractor(speech, sampling_rate=sampling_rate, return_tensors="pt", padding=True)
59
+
60
+     input_values = features.input_values.to(device)
61
+
62
+     with torch.no_grad():
63
+         logits = model(input_values).logits
64
+
65
+     scores = F.softmax(logits, dim=1).detach().cpu().numpy()[0]
66
+     outputs = [{"Label": config.id2label[i], "Score": f"{round(score * 100, 3):.1f}%"} for i, score in enumerate(scores)]
67
+     return outputs
68
+ ```
69
+
70
+ ```python
71
+ path = "genres_original/disco/disco.00067.wav"
72
+ outputs = predict(path, sampling_rate)
73
+ ```
74
+
75
+ ```bash
76
+ [
77
+ {'Label': 'blues', 'Score': '0.0%'},
78
+ {'Label': 'classical', 'Score': '0.0%'},
79
+ {'Label': 'country', 'Score': '0.0%'},
80
+ {'Label': 'disco', 'Score': '99.8%'},
81
+ {'Label': 'hiphop', 'Score': '0.0%'},
82
+ {'Label': 'jazz', 'Score': '0.0%'},
83
+ {'Label': 'metal', 'Score': '0.0%'},
84
+ {'Label': 'pop', 'Score': '0.0%'},
85
+ {'Label': 'reggae', 'Score': '0.0%'},
86
+ {'Label': 'rock', 'Score': '0.0%'}
87
+ ]
88
+ ```
89
+
90
+
91
+ ## Evaluation
92
+ The following table summarizes the scores obtained by the model, overall and per class.
93
+
94
+
95
+ | label | precision | recall | f1-score | support |
96
+ |:------------:|:---------:|:------:|:--------:|:-------:|
97
+ | blues | 0.792 | 0.950 | 0.864 | 20 |
98
+ | classical | 0.864 | 0.950 | 0.905 | 20 |
99
+ | country | 0.812 | 0.650 | 0.722 | 20 |
100
+ | disco | 0.778 | 0.700 | 0.737 | 20 |
101
+ | hiphop | 0.933 | 0.700 | 0.800 | 20 |
102
+ | jazz | 1.000 | 0.850 | 0.919 | 20 |
103
+ | metal | 0.783 | 0.900 | 0.837 | 20 |
104
+ | pop | 0.917 | 0.550 | 0.687 | 20 |
105
+ | reggae | 0.543 | 0.950 | 0.691 | 20 |
106
+ | rock | 0.611 | 0.550 | 0.579 | 20 |
107
+ | accuracy | 0.775 | 0.775 | 0.775 | 200 |
108
+ | macro avg | 0.803 | 0.775 | 0.774 | 200 |
109
+ | weighted avg | 0.803 | 0.775 | 0.774 | 200 |
110
+
111
+
112
+ ## Questions?
113
+ Post a GitHub issue [here](https://github.com/m3hrdadfi/soxan/issues).
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.7950000166893005,
4
+ "eval_loss": 1.5802829265594482,
5
+ "eval_runtime": 130.5447,
6
+ "eval_samples": 200,
7
+ "eval_samples_per_second": 1.532,
8
+ "eval_steps_per_second": 1.532,
9
+ "train_runtime": 7059.2772,
10
+ "train_samples": 799,
11
+ "train_samples_per_second": 2.264,
12
+ "train_steps_per_second": 1.13
13
+ }
config.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/notebooks/checkpoint-5700",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": true,
5
+ "architectures": [
6
+ "Wav2Vec2ForSpeechClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "conv_bias": false,
11
+ "conv_dim": [
12
+ 512,
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512
19
+ ],
20
+ "conv_kernel": [
21
+ 10,
22
+ 3,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 2,
27
+ 2
28
+ ],
29
+ "conv_stride": [
30
+ 5,
31
+ 2,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2
37
+ ],
38
+ "ctc_loss_reduction": "sum",
39
+ "ctc_zero_infinity": false,
40
+ "do_stable_layer_norm": false,
41
+ "eos_token_id": 2,
42
+ "feat_extract_activation": "gelu",
43
+ "feat_extract_dropout": 0.0,
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.1,
46
+ "final_dropout": 0.1,
47
+ "finetuning_task": "wav2vec2_clf",
48
+ "gradient_checkpointing": false,
49
+ "hidden_act": "gelu",
50
+ "hidden_dropout": 0.1,
51
+ "hidden_dropout_prob": 0.1,
52
+ "hidden_size": 768,
53
+ "id2label": {
54
+ "0": "blues",
55
+ "1": "classical",
56
+ "2": "country",
57
+ "3": "disco",
58
+ "4": "hiphop",
59
+ "5": "jazz",
60
+ "6": "metal",
61
+ "7": "pop",
62
+ "8": "reggae",
63
+ "9": "rock"
64
+ },
65
+ "initializer_range": 0.02,
66
+ "intermediate_size": 3072,
67
+ "label2id": {
68
+ "blues": 0,
69
+ "classical": 1,
70
+ "country": 2,
71
+ "disco": 3,
72
+ "hiphop": 4,
73
+ "jazz": 5,
74
+ "metal": 6,
75
+ "pop": 7,
76
+ "reggae": 8,
77
+ "rock": 9
78
+ },
79
+ "layer_norm_eps": 1e-05,
80
+ "layerdrop": 0.1,
81
+ "mask_feature_length": 10,
82
+ "mask_feature_prob": 0.0,
83
+ "mask_time_length": 10,
84
+ "mask_time_prob": 0.05,
85
+ "model_type": "wav2vec2",
86
+ "num_attention_heads": 12,
87
+ "num_conv_pos_embedding_groups": 16,
88
+ "num_conv_pos_embeddings": 128,
89
+ "num_feat_extract_layers": 7,
90
+ "num_hidden_layers": 12,
91
+ "pad_token_id": 0,
92
+ "pooling_mode": "mean",
93
+ "problem_type": "single_label_classification",
94
+ "transformers_version": "4.7.0.dev0",
95
+ "vocab_size": 32
96
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.7950000166893005,
4
+ "eval_loss": 1.5802829265594482,
5
+ "eval_runtime": 130.5447,
6
+ "eval_samples": 200,
7
+ "eval_samples_per_second": 1.532,
8
+ "eval_steps_per_second": 1.532
9
+ }
predict_results.txt ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ index prediction
2
+ 0 blues
3
+ 1 disco
4
+ 2 reggae
5
+ 3 reggae
6
+ 4 country
7
+ 5 classical
8
+ 6 reggae
9
+ 7 metal
10
+ 8 hiphop
11
+ 9 hiphop
12
+ 10 country
13
+ 11 reggae
14
+ 12 reggae
15
+ 13 metal
16
+ 14 hiphop
17
+ 15 hiphop
18
+ 16 rock
19
+ 17 pop
20
+ 18 reggae
21
+ 19 hiphop
22
+ 20 disco
23
+ 21 pop
24
+ 22 classical
25
+ 23 blues
26
+ 24 jazz
27
+ 25 classical
28
+ 26 rock
29
+ 27 rock
30
+ 28 blues
31
+ 29 disco
32
+ 30 jazz
33
+ 31 metal
34
+ 32 blues
35
+ 33 metal
36
+ 34 classical
37
+ 35 country
38
+ 36 blues
39
+ 37 blues
40
+ 38 metal
41
+ 39 reggae
42
+ 40 rock
43
+ 41 classical
44
+ 42 reggae
45
+ 43 classical
46
+ 44 metal
47
+ 45 rock
48
+ 46 hiphop
49
+ 47 rock
50
+ 48 classical
51
+ 49 pop
52
+ 50 reggae
53
+ 51 reggae
54
+ 52 jazz
55
+ 53 pop
56
+ 54 disco
57
+ 55 hiphop
58
+ 56 reggae
59
+ 57 blues
60
+ 58 reggae
61
+ 59 metal
62
+ 60 classical
63
+ 61 country
64
+ 62 rock
65
+ 63 metal
66
+ 64 rock
67
+ 65 metal
68
+ 66 classical
69
+ 67 blues
70
+ 68 blues
71
+ 69 rock
72
+ 70 hiphop
73
+ 71 blues
74
+ 72 pop
75
+ 73 jazz
76
+ 74 jazz
77
+ 75 blues
78
+ 76 metal
79
+ 77 country
80
+ 78 country
81
+ 79 country
82
+ 80 metal
83
+ 81 pop
84
+ 82 disco
85
+ 83 metal
86
+ 84 jazz
87
+ 85 classical
88
+ 86 disco
89
+ 87 country
90
+ 88 metal
91
+ 89 blues
92
+ 90 reggae
93
+ 91 country
94
+ 92 jazz
95
+ 93 disco
96
+ 94 classical
97
+ 95 reggae
98
+ 96 hiphop
99
+ 97 jazz
100
+ 98 country
101
+ 99 jazz
102
+ 100 pop
103
+ 101 jazz
104
+ 102 pop
105
+ 103 metal
106
+ 104 metal
107
+ 105 blues
108
+ 106 jazz
109
+ 107 reggae
110
+ 108 classical
111
+ 109 country
112
+ 110 pop
113
+ 111 reggae
114
+ 112 rock
115
+ 113 classical
116
+ 114 reggae
117
+ 115 rock
118
+ 116 blues
119
+ 117 rock
120
+ 118 classical
121
+ 119 pop
122
+ 120 disco
123
+ 121 jazz
124
+ 122 disco
125
+ 123 disco
126
+ 124 blues
127
+ 125 classical
128
+ 126 country
129
+ 127 country
130
+ 128 rock
131
+ 129 reggae
132
+ 130 disco
133
+ 131 pop
134
+ 132 metal
135
+ 133 reggae
136
+ 134 jazz
137
+ 135 reggae
138
+ 136 metal
139
+ 137 reggae
140
+ 138 hiphop
141
+ 139 reggae
142
+ 140 blues
143
+ 141 country
144
+ 142 country
145
+ 143 pop
146
+ 144 blues
147
+ 145 country
148
+ 146 jazz
149
+ 147 disco
150
+ 148 disco
151
+ 149 pop
152
+ 150 country
153
+ 151 reggae
154
+ 152 pop
155
+ 153 blues
156
+ 154 classical
157
+ 155 pop
158
+ 156 blues
159
+ 157 metal
160
+ 158 country
161
+ 159 disco
162
+ 160 hiphop
163
+ 161 rock
164
+ 162 reggae
165
+ 163 pop
166
+ 164 reggae
167
+ 165 pop
168
+ 166 rock
169
+ 167 reggae
170
+ 168 blues
171
+ 169 disco
172
+ 170 classical
173
+ 171 metal
174
+ 172 hiphop
175
+ 173 hiphop
176
+ 174 metal
177
+ 175 reggae
178
+ 176 rock
179
+ 177 reggae
180
+ 178 classical
181
+ 179 classical
182
+ 180 jazz
183
+ 181 country
184
+ 182 blues
185
+ 183 reggae
186
+ 184 disco
187
+ 185 hiphop
188
+ 186 rock
189
+ 187 jazz
190
+ 188 reggae
191
+ 189 country
192
+ 190 classical
193
+ 191 hiphop
194
+ 192 jazz
195
+ 193 rock
196
+ 194 metal
197
+ 195 disco
198
+ 196 classical
199
+ 197 jazz
200
+ 198 classical
201
+ 199 blues
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": false,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85a577259f7da721a38f7f9821e4f53930f184dc0f0c96c633791b6afa99ac6f
3
+ size 379970942
test.csv ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name path label
2
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00085.wav blues
3
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00055.wav disco
4
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00080.wav reggae
5
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00037.wav reggae
6
+ country /notebooks/data/audio/Data/genres_original/country/country.00052.wav country
7
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00099.wav blues
8
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00026.wav reggae
9
+ country /notebooks/data/audio/Data/genres_original/country/country.00001.wav country
10
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00061.wav hiphop
11
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00072.wav hiphop
12
+ country /notebooks/data/audio/Data/genres_original/country/country.00051.wav country
13
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00000.wav disco
14
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00027.wav disco
15
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00007.wav metal
16
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00006.wav hiphop
17
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00041.wav hiphop
18
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00024.wav rock
19
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00031.wav pop
20
+ country /notebooks/data/audio/Data/genres_original/country/country.00042.wav country
21
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00077.wav hiphop
22
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00028.wav disco
23
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00027.wav pop
24
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00094.wav classical
25
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00078.wav blues
26
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00038.wav jazz
27
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00052.wav classical
28
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00052.wav rock
29
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00060.wav reggae
30
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00064.wav blues
31
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00098.wav disco
32
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00094.wav jazz
33
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00042.wav metal
34
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00066.wav blues
35
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00036.wav metal
36
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00050.wav classical
37
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00076.wav blues
38
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00009.wav blues
39
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00017.wav blues
40
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00017.wav metal
41
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00030.wav rock
42
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00091.wav rock
43
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00023.wav classical
44
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00088.wav rock
45
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00015.wav classical
46
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00028.wav metal
47
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00098.wav rock
48
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00046.wav hiphop
49
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00033.wav pop
50
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00001.wav jazz
51
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00057.wav pop
52
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00084.wav reggae
53
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00087.wav hiphop
54
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00060.wav jazz
55
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00026.wav pop
56
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00081.wav disco
57
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00052.wav hiphop
58
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00044.wav hiphop
59
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00088.wav blues
60
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00070.wav reggae
61
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00014.wav hiphop
62
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00040.wav classical
63
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00041.wav pop
64
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00095.wav rock
65
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00089.wav metal
66
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00039.wav rock
67
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00057.wav metal
68
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00076.wav classical
69
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00086.wav blues
70
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00022.wav blues
71
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00080.wav rock
72
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00020.wav hiphop
73
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00037.wav blues
74
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00067.wav pop
75
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00098.wav reggae
76
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00059.wav jazz
77
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00029.wav metal
78
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00072.wav metal
79
+ country /notebooks/data/audio/Data/genres_original/country/country.00090.wav country
80
+ country /notebooks/data/audio/Data/genres_original/country/country.00071.wav country
81
+ country /notebooks/data/audio/Data/genres_original/country/country.00008.wav country
82
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00005.wav metal
83
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00036.wav pop
84
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00058.wav disco
85
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00095.wav metal
86
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00074.wav jazz
87
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00056.wav classical
88
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00024.wav disco
89
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00044.wav rock
90
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00077.wav metal
91
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00057.wav blues
92
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00005.wav reggae
93
+ country /notebooks/data/audio/Data/genres_original/country/country.00016.wav country
94
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00037.wav jazz
95
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00056.wav disco
96
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00003.wav jazz
97
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00062.wav reggae
98
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00094.wav hiphop
99
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00055.wav jazz
100
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00089.wav rock
101
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00056.wav jazz
102
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00023.wav pop
103
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00082.wav jazz
104
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00058.wav rock
105
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00064.wav metal
106
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00079.wav metal
107
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00019.wav blues
108
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00043.wav jazz
109
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00033.wav reggae
110
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00097.wav classical
111
+ country /notebooks/data/audio/Data/genres_original/country/country.00050.wav country
112
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00077.wav pop
113
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00064.wav reggae
114
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00045.wav rock
115
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00072.wav classical
116
+ country /notebooks/data/audio/Data/genres_original/country/country.00002.wav country
117
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00091.wav disco
118
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00074.wav blues
119
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00065.wav disco
120
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00017.wav classical
121
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00096.wav pop
122
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00010.wav hiphop
123
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00088.wav jazz
124
+ country /notebooks/data/audio/Data/genres_original/country/country.00033.wav country
125
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00077.wav disco
126
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00090.wav blues
127
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00032.wav classical
128
+ country /notebooks/data/audio/Data/genres_original/country/country.00082.wav country
129
+ country /notebooks/data/audio/Data/genres_original/country/country.00009.wav country
130
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00054.wav classical
131
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00068.wav reggae
132
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00087.wav disco
133
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00058.wav pop
134
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00063.wav metal
135
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00012.wav reggae
136
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00079.wav jazz
137
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00048.wav hiphop
138
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00005.wav pop
139
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00088.wav metal
140
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00007.wav hiphop
141
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00093.wav reggae
142
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00011.wav rock
143
+ country /notebooks/data/audio/Data/genres_original/country/country.00058.wav country
144
+ country /notebooks/data/audio/Data/genres_original/country/country.00064.wav country
145
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00076.wav pop
146
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00090.wav rock
147
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00018.wav rock
148
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00009.wav jazz
149
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00098.wav pop
150
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00067.wav disco
151
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00097.wav pop
152
+ country /notebooks/data/audio/Data/genres_original/country/country.00054.wav country
153
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00085.wav reggae
154
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00045.wav pop
155
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00018.wav blues
156
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00004.wav classical
157
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00047.wav disco
158
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00086.wav jazz
159
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00020.wav metal
160
+ country /notebooks/data/audio/Data/genres_original/country/country.00044.wav country
161
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00007.wav disco
162
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00042.wav hiphop
163
+ country /notebooks/data/audio/Data/genres_original/country/country.00040.wav country
164
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00034.wav reggae
165
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00049.wav pop
166
+ country /notebooks/data/audio/Data/genres_original/country/country.00045.wav country
167
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00060.wav pop
168
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00049.wav rock
169
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00083.wav disco
170
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00001.wav blues
171
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00018.wav disco
172
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00075.wav classical
173
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00043.wav metal
174
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00009.wav hiphop
175
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00084.wav hiphop
176
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00096.wav metal
177
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00039.wav reggae
178
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00021.wav rock
179
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00075.wav reggae
180
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00077.wav classical
181
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00055.wav classical
182
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00096.wav jazz
183
+ country /notebooks/data/audio/Data/genres_original/country/country.00062.wav country
184
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00023.wav blues
185
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00002.wav reggae
186
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00069.wav disco
187
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00019.wav hiphop
188
+ pop /notebooks/data/audio/Data/genres_original/pop/pop.00014.wav pop
189
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00077.wav jazz
190
+ reggae /notebooks/data/audio/Data/genres_original/reggae/reggae.00089.wav reggae
191
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00002.wav rock
192
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00065.wav classical
193
+ hiphop /notebooks/data/audio/Data/genres_original/hiphop/hiphop.00050.wav hiphop
194
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00030.wav jazz
195
+ rock /notebooks/data/audio/Data/genres_original/rock/rock.00072.wav rock
196
+ metal /notebooks/data/audio/Data/genres_original/metal/metal.00054.wav metal
197
+ disco /notebooks/data/audio/Data/genres_original/disco/disco.00035.wav disco
198
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00090.wav classical
199
+ jazz /notebooks/data/audio/Data/genres_original/jazz/jazz.00052.wav jazz
200
+ classical /notebooks/data/audio/Data/genres_original/classical/classical.00002.wav classical
201
+ blues /notebooks/data/audio/Data/genres_original/blues/blues.00060.wav blues
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 20.0,
3
+ "train_runtime": 7059.2772,
4
+ "train_samples": 799,
5
+ "train_samples_per_second": 2.264,
6
+ "train_steps_per_second": 1.13
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 19.99874843554443,
5
+ "global_step": 7980,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.25,
12
+ "learning_rate": 2.495e-05,
13
+ "loss": 2.2002,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 1.25,
18
+ "eval_accuracy": 0.3199999928474426,
19
+ "eval_loss": 1.9293218851089478,
20
+ "eval_runtime": 130.8072,
21
+ "eval_samples_per_second": 1.529,
22
+ "eval_steps_per_second": 1.529,
23
+ "step": 500
24
+ },
25
+ {
26
+ "epoch": 2.51,
27
+ "learning_rate": 4.99e-05,
28
+ "loss": 1.7914,
29
+ "step": 1000
30
+ },
31
+ {
32
+ "epoch": 2.51,
33
+ "eval_accuracy": 0.4699999988079071,
34
+ "eval_loss": 1.528580904006958,
35
+ "eval_runtime": 135.4283,
36
+ "eval_samples_per_second": 1.477,
37
+ "eval_steps_per_second": 1.477,
38
+ "step": 1000
39
+ },
40
+ {
41
+ "epoch": 3.76,
42
+ "learning_rate": 7.475000000000001e-05,
43
+ "loss": 1.6111,
44
+ "step": 1500
45
+ },
46
+ {
47
+ "epoch": 3.76,
48
+ "eval_accuracy": 0.3050000071525574,
49
+ "eval_loss": 2.1590416431427,
50
+ "eval_runtime": 133.2351,
51
+ "eval_samples_per_second": 1.501,
52
+ "eval_steps_per_second": 1.501,
53
+ "step": 1500
54
+ },
55
+ {
56
+ "epoch": 5.01,
57
+ "learning_rate": 9.975000000000001e-05,
58
+ "loss": 1.4597,
59
+ "step": 2000
60
+ },
61
+ {
62
+ "epoch": 5.01,
63
+ "eval_accuracy": 0.46000000834465027,
64
+ "eval_loss": 1.557754635810852,
65
+ "eval_runtime": 131.9409,
66
+ "eval_samples_per_second": 1.516,
67
+ "eval_steps_per_second": 1.516,
68
+ "step": 2000
69
+ },
70
+ {
71
+ "epoch": 6.27,
72
+ "learning_rate": 3.7578814627994955e-05,
73
+ "loss": 1.284,
74
+ "step": 2500
75
+ },
76
+ {
77
+ "epoch": 6.27,
78
+ "eval_accuracy": 0.6650000214576721,
79
+ "eval_loss": 1.078925609588623,
80
+ "eval_runtime": 130.6559,
81
+ "eval_samples_per_second": 1.531,
82
+ "eval_steps_per_second": 1.531,
83
+ "step": 2500
84
+ },
85
+ {
86
+ "epoch": 7.52,
87
+ "learning_rate": 8.336120401337793e-05,
88
+ "loss": 1.1617,
89
+ "step": 3000
90
+ },
91
+ {
92
+ "epoch": 7.52,
93
+ "eval_accuracy": 0.6050000190734863,
94
+ "eval_loss": 1.660351276397705,
95
+ "eval_runtime": 133.1101,
96
+ "eval_samples_per_second": 1.503,
97
+ "eval_steps_per_second": 1.503,
98
+ "step": 3000
99
+ },
100
+ {
101
+ "epoch": 8.77,
102
+ "learning_rate": 7.501672240802676e-05,
103
+ "loss": 1.0521,
104
+ "step": 3500
105
+ },
106
+ {
107
+ "epoch": 8.77,
108
+ "eval_accuracy": 0.699999988079071,
109
+ "eval_loss": 1.3572155237197876,
110
+ "eval_runtime": 132.8422,
111
+ "eval_samples_per_second": 1.506,
112
+ "eval_steps_per_second": 1.506,
113
+ "step": 3500
114
+ },
115
+ {
116
+ "epoch": 10.03,
117
+ "learning_rate": 6.665551839464883e-05,
118
+ "loss": 0.928,
119
+ "step": 4000
120
+ },
121
+ {
122
+ "epoch": 10.03,
123
+ "eval_accuracy": 0.7200000286102295,
124
+ "eval_loss": 1.5309524536132812,
125
+ "eval_runtime": 133.7214,
126
+ "eval_samples_per_second": 1.496,
127
+ "eval_steps_per_second": 1.496,
128
+ "step": 4000
129
+ },
130
+ {
131
+ "epoch": 11.28,
132
+ "learning_rate": 5.829431438127091e-05,
133
+ "loss": 0.7632,
134
+ "step": 4500
135
+ },
136
+ {
137
+ "epoch": 11.28,
138
+ "eval_accuracy": 0.7749999761581421,
139
+ "eval_loss": 1.2670022249221802,
140
+ "eval_runtime": 132.539,
141
+ "eval_samples_per_second": 1.509,
142
+ "eval_steps_per_second": 1.509,
143
+ "step": 4500
144
+ },
145
+ {
146
+ "epoch": 12.53,
147
+ "learning_rate": 4.993311036789298e-05,
148
+ "loss": 0.6718,
149
+ "step": 5000
150
+ },
151
+ {
152
+ "epoch": 12.53,
153
+ "eval_accuracy": 0.7900000214576721,
154
+ "eval_loss": 1.2351398468017578,
155
+ "eval_runtime": 132.8123,
156
+ "eval_samples_per_second": 1.506,
157
+ "eval_steps_per_second": 1.506,
158
+ "step": 5000
159
+ },
160
+ {
161
+ "epoch": 13.78,
162
+ "learning_rate": 4.157190635451505e-05,
163
+ "loss": 0.4634,
164
+ "step": 5500
165
+ },
166
+ {
167
+ "epoch": 13.78,
168
+ "eval_accuracy": 0.75,
169
+ "eval_loss": 1.7186837196350098,
170
+ "eval_runtime": 131.5974,
171
+ "eval_samples_per_second": 1.52,
172
+ "eval_steps_per_second": 1.52,
173
+ "step": 5500
174
+ },
175
+ {
176
+ "epoch": 15.04,
177
+ "learning_rate": 3.322742474916388e-05,
178
+ "loss": 0.373,
179
+ "step": 6000
180
+ },
181
+ {
182
+ "epoch": 15.04,
183
+ "eval_accuracy": 0.7400000095367432,
184
+ "eval_loss": 1.6782505512237549,
185
+ "eval_runtime": 130.9999,
186
+ "eval_samples_per_second": 1.527,
187
+ "eval_steps_per_second": 1.527,
188
+ "step": 6000
189
+ },
190
+ {
191
+ "epoch": 16.29,
192
+ "learning_rate": 2.4866220735785955e-05,
193
+ "loss": 0.349,
194
+ "step": 6500
195
+ },
196
+ {
197
+ "epoch": 16.29,
198
+ "eval_accuracy": 0.7900000214576721,
199
+ "eval_loss": 1.2969427108764648,
200
+ "eval_runtime": 131.4011,
201
+ "eval_samples_per_second": 1.522,
202
+ "eval_steps_per_second": 1.522,
203
+ "step": 6500
204
+ },
205
+ {
206
+ "epoch": 17.54,
207
+ "learning_rate": 1.650501672240803e-05,
208
+ "loss": 0.2225,
209
+ "step": 7000
210
+ },
211
+ {
212
+ "epoch": 17.54,
213
+ "eval_accuracy": 0.7799999713897705,
214
+ "eval_loss": 1.6187431812286377,
215
+ "eval_runtime": 130.156,
216
+ "eval_samples_per_second": 1.537,
217
+ "eval_steps_per_second": 1.537,
218
+ "step": 7000
219
+ },
220
+ {
221
+ "epoch": 18.8,
222
+ "learning_rate": 8.1438127090301e-06,
223
+ "loss": 0.2156,
224
+ "step": 7500
225
+ },
226
+ {
227
+ "epoch": 18.8,
228
+ "eval_accuracy": 0.7850000262260437,
229
+ "eval_loss": 1.5765472650527954,
230
+ "eval_runtime": 130.2954,
231
+ "eval_samples_per_second": 1.535,
232
+ "eval_steps_per_second": 1.535,
233
+ "step": 7500
234
+ },
235
+ {
236
+ "epoch": 20.0,
237
+ "step": 7980,
238
+ "total_flos": 4.373813712366772e+18,
239
+ "train_runtime": 7059.2772,
240
+ "train_samples_per_second": 2.264,
241
+ "train_steps_per_second": 1.13
242
+ }
243
+ ],
244
+ "max_steps": 7980,
245
+ "num_train_epochs": 20,
246
+ "total_flos": 4.373813712366772e+18,
247
+ "trial_name": null,
248
+ "trial_params": null
249
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e003fe25d5cb12a003c986f9c213207190a0dc778c0cda5e10cc2c0a5575b2a
3
+ size 2479