Marcos12886
committed on
Commit
•
5c2a1eb
1
Parent(s):
ee61ac2
Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- README.md +17 -20
- checkpoint-170/config.json +80 -0
- checkpoint-170/model.safetensors +3 -0
- checkpoint-170/optimizer.pt +3 -0
- checkpoint-170/preprocessor_config.json +9 -0
- checkpoint-170/rng_state.pth +3 -0
- checkpoint-170/scheduler.pt +3 -0
- checkpoint-170/trainer_state.json +66 -0
- checkpoint-170/training_args.bin +3 -0
- checkpoint-255/config.json +80 -0
- checkpoint-255/model.safetensors +3 -0
- checkpoint-255/optimizer.pt +3 -0
- checkpoint-255/preprocessor_config.json +9 -0
- checkpoint-255/rng_state.pth +3 -0
- checkpoint-255/scheduler.pt +3 -0
- checkpoint-255/trainer_state.json +78 -0
- checkpoint-255/training_args.bin +3 -0
- checkpoint-256/config.json +80 -0
- checkpoint-256/model.safetensors +3 -0
- checkpoint-256/optimizer.pt +3 -0
- checkpoint-256/preprocessor_config.json +9 -0
- checkpoint-256/rng_state.pth +3 -0
- checkpoint-256/scheduler.pt +3 -0
- checkpoint-256/trainer_state.json +78 -0
- checkpoint-256/training_args.bin +3 -0
- checkpoint-341/config.json +80 -0
- checkpoint-341/model.safetensors +3 -0
- checkpoint-341/optimizer.pt +3 -0
- checkpoint-341/preprocessor_config.json +9 -0
- checkpoint-341/rng_state.pth +3 -0
- checkpoint-341/scheduler.pt +3 -0
- checkpoint-341/trainer_state.json +90 -0
- checkpoint-341/training_args.bin +3 -0
- checkpoint-426/config.json +80 -0
- checkpoint-426/model.safetensors +3 -0
- checkpoint-426/optimizer.pt +3 -0
- checkpoint-426/preprocessor_config.json +9 -0
- checkpoint-426/rng_state.pth +3 -0
- checkpoint-426/scheduler.pt +3 -0
- checkpoint-426/trainer_state.json +102 -0
- checkpoint-426/training_args.bin +3 -0
- checkpoint-512/config.json +80 -0
- checkpoint-512/model.safetensors +3 -0
- checkpoint-512/optimizer.pt +3 -0
- checkpoint-512/preprocessor_config.json +9 -0
- checkpoint-512/rng_state.pth +3 -0
- checkpoint-512/scheduler.pt +3 -0
- checkpoint-512/trainer_state.json +121 -0
- checkpoint-512/training_args.bin +3 -0
- checkpoint-595/config.json +80 -0
README.md
CHANGED
@@ -26,16 +26,16 @@ model-index:
|
|
26 |
metrics:
|
27 |
- name: Accuracy
|
28 |
type: accuracy
|
29 |
-
value: 0.
|
30 |
- name: F1
|
31 |
type: f1
|
32 |
-
value: 0.
|
33 |
- name: Precision
|
34 |
type: precision
|
35 |
-
value: 0.
|
36 |
- name: Recall
|
37 |
type: recall
|
38 |
-
value: 0.
|
39 |
---
|
40 |
|
41 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
@@ -45,11 +45,11 @@ should probably proofread and complete it, then remove this comment. -->
|
|
45 |
|
46 |
This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on the audiofolder dataset.
|
47 |
It achieves the following results on the evaluation set:
|
48 |
-
- Loss: 0.
|
49 |
-
- Accuracy: 0.
|
50 |
-
- F1: 0.
|
51 |
-
- Precision: 0.
|
52 |
-
- Recall: 0.
|
53 |
|
54 |
## Model description
|
55 |
|
@@ -77,22 +77,19 @@ The following hyperparameters were used during training:
|
|
77 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
78 |
- lr_scheduler_type: cosine
|
79 |
- lr_scheduler_warmup_ratio: 0.001
|
80 |
-
- num_epochs:
|
81 |
|
82 |
### Training results
|
83 |
|
84 |
| Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
|
85 |
|:-------------:|:------:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
|
86 |
-
| No log | 0.
|
87 |
-
| No log | 1.
|
88 |
-
| No log | 2.
|
89 |
-
| No log |
|
90 |
-
| No log | 4.
|
91 |
-
|
|
92 |
-
|
|
93 |
-
| No log | 8.0 | 102 | 0.0318 | 0.9951 | 0.9951 | 0.9951 | 0.9951 |
|
94 |
-
| No log | 8.9412 | 114 | 0.0331 | 0.9853 | 0.9853 | 0.9854 | 0.9853 |
|
95 |
-
| No log | 9.4118 | 120 | 0.0332 | 0.9853 | 0.9853 | 0.9854 | 0.9853 |
|
96 |
|
97 |
|
98 |
### Framework versions
|
|
|
26 |
metrics:
|
27 |
- name: Accuracy
|
28 |
type: accuracy
|
29 |
+
value: 0.991941391941392
|
30 |
- name: F1
|
31 |
type: f1
|
32 |
+
value: 0.9919569277165429
|
33 |
- name: Precision
|
34 |
type: precision
|
35 |
+
value: 0.9920048531706146
|
36 |
- name: Recall
|
37 |
type: recall
|
38 |
+
value: 0.991941391941392
|
39 |
---
|
40 |
|
41 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
45 |
|
46 |
This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on the audiofolder dataset.
|
47 |
It achieves the following results on the evaluation set:
|
48 |
+
- Loss: 0.0408
|
49 |
+
- Accuracy: 0.9919
|
50 |
+
- F1: 0.9920
|
51 |
+
- Precision: 0.9920
|
52 |
+
- Recall: 0.9919
|
53 |
|
54 |
## Model description
|
55 |
|
|
|
77 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
78 |
- lr_scheduler_type: cosine
|
79 |
- lr_scheduler_warmup_ratio: 0.001
|
80 |
+
- num_epochs: 7
|
81 |
|
82 |
### Training results
|
83 |
|
84 |
| Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
|
85 |
|:-------------:|:------:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
|
86 |
+
| No log | 0.9956 | 85 | 0.0736 | 0.9788 | 0.9788 | 0.9790 | 0.9788 |
|
87 |
+
| No log | 1.9912 | 170 | 0.0680 | 0.9758 | 0.9760 | 0.9770 | 0.9758 |
|
88 |
+
| No log | 2.9985 | 256 | 0.0447 | 0.9875 | 0.9876 | 0.9876 | 0.9875 |
|
89 |
+
| No log | 3.9941 | 341 | 0.0452 | 0.9905 | 0.9905 | 0.9905 | 0.9905 |
|
90 |
+
| No log | 4.9898 | 426 | 0.0439 | 0.9919 | 0.9920 | 0.9920 | 0.9919 |
|
91 |
+
| 0.053 | 5.9971 | 512 | 0.0401 | 0.9919 | 0.9920 | 0.9920 | 0.9919 |
|
92 |
+
| 0.053 | 6.9693 | 595 | 0.0408 | 0.9919 | 0.9920 | 0.9920 | 0.9919 |
|
|
|
|
|
|
|
93 |
|
94 |
|
95 |
### Framework versions
|
checkpoint-170/config.json
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"hidden_act": "gelu",
|
49 |
+
"hidden_dropout": 0.1,
|
50 |
+
"hidden_size": 768,
|
51 |
+
"id2label": {
|
52 |
+
"0": "crying",
|
53 |
+
"1": "no_crying"
|
54 |
+
},
|
55 |
+
"initializer_range": 0.02,
|
56 |
+
"intermediate_size": 3072,
|
57 |
+
"label2id": {
|
58 |
+
"crying": "0",
|
59 |
+
"no_crying": "1"
|
60 |
+
},
|
61 |
+
"layer_norm_eps": 1e-05,
|
62 |
+
"layerdrop": 0.0,
|
63 |
+
"mask_feature_length": 10,
|
64 |
+
"mask_feature_min_masks": 0,
|
65 |
+
"mask_feature_prob": 0.0,
|
66 |
+
"mask_time_length": 10,
|
67 |
+
"mask_time_min_masks": 2,
|
68 |
+
"mask_time_prob": 0.05,
|
69 |
+
"model_type": "hubert",
|
70 |
+
"num_attention_heads": 12,
|
71 |
+
"num_conv_pos_embedding_groups": 16,
|
72 |
+
"num_conv_pos_embeddings": 128,
|
73 |
+
"num_feat_extract_layers": 7,
|
74 |
+
"num_hidden_layers": 2,
|
75 |
+
"pad_token_id": 0,
|
76 |
+
"torch_dtype": "float32",
|
77 |
+
"transformers_version": "4.44.2",
|
78 |
+
"use_weighted_layer_sum": false,
|
79 |
+
"vocab_size": 32
|
80 |
+
}
|
checkpoint-170/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14ad2529c107706df5e38d4e839c5fe54bbd3638840f1b016ba56f9e40cfd45a
|
3 |
+
size 94763496
|
checkpoint-170/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:feca43a53b1ea22e785cf6aebc67731d4d759491623447479b488904538b9c5d
|
3 |
+
size 189552570
|
checkpoint-170/preprocessor_config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": false,
|
3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
+
"feature_size": 1,
|
5 |
+
"padding_side": "right",
|
6 |
+
"padding_value": 0,
|
7 |
+
"return_attention_mask": false,
|
8 |
+
"sampling_rate": 16000
|
9 |
+
}
|
checkpoint-170/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:309f3425e8cb713e30160da01fdcf088ad760dfa7761c7bd73a907a4115b0ee0
|
3 |
+
size 14308
|
checkpoint-170/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e9632dd1819b93de48e3bbe467fe0d96e895179dd60318df68d501d078c6ed9
|
3 |
+
size 1064
|
checkpoint-170/trainer_state.json
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.9787545787545787,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-cry-detector/checkpoint-85",
|
4 |
+
"epoch": 1.9912152269399708,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 170,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9956076134699854,
|
13 |
+
"eval_accuracy": 0.9787545787545787,
|
14 |
+
"eval_f1": 0.9788275754377449,
|
15 |
+
"eval_loss": 0.07363971322774887,
|
16 |
+
"eval_precision": 0.9790028856592324,
|
17 |
+
"eval_recall": 0.9787545787545787,
|
18 |
+
"eval_runtime": 4.1234,
|
19 |
+
"eval_samples_per_second": 331.041,
|
20 |
+
"eval_steps_per_second": 41.471,
|
21 |
+
"step": 85
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.9912152269399708,
|
25 |
+
"eval_accuracy": 0.9758241758241758,
|
26 |
+
"eval_f1": 0.9760458977669253,
|
27 |
+
"eval_loss": 0.0680176243185997,
|
28 |
+
"eval_precision": 0.9770364666252971,
|
29 |
+
"eval_recall": 0.9758241758241758,
|
30 |
+
"eval_runtime": 3.9805,
|
31 |
+
"eval_samples_per_second": 342.924,
|
32 |
+
"eval_steps_per_second": 42.96,
|
33 |
+
"step": 170
|
34 |
+
}
|
35 |
+
],
|
36 |
+
"logging_steps": 500,
|
37 |
+
"max_steps": 595,
|
38 |
+
"num_input_tokens_seen": 0,
|
39 |
+
"num_train_epochs": 7,
|
40 |
+
"save_steps": 500,
|
41 |
+
"stateful_callbacks": {
|
42 |
+
"EarlyStoppingCallback": {
|
43 |
+
"args": {
|
44 |
+
"early_stopping_patience": 3,
|
45 |
+
"early_stopping_threshold": 0.0
|
46 |
+
},
|
47 |
+
"attributes": {
|
48 |
+
"early_stopping_patience_counter": 0
|
49 |
+
}
|
50 |
+
},
|
51 |
+
"TrainerControl": {
|
52 |
+
"args": {
|
53 |
+
"should_epoch_stop": false,
|
54 |
+
"should_evaluate": false,
|
55 |
+
"should_log": false,
|
56 |
+
"should_save": true,
|
57 |
+
"should_training_stop": false
|
58 |
+
},
|
59 |
+
"attributes": {}
|
60 |
+
}
|
61 |
+
},
|
62 |
+
"total_flos": 2.483401519872e+16,
|
63 |
+
"train_batch_size": 8,
|
64 |
+
"trial_name": null,
|
65 |
+
"trial_params": null
|
66 |
+
}
|
checkpoint-170/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caa3df7f2810faa9c35a46d178b12337edb0b134aa27dc1689b302d9bcc9d3d0
|
3 |
+
size 5240
|
checkpoint-255/config.json
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"hidden_act": "gelu",
|
49 |
+
"hidden_dropout": 0.1,
|
50 |
+
"hidden_size": 768,
|
51 |
+
"id2label": {
|
52 |
+
"0": "crying",
|
53 |
+
"1": "no_crying"
|
54 |
+
},
|
55 |
+
"initializer_range": 0.02,
|
56 |
+
"intermediate_size": 3072,
|
57 |
+
"label2id": {
|
58 |
+
"crying": "0",
|
59 |
+
"no_crying": "1"
|
60 |
+
},
|
61 |
+
"layer_norm_eps": 1e-05,
|
62 |
+
"layerdrop": 0.0,
|
63 |
+
"mask_feature_length": 10,
|
64 |
+
"mask_feature_min_masks": 0,
|
65 |
+
"mask_feature_prob": 0.0,
|
66 |
+
"mask_time_length": 10,
|
67 |
+
"mask_time_min_masks": 2,
|
68 |
+
"mask_time_prob": 0.05,
|
69 |
+
"model_type": "hubert",
|
70 |
+
"num_attention_heads": 12,
|
71 |
+
"num_conv_pos_embedding_groups": 16,
|
72 |
+
"num_conv_pos_embeddings": 128,
|
73 |
+
"num_feat_extract_layers": 7,
|
74 |
+
"num_hidden_layers": 2,
|
75 |
+
"pad_token_id": 0,
|
76 |
+
"torch_dtype": "float32",
|
77 |
+
"transformers_version": "4.44.2",
|
78 |
+
"use_weighted_layer_sum": false,
|
79 |
+
"vocab_size": 32
|
80 |
+
}
|
checkpoint-255/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f4d1bcece15f285a46eb855f203bd43924522bb36fe48d2d6c26990580d046d
|
3 |
+
size 94763496
|
checkpoint-255/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55c75ae0d01ca9c0fa93dadeceec95f962b2525037b69dc1a327f1b6c3c0ad59
|
3 |
+
size 189552570
|
checkpoint-255/preprocessor_config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": false,
|
3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
+
"feature_size": 1,
|
5 |
+
"padding_side": "right",
|
6 |
+
"padding_value": 0,
|
7 |
+
"return_attention_mask": false,
|
8 |
+
"sampling_rate": 16000
|
9 |
+
}
|
checkpoint-255/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32bb7f9c3cdfdb4246f58a8319fd0b86a841985e3d321ecf44d228390d31634d
|
3 |
+
size 14308
|
checkpoint-255/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7859ab68e5739b704ca9ce13610ef11b9b1125824d2905d10d4a9ea15d8739ea
|
3 |
+
size 1064
|
checkpoint-255/trainer_state.json
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.989010989010989,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-cry-detector/checkpoint-255",
|
4 |
+
"epoch": 2.986822840409956,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 255,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9956076134699854,
|
13 |
+
"eval_accuracy": 0.9758241758241758,
|
14 |
+
"eval_f1": 0.9759605689530879,
|
15 |
+
"eval_loss": 0.07685838639736176,
|
16 |
+
"eval_precision": 0.9763775351444481,
|
17 |
+
"eval_recall": 0.9758241758241758,
|
18 |
+
"eval_runtime": 3.9273,
|
19 |
+
"eval_samples_per_second": 347.571,
|
20 |
+
"eval_steps_per_second": 43.542,
|
21 |
+
"step": 85
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.9912152269399708,
|
25 |
+
"eval_accuracy": 0.9875457875457876,
|
26 |
+
"eval_f1": 0.9875697973801115,
|
27 |
+
"eval_loss": 0.04443557932972908,
|
28 |
+
"eval_precision": 0.9876259880187351,
|
29 |
+
"eval_recall": 0.9875457875457876,
|
30 |
+
"eval_runtime": 3.9441,
|
31 |
+
"eval_samples_per_second": 346.088,
|
32 |
+
"eval_steps_per_second": 43.356,
|
33 |
+
"step": 170
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 2.986822840409956,
|
37 |
+
"eval_accuracy": 0.989010989010989,
|
38 |
+
"eval_f1": 0.9890405015532383,
|
39 |
+
"eval_loss": 0.03978995233774185,
|
40 |
+
"eval_precision": 0.9891330367917903,
|
41 |
+
"eval_recall": 0.989010989010989,
|
42 |
+
"eval_runtime": 3.9868,
|
43 |
+
"eval_samples_per_second": 342.378,
|
44 |
+
"eval_steps_per_second": 42.891,
|
45 |
+
"step": 255
|
46 |
+
}
|
47 |
+
],
|
48 |
+
"logging_steps": 500,
|
49 |
+
"max_steps": 255,
|
50 |
+
"num_input_tokens_seen": 0,
|
51 |
+
"num_train_epochs": 3,
|
52 |
+
"save_steps": 500,
|
53 |
+
"stateful_callbacks": {
|
54 |
+
"EarlyStoppingCallback": {
|
55 |
+
"args": {
|
56 |
+
"early_stopping_patience": 3,
|
57 |
+
"early_stopping_threshold": 0.0
|
58 |
+
},
|
59 |
+
"attributes": {
|
60 |
+
"early_stopping_patience_counter": 0
|
61 |
+
}
|
62 |
+
},
|
63 |
+
"TrainerControl": {
|
64 |
+
"args": {
|
65 |
+
"should_epoch_stop": false,
|
66 |
+
"should_evaluate": false,
|
67 |
+
"should_log": false,
|
68 |
+
"should_save": true,
|
69 |
+
"should_training_stop": true
|
70 |
+
},
|
71 |
+
"attributes": {}
|
72 |
+
}
|
73 |
+
},
|
74 |
+
"total_flos": 3.7096378747392e+16,
|
75 |
+
"train_batch_size": 8,
|
76 |
+
"trial_name": null,
|
77 |
+
"trial_params": null
|
78 |
+
}
|
checkpoint-255/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37edd9986d3659d5cfeb9aa8cfbe6429149a6cd74482bcfbbbae5877c8eae7df
|
3 |
+
size 5240
|
checkpoint-256/config.json
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"hidden_act": "gelu",
|
49 |
+
"hidden_dropout": 0.1,
|
50 |
+
"hidden_size": 768,
|
51 |
+
"id2label": {
|
52 |
+
"0": "crying",
|
53 |
+
"1": "no_crying"
|
54 |
+
},
|
55 |
+
"initializer_range": 0.02,
|
56 |
+
"intermediate_size": 3072,
|
57 |
+
"label2id": {
|
58 |
+
"crying": "0",
|
59 |
+
"no_crying": "1"
|
60 |
+
},
|
61 |
+
"layer_norm_eps": 1e-05,
|
62 |
+
"layerdrop": 0.0,
|
63 |
+
"mask_feature_length": 10,
|
64 |
+
"mask_feature_min_masks": 0,
|
65 |
+
"mask_feature_prob": 0.0,
|
66 |
+
"mask_time_length": 10,
|
67 |
+
"mask_time_min_masks": 2,
|
68 |
+
"mask_time_prob": 0.05,
|
69 |
+
"model_type": "hubert",
|
70 |
+
"num_attention_heads": 12,
|
71 |
+
"num_conv_pos_embedding_groups": 16,
|
72 |
+
"num_conv_pos_embeddings": 128,
|
73 |
+
"num_feat_extract_layers": 7,
|
74 |
+
"num_hidden_layers": 2,
|
75 |
+
"pad_token_id": 0,
|
76 |
+
"torch_dtype": "float32",
|
77 |
+
"transformers_version": "4.44.2",
|
78 |
+
"use_weighted_layer_sum": false,
|
79 |
+
"vocab_size": 32
|
80 |
+
}
|
checkpoint-256/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a88f787681237fee33b288f12593aacb934387bd6da2bb8ff790e9b5a05f9556
|
3 |
+
size 94763496
|
checkpoint-256/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:346b3d592afd4162ca6d18904ccc574e4d170d2fe28c5c444a687f9850fd23b6
|
3 |
+
size 189552570
|
checkpoint-256/preprocessor_config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": false,
|
3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
+
"feature_size": 1,
|
5 |
+
"padding_side": "right",
|
6 |
+
"padding_value": 0,
|
7 |
+
"return_attention_mask": false,
|
8 |
+
"sampling_rate": 16000
|
9 |
+
}
|
checkpoint-256/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:681b1a6a4f0a991d47831ff66346aef2bdf16d4f5cc0b1dfbedc7d47e4c328a1
|
3 |
+
size 14308
|
checkpoint-256/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93dfa4ba2e0507f7a6d68626387a161a1bbe6b94f9a9daf967637af5f5748293
|
3 |
+
size 1064
|
checkpoint-256/trainer_state.json
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.9875457875457876,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-cry-detector/checkpoint-256",
|
4 |
+
"epoch": 2.998535871156662,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 256,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9956076134699854,
|
13 |
+
"eval_accuracy": 0.9787545787545787,
|
14 |
+
"eval_f1": 0.9788275754377449,
|
15 |
+
"eval_loss": 0.07363971322774887,
|
16 |
+
"eval_precision": 0.9790028856592324,
|
17 |
+
"eval_recall": 0.9787545787545787,
|
18 |
+
"eval_runtime": 4.1234,
|
19 |
+
"eval_samples_per_second": 331.041,
|
20 |
+
"eval_steps_per_second": 41.471,
|
21 |
+
"step": 85
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.9912152269399708,
|
25 |
+
"eval_accuracy": 0.9758241758241758,
|
26 |
+
"eval_f1": 0.9760458977669253,
|
27 |
+
"eval_loss": 0.0680176243185997,
|
28 |
+
"eval_precision": 0.9770364666252971,
|
29 |
+
"eval_recall": 0.9758241758241758,
|
30 |
+
"eval_runtime": 3.9805,
|
31 |
+
"eval_samples_per_second": 342.924,
|
32 |
+
"eval_steps_per_second": 42.96,
|
33 |
+
"step": 170
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 2.998535871156662,
|
37 |
+
"eval_accuracy": 0.9875457875457876,
|
38 |
+
"eval_f1": 0.987550637300988,
|
39 |
+
"eval_loss": 0.044689420610666275,
|
40 |
+
"eval_precision": 0.9875567820840008,
|
41 |
+
"eval_recall": 0.9875457875457876,
|
42 |
+
"eval_runtime": 4.1011,
|
43 |
+
"eval_samples_per_second": 332.839,
|
44 |
+
"eval_steps_per_second": 41.696,
|
45 |
+
"step": 256
|
46 |
+
}
|
47 |
+
],
|
48 |
+
"logging_steps": 500,
|
49 |
+
"max_steps": 595,
|
50 |
+
"num_input_tokens_seen": 0,
|
51 |
+
"num_train_epochs": 7,
|
52 |
+
"save_steps": 500,
|
53 |
+
"stateful_callbacks": {
|
54 |
+
"EarlyStoppingCallback": {
|
55 |
+
"args": {
|
56 |
+
"early_stopping_patience": 3,
|
57 |
+
"early_stopping_threshold": 0.0
|
58 |
+
},
|
59 |
+
"attributes": {
|
60 |
+
"early_stopping_patience_counter": 0
|
61 |
+
}
|
62 |
+
},
|
63 |
+
"TrainerControl": {
|
64 |
+
"args": {
|
65 |
+
"should_epoch_stop": false,
|
66 |
+
"should_evaluate": false,
|
67 |
+
"should_log": false,
|
68 |
+
"should_save": true,
|
69 |
+
"should_training_stop": false
|
70 |
+
},
|
71 |
+
"attributes": {}
|
72 |
+
}
|
73 |
+
},
|
74 |
+
"total_flos": 3.725102279808e+16,
|
75 |
+
"train_batch_size": 8,
|
76 |
+
"trial_name": null,
|
77 |
+
"trial_params": null
|
78 |
+
}
|
checkpoint-256/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caa3df7f2810faa9c35a46d178b12337edb0b134aa27dc1689b302d9bcc9d3d0
|
3 |
+
size 5240
|
checkpoint-341/config.json
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"hidden_act": "gelu",
|
49 |
+
"hidden_dropout": 0.1,
|
50 |
+
"hidden_size": 768,
|
51 |
+
"id2label": {
|
52 |
+
"0": "crying",
|
53 |
+
"1": "no_crying"
|
54 |
+
},
|
55 |
+
"initializer_range": 0.02,
|
56 |
+
"intermediate_size": 3072,
|
57 |
+
"label2id": {
|
58 |
+
"crying": "0",
|
59 |
+
"no_crying": "1"
|
60 |
+
},
|
61 |
+
"layer_norm_eps": 1e-05,
|
62 |
+
"layerdrop": 0.0,
|
63 |
+
"mask_feature_length": 10,
|
64 |
+
"mask_feature_min_masks": 0,
|
65 |
+
"mask_feature_prob": 0.0,
|
66 |
+
"mask_time_length": 10,
|
67 |
+
"mask_time_min_masks": 2,
|
68 |
+
"mask_time_prob": 0.05,
|
69 |
+
"model_type": "hubert",
|
70 |
+
"num_attention_heads": 12,
|
71 |
+
"num_conv_pos_embedding_groups": 16,
|
72 |
+
"num_conv_pos_embeddings": 128,
|
73 |
+
"num_feat_extract_layers": 7,
|
74 |
+
"num_hidden_layers": 2,
|
75 |
+
"pad_token_id": 0,
|
76 |
+
"torch_dtype": "float32",
|
77 |
+
"transformers_version": "4.44.2",
|
78 |
+
"use_weighted_layer_sum": false,
|
79 |
+
"vocab_size": 32
|
80 |
+
}
|
checkpoint-341/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d274d762a507f6e75c1d2a58abb70f2a03423ada1370bde55f03f89babb022da
|
3 |
+
size 94763496
|
checkpoint-341/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab5a9612889baf3497db6846ceb8c24a7fa052bc1a8f788d7b5b366a70c8d5b9
|
3 |
+
size 189552570
|
checkpoint-341/preprocessor_config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": false,
|
3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
+
"feature_size": 1,
|
5 |
+
"padding_side": "right",
|
6 |
+
"padding_value": 0,
|
7 |
+
"return_attention_mask": false,
|
8 |
+
"sampling_rate": 16000
|
9 |
+
}
|
checkpoint-341/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c14b301b5ea3ffdd05cbe8ff85a0d16fa4c4467786db8506b5bdcb5af8a9a2d
|
3 |
+
size 14308
|
checkpoint-341/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db61ebcfff67389442b8098d4cae839f684a87068fe21401504beb7ddd27d04d
|
3 |
+
size 1064
|
checkpoint-341/trainer_state.json
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.9904761904761905,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-cry-detector/checkpoint-341",
|
4 |
+
"epoch": 3.994143484626647,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 341,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9956076134699854,
|
13 |
+
"eval_accuracy": 0.9787545787545787,
|
14 |
+
"eval_f1": 0.9788275754377449,
|
15 |
+
"eval_loss": 0.07363971322774887,
|
16 |
+
"eval_precision": 0.9790028856592324,
|
17 |
+
"eval_recall": 0.9787545787545787,
|
18 |
+
"eval_runtime": 4.1234,
|
19 |
+
"eval_samples_per_second": 331.041,
|
20 |
+
"eval_steps_per_second": 41.471,
|
21 |
+
"step": 85
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.9912152269399708,
|
25 |
+
"eval_accuracy": 0.9758241758241758,
|
26 |
+
"eval_f1": 0.9760458977669253,
|
27 |
+
"eval_loss": 0.0680176243185997,
|
28 |
+
"eval_precision": 0.9770364666252971,
|
29 |
+
"eval_recall": 0.9758241758241758,
|
30 |
+
"eval_runtime": 3.9805,
|
31 |
+
"eval_samples_per_second": 342.924,
|
32 |
+
"eval_steps_per_second": 42.96,
|
33 |
+
"step": 170
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 2.998535871156662,
|
37 |
+
"eval_accuracy": 0.9875457875457876,
|
38 |
+
"eval_f1": 0.987550637300988,
|
39 |
+
"eval_loss": 0.044689420610666275,
|
40 |
+
"eval_precision": 0.9875567820840008,
|
41 |
+
"eval_recall": 0.9875457875457876,
|
42 |
+
"eval_runtime": 4.1011,
|
43 |
+
"eval_samples_per_second": 332.839,
|
44 |
+
"eval_steps_per_second": 41.696,
|
45 |
+
"step": 256
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 3.994143484626647,
|
49 |
+
"eval_accuracy": 0.9904761904761905,
|
50 |
+
"eval_f1": 0.9904945509377323,
|
51 |
+
"eval_loss": 0.045228052884340286,
|
52 |
+
"eval_precision": 0.9905452314533213,
|
53 |
+
"eval_recall": 0.9904761904761905,
|
54 |
+
"eval_runtime": 3.9393,
|
55 |
+
"eval_samples_per_second": 346.509,
|
56 |
+
"eval_steps_per_second": 43.409,
|
57 |
+
"step": 341
|
58 |
+
}
|
59 |
+
],
|
60 |
+
"logging_steps": 500,
|
61 |
+
"max_steps": 595,
|
62 |
+
"num_input_tokens_seen": 0,
|
63 |
+
"num_train_epochs": 7,
|
64 |
+
"save_steps": 500,
|
65 |
+
"stateful_callbacks": {
|
66 |
+
"EarlyStoppingCallback": {
|
67 |
+
"args": {
|
68 |
+
"early_stopping_patience": 3,
|
69 |
+
"early_stopping_threshold": 0.0
|
70 |
+
},
|
71 |
+
"attributes": {
|
72 |
+
"early_stopping_patience_counter": 0
|
73 |
+
}
|
74 |
+
},
|
75 |
+
"TrainerControl": {
|
76 |
+
"args": {
|
77 |
+
"should_epoch_stop": false,
|
78 |
+
"should_evaluate": false,
|
79 |
+
"should_log": false,
|
80 |
+
"should_save": true,
|
81 |
+
"should_training_stop": false
|
82 |
+
},
|
83 |
+
"attributes": {}
|
84 |
+
}
|
85 |
+
},
|
86 |
+
"total_flos": 4.966803039744e+16,
|
87 |
+
"train_batch_size": 8,
|
88 |
+
"trial_name": null,
|
89 |
+
"trial_params": null
|
90 |
+
}
|
checkpoint-341/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caa3df7f2810faa9c35a46d178b12337edb0b134aa27dc1689b302d9bcc9d3d0
|
3 |
+
size 5240
|
checkpoint-426/config.json
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"hidden_act": "gelu",
|
49 |
+
"hidden_dropout": 0.1,
|
50 |
+
"hidden_size": 768,
|
51 |
+
"id2label": {
|
52 |
+
"0": "crying",
|
53 |
+
"1": "no_crying"
|
54 |
+
},
|
55 |
+
"initializer_range": 0.02,
|
56 |
+
"intermediate_size": 3072,
|
57 |
+
"label2id": {
|
58 |
+
"crying": "0",
|
59 |
+
"no_crying": "1"
|
60 |
+
},
|
61 |
+
"layer_norm_eps": 1e-05,
|
62 |
+
"layerdrop": 0.0,
|
63 |
+
"mask_feature_length": 10,
|
64 |
+
"mask_feature_min_masks": 0,
|
65 |
+
"mask_feature_prob": 0.0,
|
66 |
+
"mask_time_length": 10,
|
67 |
+
"mask_time_min_masks": 2,
|
68 |
+
"mask_time_prob": 0.05,
|
69 |
+
"model_type": "hubert",
|
70 |
+
"num_attention_heads": 12,
|
71 |
+
"num_conv_pos_embedding_groups": 16,
|
72 |
+
"num_conv_pos_embeddings": 128,
|
73 |
+
"num_feat_extract_layers": 7,
|
74 |
+
"num_hidden_layers": 2,
|
75 |
+
"pad_token_id": 0,
|
76 |
+
"torch_dtype": "float32",
|
77 |
+
"transformers_version": "4.44.2",
|
78 |
+
"use_weighted_layer_sum": false,
|
79 |
+
"vocab_size": 32
|
80 |
+
}
|
checkpoint-426/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94760f7df9dc2c6bad702593ec0871720fa6ae352e88f79c71bcfccc8fab1a09
|
3 |
+
size 94763496
|
checkpoint-426/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b6550e976738f22f913650571284f8eb12dea30c8877134bbe84a1ebd7a73b5e
|
3 |
+
size 189552570
|
checkpoint-426/preprocessor_config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": false,
|
3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
+
"feature_size": 1,
|
5 |
+
"padding_side": "right",
|
6 |
+
"padding_value": 0,
|
7 |
+
"return_attention_mask": false,
|
8 |
+
"sampling_rate": 16000
|
9 |
+
}
|
checkpoint-426/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2a1aad7f62f9d383e6bd694b87da317b54d942be2b578a455bcdaaf3258d46a
|
3 |
+
size 14308
|
checkpoint-426/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1894ac3a34350cc065802675cae4d32438918a7d772edc033390240aaf5d8841
|
3 |
+
size 1064
|
checkpoint-426/trainer_state.json
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.991941391941392,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-cry-detector/checkpoint-426",
|
4 |
+
"epoch": 4.989751098096632,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 426,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9956076134699854,
|
13 |
+
"eval_accuracy": 0.9787545787545787,
|
14 |
+
"eval_f1": 0.9788275754377449,
|
15 |
+
"eval_loss": 0.07363971322774887,
|
16 |
+
"eval_precision": 0.9790028856592324,
|
17 |
+
"eval_recall": 0.9787545787545787,
|
18 |
+
"eval_runtime": 4.1234,
|
19 |
+
"eval_samples_per_second": 331.041,
|
20 |
+
"eval_steps_per_second": 41.471,
|
21 |
+
"step": 85
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.9912152269399708,
|
25 |
+
"eval_accuracy": 0.9758241758241758,
|
26 |
+
"eval_f1": 0.9760458977669253,
|
27 |
+
"eval_loss": 0.0680176243185997,
|
28 |
+
"eval_precision": 0.9770364666252971,
|
29 |
+
"eval_recall": 0.9758241758241758,
|
30 |
+
"eval_runtime": 3.9805,
|
31 |
+
"eval_samples_per_second": 342.924,
|
32 |
+
"eval_steps_per_second": 42.96,
|
33 |
+
"step": 170
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 2.998535871156662,
|
37 |
+
"eval_accuracy": 0.9875457875457876,
|
38 |
+
"eval_f1": 0.987550637300988,
|
39 |
+
"eval_loss": 0.044689420610666275,
|
40 |
+
"eval_precision": 0.9875567820840008,
|
41 |
+
"eval_recall": 0.9875457875457876,
|
42 |
+
"eval_runtime": 4.1011,
|
43 |
+
"eval_samples_per_second": 332.839,
|
44 |
+
"eval_steps_per_second": 41.696,
|
45 |
+
"step": 256
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 3.994143484626647,
|
49 |
+
"eval_accuracy": 0.9904761904761905,
|
50 |
+
"eval_f1": 0.9904945509377323,
|
51 |
+
"eval_loss": 0.045228052884340286,
|
52 |
+
"eval_precision": 0.9905452314533213,
|
53 |
+
"eval_recall": 0.9904761904761905,
|
54 |
+
"eval_runtime": 3.9393,
|
55 |
+
"eval_samples_per_second": 346.509,
|
56 |
+
"eval_steps_per_second": 43.409,
|
57 |
+
"step": 341
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.989751098096632,
|
61 |
+
"eval_accuracy": 0.991941391941392,
|
62 |
+
"eval_f1": 0.9919569277165429,
|
63 |
+
"eval_loss": 0.04387320205569267,
|
64 |
+
"eval_precision": 0.9920048531706146,
|
65 |
+
"eval_recall": 0.991941391941392,
|
66 |
+
"eval_runtime": 4.0115,
|
67 |
+
"eval_samples_per_second": 340.268,
|
68 |
+
"eval_steps_per_second": 42.627,
|
69 |
+
"step": 426
|
70 |
+
}
|
71 |
+
],
|
72 |
+
"logging_steps": 500,
|
73 |
+
"max_steps": 595,
|
74 |
+
"num_input_tokens_seen": 0,
|
75 |
+
"num_train_epochs": 7,
|
76 |
+
"save_steps": 500,
|
77 |
+
"stateful_callbacks": {
|
78 |
+
"EarlyStoppingCallback": {
|
79 |
+
"args": {
|
80 |
+
"early_stopping_patience": 3,
|
81 |
+
"early_stopping_threshold": 0.0
|
82 |
+
},
|
83 |
+
"attributes": {
|
84 |
+
"early_stopping_patience_counter": 0
|
85 |
+
}
|
86 |
+
},
|
87 |
+
"TrainerControl": {
|
88 |
+
"args": {
|
89 |
+
"should_epoch_stop": false,
|
90 |
+
"should_evaluate": false,
|
91 |
+
"should_log": false,
|
92 |
+
"should_save": true,
|
93 |
+
"should_training_stop": false
|
94 |
+
},
|
95 |
+
"attributes": {}
|
96 |
+
}
|
97 |
+
},
|
98 |
+
"total_flos": 6.20850379968e+16,
|
99 |
+
"train_batch_size": 8,
|
100 |
+
"trial_name": null,
|
101 |
+
"trial_params": null
|
102 |
+
}
|
checkpoint-426/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caa3df7f2810faa9c35a46d178b12337edb0b134aa27dc1689b302d9bcc9d3d0
|
3 |
+
size 5240
|
checkpoint-512/config.json
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"hidden_act": "gelu",
|
49 |
+
"hidden_dropout": 0.1,
|
50 |
+
"hidden_size": 768,
|
51 |
+
"id2label": {
|
52 |
+
"0": "crying",
|
53 |
+
"1": "no_crying"
|
54 |
+
},
|
55 |
+
"initializer_range": 0.02,
|
56 |
+
"intermediate_size": 3072,
|
57 |
+
"label2id": {
|
58 |
+
"crying": "0",
|
59 |
+
"no_crying": "1"
|
60 |
+
},
|
61 |
+
"layer_norm_eps": 1e-05,
|
62 |
+
"layerdrop": 0.0,
|
63 |
+
"mask_feature_length": 10,
|
64 |
+
"mask_feature_min_masks": 0,
|
65 |
+
"mask_feature_prob": 0.0,
|
66 |
+
"mask_time_length": 10,
|
67 |
+
"mask_time_min_masks": 2,
|
68 |
+
"mask_time_prob": 0.05,
|
69 |
+
"model_type": "hubert",
|
70 |
+
"num_attention_heads": 12,
|
71 |
+
"num_conv_pos_embedding_groups": 16,
|
72 |
+
"num_conv_pos_embeddings": 128,
|
73 |
+
"num_feat_extract_layers": 7,
|
74 |
+
"num_hidden_layers": 2,
|
75 |
+
"pad_token_id": 0,
|
76 |
+
"torch_dtype": "float32",
|
77 |
+
"transformers_version": "4.44.2",
|
78 |
+
"use_weighted_layer_sum": false,
|
79 |
+
"vocab_size": 32
|
80 |
+
}
|
checkpoint-512/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6beddb5fef4d20200cfcf763c9460267deb7ad4514b1fa760fa4f89d398502cf
|
3 |
+
size 94763496
|
checkpoint-512/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2cfb7299eb13d9980cb44dcbfcf90cc3e4a95d2c84fd840b959005ca0b7566d6
|
3 |
+
size 189552570
|
checkpoint-512/preprocessor_config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": false,
|
3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
+
"feature_size": 1,
|
5 |
+
"padding_side": "right",
|
6 |
+
"padding_value": 0,
|
7 |
+
"return_attention_mask": false,
|
8 |
+
"sampling_rate": 16000
|
9 |
+
}
|
checkpoint-512/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:863dd8c5b813736d29c00a5bac8fd9a20f38b88a7fb576cceea0cc85f0cbd789
|
3 |
+
size 14308
|
checkpoint-512/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7900f26d6583fa63e5c081f39e0457bc3033e9e087a3f2085f881f9fac700fb3
|
3 |
+
size 1064
|
checkpoint-512/trainer_state.json
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.991941391941392,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-cry-detector/checkpoint-426",
|
4 |
+
"epoch": 5.997071742313324,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 512,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9956076134699854,
|
13 |
+
"eval_accuracy": 0.9787545787545787,
|
14 |
+
"eval_f1": 0.9788275754377449,
|
15 |
+
"eval_loss": 0.07363971322774887,
|
16 |
+
"eval_precision": 0.9790028856592324,
|
17 |
+
"eval_recall": 0.9787545787545787,
|
18 |
+
"eval_runtime": 4.1234,
|
19 |
+
"eval_samples_per_second": 331.041,
|
20 |
+
"eval_steps_per_second": 41.471,
|
21 |
+
"step": 85
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.9912152269399708,
|
25 |
+
"eval_accuracy": 0.9758241758241758,
|
26 |
+
"eval_f1": 0.9760458977669253,
|
27 |
+
"eval_loss": 0.0680176243185997,
|
28 |
+
"eval_precision": 0.9770364666252971,
|
29 |
+
"eval_recall": 0.9758241758241758,
|
30 |
+
"eval_runtime": 3.9805,
|
31 |
+
"eval_samples_per_second": 342.924,
|
32 |
+
"eval_steps_per_second": 42.96,
|
33 |
+
"step": 170
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 2.998535871156662,
|
37 |
+
"eval_accuracy": 0.9875457875457876,
|
38 |
+
"eval_f1": 0.987550637300988,
|
39 |
+
"eval_loss": 0.044689420610666275,
|
40 |
+
"eval_precision": 0.9875567820840008,
|
41 |
+
"eval_recall": 0.9875457875457876,
|
42 |
+
"eval_runtime": 4.1011,
|
43 |
+
"eval_samples_per_second": 332.839,
|
44 |
+
"eval_steps_per_second": 41.696,
|
45 |
+
"step": 256
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 3.994143484626647,
|
49 |
+
"eval_accuracy": 0.9904761904761905,
|
50 |
+
"eval_f1": 0.9904945509377323,
|
51 |
+
"eval_loss": 0.045228052884340286,
|
52 |
+
"eval_precision": 0.9905452314533213,
|
53 |
+
"eval_recall": 0.9904761904761905,
|
54 |
+
"eval_runtime": 3.9393,
|
55 |
+
"eval_samples_per_second": 346.509,
|
56 |
+
"eval_steps_per_second": 43.409,
|
57 |
+
"step": 341
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.989751098096632,
|
61 |
+
"eval_accuracy": 0.991941391941392,
|
62 |
+
"eval_f1": 0.9919569277165429,
|
63 |
+
"eval_loss": 0.04387320205569267,
|
64 |
+
"eval_precision": 0.9920048531706146,
|
65 |
+
"eval_recall": 0.991941391941392,
|
66 |
+
"eval_runtime": 4.0115,
|
67 |
+
"eval_samples_per_second": 340.268,
|
68 |
+
"eval_steps_per_second": 42.627,
|
69 |
+
"step": 426
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"epoch": 5.856515373352855,
|
73 |
+
"grad_norm": 0.01621050015091896,
|
74 |
+
"learning_rate": 6.1795711069424666e-06,
|
75 |
+
"loss": 0.053,
|
76 |
+
"step": 500
|
77 |
+
},
|
78 |
+
{
|
79 |
+
"epoch": 5.997071742313324,
|
80 |
+
"eval_accuracy": 0.991941391941392,
|
81 |
+
"eval_f1": 0.9919507596926951,
|
82 |
+
"eval_loss": 0.04010434448719025,
|
83 |
+
"eval_precision": 0.991971823048746,
|
84 |
+
"eval_recall": 0.991941391941392,
|
85 |
+
"eval_runtime": 4.1768,
|
86 |
+
"eval_samples_per_second": 326.803,
|
87 |
+
"eval_steps_per_second": 40.94,
|
88 |
+
"step": 512
|
89 |
+
}
|
90 |
+
],
|
91 |
+
"logging_steps": 500,
|
92 |
+
"max_steps": 595,
|
93 |
+
"num_input_tokens_seen": 0,
|
94 |
+
"num_train_epochs": 7,
|
95 |
+
"save_steps": 500,
|
96 |
+
"stateful_callbacks": {
|
97 |
+
"EarlyStoppingCallback": {
|
98 |
+
"args": {
|
99 |
+
"early_stopping_patience": 3,
|
100 |
+
"early_stopping_threshold": 0.0
|
101 |
+
},
|
102 |
+
"attributes": {
|
103 |
+
"early_stopping_patience_counter": 0
|
104 |
+
}
|
105 |
+
},
|
106 |
+
"TrainerControl": {
|
107 |
+
"args": {
|
108 |
+
"should_epoch_stop": false,
|
109 |
+
"should_evaluate": false,
|
110 |
+
"should_log": false,
|
111 |
+
"should_save": true,
|
112 |
+
"should_training_stop": false
|
113 |
+
},
|
114 |
+
"attributes": {}
|
115 |
+
}
|
116 |
+
},
|
117 |
+
"total_flos": 7.450204559616e+16,
|
118 |
+
"train_batch_size": 8,
|
119 |
+
"trial_name": null,
|
120 |
+
"trial_params": null
|
121 |
+
}
|
checkpoint-512/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caa3df7f2810faa9c35a46d178b12337edb0b134aa27dc1689b302d9bcc9d3d0
|
3 |
+
size 5240
|
checkpoint-595/config.json
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"hidden_act": "gelu",
|
49 |
+
"hidden_dropout": 0.1,
|
50 |
+
"hidden_size": 768,
|
51 |
+
"id2label": {
|
52 |
+
"0": "crying",
|
53 |
+
"1": "no_crying"
|
54 |
+
},
|
55 |
+
"initializer_range": 0.02,
|
56 |
+
"intermediate_size": 3072,
|
57 |
+
"label2id": {
|
58 |
+
"crying": "0",
|
59 |
+
"no_crying": "1"
|
60 |
+
},
|
61 |
+
"layer_norm_eps": 1e-05,
|
62 |
+
"layerdrop": 0.0,
|
63 |
+
"mask_feature_length": 10,
|
64 |
+
"mask_feature_min_masks": 0,
|
65 |
+
"mask_feature_prob": 0.0,
|
66 |
+
"mask_time_length": 10,
|
67 |
+
"mask_time_min_masks": 2,
|
68 |
+
"mask_time_prob": 0.05,
|
69 |
+
"model_type": "hubert",
|
70 |
+
"num_attention_heads": 12,
|
71 |
+
"num_conv_pos_embedding_groups": 16,
|
72 |
+
"num_conv_pos_embeddings": 128,
|
73 |
+
"num_feat_extract_layers": 7,
|
74 |
+
"num_hidden_layers": 2,
|
75 |
+
"pad_token_id": 0,
|
76 |
+
"torch_dtype": "float32",
|
77 |
+
"transformers_version": "4.44.2",
|
78 |
+
"use_weighted_layer_sum": false,
|
79 |
+
"vocab_size": 32
|
80 |
+
}
|