callista6699 committed on
Commit
8b7f717
1 Parent(s): baf0f94

Training completed!

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. README.md +60 -0
  2. config.json +49 -0
  3. model.safetensors +3 -0
  4. run-0/checkpoint-19/config.json +49 -0
  5. run-0/checkpoint-19/model.safetensors +3 -0
  6. run-0/checkpoint-19/optimizer.pt +3 -0
  7. run-0/checkpoint-19/rng_state.pth +3 -0
  8. run-0/checkpoint-19/scheduler.pt +3 -0
  9. run-0/checkpoint-19/special_tokens_map.json +7 -0
  10. run-0/checkpoint-19/tokenizer.json +0 -0
  11. run-0/checkpoint-19/tokenizer_config.json +58 -0
  12. run-0/checkpoint-19/trainer_state.json +45 -0
  13. run-0/checkpoint-19/training_args.bin +3 -0
  14. run-0/checkpoint-19/vocab.txt +0 -0
  15. run-1/checkpoint-38/config.json +49 -0
  16. run-1/checkpoint-38/model.safetensors +3 -0
  17. run-1/checkpoint-38/optimizer.pt +3 -0
  18. run-1/checkpoint-38/rng_state.pth +3 -0
  19. run-1/checkpoint-38/scheduler.pt +3 -0
  20. run-1/checkpoint-38/special_tokens_map.json +7 -0
  21. run-1/checkpoint-38/tokenizer.json +0 -0
  22. run-1/checkpoint-38/tokenizer_config.json +58 -0
  23. run-1/checkpoint-38/trainer_state.json +67 -0
  24. run-1/checkpoint-38/training_args.bin +3 -0
  25. run-1/checkpoint-38/vocab.txt +0 -0
  26. run-2/checkpoint-296/config.json +49 -0
  27. run-2/checkpoint-296/model.safetensors +3 -0
  28. run-2/checkpoint-296/optimizer.pt +3 -0
  29. run-2/checkpoint-296/rng_state.pth +3 -0
  30. run-2/checkpoint-296/scheduler.pt +3 -0
  31. run-2/checkpoint-296/special_tokens_map.json +7 -0
  32. run-2/checkpoint-296/tokenizer.json +0 -0
  33. run-2/checkpoint-296/tokenizer_config.json +58 -0
  34. run-2/checkpoint-296/trainer_state.json +249 -0
  35. run-2/checkpoint-296/training_args.bin +3 -0
  36. run-2/checkpoint-296/vocab.txt +0 -0
  37. run-3/checkpoint-295/config.json +49 -0
  38. run-3/checkpoint-295/model.safetensors +3 -0
  39. run-3/checkpoint-295/optimizer.pt +3 -0
  40. run-3/checkpoint-295/rng_state.pth +3 -0
  41. run-3/checkpoint-295/scheduler.pt +3 -0
  42. run-3/checkpoint-295/special_tokens_map.json +7 -0
  43. run-3/checkpoint-295/tokenizer.json +0 -0
  44. run-3/checkpoint-295/tokenizer_config.json +58 -0
  45. run-3/checkpoint-295/trainer_state.json +241 -0
  46. run-3/checkpoint-295/training_args.bin +3 -0
  47. run-3/checkpoint-295/vocab.txt +0 -0
  48. run-4/checkpoint-19/config.json +49 -0
  49. run-4/checkpoint-19/model.safetensors +3 -0
  50. run-4/checkpoint-19/optimizer.pt +3 -0
README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: mit
4
+ base_model: nlptown/bert-base-multilingual-uncased-sentiment
5
+ tags:
6
+ - generated_from_trainer
7
+ model-index:
8
+ - name: results
9
+ results: []
10
+ ---
11
+
12
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
+ should probably proofread and complete it, then remove this comment. -->
14
+
15
+ # results
16
+
17
+ This model is a fine-tuned version of [nlptown/bert-base-multilingual-uncased-sentiment](https://huggingface.co/nlptown/bert-base-multilingual-uncased-sentiment) on an unknown dataset.
18
+ It achieves the following results on the evaluation set:
19
+ - Loss: 2.6110
20
+
21
+ ## Model description
22
+
23
+ More information needed
24
+
25
+ ## Intended uses & limitations
26
+
27
+ More information needed
28
+
29
+ ## Training and evaluation data
30
+
31
+ More information needed
32
+
33
+ ## Training procedure
34
+
35
+ ### Training hyperparameters
36
+
37
+ The following hyperparameters were used during training:
38
+ - learning_rate: 2.934292727323431e-05
39
+ - train_batch_size: 4
40
+ - eval_batch_size: 16
41
+ - seed: 16
42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
43
+ - lr_scheduler_type: linear
44
+ - lr_scheduler_warmup_steps: 500
45
+ - num_epochs: 3
46
+
47
+ ### Training results
48
+
49
+ | Training Loss | Epoch | Step | Validation Loss |
50
+ |:-------------:|:-----:|:----:|:---------------:|
51
+ | 0.0002 | 1.0 | 295 | 2.6070 |
52
+ | 0.5428 | 2.0 | 590 | 3.1094 |
53
+ | 0.0002 | 3.0 | 885 | 2.6110 |
54
+
55
+
56
+ ### Framework versions
57
+
58
+ - Transformers 4.44.2
59
+ - Pytorch 2.4.1+cu121
60
+ - Tokenizers 0.19.1
config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
3
+ "_num_labels": 5,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_dropout": null,
9
+ "directionality": "bidi",
10
+ "finetuning_task": "sentiment-analysis",
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "1 star",
16
+ "1": "2 stars",
17
+ "2": "3 stars",
18
+ "3": "4 stars",
19
+ "4": "5 stars"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 3072,
23
+ "label2id": {
24
+ "1 star": 0,
25
+ "2 stars": 1,
26
+ "3 stars": 2,
27
+ "4 stars": 3,
28
+ "5 stars": 4
29
+ },
30
+ "layer_norm_eps": 1e-12,
31
+ "max_position_embeddings": 512,
32
+ "model_type": "bert",
33
+ "num_attention_heads": 12,
34
+ "num_hidden_layers": 12,
35
+ "output_past": true,
36
+ "pad_token_id": 0,
37
+ "pooler_fc_size": 768,
38
+ "pooler_num_attention_heads": 12,
39
+ "pooler_num_fc_layers": 3,
40
+ "pooler_size_per_head": 128,
41
+ "pooler_type": "first_token_transform",
42
+ "position_embedding_type": "absolute",
43
+ "problem_type": "single_label_classification",
44
+ "torch_dtype": "float32",
45
+ "transformers_version": "4.44.2",
46
+ "type_vocab_size": 2,
47
+ "use_cache": true,
48
+ "vocab_size": 105879
49
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adb2f037c4fcb0095b0de830bf80c7a07b9c9db02b3ebca71a7b65e869d77343
3
+ size 669464588
run-0/checkpoint-19/config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
3
+ "_num_labels": 5,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_dropout": null,
9
+ "directionality": "bidi",
10
+ "finetuning_task": "sentiment-analysis",
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "1 star",
16
+ "1": "2 stars",
17
+ "2": "3 stars",
18
+ "3": "4 stars",
19
+ "4": "5 stars"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 3072,
23
+ "label2id": {
24
+ "1 star": 0,
25
+ "2 stars": 1,
26
+ "3 stars": 2,
27
+ "4 stars": 3,
28
+ "5 stars": 4
29
+ },
30
+ "layer_norm_eps": 1e-12,
31
+ "max_position_embeddings": 512,
32
+ "model_type": "bert",
33
+ "num_attention_heads": 12,
34
+ "num_hidden_layers": 12,
35
+ "output_past": true,
36
+ "pad_token_id": 0,
37
+ "pooler_fc_size": 768,
38
+ "pooler_num_attention_heads": 12,
39
+ "pooler_num_fc_layers": 3,
40
+ "pooler_size_per_head": 128,
41
+ "pooler_type": "first_token_transform",
42
+ "position_embedding_type": "absolute",
43
+ "problem_type": "single_label_classification",
44
+ "torch_dtype": "float32",
45
+ "transformers_version": "4.44.2",
46
+ "type_vocab_size": 2,
47
+ "use_cache": true,
48
+ "vocab_size": 105879
49
+ }
run-0/checkpoint-19/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d5f5abba49994e4f10e1f832ab7fb583704c86a0e59a2640cadff82a480f1f8
3
+ size 669464588
run-0/checkpoint-19/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:857b82e9b886adf38cadf4c854d81d03f5855d5b85d01dc295baef72968df64a
3
+ size 1339050234
run-0/checkpoint-19/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3d12f967010971f6c719fc8b0c67a887b6c05899c8df2ac0230989587877407
3
+ size 14244
run-0/checkpoint-19/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0333332e23fd259cb255054bff6d10b7e0571a091482a54a50087b65d8f5f2
3
+ size 1064
run-0/checkpoint-19/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-19/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-19/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "max_len": 512,
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
run-0/checkpoint-19/trainer_state.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 19,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5263157894736842,
13
+ "grad_norm": 6.248871803283691,
14
+ "learning_rate": 1.1698489431263144e-07,
15
+ "loss": 0.4253,
16
+ "step": 10
17
+ }
18
+ ],
19
+ "logging_steps": 10,
20
+ "max_steps": 19,
21
+ "num_input_tokens_seen": 0,
22
+ "num_train_epochs": 1,
23
+ "save_steps": 500,
24
+ "stateful_callbacks": {
25
+ "TrainerControl": {
26
+ "args": {
27
+ "should_epoch_stop": false,
28
+ "should_evaluate": false,
29
+ "should_log": false,
30
+ "should_save": true,
31
+ "should_training_stop": true
32
+ },
33
+ "attributes": {}
34
+ }
35
+ },
36
+ "total_flos": 42098902794240.0,
37
+ "train_batch_size": 64,
38
+ "trial_name": null,
39
+ "trial_params": {
40
+ "learning_rate": 5.849244715631572e-06,
41
+ "num_train_epochs": 1,
42
+ "per_device_train_batch_size": 64,
43
+ "seed": 1
44
+ }
45
+ }
run-0/checkpoint-19/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9f07479eb33644366a5ddfafbb9fb5c21b3c754996ddd2be7b070a1a725a265
3
+ size 5112
run-0/checkpoint-19/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-38/config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
3
+ "_num_labels": 5,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_dropout": null,
9
+ "directionality": "bidi",
10
+ "finetuning_task": "sentiment-analysis",
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "1 star",
16
+ "1": "2 stars",
17
+ "2": "3 stars",
18
+ "3": "4 stars",
19
+ "4": "5 stars"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 3072,
23
+ "label2id": {
24
+ "1 star": 0,
25
+ "2 stars": 1,
26
+ "3 stars": 2,
27
+ "4 stars": 3,
28
+ "5 stars": 4
29
+ },
30
+ "layer_norm_eps": 1e-12,
31
+ "max_position_embeddings": 512,
32
+ "model_type": "bert",
33
+ "num_attention_heads": 12,
34
+ "num_hidden_layers": 12,
35
+ "output_past": true,
36
+ "pad_token_id": 0,
37
+ "pooler_fc_size": 768,
38
+ "pooler_num_attention_heads": 12,
39
+ "pooler_num_fc_layers": 3,
40
+ "pooler_size_per_head": 128,
41
+ "pooler_type": "first_token_transform",
42
+ "position_embedding_type": "absolute",
43
+ "problem_type": "single_label_classification",
44
+ "torch_dtype": "float32",
45
+ "transformers_version": "4.44.2",
46
+ "type_vocab_size": 2,
47
+ "use_cache": true,
48
+ "vocab_size": 105879
49
+ }
run-1/checkpoint-38/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfe9ed0a1b41491d2ecb3014db2f0bfcf7e37e311c6f7bd51e3c54ffa5bf8342
3
+ size 669464588
run-1/checkpoint-38/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37a6a27d2262563ee9006cc2a777a34723be0df25ef09c707457b977100474ec
3
+ size 1339050234
run-1/checkpoint-38/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1775d8c7698f101cd06aaf4074e223c5851d61233c1992c0032b90cbd1cdfb3b
3
+ size 14244
run-1/checkpoint-38/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68e7b049716a379e783a23bccb72371c89bc92dd060a15ab0f21514622d89991
3
+ size 1064
run-1/checkpoint-38/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-1/checkpoint-38/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-38/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "max_len": 512,
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
run-1/checkpoint-38/trainer_state.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 38,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.5263157894736842,
13
+ "grad_norm": 6.260778427124023,
14
+ "learning_rate": 1.0992924659429563e-06,
15
+ "loss": 0.3935,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_loss": 0.8991609215736389,
21
+ "eval_runtime": 3.0477,
22
+ "eval_samples_per_second": 128.949,
23
+ "eval_steps_per_second": 8.203,
24
+ "step": 19
25
+ },
26
+ {
27
+ "epoch": 1.0526315789473684,
28
+ "grad_norm": 6.275106906890869,
29
+ "learning_rate": 2.1985849318859127e-06,
30
+ "loss": 0.3804,
31
+ "step": 20
32
+ },
33
+ {
34
+ "epoch": 1.5789473684210527,
35
+ "grad_norm": 5.347285747528076,
36
+ "learning_rate": 3.2978773978288686e-06,
37
+ "loss": 0.346,
38
+ "step": 30
39
+ }
40
+ ],
41
+ "logging_steps": 10,
42
+ "max_steps": 38,
43
+ "num_input_tokens_seen": 0,
44
+ "num_train_epochs": 2,
45
+ "save_steps": 500,
46
+ "stateful_callbacks": {
47
+ "TrainerControl": {
48
+ "args": {
49
+ "should_epoch_stop": false,
50
+ "should_evaluate": false,
51
+ "should_log": false,
52
+ "should_save": true,
53
+ "should_training_stop": true
54
+ },
55
+ "attributes": {}
56
+ }
57
+ },
58
+ "total_flos": 123862865564928.0,
59
+ "train_batch_size": 64,
60
+ "trial_name": null,
61
+ "trial_params": {
62
+ "learning_rate": 5.496462329714781e-05,
63
+ "num_train_epochs": 2,
64
+ "per_device_train_batch_size": 64,
65
+ "seed": 7
66
+ }
67
+ }
run-1/checkpoint-38/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ba44fe98f416db72ca51c922415e1f51e4e1404a042747283edd18980a15494
3
+ size 5112
run-1/checkpoint-38/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-2/checkpoint-296/config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
3
+ "_num_labels": 5,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_dropout": null,
9
+ "directionality": "bidi",
10
+ "finetuning_task": "sentiment-analysis",
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "1 star",
16
+ "1": "2 stars",
17
+ "2": "3 stars",
18
+ "3": "4 stars",
19
+ "4": "5 stars"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 3072,
23
+ "label2id": {
24
+ "1 star": 0,
25
+ "2 stars": 1,
26
+ "3 stars": 2,
27
+ "4 stars": 3,
28
+ "5 stars": 4
29
+ },
30
+ "layer_norm_eps": 1e-12,
31
+ "max_position_embeddings": 512,
32
+ "model_type": "bert",
33
+ "num_attention_heads": 12,
34
+ "num_hidden_layers": 12,
35
+ "output_past": true,
36
+ "pad_token_id": 0,
37
+ "pooler_fc_size": 768,
38
+ "pooler_num_attention_heads": 12,
39
+ "pooler_num_fc_layers": 3,
40
+ "pooler_size_per_head": 128,
41
+ "pooler_type": "first_token_transform",
42
+ "position_embedding_type": "absolute",
43
+ "problem_type": "single_label_classification",
44
+ "torch_dtype": "float32",
45
+ "transformers_version": "4.44.2",
46
+ "type_vocab_size": 2,
47
+ "use_cache": true,
48
+ "vocab_size": 105879
49
+ }
run-2/checkpoint-296/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9a1af22c5f87ae22ee9a1cf5ca0d62eb19a23f9a2f9522692f20f2a1ee3054f
3
+ size 669464588
run-2/checkpoint-296/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30cd1bb00e95dba14a8cdd52c2b0e316a3fab537a820d9641d8d058cc0a2c8e3
3
+ size 1339050234
run-2/checkpoint-296/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29fcd1f8c83d899699357277f7c8e68c2ed22b005d7b0077fd7d6708841cd58e
3
+ size 14244
run-2/checkpoint-296/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f8904e10190ba4dd3d9443d80ca3c01c2c678f77b5c6457f9e844f15aef70c1
3
+ size 1064
run-2/checkpoint-296/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-2/checkpoint-296/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-2/checkpoint-296/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "max_len": 512,
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
run-2/checkpoint-296/trainer_state.json ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 296,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06756756756756757,
13
+ "grad_norm": 23.774078369140625,
14
+ "learning_rate": 2.067012136643694e-08,
15
+ "loss": 0.4146,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.13513513513513514,
20
+ "grad_norm": 27.321271896362305,
21
+ "learning_rate": 4.134024273287388e-08,
22
+ "loss": 0.2673,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.20270270270270271,
27
+ "grad_norm": 19.634639739990234,
28
+ "learning_rate": 6.201036409931082e-08,
29
+ "loss": 0.3514,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.2702702702702703,
34
+ "grad_norm": 18.950864791870117,
35
+ "learning_rate": 8.268048546574776e-08,
36
+ "loss": 0.3675,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.33783783783783783,
41
+ "grad_norm": 3.3055667877197266,
42
+ "learning_rate": 1.0335060683218471e-07,
43
+ "loss": 0.2814,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.40540540540540543,
48
+ "grad_norm": 17.000532150268555,
49
+ "learning_rate": 1.2402072819862164e-07,
50
+ "loss": 0.3085,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.47297297297297297,
55
+ "grad_norm": 15.456575393676758,
56
+ "learning_rate": 1.446908495650586e-07,
57
+ "loss": 0.321,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.5405405405405406,
62
+ "grad_norm": 10.24705982208252,
63
+ "learning_rate": 1.6536097093149552e-07,
64
+ "loss": 0.242,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.6081081081081081,
69
+ "grad_norm": 7.760463714599609,
70
+ "learning_rate": 1.8603109229793245e-07,
71
+ "loss": 0.3407,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.6756756756756757,
76
+ "grad_norm": 25.994970321655273,
77
+ "learning_rate": 2.0670121366436942e-07,
78
+ "loss": 0.3126,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.7432432432432432,
83
+ "grad_norm": 8.515066146850586,
84
+ "learning_rate": 2.2737133503080635e-07,
85
+ "loss": 0.2059,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.8108108108108109,
90
+ "grad_norm": 35.16509246826172,
91
+ "learning_rate": 2.480414563972433e-07,
92
+ "loss": 0.3152,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.8783783783783784,
97
+ "grad_norm": 12.845327377319336,
98
+ "learning_rate": 2.687115777636802e-07,
99
+ "loss": 0.2524,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.9459459459459459,
104
+ "grad_norm": 10.476096153259277,
105
+ "learning_rate": 2.893816991301172e-07,
106
+ "loss": 0.2762,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "eval_loss": 0.9408120512962341,
112
+ "eval_runtime": 3.0875,
113
+ "eval_samples_per_second": 127.286,
114
+ "eval_steps_per_second": 8.097,
115
+ "step": 148
116
+ },
117
+ {
118
+ "epoch": 1.0135135135135136,
119
+ "grad_norm": 9.485784530639648,
120
+ "learning_rate": 3.100518204965541e-07,
121
+ "loss": 0.2786,
122
+ "step": 150
123
+ },
124
+ {
125
+ "epoch": 1.0810810810810811,
126
+ "grad_norm": 13.279548645019531,
127
+ "learning_rate": 3.3072194186299103e-07,
128
+ "loss": 0.2737,
129
+ "step": 160
130
+ },
131
+ {
132
+ "epoch": 1.1486486486486487,
133
+ "grad_norm": 20.476022720336914,
134
+ "learning_rate": 3.51392063229428e-07,
135
+ "loss": 0.2716,
136
+ "step": 170
137
+ },
138
+ {
139
+ "epoch": 1.2162162162162162,
140
+ "grad_norm": 7.177048206329346,
141
+ "learning_rate": 3.720621845958649e-07,
142
+ "loss": 0.303,
143
+ "step": 180
144
+ },
145
+ {
146
+ "epoch": 1.2837837837837838,
147
+ "grad_norm": 25.57468032836914,
148
+ "learning_rate": 3.927323059623019e-07,
149
+ "loss": 0.3598,
150
+ "step": 190
151
+ },
152
+ {
153
+ "epoch": 1.3513513513513513,
154
+ "grad_norm": 17.67203712463379,
155
+ "learning_rate": 4.1340242732873883e-07,
156
+ "loss": 0.3148,
157
+ "step": 200
158
+ },
159
+ {
160
+ "epoch": 1.4189189189189189,
161
+ "grad_norm": 18.490848541259766,
162
+ "learning_rate": 4.340725486951757e-07,
163
+ "loss": 0.2996,
164
+ "step": 210
165
+ },
166
+ {
167
+ "epoch": 1.4864864864864864,
168
+ "grad_norm": 7.604789733886719,
169
+ "learning_rate": 4.547426700616127e-07,
170
+ "loss": 0.2956,
171
+ "step": 220
172
+ },
173
+ {
174
+ "epoch": 1.554054054054054,
175
+ "grad_norm": 6.397325038909912,
176
+ "learning_rate": 4.7541279142804964e-07,
177
+ "loss": 0.2275,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 1.6216216216216215,
182
+ "grad_norm": 15.845990180969238,
183
+ "learning_rate": 4.960829127944866e-07,
184
+ "loss": 0.2727,
185
+ "step": 240
186
+ },
187
+ {
188
+ "epoch": 1.689189189189189,
189
+ "grad_norm": 4.708223342895508,
190
+ "learning_rate": 5.167530341609235e-07,
191
+ "loss": 0.288,
192
+ "step": 250
193
+ },
194
+ {
195
+ "epoch": 1.7567567567567568,
196
+ "grad_norm": 3.2593748569488525,
197
+ "learning_rate": 5.374231555273605e-07,
198
+ "loss": 0.2449,
199
+ "step": 260
200
+ },
201
+ {
202
+ "epoch": 1.8243243243243243,
203
+ "grad_norm": 19.987289428710938,
204
+ "learning_rate": 5.580932768937974e-07,
205
+ "loss": 0.3159,
206
+ "step": 270
207
+ },
208
+ {
209
+ "epoch": 1.8918918918918919,
210
+ "grad_norm": 23.08250617980957,
211
+ "learning_rate": 5.787633982602344e-07,
212
+ "loss": 0.2457,
213
+ "step": 280
214
+ },
215
+ {
216
+ "epoch": 1.9594594594594594,
217
+ "grad_norm": 16.686283111572266,
218
+ "learning_rate": 5.994335196266713e-07,
219
+ "loss": 0.1901,
220
+ "step": 290
221
+ }
222
+ ],
223
+ "logging_steps": 10,
224
+ "max_steps": 296,
225
+ "num_input_tokens_seen": 0,
226
+ "num_train_epochs": 2,
227
+ "save_steps": 500,
228
+ "stateful_callbacks": {
229
+ "TrainerControl": {
230
+ "args": {
231
+ "should_epoch_stop": false,
232
+ "should_evaluate": false,
233
+ "should_log": false,
234
+ "should_save": true,
235
+ "should_training_stop": true
236
+ },
237
+ "attributes": {}
238
+ }
239
+ },
240
+ "total_flos": 152279624951040.0,
241
+ "train_batch_size": 8,
242
+ "trial_name": null,
243
+ "trial_params": {
244
+ "learning_rate": 1.033506068321847e-06,
245
+ "num_train_epochs": 2,
246
+ "per_device_train_batch_size": 8,
247
+ "seed": 39
248
+ }
249
+ }
run-2/checkpoint-296/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d659a6781d7f46824a106c027f33d02fb5e12dec13a6fbc25c770267dd49254
3
+ size 5112
run-2/checkpoint-296/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-3/checkpoint-295/config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
3
+ "_num_labels": 5,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_dropout": null,
9
+ "directionality": "bidi",
10
+ "finetuning_task": "sentiment-analysis",
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "1 star",
16
+ "1": "2 stars",
17
+ "2": "3 stars",
18
+ "3": "4 stars",
19
+ "4": "5 stars"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 3072,
23
+ "label2id": {
24
+ "1 star": 0,
25
+ "2 stars": 1,
26
+ "3 stars": 2,
27
+ "4 stars": 3,
28
+ "5 stars": 4
29
+ },
30
+ "layer_norm_eps": 1e-12,
31
+ "max_position_embeddings": 512,
32
+ "model_type": "bert",
33
+ "num_attention_heads": 12,
34
+ "num_hidden_layers": 12,
35
+ "output_past": true,
36
+ "pad_token_id": 0,
37
+ "pooler_fc_size": 768,
38
+ "pooler_num_attention_heads": 12,
39
+ "pooler_num_fc_layers": 3,
40
+ "pooler_size_per_head": 128,
41
+ "pooler_type": "first_token_transform",
42
+ "position_embedding_type": "absolute",
43
+ "problem_type": "single_label_classification",
44
+ "torch_dtype": "float32",
45
+ "transformers_version": "4.44.2",
46
+ "type_vocab_size": 2,
47
+ "use_cache": true,
48
+ "vocab_size": 105879
49
+ }
run-3/checkpoint-295/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b66834313fb5809171823aadd15969c48fc40423a5fad81f105f9953e0f9d04
3
+ size 669464588
run-3/checkpoint-295/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5713bd07ba4ff85132bd89c2d23741007e7369a50c36b3fec6843b6116fe5631
3
+ size 1339050234
run-3/checkpoint-295/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e225f1e98308ebd6830a8e8a002c8234e9cdc278fa8f3763323ab15cde900ee
3
+ size 14244
run-3/checkpoint-295/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9322356d21f5762798699d8ea516179054fd3041294f2e3ad969a0f4b93f6b2
3
+ size 1064
run-3/checkpoint-295/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-3/checkpoint-295/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-3/checkpoint-295/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "max_len": 512,
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
run-3/checkpoint-295/trainer_state.json ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 295,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03389830508474576,
13
+ "grad_norm": 32.504554748535156,
14
+ "learning_rate": 1.2477695266700604e-06,
15
+ "loss": 0.3078,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.06779661016949153,
20
+ "grad_norm": 21.678878784179688,
21
+ "learning_rate": 2.4955390533401208e-06,
22
+ "loss": 0.38,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.1016949152542373,
27
+ "grad_norm": 13.523513793945312,
28
+ "learning_rate": 3.7433085800101813e-06,
29
+ "loss": 0.1996,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.13559322033898305,
34
+ "grad_norm": 4.588716983795166,
35
+ "learning_rate": 4.9910781066802415e-06,
36
+ "loss": 0.2688,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.1694915254237288,
41
+ "grad_norm": 12.524947166442871,
42
+ "learning_rate": 6.238847633350303e-06,
43
+ "loss": 0.2351,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.2033898305084746,
48
+ "grad_norm": 39.708248138427734,
49
+ "learning_rate": 7.486617160020363e-06,
50
+ "loss": 0.2264,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.23728813559322035,
55
+ "grad_norm": 33.86473083496094,
56
+ "learning_rate": 8.734386686690424e-06,
57
+ "loss": 0.1624,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.2711864406779661,
62
+ "grad_norm": 30.304136276245117,
63
+ "learning_rate": 9.982156213360483e-06,
64
+ "loss": 0.1768,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.3050847457627119,
69
+ "grad_norm": 4.641134262084961,
70
+ "learning_rate": 1.1229925740030544e-05,
71
+ "loss": 0.3191,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.3389830508474576,
76
+ "grad_norm": 3.7959961891174316,
77
+ "learning_rate": 1.2477695266700606e-05,
78
+ "loss": 0.3552,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.3728813559322034,
83
+ "grad_norm": 0.8196011781692505,
84
+ "learning_rate": 1.3725464793370665e-05,
85
+ "loss": 0.2558,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.4067796610169492,
90
+ "grad_norm": 27.152273178100586,
91
+ "learning_rate": 1.4973234320040725e-05,
92
+ "loss": 0.5833,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.4406779661016949,
97
+ "grad_norm": 4.423884391784668,
98
+ "learning_rate": 1.6221003846710788e-05,
99
+ "loss": 0.633,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.4745762711864407,
104
+ "grad_norm": 39.28899002075195,
105
+ "learning_rate": 1.7468773373380848e-05,
106
+ "loss": 0.5682,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.5084745762711864,
111
+ "grad_norm": 2.4689102172851562,
112
+ "learning_rate": 1.8716542900050905e-05,
113
+ "loss": 0.6854,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.5423728813559322,
118
+ "grad_norm": 67.79933166503906,
119
+ "learning_rate": 1.9964312426720966e-05,
120
+ "loss": 0.5666,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.576271186440678,
125
+ "grad_norm": 26.09642219543457,
126
+ "learning_rate": 2.121208195339103e-05,
127
+ "loss": 0.7398,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.6101694915254238,
132
+ "grad_norm": 20.591644287109375,
133
+ "learning_rate": 2.2459851480061087e-05,
134
+ "loss": 0.5566,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.6440677966101694,
139
+ "grad_norm": 54.15541076660156,
140
+ "learning_rate": 2.3707621006731148e-05,
141
+ "loss": 0.6932,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.6779661016949152,
146
+ "grad_norm": 1.1118764877319336,
147
+ "learning_rate": 2.4955390533401212e-05,
148
+ "loss": 0.7307,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.711864406779661,
153
+ "grad_norm": 7.498295783996582,
154
+ "learning_rate": 2.620316006007127e-05,
155
+ "loss": 0.7541,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.7457627118644068,
160
+ "grad_norm": 125.26350402832031,
161
+ "learning_rate": 2.745092958674133e-05,
162
+ "loss": 0.6168,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.7796610169491526,
167
+ "grad_norm": 64.39082336425781,
168
+ "learning_rate": 2.869869911341139e-05,
169
+ "loss": 0.4098,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.8135593220338984,
174
+ "grad_norm": 13.032197952270508,
175
+ "learning_rate": 2.994646864008145e-05,
176
+ "loss": 0.549,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.847457627118644,
181
+ "grad_norm": 47.54188537597656,
182
+ "learning_rate": 3.119423816675151e-05,
183
+ "loss": 1.0132,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.8813559322033898,
188
+ "grad_norm": 18.0958251953125,
189
+ "learning_rate": 3.2442007693421575e-05,
190
+ "loss": 0.8416,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.9152542372881356,
195
+ "grad_norm": 25.700082778930664,
196
+ "learning_rate": 3.368977722009163e-05,
197
+ "loss": 0.8113,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.9491525423728814,
202
+ "grad_norm": 20.261831283569336,
203
+ "learning_rate": 3.4937546746761697e-05,
204
+ "loss": 0.9122,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.9830508474576272,
209
+ "grad_norm": 12.409477233886719,
210
+ "learning_rate": 3.6185316273431754e-05,
211
+ "loss": 0.5287,
212
+ "step": 290
213
+ }
214
+ ],
215
+ "logging_steps": 10,
216
+ "max_steps": 295,
217
+ "num_input_tokens_seen": 0,
218
+ "num_train_epochs": 1,
219
+ "save_steps": 500,
220
+ "stateful_callbacks": {
221
+ "TrainerControl": {
222
+ "args": {
223
+ "should_epoch_stop": false,
224
+ "should_evaluate": false,
225
+ "should_log": false,
226
+ "should_save": true,
227
+ "should_training_stop": true
228
+ },
229
+ "attributes": {}
230
+ }
231
+ },
232
+ "total_flos": 76304261314560.0,
233
+ "train_batch_size": 4,
234
+ "trial_name": null,
235
+ "trial_params": {
236
+ "learning_rate": 6.238847633350302e-05,
237
+ "num_train_epochs": 1,
238
+ "per_device_train_batch_size": 4,
239
+ "seed": 12
240
+ }
241
+ }
run-3/checkpoint-295/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9021fc656029eb7ea6cff04ecc68350cb36d8078aeea747ba522f9bdf60c5be7
3
+ size 5112
run-3/checkpoint-295/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-4/checkpoint-19/config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
3
+ "_num_labels": 5,
4
+ "architectures": [
5
+ "BertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "classifier_dropout": null,
9
+ "directionality": "bidi",
10
+ "finetuning_task": "sentiment-analysis",
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "1 star",
16
+ "1": "2 stars",
17
+ "2": "3 stars",
18
+ "3": "4 stars",
19
+ "4": "5 stars"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 3072,
23
+ "label2id": {
24
+ "1 star": 0,
25
+ "2 stars": 1,
26
+ "3 stars": 2,
27
+ "4 stars": 3,
28
+ "5 stars": 4
29
+ },
30
+ "layer_norm_eps": 1e-12,
31
+ "max_position_embeddings": 512,
32
+ "model_type": "bert",
33
+ "num_attention_heads": 12,
34
+ "num_hidden_layers": 12,
35
+ "output_past": true,
36
+ "pad_token_id": 0,
37
+ "pooler_fc_size": 768,
38
+ "pooler_num_attention_heads": 12,
39
+ "pooler_num_fc_layers": 3,
40
+ "pooler_size_per_head": 128,
41
+ "pooler_type": "first_token_transform",
42
+ "position_embedding_type": "absolute",
43
+ "problem_type": "single_label_classification",
44
+ "torch_dtype": "float32",
45
+ "transformers_version": "4.44.2",
46
+ "type_vocab_size": 2,
47
+ "use_cache": true,
48
+ "vocab_size": 105879
49
+ }
run-4/checkpoint-19/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06e915c1d6cec5db96c3027a05cb12bc940ae21f46565e79faa5754c8a684aa0
3
+ size 669464588
run-4/checkpoint-19/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44dd60d885a10c63785672143b4d8dcc80d5911518f2efb882deb08a9ba56c5d
3
+ size 1339050234