callista6699
committed on
Commit
•
8b7f717
1
Parent(s):
baf0f94
Training completed!
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- README.md +60 -0
- config.json +49 -0
- model.safetensors +3 -0
- run-0/checkpoint-19/config.json +49 -0
- run-0/checkpoint-19/model.safetensors +3 -0
- run-0/checkpoint-19/optimizer.pt +3 -0
- run-0/checkpoint-19/rng_state.pth +3 -0
- run-0/checkpoint-19/scheduler.pt +3 -0
- run-0/checkpoint-19/special_tokens_map.json +7 -0
- run-0/checkpoint-19/tokenizer.json +0 -0
- run-0/checkpoint-19/tokenizer_config.json +58 -0
- run-0/checkpoint-19/trainer_state.json +45 -0
- run-0/checkpoint-19/training_args.bin +3 -0
- run-0/checkpoint-19/vocab.txt +0 -0
- run-1/checkpoint-38/config.json +49 -0
- run-1/checkpoint-38/model.safetensors +3 -0
- run-1/checkpoint-38/optimizer.pt +3 -0
- run-1/checkpoint-38/rng_state.pth +3 -0
- run-1/checkpoint-38/scheduler.pt +3 -0
- run-1/checkpoint-38/special_tokens_map.json +7 -0
- run-1/checkpoint-38/tokenizer.json +0 -0
- run-1/checkpoint-38/tokenizer_config.json +58 -0
- run-1/checkpoint-38/trainer_state.json +67 -0
- run-1/checkpoint-38/training_args.bin +3 -0
- run-1/checkpoint-38/vocab.txt +0 -0
- run-2/checkpoint-296/config.json +49 -0
- run-2/checkpoint-296/model.safetensors +3 -0
- run-2/checkpoint-296/optimizer.pt +3 -0
- run-2/checkpoint-296/rng_state.pth +3 -0
- run-2/checkpoint-296/scheduler.pt +3 -0
- run-2/checkpoint-296/special_tokens_map.json +7 -0
- run-2/checkpoint-296/tokenizer.json +0 -0
- run-2/checkpoint-296/tokenizer_config.json +58 -0
- run-2/checkpoint-296/trainer_state.json +249 -0
- run-2/checkpoint-296/training_args.bin +3 -0
- run-2/checkpoint-296/vocab.txt +0 -0
- run-3/checkpoint-295/config.json +49 -0
- run-3/checkpoint-295/model.safetensors +3 -0
- run-3/checkpoint-295/optimizer.pt +3 -0
- run-3/checkpoint-295/rng_state.pth +3 -0
- run-3/checkpoint-295/scheduler.pt +3 -0
- run-3/checkpoint-295/special_tokens_map.json +7 -0
- run-3/checkpoint-295/tokenizer.json +0 -0
- run-3/checkpoint-295/tokenizer_config.json +58 -0
- run-3/checkpoint-295/trainer_state.json +241 -0
- run-3/checkpoint-295/training_args.bin +3 -0
- run-3/checkpoint-295/vocab.txt +0 -0
- run-4/checkpoint-19/config.json +49 -0
- run-4/checkpoint-19/model.safetensors +3 -0
- run-4/checkpoint-19/optimizer.pt +3 -0
README.md
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: transformers
|
3 |
+
license: mit
|
4 |
+
base_model: nlptown/bert-base-multilingual-uncased-sentiment
|
5 |
+
tags:
|
6 |
+
- generated_from_trainer
|
7 |
+
model-index:
|
8 |
+
- name: results
|
9 |
+
results: []
|
10 |
+
---
|
11 |
+
|
12 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
13 |
+
should probably proofread and complete it, then remove this comment. -->
|
14 |
+
|
15 |
+
# results
|
16 |
+
|
17 |
+
This model is a fine-tuned version of [nlptown/bert-base-multilingual-uncased-sentiment](https://huggingface.co/nlptown/bert-base-multilingual-uncased-sentiment) on an unknown dataset.
|
18 |
+
It achieves the following results on the evaluation set:
|
19 |
+
- Loss: 2.6110
|
20 |
+
|
21 |
+
## Model description
|
22 |
+
|
23 |
+
More information needed
|
24 |
+
|
25 |
+
## Intended uses & limitations
|
26 |
+
|
27 |
+
More information needed
|
28 |
+
|
29 |
+
## Training and evaluation data
|
30 |
+
|
31 |
+
More information needed
|
32 |
+
|
33 |
+
## Training procedure
|
34 |
+
|
35 |
+
### Training hyperparameters
|
36 |
+
|
37 |
+
The following hyperparameters were used during training:
|
38 |
+
- learning_rate: 2.934292727323431e-05
|
39 |
+
- train_batch_size: 4
|
40 |
+
- eval_batch_size: 16
|
41 |
+
- seed: 16
|
42 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
43 |
+
- lr_scheduler_type: linear
|
44 |
+
- lr_scheduler_warmup_steps: 500
|
45 |
+
- num_epochs: 3
|
46 |
+
|
47 |
+
### Training results
|
48 |
+
|
49 |
+
| Training Loss | Epoch | Step | Validation Loss |
|
50 |
+
|:-------------:|:-----:|:----:|:---------------:|
|
51 |
+
| 0.0002 | 1.0 | 295 | 2.6070 |
|
52 |
+
| 0.5428 | 2.0 | 590 | 3.1094 |
|
53 |
+
| 0.0002 | 3.0 | 885 | 2.6110 |
|
54 |
+
|
55 |
+
|
56 |
+
### Framework versions
|
57 |
+
|
58 |
+
- Transformers 4.44.2
|
59 |
+
- Pytorch 2.4.1+cu121
|
60 |
+
- Tokenizers 0.19.1
|
config.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
|
3 |
+
"_num_labels": 5,
|
4 |
+
"architectures": [
|
5 |
+
"BertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_probs_dropout_prob": 0.1,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"directionality": "bidi",
|
10 |
+
"finetuning_task": "sentiment-analysis",
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout_prob": 0.1,
|
13 |
+
"hidden_size": 768,
|
14 |
+
"id2label": {
|
15 |
+
"0": "1 star",
|
16 |
+
"1": "2 stars",
|
17 |
+
"2": "3 stars",
|
18 |
+
"3": "4 stars",
|
19 |
+
"4": "5 stars"
|
20 |
+
},
|
21 |
+
"initializer_range": 0.02,
|
22 |
+
"intermediate_size": 3072,
|
23 |
+
"label2id": {
|
24 |
+
"1 star": 0,
|
25 |
+
"2 stars": 1,
|
26 |
+
"3 stars": 2,
|
27 |
+
"4 stars": 3,
|
28 |
+
"5 stars": 4
|
29 |
+
},
|
30 |
+
"layer_norm_eps": 1e-12,
|
31 |
+
"max_position_embeddings": 512,
|
32 |
+
"model_type": "bert",
|
33 |
+
"num_attention_heads": 12,
|
34 |
+
"num_hidden_layers": 12,
|
35 |
+
"output_past": true,
|
36 |
+
"pad_token_id": 0,
|
37 |
+
"pooler_fc_size": 768,
|
38 |
+
"pooler_num_attention_heads": 12,
|
39 |
+
"pooler_num_fc_layers": 3,
|
40 |
+
"pooler_size_per_head": 128,
|
41 |
+
"pooler_type": "first_token_transform",
|
42 |
+
"position_embedding_type": "absolute",
|
43 |
+
"problem_type": "single_label_classification",
|
44 |
+
"torch_dtype": "float32",
|
45 |
+
"transformers_version": "4.44.2",
|
46 |
+
"type_vocab_size": 2,
|
47 |
+
"use_cache": true,
|
48 |
+
"vocab_size": 105879
|
49 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adb2f037c4fcb0095b0de830bf80c7a07b9c9db02b3ebca71a7b65e869d77343
|
3 |
+
size 669464588
|
run-0/checkpoint-19/config.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
|
3 |
+
"_num_labels": 5,
|
4 |
+
"architectures": [
|
5 |
+
"BertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_probs_dropout_prob": 0.1,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"directionality": "bidi",
|
10 |
+
"finetuning_task": "sentiment-analysis",
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout_prob": 0.1,
|
13 |
+
"hidden_size": 768,
|
14 |
+
"id2label": {
|
15 |
+
"0": "1 star",
|
16 |
+
"1": "2 stars",
|
17 |
+
"2": "3 stars",
|
18 |
+
"3": "4 stars",
|
19 |
+
"4": "5 stars"
|
20 |
+
},
|
21 |
+
"initializer_range": 0.02,
|
22 |
+
"intermediate_size": 3072,
|
23 |
+
"label2id": {
|
24 |
+
"1 star": 0,
|
25 |
+
"2 stars": 1,
|
26 |
+
"3 stars": 2,
|
27 |
+
"4 stars": 3,
|
28 |
+
"5 stars": 4
|
29 |
+
},
|
30 |
+
"layer_norm_eps": 1e-12,
|
31 |
+
"max_position_embeddings": 512,
|
32 |
+
"model_type": "bert",
|
33 |
+
"num_attention_heads": 12,
|
34 |
+
"num_hidden_layers": 12,
|
35 |
+
"output_past": true,
|
36 |
+
"pad_token_id": 0,
|
37 |
+
"pooler_fc_size": 768,
|
38 |
+
"pooler_num_attention_heads": 12,
|
39 |
+
"pooler_num_fc_layers": 3,
|
40 |
+
"pooler_size_per_head": 128,
|
41 |
+
"pooler_type": "first_token_transform",
|
42 |
+
"position_embedding_type": "absolute",
|
43 |
+
"problem_type": "single_label_classification",
|
44 |
+
"torch_dtype": "float32",
|
45 |
+
"transformers_version": "4.44.2",
|
46 |
+
"type_vocab_size": 2,
|
47 |
+
"use_cache": true,
|
48 |
+
"vocab_size": 105879
|
49 |
+
}
|
run-0/checkpoint-19/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d5f5abba49994e4f10e1f832ab7fb583704c86a0e59a2640cadff82a480f1f8
|
3 |
+
size 669464588
|
run-0/checkpoint-19/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:857b82e9b886adf38cadf4c854d81d03f5855d5b85d01dc295baef72968df64a
|
3 |
+
size 1339050234
|
run-0/checkpoint-19/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3d12f967010971f6c719fc8b0c67a887b6c05899c8df2ac0230989587877407
|
3 |
+
size 14244
|
run-0/checkpoint-19/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f0333332e23fd259cb255054bff6d10b7e0571a091482a54a50087b65d8f5f2
|
3 |
+
size 1064
|
run-0/checkpoint-19/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
run-0/checkpoint-19/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-0/checkpoint-19/tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"max_len": 512,
|
50 |
+
"model_max_length": 512,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_token": "[PAD]",
|
53 |
+
"sep_token": "[SEP]",
|
54 |
+
"strip_accents": null,
|
55 |
+
"tokenize_chinese_chars": true,
|
56 |
+
"tokenizer_class": "BertTokenizer",
|
57 |
+
"unk_token": "[UNK]"
|
58 |
+
}
|
run-0/checkpoint-19/trainer_state.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 19,
|
7 |
+
"is_hyper_param_search": true,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.5263157894736842,
|
13 |
+
"grad_norm": 6.248871803283691,
|
14 |
+
"learning_rate": 1.1698489431263144e-07,
|
15 |
+
"loss": 0.4253,
|
16 |
+
"step": 10
|
17 |
+
}
|
18 |
+
],
|
19 |
+
"logging_steps": 10,
|
20 |
+
"max_steps": 19,
|
21 |
+
"num_input_tokens_seen": 0,
|
22 |
+
"num_train_epochs": 1,
|
23 |
+
"save_steps": 500,
|
24 |
+
"stateful_callbacks": {
|
25 |
+
"TrainerControl": {
|
26 |
+
"args": {
|
27 |
+
"should_epoch_stop": false,
|
28 |
+
"should_evaluate": false,
|
29 |
+
"should_log": false,
|
30 |
+
"should_save": true,
|
31 |
+
"should_training_stop": true
|
32 |
+
},
|
33 |
+
"attributes": {}
|
34 |
+
}
|
35 |
+
},
|
36 |
+
"total_flos": 42098902794240.0,
|
37 |
+
"train_batch_size": 64,
|
38 |
+
"trial_name": null,
|
39 |
+
"trial_params": {
|
40 |
+
"learning_rate": 5.849244715631572e-06,
|
41 |
+
"num_train_epochs": 1,
|
42 |
+
"per_device_train_batch_size": 64,
|
43 |
+
"seed": 1
|
44 |
+
}
|
45 |
+
}
|
run-0/checkpoint-19/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9f07479eb33644366a5ddfafbb9fb5c21b3c754996ddd2be7b070a1a725a265
|
3 |
+
size 5112
|
run-0/checkpoint-19/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-1/checkpoint-38/config.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
|
3 |
+
"_num_labels": 5,
|
4 |
+
"architectures": [
|
5 |
+
"BertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_probs_dropout_prob": 0.1,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"directionality": "bidi",
|
10 |
+
"finetuning_task": "sentiment-analysis",
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout_prob": 0.1,
|
13 |
+
"hidden_size": 768,
|
14 |
+
"id2label": {
|
15 |
+
"0": "1 star",
|
16 |
+
"1": "2 stars",
|
17 |
+
"2": "3 stars",
|
18 |
+
"3": "4 stars",
|
19 |
+
"4": "5 stars"
|
20 |
+
},
|
21 |
+
"initializer_range": 0.02,
|
22 |
+
"intermediate_size": 3072,
|
23 |
+
"label2id": {
|
24 |
+
"1 star": 0,
|
25 |
+
"2 stars": 1,
|
26 |
+
"3 stars": 2,
|
27 |
+
"4 stars": 3,
|
28 |
+
"5 stars": 4
|
29 |
+
},
|
30 |
+
"layer_norm_eps": 1e-12,
|
31 |
+
"max_position_embeddings": 512,
|
32 |
+
"model_type": "bert",
|
33 |
+
"num_attention_heads": 12,
|
34 |
+
"num_hidden_layers": 12,
|
35 |
+
"output_past": true,
|
36 |
+
"pad_token_id": 0,
|
37 |
+
"pooler_fc_size": 768,
|
38 |
+
"pooler_num_attention_heads": 12,
|
39 |
+
"pooler_num_fc_layers": 3,
|
40 |
+
"pooler_size_per_head": 128,
|
41 |
+
"pooler_type": "first_token_transform",
|
42 |
+
"position_embedding_type": "absolute",
|
43 |
+
"problem_type": "single_label_classification",
|
44 |
+
"torch_dtype": "float32",
|
45 |
+
"transformers_version": "4.44.2",
|
46 |
+
"type_vocab_size": 2,
|
47 |
+
"use_cache": true,
|
48 |
+
"vocab_size": 105879
|
49 |
+
}
|
run-1/checkpoint-38/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfe9ed0a1b41491d2ecb3014db2f0bfcf7e37e311c6f7bd51e3c54ffa5bf8342
|
3 |
+
size 669464588
|
run-1/checkpoint-38/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37a6a27d2262563ee9006cc2a777a34723be0df25ef09c707457b977100474ec
|
3 |
+
size 1339050234
|
run-1/checkpoint-38/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1775d8c7698f101cd06aaf4074e223c5851d61233c1992c0032b90cbd1cdfb3b
|
3 |
+
size 14244
|
run-1/checkpoint-38/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68e7b049716a379e783a23bccb72371c89bc92dd060a15ab0f21514622d89991
|
3 |
+
size 1064
|
run-1/checkpoint-38/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
run-1/checkpoint-38/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-1/checkpoint-38/tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"max_len": 512,
|
50 |
+
"model_max_length": 512,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_token": "[PAD]",
|
53 |
+
"sep_token": "[SEP]",
|
54 |
+
"strip_accents": null,
|
55 |
+
"tokenize_chinese_chars": true,
|
56 |
+
"tokenizer_class": "BertTokenizer",
|
57 |
+
"unk_token": "[UNK]"
|
58 |
+
}
|
run-1/checkpoint-38/trainer_state.json
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 38,
|
7 |
+
"is_hyper_param_search": true,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.5263157894736842,
|
13 |
+
"grad_norm": 6.260778427124023,
|
14 |
+
"learning_rate": 1.0992924659429563e-06,
|
15 |
+
"loss": 0.3935,
|
16 |
+
"step": 10
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 1.0,
|
20 |
+
"eval_loss": 0.8991609215736389,
|
21 |
+
"eval_runtime": 3.0477,
|
22 |
+
"eval_samples_per_second": 128.949,
|
23 |
+
"eval_steps_per_second": 8.203,
|
24 |
+
"step": 19
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 1.0526315789473684,
|
28 |
+
"grad_norm": 6.275106906890869,
|
29 |
+
"learning_rate": 2.1985849318859127e-06,
|
30 |
+
"loss": 0.3804,
|
31 |
+
"step": 20
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"epoch": 1.5789473684210527,
|
35 |
+
"grad_norm": 5.347285747528076,
|
36 |
+
"learning_rate": 3.2978773978288686e-06,
|
37 |
+
"loss": 0.346,
|
38 |
+
"step": 30
|
39 |
+
}
|
40 |
+
],
|
41 |
+
"logging_steps": 10,
|
42 |
+
"max_steps": 38,
|
43 |
+
"num_input_tokens_seen": 0,
|
44 |
+
"num_train_epochs": 2,
|
45 |
+
"save_steps": 500,
|
46 |
+
"stateful_callbacks": {
|
47 |
+
"TrainerControl": {
|
48 |
+
"args": {
|
49 |
+
"should_epoch_stop": false,
|
50 |
+
"should_evaluate": false,
|
51 |
+
"should_log": false,
|
52 |
+
"should_save": true,
|
53 |
+
"should_training_stop": true
|
54 |
+
},
|
55 |
+
"attributes": {}
|
56 |
+
}
|
57 |
+
},
|
58 |
+
"total_flos": 123862865564928.0,
|
59 |
+
"train_batch_size": 64,
|
60 |
+
"trial_name": null,
|
61 |
+
"trial_params": {
|
62 |
+
"learning_rate": 5.496462329714781e-05,
|
63 |
+
"num_train_epochs": 2,
|
64 |
+
"per_device_train_batch_size": 64,
|
65 |
+
"seed": 7
|
66 |
+
}
|
67 |
+
}
|
run-1/checkpoint-38/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ba44fe98f416db72ca51c922415e1f51e4e1404a042747283edd18980a15494
|
3 |
+
size 5112
|
run-1/checkpoint-38/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-2/checkpoint-296/config.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
|
3 |
+
"_num_labels": 5,
|
4 |
+
"architectures": [
|
5 |
+
"BertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_probs_dropout_prob": 0.1,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"directionality": "bidi",
|
10 |
+
"finetuning_task": "sentiment-analysis",
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout_prob": 0.1,
|
13 |
+
"hidden_size": 768,
|
14 |
+
"id2label": {
|
15 |
+
"0": "1 star",
|
16 |
+
"1": "2 stars",
|
17 |
+
"2": "3 stars",
|
18 |
+
"3": "4 stars",
|
19 |
+
"4": "5 stars"
|
20 |
+
},
|
21 |
+
"initializer_range": 0.02,
|
22 |
+
"intermediate_size": 3072,
|
23 |
+
"label2id": {
|
24 |
+
"1 star": 0,
|
25 |
+
"2 stars": 1,
|
26 |
+
"3 stars": 2,
|
27 |
+
"4 stars": 3,
|
28 |
+
"5 stars": 4
|
29 |
+
},
|
30 |
+
"layer_norm_eps": 1e-12,
|
31 |
+
"max_position_embeddings": 512,
|
32 |
+
"model_type": "bert",
|
33 |
+
"num_attention_heads": 12,
|
34 |
+
"num_hidden_layers": 12,
|
35 |
+
"output_past": true,
|
36 |
+
"pad_token_id": 0,
|
37 |
+
"pooler_fc_size": 768,
|
38 |
+
"pooler_num_attention_heads": 12,
|
39 |
+
"pooler_num_fc_layers": 3,
|
40 |
+
"pooler_size_per_head": 128,
|
41 |
+
"pooler_type": "first_token_transform",
|
42 |
+
"position_embedding_type": "absolute",
|
43 |
+
"problem_type": "single_label_classification",
|
44 |
+
"torch_dtype": "float32",
|
45 |
+
"transformers_version": "4.44.2",
|
46 |
+
"type_vocab_size": 2,
|
47 |
+
"use_cache": true,
|
48 |
+
"vocab_size": 105879
|
49 |
+
}
|
run-2/checkpoint-296/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9a1af22c5f87ae22ee9a1cf5ca0d62eb19a23f9a2f9522692f20f2a1ee3054f
|
3 |
+
size 669464588
|
run-2/checkpoint-296/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30cd1bb00e95dba14a8cdd52c2b0e316a3fab537a820d9641d8d058cc0a2c8e3
|
3 |
+
size 1339050234
|
run-2/checkpoint-296/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29fcd1f8c83d899699357277f7c8e68c2ed22b005d7b0077fd7d6708841cd58e
|
3 |
+
size 14244
|
run-2/checkpoint-296/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f8904e10190ba4dd3d9443d80ca3c01c2c678f77b5c6457f9e844f15aef70c1
|
3 |
+
size 1064
|
run-2/checkpoint-296/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
run-2/checkpoint-296/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-2/checkpoint-296/tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"max_len": 512,
|
50 |
+
"model_max_length": 512,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_token": "[PAD]",
|
53 |
+
"sep_token": "[SEP]",
|
54 |
+
"strip_accents": null,
|
55 |
+
"tokenize_chinese_chars": true,
|
56 |
+
"tokenizer_class": "BertTokenizer",
|
57 |
+
"unk_token": "[UNK]"
|
58 |
+
}
|
run-2/checkpoint-296/trainer_state.json
ADDED
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 296,
|
7 |
+
"is_hyper_param_search": true,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.06756756756756757,
|
13 |
+
"grad_norm": 23.774078369140625,
|
14 |
+
"learning_rate": 2.067012136643694e-08,
|
15 |
+
"loss": 0.4146,
|
16 |
+
"step": 10
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.13513513513513514,
|
20 |
+
"grad_norm": 27.321271896362305,
|
21 |
+
"learning_rate": 4.134024273287388e-08,
|
22 |
+
"loss": 0.2673,
|
23 |
+
"step": 20
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.20270270270270271,
|
27 |
+
"grad_norm": 19.634639739990234,
|
28 |
+
"learning_rate": 6.201036409931082e-08,
|
29 |
+
"loss": 0.3514,
|
30 |
+
"step": 30
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.2702702702702703,
|
34 |
+
"grad_norm": 18.950864791870117,
|
35 |
+
"learning_rate": 8.268048546574776e-08,
|
36 |
+
"loss": 0.3675,
|
37 |
+
"step": 40
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.33783783783783783,
|
41 |
+
"grad_norm": 3.3055667877197266,
|
42 |
+
"learning_rate": 1.0335060683218471e-07,
|
43 |
+
"loss": 0.2814,
|
44 |
+
"step": 50
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.40540540540540543,
|
48 |
+
"grad_norm": 17.000532150268555,
|
49 |
+
"learning_rate": 1.2402072819862164e-07,
|
50 |
+
"loss": 0.3085,
|
51 |
+
"step": 60
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.47297297297297297,
|
55 |
+
"grad_norm": 15.456575393676758,
|
56 |
+
"learning_rate": 1.446908495650586e-07,
|
57 |
+
"loss": 0.321,
|
58 |
+
"step": 70
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.5405405405405406,
|
62 |
+
"grad_norm": 10.24705982208252,
|
63 |
+
"learning_rate": 1.6536097093149552e-07,
|
64 |
+
"loss": 0.242,
|
65 |
+
"step": 80
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.6081081081081081,
|
69 |
+
"grad_norm": 7.760463714599609,
|
70 |
+
"learning_rate": 1.8603109229793245e-07,
|
71 |
+
"loss": 0.3407,
|
72 |
+
"step": 90
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.6756756756756757,
|
76 |
+
"grad_norm": 25.994970321655273,
|
77 |
+
"learning_rate": 2.0670121366436942e-07,
|
78 |
+
"loss": 0.3126,
|
79 |
+
"step": 100
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.7432432432432432,
|
83 |
+
"grad_norm": 8.515066146850586,
|
84 |
+
"learning_rate": 2.2737133503080635e-07,
|
85 |
+
"loss": 0.2059,
|
86 |
+
"step": 110
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.8108108108108109,
|
90 |
+
"grad_norm": 35.16509246826172,
|
91 |
+
"learning_rate": 2.480414563972433e-07,
|
92 |
+
"loss": 0.3152,
|
93 |
+
"step": 120
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.8783783783783784,
|
97 |
+
"grad_norm": 12.845327377319336,
|
98 |
+
"learning_rate": 2.687115777636802e-07,
|
99 |
+
"loss": 0.2524,
|
100 |
+
"step": 130
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.9459459459459459,
|
104 |
+
"grad_norm": 10.476096153259277,
|
105 |
+
"learning_rate": 2.893816991301172e-07,
|
106 |
+
"loss": 0.2762,
|
107 |
+
"step": 140
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 1.0,
|
111 |
+
"eval_loss": 0.9408120512962341,
|
112 |
+
"eval_runtime": 3.0875,
|
113 |
+
"eval_samples_per_second": 127.286,
|
114 |
+
"eval_steps_per_second": 8.097,
|
115 |
+
"step": 148
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"epoch": 1.0135135135135136,
|
119 |
+
"grad_norm": 9.485784530639648,
|
120 |
+
"learning_rate": 3.100518204965541e-07,
|
121 |
+
"loss": 0.2786,
|
122 |
+
"step": 150
|
123 |
+
},
|
124 |
+
{
|
125 |
+
"epoch": 1.0810810810810811,
|
126 |
+
"grad_norm": 13.279548645019531,
|
127 |
+
"learning_rate": 3.3072194186299103e-07,
|
128 |
+
"loss": 0.2737,
|
129 |
+
"step": 160
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"epoch": 1.1486486486486487,
|
133 |
+
"grad_norm": 20.476022720336914,
|
134 |
+
"learning_rate": 3.51392063229428e-07,
|
135 |
+
"loss": 0.2716,
|
136 |
+
"step": 170
|
137 |
+
},
|
138 |
+
{
|
139 |
+
"epoch": 1.2162162162162162,
|
140 |
+
"grad_norm": 7.177048206329346,
|
141 |
+
"learning_rate": 3.720621845958649e-07,
|
142 |
+
"loss": 0.303,
|
143 |
+
"step": 180
|
144 |
+
},
|
145 |
+
{
|
146 |
+
"epoch": 1.2837837837837838,
|
147 |
+
"grad_norm": 25.57468032836914,
|
148 |
+
"learning_rate": 3.927323059623019e-07,
|
149 |
+
"loss": 0.3598,
|
150 |
+
"step": 190
|
151 |
+
},
|
152 |
+
{
|
153 |
+
"epoch": 1.3513513513513513,
|
154 |
+
"grad_norm": 17.67203712463379,
|
155 |
+
"learning_rate": 4.1340242732873883e-07,
|
156 |
+
"loss": 0.3148,
|
157 |
+
"step": 200
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"epoch": 1.4189189189189189,
|
161 |
+
"grad_norm": 18.490848541259766,
|
162 |
+
"learning_rate": 4.340725486951757e-07,
|
163 |
+
"loss": 0.2996,
|
164 |
+
"step": 210
|
165 |
+
},
|
166 |
+
{
|
167 |
+
"epoch": 1.4864864864864864,
|
168 |
+
"grad_norm": 7.604789733886719,
|
169 |
+
"learning_rate": 4.547426700616127e-07,
|
170 |
+
"loss": 0.2956,
|
171 |
+
"step": 220
|
172 |
+
},
|
173 |
+
{
|
174 |
+
"epoch": 1.554054054054054,
|
175 |
+
"grad_norm": 6.397325038909912,
|
176 |
+
"learning_rate": 4.7541279142804964e-07,
|
177 |
+
"loss": 0.2275,
|
178 |
+
"step": 230
|
179 |
+
},
|
180 |
+
{
|
181 |
+
"epoch": 1.6216216216216215,
|
182 |
+
"grad_norm": 15.845990180969238,
|
183 |
+
"learning_rate": 4.960829127944866e-07,
|
184 |
+
"loss": 0.2727,
|
185 |
+
"step": 240
|
186 |
+
},
|
187 |
+
{
|
188 |
+
"epoch": 1.689189189189189,
|
189 |
+
"grad_norm": 4.708223342895508,
|
190 |
+
"learning_rate": 5.167530341609235e-07,
|
191 |
+
"loss": 0.288,
|
192 |
+
"step": 250
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"epoch": 1.7567567567567568,
|
196 |
+
"grad_norm": 3.2593748569488525,
|
197 |
+
"learning_rate": 5.374231555273605e-07,
|
198 |
+
"loss": 0.2449,
|
199 |
+
"step": 260
|
200 |
+
},
|
201 |
+
{
|
202 |
+
"epoch": 1.8243243243243243,
|
203 |
+
"grad_norm": 19.987289428710938,
|
204 |
+
"learning_rate": 5.580932768937974e-07,
|
205 |
+
"loss": 0.3159,
|
206 |
+
"step": 270
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"epoch": 1.8918918918918919,
|
210 |
+
"grad_norm": 23.08250617980957,
|
211 |
+
"learning_rate": 5.787633982602344e-07,
|
212 |
+
"loss": 0.2457,
|
213 |
+
"step": 280
|
214 |
+
},
|
215 |
+
{
|
216 |
+
"epoch": 1.9594594594594594,
|
217 |
+
"grad_norm": 16.686283111572266,
|
218 |
+
"learning_rate": 5.994335196266713e-07,
|
219 |
+
"loss": 0.1901,
|
220 |
+
"step": 290
|
221 |
+
}
|
222 |
+
],
|
223 |
+
"logging_steps": 10,
|
224 |
+
"max_steps": 296,
|
225 |
+
"num_input_tokens_seen": 0,
|
226 |
+
"num_train_epochs": 2,
|
227 |
+
"save_steps": 500,
|
228 |
+
"stateful_callbacks": {
|
229 |
+
"TrainerControl": {
|
230 |
+
"args": {
|
231 |
+
"should_epoch_stop": false,
|
232 |
+
"should_evaluate": false,
|
233 |
+
"should_log": false,
|
234 |
+
"should_save": true,
|
235 |
+
"should_training_stop": true
|
236 |
+
},
|
237 |
+
"attributes": {}
|
238 |
+
}
|
239 |
+
},
|
240 |
+
"total_flos": 152279624951040.0,
|
241 |
+
"train_batch_size": 8,
|
242 |
+
"trial_name": null,
|
243 |
+
"trial_params": {
|
244 |
+
"learning_rate": 1.033506068321847e-06,
|
245 |
+
"num_train_epochs": 2,
|
246 |
+
"per_device_train_batch_size": 8,
|
247 |
+
"seed": 39
|
248 |
+
}
|
249 |
+
}
|
run-2/checkpoint-296/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d659a6781d7f46824a106c027f33d02fb5e12dec13a6fbc25c770267dd49254
|
3 |
+
size 5112
|
run-2/checkpoint-296/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-3/checkpoint-295/config.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
|
3 |
+
"_num_labels": 5,
|
4 |
+
"architectures": [
|
5 |
+
"BertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_probs_dropout_prob": 0.1,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"directionality": "bidi",
|
10 |
+
"finetuning_task": "sentiment-analysis",
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout_prob": 0.1,
|
13 |
+
"hidden_size": 768,
|
14 |
+
"id2label": {
|
15 |
+
"0": "1 star",
|
16 |
+
"1": "2 stars",
|
17 |
+
"2": "3 stars",
|
18 |
+
"3": "4 stars",
|
19 |
+
"4": "5 stars"
|
20 |
+
},
|
21 |
+
"initializer_range": 0.02,
|
22 |
+
"intermediate_size": 3072,
|
23 |
+
"label2id": {
|
24 |
+
"1 star": 0,
|
25 |
+
"2 stars": 1,
|
26 |
+
"3 stars": 2,
|
27 |
+
"4 stars": 3,
|
28 |
+
"5 stars": 4
|
29 |
+
},
|
30 |
+
"layer_norm_eps": 1e-12,
|
31 |
+
"max_position_embeddings": 512,
|
32 |
+
"model_type": "bert",
|
33 |
+
"num_attention_heads": 12,
|
34 |
+
"num_hidden_layers": 12,
|
35 |
+
"output_past": true,
|
36 |
+
"pad_token_id": 0,
|
37 |
+
"pooler_fc_size": 768,
|
38 |
+
"pooler_num_attention_heads": 12,
|
39 |
+
"pooler_num_fc_layers": 3,
|
40 |
+
"pooler_size_per_head": 128,
|
41 |
+
"pooler_type": "first_token_transform",
|
42 |
+
"position_embedding_type": "absolute",
|
43 |
+
"problem_type": "single_label_classification",
|
44 |
+
"torch_dtype": "float32",
|
45 |
+
"transformers_version": "4.44.2",
|
46 |
+
"type_vocab_size": 2,
|
47 |
+
"use_cache": true,
|
48 |
+
"vocab_size": 105879
|
49 |
+
}
|
run-3/checkpoint-295/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b66834313fb5809171823aadd15969c48fc40423a5fad81f105f9953e0f9d04
|
3 |
+
size 669464588
|
run-3/checkpoint-295/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5713bd07ba4ff85132bd89c2d23741007e7369a50c36b3fec6843b6116fe5631
|
3 |
+
size 1339050234
|
run-3/checkpoint-295/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e225f1e98308ebd6830a8e8a002c8234e9cdc278fa8f3763323ab15cde900ee
|
3 |
+
size 14244
|
run-3/checkpoint-295/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9322356d21f5762798699d8ea516179054fd3041294f2e3ad969a0f4b93f6b2
|
3 |
+
size 1064
|
run-3/checkpoint-295/special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
run-3/checkpoint-295/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-3/checkpoint-295/tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"mask_token": "[MASK]",
|
49 |
+
"max_len": 512,
|
50 |
+
"model_max_length": 512,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_token": "[PAD]",
|
53 |
+
"sep_token": "[SEP]",
|
54 |
+
"strip_accents": null,
|
55 |
+
"tokenize_chinese_chars": true,
|
56 |
+
"tokenizer_class": "BertTokenizer",
|
57 |
+
"unk_token": "[UNK]"
|
58 |
+
}
|
run-3/checkpoint-295/trainer_state.json
ADDED
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 295,
|
7 |
+
"is_hyper_param_search": true,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.03389830508474576,
|
13 |
+
"grad_norm": 32.504554748535156,
|
14 |
+
"learning_rate": 1.2477695266700604e-06,
|
15 |
+
"loss": 0.3078,
|
16 |
+
"step": 10
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.06779661016949153,
|
20 |
+
"grad_norm": 21.678878784179688,
|
21 |
+
"learning_rate": 2.4955390533401208e-06,
|
22 |
+
"loss": 0.38,
|
23 |
+
"step": 20
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.1016949152542373,
|
27 |
+
"grad_norm": 13.523513793945312,
|
28 |
+
"learning_rate": 3.7433085800101813e-06,
|
29 |
+
"loss": 0.1996,
|
30 |
+
"step": 30
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.13559322033898305,
|
34 |
+
"grad_norm": 4.588716983795166,
|
35 |
+
"learning_rate": 4.9910781066802415e-06,
|
36 |
+
"loss": 0.2688,
|
37 |
+
"step": 40
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.1694915254237288,
|
41 |
+
"grad_norm": 12.524947166442871,
|
42 |
+
"learning_rate": 6.238847633350303e-06,
|
43 |
+
"loss": 0.2351,
|
44 |
+
"step": 50
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.2033898305084746,
|
48 |
+
"grad_norm": 39.708248138427734,
|
49 |
+
"learning_rate": 7.486617160020363e-06,
|
50 |
+
"loss": 0.2264,
|
51 |
+
"step": 60
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.23728813559322035,
|
55 |
+
"grad_norm": 33.86473083496094,
|
56 |
+
"learning_rate": 8.734386686690424e-06,
|
57 |
+
"loss": 0.1624,
|
58 |
+
"step": 70
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.2711864406779661,
|
62 |
+
"grad_norm": 30.304136276245117,
|
63 |
+
"learning_rate": 9.982156213360483e-06,
|
64 |
+
"loss": 0.1768,
|
65 |
+
"step": 80
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.3050847457627119,
|
69 |
+
"grad_norm": 4.641134262084961,
|
70 |
+
"learning_rate": 1.1229925740030544e-05,
|
71 |
+
"loss": 0.3191,
|
72 |
+
"step": 90
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.3389830508474576,
|
76 |
+
"grad_norm": 3.7959961891174316,
|
77 |
+
"learning_rate": 1.2477695266700606e-05,
|
78 |
+
"loss": 0.3552,
|
79 |
+
"step": 100
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.3728813559322034,
|
83 |
+
"grad_norm": 0.8196011781692505,
|
84 |
+
"learning_rate": 1.3725464793370665e-05,
|
85 |
+
"loss": 0.2558,
|
86 |
+
"step": 110
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.4067796610169492,
|
90 |
+
"grad_norm": 27.152273178100586,
|
91 |
+
"learning_rate": 1.4973234320040725e-05,
|
92 |
+
"loss": 0.5833,
|
93 |
+
"step": 120
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.4406779661016949,
|
97 |
+
"grad_norm": 4.423884391784668,
|
98 |
+
"learning_rate": 1.6221003846710788e-05,
|
99 |
+
"loss": 0.633,
|
100 |
+
"step": 130
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.4745762711864407,
|
104 |
+
"grad_norm": 39.28899002075195,
|
105 |
+
"learning_rate": 1.7468773373380848e-05,
|
106 |
+
"loss": 0.5682,
|
107 |
+
"step": 140
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.5084745762711864,
|
111 |
+
"grad_norm": 2.4689102172851562,
|
112 |
+
"learning_rate": 1.8716542900050905e-05,
|
113 |
+
"loss": 0.6854,
|
114 |
+
"step": 150
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.5423728813559322,
|
118 |
+
"grad_norm": 67.79933166503906,
|
119 |
+
"learning_rate": 1.9964312426720966e-05,
|
120 |
+
"loss": 0.5666,
|
121 |
+
"step": 160
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.576271186440678,
|
125 |
+
"grad_norm": 26.09642219543457,
|
126 |
+
"learning_rate": 2.121208195339103e-05,
|
127 |
+
"loss": 0.7398,
|
128 |
+
"step": 170
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.6101694915254238,
|
132 |
+
"grad_norm": 20.591644287109375,
|
133 |
+
"learning_rate": 2.2459851480061087e-05,
|
134 |
+
"loss": 0.5566,
|
135 |
+
"step": 180
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.6440677966101694,
|
139 |
+
"grad_norm": 54.15541076660156,
|
140 |
+
"learning_rate": 2.3707621006731148e-05,
|
141 |
+
"loss": 0.6932,
|
142 |
+
"step": 190
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.6779661016949152,
|
146 |
+
"grad_norm": 1.1118764877319336,
|
147 |
+
"learning_rate": 2.4955390533401212e-05,
|
148 |
+
"loss": 0.7307,
|
149 |
+
"step": 200
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.711864406779661,
|
153 |
+
"grad_norm": 7.498295783996582,
|
154 |
+
"learning_rate": 2.620316006007127e-05,
|
155 |
+
"loss": 0.7541,
|
156 |
+
"step": 210
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.7457627118644068,
|
160 |
+
"grad_norm": 125.26350402832031,
|
161 |
+
"learning_rate": 2.745092958674133e-05,
|
162 |
+
"loss": 0.6168,
|
163 |
+
"step": 220
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.7796610169491526,
|
167 |
+
"grad_norm": 64.39082336425781,
|
168 |
+
"learning_rate": 2.869869911341139e-05,
|
169 |
+
"loss": 0.4098,
|
170 |
+
"step": 230
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.8135593220338984,
|
174 |
+
"grad_norm": 13.032197952270508,
|
175 |
+
"learning_rate": 2.994646864008145e-05,
|
176 |
+
"loss": 0.549,
|
177 |
+
"step": 240
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.847457627118644,
|
181 |
+
"grad_norm": 47.54188537597656,
|
182 |
+
"learning_rate": 3.119423816675151e-05,
|
183 |
+
"loss": 1.0132,
|
184 |
+
"step": 250
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.8813559322033898,
|
188 |
+
"grad_norm": 18.0958251953125,
|
189 |
+
"learning_rate": 3.2442007693421575e-05,
|
190 |
+
"loss": 0.8416,
|
191 |
+
"step": 260
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 0.9152542372881356,
|
195 |
+
"grad_norm": 25.700082778930664,
|
196 |
+
"learning_rate": 3.368977722009163e-05,
|
197 |
+
"loss": 0.8113,
|
198 |
+
"step": 270
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"epoch": 0.9491525423728814,
|
202 |
+
"grad_norm": 20.261831283569336,
|
203 |
+
"learning_rate": 3.4937546746761697e-05,
|
204 |
+
"loss": 0.9122,
|
205 |
+
"step": 280
|
206 |
+
},
|
207 |
+
{
|
208 |
+
"epoch": 0.9830508474576272,
|
209 |
+
"grad_norm": 12.409477233886719,
|
210 |
+
"learning_rate": 3.6185316273431754e-05,
|
211 |
+
"loss": 0.5287,
|
212 |
+
"step": 290
|
213 |
+
}
|
214 |
+
],
|
215 |
+
"logging_steps": 10,
|
216 |
+
"max_steps": 295,
|
217 |
+
"num_input_tokens_seen": 0,
|
218 |
+
"num_train_epochs": 1,
|
219 |
+
"save_steps": 500,
|
220 |
+
"stateful_callbacks": {
|
221 |
+
"TrainerControl": {
|
222 |
+
"args": {
|
223 |
+
"should_epoch_stop": false,
|
224 |
+
"should_evaluate": false,
|
225 |
+
"should_log": false,
|
226 |
+
"should_save": true,
|
227 |
+
"should_training_stop": true
|
228 |
+
},
|
229 |
+
"attributes": {}
|
230 |
+
}
|
231 |
+
},
|
232 |
+
"total_flos": 76304261314560.0,
|
233 |
+
"train_batch_size": 4,
|
234 |
+
"trial_name": null,
|
235 |
+
"trial_params": {
|
236 |
+
"learning_rate": 6.238847633350302e-05,
|
237 |
+
"num_train_epochs": 1,
|
238 |
+
"per_device_train_batch_size": 4,
|
239 |
+
"seed": 12
|
240 |
+
}
|
241 |
+
}
|
run-3/checkpoint-295/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9021fc656029eb7ea6cff04ecc68350cb36d8078aeea747ba522f9bdf60c5be7
|
3 |
+
size 5112
|
run-3/checkpoint-295/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run-4/checkpoint-19/config.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "nlptown/bert-base-multilingual-uncased-sentiment",
|
3 |
+
"_num_labels": 5,
|
4 |
+
"architectures": [
|
5 |
+
"BertForSequenceClassification"
|
6 |
+
],
|
7 |
+
"attention_probs_dropout_prob": 0.1,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"directionality": "bidi",
|
10 |
+
"finetuning_task": "sentiment-analysis",
|
11 |
+
"hidden_act": "gelu",
|
12 |
+
"hidden_dropout_prob": 0.1,
|
13 |
+
"hidden_size": 768,
|
14 |
+
"id2label": {
|
15 |
+
"0": "1 star",
|
16 |
+
"1": "2 stars",
|
17 |
+
"2": "3 stars",
|
18 |
+
"3": "4 stars",
|
19 |
+
"4": "5 stars"
|
20 |
+
},
|
21 |
+
"initializer_range": 0.02,
|
22 |
+
"intermediate_size": 3072,
|
23 |
+
"label2id": {
|
24 |
+
"1 star": 0,
|
25 |
+
"2 stars": 1,
|
26 |
+
"3 stars": 2,
|
27 |
+
"4 stars": 3,
|
28 |
+
"5 stars": 4
|
29 |
+
},
|
30 |
+
"layer_norm_eps": 1e-12,
|
31 |
+
"max_position_embeddings": 512,
|
32 |
+
"model_type": "bert",
|
33 |
+
"num_attention_heads": 12,
|
34 |
+
"num_hidden_layers": 12,
|
35 |
+
"output_past": true,
|
36 |
+
"pad_token_id": 0,
|
37 |
+
"pooler_fc_size": 768,
|
38 |
+
"pooler_num_attention_heads": 12,
|
39 |
+
"pooler_num_fc_layers": 3,
|
40 |
+
"pooler_size_per_head": 128,
|
41 |
+
"pooler_type": "first_token_transform",
|
42 |
+
"position_embedding_type": "absolute",
|
43 |
+
"problem_type": "single_label_classification",
|
44 |
+
"torch_dtype": "float32",
|
45 |
+
"transformers_version": "4.44.2",
|
46 |
+
"type_vocab_size": 2,
|
47 |
+
"use_cache": true,
|
48 |
+
"vocab_size": 105879
|
49 |
+
}
|
run-4/checkpoint-19/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06e915c1d6cec5db96c3027a05cb12bc940ae21f46565e79faa5754c8a684aa0
|
3 |
+
size 669464588
|
run-4/checkpoint-19/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44dd60d885a10c63785672143b4d8dcc80d5911518f2efb882deb08a9ba56c5d
|
3 |
+
size 1339050234
|