LiYuan199701 committed on
Commit
f26777c
1 Parent(s): abf4c09

Add model weights and configurations

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. README.md +58 -1
  2. checkpoint-13/config.json +37 -0
  3. checkpoint-13/optimizer.pt +3 -0
  4. checkpoint-13/pytorch_model.bin +3 -0
  5. checkpoint-13/rng_state.pth +3 -0
  6. checkpoint-13/scheduler.pt +3 -0
  7. checkpoint-13/special_tokens_map.json +1 -0
  8. checkpoint-13/tokenizer.json +0 -0
  9. checkpoint-13/tokenizer_config.json +1 -0
  10. checkpoint-13/trainer_state.json +25 -0
  11. checkpoint-13/training_args.bin +3 -0
  12. checkpoint-13/vocab.txt +0 -0
  13. checkpoint-26/config.json +37 -0
  14. checkpoint-26/optimizer.pt +3 -0
  15. checkpoint-26/pytorch_model.bin +3 -0
  16. checkpoint-26/rng_state.pth +3 -0
  17. checkpoint-26/scheduler.pt +3 -0
  18. checkpoint-26/special_tokens_map.json +1 -0
  19. checkpoint-26/tokenizer.json +0 -0
  20. checkpoint-26/tokenizer_config.json +1 -0
  21. checkpoint-26/trainer_state.json +34 -0
  22. checkpoint-26/training_args.bin +3 -0
  23. checkpoint-26/vocab.txt +0 -0
  24. checkpoint-35702/config.json +37 -0
  25. checkpoint-35702/optimizer.pt +3 -0
  26. checkpoint-35702/pytorch_model.bin +3 -0
  27. checkpoint-35702/rng_state.pth +3 -0
  28. checkpoint-35702/scheduler.pt +3 -0
  29. checkpoint-35702/special_tokens_map.json +1 -0
  30. checkpoint-35702/tokenizer.json +0 -0
  31. checkpoint-35702/tokenizer_config.json +1 -0
  32. checkpoint-35702/trainer_state.json +451 -0
  33. checkpoint-35702/training_args.bin +3 -0
  34. checkpoint-35702/vocab.txt +0 -0
  35. checkpoint-71404/config.json +37 -0
  36. checkpoint-71404/optimizer.pt +3 -0
  37. checkpoint-71404/pytorch_model.bin +3 -0
  38. checkpoint-71404/rng_state.pth +3 -0
  39. checkpoint-71404/scheduler.pt +3 -0
  40. checkpoint-71404/special_tokens_map.json +1 -0
  41. checkpoint-71404/tokenizer.json +0 -0
  42. checkpoint-71404/tokenizer_config.json +1 -0
  43. checkpoint-71404/trainer_state.json +886 -0
  44. checkpoint-71404/training_args.bin +3 -0
  45. checkpoint-71404/vocab.txt +0 -0
  46. config.json +37 -0
  47. pytorch_model.bin +3 -0
  48. runs/Apr27_04-29-22_a457e5c667c8/1651033831.5069559/events.out.tfevents.1651033831.a457e5c667c8.98.1 +3 -0
  49. runs/Apr27_04-29-22_a457e5c667c8/events.out.tfevents.1651033831.a457e5c667c8.98.0 +3 -0
  50. runs/Apr27_04-29-22_a457e5c667c8/events.out.tfevents.1651033873.a457e5c667c8.98.2 +3 -0
README.md CHANGED
@@ -1,3 +1,60 @@
1
  ---
2
- license: afl-3.0
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ license: apache-2.0
3
+ tags:
4
+ - generated_from_trainer
5
+ metrics:
6
+ - accuracy
7
+ model-index:
8
+ - name: distilbert-base-uncased-finetuned-mnli
9
+ results: []
10
  ---
11
+
12
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
+ should probably proofread and complete it, then remove this comment. -->
14
+
15
+ # distilbert-base-uncased-finetuned-mnli
16
+
17
+ This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
18
+ It achieves the following results on the evaluation set:
19
+ - Loss: 0.8244
20
+ - Accuracy: 0.6617
21
+
22
+ ## Model description
23
+
24
+ More information needed
25
+
26
+ ## Intended uses & limitations
27
+
28
+ More information needed
29
+
30
+ ## Training and evaluation data
31
+
32
+ More information needed
33
+
34
+ ## Training procedure
35
+
36
+ ### Training hyperparameters
37
+
38
+ The following hyperparameters were used during training:
39
+ - learning_rate: 2e-05
40
+ - train_batch_size: 16
41
+ - eval_batch_size: 16
42
+ - seed: 42
43
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
+ - lr_scheduler_type: linear
45
+ - num_epochs: 2
46
+
47
+ ### Training results
48
+
49
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
50
+ |:-------------:|:-----:|:-----:|:---------------:|:--------:|
51
+ | 0.8981 | 1.0 | 35702 | 0.8662 | 0.6371 |
52
+ | 0.7837 | 2.0 | 71404 | 0.8244 | 0.6617 |
53
+
54
+
55
+ ### Framework versions
56
+
57
+ - Transformers 4.18.0
58
+ - Pytorch 1.11.0+cu113
59
+ - Datasets 2.1.0
60
+ - Tokenizers 0.12.1
checkpoint-13/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "label2id": {
19
+ "LABEL_0": 0,
20
+ "LABEL_1": 1,
21
+ "LABEL_2": 2,
22
+ "LABEL_3": 3
23
+ },
24
+ "max_position_embeddings": 512,
25
+ "model_type": "distilbert",
26
+ "n_heads": 12,
27
+ "n_layers": 6,
28
+ "pad_token_id": 0,
29
+ "problem_type": "single_label_classification",
30
+ "qa_dropout": 0.1,
31
+ "seq_classif_dropout": 0.2,
32
+ "sinusoidal_pos_embds": false,
33
+ "tie_weights_": true,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.18.0",
36
+ "vocab_size": 30522
37
+ }
checkpoint-13/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14ae301c531056906f9e80347151258358682c2853a86f031f89366d11d482dd
3
+ size 535712225
checkpoint-13/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1295509e3e54526b462734fedcaa630d6131909b21fdefa42eb653f1a66b4e15
3
+ size 267860465
checkpoint-13/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3df29cec05354a25dd347170384b4cda8a08e5f4f1885f2b0a7ab07c8ee95598
3
+ size 14503
checkpoint-13/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6e9d89a5bd86901cfea599a980713ac341e0c480f14a074ff38b818780f1dd0
3
+ size 623
checkpoint-13/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
checkpoint-13/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-13/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
checkpoint-13/trainer_state.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.43,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-mnli/checkpoint-13",
4
+ "epoch": 1.0,
5
+ "global_step": 13,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_accuracy": 0.43,
13
+ "eval_loss": 1.2553551197052002,
14
+ "eval_runtime": 0.5695,
15
+ "eval_samples_per_second": 175.596,
16
+ "eval_steps_per_second": 12.292,
17
+ "step": 13
18
+ }
19
+ ],
20
+ "max_steps": 26,
21
+ "num_train_epochs": 2,
22
+ "total_flos": 26494424678400.0,
23
+ "trial_name": null,
24
+ "trial_params": null
25
+ }
checkpoint-13/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ddcdf0fab9ae58016832dcbd510e3859fca80f08fc775daf1d3ddd7a9780a83
3
+ size 3119
checkpoint-13/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-26/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "label2id": {
19
+ "LABEL_0": 0,
20
+ "LABEL_1": 1,
21
+ "LABEL_2": 2,
22
+ "LABEL_3": 3
23
+ },
24
+ "max_position_embeddings": 512,
25
+ "model_type": "distilbert",
26
+ "n_heads": 12,
27
+ "n_layers": 6,
28
+ "pad_token_id": 0,
29
+ "problem_type": "single_label_classification",
30
+ "qa_dropout": 0.1,
31
+ "seq_classif_dropout": 0.2,
32
+ "sinusoidal_pos_embds": false,
33
+ "tie_weights_": true,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.18.0",
36
+ "vocab_size": 30522
37
+ }
checkpoint-26/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b015994f612e4137abe25f5c36ce812e91418bac0d8cbfecd0a7306c8e729dfd
3
+ size 535712225
checkpoint-26/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5140ed6f9897893c5e8df5d9d329c7034fea1ea17be33e200bb30eb3776cbc32
3
+ size 267860465
checkpoint-26/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab7107f8f12911d83fd4acf1036eecac8568f6c2547fab3e465074eacb351559
3
+ size 14503
checkpoint-26/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f2b3f8597016bfcda3f90e9c28b7f920e0052487224afc4ffcfbd2e909a612c
3
+ size 623
checkpoint-26/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
checkpoint-26/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-26/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
checkpoint-26/trainer_state.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.43,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-mnli/checkpoint-13",
4
+ "epoch": 2.0,
5
+ "global_step": 26,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_accuracy": 0.43,
13
+ "eval_loss": 1.2553551197052002,
14
+ "eval_runtime": 0.5695,
15
+ "eval_samples_per_second": 175.596,
16
+ "eval_steps_per_second": 12.292,
17
+ "step": 13
18
+ },
19
+ {
20
+ "epoch": 2.0,
21
+ "eval_accuracy": 0.43,
22
+ "eval_loss": 1.217254877090454,
23
+ "eval_runtime": 0.58,
24
+ "eval_samples_per_second": 172.399,
25
+ "eval_steps_per_second": 12.068,
26
+ "step": 26
27
+ }
28
+ ],
29
+ "max_steps": 26,
30
+ "num_train_epochs": 2,
31
+ "total_flos": 52988849356800.0,
32
+ "trial_name": null,
33
+ "trial_params": null
34
+ }
checkpoint-26/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ddcdf0fab9ae58016832dcbd510e3859fca80f08fc775daf1d3ddd7a9780a83
3
+ size 3119
checkpoint-26/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-35702/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "label2id": {
19
+ "LABEL_0": 0,
20
+ "LABEL_1": 1,
21
+ "LABEL_2": 2,
22
+ "LABEL_3": 3
23
+ },
24
+ "max_position_embeddings": 512,
25
+ "model_type": "distilbert",
26
+ "n_heads": 12,
27
+ "n_layers": 6,
28
+ "pad_token_id": 0,
29
+ "problem_type": "single_label_classification",
30
+ "qa_dropout": 0.1,
31
+ "seq_classif_dropout": 0.2,
32
+ "sinusoidal_pos_embds": false,
33
+ "tie_weights_": true,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.18.0",
36
+ "vocab_size": 30522
37
+ }
checkpoint-35702/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24ac78d9cfa13f6213586ba2d859e06585978324fb65bfdf9b7689d2a3418d18
3
+ size 535712353
checkpoint-35702/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dc13f58747c6ddb30f99543ebcaa4357988245f7340388bc036c5d10c0f247f
3
+ size 267860465
checkpoint-35702/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12f91a6243a26d24770ff141db34c77d3f6eae341dd3368689fe09baa941aa85
3
+ size 14503
checkpoint-35702/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cd5914861a4c4f6623e5081d63c24af822e623e6bfdf278636cf79281eabe56
3
+ size 623
checkpoint-35702/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
checkpoint-35702/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-35702/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
checkpoint-35702/trainer_state.json ADDED
@@ -0,0 +1,451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.63705,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-mnli/checkpoint-35702",
4
+ "epoch": 1.0,
5
+ "global_step": 35702,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 1.9859951823427263e-05,
13
+ "loss": 1.1951,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.03,
18
+ "learning_rate": 1.971990364685452e-05,
19
+ "loss": 1.1418,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.04,
24
+ "learning_rate": 1.9579855470281778e-05,
25
+ "loss": 1.099,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.06,
30
+ "learning_rate": 1.9439807293709036e-05,
31
+ "loss": 1.0961,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.07,
36
+ "learning_rate": 1.9299759117136297e-05,
37
+ "loss": 1.0836,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.08,
42
+ "learning_rate": 1.9159710940563555e-05,
43
+ "loss": 1.0721,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.1,
48
+ "learning_rate": 1.9019662763990812e-05,
49
+ "loss": 1.0654,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.11,
54
+ "learning_rate": 1.8879614587418074e-05,
55
+ "loss": 1.0439,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.13,
60
+ "learning_rate": 1.873956641084533e-05,
61
+ "loss": 1.0453,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.14,
66
+ "learning_rate": 1.8599518234272592e-05,
67
+ "loss": 1.0399,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.15,
72
+ "learning_rate": 1.845947005769985e-05,
73
+ "loss": 1.0295,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.17,
78
+ "learning_rate": 1.8319421881127108e-05,
79
+ "loss": 1.0417,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.18,
84
+ "learning_rate": 1.817937370455437e-05,
85
+ "loss": 1.0327,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.2,
90
+ "learning_rate": 1.8039325527981627e-05,
91
+ "loss": 1.0148,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.21,
96
+ "learning_rate": 1.7899277351408884e-05,
97
+ "loss": 1.0171,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.22,
102
+ "learning_rate": 1.7759229174836145e-05,
103
+ "loss": 1.0113,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.24,
108
+ "learning_rate": 1.7619180998263403e-05,
109
+ "loss": 1.0062,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.25,
114
+ "learning_rate": 1.7479132821690664e-05,
115
+ "loss": 1.0022,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.27,
120
+ "learning_rate": 1.7339084645117922e-05,
121
+ "loss": 0.9908,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 0.28,
126
+ "learning_rate": 1.719903646854518e-05,
127
+ "loss": 0.9919,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 0.29,
132
+ "learning_rate": 1.705898829197244e-05,
133
+ "loss": 0.9875,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 0.31,
138
+ "learning_rate": 1.69189401153997e-05,
139
+ "loss": 0.9908,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 0.32,
144
+ "learning_rate": 1.6778891938826956e-05,
145
+ "loss": 0.9807,
146
+ "step": 11500
147
+ },
148
+ {
149
+ "epoch": 0.34,
150
+ "learning_rate": 1.6638843762254217e-05,
151
+ "loss": 0.9622,
152
+ "step": 12000
153
+ },
154
+ {
155
+ "epoch": 0.35,
156
+ "learning_rate": 1.6498795585681475e-05,
157
+ "loss": 0.9698,
158
+ "step": 12500
159
+ },
160
+ {
161
+ "epoch": 0.36,
162
+ "learning_rate": 1.6358747409108736e-05,
163
+ "loss": 0.9611,
164
+ "step": 13000
165
+ },
166
+ {
167
+ "epoch": 0.38,
168
+ "learning_rate": 1.6218699232535994e-05,
169
+ "loss": 0.9844,
170
+ "step": 13500
171
+ },
172
+ {
173
+ "epoch": 0.39,
174
+ "learning_rate": 1.6078651055963252e-05,
175
+ "loss": 0.9717,
176
+ "step": 14000
177
+ },
178
+ {
179
+ "epoch": 0.41,
180
+ "learning_rate": 1.5938602879390513e-05,
181
+ "loss": 0.978,
182
+ "step": 14500
183
+ },
184
+ {
185
+ "epoch": 0.42,
186
+ "learning_rate": 1.579855470281777e-05,
187
+ "loss": 0.9684,
188
+ "step": 15000
189
+ },
190
+ {
191
+ "epoch": 0.43,
192
+ "learning_rate": 1.5658506526245028e-05,
193
+ "loss": 0.9518,
194
+ "step": 15500
195
+ },
196
+ {
197
+ "epoch": 0.45,
198
+ "learning_rate": 1.551845834967229e-05,
199
+ "loss": 0.9592,
200
+ "step": 16000
201
+ },
202
+ {
203
+ "epoch": 0.46,
204
+ "learning_rate": 1.5378410173099547e-05,
205
+ "loss": 0.9482,
206
+ "step": 16500
207
+ },
208
+ {
209
+ "epoch": 0.48,
210
+ "learning_rate": 1.5238361996526806e-05,
211
+ "loss": 0.9565,
212
+ "step": 17000
213
+ },
214
+ {
215
+ "epoch": 0.49,
216
+ "learning_rate": 1.5098313819954064e-05,
217
+ "loss": 0.956,
218
+ "step": 17500
219
+ },
220
+ {
221
+ "epoch": 0.5,
222
+ "learning_rate": 1.4958265643381324e-05,
223
+ "loss": 0.9432,
224
+ "step": 18000
225
+ },
226
+ {
227
+ "epoch": 0.52,
228
+ "learning_rate": 1.4818217466808585e-05,
229
+ "loss": 0.943,
230
+ "step": 18500
231
+ },
232
+ {
233
+ "epoch": 0.53,
234
+ "learning_rate": 1.4678169290235842e-05,
235
+ "loss": 0.9354,
236
+ "step": 19000
237
+ },
238
+ {
239
+ "epoch": 0.55,
240
+ "learning_rate": 1.4538121113663102e-05,
241
+ "loss": 0.958,
242
+ "step": 19500
243
+ },
244
+ {
245
+ "epoch": 0.56,
246
+ "learning_rate": 1.439807293709036e-05,
247
+ "loss": 0.9358,
248
+ "step": 20000
249
+ },
250
+ {
251
+ "epoch": 0.57,
252
+ "learning_rate": 1.4258024760517619e-05,
253
+ "loss": 0.9395,
254
+ "step": 20500
255
+ },
256
+ {
257
+ "epoch": 0.59,
258
+ "learning_rate": 1.4117976583944878e-05,
259
+ "loss": 0.9448,
260
+ "step": 21000
261
+ },
262
+ {
263
+ "epoch": 0.6,
264
+ "learning_rate": 1.3977928407372136e-05,
265
+ "loss": 0.9349,
266
+ "step": 21500
267
+ },
268
+ {
269
+ "epoch": 0.62,
270
+ "learning_rate": 1.3837880230799397e-05,
271
+ "loss": 0.9444,
272
+ "step": 22000
273
+ },
274
+ {
275
+ "epoch": 0.63,
276
+ "learning_rate": 1.3697832054226653e-05,
277
+ "loss": 0.929,
278
+ "step": 22500
279
+ },
280
+ {
281
+ "epoch": 0.64,
282
+ "learning_rate": 1.3557783877653914e-05,
283
+ "loss": 0.933,
284
+ "step": 23000
285
+ },
286
+ {
287
+ "epoch": 0.66,
288
+ "learning_rate": 1.3417735701081174e-05,
289
+ "loss": 0.9357,
290
+ "step": 23500
291
+ },
292
+ {
293
+ "epoch": 0.67,
294
+ "learning_rate": 1.3277687524508432e-05,
295
+ "loss": 0.9182,
296
+ "step": 24000
297
+ },
298
+ {
299
+ "epoch": 0.69,
300
+ "learning_rate": 1.3137639347935691e-05,
301
+ "loss": 0.9279,
302
+ "step": 24500
303
+ },
304
+ {
305
+ "epoch": 0.7,
306
+ "learning_rate": 1.299759117136295e-05,
307
+ "loss": 0.9245,
308
+ "step": 25000
309
+ },
310
+ {
311
+ "epoch": 0.71,
312
+ "learning_rate": 1.2857542994790208e-05,
313
+ "loss": 0.9205,
314
+ "step": 25500
315
+ },
316
+ {
317
+ "epoch": 0.73,
318
+ "learning_rate": 1.271749481821747e-05,
319
+ "loss": 0.9214,
320
+ "step": 26000
321
+ },
322
+ {
323
+ "epoch": 0.74,
324
+ "learning_rate": 1.2577446641644725e-05,
325
+ "loss": 0.9388,
326
+ "step": 26500
327
+ },
328
+ {
329
+ "epoch": 0.76,
330
+ "learning_rate": 1.2437398465071986e-05,
331
+ "loss": 0.9256,
332
+ "step": 27000
333
+ },
334
+ {
335
+ "epoch": 0.77,
336
+ "learning_rate": 1.2297350288499246e-05,
337
+ "loss": 0.9206,
338
+ "step": 27500
339
+ },
340
+ {
341
+ "epoch": 0.78,
342
+ "learning_rate": 1.2157302111926503e-05,
343
+ "loss": 0.9091,
344
+ "step": 28000
345
+ },
346
+ {
347
+ "epoch": 0.8,
348
+ "learning_rate": 1.2017253935353763e-05,
349
+ "loss": 0.9267,
350
+ "step": 28500
351
+ },
352
+ {
353
+ "epoch": 0.81,
354
+ "learning_rate": 1.187720575878102e-05,
355
+ "loss": 0.9103,
356
+ "step": 29000
357
+ },
358
+ {
359
+ "epoch": 0.83,
360
+ "learning_rate": 1.173715758220828e-05,
361
+ "loss": 0.9032,
362
+ "step": 29500
363
+ },
364
+ {
365
+ "epoch": 0.84,
366
+ "learning_rate": 1.1597109405635541e-05,
367
+ "loss": 0.9075,
368
+ "step": 30000
369
+ },
370
+ {
371
+ "epoch": 0.85,
372
+ "learning_rate": 1.1457061229062799e-05,
373
+ "loss": 0.9016,
374
+ "step": 30500
375
+ },
376
+ {
377
+ "epoch": 0.87,
378
+ "learning_rate": 1.1317013052490058e-05,
379
+ "loss": 0.9119,
380
+ "step": 31000
381
+ },
382
+ {
383
+ "epoch": 0.88,
384
+ "learning_rate": 1.1176964875917316e-05,
385
+ "loss": 0.9085,
386
+ "step": 31500
387
+ },
388
+ {
389
+ "epoch": 0.9,
390
+ "learning_rate": 1.1036916699344575e-05,
391
+ "loss": 0.894,
392
+ "step": 32000
393
+ },
394
+ {
395
+ "epoch": 0.91,
396
+ "learning_rate": 1.0896868522771835e-05,
397
+ "loss": 0.9156,
398
+ "step": 32500
399
+ },
400
+ {
401
+ "epoch": 0.92,
402
+ "learning_rate": 1.0756820346199093e-05,
403
+ "loss": 0.8944,
404
+ "step": 33000
405
+ },
406
+ {
407
+ "epoch": 0.94,
408
+ "learning_rate": 1.0616772169626352e-05,
409
+ "loss": 0.8824,
410
+ "step": 33500
411
+ },
412
+ {
413
+ "epoch": 0.95,
414
+ "learning_rate": 1.047672399305361e-05,
415
+ "loss": 0.9014,
416
+ "step": 34000
417
+ },
418
+ {
419
+ "epoch": 0.97,
420
+ "learning_rate": 1.033667581648087e-05,
421
+ "loss": 0.9022,
422
+ "step": 34500
423
+ },
424
+ {
425
+ "epoch": 0.98,
426
+ "learning_rate": 1.019662763990813e-05,
427
+ "loss": 0.8888,
428
+ "step": 35000
429
+ },
430
+ {
431
+ "epoch": 0.99,
432
+ "learning_rate": 1.0056579463335388e-05,
433
+ "loss": 0.8981,
434
+ "step": 35500
435
+ },
436
+ {
437
+ "epoch": 1.0,
438
+ "eval_accuracy": 0.63705,
439
+ "eval_loss": 0.8662445545196533,
440
+ "eval_runtime": 112.0196,
441
+ "eval_samples_per_second": 178.54,
442
+ "eval_steps_per_second": 11.159,
443
+ "step": 35702
444
+ }
445
+ ],
446
+ "max_steps": 71404,
447
+ "num_train_epochs": 2,
448
+ "total_flos": 7.567112374034842e+16,
449
+ "trial_name": null,
450
+ "trial_params": null
451
+ }
checkpoint-35702/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b69140ab6b24ac5d06b1ffa0cda477a62e57823010306ba7159c6f1c22522fe
3
+ size 3119
checkpoint-35702/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-71404/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "label2id": {
19
+ "LABEL_0": 0,
20
+ "LABEL_1": 1,
21
+ "LABEL_2": 2,
22
+ "LABEL_3": 3
23
+ },
24
+ "max_position_embeddings": 512,
25
+ "model_type": "distilbert",
26
+ "n_heads": 12,
27
+ "n_layers": 6,
28
+ "pad_token_id": 0,
29
+ "problem_type": "single_label_classification",
30
+ "qa_dropout": 0.1,
31
+ "seq_classif_dropout": 0.2,
32
+ "sinusoidal_pos_embds": false,
33
+ "tie_weights_": true,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.18.0",
36
+ "vocab_size": 30522
37
+ }
checkpoint-71404/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:033c8d340e0be89ec59b924621244f788959be445ee546b3f92fdc1248e98005
3
+ size 535712545
checkpoint-71404/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f997ca2e55d65912dbf0139e6c3853d4a06f592cb04c60a1766c6557f06a87
3
+ size 267860465
checkpoint-71404/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:590fa2ab684a0256a1eed6423ca4a8b197a92cc903c6613514418f5125d9c88d
3
+ size 14503
checkpoint-71404/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc9cf6973eb6dca3eeef731eda115482e40b297cd7f06999e95553ef4a2b1a4a
3
+ size 623
checkpoint-71404/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
checkpoint-71404/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-71404/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased", "tokenizer_class": "DistilBertTokenizer"}
checkpoint-71404/trainer_state.json ADDED
@@ -0,0 +1,886 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6617,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-mnli/checkpoint-71404",
4
+ "epoch": 2.0,
5
+ "global_step": 71404,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 1.9859951823427263e-05,
13
+ "loss": 1.1951,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.03,
18
+ "learning_rate": 1.971990364685452e-05,
19
+ "loss": 1.1418,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.04,
24
+ "learning_rate": 1.9579855470281778e-05,
25
+ "loss": 1.099,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.06,
30
+ "learning_rate": 1.9439807293709036e-05,
31
+ "loss": 1.0961,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.07,
36
+ "learning_rate": 1.9299759117136297e-05,
37
+ "loss": 1.0836,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.08,
42
+ "learning_rate": 1.9159710940563555e-05,
43
+ "loss": 1.0721,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.1,
48
+ "learning_rate": 1.9019662763990812e-05,
49
+ "loss": 1.0654,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.11,
54
+ "learning_rate": 1.8879614587418074e-05,
55
+ "loss": 1.0439,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.13,
60
+ "learning_rate": 1.873956641084533e-05,
61
+ "loss": 1.0453,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.14,
66
+ "learning_rate": 1.8599518234272592e-05,
67
+ "loss": 1.0399,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.15,
72
+ "learning_rate": 1.845947005769985e-05,
73
+ "loss": 1.0295,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.17,
78
+ "learning_rate": 1.8319421881127108e-05,
79
+ "loss": 1.0417,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.18,
84
+ "learning_rate": 1.817937370455437e-05,
85
+ "loss": 1.0327,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.2,
90
+ "learning_rate": 1.8039325527981627e-05,
91
+ "loss": 1.0148,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.21,
96
+ "learning_rate": 1.7899277351408884e-05,
97
+ "loss": 1.0171,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.22,
102
+ "learning_rate": 1.7759229174836145e-05,
103
+ "loss": 1.0113,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.24,
108
+ "learning_rate": 1.7619180998263403e-05,
109
+ "loss": 1.0062,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.25,
114
+ "learning_rate": 1.7479132821690664e-05,
115
+ "loss": 1.0022,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.27,
120
+ "learning_rate": 1.7339084645117922e-05,
121
+ "loss": 0.9908,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 0.28,
126
+ "learning_rate": 1.719903646854518e-05,
127
+ "loss": 0.9919,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 0.29,
132
+ "learning_rate": 1.705898829197244e-05,
133
+ "loss": 0.9875,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 0.31,
138
+ "learning_rate": 1.69189401153997e-05,
139
+ "loss": 0.9908,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 0.32,
144
+ "learning_rate": 1.6778891938826956e-05,
145
+ "loss": 0.9807,
146
+ "step": 11500
147
+ },
148
+ {
149
+ "epoch": 0.34,
150
+ "learning_rate": 1.6638843762254217e-05,
151
+ "loss": 0.9622,
152
+ "step": 12000
153
+ },
154
+ {
155
+ "epoch": 0.35,
156
+ "learning_rate": 1.6498795585681475e-05,
157
+ "loss": 0.9698,
158
+ "step": 12500
159
+ },
160
+ {
161
+ "epoch": 0.36,
162
+ "learning_rate": 1.6358747409108736e-05,
163
+ "loss": 0.9611,
164
+ "step": 13000
165
+ },
166
+ {
167
+ "epoch": 0.38,
168
+ "learning_rate": 1.6218699232535994e-05,
169
+ "loss": 0.9844,
170
+ "step": 13500
171
+ },
172
+ {
173
+ "epoch": 0.39,
174
+ "learning_rate": 1.6078651055963252e-05,
175
+ "loss": 0.9717,
176
+ "step": 14000
177
+ },
178
+ {
179
+ "epoch": 0.41,
180
+ "learning_rate": 1.5938602879390513e-05,
181
+ "loss": 0.978,
182
+ "step": 14500
183
+ },
184
+ {
185
+ "epoch": 0.42,
186
+ "learning_rate": 1.579855470281777e-05,
187
+ "loss": 0.9684,
188
+ "step": 15000
189
+ },
190
+ {
191
+ "epoch": 0.43,
192
+ "learning_rate": 1.5658506526245028e-05,
193
+ "loss": 0.9518,
194
+ "step": 15500
195
+ },
196
+ {
197
+ "epoch": 0.45,
198
+ "learning_rate": 1.551845834967229e-05,
199
+ "loss": 0.9592,
200
+ "step": 16000
201
+ },
202
+ {
203
+ "epoch": 0.46,
204
+ "learning_rate": 1.5378410173099547e-05,
205
+ "loss": 0.9482,
206
+ "step": 16500
207
+ },
208
+ {
209
+ "epoch": 0.48,
210
+ "learning_rate": 1.5238361996526806e-05,
211
+ "loss": 0.9565,
212
+ "step": 17000
213
+ },
214
+ {
215
+ "epoch": 0.49,
216
+ "learning_rate": 1.5098313819954064e-05,
217
+ "loss": 0.956,
218
+ "step": 17500
219
+ },
220
+ {
221
+ "epoch": 0.5,
222
+ "learning_rate": 1.4958265643381324e-05,
223
+ "loss": 0.9432,
224
+ "step": 18000
225
+ },
226
+ {
227
+ "epoch": 0.52,
228
+ "learning_rate": 1.4818217466808585e-05,
229
+ "loss": 0.943,
230
+ "step": 18500
231
+ },
232
+ {
233
+ "epoch": 0.53,
234
+ "learning_rate": 1.4678169290235842e-05,
235
+ "loss": 0.9354,
236
+ "step": 19000
237
+ },
238
+ {
239
+ "epoch": 0.55,
240
+ "learning_rate": 1.4538121113663102e-05,
241
+ "loss": 0.958,
242
+ "step": 19500
243
+ },
244
+ {
245
+ "epoch": 0.56,
246
+ "learning_rate": 1.439807293709036e-05,
247
+ "loss": 0.9358,
248
+ "step": 20000
249
+ },
250
+ {
251
+ "epoch": 0.57,
252
+ "learning_rate": 1.4258024760517619e-05,
253
+ "loss": 0.9395,
254
+ "step": 20500
255
+ },
256
+ {
257
+ "epoch": 0.59,
258
+ "learning_rate": 1.4117976583944878e-05,
259
+ "loss": 0.9448,
260
+ "step": 21000
261
+ },
262
+ {
263
+ "epoch": 0.6,
264
+ "learning_rate": 1.3977928407372136e-05,
265
+ "loss": 0.9349,
266
+ "step": 21500
267
+ },
268
+ {
269
+ "epoch": 0.62,
270
+ "learning_rate": 1.3837880230799397e-05,
271
+ "loss": 0.9444,
272
+ "step": 22000
273
+ },
274
+ {
275
+ "epoch": 0.63,
276
+ "learning_rate": 1.3697832054226653e-05,
277
+ "loss": 0.929,
278
+ "step": 22500
279
+ },
280
+ {
281
+ "epoch": 0.64,
282
+ "learning_rate": 1.3557783877653914e-05,
283
+ "loss": 0.933,
284
+ "step": 23000
285
+ },
286
+ {
287
+ "epoch": 0.66,
288
+ "learning_rate": 1.3417735701081174e-05,
289
+ "loss": 0.9357,
290
+ "step": 23500
291
+ },
292
+ {
293
+ "epoch": 0.67,
294
+ "learning_rate": 1.3277687524508432e-05,
295
+ "loss": 0.9182,
296
+ "step": 24000
297
+ },
298
+ {
299
+ "epoch": 0.69,
300
+ "learning_rate": 1.3137639347935691e-05,
301
+ "loss": 0.9279,
302
+ "step": 24500
303
+ },
304
+ {
305
+ "epoch": 0.7,
306
+ "learning_rate": 1.299759117136295e-05,
307
+ "loss": 0.9245,
308
+ "step": 25000
309
+ },
310
+ {
311
+ "epoch": 0.71,
312
+ "learning_rate": 1.2857542994790208e-05,
313
+ "loss": 0.9205,
314
+ "step": 25500
315
+ },
316
+ {
317
+ "epoch": 0.73,
318
+ "learning_rate": 1.271749481821747e-05,
319
+ "loss": 0.9214,
320
+ "step": 26000
321
+ },
322
+ {
323
+ "epoch": 0.74,
324
+ "learning_rate": 1.2577446641644725e-05,
325
+ "loss": 0.9388,
326
+ "step": 26500
327
+ },
328
+ {
329
+ "epoch": 0.76,
330
+ "learning_rate": 1.2437398465071986e-05,
331
+ "loss": 0.9256,
332
+ "step": 27000
333
+ },
334
+ {
335
+ "epoch": 0.77,
336
+ "learning_rate": 1.2297350288499246e-05,
337
+ "loss": 0.9206,
338
+ "step": 27500
339
+ },
340
+ {
341
+ "epoch": 0.78,
342
+ "learning_rate": 1.2157302111926503e-05,
343
+ "loss": 0.9091,
344
+ "step": 28000
345
+ },
346
+ {
347
+ "epoch": 0.8,
348
+ "learning_rate": 1.2017253935353763e-05,
349
+ "loss": 0.9267,
350
+ "step": 28500
351
+ },
352
+ {
353
+ "epoch": 0.81,
354
+ "learning_rate": 1.187720575878102e-05,
355
+ "loss": 0.9103,
356
+ "step": 29000
357
+ },
358
+ {
359
+ "epoch": 0.83,
360
+ "learning_rate": 1.173715758220828e-05,
361
+ "loss": 0.9032,
362
+ "step": 29500
363
+ },
364
+ {
365
+ "epoch": 0.84,
366
+ "learning_rate": 1.1597109405635541e-05,
367
+ "loss": 0.9075,
368
+ "step": 30000
369
+ },
370
+ {
371
+ "epoch": 0.85,
372
+ "learning_rate": 1.1457061229062799e-05,
373
+ "loss": 0.9016,
374
+ "step": 30500
375
+ },
376
+ {
377
+ "epoch": 0.87,
378
+ "learning_rate": 1.1317013052490058e-05,
379
+ "loss": 0.9119,
380
+ "step": 31000
381
+ },
382
+ {
383
+ "epoch": 0.88,
384
+ "learning_rate": 1.1176964875917316e-05,
385
+ "loss": 0.9085,
386
+ "step": 31500
387
+ },
388
+ {
389
+ "epoch": 0.9,
390
+ "learning_rate": 1.1036916699344575e-05,
391
+ "loss": 0.894,
392
+ "step": 32000
393
+ },
394
+ {
395
+ "epoch": 0.91,
396
+ "learning_rate": 1.0896868522771835e-05,
397
+ "loss": 0.9156,
398
+ "step": 32500
399
+ },
400
+ {
401
+ "epoch": 0.92,
402
+ "learning_rate": 1.0756820346199093e-05,
403
+ "loss": 0.8944,
404
+ "step": 33000
405
+ },
406
+ {
407
+ "epoch": 0.94,
408
+ "learning_rate": 1.0616772169626352e-05,
409
+ "loss": 0.8824,
410
+ "step": 33500
411
+ },
412
+ {
413
+ "epoch": 0.95,
414
+ "learning_rate": 1.047672399305361e-05,
415
+ "loss": 0.9014,
416
+ "step": 34000
417
+ },
418
+ {
419
+ "epoch": 0.97,
420
+ "learning_rate": 1.033667581648087e-05,
421
+ "loss": 0.9022,
422
+ "step": 34500
423
+ },
424
+ {
425
+ "epoch": 0.98,
426
+ "learning_rate": 1.019662763990813e-05,
427
+ "loss": 0.8888,
428
+ "step": 35000
429
+ },
430
+ {
431
+ "epoch": 0.99,
432
+ "learning_rate": 1.0056579463335388e-05,
433
+ "loss": 0.8981,
434
+ "step": 35500
435
+ },
436
+ {
437
+ "epoch": 1.0,
438
+ "eval_accuracy": 0.63705,
439
+ "eval_loss": 0.8662445545196533,
440
+ "eval_runtime": 112.0196,
441
+ "eval_samples_per_second": 178.54,
442
+ "eval_steps_per_second": 11.159,
443
+ "step": 35702
444
+ },
445
+ {
446
+ "epoch": 1.01,
447
+ "learning_rate": 9.916531286762647e-06,
448
+ "loss": 0.8662,
449
+ "step": 36000
450
+ },
451
+ {
452
+ "epoch": 1.02,
453
+ "learning_rate": 9.776483110189907e-06,
454
+ "loss": 0.8255,
455
+ "step": 36500
456
+ },
457
+ {
458
+ "epoch": 1.04,
459
+ "learning_rate": 9.636434933617164e-06,
460
+ "loss": 0.8377,
461
+ "step": 37000
462
+ },
463
+ {
464
+ "epoch": 1.05,
465
+ "learning_rate": 9.496386757044424e-06,
466
+ "loss": 0.8203,
467
+ "step": 37500
468
+ },
469
+ {
470
+ "epoch": 1.06,
471
+ "learning_rate": 9.356338580471683e-06,
472
+ "loss": 0.8399,
473
+ "step": 38000
474
+ },
475
+ {
476
+ "epoch": 1.08,
477
+ "learning_rate": 9.216290403898943e-06,
478
+ "loss": 0.8215,
479
+ "step": 38500
480
+ },
481
+ {
482
+ "epoch": 1.09,
483
+ "learning_rate": 9.0762422273262e-06,
484
+ "loss": 0.8266,
485
+ "step": 39000
486
+ },
487
+ {
488
+ "epoch": 1.11,
489
+ "learning_rate": 8.93619405075346e-06,
490
+ "loss": 0.8445,
491
+ "step": 39500
492
+ },
493
+ {
494
+ "epoch": 1.12,
495
+ "learning_rate": 8.79614587418072e-06,
496
+ "loss": 0.8159,
497
+ "step": 40000
498
+ },
499
+ {
500
+ "epoch": 1.13,
501
+ "learning_rate": 8.656097697607979e-06,
502
+ "loss": 0.8139,
503
+ "step": 40500
504
+ },
505
+ {
506
+ "epoch": 1.15,
507
+ "learning_rate": 8.516049521035236e-06,
508
+ "loss": 0.8303,
509
+ "step": 41000
510
+ },
511
+ {
512
+ "epoch": 1.16,
513
+ "learning_rate": 8.376001344462496e-06,
514
+ "loss": 0.8004,
515
+ "step": 41500
516
+ },
517
+ {
518
+ "epoch": 1.18,
519
+ "learning_rate": 8.235953167889755e-06,
520
+ "loss": 0.8373,
521
+ "step": 42000
522
+ },
523
+ {
524
+ "epoch": 1.19,
525
+ "learning_rate": 8.095904991317015e-06,
526
+ "loss": 0.8121,
527
+ "step": 42500
528
+ },
529
+ {
530
+ "epoch": 1.2,
531
+ "learning_rate": 7.955856814744272e-06,
532
+ "loss": 0.8083,
533
+ "step": 43000
534
+ },
535
+ {
536
+ "epoch": 1.22,
537
+ "learning_rate": 7.815808638171532e-06,
538
+ "loss": 0.8287,
539
+ "step": 43500
540
+ },
541
+ {
542
+ "epoch": 1.23,
543
+ "learning_rate": 7.67576046159879e-06,
544
+ "loss": 0.8251,
545
+ "step": 44000
546
+ },
547
+ {
548
+ "epoch": 1.25,
549
+ "learning_rate": 7.53571228502605e-06,
550
+ "loss": 0.8286,
551
+ "step": 44500
552
+ },
553
+ {
554
+ "epoch": 1.26,
555
+ "learning_rate": 7.395664108453308e-06,
556
+ "loss": 0.8139,
557
+ "step": 45000
558
+ },
559
+ {
560
+ "epoch": 1.27,
561
+ "learning_rate": 7.255615931880568e-06,
562
+ "loss": 0.8093,
563
+ "step": 45500
564
+ },
565
+ {
566
+ "epoch": 1.29,
567
+ "learning_rate": 7.115567755307826e-06,
568
+ "loss": 0.8185,
569
+ "step": 46000
570
+ },
571
+ {
572
+ "epoch": 1.3,
573
+ "learning_rate": 6.975519578735085e-06,
574
+ "loss": 0.8182,
575
+ "step": 46500
576
+ },
577
+ {
578
+ "epoch": 1.32,
579
+ "learning_rate": 6.835471402162345e-06,
580
+ "loss": 0.8235,
581
+ "step": 47000
582
+ },
583
+ {
584
+ "epoch": 1.33,
585
+ "learning_rate": 6.695423225589604e-06,
586
+ "loss": 0.8165,
587
+ "step": 47500
588
+ },
589
+ {
590
+ "epoch": 1.34,
591
+ "learning_rate": 6.555375049016862e-06,
592
+ "loss": 0.8013,
593
+ "step": 48000
594
+ },
595
+ {
596
+ "epoch": 1.36,
597
+ "learning_rate": 6.415326872444121e-06,
598
+ "loss": 0.8126,
599
+ "step": 48500
600
+ },
601
+ {
602
+ "epoch": 1.37,
603
+ "learning_rate": 6.275278695871381e-06,
604
+ "loss": 0.8178,
605
+ "step": 49000
606
+ },
607
+ {
608
+ "epoch": 1.39,
609
+ "learning_rate": 6.13523051929864e-06,
610
+ "loss": 0.8052,
611
+ "step": 49500
612
+ },
613
+ {
614
+ "epoch": 1.4,
615
+ "learning_rate": 5.995182342725898e-06,
616
+ "loss": 0.8167,
617
+ "step": 50000
618
+ },
619
+ {
620
+ "epoch": 1.41,
621
+ "learning_rate": 5.855134166153157e-06,
622
+ "loss": 0.8193,
623
+ "step": 50500
624
+ },
625
+ {
626
+ "epoch": 1.43,
627
+ "learning_rate": 5.715085989580415e-06,
628
+ "loss": 0.8114,
629
+ "step": 51000
630
+ },
631
+ {
632
+ "epoch": 1.44,
633
+ "learning_rate": 5.575037813007676e-06,
634
+ "loss": 0.7964,
635
+ "step": 51500
636
+ },
637
+ {
638
+ "epoch": 1.46,
639
+ "learning_rate": 5.434989636434934e-06,
640
+ "loss": 0.8061,
641
+ "step": 52000
642
+ },
643
+ {
644
+ "epoch": 1.47,
645
+ "learning_rate": 5.294941459862193e-06,
646
+ "loss": 0.8148,
647
+ "step": 52500
648
+ },
649
+ {
650
+ "epoch": 1.48,
651
+ "learning_rate": 5.154893283289451e-06,
652
+ "loss": 0.7987,
653
+ "step": 53000
654
+ },
655
+ {
656
+ "epoch": 1.5,
657
+ "learning_rate": 5.014845106716712e-06,
658
+ "loss": 0.7882,
659
+ "step": 53500
660
+ },
661
+ {
662
+ "epoch": 1.51,
663
+ "learning_rate": 4.87479693014397e-06,
664
+ "loss": 0.8347,
665
+ "step": 54000
666
+ },
667
+ {
668
+ "epoch": 1.53,
669
+ "learning_rate": 4.734748753571229e-06,
670
+ "loss": 0.8005,
671
+ "step": 54500
672
+ },
673
+ {
674
+ "epoch": 1.54,
675
+ "learning_rate": 4.594700576998487e-06,
676
+ "loss": 0.7995,
677
+ "step": 55000
678
+ },
679
+ {
680
+ "epoch": 1.55,
681
+ "learning_rate": 4.454652400425747e-06,
682
+ "loss": 0.8096,
683
+ "step": 55500
684
+ },
685
+ {
686
+ "epoch": 1.57,
687
+ "learning_rate": 4.314604223853005e-06,
688
+ "loss": 0.788,
689
+ "step": 56000
690
+ },
691
+ {
692
+ "epoch": 1.58,
693
+ "learning_rate": 4.174556047280265e-06,
694
+ "loss": 0.7972,
695
+ "step": 56500
696
+ },
697
+ {
698
+ "epoch": 1.6,
699
+ "learning_rate": 4.034507870707523e-06,
700
+ "loss": 0.7841,
701
+ "step": 57000
702
+ },
703
+ {
704
+ "epoch": 1.61,
705
+ "learning_rate": 3.894459694134783e-06,
706
+ "loss": 0.8075,
707
+ "step": 57500
708
+ },
709
+ {
710
+ "epoch": 1.62,
711
+ "learning_rate": 3.7544115175620417e-06,
712
+ "loss": 0.8039,
713
+ "step": 58000
714
+ },
715
+ {
716
+ "epoch": 1.64,
717
+ "learning_rate": 3.6143633409893007e-06,
718
+ "loss": 0.7813,
719
+ "step": 58500
720
+ },
721
+ {
722
+ "epoch": 1.65,
723
+ "learning_rate": 3.4743151644165597e-06,
724
+ "loss": 0.7884,
725
+ "step": 59000
726
+ },
727
+ {
728
+ "epoch": 1.67,
729
+ "learning_rate": 3.3342669878438182e-06,
730
+ "loss": 0.7981,
731
+ "step": 59500
732
+ },
733
+ {
734
+ "epoch": 1.68,
735
+ "learning_rate": 3.1942188112710776e-06,
736
+ "loss": 0.7857,
737
+ "step": 60000
738
+ },
739
+ {
740
+ "epoch": 1.69,
741
+ "learning_rate": 3.0541706346983362e-06,
742
+ "loss": 0.8166,
743
+ "step": 60500
744
+ },
745
+ {
746
+ "epoch": 1.71,
747
+ "learning_rate": 2.9141224581255956e-06,
748
+ "loss": 0.8139,
749
+ "step": 61000
750
+ },
751
+ {
752
+ "epoch": 1.72,
753
+ "learning_rate": 2.774074281552854e-06,
754
+ "loss": 0.7986,
755
+ "step": 61500
756
+ },
757
+ {
758
+ "epoch": 1.74,
759
+ "learning_rate": 2.6340261049801136e-06,
760
+ "loss": 0.8084,
761
+ "step": 62000
762
+ },
763
+ {
764
+ "epoch": 1.75,
765
+ "learning_rate": 2.493977928407372e-06,
766
+ "loss": 0.7958,
767
+ "step": 62500
768
+ },
769
+ {
770
+ "epoch": 1.76,
771
+ "learning_rate": 2.353929751834631e-06,
772
+ "loss": 0.8037,
773
+ "step": 63000
774
+ },
775
+ {
776
+ "epoch": 1.78,
777
+ "learning_rate": 2.21388157526189e-06,
778
+ "loss": 0.8021,
779
+ "step": 63500
780
+ },
781
+ {
782
+ "epoch": 1.79,
783
+ "learning_rate": 2.073833398689149e-06,
784
+ "loss": 0.8079,
785
+ "step": 64000
786
+ },
787
+ {
788
+ "epoch": 1.81,
789
+ "learning_rate": 1.933785222116408e-06,
790
+ "loss": 0.7904,
791
+ "step": 64500
792
+ },
793
+ {
794
+ "epoch": 1.82,
795
+ "learning_rate": 1.7937370455436674e-06,
796
+ "loss": 0.8015,
797
+ "step": 65000
798
+ },
799
+ {
800
+ "epoch": 1.83,
801
+ "learning_rate": 1.653688868970926e-06,
802
+ "loss": 0.8057,
803
+ "step": 65500
804
+ },
805
+ {
806
+ "epoch": 1.85,
807
+ "learning_rate": 1.513640692398185e-06,
808
+ "loss": 0.7901,
809
+ "step": 66000
810
+ },
811
+ {
812
+ "epoch": 1.86,
813
+ "learning_rate": 1.373592515825444e-06,
814
+ "loss": 0.7925,
815
+ "step": 66500
816
+ },
817
+ {
818
+ "epoch": 1.88,
819
+ "learning_rate": 1.233544339252703e-06,
820
+ "loss": 0.8031,
821
+ "step": 67000
822
+ },
823
+ {
824
+ "epoch": 1.89,
825
+ "learning_rate": 1.093496162679962e-06,
826
+ "loss": 0.7847,
827
+ "step": 67500
828
+ },
829
+ {
830
+ "epoch": 1.9,
831
+ "learning_rate": 9.53447986107221e-07,
832
+ "loss": 0.792,
833
+ "step": 68000
834
+ },
835
+ {
836
+ "epoch": 1.92,
837
+ "learning_rate": 8.133998095344799e-07,
838
+ "loss": 0.7927,
839
+ "step": 68500
840
+ },
841
+ {
842
+ "epoch": 1.93,
843
+ "learning_rate": 6.733516329617389e-07,
844
+ "loss": 0.7759,
845
+ "step": 69000
846
+ },
847
+ {
848
+ "epoch": 1.95,
849
+ "learning_rate": 5.333034563889979e-07,
850
+ "loss": 0.7915,
851
+ "step": 69500
852
+ },
853
+ {
854
+ "epoch": 1.96,
855
+ "learning_rate": 3.9325527981625684e-07,
856
+ "loss": 0.7801,
857
+ "step": 70000
858
+ },
859
+ {
860
+ "epoch": 1.97,
861
+ "learning_rate": 2.532071032435158e-07,
862
+ "loss": 0.787,
863
+ "step": 70500
864
+ },
865
+ {
866
+ "epoch": 1.99,
867
+ "learning_rate": 1.1315892667077475e-07,
868
+ "loss": 0.7837,
869
+ "step": 71000
870
+ },
871
+ {
872
+ "epoch": 2.0,
873
+ "eval_accuracy": 0.6617,
874
+ "eval_loss": 0.8244466781616211,
875
+ "eval_runtime": 111.9282,
876
+ "eval_samples_per_second": 178.686,
877
+ "eval_steps_per_second": 11.168,
878
+ "step": 71404
879
+ }
880
+ ],
881
+ "max_steps": 71404,
882
+ "num_train_epochs": 2,
883
+ "total_flos": 1.5134224748069683e+17,
884
+ "trial_name": null,
885
+ "trial_params": null
886
+ }
checkpoint-71404/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b69140ab6b24ac5d06b1ffa0cda477a62e57823010306ba7159c6f1c22522fe
3
+ size 3119
checkpoint-71404/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "label2id": {
19
+ "LABEL_0": 0,
20
+ "LABEL_1": 1,
21
+ "LABEL_2": 2,
22
+ "LABEL_3": 3
23
+ },
24
+ "max_position_embeddings": 512,
25
+ "model_type": "distilbert",
26
+ "n_heads": 12,
27
+ "n_layers": 6,
28
+ "pad_token_id": 0,
29
+ "problem_type": "single_label_classification",
30
+ "qa_dropout": 0.1,
31
+ "seq_classif_dropout": 0.2,
32
+ "sinusoidal_pos_embds": false,
33
+ "tie_weights_": true,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.18.0",
36
+ "vocab_size": 30522
37
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f997ca2e55d65912dbf0139e6c3853d4a06f592cb04c60a1766c6557f06a87
3
+ size 267860465
runs/Apr27_04-29-22_a457e5c667c8/1651033831.5069559/events.out.tfevents.1651033831.a457e5c667c8.98.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc40e4f680fd7ba8f81e195cab0e7417729c8153da9599922ea9f6011a725ce9
3
+ size 4907
runs/Apr27_04-29-22_a457e5c667c8/events.out.tfevents.1651033831.a457e5c667c8.98.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47234fdd630f3f90ea2c15bb8a21712ddcb7cbd538d4bf5ce6778dad08385166
3
+ size 4464
runs/Apr27_04-29-22_a457e5c667c8/events.out.tfevents.1651033873.a457e5c667c8.98.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13327737222f93a040bda917333a4a68dbf06f59c00b8732ae678c554d64cb68
3
+ size 357