w11wo committed on
Commit
63f658b
1 Parent(s): 4ea916d

initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +91 -0
  2. all_results.json +14 -0
  3. checkpoint-6264/config.json +27 -0
  4. checkpoint-6264/merges.txt +0 -0
  5. checkpoint-6264/optimizer.pt +3 -0
  6. checkpoint-6264/pytorch_model.bin +3 -0
  7. checkpoint-6264/rng_state_0.pth +3 -0
  8. checkpoint-6264/rng_state_1.pth +3 -0
  9. checkpoint-6264/rng_state_2.pth +3 -0
  10. checkpoint-6264/rng_state_3.pth +3 -0
  11. checkpoint-6264/rng_state_4.pth +3 -0
  12. checkpoint-6264/rng_state_5.pth +3 -0
  13. checkpoint-6264/rng_state_6.pth +3 -0
  14. checkpoint-6264/rng_state_7.pth +3 -0
  15. checkpoint-6264/scheduler.pt +3 -0
  16. checkpoint-6264/special_tokens_map.json +1 -0
  17. checkpoint-6264/tokenizer.json +0 -0
  18. checkpoint-6264/tokenizer_config.json +1 -0
  19. checkpoint-6264/trainer_state.json +320 -0
  20. checkpoint-6264/training_args.bin +3 -0
  21. checkpoint-6264/vocab.json +0 -0
  22. checkpoint-6480/config.json +27 -0
  23. checkpoint-6480/merges.txt +0 -0
  24. checkpoint-6480/optimizer.pt +3 -0
  25. checkpoint-6480/pytorch_model.bin +3 -0
  26. checkpoint-6480/rng_state_0.pth +3 -0
  27. checkpoint-6480/rng_state_1.pth +3 -0
  28. checkpoint-6480/rng_state_2.pth +3 -0
  29. checkpoint-6480/rng_state_3.pth +3 -0
  30. checkpoint-6480/rng_state_4.pth +3 -0
  31. checkpoint-6480/rng_state_5.pth +3 -0
  32. checkpoint-6480/rng_state_6.pth +3 -0
  33. checkpoint-6480/rng_state_7.pth +3 -0
  34. checkpoint-6480/scheduler.pt +3 -0
  35. checkpoint-6480/special_tokens_map.json +1 -0
  36. checkpoint-6480/tokenizer.json +0 -0
  37. checkpoint-6480/tokenizer_config.json +1 -0
  38. checkpoint-6480/trainer_state.json +328 -0
  39. checkpoint-6480/training_args.bin +3 -0
  40. checkpoint-6480/vocab.json +0 -0
  41. config.json +27 -0
  42. eval_results.json +9 -0
  43. merges.txt +0 -0
  44. pytorch_model.bin +3 -0
  45. runs/Dec05_03-42-19_bigbird/1638675757.7021885/events.out.tfevents.1638675757.bigbird.14625.1 +3 -0
  46. runs/Dec05_03-42-19_bigbird/events.out.tfevents.1638675757.bigbird.14625.0 +3 -0
  47. runs/Dec05_03-42-19_bigbird/events.out.tfevents.1638680810.bigbird.14625.2 +3 -0
  48. special_tokens_map.json +1 -0
  49. tokenizer.json +0 -0
  50. tokenizer_config.json +1 -0
README.md ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - generated_from_trainer
4
+ datasets:
5
+ - oscar-corpus/OSCAR-2109
6
+ model-index:
7
+ - name: runs
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # runs
15
+
16
+ This model was trained from scratch on the oscar-corpus/OSCAR-2109 deduplicated_lo dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 1.4556
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 0.0002
38
+ - train_batch_size: 128
39
+ - eval_batch_size: 128
40
+ - seed: 42
41
+ - distributed_type: tpu
42
+ - num_devices: 8
43
+ - total_train_batch_size: 1024
44
+ - total_eval_batch_size: 1024
45
+ - optimizer: Adam with betas=(0.9,0.98) and epsilon=1e-08
46
+ - lr_scheduler_type: linear
47
+ - lr_scheduler_warmup_steps: 1000
48
+ - num_epochs: 30.0
49
+
50
+ ### Training results
51
+
52
+ | Training Loss | Epoch | Step | Validation Loss |
53
+ |:-------------:|:-----:|:----:|:---------------:|
54
+ | No log | 1.0 | 216 | 5.8586 |
55
+ | No log | 2.0 | 432 | 5.5095 |
56
+ | 6.688 | 3.0 | 648 | 5.3976 |
57
+ | 6.688 | 4.0 | 864 | 5.3562 |
58
+ | 5.3629 | 5.0 | 1080 | 5.2912 |
59
+ | 5.3629 | 6.0 | 1296 | 5.2385 |
60
+ | 5.22 | 7.0 | 1512 | 5.1955 |
61
+ | 5.22 | 8.0 | 1728 | 5.1785 |
62
+ | 5.22 | 9.0 | 1944 | 5.1327 |
63
+ | 5.1248 | 10.0 | 2160 | 5.1243 |
64
+ | 5.1248 | 11.0 | 2376 | 5.0889 |
65
+ | 5.0591 | 12.0 | 2592 | 5.0732 |
66
+ | 5.0591 | 13.0 | 2808 | 5.0417 |
67
+ | 5.0094 | 14.0 | 3024 | 5.0388 |
68
+ | 5.0094 | 15.0 | 3240 | 4.9299 |
69
+ | 5.0094 | 16.0 | 3456 | 4.2991 |
70
+ | 4.7527 | 17.0 | 3672 | 3.6541 |
71
+ | 4.7527 | 18.0 | 3888 | 2.7826 |
72
+ | 3.4431 | 19.0 | 4104 | 2.2796 |
73
+ | 3.4431 | 20.0 | 4320 | 2.0213 |
74
+ | 2.2803 | 21.0 | 4536 | 1.8809 |
75
+ | 2.2803 | 22.0 | 4752 | 1.7615 |
76
+ | 2.2803 | 23.0 | 4968 | 1.6925 |
77
+ | 1.8601 | 24.0 | 5184 | 1.6205 |
78
+ | 1.8601 | 25.0 | 5400 | 1.5751 |
79
+ | 1.6697 | 26.0 | 5616 | 1.5391 |
80
+ | 1.6697 | 27.0 | 5832 | 1.5200 |
81
+ | 1.5655 | 28.0 | 6048 | 1.4866 |
82
+ | 1.5655 | 29.0 | 6264 | 1.4656 |
83
+ | 1.5655 | 30.0 | 6480 | 1.4627 |
84
+
85
+
86
+ ### Framework versions
87
+
88
+ - Transformers 4.13.0.dev0
89
+ - Pytorch 1.9.0+cu102
90
+ - Datasets 1.16.1
91
+ - Tokenizers 0.10.3
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "eval_loss": 1.4556076526641846,
4
+ "eval_runtime": 4.6081,
5
+ "eval_samples": 11830,
6
+ "eval_samples_per_second": 2567.232,
7
+ "eval_steps_per_second": 2.604,
8
+ "perplexity": 4.287087734559996,
9
+ "train_loss": 3.818386595926167,
10
+ "train_runtime": 5045.7987,
11
+ "train_samples": 220353,
12
+ "train_samples_per_second": 1310.118,
13
+ "train_steps_per_second": 1.284
14
+ }
checkpoint-6264/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": ".",
3
+ "architectures": [
4
+ "RobertaForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.13.0.dev0",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 50265
27
+ }
checkpoint-6264/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-6264/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eabf10a7afed9548715c3e9460d0da86aeb789b87c3e2cfaa5273874c968d2d9
3
+ size 997690841
checkpoint-6264/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d374020bfb5ef06d9dbb8cc501959a721c5fc12a0424d77d7d0c24bf4dfc807
3
+ size 498858859
checkpoint-6264/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9455b84f089ccab5b61f7dd15e41e88a8a2c3abc4efd11b953f46cb957cab591
3
+ size 13611
checkpoint-6264/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9455b84f089ccab5b61f7dd15e41e88a8a2c3abc4efd11b953f46cb957cab591
3
+ size 13611
checkpoint-6264/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9455b84f089ccab5b61f7dd15e41e88a8a2c3abc4efd11b953f46cb957cab591
3
+ size 13611
checkpoint-6264/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9455b84f089ccab5b61f7dd15e41e88a8a2c3abc4efd11b953f46cb957cab591
3
+ size 13611
checkpoint-6264/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9455b84f089ccab5b61f7dd15e41e88a8a2c3abc4efd11b953f46cb957cab591
3
+ size 13611
checkpoint-6264/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9455b84f089ccab5b61f7dd15e41e88a8a2c3abc4efd11b953f46cb957cab591
3
+ size 13611
checkpoint-6264/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9455b84f089ccab5b61f7dd15e41e88a8a2c3abc4efd11b953f46cb957cab591
3
+ size 13611
checkpoint-6264/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9455b84f089ccab5b61f7dd15e41e88a8a2c3abc4efd11b953f46cb957cab591
3
+ size 13611
checkpoint-6264/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6d7917c1b6a581026f3ffc02c8c750e821a635b2e799510b3ff1eafd0d2eb0c
3
+ size 623
checkpoint-6264/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
checkpoint-6264/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-6264/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "special_tokens_map_file": null, "name_or_path": ".", "tokenizer_class": "RobertaTokenizer"}
checkpoint-6264/trainer_state.json ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 29.0,
5
+ "global_step": 6264,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_loss": 5.858560562133789,
13
+ "eval_runtime": 5.0347,
14
+ "eval_samples_per_second": 2349.702,
15
+ "eval_steps_per_second": 2.383,
16
+ "step": 216
17
+ },
18
+ {
19
+ "epoch": 2.0,
20
+ "eval_loss": 5.509454727172852,
21
+ "eval_runtime": 4.6188,
22
+ "eval_samples_per_second": 2561.29,
23
+ "eval_steps_per_second": 2.598,
24
+ "step": 432
25
+ },
26
+ {
27
+ "epoch": 2.31,
28
+ "learning_rate": 0.0001,
29
+ "loss": 6.688,
30
+ "step": 500
31
+ },
32
+ {
33
+ "epoch": 3.0,
34
+ "eval_loss": 5.397603511810303,
35
+ "eval_runtime": 4.6394,
36
+ "eval_samples_per_second": 2549.872,
37
+ "eval_steps_per_second": 2.587,
38
+ "step": 648
39
+ },
40
+ {
41
+ "epoch": 4.0,
42
+ "eval_loss": 5.356218338012695,
43
+ "eval_runtime": 4.6644,
44
+ "eval_samples_per_second": 2536.231,
45
+ "eval_steps_per_second": 2.573,
46
+ "step": 864
47
+ },
48
+ {
49
+ "epoch": 4.63,
50
+ "learning_rate": 0.0002,
51
+ "loss": 5.3629,
52
+ "step": 1000
53
+ },
54
+ {
55
+ "epoch": 5.0,
56
+ "eval_loss": 5.291167736053467,
57
+ "eval_runtime": 4.6079,
58
+ "eval_samples_per_second": 2567.32,
59
+ "eval_steps_per_second": 2.604,
60
+ "step": 1080
61
+ },
62
+ {
63
+ "epoch": 6.0,
64
+ "eval_loss": 5.238525867462158,
65
+ "eval_runtime": 4.628,
66
+ "eval_samples_per_second": 2556.182,
67
+ "eval_steps_per_second": 2.593,
68
+ "step": 1296
69
+ },
70
+ {
71
+ "epoch": 6.94,
72
+ "learning_rate": 0.00018175182481751826,
73
+ "loss": 5.22,
74
+ "step": 1500
75
+ },
76
+ {
77
+ "epoch": 7.0,
78
+ "eval_loss": 5.195512771606445,
79
+ "eval_runtime": 4.6676,
80
+ "eval_samples_per_second": 2534.484,
81
+ "eval_steps_per_second": 2.571,
82
+ "step": 1512
83
+ },
84
+ {
85
+ "epoch": 8.0,
86
+ "eval_loss": 5.178501605987549,
87
+ "eval_runtime": 4.6613,
88
+ "eval_samples_per_second": 2537.906,
89
+ "eval_steps_per_second": 2.574,
90
+ "step": 1728
91
+ },
92
+ {
93
+ "epoch": 9.0,
94
+ "eval_loss": 5.132693767547607,
95
+ "eval_runtime": 4.6567,
96
+ "eval_samples_per_second": 2540.411,
97
+ "eval_steps_per_second": 2.577,
98
+ "step": 1944
99
+ },
100
+ {
101
+ "epoch": 9.26,
102
+ "learning_rate": 0.0001635036496350365,
103
+ "loss": 5.1248,
104
+ "step": 2000
105
+ },
106
+ {
107
+ "epoch": 10.0,
108
+ "eval_loss": 5.1242523193359375,
109
+ "eval_runtime": 4.6307,
110
+ "eval_samples_per_second": 2554.696,
111
+ "eval_steps_per_second": 2.591,
112
+ "step": 2160
113
+ },
114
+ {
115
+ "epoch": 11.0,
116
+ "eval_loss": 5.0888824462890625,
117
+ "eval_runtime": 4.6284,
118
+ "eval_samples_per_second": 2555.934,
119
+ "eval_steps_per_second": 2.593,
120
+ "step": 2376
121
+ },
122
+ {
123
+ "epoch": 11.57,
124
+ "learning_rate": 0.00014525547445255475,
125
+ "loss": 5.0591,
126
+ "step": 2500
127
+ },
128
+ {
129
+ "epoch": 12.0,
130
+ "eval_loss": 5.073211669921875,
131
+ "eval_runtime": 4.7033,
132
+ "eval_samples_per_second": 2515.279,
133
+ "eval_steps_per_second": 2.551,
134
+ "step": 2592
135
+ },
136
+ {
137
+ "epoch": 13.0,
138
+ "eval_loss": 5.041653633117676,
139
+ "eval_runtime": 4.6143,
140
+ "eval_samples_per_second": 2563.795,
141
+ "eval_steps_per_second": 2.601,
142
+ "step": 2808
143
+ },
144
+ {
145
+ "epoch": 13.89,
146
+ "learning_rate": 0.000127007299270073,
147
+ "loss": 5.0094,
148
+ "step": 3000
149
+ },
150
+ {
151
+ "epoch": 14.0,
152
+ "eval_loss": 5.038762092590332,
153
+ "eval_runtime": 4.6173,
154
+ "eval_samples_per_second": 2562.095,
155
+ "eval_steps_per_second": 2.599,
156
+ "step": 3024
157
+ },
158
+ {
159
+ "epoch": 15.0,
160
+ "eval_loss": 4.9298810958862305,
161
+ "eval_runtime": 4.5819,
162
+ "eval_samples_per_second": 2581.88,
163
+ "eval_steps_per_second": 2.619,
164
+ "step": 3240
165
+ },
166
+ {
167
+ "epoch": 16.0,
168
+ "eval_loss": 4.299057960510254,
169
+ "eval_runtime": 4.6127,
170
+ "eval_samples_per_second": 2564.64,
171
+ "eval_steps_per_second": 2.601,
172
+ "step": 3456
173
+ },
174
+ {
175
+ "epoch": 16.2,
176
+ "learning_rate": 0.00010875912408759123,
177
+ "loss": 4.7527,
178
+ "step": 3500
179
+ },
180
+ {
181
+ "epoch": 17.0,
182
+ "eval_loss": 3.654055118560791,
183
+ "eval_runtime": 4.5888,
184
+ "eval_samples_per_second": 2578.023,
185
+ "eval_steps_per_second": 2.615,
186
+ "step": 3672
187
+ },
188
+ {
189
+ "epoch": 18.0,
190
+ "eval_loss": 2.7825753688812256,
191
+ "eval_runtime": 4.6393,
192
+ "eval_samples_per_second": 2549.967,
193
+ "eval_steps_per_second": 2.587,
194
+ "step": 3888
195
+ },
196
+ {
197
+ "epoch": 18.52,
198
+ "learning_rate": 9.051094890510949e-05,
199
+ "loss": 3.4431,
200
+ "step": 4000
201
+ },
202
+ {
203
+ "epoch": 19.0,
204
+ "eval_loss": 2.2795569896698,
205
+ "eval_runtime": 4.647,
206
+ "eval_samples_per_second": 2545.709,
207
+ "eval_steps_per_second": 2.582,
208
+ "step": 4104
209
+ },
210
+ {
211
+ "epoch": 20.0,
212
+ "eval_loss": 2.021310806274414,
213
+ "eval_runtime": 4.6303,
214
+ "eval_samples_per_second": 2554.922,
215
+ "eval_steps_per_second": 2.592,
216
+ "step": 4320
217
+ },
218
+ {
219
+ "epoch": 20.83,
220
+ "learning_rate": 7.226277372262774e-05,
221
+ "loss": 2.2803,
222
+ "step": 4500
223
+ },
224
+ {
225
+ "epoch": 21.0,
226
+ "eval_loss": 1.8808549642562866,
227
+ "eval_runtime": 4.6167,
228
+ "eval_samples_per_second": 2562.421,
229
+ "eval_steps_per_second": 2.599,
230
+ "step": 4536
231
+ },
232
+ {
233
+ "epoch": 22.0,
234
+ "eval_loss": 1.7615374326705933,
235
+ "eval_runtime": 4.6259,
236
+ "eval_samples_per_second": 2557.316,
237
+ "eval_steps_per_second": 2.594,
238
+ "step": 4752
239
+ },
240
+ {
241
+ "epoch": 23.0,
242
+ "eval_loss": 1.6925297975540161,
243
+ "eval_runtime": 4.6567,
244
+ "eval_samples_per_second": 2540.444,
245
+ "eval_steps_per_second": 2.577,
246
+ "step": 4968
247
+ },
248
+ {
249
+ "epoch": 23.15,
250
+ "learning_rate": 5.401459854014599e-05,
251
+ "loss": 1.8601,
252
+ "step": 5000
253
+ },
254
+ {
255
+ "epoch": 24.0,
256
+ "eval_loss": 1.6204941272735596,
257
+ "eval_runtime": 4.7455,
258
+ "eval_samples_per_second": 2492.914,
259
+ "eval_steps_per_second": 2.529,
260
+ "step": 5184
261
+ },
262
+ {
263
+ "epoch": 25.0,
264
+ "eval_loss": 1.5750768184661865,
265
+ "eval_runtime": 4.6733,
266
+ "eval_samples_per_second": 2531.399,
267
+ "eval_steps_per_second": 2.568,
268
+ "step": 5400
269
+ },
270
+ {
271
+ "epoch": 25.46,
272
+ "learning_rate": 3.5766423357664236e-05,
273
+ "loss": 1.6697,
274
+ "step": 5500
275
+ },
276
+ {
277
+ "epoch": 26.0,
278
+ "eval_loss": 1.5390561819076538,
279
+ "eval_runtime": 4.6271,
280
+ "eval_samples_per_second": 2556.66,
281
+ "eval_steps_per_second": 2.593,
282
+ "step": 5616
283
+ },
284
+ {
285
+ "epoch": 27.0,
286
+ "eval_loss": 1.520015835762024,
287
+ "eval_runtime": 4.6894,
288
+ "eval_samples_per_second": 2522.687,
289
+ "eval_steps_per_second": 2.559,
290
+ "step": 5832
291
+ },
292
+ {
293
+ "epoch": 27.78,
294
+ "learning_rate": 1.7518248175182482e-05,
295
+ "loss": 1.5655,
296
+ "step": 6000
297
+ },
298
+ {
299
+ "epoch": 28.0,
300
+ "eval_loss": 1.4865714311599731,
301
+ "eval_runtime": 4.6379,
302
+ "eval_samples_per_second": 2550.726,
303
+ "eval_steps_per_second": 2.587,
304
+ "step": 6048
305
+ },
306
+ {
307
+ "epoch": 29.0,
308
+ "eval_loss": 1.4655797481536865,
309
+ "eval_runtime": 4.6124,
310
+ "eval_samples_per_second": 2564.811,
311
+ "eval_steps_per_second": 2.602,
312
+ "step": 6264
313
+ }
314
+ ],
315
+ "max_steps": 6480,
316
+ "num_train_epochs": 30,
317
+ "total_flos": 5.277103558675661e+16,
318
+ "trial_name": null,
319
+ "trial_params": null
320
+ }
checkpoint-6264/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9866332a8c239e2ea4e41b248ff890b722d4cedc2447fc101eb0e968e01318c
3
+ size 2863
checkpoint-6264/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-6480/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": ".",
3
+ "architectures": [
4
+ "RobertaForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.13.0.dev0",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 50265
27
+ }
checkpoint-6480/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-6480/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54c0cc8e456135a1228ceda32c0a2cfcadf4a8ca0295df6832eb6cdbd1527f9d
3
+ size 997690841
checkpoint-6480/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:388277022ddab4c9af11b37b0e03a9873d96d6aeec1c84207b175dfecc03db23
3
+ size 498858859
checkpoint-6480/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a560cd7637128ca69573409a6eb93867b35bd9f6ec805f4b861b03f406c85aa
3
+ size 13611
checkpoint-6480/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a560cd7637128ca69573409a6eb93867b35bd9f6ec805f4b861b03f406c85aa
3
+ size 13611
checkpoint-6480/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a560cd7637128ca69573409a6eb93867b35bd9f6ec805f4b861b03f406c85aa
3
+ size 13611
checkpoint-6480/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a560cd7637128ca69573409a6eb93867b35bd9f6ec805f4b861b03f406c85aa
3
+ size 13611
checkpoint-6480/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a560cd7637128ca69573409a6eb93867b35bd9f6ec805f4b861b03f406c85aa
3
+ size 13611
checkpoint-6480/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a560cd7637128ca69573409a6eb93867b35bd9f6ec805f4b861b03f406c85aa
3
+ size 13611
checkpoint-6480/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a560cd7637128ca69573409a6eb93867b35bd9f6ec805f4b861b03f406c85aa
3
+ size 13611
checkpoint-6480/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a560cd7637128ca69573409a6eb93867b35bd9f6ec805f4b861b03f406c85aa
3
+ size 13611
checkpoint-6480/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95e3852d686e307490e73572dac19393abb02e62aa2d1150c2f411a46fb21570
3
+ size 623
checkpoint-6480/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
checkpoint-6480/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-6480/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "special_tokens_map_file": null, "name_or_path": ".", "tokenizer_class": "RobertaTokenizer"}
checkpoint-6480/trainer_state.json ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 30.0,
5
+ "global_step": 6480,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_loss": 5.858560562133789,
13
+ "eval_runtime": 5.0347,
14
+ "eval_samples_per_second": 2349.702,
15
+ "eval_steps_per_second": 2.383,
16
+ "step": 216
17
+ },
18
+ {
19
+ "epoch": 2.0,
20
+ "eval_loss": 5.509454727172852,
21
+ "eval_runtime": 4.6188,
22
+ "eval_samples_per_second": 2561.29,
23
+ "eval_steps_per_second": 2.598,
24
+ "step": 432
25
+ },
26
+ {
27
+ "epoch": 2.31,
28
+ "learning_rate": 0.0001,
29
+ "loss": 6.688,
30
+ "step": 500
31
+ },
32
+ {
33
+ "epoch": 3.0,
34
+ "eval_loss": 5.397603511810303,
35
+ "eval_runtime": 4.6394,
36
+ "eval_samples_per_second": 2549.872,
37
+ "eval_steps_per_second": 2.587,
38
+ "step": 648
39
+ },
40
+ {
41
+ "epoch": 4.0,
42
+ "eval_loss": 5.356218338012695,
43
+ "eval_runtime": 4.6644,
44
+ "eval_samples_per_second": 2536.231,
45
+ "eval_steps_per_second": 2.573,
46
+ "step": 864
47
+ },
48
+ {
49
+ "epoch": 4.63,
50
+ "learning_rate": 0.0002,
51
+ "loss": 5.3629,
52
+ "step": 1000
53
+ },
54
+ {
55
+ "epoch": 5.0,
56
+ "eval_loss": 5.291167736053467,
57
+ "eval_runtime": 4.6079,
58
+ "eval_samples_per_second": 2567.32,
59
+ "eval_steps_per_second": 2.604,
60
+ "step": 1080
61
+ },
62
+ {
63
+ "epoch": 6.0,
64
+ "eval_loss": 5.238525867462158,
65
+ "eval_runtime": 4.628,
66
+ "eval_samples_per_second": 2556.182,
67
+ "eval_steps_per_second": 2.593,
68
+ "step": 1296
69
+ },
70
+ {
71
+ "epoch": 6.94,
72
+ "learning_rate": 0.00018175182481751826,
73
+ "loss": 5.22,
74
+ "step": 1500
75
+ },
76
+ {
77
+ "epoch": 7.0,
78
+ "eval_loss": 5.195512771606445,
79
+ "eval_runtime": 4.6676,
80
+ "eval_samples_per_second": 2534.484,
81
+ "eval_steps_per_second": 2.571,
82
+ "step": 1512
83
+ },
84
+ {
85
+ "epoch": 8.0,
86
+ "eval_loss": 5.178501605987549,
87
+ "eval_runtime": 4.6613,
88
+ "eval_samples_per_second": 2537.906,
89
+ "eval_steps_per_second": 2.574,
90
+ "step": 1728
91
+ },
92
+ {
93
+ "epoch": 9.0,
94
+ "eval_loss": 5.132693767547607,
95
+ "eval_runtime": 4.6567,
96
+ "eval_samples_per_second": 2540.411,
97
+ "eval_steps_per_second": 2.577,
98
+ "step": 1944
99
+ },
100
+ {
101
+ "epoch": 9.26,
102
+ "learning_rate": 0.0001635036496350365,
103
+ "loss": 5.1248,
104
+ "step": 2000
105
+ },
106
+ {
107
+ "epoch": 10.0,
108
+ "eval_loss": 5.1242523193359375,
109
+ "eval_runtime": 4.6307,
110
+ "eval_samples_per_second": 2554.696,
111
+ "eval_steps_per_second": 2.591,
112
+ "step": 2160
113
+ },
114
+ {
115
+ "epoch": 11.0,
116
+ "eval_loss": 5.0888824462890625,
117
+ "eval_runtime": 4.6284,
118
+ "eval_samples_per_second": 2555.934,
119
+ "eval_steps_per_second": 2.593,
120
+ "step": 2376
121
+ },
122
+ {
123
+ "epoch": 11.57,
124
+ "learning_rate": 0.00014525547445255475,
125
+ "loss": 5.0591,
126
+ "step": 2500
127
+ },
128
+ {
129
+ "epoch": 12.0,
130
+ "eval_loss": 5.073211669921875,
131
+ "eval_runtime": 4.7033,
132
+ "eval_samples_per_second": 2515.279,
133
+ "eval_steps_per_second": 2.551,
134
+ "step": 2592
135
+ },
136
+ {
137
+ "epoch": 13.0,
138
+ "eval_loss": 5.041653633117676,
139
+ "eval_runtime": 4.6143,
140
+ "eval_samples_per_second": 2563.795,
141
+ "eval_steps_per_second": 2.601,
142
+ "step": 2808
143
+ },
144
+ {
145
+ "epoch": 13.89,
146
+ "learning_rate": 0.000127007299270073,
147
+ "loss": 5.0094,
148
+ "step": 3000
149
+ },
150
+ {
151
+ "epoch": 14.0,
152
+ "eval_loss": 5.038762092590332,
153
+ "eval_runtime": 4.6173,
154
+ "eval_samples_per_second": 2562.095,
155
+ "eval_steps_per_second": 2.599,
156
+ "step": 3024
157
+ },
158
+ {
159
+ "epoch": 15.0,
160
+ "eval_loss": 4.9298810958862305,
161
+ "eval_runtime": 4.5819,
162
+ "eval_samples_per_second": 2581.88,
163
+ "eval_steps_per_second": 2.619,
164
+ "step": 3240
165
+ },
166
+ {
167
+ "epoch": 16.0,
168
+ "eval_loss": 4.299057960510254,
169
+ "eval_runtime": 4.6127,
170
+ "eval_samples_per_second": 2564.64,
171
+ "eval_steps_per_second": 2.601,
172
+ "step": 3456
173
+ },
174
+ {
175
+ "epoch": 16.2,
176
+ "learning_rate": 0.00010875912408759123,
177
+ "loss": 4.7527,
178
+ "step": 3500
179
+ },
180
+ {
181
+ "epoch": 17.0,
182
+ "eval_loss": 3.654055118560791,
183
+ "eval_runtime": 4.5888,
184
+ "eval_samples_per_second": 2578.023,
185
+ "eval_steps_per_second": 2.615,
186
+ "step": 3672
187
+ },
188
+ {
189
+ "epoch": 18.0,
190
+ "eval_loss": 2.7825753688812256,
191
+ "eval_runtime": 4.6393,
192
+ "eval_samples_per_second": 2549.967,
193
+ "eval_steps_per_second": 2.587,
194
+ "step": 3888
195
+ },
196
+ {
197
+ "epoch": 18.52,
198
+ "learning_rate": 9.051094890510949e-05,
199
+ "loss": 3.4431,
200
+ "step": 4000
201
+ },
202
+ {
203
+ "epoch": 19.0,
204
+ "eval_loss": 2.2795569896698,
205
+ "eval_runtime": 4.647,
206
+ "eval_samples_per_second": 2545.709,
207
+ "eval_steps_per_second": 2.582,
208
+ "step": 4104
209
+ },
210
+ {
211
+ "epoch": 20.0,
212
+ "eval_loss": 2.021310806274414,
213
+ "eval_runtime": 4.6303,
214
+ "eval_samples_per_second": 2554.922,
215
+ "eval_steps_per_second": 2.592,
216
+ "step": 4320
217
+ },
218
+ {
219
+ "epoch": 20.83,
220
+ "learning_rate": 7.226277372262774e-05,
221
+ "loss": 2.2803,
222
+ "step": 4500
223
+ },
224
+ {
225
+ "epoch": 21.0,
226
+ "eval_loss": 1.8808549642562866,
227
+ "eval_runtime": 4.6167,
228
+ "eval_samples_per_second": 2562.421,
229
+ "eval_steps_per_second": 2.599,
230
+ "step": 4536
231
+ },
232
+ {
233
+ "epoch": 22.0,
234
+ "eval_loss": 1.7615374326705933,
235
+ "eval_runtime": 4.6259,
236
+ "eval_samples_per_second": 2557.316,
237
+ "eval_steps_per_second": 2.594,
238
+ "step": 4752
239
+ },
240
+ {
241
+ "epoch": 23.0,
242
+ "eval_loss": 1.6925297975540161,
243
+ "eval_runtime": 4.6567,
244
+ "eval_samples_per_second": 2540.444,
245
+ "eval_steps_per_second": 2.577,
246
+ "step": 4968
247
+ },
248
+ {
249
+ "epoch": 23.15,
250
+ "learning_rate": 5.401459854014599e-05,
251
+ "loss": 1.8601,
252
+ "step": 5000
253
+ },
254
+ {
255
+ "epoch": 24.0,
256
+ "eval_loss": 1.6204941272735596,
257
+ "eval_runtime": 4.7455,
258
+ "eval_samples_per_second": 2492.914,
259
+ "eval_steps_per_second": 2.529,
260
+ "step": 5184
261
+ },
262
+ {
263
+ "epoch": 25.0,
264
+ "eval_loss": 1.5750768184661865,
265
+ "eval_runtime": 4.6733,
266
+ "eval_samples_per_second": 2531.399,
267
+ "eval_steps_per_second": 2.568,
268
+ "step": 5400
269
+ },
270
+ {
271
+ "epoch": 25.46,
272
+ "learning_rate": 3.5766423357664236e-05,
273
+ "loss": 1.6697,
274
+ "step": 5500
275
+ },
276
+ {
277
+ "epoch": 26.0,
278
+ "eval_loss": 1.5390561819076538,
279
+ "eval_runtime": 4.6271,
280
+ "eval_samples_per_second": 2556.66,
281
+ "eval_steps_per_second": 2.593,
282
+ "step": 5616
283
+ },
284
+ {
285
+ "epoch": 27.0,
286
+ "eval_loss": 1.520015835762024,
287
+ "eval_runtime": 4.6894,
288
+ "eval_samples_per_second": 2522.687,
289
+ "eval_steps_per_second": 2.559,
290
+ "step": 5832
291
+ },
292
+ {
293
+ "epoch": 27.78,
294
+ "learning_rate": 1.7518248175182482e-05,
295
+ "loss": 1.5655,
296
+ "step": 6000
297
+ },
298
+ {
299
+ "epoch": 28.0,
300
+ "eval_loss": 1.4865714311599731,
301
+ "eval_runtime": 4.6379,
302
+ "eval_samples_per_second": 2550.726,
303
+ "eval_steps_per_second": 2.587,
304
+ "step": 6048
305
+ },
306
+ {
307
+ "epoch": 29.0,
308
+ "eval_loss": 1.4655797481536865,
309
+ "eval_runtime": 4.6124,
310
+ "eval_samples_per_second": 2564.811,
311
+ "eval_steps_per_second": 2.602,
312
+ "step": 6264
313
+ },
314
+ {
315
+ "epoch": 30.0,
316
+ "eval_loss": 1.4627275466918945,
317
+ "eval_runtime": 4.9986,
318
+ "eval_samples_per_second": 2366.681,
319
+ "eval_steps_per_second": 2.401,
320
+ "step": 6480
321
+ }
322
+ ],
323
+ "max_steps": 6480,
324
+ "num_train_epochs": 30,
325
+ "total_flos": 5.459072646905856e+16,
326
+ "trial_name": null,
327
+ "trial_params": null
328
+ }
checkpoint-6480/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9866332a8c239e2ea4e41b248ff890b722d4cedc2447fc101eb0e968e01318c
3
+ size 2863
checkpoint-6480/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": ".",
3
+ "architectures": [
4
+ "RobertaForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.13.0.dev0",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 50265
27
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "eval_loss": 1.4556076526641846,
4
+ "eval_runtime": 4.6081,
5
+ "eval_samples": 11830,
6
+ "eval_samples_per_second": 2567.232,
7
+ "eval_steps_per_second": 2.604,
8
+ "perplexity": 4.287087734559996
9
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:388277022ddab4c9af11b37b0e03a9873d96d6aeec1c84207b175dfecc03db23
3
+ size 498858859
runs/Dec05_03-42-19_bigbird/1638675757.7021885/events.out.tfevents.1638675757.bigbird.14625.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e6baa99b060aac1ce852a3fe431c270aaf76688226b7e0eeb8f9af122f73af7
3
+ size 4644
runs/Dec05_03-42-19_bigbird/events.out.tfevents.1638675757.bigbird.14625.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b393b5760999683300600f3e8b537691a09653983bd2bf908d74b6b2700c2419
3
+ size 13575
runs/Dec05_03-42-19_bigbird/events.out.tfevents.1638680810.bigbird.14625.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33b6127618bf5772c5044dda6af929a437f2826adf64d5f8e8223bae2eeff52c
3
+ size 311
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "special_tokens_map_file": null, "name_or_path": ".", "tokenizer_class": "RobertaTokenizer"}