Step... (24001/50000 | Loss: 1.617178201675415, Acc: 0.6729233860969543): 48%|█████████████▌ | 24176/50000 [9:23:11<10:03:40, 1.40s/it]
- flax_model.msgpack +3 -0
- outputs/checkpoints/checkpoint-20000/config.json +25 -0
- outputs/checkpoints/checkpoint-20000/data_collator.joblib +3 -0
- outputs/checkpoints/checkpoint-20000/flax_model.msgpack +3 -0
- outputs/checkpoints/checkpoint-20000/optimizer_state.msgpack +3 -0
- outputs/checkpoints/checkpoint-20000/training_args.joblib +3 -0
- outputs/checkpoints/checkpoint-20000/training_state.json +1 -0
- outputs/checkpoints/checkpoint-21000/config.json +25 -0
- outputs/checkpoints/checkpoint-21000/data_collator.joblib +3 -0
- outputs/checkpoints/checkpoint-21000/flax_model.msgpack +3 -0
- outputs/checkpoints/checkpoint-21000/optimizer_state.msgpack +3 -0
- outputs/checkpoints/checkpoint-21000/training_args.joblib +3 -0
- outputs/checkpoints/checkpoint-21000/training_state.json +1 -0
- outputs/checkpoints/checkpoint-22000/config.json +25 -0
- outputs/checkpoints/checkpoint-22000/data_collator.joblib +3 -0
- outputs/checkpoints/checkpoint-22000/flax_model.msgpack +3 -0
- outputs/checkpoints/checkpoint-22000/optimizer_state.msgpack +3 -0
- outputs/checkpoints/checkpoint-22000/training_args.joblib +3 -0
- outputs/checkpoints/checkpoint-22000/training_state.json +1 -0
- outputs/checkpoints/checkpoint-23000/config.json +25 -0
- outputs/checkpoints/checkpoint-23000/data_collator.joblib +3 -0
- outputs/checkpoints/checkpoint-23000/flax_model.msgpack +3 -0
- outputs/checkpoints/checkpoint-23000/optimizer_state.msgpack +3 -0
- outputs/checkpoints/checkpoint-23000/training_args.joblib +3 -0
- outputs/checkpoints/checkpoint-23000/training_state.json +1 -0
- outputs/checkpoints/checkpoint-24000/config.json +25 -0
- outputs/checkpoints/checkpoint-24000/data_collator.joblib +3 -0
- outputs/checkpoints/checkpoint-24000/flax_model.msgpack +3 -0
- outputs/checkpoints/checkpoint-24000/optimizer_state.msgpack +3 -0
- outputs/checkpoints/checkpoint-24000/training_args.joblib +3 -0
- outputs/checkpoints/checkpoint-24000/training_state.json +1 -0
- outputs/config.json +25 -0
- outputs/data_collator.joblib +3 -0
- outputs/events.out.tfevents.1626606849.tablespoon.3622971.3.v2 +3 -0
- outputs/flax_model.msgpack +3 -0
- outputs/optimizer_state.msgpack +3 -0
- outputs/training_args.joblib +3 -0
- outputs/training_state.json +1 -0
- pytorch_model.bin +3 -0
- run_stream.log +3 -0
flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09beaeeb31a7c0151bd2c93e8cc32b6ea04983edc759ef4cb3c8648db5ad7730
+size 249750019

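Each binary in this commit is stored as a Git LFS pointer rather than the blob itself: a three-line stub giving the spec version, the SHA-256 digest of the real object, and its size in bytes. A minimal Python sketch for parsing a pointer and verifying a fetched blob against it (the helper names and paths are illustrative, not part of this commit):

import hashlib
from pathlib import Path

def read_lfs_pointer(path):
    # Parse the three "key value" lines of a Git LFS pointer file.
    fields = dict(line.split(" ", 1) for line in Path(path).read_text().splitlines())
    return {
        "version": fields["version"],
        "oid": fields["oid"].split(":", 1)[1],  # drop the "sha256:" prefix
        "size": int(fields["size"]),
    }

def blob_matches_pointer(pointer, blob_path):
    # A blob is valid only if both its byte length and SHA-256 digest match the pointer.
    data = Path(blob_path).read_bytes()
    return len(data) == pointer["size"] and hashlib.sha256(data).hexdigest() == pointer["oid"]

# e.g. blob_matches_pointer(read_lfs_pointer("flax_model.msgpack"), "<downloaded blob>")
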
outputs/checkpoints/checkpoint-20000/config.json
ADDED
@@ -0,0 +1,25 @@
+{
+  "architectures": [
+    "RobertaForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.9.0.dev0",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}

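The config above is a stock RoBERTa-base masked-LM setup (12 layers, 12 heads, hidden size 768, vocab 50265) and is identical across every checkpoint in this commit. To inspect it programmatically, a minimal sketch using transformers, assuming the repo has been cloned and the checkpoint directory is on disk:

from transformers import RobertaConfig

# from_pretrained accepts a local directory containing config.json
config = RobertaConfig.from_pretrained("outputs/checkpoints/checkpoint-20000")
print(config.num_hidden_layers, config.num_attention_heads, config.hidden_size)
# expected: 12 12 768
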
outputs/checkpoints/checkpoint-20000/data_collator.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e02a6e9cfa63cb321cac9402efd29841b652999fcbf787800ae050e747b161ee
+size 1471394

outputs/checkpoints/checkpoint-20000/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc062cff226dee3ccdfbbc5682d4b2882c4f57c0a40a3f54bd16d8567298d5c0
+size 249750019

outputs/checkpoints/checkpoint-20000/optimizer_state.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d1e8ec7e4b6752640d08a898bdf803649b14ffa733d5e17300d4eb26761b32c
+size 499500278

outputs/checkpoints/checkpoint-20000/training_args.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fc949f7397802302b9bca931dc2ae667a615a9e818fe0b22660b2812f0ac94a
+size 1871

outputs/checkpoints/checkpoint-20000/training_state.json
ADDED
@@ -0,0 +1 @@
+{"step": 20001}

outputs/checkpoints/checkpoint-21000/config.json
ADDED
@@ -0,0 +1,25 @@
+{
+  "architectures": [
+    "RobertaForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.9.0.dev0",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}

outputs/checkpoints/checkpoint-21000/data_collator.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e02a6e9cfa63cb321cac9402efd29841b652999fcbf787800ae050e747b161ee
+size 1471394

outputs/checkpoints/checkpoint-21000/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09bef80851e5190807b15c1dbe9d0270c9384ab44c3dcb4db178b85e80a71379
+size 249750019

outputs/checkpoints/checkpoint-21000/optimizer_state.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a99a82909987b205609070cca9c4c5799ca4a0b78511f099fc3a9e538161a195
+size 499500278

outputs/checkpoints/checkpoint-21000/training_args.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fc949f7397802302b9bca931dc2ae667a615a9e818fe0b22660b2812f0ac94a
+size 1871

outputs/checkpoints/checkpoint-21000/training_state.json
ADDED
@@ -0,0 +1 @@
+{"step": 21001}

outputs/checkpoints/checkpoint-22000/config.json
ADDED
@@ -0,0 +1,25 @@
+{
+  "architectures": [
+    "RobertaForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.9.0.dev0",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}

outputs/checkpoints/checkpoint-22000/data_collator.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e02a6e9cfa63cb321cac9402efd29841b652999fcbf787800ae050e747b161ee
+size 1471394

outputs/checkpoints/checkpoint-22000/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:456166d50c18f7f3142206184d4feef24a636439fdcef000abad0390a3d059b5
+size 249750019

outputs/checkpoints/checkpoint-22000/optimizer_state.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:999d37700b86141a104d23f6d71b9c6d986487679ae464c42f3cd32807cfc1fe
+size 499500278

outputs/checkpoints/checkpoint-22000/training_args.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fc949f7397802302b9bca931dc2ae667a615a9e818fe0b22660b2812f0ac94a
+size 1871

outputs/checkpoints/checkpoint-22000/training_state.json
ADDED
@@ -0,0 +1 @@
+{"step": 22001}

outputs/checkpoints/checkpoint-23000/config.json
ADDED
@@ -0,0 +1,25 @@
+{
+  "architectures": [
+    "RobertaForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.9.0.dev0",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}

outputs/checkpoints/checkpoint-23000/data_collator.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e02a6e9cfa63cb321cac9402efd29841b652999fcbf787800ae050e747b161ee
+size 1471394

outputs/checkpoints/checkpoint-23000/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:998dbfb165df6528c9aba1a3b7f780529f9f4b1802aa9747e677cb3c0df13367
+size 249750019

outputs/checkpoints/checkpoint-23000/optimizer_state.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f600914b8fd791890a084de4c55f62c1bfdb83ea63fd68009fad4b9e326a7993
+size 499500278

outputs/checkpoints/checkpoint-23000/training_args.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fc949f7397802302b9bca931dc2ae667a615a9e818fe0b22660b2812f0ac94a
+size 1871

outputs/checkpoints/checkpoint-23000/training_state.json
ADDED
@@ -0,0 +1 @@
+{"step": 23001}

outputs/checkpoints/checkpoint-24000/config.json
ADDED
@@ -0,0 +1,25 @@
+{
+  "architectures": [
+    "RobertaForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.9.0.dev0",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}

outputs/checkpoints/checkpoint-24000/data_collator.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e02a6e9cfa63cb321cac9402efd29841b652999fcbf787800ae050e747b161ee
+size 1471394

outputs/checkpoints/checkpoint-24000/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09beaeeb31a7c0151bd2c93e8cc32b6ea04983edc759ef4cb3c8648db5ad7730
+size 249750019

outputs/checkpoints/checkpoint-24000/optimizer_state.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e28559bb3ef8565f8edae3fabb0dd7f897d6c713613daa2253997225190e79c4
+size 499500278

outputs/checkpoints/checkpoint-24000/training_args.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fc949f7397802302b9bca931dc2ae667a615a9e818fe0b22660b2812f0ac94a
+size 1871

outputs/checkpoints/checkpoint-24000/training_state.json
ADDED
@@ -0,0 +1 @@
+{"step": 24001}

outputs/config.json
ADDED
@@ -0,0 +1,25 @@
+{
+  "architectures": [
+    "RobertaForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.9.0.dev0",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}

outputs/data_collator.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e02a6e9cfa63cb321cac9402efd29841b652999fcbf787800ae050e747b161ee
+size 1471394

outputs/events.out.tfevents.1626606849.tablespoon.3622971.3.v2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a8fbb76cc63d74e9ce7a5412bad1ffc119d253cd90aa83ea2c81c9d7983de56
+size 3549865

outputs/flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09beaeeb31a7c0151bd2c93e8cc32b6ea04983edc759ef4cb3c8648db5ad7730
+size 249750019

outputs/optimizer_state.msgpack
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e28559bb3ef8565f8edae3fabb0dd7f897d6c713613daa2253997225190e79c4
+size 499500278

outputs/training_args.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fc949f7397802302b9bca931dc2ae667a615a9e818fe0b22660b2812f0ac94a
+size 1871

outputs/training_state.json
ADDED
@@ -0,0 +1 @@
+{"step": 24001}

pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5245e6d027e2a607f8dd81c64eeb4619c68790451c1c7486cd4a78aff54a84da
+size 498858859

run_stream.log
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52166949c32235ca69239a87410e283ddad2eb5e37ee631c3b43bc1f16e82897
+size 4721052

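Since outputs/ contains both config.json and flax_model.msgpack, the latest state (step 24001, matching the commit title) can be loaded directly with the Flax model class. A minimal sketch, assuming the clone has been materialized with git lfs pull:

from transformers import FlaxRobertaForMaskedLM

# outputs/ holds config.json plus flax_model.msgpack in this commit
model = FlaxRobertaForMaskedLM.from_pretrained("outputs")

The root-level pytorch_model.bin can be loaded the same way with RobertaForMaskedLM once a config.json sits beside it (this commit does not add one at the root).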