badokorach committed on
Commit
9004a78
1 Parent(s): 9f2702b

Training in progress epoch 0

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- license: cc-by-4.0
3
- base_model: deepset/roberta-base-squad2
4
  tags:
5
  - generated_from_keras_callback
6
  model-index:
@@ -13,11 +13,11 @@ probably proofread and complete it, then remove this comment. -->
13
 
14
  # badokorach/roberta-base-squad2-agric-060124
15
 
16
- This model is a fine-tuned version of [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Train Loss: 0.3137
19
  - Validation Loss: 0.0
20
- - Epoch: 14
21
 
22
  ## Model description
23
 
@@ -36,28 +36,14 @@ More information needed
36
  ### Training hyperparameters
37
 
38
  The following hyperparameters were used during training:
39
- - optimizer: {'name': 'AdamWeightDecay', 'learning_rate': {'module': 'keras.optimizers.schedules', 'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 2e-05, 'decay_steps': 2265, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}, 'registered_name': None}, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgrad': False, 'weight_decay_rate': 0.001}
40
  - training_precision: mixed_float16
41
 
42
  ### Training results
43
 
44
  | Train Loss | Validation Loss | Epoch |
45
  |:----------:|:---------------:|:-----:|
46
- | 2.4828 | 0.0 | 0 |
47
- | 1.7955 | 0.0 | 1 |
48
- | 1.4683 | 0.0 | 2 |
49
- | 1.2295 | 0.0 | 3 |
50
- | 1.0351 | 0.0 | 4 |
51
- | 0.8807 | 0.0 | 5 |
52
- | 0.7193 | 0.0 | 6 |
53
- | 0.6236 | 0.0 | 7 |
54
- | 0.5352 | 0.0 | 8 |
55
- | 0.4966 | 0.0 | 9 |
56
- | 0.4385 | 0.0 | 10 |
57
- | 0.3925 | 0.0 | 11 |
58
- | 0.3501 | 0.0 | 12 |
59
- | 0.3384 | 0.0 | 13 |
60
- | 0.3137 | 0.0 | 14 |
61
 
62
 
63
  ### Framework versions
@@ -65,4 +51,4 @@ The following hyperparameters were used during training:
65
  - Transformers 4.35.2
66
  - TensorFlow 2.15.0
67
  - Datasets 2.16.1
68
- - Tokenizers 0.15.0
 
1
  ---
2
+ license: mit
3
+ base_model: badokorach/afriqa_afroxlmr_squad_v2_060124
4
  tags:
5
  - generated_from_keras_callback
6
  model-index:
 
13
 
14
  # badokorach/roberta-base-squad2-agric-060124
15
 
16
+ This model is a fine-tuned version of [badokorach/afriqa_afroxlmr_squad_v2_060124](https://huggingface.co/badokorach/afriqa_afroxlmr_squad_v2_060124) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Train Loss: 2.0532
19
  - Validation Loss: 0.0
20
+ - Epoch: 0
21
 
22
  ## Model description
23
 
 
36
  ### Training hyperparameters
37
 
38
  The following hyperparameters were used during training:
39
+ - optimizer: {'name': 'AdamWeightDecay', 'learning_rate': {'module': 'keras.optimizers.schedules', 'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 4e-05, 'decay_steps': 555, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}, 'registered_name': None}, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgrad': False, 'weight_decay_rate': 0.02}
40
  - training_precision: mixed_float16
41
 
42
  ### Training results
43
 
44
  | Train Loss | Validation Loss | Epoch |
45
  |:----------:|:---------------:|:-----:|
46
+ | 2.0532 | 0.0 | 0 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
 
49
  ### Framework versions
 
51
  - Transformers 4.35.2
52
  - TensorFlow 2.15.0
53
  - Datasets 2.16.1
54
+ - Tokenizers 0.15.1
config.json CHANGED
@@ -1,29 +1,28 @@
1
  {
2
- "_name_or_path": "deepset/roberta-base-squad2",
3
  "architectures": [
4
- "RobertaForQuestionAnswering"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": 0,
8
  "classifier_dropout": null,
9
  "eos_token_id": 2,
10
- "gradient_checkpointing": false,
11
  "hidden_act": "gelu",
12
  "hidden_dropout_prob": 0.1,
13
  "hidden_size": 768,
14
  "initializer_range": 0.02,
15
  "intermediate_size": 3072,
16
- "language": "english",
17
  "layer_norm_eps": 1e-05,
18
  "max_position_embeddings": 514,
19
- "model_type": "roberta",
20
- "name": "Roberta",
21
  "num_attention_heads": 12,
22
  "num_hidden_layers": 12,
 
23
  "pad_token_id": 1,
24
  "position_embedding_type": "absolute",
 
25
  "transformers_version": "4.35.2",
26
  "type_vocab_size": 1,
27
  "use_cache": true,
28
- "vocab_size": 50265
29
  }
 
1
  {
2
+ "_name_or_path": "badokorach/afriqa_afroxlmr_squad_v2_060124",
3
  "architectures": [
4
+ "XLMRobertaForQuestionAnswering"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "bos_token_id": 0,
8
  "classifier_dropout": null,
9
  "eos_token_id": 2,
 
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 768,
13
  "initializer_range": 0.02,
14
  "intermediate_size": 3072,
 
15
  "layer_norm_eps": 1e-05,
16
  "max_position_embeddings": 514,
17
+ "model_type": "xlm-roberta",
 
18
  "num_attention_heads": 12,
19
  "num_hidden_layers": 12,
20
+ "output_past": true,
21
  "pad_token_id": 1,
22
  "position_embedding_type": "absolute",
23
+ "torch_dtype": "float32",
24
  "transformers_version": "4.35.2",
25
  "type_vocab_size": 1,
26
  "use_cache": true,
27
+ "vocab_size": 250002
28
  }
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json CHANGED
@@ -2,49 +2,49 @@
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
5
- "normalized": true,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "cls_token": {
10
  "content": "<s>",
11
  "lstrip": false,
12
- "normalized": true,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "eos_token": {
17
  "content": "</s>",
18
  "lstrip": false,
19
- "normalized": true,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
  "mask_token": {
24
  "content": "<mask>",
25
  "lstrip": true,
26
- "normalized": true,
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
  "pad_token": {
31
  "content": "<pad>",
32
  "lstrip": false,
33
- "normalized": true,
34
  "rstrip": false,
35
  "single_word": false
36
  },
37
  "sep_token": {
38
  "content": "</s>",
39
  "lstrip": false,
40
- "normalized": true,
41
  "rstrip": false,
42
  "single_word": false
43
  },
44
  "unk_token": {
45
  "content": "<unk>",
46
  "lstrip": false,
47
- "normalized": true,
48
  "rstrip": false,
49
  "single_word": false
50
  }
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
5
+ "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "cls_token": {
10
  "content": "<s>",
11
  "lstrip": false,
12
+ "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
  "eos_token": {
17
  "content": "</s>",
18
  "lstrip": false,
19
+ "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
  "mask_token": {
24
  "content": "<mask>",
25
  "lstrip": true,
26
+ "normalized": false,
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
  "pad_token": {
31
  "content": "<pad>",
32
  "lstrip": false,
33
+ "normalized": false,
34
  "rstrip": false,
35
  "single_word": false
36
  },
37
  "sep_token": {
38
  "content": "</s>",
39
  "lstrip": false,
40
+ "normalized": false,
41
  "rstrip": false,
42
  "single_word": false
43
  },
44
  "unk_token": {
45
  "content": "<unk>",
46
  "lstrip": false,
47
+ "normalized": false,
48
  "rstrip": false,
49
  "single_word": false
50
  }
tf_model.h5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:093d3c7d90855f4498a92813d45d41427e86d026a1c531b14ed1e615ef939638
3
- size 496513256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6d7c1aaec0ff03d7dcd558e30d9b5d9f88133293a9e9db262dd0b350765d4a5
3
+ size 1110105320
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,10 +1,9 @@
1
  {
2
- "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<s>",
6
  "lstrip": false,
7
- "normalized": true,
8
  "rstrip": false,
9
  "single_word": false,
10
  "special": true
@@ -12,7 +11,7 @@
12
  "1": {
13
  "content": "<pad>",
14
  "lstrip": false,
15
- "normalized": true,
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
@@ -20,7 +19,7 @@
20
  "2": {
21
  "content": "</s>",
22
  "lstrip": false,
23
- "normalized": true,
24
  "rstrip": false,
25
  "single_word": false,
26
  "special": true
@@ -28,15 +27,15 @@
28
  "3": {
29
  "content": "<unk>",
30
  "lstrip": false,
31
- "normalized": true,
32
  "rstrip": false,
33
  "single_word": false,
34
  "special": true
35
  },
36
- "50264": {
37
  "content": "<mask>",
38
  "lstrip": true,
39
- "normalized": true,
40
  "rstrip": false,
41
  "single_word": false,
42
  "special": true
@@ -45,15 +44,18 @@
45
  "bos_token": "<s>",
46
  "clean_up_tokenization_spaces": true,
47
  "cls_token": "<s>",
48
- "do_lower_case": false,
49
  "eos_token": "</s>",
50
- "errors": "replace",
51
- "full_tokenizer_file": null,
52
  "mask_token": "<mask>",
 
53
  "model_max_length": 512,
 
54
  "pad_token": "<pad>",
 
 
55
  "sep_token": "</s>",
56
- "tokenizer_class": "RobertaTokenizer",
57
- "trim_offsets": true,
 
 
58
  "unk_token": "<unk>"
59
  }
 
1
  {
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<s>",
5
  "lstrip": false,
6
+ "normalized": false,
7
  "rstrip": false,
8
  "single_word": false,
9
  "special": true
 
11
  "1": {
12
  "content": "<pad>",
13
  "lstrip": false,
14
+ "normalized": false,
15
  "rstrip": false,
16
  "single_word": false,
17
  "special": true
 
19
  "2": {
20
  "content": "</s>",
21
  "lstrip": false,
22
+ "normalized": false,
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
 
27
  "3": {
28
  "content": "<unk>",
29
  "lstrip": false,
30
+ "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
+ "250001": {
36
  "content": "<mask>",
37
  "lstrip": true,
38
+ "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
 
44
  "bos_token": "<s>",
45
  "clean_up_tokenization_spaces": true,
46
  "cls_token": "<s>",
 
47
  "eos_token": "</s>",
 
 
48
  "mask_token": "<mask>",
49
+ "max_length": 384,
50
  "model_max_length": 512,
51
+ "pad_to_multiple_of": null,
52
  "pad_token": "<pad>",
53
+ "pad_token_type_id": 0,
54
+ "padding_side": "right",
55
  "sep_token": "</s>",
56
+ "stride": 128,
57
+ "tokenizer_class": "XLMRobertaTokenizer",
58
+ "truncation_side": "right",
59
+ "truncation_strategy": "only_second",
60
  "unk_token": "<unk>"
61
  }