helenai committed on
Commit
b90aa01
1 Parent(s): 518b605

Add bert-base-uncased JPQD text-classification model

Browse files
README.md ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
+ tags:
6
+ - generated_from_trainer
7
+ datasets:
8
+ - glue
9
+ metrics:
10
+ - accuracy
11
+ model-index:
12
+ - name: jpqd-bert-base-ft-sst2
13
+ results:
14
+ - task:
15
+ name: Text Classification
16
+ type: text-classification
17
+ dataset:
18
+ name: GLUE SST2
19
+ type: glue
20
+ args: sst2
21
+ metrics:
22
+ - name: Accuracy
23
+ type: accuracy
24
+ value: 0.9254587155963303
25
+ ---
26
+
27
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
28
+ should probably proofread and complete it, then remove this comment. -->
29
+
30
+ # jpqd-bert-base-ft-sst2
31
+
32
+ > **Note**
33
+ > This model was trained for only 1 epoch and is shared for testing purposes.
34
+
35
+ This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the GLUE SST2 dataset.
36
+ It achieves the following results on the evaluation set:
37
+ - Loss: 0.2181
38
+ - Accuracy: 0.9255
39
+
40
+ ## Model description
41
+
42
+ More information needed
43
+
44
+ ## Intended uses & limitations
45
+
46
+ More information needed
47
+
48
+ ## Training and evaluation data
49
+
50
+ More information needed
51
+
52
+ ## Training procedure
53
+
54
+ ### Training hyperparameters
55
+
56
+ The following hyperparameters were used during training:
57
+ - learning_rate: 2e-05
58
+ - train_batch_size: 32
59
+ - eval_batch_size: 8
60
+ - seed: 42
61
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
62
+ - lr_scheduler_type: linear
63
+ - num_epochs: 1.0
64
+ - mixed_precision_training: Native AMP
65
+
66
+ ### Training results
67
+
68
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
69
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
70
+ | 0.4129 | 0.12 | 250 | 0.4416 | 0.8761 |
71
+ | 0.412 | 0.24 | 500 | 0.4969 | 0.8899 |
72
+ | 0.3191 | 0.36 | 750 | 0.2717 | 0.9163 |
73
+ | 0.2688 | 0.48 | 1000 | 0.2432 | 0.9117 |
74
+ | 0.3306 | 0.59 | 1250 | 0.2033 | 0.9243 |
75
+ | 0.224 | 0.71 | 1500 | 0.2383 | 0.9243 |
76
+ | 0.2082 | 0.83 | 1750 | 0.2233 | 0.9255 |
77
+ | 0.2161 | 0.95 | 2000 | 0.2207 | 0.9255 |
78
+
79
+
80
+ ### Framework versions
81
+
82
+ - Transformers 4.25.1
83
+ - Pytorch 1.13.1+cu117
84
+ - Datasets 2.8.0
85
+ - Tokenizers 0.13.2
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.9254587155963303,
4
+ "eval_loss": 0.21810248494148254,
5
+ "eval_runtime": 22.1504,
6
+ "eval_samples": 872,
7
+ "eval_samples_per_second": 39.367,
8
+ "eval_steps_per_second": 4.921,
9
+ "train_loss": 0.40093172477146793,
10
+ "train_runtime": 1341.7612,
11
+ "train_samples": 67349,
12
+ "train_samples_per_second": 50.194,
13
+ "train_steps_per_second": 1.569
14
+ }
compressed_graph.dot ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "bert-base-uncased",
3
+ "architectures": [
4
+ "NNCFNetwork"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "finetuning_task": "sst2",
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "negative",
15
+ "1": "positive"
16
+ },
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 3072,
19
+ "label2id": {
20
+ "negative": 0,
21
+ "positive": 1
22
+ },
23
+ "layer_norm_eps": 1e-12,
24
+ "max_position_embeddings": 512,
25
+ "model_type": "bert",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "position_embedding_type": "absolute",
30
+ "problem_type": "single_label_classification",
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.25.1",
33
+ "type_vocab_size": 2,
34
+ "use_cache": true,
35
+ "vocab_size": 30522
36
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.9254587155963303,
4
+ "eval_loss": 0.21810248494148254,
5
+ "eval_runtime": 22.1504,
6
+ "eval_samples": 872,
7
+ "eval_samples_per_second": 39.367,
8
+ "eval_steps_per_second": 4.921
9
+ }
model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74220f4ab04e8b2cf0a8eb0ac647ab95ad8e9cf3abe9af47aee0f4d2b45dab3b
3
+ size 439470129
nncf_output.log ADDED
The diff for this file is too large to render. See raw diff
 
openvino_config.json ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compression": [
3
+ {
4
+ "algorithm": "movement_sparsity",
5
+ "ignored_scopes": [
6
+ "{re}.*NNCFEmbedding.*",
7
+ "{re}.*LayerNorm.*",
8
+ "{re}.*pooler.*",
9
+ "{re}.*classifier.*"
10
+ ],
11
+ "params": {
12
+ "enable_structured_masking": true,
13
+ "importance_regularization_factor": 0.05,
14
+ "warmup_end_epoch": 2,
15
+ "warmup_start_epoch": 1
16
+ },
17
+ "sparse_structure_by_scopes": [
18
+ {
19
+ "mode": "block",
20
+ "sparse_factors": [
21
+ 32,
22
+ 32
23
+ ],
24
+ "target_scopes": "{re}.*BertAttention.*"
25
+ },
26
+ {
27
+ "axis": 0,
28
+ "mode": "per_dim",
29
+ "target_scopes": "{re}.*BertIntermediate.*"
30
+ },
31
+ {
32
+ "axis": 1,
33
+ "mode": "per_dim",
34
+ "target_scopes": "{re}.*BertOutput.*"
35
+ }
36
+ ]
37
+ },
38
+ {
39
+ "algorithm": "quantization",
40
+ "export_to_onnx_standard_ops": false,
41
+ "ignored_scopes": [
42
+ "{re}.*__add___[0-1]",
43
+ "{re}.*layer_norm_0",
44
+ "{re}.*matmul_1",
45
+ "{re}.*__truediv__*"
46
+ ],
47
+ "initializer": {
48
+ "batchnorm_adaptation": {
49
+ "num_bn_adaptation_samples": 200
50
+ },
51
+ "range": {
52
+ "num_init_samples": 32,
53
+ "params": {
54
+ "max_percentile": 99.99,
55
+ "min_percentile": 0.01
56
+ },
57
+ "type": "percentile"
58
+ }
59
+ },
60
+ "overflow_fix": "disable",
61
+ "preset": "mixed",
62
+ "scope_overrides": {
63
+ "activations": {
64
+ "{re}.*matmul_0": {
65
+ "mode": "symmetric"
66
+ }
67
+ }
68
+ }
69
+ }
70
+ ],
71
+ "input_info": [
72
+ {
73
+ "keyword": "input_ids",
74
+ "sample_size": [
75
+ 32,
76
+ 128
77
+ ],
78
+ "type": "long"
79
+ },
80
+ {
81
+ "keyword": "token_type_ids",
82
+ "sample_size": [
83
+ 32,
84
+ 128
85
+ ],
86
+ "type": "long"
87
+ },
88
+ {
89
+ "keyword": "attention_mask",
90
+ "sample_size": [
91
+ 32,
92
+ 128
93
+ ],
94
+ "type": "long"
95
+ }
96
+ ],
97
+ "log_dir": "/tmp/jpqd-bert-base-ft-sst2",
98
+ "optimum_version": "1.6.1",
99
+ "save_onnx_model": false,
100
+ "transformers_version": "4.25.1"
101
+ }
openvino_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9cfad0e7bc7bf2932066a8a392d1315f9bbbaf4db18504ce184266bbdf288c8
3
+ size 110187016
openvino_model.xml ADDED
The diff for this file is too large to render. See raw diff
 
original_graph.dot ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99590d19bdea8d7f6dbf5a37e3947a09af3ef5805eb39e5238e5afa19452aeed
3
+ size 779394143
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_lower_case": true,
4
+ "mask_token": "[MASK]",
5
+ "model_max_length": 512,
6
+ "name_or_path": "bert-base-uncased",
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "special_tokens_map_file": null,
10
+ "strip_accents": null,
11
+ "tokenize_chinese_chars": true,
12
+ "tokenizer_class": "BertTokenizer",
13
+ "unk_token": "[UNK]"
14
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.40093172477146793,
4
+ "train_runtime": 1341.7612,
5
+ "train_samples": 67349,
6
+ "train_samples_per_second": 50.194,
7
+ "train_steps_per_second": 1.569
8
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fbb005c34586cdf1d9dfa3fa4f3e0e2b7209a1984b065821a0ac11dbbf8e40b
3
+ size 3643
vocab.txt ADDED
The diff for this file is too large to render. See raw diff