Add model

Browse files

Files changed (12) hide show

README.md +49 -0
all_results.json +11 -0
config.json +26 -0
openvino_config.json +99 -0
openvino_model.bin +3 -0
openvino_model.xml +0 -0
special_tokens_map.json +7 -0
structured_sparsity.csv +73 -0
tokenizer.json +0 -0
tokenizer_config.json +14 -0
training_args.bin +3 -0
vocab.txt +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,49 @@

+---
+license: apache-2.0
+tags:
+- generated_from_trainer
+datasets:
+- squad
+model-index:
+- name: bert-base-uncased-squad-v1-jpqd-ov-int8
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# bert-base-uncased-squad-v1-jpqd-ov-int8
+This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the SQuAD v1.1 (`squad`) dataset.
+It was compressed using [NNCF](https://github.com/openvinotoolkit/nncf) with [Optimum
+Intel](https://github.com/huggingface/optimum-intel#openvino) following the [JPQD question-answering
+example](https://github.com/huggingface/optimum-intel/tree/main/examples/openvino/question-answering#joint-pruning-quantization-and-distillation-jpqd-for-bert-on-squad10).
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 3e-05
+- train_batch_size: 16
+- eval_batch_size: 128
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 8.0
+- mixed_precision_training: Native AMP
+### Training results
+```
+***** eval metrics *****
+  epoch            =     8.0
+  eval_exact_match =  83.141
+  eval_f1          = 89.5906
+  eval_samples     =   10784
+```
+### Framework versions
+- Transformers 4.26.1
+- PyTorch 1.13.1+cu117
+- Datasets 2.8.0
+- Tokenizers 0.13.2

all_results.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+    "epoch": 8.0,
+    "eval_exact_match": 83.14096499526963,
+    "eval_f1": 89.59061048191492,
+    "eval_samples": 10784,
+    "train_loss": 2.368001897127539,
+    "train_runtime": 49077.3955,
+    "train_samples": 88524,
+    "train_samples_per_second": 14.43,
+    "train_steps_per_second": 0.902
+}

config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "NNCFNetwork"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.26.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

openvino_config.json ADDED Viewed

	@@ -0,0 +1,99 @@

+{
+  "compression": [
+    {
+      "algorithm": "movement_sparsity",
+      "ignored_scopes": [
+        "{re}.*NNCFEmbedding.*",
+        "{re}.*qa_outputs.*",
+        "{re}.*LayerNorm.*"
+      ],
+      "params": {
+        "enable_structured_masking": true,
+        "importance_regularization_factor": 0.02,
+        "warmup_end_epoch": 4,
+        "warmup_start_epoch": 1
+      },
+      "sparse_structure_by_scopes": [
+        {
+          "mode": "block",
+          "sparse_factors": [
+            32,
+            32
+          ],
+          "target_scopes": "{re}.*BertAttention.*"
+        },
+        {
+          "axis": 0,
+          "mode": "per_dim",
+          "target_scopes": "{re}.*BertIntermediate.*"
+        },
+        {
+          "axis": 1,
+          "mode": "per_dim",
+          "target_scopes": "{re}.*BertOutput.*"
+        }
+      ]
+    },
+    {
+      "algorithm": "quantization",
+      "export_to_onnx_standard_ops": false,
+      "ignored_scopes": [
+        "{re}.*__add___[0-1]",
+        "{re}.*layer_norm_0",
+        "{re}.*matmul_1",
+        "{re}.*__truediv__*"
+      ],
+      "initializer": {
+        "batchnorm_adaptation": {
+          "num_bn_adaptation_samples": 200
+        },
+        "range": {
+          "num_init_samples": 32,
+          "params": {
+            "max_percentile": 99.99,
+            "min_percentile": 0.01
+          },
+          "type": "percentile"
+        }
+      },
+      "overflow_fix": "disable",
+      "preset": "mixed",
+      "scope_overrides": {
+        "activations": {
+          "{re}.*matmul_0": {
+            "mode": "symmetric"
+          }
+        }
+      }
+    }
+  ],
+  "input_info": [
+    {
+      "keyword": "input_ids",
+      "sample_size": [
+        16,
+        384
+      ],
+      "type": "long"
+    },
+    {
+      "keyword": "token_type_ids",
+      "sample_size": [
+        16,
+        384
+      ],
+      "type": "long"
+    },
+    {
+      "keyword": "attention_mask",
+      "sample_size": [
+        16,
+        384
+      ],
+      "type": "long"
+    }
+  ],
+  "optimum_version": "1.6.4",
+  "save_onnx_model": false,
+  "transformers_version": "4.26.1"
+}

openvino_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81f4e1eba20221fd9fcc6bb8820422413f8413123d7418b0a303fc262449ce59
+size 75477404

openvino_model.xml ADDED Viewed

The diff for this file is too large to render. See raw diff

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

structured_sparsity.csv ADDED Viewed

	@@ -0,0 +1,73 @@

+,group_id,type,torch_module,weight_shape,pruned_weight_shape,bias_shape,pruned_bias_shape,head_or_channel_id_to_keep,module_node_name
+0,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.query,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+1,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+2,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+3,0,MHSA,nncf_module.bert.encoder.layer.0.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+4,1,FF,nncf_module.bert.encoder.layer.0.intermediate.dense,"(3072, 768)","(2066, 768)","(3072,)","(2066,)",[2066 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+5,1,FF,nncf_module.bert.encoder.layer.0.output.dense,"(768, 3072)","(768, 2066)","(768,)","(768,)",[2066 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0
+6,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+7,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+8,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+9,2,MHSA,nncf_module.bert.encoder.layer.1.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+10,3,FF,nncf_module.bert.encoder.layer.1.intermediate.dense,"(3072, 768)","(2067, 768)","(3072,)","(2067,)",[2067 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+11,3,FF,nncf_module.bert.encoder.layer.1.output.dense,"(768, 3072)","(768, 2067)","(768,)","(768,)",[2067 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0
+12,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+13,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+14,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+15,4,MHSA,nncf_module.bert.encoder.layer.2.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+16,5,FF,nncf_module.bert.encoder.layer.2.intermediate.dense,"(3072, 768)","(2082, 768)","(3072,)","(2082,)",[2082 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+17,5,FF,nncf_module.bert.encoder.layer.2.output.dense,"(768, 3072)","(768, 2082)","(768,)","(768,)",[2082 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0
+18,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+19,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+20,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+21,6,MHSA,nncf_module.bert.encoder.layer.3.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+22,7,FF,nncf_module.bert.encoder.layer.3.intermediate.dense,"(3072, 768)","(2136, 768)","(3072,)","(2136,)",[2136 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+23,7,FF,nncf_module.bert.encoder.layer.3.output.dense,"(768, 3072)","(768, 2136)","(768,)","(768,)",[2136 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0
+24,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.query,"(768, 768)","(640, 768)","(768,)","(640,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+25,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.key,"(768, 768)","(640, 768)","(768,)","(640,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+26,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.value,"(768, 768)","(640, 768)","(768,)","(640,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+27,8,MHSA,nncf_module.bert.encoder.layer.4.attention.output.dense,"(768, 768)","(768, 640)","(768,)","(768,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+28,9,FF,nncf_module.bert.encoder.layer.4.intermediate.dense,"(3072, 768)","(2023, 768)","(3072,)","(2023,)",[2023 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+29,9,FF,nncf_module.bert.encoder.layer.4.output.dense,"(768, 3072)","(768, 2023)","(768,)","(768,)",[2023 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0
+30,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+31,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+32,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+33,10,MHSA,nncf_module.bert.encoder.layer.5.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+34,11,FF,nncf_module.bert.encoder.layer.5.intermediate.dense,"(3072, 768)","(2011, 768)","(3072,)","(2011,)",[2011 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+35,11,FF,nncf_module.bert.encoder.layer.5.output.dense,"(768, 3072)","(768, 2011)","(768,)","(768,)",[2011 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0
+36,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+37,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+38,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+39,12,MHSA,nncf_module.bert.encoder.layer.6.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+40,13,FF,nncf_module.bert.encoder.layer.6.intermediate.dense,"(3072, 768)","(1871, 768)","(3072,)","(1871,)",[1871 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+41,13,FF,nncf_module.bert.encoder.layer.6.output.dense,"(768, 3072)","(768, 1871)","(768,)","(768,)",[1871 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0
+42,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+43,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+44,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+45,14,MHSA,nncf_module.bert.encoder.layer.7.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+46,15,FF,nncf_module.bert.encoder.layer.7.intermediate.dense,"(3072, 768)","(1858, 768)","(3072,)","(1858,)",[1858 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+47,15,FF,nncf_module.bert.encoder.layer.7.output.dense,"(768, 3072)","(768, 1858)","(768,)","(768,)",[1858 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0
+48,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[1, 2, 3, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+49,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[1, 2, 3, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+50,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[1, 2, 3, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+51,16,MHSA,nncf_module.bert.encoder.layer.8.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[1, 2, 3, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+52,17,FF,nncf_module.bert.encoder.layer.8.intermediate.dense,"(3072, 768)","(1637, 768)","(3072,)","(1637,)",[1637 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+53,17,FF,nncf_module.bert.encoder.layer.8.output.dense,"(768, 3072)","(768, 1637)","(768,)","(768,)",[1637 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0
+54,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 3, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+55,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 3, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+56,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 3, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+57,18,MHSA,nncf_module.bert.encoder.layer.9.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[0, 2, 3, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+58,19,FF,nncf_module.bert.encoder.layer.9.intermediate.dense,"(3072, 768)","(1257, 768)","(3072,)","(1257,)",[1257 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+59,19,FF,nncf_module.bert.encoder.layer.9.output.dense,"(768, 3072)","(768, 1257)","(768,)","(768,)",[1257 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0
+60,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.query,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+61,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.key,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+62,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.value,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+63,20,MHSA,nncf_module.bert.encoder.layer.10.attention.output.dense,"(768, 768)","(768, 384)","(768,)","(768,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+64,21,FF,nncf_module.bert.encoder.layer.10.intermediate.dense,"(3072, 768)","(1159, 768)","(3072,)","(1159,)",[1159 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+65,21,FF,nncf_module.bert.encoder.layer.10.output.dense,"(768, 3072)","(768, 1159)","(768,)","(768,)",[1159 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0
+66,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.query,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 1, 2, 4, 6, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+67,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.key,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 1, 2, 4, 6, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+68,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.value,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 1, 2, 4, 6, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+69,22,MHSA,nncf_module.bert.encoder.layer.11.attention.output.dense,"(768, 768)","(768, 384)","(768,)","(768,)","[0, 1, 2, 4, 6, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+70,23,FF,nncf_module.bert.encoder.layer.11.intermediate.dense,"(3072, 768)","(1017, 768)","(3072,)","(1017,)",[1017 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+71,23,FF,nncf_module.bert.encoder.layer.11.output.dense,"(768, 3072)","(768, 1017)","(768,)","(768,)",[1017 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "name_or_path": "bert-base-uncased",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "special_tokens_map_file": null,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5552aa8097d9d47b0343cdacd36e5bc50157c344cb9488c095d5eb9a1bc83f5
+size 3579

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff