Quantized model with overflow_fix enabled

Browse files

Files changed (8) hide show

README.md +17 -12
all_results.json +6 -6
openvino_config.json +1 -1
openvino_model.bin +2 -2
openvino_model.xml +0 -0
structured_sparsity.csv +48 -48
tokenizer.json +2 -16
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -5,19 +5,30 @@ tags:
 datasets:
 - squad
 model-index:
-- name: bert-base-uncased-squad-v1-jpqd-ov-int8
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# bert-base-uncased-squad-v1-jpqd-ov-int8
 This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the squad dataset.
-It was compressed using [NNCF](https://github.com/openvinotoolkit/nncf) with [Optimum
-Intel](https://github.com/huggingface/optimum-intel#openvino) following the [JPQD question-answering
-example](https://github.com/huggingface/optimum-intel/tree/main/examples/openvino/question-answering#joint-pruning-quantization-and-distillation-jpqd-for-bert-on-squad10).
 ### Training hyperparameters
@@ -33,13 +44,7 @@ The following hyperparameters were used during training:
 ### Training results
-```
-***** eval metrics *****
-  epoch            =     8.0
-  eval_exact_match =  83.141
-  eval_f1          = 89.5906
-  eval_samples     =   10784
-```
 ### Framework versions

 datasets:
 - squad
 model-index:
+- name: jpqd_bert_squad_overflowfix
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# jpqd_bert_squad_overflowfix
 This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the squad dataset.
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
 ### Training hyperparameters
 ### Training results
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
     "epoch": 8.0,
-    "eval_exact_match": 83.14096499526963,
-    "eval_f1": 89.59061048191492,
     "eval_samples": 10784,
-    "train_loss": 2.368001897127539,
-    "train_runtime": 49077.3955,
     "train_samples": 88524,
-    "train_samples_per_second": 14.43,
-    "train_steps_per_second": 0.902
 }

 {
     "epoch": 8.0,
+    "eval_exact_match": 83.33964049195838,
+    "eval_f1": 89.80725863442484,
     "eval_samples": 10784,
+    "train_loss": 2.369025745212132,
+    "train_runtime": 49814.833,
     "train_samples": 88524,
+    "train_samples_per_second": 14.216,
+    "train_steps_per_second": 0.889
 }

openvino_config.json CHANGED Viewed

@@ -56,7 +56,7 @@
           "type": "percentile"
         }
       },
-      "overflow_fix": "disable",
       "preset": "mixed",
       "scope_overrides": {
         "activations": {

           "type": "percentile"
         }
       },
+      "overflow_fix": "enable",
       "preset": "mixed",
       "scope_overrides": {
         "activations": {

openvino_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81f4e1eba20221fd9fcc6bb8820422413f8413123d7418b0a303fc262449ce59
-size 75477404

 version https://git-lfs.github.com/spec/v1
+oid sha256:cead9787a2b86a186358962622f9cd81c9d7b83ce860c5422b7585c088bc94ac
+size 75452788

openvino_model.xml CHANGED Viewed

The diff for this file is too large to render. See raw diff

structured_sparsity.csv CHANGED Viewed

@@ -3,71 +3,71 @@
 1,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
 2,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
 3,0,MHSA,nncf_module.bert.encoder.layer.0.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
-4,1,FF,nncf_module.bert.encoder.layer.0.intermediate.dense,"(3072, 768)","(2066, 768)","(3072,)","(2066,)",[2066 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
-5,1,FF,nncf_module.bert.encoder.layer.0.output.dense,"(768, 3072)","(768, 2066)","(768,)","(768,)",[2066 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0
-6,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
-7,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
-8,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
-9,2,MHSA,nncf_module.bert.encoder.layer.1.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
-10,3,FF,nncf_module.bert.encoder.layer.1.intermediate.dense,"(3072, 768)","(2067, 768)","(3072,)","(2067,)",[2067 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
-11,3,FF,nncf_module.bert.encoder.layer.1.output.dense,"(768, 3072)","(768, 2067)","(768,)","(768,)",[2067 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0
 12,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
 13,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
 14,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
 15,4,MHSA,nncf_module.bert.encoder.layer.2.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
-16,5,FF,nncf_module.bert.encoder.layer.2.intermediate.dense,"(3072, 768)","(2082, 768)","(3072,)","(2082,)",[2082 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
-17,5,FF,nncf_module.bert.encoder.layer.2.output.dense,"(768, 3072)","(768, 2082)","(768,)","(768,)",[2082 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0
 18,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
 19,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
 20,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
 21,6,MHSA,nncf_module.bert.encoder.layer.3.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
-22,7,FF,nncf_module.bert.encoder.layer.3.intermediate.dense,"(3072, 768)","(2136, 768)","(3072,)","(2136,)",[2136 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
-23,7,FF,nncf_module.bert.encoder.layer.3.output.dense,"(768, 3072)","(768, 2136)","(768,)","(768,)",[2136 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0
-24,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.query,"(768, 768)","(640, 768)","(768,)","(640,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
-25,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.key,"(768, 768)","(640, 768)","(768,)","(640,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
-26,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.value,"(768, 768)","(640, 768)","(768,)","(640,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
-27,8,MHSA,nncf_module.bert.encoder.layer.4.attention.output.dense,"(768, 768)","(768, 640)","(768,)","(768,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
-28,9,FF,nncf_module.bert.encoder.layer.4.intermediate.dense,"(3072, 768)","(2023, 768)","(3072,)","(2023,)",[2023 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
-29,9,FF,nncf_module.bert.encoder.layer.4.output.dense,"(768, 3072)","(768, 2023)","(768,)","(768,)",[2023 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0
 30,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
 31,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
 32,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
 33,10,MHSA,nncf_module.bert.encoder.layer.5.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
-34,11,FF,nncf_module.bert.encoder.layer.5.intermediate.dense,"(3072, 768)","(2011, 768)","(3072,)","(2011,)",[2011 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
-35,11,FF,nncf_module.bert.encoder.layer.5.output.dense,"(768, 3072)","(768, 2011)","(768,)","(768,)",[2011 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0
-36,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
-37,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
-38,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
-39,12,MHSA,nncf_module.bert.encoder.layer.6.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
-40,13,FF,nncf_module.bert.encoder.layer.6.intermediate.dense,"(3072, 768)","(1871, 768)","(3072,)","(1871,)",[1871 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
-41,13,FF,nncf_module.bert.encoder.layer.6.output.dense,"(768, 3072)","(768, 1871)","(768,)","(768,)",[1871 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0
 42,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
 43,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
 44,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
 45,14,MHSA,nncf_module.bert.encoder.layer.7.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
-46,15,FF,nncf_module.bert.encoder.layer.7.intermediate.dense,"(3072, 768)","(1858, 768)","(3072,)","(1858,)",[1858 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
-47,15,FF,nncf_module.bert.encoder.layer.7.output.dense,"(768, 3072)","(768, 1858)","(768,)","(768,)",[1858 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0
-48,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[1, 2, 3, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
-49,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[1, 2, 3, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
-50,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[1, 2, 3, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
-51,16,MHSA,nncf_module.bert.encoder.layer.8.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[1, 2, 3, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
-52,17,FF,nncf_module.bert.encoder.layer.8.intermediate.dense,"(3072, 768)","(1637, 768)","(3072,)","(1637,)",[1637 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
-53,17,FF,nncf_module.bert.encoder.layer.8.output.dense,"(768, 3072)","(768, 1637)","(768,)","(768,)",[1637 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0
-54,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 3, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
-55,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 3, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
-56,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 3, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
-57,18,MHSA,nncf_module.bert.encoder.layer.9.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[0, 2, 3, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
-58,19,FF,nncf_module.bert.encoder.layer.9.intermediate.dense,"(3072, 768)","(1257, 768)","(3072,)","(1257,)",[1257 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
-59,19,FF,nncf_module.bert.encoder.layer.9.output.dense,"(768, 3072)","(768, 1257)","(768,)","(768,)",[1257 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0
 60,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.query,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
 61,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.key,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
 62,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.value,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
 63,20,MHSA,nncf_module.bert.encoder.layer.10.attention.output.dense,"(768, 768)","(768, 384)","(768,)","(768,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
-64,21,FF,nncf_module.bert.encoder.layer.10.intermediate.dense,"(3072, 768)","(1159, 768)","(3072,)","(1159,)",[1159 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
-65,21,FF,nncf_module.bert.encoder.layer.10.output.dense,"(768, 3072)","(768, 1159)","(768,)","(768,)",[1159 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0
-66,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.query,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 1, 2, 4, 6, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
-67,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.key,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 1, 2, 4, 6, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
-68,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.value,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 1, 2, 4, 6, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
-69,22,MHSA,nncf_module.bert.encoder.layer.11.attention.output.dense,"(768, 768)","(768, 384)","(768,)","(768,)","[0, 1, 2, 4, 6, 9]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
-70,23,FF,nncf_module.bert.encoder.layer.11.intermediate.dense,"(3072, 768)","(1017, 768)","(3072,)","(1017,)",[1017 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
-71,23,FF,nncf_module.bert.encoder.layer.11.output.dense,"(768, 3072)","(768, 1017)","(768,)","(768,)",[1017 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0

 1,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
 2,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
 3,0,MHSA,nncf_module.bert.encoder.layer.0.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+4,1,FF,nncf_module.bert.encoder.layer.0.intermediate.dense,"(3072, 768)","(2089, 768)","(3072,)","(2089,)",[2089 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+5,1,FF,nncf_module.bert.encoder.layer.0.output.dense,"(768, 3072)","(768, 2089)","(768,)","(768,)",[2089 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0
+6,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.query,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+7,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+8,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+9,2,MHSA,nncf_module.bert.encoder.layer.1.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[0, 1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+10,3,FF,nncf_module.bert.encoder.layer.1.intermediate.dense,"(3072, 768)","(2042, 768)","(3072,)","(2042,)",[2042 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+11,3,FF,nncf_module.bert.encoder.layer.1.output.dense,"(768, 3072)","(768, 2042)","(768,)","(768,)",[2042 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0
 12,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
 13,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
 14,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
 15,4,MHSA,nncf_module.bert.encoder.layer.2.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+16,5,FF,nncf_module.bert.encoder.layer.2.intermediate.dense,"(3072, 768)","(2103, 768)","(3072,)","(2103,)",[2103 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+17,5,FF,nncf_module.bert.encoder.layer.2.output.dense,"(768, 3072)","(768, 2103)","(768,)","(768,)",[2103 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0
 18,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
 19,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
 20,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
 21,6,MHSA,nncf_module.bert.encoder.layer.3.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+22,7,FF,nncf_module.bert.encoder.layer.3.intermediate.dense,"(3072, 768)","(2125, 768)","(3072,)","(2125,)",[2125 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+23,7,FF,nncf_module.bert.encoder.layer.3.output.dense,"(768, 3072)","(768, 2125)","(768,)","(768,)",[2125 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0
+24,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.query,"(768, 768)","(704, 768)","(768,)","(704,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+25,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.key,"(768, 768)","(704, 768)","(768,)","(704,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+26,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.value,"(768, 768)","(704, 768)","(768,)","(704,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+27,8,MHSA,nncf_module.bert.encoder.layer.4.attention.output.dense,"(768, 768)","(768, 704)","(768,)","(768,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+28,9,FF,nncf_module.bert.encoder.layer.4.intermediate.dense,"(3072, 768)","(2049, 768)","(3072,)","(2049,)",[2049 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+29,9,FF,nncf_module.bert.encoder.layer.4.output.dense,"(768, 3072)","(768, 2049)","(768,)","(768,)",[2049 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0
 30,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
 31,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
 32,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
 33,10,MHSA,nncf_module.bert.encoder.layer.5.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+34,11,FF,nncf_module.bert.encoder.layer.5.intermediate.dense,"(3072, 768)","(1998, 768)","(3072,)","(1998,)",[1998 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+35,11,FF,nncf_module.bert.encoder.layer.5.output.dense,"(768, 3072)","(768, 1998)","(768,)","(768,)",[1998 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0
+36,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.query,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+37,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+38,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+39,12,MHSA,nncf_module.bert.encoder.layer.6.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[0, 1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+40,13,FF,nncf_module.bert.encoder.layer.6.intermediate.dense,"(3072, 768)","(1874, 768)","(3072,)","(1874,)",[1874 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+41,13,FF,nncf_module.bert.encoder.layer.6.output.dense,"(768, 3072)","(768, 1874)","(768,)","(768,)",[1874 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0
 42,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
 43,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
 44,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
 45,14,MHSA,nncf_module.bert.encoder.layer.7.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+46,15,FF,nncf_module.bert.encoder.layer.7.intermediate.dense,"(3072, 768)","(1806, 768)","(3072,)","(1806,)",[1806 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+47,15,FF,nncf_module.bert.encoder.layer.7.output.dense,"(768, 3072)","(768, 1806)","(768,)","(768,)",[1806 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0
+48,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.query,"(768, 768)","(512, 768)","(768,)","(512,)","[1, 2, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+49,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[1, 2, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+50,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[1, 2, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+51,16,MHSA,nncf_module.bert.encoder.layer.8.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[1, 2, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+52,17,FF,nncf_module.bert.encoder.layer.8.intermediate.dense,"(3072, 768)","(1654, 768)","(3072,)","(1654,)",[1654 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+53,17,FF,nncf_module.bert.encoder.layer.8.output.dense,"(768, 3072)","(768, 1654)","(768,)","(768,)",[1654 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0
+54,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.query,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+55,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.key,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+56,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.value,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+57,18,MHSA,nncf_module.bert.encoder.layer.9.attention.output.dense,"(768, 768)","(768, 384)","(768,)","(768,)","[0, 2, 3, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+58,19,FF,nncf_module.bert.encoder.layer.9.intermediate.dense,"(3072, 768)","(1311, 768)","(3072,)","(1311,)",[1311 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+59,19,FF,nncf_module.bert.encoder.layer.9.output.dense,"(768, 3072)","(768, 1311)","(768,)","(768,)",[1311 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0
 60,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.query,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
 61,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.key,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
 62,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.value,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
 63,20,MHSA,nncf_module.bert.encoder.layer.10.attention.output.dense,"(768, 768)","(768, 384)","(768,)","(768,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+64,21,FF,nncf_module.bert.encoder.layer.10.intermediate.dense,"(3072, 768)","(1090, 768)","(3072,)","(1090,)",[1090 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+65,21,FF,nncf_module.bert.encoder.layer.10.output.dense,"(768, 3072)","(768, 1090)","(768,)","(768,)",[1090 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0
+66,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.query,"(768, 768)","(320, 768)","(768,)","(320,)","[0, 1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
+67,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.key,"(768, 768)","(320, 768)","(768,)","(320,)","[0, 1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
+68,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.value,"(768, 768)","(320, 768)","(768,)","(320,)","[0, 1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
+69,22,MHSA,nncf_module.bert.encoder.layer.11.attention.output.dense,"(768, 768)","(768, 320)","(768,)","(768,)","[0, 1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
+70,23,FF,nncf_module.bert.encoder.layer.11.intermediate.dense,"(3072, 768)","(1027, 768)","(3072,)","(1027,)",[1027 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
+71,23,FF,nncf_module.bert.encoder.layer.11.output.dense,"(768, 3072)","(768, 1027)","(768,)","(768,)",[1027 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0

tokenizer.json CHANGED Viewed

@@ -1,21 +1,7 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 384,
-    "strategy": "OnlySecond",
-    "stride": 128
-  },
-  "padding": {
-    "strategy": {
-      "Fixed": 384
-    },
-    "direction": "Right",
-    "pad_to_multiple_of": null,
-    "pad_id": 0,
-    "pad_type_id": 0,
-    "pad_token": "[PAD]"
-  },
   "added_tokens": [
     {
       "id": 0,

 {
   "version": "1.0",
+  "truncation": null,
+  "padding": null,
   "added_tokens": [
     {
       "id": 0,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5552aa8097d9d47b0343cdacd36e5bc50157c344cb9488c095d5eb9a1bc83f5
 size 3579

 version https://git-lfs.github.com/spec/v1
+oid sha256:838f7a4735c59774d578e9a43a91763c34b74e016c13d43a3202f814296076cf
 size 3579