Quantized model with overflow_fix enabled
Browse files- README.md +17 -12
- all_results.json +6 -6
- openvino_config.json +1 -1
- openvino_model.bin +2 -2
- openvino_model.xml +0 -0
- structured_sparsity.csv +48 -48
- tokenizer.json +2 -16
- training_args.bin +1 -1
README.md
CHANGED
@@ -5,19 +5,30 @@ tags:
|
|
5 |
datasets:
|
6 |
- squad
|
7 |
model-index:
|
8 |
-
- name:
|
9 |
results: []
|
10 |
---
|
11 |
|
12 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
13 |
should probably proofread and complete it, then remove this comment. -->
|
14 |
|
15 |
-
#
|
16 |
|
17 |
This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the squad dataset.
|
18 |
-
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
### Training hyperparameters
|
23 |
|
@@ -33,13 +44,7 @@ The following hyperparameters were used during training:
|
|
33 |
|
34 |
### Training results
|
35 |
|
36 |
-
|
37 |
-
***** eval metrics *****
|
38 |
-
epoch = 8.0
|
39 |
-
eval_exact_match = 83.141
|
40 |
-
eval_f1 = 89.5906
|
41 |
-
eval_samples = 10784
|
42 |
-
```
|
43 |
|
44 |
### Framework versions
|
45 |
|
|
|
5 |
datasets:
|
6 |
- squad
|
7 |
model-index:
|
8 |
+
- name: jpqd_bert_squad_overflowfix
|
9 |
results: []
|
10 |
---
|
11 |
|
12 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
13 |
should probably proofread and complete it, then remove this comment. -->
|
14 |
|
15 |
+
# jpqd_bert_squad_overflowfix
|
16 |
|
17 |
This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the squad dataset.
|
18 |
+
|
19 |
+
## Model description
|
20 |
+
|
21 |
+
More information needed
|
22 |
+
|
23 |
+
## Intended uses & limitations
|
24 |
+
|
25 |
+
More information needed
|
26 |
+
|
27 |
+
## Training and evaluation data
|
28 |
+
|
29 |
+
More information needed
|
30 |
+
|
31 |
+
## Training procedure
|
32 |
|
33 |
### Training hyperparameters
|
34 |
|
|
|
44 |
|
45 |
### Training results
|
46 |
|
47 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
### Framework versions
|
50 |
|
all_results.json
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
{
|
2 |
"epoch": 8.0,
|
3 |
-
"eval_exact_match": 83.
|
4 |
-
"eval_f1": 89.
|
5 |
"eval_samples": 10784,
|
6 |
-
"train_loss": 2.
|
7 |
-
"train_runtime":
|
8 |
"train_samples": 88524,
|
9 |
-
"train_samples_per_second": 14.
|
10 |
-
"train_steps_per_second": 0.
|
11 |
}
|
|
|
1 |
{
|
2 |
"epoch": 8.0,
|
3 |
+
"eval_exact_match": 83.33964049195838,
|
4 |
+
"eval_f1": 89.80725863442484,
|
5 |
"eval_samples": 10784,
|
6 |
+
"train_loss": 2.369025745212132,
|
7 |
+
"train_runtime": 49814.833,
|
8 |
"train_samples": 88524,
|
9 |
+
"train_samples_per_second": 14.216,
|
10 |
+
"train_steps_per_second": 0.889
|
11 |
}
|
openvino_config.json
CHANGED
@@ -56,7 +56,7 @@
|
|
56 |
"type": "percentile"
|
57 |
}
|
58 |
},
|
59 |
-
"overflow_fix": "
|
60 |
"preset": "mixed",
|
61 |
"scope_overrides": {
|
62 |
"activations": {
|
|
|
56 |
"type": "percentile"
|
57 |
}
|
58 |
},
|
59 |
+
"overflow_fix": "enable",
|
60 |
"preset": "mixed",
|
61 |
"scope_overrides": {
|
62 |
"activations": {
|
openvino_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cead9787a2b86a186358962622f9cd81c9d7b83ce860c5422b7585c088bc94ac
|
3 |
+
size 75452788
|
openvino_model.xml
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
structured_sparsity.csv
CHANGED
@@ -3,71 +3,71 @@
|
|
3 |
1,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
4 |
2,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
5 |
3,0,MHSA,nncf_module.bert.encoder.layer.0.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
6 |
-
4,1,FF,nncf_module.bert.encoder.layer.0.intermediate.dense,"(3072, 768)","(
|
7 |
-
5,1,FF,nncf_module.bert.encoder.layer.0.output.dense,"(768, 3072)","(768,
|
8 |
-
6,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.query,"(768, 768)","(
|
9 |
-
7,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.key,"(768, 768)","(
|
10 |
-
8,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.value,"(768, 768)","(
|
11 |
-
9,2,MHSA,nncf_module.bert.encoder.layer.1.attention.output.dense,"(768, 768)","(768,
|
12 |
-
10,3,FF,nncf_module.bert.encoder.layer.1.intermediate.dense,"(3072, 768)","(
|
13 |
-
11,3,FF,nncf_module.bert.encoder.layer.1.output.dense,"(768, 3072)","(768,
|
14 |
12,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
15 |
13,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
16 |
14,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
17 |
15,4,MHSA,nncf_module.bert.encoder.layer.2.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
18 |
-
16,5,FF,nncf_module.bert.encoder.layer.2.intermediate.dense,"(3072, 768)","(
|
19 |
-
17,5,FF,nncf_module.bert.encoder.layer.2.output.dense,"(768, 3072)","(768,
|
20 |
18,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
21 |
19,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
22 |
20,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
23 |
21,6,MHSA,nncf_module.bert.encoder.layer.3.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
24 |
-
22,7,FF,nncf_module.bert.encoder.layer.3.intermediate.dense,"(3072, 768)","(
|
25 |
-
23,7,FF,nncf_module.bert.encoder.layer.3.output.dense,"(768, 3072)","(768,
|
26 |
-
24,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.query,"(768, 768)","(
|
27 |
-
25,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.key,"(768, 768)","(
|
28 |
-
26,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.value,"(768, 768)","(
|
29 |
-
27,8,MHSA,nncf_module.bert.encoder.layer.4.attention.output.dense,"(768, 768)","(768,
|
30 |
-
28,9,FF,nncf_module.bert.encoder.layer.4.intermediate.dense,"(3072, 768)","(
|
31 |
-
29,9,FF,nncf_module.bert.encoder.layer.4.output.dense,"(768, 3072)","(768,
|
32 |
30,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
33 |
31,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
34 |
32,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
35 |
33,10,MHSA,nncf_module.bert.encoder.layer.5.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
36 |
-
34,11,FF,nncf_module.bert.encoder.layer.5.intermediate.dense,"(3072, 768)","(
|
37 |
-
35,11,FF,nncf_module.bert.encoder.layer.5.output.dense,"(768, 3072)","(768,
|
38 |
-
36,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.query,"(768, 768)","(
|
39 |
-
37,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.key,"(768, 768)","(
|
40 |
-
38,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.value,"(768, 768)","(
|
41 |
-
39,12,MHSA,nncf_module.bert.encoder.layer.6.attention.output.dense,"(768, 768)","(768,
|
42 |
-
40,13,FF,nncf_module.bert.encoder.layer.6.intermediate.dense,"(3072, 768)","(
|
43 |
-
41,13,FF,nncf_module.bert.encoder.layer.6.output.dense,"(768, 3072)","(768,
|
44 |
42,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
45 |
43,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
46 |
44,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
47 |
45,14,MHSA,nncf_module.bert.encoder.layer.7.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
48 |
-
46,15,FF,nncf_module.bert.encoder.layer.7.intermediate.dense,"(3072, 768)","(
|
49 |
-
47,15,FF,nncf_module.bert.encoder.layer.7.output.dense,"(768, 3072)","(768,
|
50 |
-
48,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.query,"(768, 768)","(
|
51 |
-
49,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.key,"(768, 768)","(
|
52 |
-
50,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.value,"(768, 768)","(
|
53 |
-
51,16,MHSA,nncf_module.bert.encoder.layer.8.attention.output.dense,"(768, 768)","(768,
|
54 |
-
52,17,FF,nncf_module.bert.encoder.layer.8.intermediate.dense,"(3072, 768)","(
|
55 |
-
53,17,FF,nncf_module.bert.encoder.layer.8.output.dense,"(768, 3072)","(768,
|
56 |
-
54,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.query,"(768, 768)","(
|
57 |
-
55,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.key,"(768, 768)","(
|
58 |
-
56,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.value,"(768, 768)","(
|
59 |
-
57,18,MHSA,nncf_module.bert.encoder.layer.9.attention.output.dense,"(768, 768)","(768,
|
60 |
-
58,19,FF,nncf_module.bert.encoder.layer.9.intermediate.dense,"(3072, 768)","(
|
61 |
-
59,19,FF,nncf_module.bert.encoder.layer.9.output.dense,"(768, 3072)","(768,
|
62 |
60,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.query,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
63 |
61,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.key,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
64 |
62,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.value,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
65 |
63,20,MHSA,nncf_module.bert.encoder.layer.10.attention.output.dense,"(768, 768)","(768, 384)","(768,)","(768,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
66 |
-
64,21,FF,nncf_module.bert.encoder.layer.10.intermediate.dense,"(3072, 768)","(
|
67 |
-
65,21,FF,nncf_module.bert.encoder.layer.10.output.dense,"(768, 3072)","(768,
|
68 |
-
66,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.query,"(768, 768)","(
|
69 |
-
67,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.key,"(768, 768)","(
|
70 |
-
68,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.value,"(768, 768)","(
|
71 |
-
69,22,MHSA,nncf_module.bert.encoder.layer.11.attention.output.dense,"(768, 768)","(768,
|
72 |
-
70,23,FF,nncf_module.bert.encoder.layer.11.intermediate.dense,"(3072, 768)","(
|
73 |
-
71,23,FF,nncf_module.bert.encoder.layer.11.output.dense,"(768, 3072)","(768,
|
|
|
3 |
1,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
4 |
2,0,MHSA,nncf_module.bert.encoder.layer.0.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
5 |
3,0,MHSA,nncf_module.bert.encoder.layer.0.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[0, 3, 4, 6, 7, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
6 |
+
4,1,FF,nncf_module.bert.encoder.layer.0.intermediate.dense,"(3072, 768)","(2089, 768)","(3072,)","(2089,)",[2089 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
7 |
+
5,1,FF,nncf_module.bert.encoder.layer.0.output.dense,"(768, 3072)","(768, 2089)","(768,)","(768,)",[2089 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
8 |
+
6,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.query,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
9 |
+
7,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
10 |
+
8,2,MHSA,nncf_module.bert.encoder.layer.1.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
11 |
+
9,2,MHSA,nncf_module.bert.encoder.layer.1.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[0, 1, 4, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
12 |
+
10,3,FF,nncf_module.bert.encoder.layer.1.intermediate.dense,"(3072, 768)","(2042, 768)","(3072,)","(2042,)",[2042 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
13 |
+
11,3,FF,nncf_module.bert.encoder.layer.1.output.dense,"(768, 3072)","(768, 2042)","(768,)","(768,)",[2042 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
14 |
12,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
15 |
13,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
16 |
14,4,MHSA,nncf_module.bert.encoder.layer.2.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
17 |
15,4,MHSA,nncf_module.bert.encoder.layer.2.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 2, 3, 5, 6, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
18 |
+
16,5,FF,nncf_module.bert.encoder.layer.2.intermediate.dense,"(3072, 768)","(2103, 768)","(3072,)","(2103,)",[2103 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
19 |
+
17,5,FF,nncf_module.bert.encoder.layer.2.output.dense,"(768, 3072)","(768, 2103)","(768,)","(768,)",[2103 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
20 |
18,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
21 |
19,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
22 |
20,6,MHSA,nncf_module.bert.encoder.layer.3.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
23 |
21,6,MHSA,nncf_module.bert.encoder.layer.3.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 1, 3, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
24 |
+
22,7,FF,nncf_module.bert.encoder.layer.3.intermediate.dense,"(3072, 768)","(2125, 768)","(3072,)","(2125,)",[2125 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
25 |
+
23,7,FF,nncf_module.bert.encoder.layer.3.output.dense,"(768, 3072)","(768, 2125)","(768,)","(768,)",[2125 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
26 |
+
24,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.query,"(768, 768)","(704, 768)","(768,)","(704,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
27 |
+
25,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.key,"(768, 768)","(704, 768)","(768,)","(704,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
28 |
+
26,8,MHSA,nncf_module.bert.encoder.layer.4.attention.self.value,"(768, 768)","(704, 768)","(768,)","(704,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
29 |
+
27,8,MHSA,nncf_module.bert.encoder.layer.4.attention.output.dense,"(768, 768)","(768, 704)","(768,)","(768,)","[0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
30 |
+
28,9,FF,nncf_module.bert.encoder.layer.4.intermediate.dense,"(3072, 768)","(2049, 768)","(3072,)","(2049,)",[2049 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
31 |
+
29,9,FF,nncf_module.bert.encoder.layer.4.output.dense,"(768, 3072)","(768, 2049)","(768,)","(768,)",[2049 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
32 |
30,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.query,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
33 |
31,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.key,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
34 |
32,10,MHSA,nncf_module.bert.encoder.layer.5.attention.self.value,"(768, 768)","(576, 768)","(768,)","(576,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
35 |
33,10,MHSA,nncf_module.bert.encoder.layer.5.attention.output.dense,"(768, 768)","(768, 576)","(768,)","(768,)","[0, 3, 4, 5, 7, 8, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
36 |
+
34,11,FF,nncf_module.bert.encoder.layer.5.intermediate.dense,"(3072, 768)","(1998, 768)","(3072,)","(1998,)",[1998 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
37 |
+
35,11,FF,nncf_module.bert.encoder.layer.5.output.dense,"(768, 3072)","(768, 1998)","(768,)","(768,)",[1998 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
38 |
+
36,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.query,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
39 |
+
37,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
40 |
+
38,12,MHSA,nncf_module.bert.encoder.layer.6.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[0, 1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
41 |
+
39,12,MHSA,nncf_module.bert.encoder.layer.6.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[0, 1, 4, 5, 6, 8, 9, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
42 |
+
40,13,FF,nncf_module.bert.encoder.layer.6.intermediate.dense,"(3072, 768)","(1874, 768)","(3072,)","(1874,)",[1874 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
43 |
+
41,13,FF,nncf_module.bert.encoder.layer.6.output.dense,"(768, 3072)","(768, 1874)","(768,)","(768,)",[1874 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
44 |
42,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.query,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
45 |
43,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.key,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
46 |
44,14,MHSA,nncf_module.bert.encoder.layer.7.attention.self.value,"(768, 768)","(448, 768)","(768,)","(448,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
47 |
45,14,MHSA,nncf_module.bert.encoder.layer.7.attention.output.dense,"(768, 768)","(768, 448)","(768,)","(768,)","[0, 2, 4, 5, 8, 9, 10]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
48 |
+
46,15,FF,nncf_module.bert.encoder.layer.7.intermediate.dense,"(3072, 768)","(1806, 768)","(3072,)","(1806,)",[1806 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
49 |
+
47,15,FF,nncf_module.bert.encoder.layer.7.output.dense,"(768, 3072)","(768, 1806)","(768,)","(768,)",[1806 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
50 |
+
48,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.query,"(768, 768)","(512, 768)","(768,)","(512,)","[1, 2, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
51 |
+
49,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.key,"(768, 768)","(512, 768)","(768,)","(512,)","[1, 2, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
52 |
+
50,16,MHSA,nncf_module.bert.encoder.layer.8.attention.self.value,"(768, 768)","(512, 768)","(768,)","(512,)","[1, 2, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
53 |
+
51,16,MHSA,nncf_module.bert.encoder.layer.8.attention.output.dense,"(768, 768)","(768, 512)","(768,)","(768,)","[1, 2, 5, 6, 7, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
54 |
+
52,17,FF,nncf_module.bert.encoder.layer.8.intermediate.dense,"(3072, 768)","(1654, 768)","(3072,)","(1654,)",[1654 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
55 |
+
53,17,FF,nncf_module.bert.encoder.layer.8.output.dense,"(768, 3072)","(768, 1654)","(768,)","(768,)",[1654 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
56 |
+
54,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.query,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
57 |
+
55,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.key,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
58 |
+
56,18,MHSA,nncf_module.bert.encoder.layer.9.attention.self.value,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
59 |
+
57,18,MHSA,nncf_module.bert.encoder.layer.9.attention.output.dense,"(768, 768)","(768, 384)","(768,)","(768,)","[0, 2, 3, 6, 8, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
60 |
+
58,19,FF,nncf_module.bert.encoder.layer.9.intermediate.dense,"(3072, 768)","(1311, 768)","(3072,)","(1311,)",[1311 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
61 |
+
59,19,FF,nncf_module.bert.encoder.layer.9.output.dense,"(768, 3072)","(768, 1311)","(768,)","(768,)",[1311 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
62 |
60,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.query,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
63 |
61,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.key,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
64 |
62,20,MHSA,nncf_module.bert.encoder.layer.10.attention.self.value,"(768, 768)","(384, 768)","(768,)","(384,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
65 |
63,20,MHSA,nncf_module.bert.encoder.layer.10.attention.output.dense,"(768, 768)","(768, 384)","(768,)","(768,)","[0, 2, 3, 9, 10, 11]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
66 |
+
64,21,FF,nncf_module.bert.encoder.layer.10.intermediate.dense,"(3072, 768)","(1090, 768)","(3072,)","(1090,)",[1090 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
67 |
+
65,21,FF,nncf_module.bert.encoder.layer.10.output.dense,"(768, 3072)","(768, 1090)","(768,)","(768,)",[1090 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
68 |
+
66,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.query,"(768, 768)","(320, 768)","(768,)","(320,)","[0, 1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[query]/linear_0
|
69 |
+
67,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.key,"(768, 768)","(320, 768)","(768,)","(320,)","[0, 1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[key]/linear_0
|
70 |
+
68,22,MHSA,nncf_module.bert.encoder.layer.11.attention.self.value,"(768, 768)","(320, 768)","(768,)","(320,)","[0, 1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/NNCFLinear[value]/linear_0
|
71 |
+
69,22,MHSA,nncf_module.bert.encoder.layer.11.attention.output.dense,"(768, 768)","(768, 320)","(768,)","(768,)","[0, 1, 2, 3, 4]",BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLinear[dense]/linear_0
|
72 |
+
70,23,FF,nncf_module.bert.encoder.layer.11.intermediate.dense,"(3072, 768)","(1027, 768)","(3072,)","(1027,)",[1027 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertIntermediate[intermediate]/NNCFLinear[dense]/linear_0
|
73 |
+
71,23,FF,nncf_module.bert.encoder.layer.11.output.dense,"(768, 3072)","(768, 1027)","(768,)","(768,)",[1027 items],BertForQuestionAnswering/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLinear[dense]/linear_0
|
tokenizer.json
CHANGED
@@ -1,21 +1,7 @@
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
-
"truncation":
|
4 |
-
|
5 |
-
"max_length": 384,
|
6 |
-
"strategy": "OnlySecond",
|
7 |
-
"stride": 128
|
8 |
-
},
|
9 |
-
"padding": {
|
10 |
-
"strategy": {
|
11 |
-
"Fixed": 384
|
12 |
-
},
|
13 |
-
"direction": "Right",
|
14 |
-
"pad_to_multiple_of": null,
|
15 |
-
"pad_id": 0,
|
16 |
-
"pad_type_id": 0,
|
17 |
-
"pad_token": "[PAD]"
|
18 |
-
},
|
19 |
"added_tokens": [
|
20 |
{
|
21 |
"id": 0,
|
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
+
"truncation": null,
|
4 |
+
"padding": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
"added_tokens": [
|
6 |
{
|
7 |
"id": 0,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3579
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:838f7a4735c59774d578e9a43a91763c34b74e016c13d43a3202f814296076cf
|
3 |
size 3579
|