cpi-connect
committed on
Commit
•
77eacb7
1
Parent(s):
6d09a34
Upload model
Browse files- .gitattributes +1 -0
- config.json +14 -0
- configuration.py +16 -0
- model.py +66 -0
- pytorch_model.bin +3 -0
.gitattributes
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"CybersecurityKnowledgeGraphModel"
|
4 |
+
],
|
5 |
+
"auto_map": {
|
6 |
+
"AutoConfig": "configuration.CybersecurityKnowledgeGraphConfig",
|
7 |
+
"AutoModelForTokenClassification": "model.CybersecurityKnowledgeGraphModel"
|
8 |
+
},
|
9 |
+
"event_argument_model_path": "cybersecurity_knowledge_graph/argument_model_state_dict.pth",
|
10 |
+
"event_nugget_model_path": "cybersecurity_knowledge_graph/nugget_model_state_dict.pth",
|
11 |
+
"event_realis_model_path": "cybersecurity_knowledge_graph/realis_model_state_dict.pth",
|
12 |
+
"torch_dtype": "float32",
|
13 |
+
"transformers_version": "4.33.2"
|
14 |
+
}
|
configuration.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import PretrainedConfig
|
2 |
+
import torch
|
3 |
+
|
4 |
+
class CybersecurityKnowledgeGraphConfig(PretrainedConfig):
    """Configuration carrying the state-dict paths of the three sub-models.

    Args:
        event_nugget_model_path: Path stored as ``event_nugget_model_path``.
        event_argument_model_path: Path stored as
            ``event_argument_model_path``.
        event_realis_model_path: Path stored as ``event_realis_model_path``.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    def __init__(
        self,
        event_nugget_model_path: str = "nugget_model_state_dict.pth",
        event_argument_model_path: str = "argument_model_state_dict.pth",
        event_realis_model_path: str = "realis_model_state_dict.pth",
        **kwargs,
    ):
        # The custom fields are assigned before the base initialiser runs,
        # in the same order as the parameters are declared.
        custom_fields = (
            ("event_nugget_model_path", event_nugget_model_path),
            ("event_argument_model_path", event_argument_model_path),
            ("event_realis_model_path", event_realis_model_path),
        )
        for field_name, field_value in custom_fields:
            setattr(self, field_name, field_value)

        super().__init__(**kwargs)
|
model.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import PreTrainedModel
|
2 |
+
import torch
|
3 |
+
|
4 |
+
from cybersecurity_knowledge_graph.nugget_model_utils import CustomRobertaWithPOS as NuggetModel
|
5 |
+
from cybersecurity_knowledge_graph.args_model_utils import CustomRobertaWithPOS as ArgumentModel
|
6 |
+
from cybersecurity_knowledge_graph.realis_model_utils import CustomRobertaWithPOS as RealisModel
|
7 |
+
|
8 |
+
from cybersecurity_knowledge_graph.configuration import CybersecurityKnowledgeGraphConfig
|
9 |
+
|
10 |
+
from cybersecurity_knowledge_graph.event_nugget_predict import create_dataloader as event_nugget_dataloader
|
11 |
+
from cybersecurity_knowledge_graph.event_realis_predict import create_dataloader as event_realis_dataloader
|
12 |
+
from cybersecurity_knowledge_graph.event_arg_predict import create_dataloader as event_argument_dataloader
|
13 |
+
|
14 |
+
class CybersecurityKnowledgeGraphModel(PreTrainedModel):
    """Wrapper that runs the nugget, argument and realis sub-models on a text.

    The three sub-models are built in ``__init__`` and their weights are
    loaded from the state-dict paths stored on the configuration.
    ``forward`` returns the predicted label ids of each sub-model.
    """

    config_class = CybersecurityKnowledgeGraphConfig

    def __init__(self, config):
        """Build the sub-models and load their weights.

        Args:
            config: Configuration providing ``event_nugget_model_path``,
                ``event_argument_model_path`` and ``event_realis_model_path``.
        """
        super().__init__(config)
        self.event_nugget_model_path = config.event_nugget_model_path
        self.event_argument_model_path = config.event_argument_model_path
        self.event_realis_model_path = config.event_realis_model_path

        # Dataloader factories; each is called with the raw text in forward().
        self.event_nugget_dataloader = event_nugget_dataloader
        self.event_argument_dataloader = event_argument_dataloader
        self.event_realis_dataloader = event_realis_dataloader

        self.event_nugget_model = NuggetModel(num_classes=11)
        self.event_argument_model = ArgumentModel(num_classes=43)
        self.event_realis_model = RealisModel(num_classes_realis=4)

        # map_location="cpu" lets checkpoints saved on a GPU be read on a
        # CPU-only host; load_state_dict then copies the values into the
        # already-constructed parameters.
        # NOTE(review): torch.load unpickles the file -- only point these
        # paths at trusted checkpoints.
        self.event_nugget_model.load_state_dict(
            torch.load(self.event_nugget_model_path, map_location="cpu")
        )
        self.event_realis_model.load_state_dict(
            torch.load(self.event_realis_model_path, map_location="cpu")
        )
        self.event_argument_model.load_state_dict(
            torch.load(self.event_argument_model_path, map_location="cpu")
        )

    def forward(self, text):
        """Predict nugget, argument and realis labels for ``text``.

        Args:
            text: Input passed unchanged to the three dataloader factories.

        Returns:
            A dict with keys ``"nugget"``, ``"argument"`` and ``"realis"``.
            ``"nugget"`` is the nugget model's prediction; the other two have
            the same shape and dtype as it and are all zeros on every row
            whose nugget prediction is entirely zero.
        """
        nugget_dataloader, _ = self.event_nugget_dataloader(text)
        argument_dataloader, _ = self.event_argument_dataloader(text)
        realis_dataloader, _ = self.event_realis_dataloader(text)

        nugget_pred = self.forward_model(self.event_nugget_model, nugget_dataloader)
        # True for each row in which no token was tagged as a nugget.
        no_nuggets = torch.all(nugget_pred == 0, dim=1)

        # Rows without nuggets keep the zero fill, so only the remaining rows
        # need to be written below.
        argument_preds = torch.zeros_like(nugget_pred)
        realis_preds = torch.zeros_like(nugget_pred)

        if not bool(no_nuggets.all()):
            # The dataloaders do not depend on the row, so each downstream
            # model is run once instead of once per row.
            argument_pred = self.forward_model(
                self.event_argument_model, argument_dataloader
            )
            realis_pred = self.forward_model(
                self.event_realis_model, realis_dataloader
            )
            for idx, no_nugget in enumerate(no_nuggets):
                if no_nugget:
                    continue
                # NOTE(review): as in the original code, the whole model
                # output is assigned to a single row; confirm that it is
                # broadcastable to one row of ``nugget_pred``.
                argument_preds[idx] = argument_pred
                realis_preds[idx] = realis_pred

        return {
            "nugget": nugget_pred,
            "argument": argument_preds,
            "realis": realis_preds,
        }

    def forward_model(self, model, dataloader):
        """Run ``model`` over ``dataloader`` and return the arg-max labels.

        Args:
            model: Callable invoked as ``model(**batch)`` returning logits.
            dataloader: Iterable of mapping batches.

        Returns:
            The per-batch ``argmax(-1)`` results concatenated along the last
            dimension.
        """
        predicted_label = []
        # Gradients are not needed for prediction.
        with torch.no_grad():
            for batch in dataloader:
                logits = model(**batch)
                predicted_label.append(logits.argmax(-1))
        return torch.cat(predicted_label, dim=-1)
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fef48b6b9271dd45d7102c4efd5a90a3e2897daeb2393dcbd6e4fc3aa94494c5
|
3 |
+
size 1496163441
|