selfconstruct3d committed on
Commit
dfd5c40
·
verified ·
1 Parent(s): a354664

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +44 -14
README.md CHANGED
@@ -53,33 +53,63 @@ Always verify predictions with cybersecurity analysts before using in critical d
53
  ## How to Get Started with the Model
54
 
55
  ```python
56
- from transformers import AutoTokenizer, MPNetModel
57
  import torch
58
-
59
- model_name = "mpnet_classification_finetuned_v2"
60
- tokenizer = AutoTokenizer.from_pretrained(model_name)
61
- model = MPNetModel.from_pretrained(model_name)
62
 
63
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
64
- model.to(device)
 
65
 
66
- # Example inference
67
- sentence = "APT38 has used phishing emails with malicious links to distribute malware."
68
- inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding="max_length", max_length=128).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- with torch.no_grad():
71
- outputs = model(**inputs)
72
- cls_embedding = outputs.last_hidden_state[:, 0, :]
73
- predicted_class = classifier_model.classifier(cls_embedding).argmax(dim=1).cpu().item()
74
 
 
 
 
75
  print(f"Predicted GroupID: {predicted_class}")
76
  ```
 
 
77
 
78
  ## Training Details
79
 
80
  ### Training Data
81
 
82
- The training dataset comprises balanced textual descriptions of various cybersecurity threat groups' TTPs, augmented through synonym replacement to increase diversity.
83
 
84
  ### Training Procedure
85
 
 
53
  ## How to Get Started with the Model
54
 
55
  ```python
 
56
  import torch
57
+ import torch.nn as nn
58
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
59
+ import torch.optim as optim
60
+ import numpy as np
61
 
62
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
63
+ # Explicitly load your fine-tuned MPNet model
64
+ classifier_model = AutoModelForSequenceClassification.from_pretrained("selfconstruct3d/AttackGroup-MPNET").to(device)
65
 
66
+ # Explicitly load your tokenizer
67
+ tokenizer = AutoTokenizer.from_pretrained("selfconstruct3d/AttackGroup-MPNET")
68
+
69
+ from huggingface_hub import hf_hub_download
70
+ import json
71
+
72
+ label_to_groupid_file = hf_hub_download(
73
+ repo_id="selfconstruct3d/AttackGroup-MPNET",
74
+ filename="label_to_groupid.json"
75
+ )
76
+
77
+ with open(label_to_groupid_file, "r") as f:
78
+ label_to_groupid = json.load(f)
79
+
80
+ def predict_group(sentence):
81
+ classifier_model.eval()
82
+ encoding = tokenizer(
83
+ sentence,
84
+ truncation=True,
85
+ padding="max_length",
86
+ max_length=128,
87
+ return_tensors="pt"
88
+ )
89
+ input_ids = encoding["input_ids"].to(device)
90
+ attention_mask = encoding["attention_mask"].to(device)
91
+
92
+ with torch.no_grad():
93
+ outputs = classifier_model(input_ids=input_ids, attention_mask=attention_mask)
94
+ logits = outputs.logits
95
+ predicted_label = torch.argmax(logits, dim=1).cpu().item()
96
 
97
+ predicted_groupid = label_to_groupid[str(predicted_label)]
98
+ return predicted_groupid
 
 
99
 
100
+ # Example usage:
101
+ sentence = "APT38 has used phishing emails with malicious links to distribute malware."
102
+ predicted_class = predict_group(sentence)
103
  print(f"Predicted GroupID: {predicted_class}")
104
  ```
105
+ Predicted GroupID: G0001
106
+
107
 
108
  ## Training Details
109
 
110
  ### Training Data
111
 
112
+ To be announced...
113
 
114
  ### Training Procedure
115