selfconstruct3d committed on
Commit
e899270
·
verified ·
1 Parent(s): f2c6a2d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +36 -7
README.md CHANGED
@@ -172,12 +172,41 @@ To be announced...
172
  | Classification Accuracy (Test) | 0.7161 |
173
  | Weighted F1 Score | [More Information Needed] |
174
 
 
 
 
 
 
 
 
 
175
  ### Single Prediction Example
176
 
177
  ```python
178
 
179
- # Create explicit mapping from numeric labels to original GroupIDs
180
- label_to_groupid = dict(enumerate(train_df["GroupID"].astype("category").cat.categories))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
  def predict_group(sentence):
183
  classifier_model.eval()
@@ -192,17 +221,17 @@ def predict_group(sentence):
192
  attention_mask = encoding["attention_mask"].to(device)
193
 
194
  with torch.no_grad():
195
- logits = classifier_model(input_ids, attention_mask)
 
196
  predicted_label = torch.argmax(logits, dim=1).cpu().item()
197
 
198
-
199
- # Explicitly convert numeric label to original GroupID
200
- predicted_groupid = label_to_groupid[predicted_label]
201
  return predicted_groupid
202
 
 
203
  sentence = "APT38 has used phishing emails with malicious links to distribute malware."
204
  predicted_class = predict_group(sentence)
205
- print(f"Predicted GroupID: {predicted_class}") # e.g., Predicted GroupID: G0081
206
  ```
207
 
208
  ## Environmental Impact
 
172
  | Classification Accuracy (Test) | 0.7161 |
173
  | Weighted F1 Score | [More Information Needed] |
174
 
175
+
176
+ Embedding Variability Accuracy
177
+ Original MPNet 0.092721 0.998611
178
+ MLM Fine-tuned MPNet 0.034983 0.653611
179
+ Classification Fine-tuned MPNet 0.193065 0.950833
180
+ SecBERT 0.591303 0.988611
181
+ ATTACK-BERT 0.096108 0.967778
182
+
183
  ### Single Prediction Example
184
 
185
  ```python
186
 
187
+ import torch
188
+ import torch.nn as nn
189
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
190
+ import torch.optim as optim
191
+ import numpy as np
192
+ from huggingface_hub import hf_hub_download
193
+ import json
194
+
195
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
196
+ # Load the fine-tuned MPNet classification model
197
+ classifier_model = AutoModelForSequenceClassification.from_pretrained("selfconstruct3d/AttackGroup-MPNET").to(device)
198
+
199
+ # Load the tokenizer that matches the fine-tuned model
200
+ tokenizer = AutoTokenizer.from_pretrained("selfconstruct3d/AttackGroup-MPNET")
201
+
202
+
203
+ label_to_groupid_file = hf_hub_download(
204
+ repo_id="selfconstruct3d/AttackGroup-MPNET",
205
+ filename="label_to_groupid.json"
206
+ )
207
+
208
+ with open(label_to_groupid_file, "r") as f:
209
+ label_to_groupid = json.load(f)
210
 
211
  def predict_group(sentence):
212
  classifier_model.eval()
 
221
  attention_mask = encoding["attention_mask"].to(device)
222
 
223
  with torch.no_grad():
224
+ outputs = classifier_model(input_ids=input_ids, attention_mask=attention_mask)
225
+ logits = outputs.logits
226
  predicted_label = torch.argmax(logits, dim=1).cpu().item()
227
 
228
+ predicted_groupid = label_to_groupid[str(predicted_label)]
 
 
229
  return predicted_groupid
230
 
231
+ # Example usage:
232
  sentence = "APT38 has used phishing emails with malicious links to distribute malware."
233
  predicted_class = predict_group(sentence)
234
+ print(f"Predicted GroupID: {predicted_class}")
235
  ```
236
 
237
  ## Environmental Impact