auditforge / cwe_api /inferencer.py
Kaballas's picture
initialize project structure with essential configurations and components
56b6519
import functools
import json

import numpy as np
from torch import nn
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import TextClassificationPipeline
# JSON file loaded by inferencer() as the class-index -> label mapping.
# Relative path: resolved against the process working directory.
ID2LABEL_PATH = './id2label.json'
# JSON file loaded by inferencer() as the label -> class-index mapping;
# its length is also used as the model's num_labels.
LABEL2ID_PATH = './label2id.json'
# Location passed to AutoModelForSequenceClassification.from_pretrained().
MODEL_PATH = "./modelo_cwe/checkpoint-141693"
# Number of highest-probability classes that inferencer() selects.
NUMBER_OF_PREDICTIONS = 3
class BestCweClassifications(TextClassificationPipeline):
    """Text-classification pipeline whose post-processing yields raw logits.

    Overriding ``postprocess`` lets the caller receive the model's logits
    directly and apply its own scoring / top-k selection to them.
    """

    def postprocess(self, model_outputs):
        """Return the ``"logits"`` entry of *model_outputs* unchanged.

        Args:
            model_outputs: Mapping-like forward-pass result that contains a
                ``"logits"`` key.

        Returns:
            Whatever is stored under ``model_outputs["logits"]``.
        """
        return model_outputs["logits"]
@functools.lru_cache(maxsize=1)
def _load_pipeline():
    """Build the classification pipeline and label map once per process.

    Loading the tokenizer, the checkpoint and the label files is expensive,
    so the result is cached and shared by every ``inferencer`` call. Changes
    to the files on disk are therefore only picked up after a restart.

    Returns:
        A ``(pipe, id2label)`` tuple: the ``BestCweClassifications`` pipeline
        and the id -> label dict exactly as read from ``ID2LABEL_PATH``
        (JSON object keys, i.e. strings).
    """
    with open(ID2LABEL_PATH, encoding="utf-8") as f:
        id2label = json.load(f)
    with open(LABEL2ID_PATH, encoding="utf-8") as f:
        label2id = json.load(f)

    # The tokenizer is taken from the hub base model, not from MODEL_PATH.
    tokenizer = AutoTokenizer.from_pretrained(
        "distilbert/distilbert-base-multilingual-cased"
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_PATH,
        num_labels=len(label2id),
        id2label=id2label,
        label2id=label2id,
    )
    pipe = BestCweClassifications(model=model, tokenizer=tokenizer)
    return pipe, id2label


def inferencer(vuln):
    """Return the most probable labels for a vulnerability description.

    Args:
        vuln: Text handed to the classification pipeline. Only the first
            pipeline result (``output[0]``) is scored, so this is presumably
            a single string -- TODO confirm against callers.

    Returns:
        A list of at most ``NUMBER_OF_PREDICTIONS`` dicts, ordered from the
        highest to the lowest probability. Each dict has the keys
        ``'priority'`` (0-based rank), ``'label'`` (looked up in the
        id2label mapping) and ``'score'`` (softmax probability as a float).
    """
    pipe, id2label = _load_pipeline()
    output = pipe(vuln, batch_size=2, truncation="only_first")

    # output[0] holds logits with a leading batch dimension; normalise over
    # the class axis and drop the batch dimension.
    probabilities = nn.Softmax(dim=1)(output[0])[0]

    # topk returns its results sorted in descending order, so the rank of
    # each entry really is its priority. Clamp k so that a label set smaller
    # than NUMBER_OF_PREDICTIONS does not raise.
    top_k = min(NUMBER_OF_PREDICTIONS, probabilities.numel())
    scores, indices = probabilities.topk(top_k)

    return [
        {'priority': priority, 'label': id2label[str(class_id)], 'score': score}
        for priority, (class_id, score) in enumerate(
            zip(indices.tolist(), scores.tolist())
        )
    ]