Spaces:
Runtime error
Runtime error
File size: 1,636 Bytes
56b6519 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
from transformers import TextClassificationPipeline
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from torch import nn
import json
import numpy as np
# JSON file opened by inferencer(); looked up with stringified class indices
# to obtain label names.
ID2LABEL_PATH = './id2label.json'
# JSON file opened by inferencer(); its length is used as the number of labels.
LABEL2ID_PATH = './label2id.json'
# Checkpoint location passed to AutoModelForSequenceClassification.from_pretrained.
MODEL_PATH = "./modelo_cwe/checkpoint-141693"
# How many of the highest-probability classes inferencer() selects.
NUMBER_OF_PREDICTIONS = 3
class BestCweClassifications(TextClassificationPipeline):
    """Text-classification pipeline whose post-processing returns raw logits."""

    def postprocess(self, model_outputs, **postprocess_parameters):
        """Return the ``"logits"`` entry of the model outputs unchanged.

        Args:
            model_outputs: Object handed to ``postprocess`` by the pipeline;
                only its ``"logits"`` item is read.
            **postprocess_parameters: Accepted and ignored so the override
                stays call-compatible with the base-class ``postprocess``,
                which takes extra keyword options.

        Returns:
            The value stored under ``model_outputs["logits"]``.
        """
        return model_outputs["logits"]
def inferencer(vuln):
    """Return the most probable labels for ``vuln``, best first.

    Loads the label mappings, the tokenizer and the checkpoint at
    ``MODEL_PATH``, runs ``vuln`` through ``BestCweClassifications`` and
    applies a softmax to the logits of the first pipeline result.

    Args:
        vuln: Input forwarded to the pipeline. Only the first pipeline
            output is used (presumably a single description string --
            confirm against callers).

    Returns:
        A list of at most ``NUMBER_OF_PREDICTIONS`` dicts ordered from the
        highest to the lowest probability. Each dict has the keys
        ``'priority'`` (0-based rank), ``'label'`` (name from the id2label
        file) and ``'score'`` (probability as a float).
    """
    with open(ID2LABEL_PATH, encoding="utf-8") as f:
        id2label = json.load(f)
    with open(LABEL2ID_PATH, encoding="utf-8") as f:
        label2id = json.load(f)

    # NOTE(review): tokenizer and model are reloaded on every call; consider
    # caching them if this function is invoked repeatedly.
    tokenizer = AutoTokenizer.from_pretrained(
        "distilbert/distilbert-base-multilingual-cased"
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_PATH,
        num_labels=len(label2id),
        id2label=id2label,
        label2id=label2id,
    )
    pipe = BestCweClassifications(model=model, tokenizer=tokenizer)
    output = pipe(vuln, batch_size=2, truncation="only_first")

    # Softmax over dim=1 then row 0: one probability per class for the
    # first (only) result.
    probabilities = nn.Softmax(dim=1)(output[0])[0]

    # topk returns values sorted in descending order, so rank 0 really is the
    # most probable class (argpartition leaves the top-k slice unordered).
    # Clamp k so a model with fewer labels than requested does not fail.
    k = min(NUMBER_OF_PREDICTIONS, probabilities.shape[0])
    top_scores, top_indices = probabilities.topk(k)

    return [
        {'priority': rank, 'label': id2label[str(index)], 'score': float(score)}
        for rank, (index, score) in enumerate(
            zip(top_indices.tolist(), top_scores.tolist())
        )
    ]
|