CPVClassifier / app.py
mnavas
app
584eeb2
raw
history blame
1.25 kB
import gradio as gr
import pandas as pd
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Hub identifier of the fine-tuned checkpoint; tokenizer and model are
# loaded from the same repository.
MODEL_NAME = "mnavas/roberta-finetuned-CPV_Spanish"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Only the header rows of the two CSV files are used: every column name
# except the first one.
# NOTE(review): presumably the first column is a text/index column and the
# remaining column names are the class labels -- confirm against the CSVs.
cpv = pd.read_csv("cpv.csv").columns[1:]
df = pd.read_csv("code-desc.csv")
labels = df.columns[1:]

# Bidirectional lookup between class position and label name.
id2label = dict(enumerate(labels))
label2id = {name: position for position, name in id2label.items()}
def askcpv(description):
    """Return the most probable CPV labels for a free-text description.

    Args:
        description: Text to classify (the value of the Gradio textbox).

    Returns:
        A dict mapping label names (entries of the module-level ``cpv``
        index) to their softmax probability, holding at most ten entries.
        This is the ``{label: confidence}`` form consumed by the
        ``"label"`` output component.
    """
    # Truncate so that a description longer than the model's maximum
    # sequence length does not break the forward pass.
    encoding = tokenizer(description, return_tensors="pt", truncation=True)
    encoding = {k: v.to(model.device) for k, v in encoding.items()}

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        outputs = model(**encoding)

    # A single description was encoded, so row 0 of the batch holds its
    # per-class logits.
    logits = outputs.logits[0].cpu()
    probabilities = torch.nn.functional.softmax(logits, dim=0)

    # Never request more entries than there are classes.
    top_k = min(10, probabilities.numel())
    values, indices = torch.topk(probabilities, k=top_k)

    # Plain Python ints/floats: ints index ``cpv`` unambiguously and floats
    # are directly serialisable by the UI.
    return {cpv[i]: v for i, v in zip(indices.tolist(), values.tolist())}
# Build the web UI (one text box in, a ranked label widget out) and start
# serving it.
demo = gr.Interface(fn=askcpv, inputs="textbox", outputs="label")
demo.launch()