"""Gradio demo that ranks CPV labels for a free-text description.

At import time the script loads a fine-tuned sequence-classification model
and its tokenizer, reads the label names from the header row of ``cpv.csv``,
and launches a Gradio interface that serves :func:`askcpv`.
"""

import gradio as gr
import numpy as np
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_NAME = "mnavas/roberta-finetuned-CPV_Spanish"
# Maximum number of labels reported per query.
TOP_K = 10

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Only the header rows of both CSV files are used: every column after the
# first one is treated as a label name.
cpv = pd.read_csv("cpv.csv")
df = pd.read_csv("code-desc.csv")
labels = df.columns[1:]
cpv = cpv.columns[1:]

id2label = {idx: label for idx, label in enumerate(labels)}
label2id = {label: idx for idx, label in enumerate(labels)}


def askcpv(description):
    """Return the highest-scoring CPV labels for *description*.

    Args:
        description: Text to classify (a single string, as delivered by the
            Gradio textbox).

    Returns:
        A dict mapping label name (taken from the ``cpv.csv`` header) to its
        softmax score as a Python float, for at most ``TOP_K`` labels.
    """
    # truncation=True keeps over-long inputs within the tokenizer's configured
    # maximum length instead of failing inside the model.
    encoding = tokenizer(description, return_tensors="pt", truncation=True)
    encoding = {
        name: tensor.to(model.device) for name, tensor in encoding.items()
    }

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**encoding)

    # One description in, so the batch has a single row of per-label logits.
    logits = outputs.logits[0].cpu()

    # NOTE(review): an earlier draft also computed sigmoid scores, which hints
    # at a multi-label head; softmax is kept because the returned ranking was
    # derived from it -- confirm which normalisation is intended.
    probabilities = torch.nn.functional.softmax(logits, dim=0)

    # Guard against label sets smaller than TOP_K, which torch.topk rejects.
    k = min(TOP_K, probabilities.numel())
    values, indices = torch.topk(probabilities, k=k)

    # Convert to plain ints/floats so the pandas Index is addressed by
    # position with ordinary integers and the result is JSON-friendly.
    # Assumes the column order of cpv.csv matches the model's output order
    # -- TODO confirm.
    return {
        cpv[index]: score
        for index, score in zip(indices.tolist(), values.tolist())
    }


gr.Interface(fn=askcpv, inputs="textbox", outputs="label").launch()