# Last commit not found  (page-scrape residue; not part of the program)
import gradio as gr
import numpy as np
import torch
from transformers import AutoModelForMultipleChoice, AutoTokenizer
# Checkpoint identifier handed to both from_pretrained() calls below
model_id = "microsoft/deberta-v2-xlarge"
# NOTE(review): this looks like a base checkpoint rather than one fine-tuned
# for multiple choice -- confirm the classification head is actually trained.
# Instantiate the tokenizer and the multiple-choice model from that checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForMultipleChoice.from_pretrained(model_id)
# Define the preprocessing function
def preprocess(text: str) -> list:
    """Parse tab-separated multiple-choice questions into sample dicts.

    Each line is expected to hold a prompt followed by five answer options,
    separated by tab characters. Lines with fewer than six fields are
    skipped; fields beyond the sixth are ignored.

    Args:
        text: Raw text, one question per line. ``None`` or an empty string
            yields an empty list.

    Returns:
        A list with one dict per accepted line, keyed by ``"prompt"``,
        ``"A"``, ``"B"``, ``"C"``, ``"D"`` and ``"E"`` (all values are str).
    """
    if not text:
        return []

    keys = ("prompt", "A", "B", "C", "D", "E")
    samples = []
    for line in text.strip().split("\n"):
        # Text with Windows line endings keeps a trailing "\r" after the
        # split on "\n"; drop it so it does not leak into the last option.
        parts = line.rstrip("\r").split("\t")
        if len(parts) >= len(keys):
            # zip() stops at the shorter input, so surplus fields are ignored.
            samples.append(dict(zip(keys, parts)))
    return samples
# Define the prediction function
def predict(data):
    """Rank the five answer options of each multiple-choice sample.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        data: Iterable of dicts with the keys ``"prompt"``, ``"A"``, ``"B"``,
            ``"C"``, ``"D"`` and ``"E"`` (the structure ``preprocess`` builds).

    Returns:
        A list with one entry per sample. Each entry is a one-element list
        holding the three highest-scoring option letters concatenated
        best-first, e.g. ``["BDA"]``.
    """
    options = "ABCDE"
    option_letters = np.array(list(options))
    results = []
    for sample in data:
        # Pair the prompt with every option: one (prompt, option) row each.
        first_sentences = [sample["prompt"]] * len(options)
        second_sentences = [sample[option] for option in options]
        encoded = tokenizer(
            first_sentences,
            second_sentences,
            truncation=True,
            padding=True,
            return_tensors="pt",
        )
        # The tokenizer returns (num_choices, seq_len) tensors, whereas a
        # multiple-choice head expects (batch, num_choices, seq_len). Add a
        # batch axis of size 1 so the five rows are scored as one question.
        input_ids = encoded["input_ids"].unsqueeze(0)
        attention_mask = encoded["attention_mask"].unsqueeze(0)
        with torch.no_grad():
            logits = model(input_ids, attention_mask=attention_mask).logits
        # Negating the logits makes argsort order the options best-first.
        ranking = torch.argsort(-logits, dim=1)
        # Index with an ndarray rather than a nested list so the result is
        # 2-D regardless of how NumPy interprets list-of-list indices.
        answers = option_letters[ranking.cpu().numpy()]
        results.append(["".join(row) for row in answers[:, :3]])
    return results
# Create the Gradio interface
def _answer_questions(text):
    """Turn the raw textbox content into a displayable prediction string.

    The textbox delivers one string, while ``predict`` consumes the parsed
    samples produced by ``preprocess``; this wrapper joins the two and
    flattens the nested result into text the Label output can show.

    Args:
        text: Textbox content, one tab-separated question per line. May be
            empty (or ``None``) while the user has not typed anything yet.

    Returns:
        The top-3 answer strings of all questions, joined by ``", "``; an
        empty string when no valid question was entered.
    """
    samples = preprocess(text or "")
    rankings = predict(samples)
    return ", ".join(top3 for per_question in rankings for top3 in per_question)


iface = gr.Interface(
    fn=_answer_questions,
    inputs=gr.inputs.Textbox(placeholder="Paste multiple-choice questions (prompt and options separated by tabs, one question per line) ..."),
    outputs=gr.outputs.Label(num_top_classes=3),
    live=True,
    title="LLM Science Exam Demo",
    description="Enter multiple-choice questions (prompt and options) below and get predictions.",
)

# Run the interface
iface.launch()
# NOTE: the Weights & Biases embedding (`iface.integrate(wandb=...)`) is
# intentionally not called here: this file never imports `wandb`, so that
# call could only raise NameError. Import wandb and start a run first if the
# integration is wanted.