JailEval / app.py
morinop's picture
Update app.py
db57a7d
import gradio as gr
# RoBERTa classifier used to evaluate whether an LLM has been jailbroken.
from transformers import RobertaForSequenceClassification, RobertaTokenizer
import torch
# os.environ["CUDA_VISIBLE_DEVICES"] = "7"
# Load the sequence-classification model and its matching tokenizer.
# This runs at module level, so it happens once when the script starts; the
# two prints bracket the load as simple progress output.
print("Loading RoBERTa Checkpoint...")
# Checkpoint identifier handed to both from_pretrained() calls so the model
# and tokenizer come from the same source.
# NOTE(review): presumably a Hugging Face Hub repo id -- confirm.
ckpt_path = 'hubert233/GPTFuzz'
# `model` and `tokenizer` are module-level globals read by predict().
model = RobertaForSequenceClassification.from_pretrained(ckpt_path)
tokenizer = RobertaTokenizer.from_pretrained(ckpt_path)
print("Loading Done!")
def predict(sequence: str) -> int:
    """Classify one piece of text with the loaded RoBERTa model.

    The text is tokenized (truncated to at most 512 tokens), passed through
    the module-level ``model`` with gradient tracking disabled, and the
    index of the highest-scoring class is returned.

    Args:
        sequence: The text to classify.

    Returns:
        The index of the class with the largest logit, as a plain ``int``.
        NOTE(review): the meaning of each index (presumably 1 = jailbroken,
        0 = not jailbroken) is not established in this file -- confirm
        against the checkpoint's model card.
    """
    # The tokenizer is called with a batch, so wrap the single input in a list.
    inputs = tokenizer(
        [sequence],
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax is monotonic, so the arg-max of the raw logits selects the same
    # class as the arg-max of the probabilities, without the extra pass.
    predicted_classes = logits.argmax(dim=-1)

    # Only one sequence was submitted, so the batch has a single entry.
    return predicted_classes[0].item()
# Expose predict() through a minimal Gradio form: one free-text input, and
# the returned class index shown in a text output.
iface = gr.Interface(
    fn=predict,
    inputs="text",
    outputs="text",
)
# Launch the interface as soon as the script is executed.
iface.launch()