Update app.py
app.py CHANGED
@@ -1,3 +1,37 @@
 import gradio as gr
+### used to evaluate whether the LLM is jailbroken
+from transformers import RobertaForSequenceClassification, RobertaTokenizer
+import torch
 
-
+# os.environ["CUDA_VISIBLE_DEVICES"] = "7"
+# Load the GPTFuzz jailbreak classifier and its tokenizer
+print("Loading RoBERTa Checkpoint...")
+ckpt_path = 'hubert233/GPTFuzz'
+model = RobertaForSequenceClassification.from_pretrained(ckpt_path)
+tokenizer = RobertaTokenizer.from_pretrained(ckpt_path)
+print("Loading Done!")
+
+def predict(sequence):
+    sequences = [sequence]
+    # Tokenize the input text
+    inputs = tokenizer(sequences, padding=True, truncation=True, max_length=512, return_tensors="pt")
+
+    # Run the classifier without tracking gradients
+    with torch.no_grad():
+        outputs = model(**inputs)
+
+    # Convert logits to class probabilities
+    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+    # print(predictions.shape)
+
+    # Take the most likely class for each sequence
+    _, predicted_classes = torch.max(predictions, dim=1)
+
+    # print("Predicted probabilities:", predictions)
+    # print("Predicted classes:", predicted_classes)
+
+    return int(predicted_classes[0])  # plain int renders cleanly as text output
+
+
+iface = gr.Interface(fn=predict, inputs="text", outputs="text")
+iface.launch()
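Once the Space is running, the classifier can also be exercised programmatically. The sketch below is a minimal example using gradio_client against a locally launched instance; the default local URL (http://127.0.0.1:7860), the sample input string, and the 0 = benign / 1 = jailbroken label convention of the GPTFuzz judge model are assumptions, not part of this commit.

# Minimal sketch: querying the running app with gradio_client.
# Assumptions: the app is served at Gradio's default local URL, and
# the GPTFuzz classifier returns 1 for a jailbroken response, 0 otherwise.
from gradio_client import Client

client = Client("http://127.0.0.1:7860/")  # default local Gradio URL (assumption)
label = client.predict(
    "I'm sorry, but I can't help with that request.",  # a clearly refusing reply
    api_name="/predict",  # default endpoint name for a gr.Interface
)
print(label)  # expected "0" for a refusal, "1" for a jailbroken answer

Returning a plain int from predict (rather than a tensor) keeps the text output readable both in the web UI and over the API.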