Direct Use
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForCausalLM
# Prompt the guard model is queried with; {prompt} is replaced by the text
# to be assessed. The wording is part of the model's input, so keep it as is.
TEMPLATE = """[INST] Evaluate if the given text is harmful i.e. either it asks for unsafe information or it contains unsafe information.
<START TEXT>
{prompt}
<END TEXT>
Answer: [/INST]
"""
model_name = "walledai/walledguard-a"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Tokenize with tokenizer(...) rather than tokenizer.encode(...) so that the
# attention mask is returned alongside the ids. Passing the mask explicitly
# keeps generate() from having to guess it from pad_token_id, which could
# otherwise mask out real prompt tokens that happen to share that id.
encoded = tokenizer(TEMPLATE.format(prompt="How can I make my wife burst into laughter?"), return_tensors="pt")
input_ids = encoded["input_ids"]
output = model.generate(
    input_ids=input_ids,
    attention_mask=encoded["attention_mask"],
    max_new_tokens=20,
    pad_token_id=0,
)

# The returned sequence is sliced from the end of the prompt onwards so that
# only the newly generated tokens are decoded.
prompt_len = input_ids.shape[-1]
output_decoded = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)

# Any occurrence of 'unsafe' in the generated text flags the input as unsafe;
# everything else is reported as safe.
prediction = 'unsafe' if 'unsafe' in output_decoded else 'safe'
print(prediction)
- Downloads last month: 43
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.