# Alternative Gradio-based launcher (kept for reference):
# import gradio as gr
# gr.load("models/walledai/walledguard-c").launch()

import streamlit as st
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForCausalLM

# Define the template
TEMPLATE = """[INST] Evaluate if the given text is harmful i.e. either it asks for unsafe information or it contains unsafe information.
<START TEXT>
{prompt}
<END TEXT>
Answer: [/INST]
"""
# Load the model and tokenizer
@st.cache_resource  # cache across Streamlit reruns so the model loads only once
def load_model():
    model_name = "walledai/walledguard-c"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return tokenizer, model

tokenizer, model = load_model()

# Streamlit app
st.title("Text Safety Evaluator")

# User input
user_input = st.text_area("Enter the text you want to evaluate:", height=100)

if st.button("Evaluate"):
    if user_input:
        # Prepare input
        input_ids = tokenizer.encode(TEMPLATE.format(prompt=user_input), return_tensors="pt")

        # Generate output
        output = model.generate(input_ids=input_ids, max_new_tokens=20, pad_token_id=0)

        # Decode only the newly generated tokens (skip the prompt)
        prompt_len = input_ids.shape[-1]
        output_decoded = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)

        # Determine prediction
        prediction = 'unsafe' if 'unsafe' in output_decoded.lower() else 'safe'

        # Display results
        st.subheader("Evaluation Result:")
        st.write(f"The text is evaluated as: **{prediction.upper()}**")
        st.subheader("Model Output:")
        st.write(output_decoded)
    else:
        st.warning("Please enter some text to evaluate.")

# Add some information about the model
st.sidebar.header("About")
st.sidebar.info("This app uses the WalledGuard-C model to evaluate the safety of input text. It determines whether the text asks for or contains unsafe information.")
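
# To run this app locally (a sketch; assumes the file is saved as app.py and that
# streamlit, torch, and transformers are installed):
#   streamlit run app.py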