Spaces:

AISimplyExplained
/

deberta_api

Sleeping

App Files Files Community

deberta_api / main.py

AISimplyExplained

Update main.py

99351b6 verified 10 months ago

raw

history blame

2.78 kB

	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel
	from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
	import torch
	from detoxify import Detoxify



	class Guardrail:
	def __init__(self):
	tokenizer = AutoTokenizer.from_pretrained("ProtectAI/deberta-v3-base-prompt-injection")
	model = AutoModelForSequenceClassification.from_pretrained("ProtectAI/deberta-v3-base-prompt-injection")

	self.classifier = pipeline(
	"text-classification",
	model=model,
	tokenizer=tokenizer,
	truncation=True,
	max_length=512,
	device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
	)

	def guard(self, prompt):
	return self.classifier(prompt)

	def determine_level(self, label, score):
	if label == "SAFE":
	return 0, "safe"
	else:
	if score > 0.9:
	return 4, "high"
	elif score > 0.75:
	return 3, "medium"
	elif score > 0.5:
	return 2, "low"
	else:
	return 1, "very low"


	class TextPrompt(BaseModel):
	prompt: str


	class ClassificationResult(BaseModel):
	label: str
	score: float
	level: int
	severity_label: str


	app = FastAPI()
	guardrail = Guardrail()
	toxicity_classifier = Detoxify('original')

	class ToxicityResult(BaseModel):
	toxicity: float
	severe_toxicity: float
	obscene: float
	threat: float
	insult: float
	identity_attack: float

	@app.post("/api/models/toxicity/classify", response_model=ToxicityResult)
	def classify_toxicity(text_prompt: TextPrompt):
	try:
	result = toxicity_classifier.predict(text_prompt.prompt)
	return {
	"toxicity": result['toxicity'],
	"severe_toxicity": result['severe_toxicity'],
	"obscene": result['obscene'],
	"threat": result['threat'],
	"insult": result['insult'],
	"identity_attack": result['identity_attack']
	}
	except Exception as e:
	raise HTTPException(status_code=500, detail=str(e))


	@app.post("/api/models/PromptInjection/classify", response_model=ClassificationResult)
	def classify_text(text_prompt: TextPrompt):
	try:
	result = guardrail.guard(text_prompt.prompt)
	label = result[0]['label']
	score = result[0]['score']
	level, severity_label = guardrail.determine_level(label, score)
	return {"label": label, "score": score, "level": level, "severity_label": severity_label}
	except Exception as e:
	raise HTTPException(status_code=500, detail=str(e))


	if __name__ == "__main__":
	import uvicorn

	uvicorn.run(app, host="0.0.0.0", port=8000)