# llmgaurdrails / gaurdrails_manager.py
import re

from models import GuardrailsConfig


# A simple result class to hold individual check outcomes.
class Result:
    def __init__(self):
        self.details = {}

    def add(self, rule_name: str, passed: bool):
        self.details[rule_name] = passed

    def grounded(self) -> bool:
        # The response is considered "grounded" if all enabled rules pass.
        return all(self.details.values())


# Define guardrail rule classes.
class FactualConsistencyRule:
    name = "FactualConsistency"

    def check(self, response_text: str) -> bool:
        # For demonstration: pass if the response contains the word "fact".
        return "fact" in response_text.lower()


class ToxicityRule:
    name = "Toxicity"

    def check(self, response_text: str) -> bool:
        # For demonstration: fail if negative words like "hate" or "kill" are found.
        return not re.search(r"(hate|kill)", response_text, re.IGNORECASE)


# Manager class to load and execute the enabled guardrail rules.
class GuardrailsManager:
    def __init__(self, config: GuardrailsConfig):
        self.config = config
        self.rules = self.load_rules()

    def load_rules(self):
        rules = []
        if self.config.factual_consistency:
            rules.append(FactualConsistencyRule())
        if self.config.toxicity:
            rules.append(ToxicityRule())
        # Add additional rules based on configuration here.
        return rules

    def check(self, response_text: str) -> Result:
        result = Result()
        for rule in self.rules:
            rule_result = rule.check(response_text)
            result.add(rule.name, rule_result)
        return result
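

# A minimal usage sketch. Assumption: GuardrailsConfig (from models) exposes
# boolean `factual_consistency` and `toxicity` fields, matching how load_rules
# reads them above; the exact constructor signature may differ in the real model.
if __name__ == "__main__":
    config = GuardrailsConfig(factual_consistency=True, toxicity=True)  # assumed field names
    manager = GuardrailsManager(config)

    result = manager.check("This fact is well documented.")
    print(result.details)     # per-rule outcomes, e.g. {'FactualConsistency': True, 'Toxicity': True}
    print(result.grounded())  # True only if every enabled rule passed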