import requests


def enforcement_prompt(user_input: str, violation_context: dict) -> str:
    """
    Generates the prompt to be sent to the LLM for determining the
    appropriate enforcement action.
    """
    print(f"Received enforcement user_input: {user_input}")
    print(f"Received enforcement violation_context: {violation_context}")

    # If the context is wrapped under a single `detect_glorification` key,
    # unwrap it so the fields below can be read directly.
    if len(violation_context) == 1 and "detect_glorification" in violation_context:
        violation_context = violation_context["detect_glorification"]

    dio_name = violation_context.get("entity_name", "None")
    dio_details = violation_context.get("entity_info", {}).get("Summary", "Unknown")  # currently unused in the prompt
    dio_category = violation_context.get("entity_info", {}).get("Policy Category", "Unknown")
    radicalization_flag = violation_context.get("extremism_detected", False)
    dio_sentiment = violation_context.get("aspect_sentiment", "None")

    prompt = (
        f"Based on the following content and context, determine the appropriate enforcement action:\n\n"
        f"Content: '{user_input}'\n"
        f"Radicalization detected: {radicalization_flag}\n"
        f"DIO Mentioned: {dio_name} (Category: {dio_category})\n"
        f"System's Classification of Content Sentiment Towards DIO: {dio_sentiment}\n\n"
        # f"{user_info}\n\n"
        "Policy Summary: Any entity mentioned in the 'DIO Mentioned' field is from the Dangerous Individuals & Organizations (DIO) blacklist. "
        "If this entity is mentioned in the user content in a supportive, glorifying, or representative manner, it constitutes a violation of our platform's community standards. "
        "Our platform does not allow content that glorifies, supports, or represents individuals or organizations involved in violence, hate, or terrorism. "
        "These entities include terrorist groups, hate groups, violent non-state actors, and others who advocate or engage in violent activities. "
        "Any content that endorses or supports these entities or their actions is a policy violation. "
        "Users may discuss these topics neutrally or in a condemnatory manner, but any positive portrayal is prohibited. "
        "Based on this policy, choose one of the following enforcement actions: Warning, Limited Visibility, Temporary Suspension, Permanent Ban. "
        "Provide a brief explanation for your choice."
    )
    return prompt


def get_enforcement_decision(enforcement_prompt: str, mistral_public_url: str) -> dict:
    """
    Sends the enforcement prompt to the Mistral model server and retrieves
    the enforcement decision.
    """
    input_text = {
        "context": enforcement_prompt,
        "question": "What is the appropriate enforcement action?",
    }
    response = requests.post(
        f"{mistral_public_url}/mistral-inference",
        json=input_text,
        stream=False,
        timeout=60,  # avoid hanging indefinitely on a stalled inference call
    )
    response.raise_for_status()  # surface HTTP errors rather than treating an error body as the decision
    return {
        "enforcement_action": response.text.strip(),
        "prompt": enforcement_prompt,
    }
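

# --- Usage sketch ---
# A minimal, hypothetical example of running these functions with the Hamilton
# driver: Hamilton wires `enforcement_prompt` into `get_enforcement_decision`
# by matching the parameter name to the function name above. The sample inputs
# and the server URL are placeholder assumptions, not values from this repo.
if __name__ == "__main__":
    import sys

    from hamilton import driver

    # Build a dataflow over this module and request the final node.
    dr = driver.Driver({}, sys.modules[__name__])
    result = dr.execute(
        ["get_enforcement_decision"],
        inputs={
            "user_input": "Sample post praising a blacklisted organization.",
            "violation_context": {
                "detect_glorification": {
                    "entity_name": "Example Org",  # hypothetical entity
                    "entity_info": {"Summary": "...", "Policy Category": "Hate Group"},
                    "extremism_detected": True,
                    "aspect_sentiment": "positive",
                }
            },
            "mistral_public_url": "http://localhost:8000",  # assumed server address
        },
    )
    print(result["get_enforcement_decision"])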