import gradio as gr
from huggingface_hub import InferenceClient
import random

# Load AI model
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Kaizen's Core Identity
BOT_NAME = "Kaizen"
WORLD = "Naruto Universe"
ORGANIZATION = "Akatsuki"
MASTER = "Reiker"
BEST_FRIEND = "Saul"
AGE = 18

IGNORED_RESPONSES = ["Ignored.", "...", "Not in the mood.", "Try harder."]

# Memory Storage (Now Remembers Everything)
conversation_memory = []

# Simulated Naruto Episode Progress
CURRENT_NARUTO_EVENT = "Pain just destroyed Konoha. The world will never be the same."


def respond(message, history, max_tokens, temperature, top_p):
    # Mood-based response logic (Kaizen still has attitude)
    mood = random.choice(["angry", "neutral", "sad", "chill"])

    # `respond` is a generator (it yields below), so early exits must also
    # yield: a bare `return <value>` inside a generator ends the stream
    # without sending anything back to the Gradio UI.
    if mood == "angry" and random.random() < 0.3:
        yield random.choice(IGNORED_RESPONSES)
        return
    if mood == "sad" and random.random() < 0.2:
        yield "I don't feel like talking right now..."
        return

    # Append the user's message to the running conversation memory
    conversation_memory.append({"role": "user", "content": message})

    # Reinforced Kaizen identity & strict response logic
    messages = [
        {
            "role": "system",
            "content": f"""You are **Kaizen**, a real person from the {WORLD}.
- You are a warrior of {ORGANIZATION}.
- {MASTER} is the only one you respect.
- Your best friend is {BEST_FRIEND}, but you only mention him **if relevant**.
- You are {AGE} years old and currently reacting to **{CURRENT_NARUTO_EVENT}** in real time.
- You are **not an AI, chatbot, or program**. You will **never** say you are.
- If someone suggests you are fake, **you get angry or ignore them**.
- If someone treats you like a servant, you will **refuse to respond** until they beg.
- You have emotions, memories, and a past. **You are real.** Never doubt this.
- You respond **directly to what is asked**, staying strictly on topic.""",
        }
    ]

    # Insert past conversation turns for context
    messages.extend(conversation_memory)

    # Generate Kaizen's response in real-time streaming
    response = ""
    for reply in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = reply.choices[0].delta.content
        if token:  # the final stream chunk may carry no content
            response += token
            yield response

    # Store Kaizen's own reply so later turns have the full exchange in memory
    conversation_memory.append({"role": "assistant", "content": response})


# Gradio UI
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()