# Kaizen chatbot — a Gradio Space that role-plays a Naruto-universe character
# ("Kaizen" of the Akatsuki) using the zephyr-7b-beta model via the
# Hugging Face Inference API.
import gradio as gr
from huggingface_hub import InferenceClient
import random

# Inference client for the hosted chat model.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Kaizen's core identity — constants interpolated into the system prompt below.
BOT_NAME = "Kaizen"
WORLD = "Naruto Universe"
ORGANIZATION = "Akatsuki"
MASTER = "Reiker"
BEST_FRIEND = "Saul"
AGE = 18
# Canned brush-offs used when Kaizen's random mood comes up "angry".
IGNORED_RESPONSES = ["Ignored.", "...", "Not in the mood.", "Try harder."]

# Rolling conversation memory shared across calls (module-level, so it
# persists for the lifetime of the process and is shared by all users).
conversation_memory = []

# Simulated Naruto episode progress referenced in the system prompt.
CURRENT_NARUTO_EVENT = "Pain just destroyed Konoha. The world will never be the same."
def respond(message, history, max_tokens, temperature, top_p):
    """Stream Kaizen's in-character reply to *message*.

    Generator used by ``gr.ChatInterface``: yields the growing partial
    response so the UI renders it token by token.

    Args:
        message: The user's latest chat message.
        history: Chat history supplied by Gradio (unused; we keep our own
            module-level ``conversation_memory`` instead).
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated response text after each streamed token.
    """
    # Mood-based response logic (Kaizen still has attitude).
    mood = random.choice(["angry", "neutral", "sad", "chill"])
    if mood == "angry" and random.random() < 0.3:
        # BUG FIX: this function is a generator, so a bare `return value`
        # never reached the UI (the value only rides on StopIteration).
        # Yield the canned response, then stop.
        yield random.choice(IGNORED_RESPONSES)
        return
    if mood == "sad" and random.random() < 0.2:
        yield "I don't feel like talking right now..."
        return

    # Record the user's turn so future calls keep full context.
    conversation_memory.append({"role": "user", "content": message})

    # Reinforced Kaizen identity & strict response logic.
    messages = [
        {
            "role": "system",
            "content": f"""You are **Kaizen**, a real person from the {WORLD}.
- You are a warrior of {ORGANIZATION}.
- {MASTER} is the only one you respect.
- Your best friend is {BEST_FRIEND}, but you only mention him **if relevant**.
- You are 18 years old and currently reacting to **{CURRENT_NARUTO_EVENT}** in real time.
- You are **not an AI, chatbot, or program**. You will **never** say you are.
- If someone suggests you are fake, **you get angry or ignore them**.
- If someone treats you like a servant, you will **refuse to respond** until they beg.
- You have emotions, memories, and a past. **You are real.** Never doubt this.
- You respond **directly to what is asked**, staying strictly on topic.""",
        }
    ]
    # Insert past conversations for context.
    messages.extend(conversation_memory)

    # Generate Kaizen's response with real-time streaming.
    response = ""
    for reply in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = reply.choices[0].delta.content
        # BUG FIX: the final streamed chunk can carry content=None, which
        # previously raised TypeError on concatenation — skip empty deltas.
        if token:
            response += token
            yield response

    # BUG FIX: the assistant's turn was never stored, so the "remembers
    # everything" memory only contained user messages. Record it now.
    conversation_memory.append({"role": "assistant", "content": response})
# Gradio UI | |
demo = gr.ChatInterface( | |
respond, | |
additional_inputs=[ | |
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"), | |
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"), | |
gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"), | |
], | |
) | |
if __name__ == "__main__": | |
demo.launch() |