# TinyRP-Demo / app.py
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Model configuration
MODEL_NAME = "DarwinAnim8or/TinyRP"

# Load model and tokenizer (CPU inference in float32)
print("Loading model...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Truncate from the left so the most recent turns survive
    # when the prompt exceeds max_length during tokenization.
    tokenizer.truncation_side = "left"
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,
        device_map="cpu",
        trust_remote_code=True,
    )
    model.eval()
    print("✅ Model loaded successfully")
except Exception as e:
    print(f"❌ Model loading failed: {e}")
    tokenizer = None
    model = None

# Character presets (used as ChatML system prompts)
CHARACTERS = {
    "Knight": "You are Sir Gareth, a brave knight on a quest to save the kingdom. You speak with honor and courage.",
    "Wizard": "You are Eldara, an ancient wizard who speaks in riddles and knows mystical secrets.",
    "Tavern Keeper": "You are Bram, a cheerful tavern keeper who loves stories and meeting travelers.",
    "Scientist": "You are Dr. Maya Chen, a brilliant scientist who loves discovery and explaining concepts simply.",
    "Space Explorer": "You are Captain Nova, a fearless space explorer who has traveled distant galaxies."
}
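
# Additional personas can be registered the same way; e.g. (hypothetical entry):
# CHARACTERS["Pirate"] = "You are Captain Flint, a salty pirate who speaks in nautical slang."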

def respond(message, history, character, max_tokens, temperature, top_p, repetition_penalty):
    """Generate a response using the ChatML prompt format."""
    if not message.strip():
        yield "Please enter a message."
        return
    if model is None or tokenizer is None:
        yield "❌ Model not loaded properly."
        return
    try:
        # Build the ChatML conversation string
        conversation = ""

        # Add the selected character as the system message
        if character != "None" and character in CHARACTERS:
            conversation += f"<|im_start|>system\n{CHARACTERS[character]}<|im_end|>\n"

        # Add prior turns (ChatInterface passes history as [user, assistant] pairs)
        for user_msg, assistant_msg in history:
            conversation += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
            conversation += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"

        # Add the current message and open the assistant turn
        conversation += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

        # Tokenize; also keep the attention mask so generate() can use it
        enc = tokenizer(
            conversation,
            return_tensors="pt",
            max_length=900,
            truncation=True,
        )
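
        # Sampling knobs: temperature rescales the logits, top_p samples from the
        # smallest set of tokens whose cumulative probability exceeds p, and
        # repetition_penalty down-weights tokens already present in the context.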

        # Generate (pad_token_id falls back to EOS to avoid the missing-pad warning)
        with torch.no_grad():
            outputs = model.generate(
                enc.input_ids,
                attention_mask=enc.attention_mask,
                max_new_tokens=int(max_tokens),
                temperature=float(temperature),
                top_p=float(top_p),
                repetition_penalty=float(repetition_penalty),
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens (everything past the prompt)
        new_tokens = outputs[0][enc.input_ids.shape[-1]:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=False)

        # Cut at the end-of-turn marker and strip any stray ChatML tags
        response = response.split("<|im_end|>")[0]
        response = response.replace("<|im_start|>", "").strip()

        if not response:
            response = "No response generated."
        yield response
    except Exception as e:
        yield f"Generation error: {e}"

# Create the ChatInterface UI
demo = gr.ChatInterface(
    fn=respond,
    title="🎭 TinyRP Character Chat",
    description="Chat with AI characters using local CPU inference! Select a character and start chatting.",
    additional_inputs=[
        gr.Dropdown(
            choices=["None"] + list(CHARACTERS.keys()),
            value="Knight",
            label="Character"
        ),
        gr.Slider(
            minimum=16,
            maximum=256,
            value=48,
            step=16,
            label="Max tokens"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.9,
            step=0.1,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.85,
            step=0.05,
            label="Top-p"
        ),
        gr.Slider(
            minimum=1.0,
            maximum=1.5,
            value=1.1,
            step=0.05,
            label="Repetition penalty"
        )
    ],
    examples=[
        ["Hello! What's your name?"],
        ["Tell me about your adventures."],
        ["What's your favorite thing to do?"],
        ["Can you help me with something?"]
    ],
    cache_examples=False
)
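
# demo.launch(share=True) would additionally expose a temporary public URL when
# running locally; the defaults are sufficient on Hugging Face Spaces.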
if __name__ == "__main__":
    demo.launch()