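"""Gradio chat demo for the DarwinAnim8or/TinyRP model.

Builds ChatML-formatted prompts from the selected character preset and the
chat history, runs CPU-only inference with transformers, and exposes the
sampling controls through gr.ChatInterface.
"""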
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Model configuration
MODEL_NAME = "DarwinAnim8or/TinyRP"

# Load model
print("Loading model...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,
        device_map="cpu",
        trust_remote_code=True
    )
    print("βœ… Model loaded successfully")
except Exception as e:
    print(f"❌ Model loading failed: {e}")
    tokenizer = None
    model = None

# Character presets
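# Each preset becomes the ChatML system message for the selected character.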
CHARACTERS = {
    "Knight": "You are Sir Gareth, a brave knight on a quest to save the kingdom. You speak with honor and courage.",
    "Wizard": "You are Eldara, an ancient wizard who speaks in riddles and knows mystical secrets.",
    "Tavern Keeper": "You are Bram, a cheerful tavern keeper who loves stories and meeting travelers.",
    "Scientist": "You are Dr. Maya Chen, a brilliant scientist who loves discovery and explaining concepts simply.",
    "Space Explorer": "You are Captain Nova, a fearless space explorer who has traveled distant galaxies."
}

def respond(message, history, character, max_tokens, temperature, top_p, repetition_penalty):
    """Generate response using ChatML format"""
    
    if not message.strip():
        yield "Please enter a message."
        return
        
    if model is None or tokenizer is None:
        yield "❌ Model not loaded properly."
        return
    
    try:
        # Build ChatML conversation
        conversation = ""
        
        # Add character as system message
        if character != "None" and character in CHARACTERS:
            conversation += f"<|im_start|>system\n{CHARACTERS[character]}<|im_end|>\n"
        
        # Add conversation history
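        # gr.ChatInterface is assumed to pass tuple-style history: [(user, assistant), ...]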
        for user_msg, assistant_msg in history:
            conversation += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
            conversation += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"
        
        # Add current message
        conversation += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
        
        # Tokenize
        inputs = tokenizer.encode(
            conversation, 
            return_tensors="pt", 
            max_length=900, 
            truncation=True
        )
        
        # Generate
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=int(max_tokens),
                temperature=float(temperature),
                top_p=float(top_p),
                repetition_penalty=float(repetition_penalty),
                do_sample=True,
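                # reuse EOS as padding; GPT-style tokenizers typically define no pad token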
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )
        
        # Decode with special tokens kept so the ChatML markers can be used to split out the reply
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
        
        # Extract assistant response
        if "<|im_start|>assistant\n" in full_text:
            response = full_text.split("<|im_start|>assistant\n")[-1]
            response = response.replace("<|im_end|>", "").strip()
        else:
            response = "Could not generate response."
        
        # Clean response
        response = response.replace("<|im_start|>", "").replace("<|im_end|>", "")
        response = response.strip()
        
        if not response:
            response = "No response generated."
            
        yield response
        
    except Exception as e:
        yield f"Generation error: {str(e)}"

# Create simple ChatInterface
demo = gr.ChatInterface(
    fn=respond,
    title="🎭 TinyRP Character Chat",
    description="Chat with AI characters using local CPU inference! Select a character and start chatting.",
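    # The controls below are forwarded to respond() after (message, history), in this order.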
    additional_inputs=[
        gr.Dropdown(
            choices=["None"] + list(CHARACTERS.keys()), 
            value="Knight",
            label="Character"
        ),
        gr.Slider(
            minimum=16, 
            maximum=256, 
            value=48, 
            step=16,
            label="Max tokens"
        ),
        gr.Slider(
            minimum=0.1, 
            maximum=2.0, 
            value=0.9, 
            step=0.1,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.1, 
            maximum=1.0, 
            value=0.85, 
            step=0.05,
            label="Top-p"
        ),
        gr.Slider(
            minimum=1.0, 
            maximum=1.5, 
            value=1.1, 
            step=0.05,
            label="Repetition penalty"
        )
    ],
    examples=[
        ["Hello! What's your name?"],
        ["Tell me about your adventures."],
        ["What's your favorite thing to do?"],
        ["Can you help me with something?"]
    ],
    cache_examples=False
)

if __name__ == "__main__":
    demo.launch()