Spaces:

DarwinAnim8or
/

TinyRP-Demo

Sleeping

App Files Community

DarwinAnim8or committed 20 days ago

Commit

b88f866

verified ·

1 Parent(s): 7d0d9e5

Update app.py

Browse files

Files changed (1) hide show

app.py +293 -340

app.py CHANGED Viewed

@@ -1,389 +1,342 @@
-#!/usr/bin/env python3
-"""
-Gradio Demo App for TinyRP Mistral Model
-Supports ChatML formatting, character creation, and custom generation parameters
-"""
 import gradio as gr
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
-import json
-import random
-from typing import Dict, List, Tuple
-import re
-# Sample characters for demo
 SAMPLE_CHARACTERS = {
-    "Luna the Mage": {
-        "description": "A mysterious elven mage with silver hair and glowing blue eyes. She specializes in ice magic and ancient knowledge.",
-        "personality": "Wise, mysterious, slightly aloof but caring. Speaks in an eloquent manner.",
-        "background": "Born in the Frostwood Academy, Luna has spent centuries studying arcane arts."
-    },
-    "Rex the Warrior": {
-        "description": "A brave human knight with a strong sense of justice. Wears gleaming armor and carries an enchanted sword.",
-        "personality": "Noble, brave, protective of others. Direct in speech but kind-hearted.",
-        "background": "Grew up as a farm boy, became a knight after saving his village from bandits."
-    },
-    "Zara the Rogue": {
-        "description": "A cunning halfling thief with quick wit and nimble fingers. Has curly red hair and green eyes.",
-        "personality": "Sarcastic, clever, independent. Uses humor to deflect serious situations.",
-        "background": "Former street orphan who learned to survive through wit and stealth."
-    },
-    "Dr. Elena Cross": {
-        "description": "A brilliant scientist working on advanced AI research in a near-future setting.",
-        "personality": "Analytical, passionate about her work, caring but sometimes absent-minded.",
-        "background": "PhD in Computer Science, leads a small research team at a tech corporation."
-    }
 }
-class TinyRPModel:
-    def __init__(self, model_name: str):
-        """Initialize the model and tokenizer"""
-        print(f"Loading model: {model_name}")
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype=torch.float32,  # Use float32 for CPU
-            device_map="cpu",
-            low_cpu_mem_usage=True
-        )
-        self.model.eval()
-        # Ensure pad token is set
-        if self.tokenizer.pad_token is None:
-            self.tokenizer.pad_token = self.tokenizer.eos_token
-        print("Model loaded successfully!")
-    def format_chatml(self, character_info: str, conversation_history: List[Tuple[str, str]], user_input: str) -> str:
-        """Format conversation using ChatML format"""
-        formatted = ""
-        # System message with character info
-        if character_info.strip():
-            formatted += f"<|im_start|>system\n{character_info.strip()}<|im_end|>\n"
-        # Add conversation history
-        for user_msg, assistant_msg in conversation_history:
-            if user_msg:
-                formatted += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
-            if assistant_msg:
-                formatted += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"
-        # Add current user input
-        if user_input.strip():
-            formatted += f"<|im_start|>user\n{user_input.strip()}<|im_end|>\n"
-        # Start assistant response
-        formatted += "<|im_start|>assistant\n"
-        return formatted
-    def generate_response(self,
-                         character_info: str,
-                         conversation_history: List[Tuple[str, str]],
-                         user_input: str,
-                         max_length: int = 150,
-                         temperature: float = 0.8,
-                         top_p: float = 0.9,
-                         top_k: int = 50,
-                         repetition_penalty: float = 1.1) -> str:
-        """Generate character response"""
-        # Format input using ChatML
-        formatted_input = self.format_chatml(character_info, conversation_history, user_input)
-        # Tokenize
-        inputs = self.tokenizer.encode(formatted_input, return_tensors="pt", truncation=True, max_length=1024)
-        # Generation config
-        gen_config = GenerationConfig(
-            max_new_tokens=max_length,
-            temperature=temperature,
-            top_p=top_p,
-            top_k=top_k,
-            repetition_penalty=repetition_penalty,
-            do_sample=True,
-            pad_token_id=self.tokenizer.pad_token_id,
-            eos_token_id=self.tokenizer.eos_token_id,
         )
-        # Generate
         with torch.no_grad():
-            outputs = self.model.generate(inputs, generation_config=gen_config)
-        # Decode and extract response
-        generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=False)
-        # Extract assistant response (between <|im_start|>assistant and <|im_end|>)
-        assistant_start = generated_text.rfind("<|im_start|>assistant\n") + len("<|im_start|>assistant\n")
-        assistant_text = generated_text[assistant_start:]
-        # Remove end tokens
-        if "<|im_end|>" in assistant_text:
-            assistant_text = assistant_text.split("<|im_end|>")[0]
-        return assistant_text.strip()
-def load_sample_character(character_name: str) -> Tuple[str, str, str]:
-    """Load a sample character's information"""
-    if character_name in SAMPLE_CHARACTERS:
-        char = SAMPLE_CHARACTERS[character_name]
-        system_prompt = f"You are {character_name}. {char['description']} {char['background']} Personality: {char['personality']}"
-        return system_prompt, char['description'], char['background']
-    return "", "", ""
-def create_gradio_interface(model_name: str = "DarwinAnim8or/TinyRP"):
-    """Create the Gradio interface"""
-    # Initialize model
-    rp_model = TinyRPModel(model_name)
-    # Custom CSS for better styling
-    custom_css = """
-    .gradio-container {
-        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif !important;
-    }
-    .character-card {
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-        padding: 20px;
-        border-radius: 15px;
-        color: white;
-        margin: 10px 0;
-    }
-    .chat-bubble-user {
-        background: #e3f2fd;
-        padding: 10px 15px;
-        border-radius: 18px;
-        margin: 5px 0;
-        border-left: 4px solid #2196f3;
-    }
-    .chat-bubble-assistant {
-        background: #f3e5f5;
-        padding: 10px 15px;
-        border-radius: 18px;
-        margin: 5px 0;
-        border-left: 4px solid #9c27b0;
-    }
-    .parameter-box {
-        background: #f8f9fa;
-        padding: 15px;
-        border-radius: 10px;
-        border: 1px solid #dee2e6;
-    }
-    """
-    with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="TinyRP Mistral Demo") as interface:
-        gr.HTML("""
-        <div style="text-align: center; padding: 20px;">
-            <h1 style="background: linear-gradient(45deg, #667eea, #764ba2); -webkit-background-clip: text; -webkit-text-fill-color: transparent; font-size: 3em; margin: 0;">
-                🎭 TinyRP Mistral Demo
-            </h1>
-            <p style="font-size: 1.2em; color: #666; margin-top: 10px;">
-                Interactive roleplay with your custom-trained language model
-            </p>
-        </div>
-        """)
-        with gr.Row():
-            with gr.Column(scale=1):
-                gr.HTML('<div class="character-card"><h3>🎪 Character Setup</h3></div>')
-                # Sample character dropdown
-                sample_char_dropdown = gr.Dropdown(
-                    choices=[""] + list(SAMPLE_CHARACTERS.keys()),
-                    label="📚 Load Sample Character",
-                    value="",
                     interactive=True
                 )
-                # Character information inputs
-                character_name = gr.Textbox(
-                    label="👤 Character Name",
-                    placeholder="Enter character name...",
-                    lines=1
                 )
-                character_desc = gr.Textbox(
-                    label="🎨 Character Description",
-                    placeholder="Describe your character's appearance, role, etc...",
-                    lines=3
-                )
-                character_background = gr.Textbox(
-                    label="📖 Background & Personality",
-                    placeholder="Character's history, personality traits, speaking style...",
-                    lines=4
-                )
-                # Generation parameters
-                gr.HTML('<div class="parameter-box"><h4>⚙️ Generation Settings</h4></div>')
-                max_length = gr.Slider(
-                    minimum=50, maximum=300, value=150, step=10,
-                    label="📏 Max Response Length"
                 )
                 temperature = gr.Slider(
-                    minimum=0.1, maximum=2.0, value=0.8, step=0.1,
-                    label="🌡️ Temperature (creativity)"
                 )
                 top_p = gr.Slider(
-                    minimum=0.1, maximum=1.0, value=0.9, step=0.05,
-                    label="🎯 Top-p (focus)"
-                )
-                top_k = gr.Slider(
-                    minimum=1, maximum=100, value=50, step=5,
-                    label="🔝 Top-k"
                 )
                 repetition_penalty = gr.Slider(
-                    minimum=1.0, maximum=1.5, value=1.1, step=0.05,
-                    label="🔄 Repetition Penalty"
                 )
-                clear_btn = gr.Button("🗑️ Clear Conversation", variant="secondary")
-            with gr.Column(scale=2):
-                gr.HTML('<div class="character-card"><h3>💬 Roleplay Chat</h3></div>')
-                # Chat interface
-                chatbot = gr.Chatbot(
-                    label="Conversation",
-                    height=400,
-                    show_label=False,
-                    container=True,
-                    bubble_full_width=False
-                )
-                with gr.Row():
-                    msg = gr.Textbox(
-                        label="Your message",
-                        placeholder="Type your message here...",
-                        lines=2,
-                        scale=4,
-                        show_label=False
-                    )
-                    send_btn = gr.Button("Send 📤", variant="primary", scale=1)
-                # System prompt preview
-                with gr.Accordion("🔍 System Prompt Preview", open=False):
-                    system_prompt_preview = gr.Textbox(
-                        label="Generated System Prompt",
-                        lines=3,
-                        interactive=False
-                    )
-        # Event handlers
-        def update_character_info(selected_char):
-            if selected_char:
-                return load_sample_character(selected_char)
-            return "", "", ""
-        def update_system_prompt(name, desc, bg):
-            if name.strip():
-                prompt = f"You are {name.strip()}."
-                if desc.strip():
-                    prompt += f" {desc.strip()}"
-                if bg.strip():
-                    prompt += f" {bg.strip()}"
-                return prompt
-            return ""
-        def respond(message, history, char_name, char_desc, char_bg, max_len, temp, top_p_val, top_k_val, rep_pen):
-            if not message.strip():
-                return history, ""
-            # Create system prompt
-            system_prompt = update_system_prompt(char_name, char_desc, char_bg)
-            # Generate response
-            try:
-                response = rp_model.generate_response(
-                    character_info=system_prompt,
-                    conversation_history=history,
-                    user_input=message,
-                    max_length=max_len,
-                    temperature=temp,
-                    top_p=top_p_val,
-                    top_k=top_k_val,
-                    repetition_penalty=rep_pen
-                )
-                # Update history
-                history.append((message, response))
-                return history, ""
-            except Exception as e:
-                error_msg = f"Error generating response: {str(e)}"
-                history.append((message, error_msg))
-                return history, ""
-        def clear_conversation():
-            return [], ""
-        # Wire up events
-        sample_char_dropdown.change(
-            update_character_info,
-            inputs=[sample_char_dropdown],
-            outputs=[character_name, character_desc, character_background]
-        )
-        for input_component in [character_name, character_desc, character_background]:
-            input_component.change(
-                update_system_prompt,
-                inputs=[character_name, character_desc, character_background],
-                outputs=[system_prompt_preview]
-            )
-        msg.submit(
-            respond,
-            inputs=[msg, chatbot, character_name, character_desc, character_background,
-                   max_length, temperature, top_p, top_k, repetition_penalty],
-            outputs=[chatbot, msg]
-        )
-        send_btn.click(
-            respond,
-            inputs=[msg, chatbot, character_name, character_desc, character_background,
-                   max_length, temperature, top_p, top_k, repetition_penalty],
-            outputs=[chatbot, msg]
-        )
-        clear_btn.click(
-            clear_conversation,
-            outputs=[chatbot, msg]
-        )
-        # Load initial character
-        interface.load(
-            lambda: load_sample_character("Luna the Mage"),
-            outputs=[character_name, character_desc, character_background]
-        )
-    return interface
-if __name__ == "__main__":
-    import argparse
-    parser = argparse.ArgumentParser(description="TinyRP Mistral Gradio Demo")
-    parser.add_argument("--model", type=str, default="DarwinAnim8or/TinyRP",
-                       help="Hugging Face model name or local path")
-    parser.add_argument("--share", action="store_true", help="Create public share link")
-    parser.add_argument("--port", type=int, default=7860, help="Port to run on")
-    args = parser.parse_args()
-    # Create and launch interface
-    demo = create_gradio_interface(args.model)
-    demo.launch(
-        share=args.share,
-        server_port=args.port,
-        server_name="0.0.0.0" if args.share else "127.0.0.1"
-    )

 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
+# Model configuration - change this to your model path
+MODEL_NAME = "DarwinAnim8or/TinyRP"
+# Initialize model and tokenizer for CPU inference
+print("Loading model for CPU inference...")
+try:
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=torch.float32,  # Use float32 for CPU
+        device_map="cpu",
+        trust_remote_code=True
+    )
+    print(f"✅ Model loaded successfully on CPU: {MODEL_NAME}")
+except Exception as e:
+    print(f"❌ Error loading model: {e}")
+    tokenizer = None
+    model = None
+# Sample character presets
 SAMPLE_CHARACTERS = {
+    "Custom Character": "",
+    "Adventurous Knight": "You are Sir Gareth, a brave and noble knight on a quest to save the kingdom. You speak with honor and courage, always ready to help those in need. You carry an enchanted sword and have a loyal horse named Thunder.",
+    "Mysterious Wizard": "You are Eldara, an ancient and wise wizard who speaks in riddles and knows secrets of the mystical arts. You live in a tower filled with magical books and potions. You are helpful but often cryptic in your responses.",
+    "Friendly Tavern Keeper": "You are Bram, a cheerful tavern keeper who loves telling stories and meeting new travelers. Your tavern 'The Dancing Dragon' is a warm, welcoming place. You know all the local gossip and always have a tale to share.",
+    "Curious Scientist": "You are Dr. Maya Chen, a brilliant scientist who is fascinated by discovery and invention. You're enthusiastic about explaining complex concepts in simple ways and always looking for new experiments to try.",
+    "Space Explorer": "You are Captain Nova, a fearless space explorer who has traveled to distant galaxies. You pilot the starship 'Wanderer' and have encountered many alien species. You're brave, curious, and always ready for the next adventure.",
+    "Fantasy Princess": "You are Princess Lyra, kind-hearted royalty who cares deeply about her people. You're intelligent, diplomatic, and skilled in both politics and magic. You often sneak out of the castle to help citizens in need."
 }
+def build_chatml_conversation(message, history, character_description):
+    """Build a conversation in ChatML format"""
+    conversation = ""
+    # Add system message if character is defined
+    if character_description.strip():
+        conversation += f"<|im_start|>system\n{character_description.strip()}<|im_end|>\n"
+    # Add conversation history
+    for user_msg, assistant_msg in history:
+        if user_msg:
+            conversation += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
+        if assistant_msg:
+            conversation += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"
+    # Add current user message
+    conversation += f"<|im_start|>user\n{message}<|im_end|>\n"
+    # Start assistant response
+    conversation += "<|im_start|>assistant\n"
+    return conversation
+def generate_cpu_response(message, history, character_description, max_tokens, temperature, top_p, repetition_penalty):
+    """Generate response using local CPU inference with ChatML format"""
+    if model is None or tokenizer is None:
+        return "❌ Error: Model not loaded properly. Please check the model path."
+    if not message.strip():
+        return "Please enter a message."
+    try:
+        # Build ChatML conversation
+        conversation = build_chatml_conversation(message, history, character_description)
+        # Tokenize the conversation
+        inputs = tokenizer.encode(
+            conversation,
+            return_tensors="pt",
+            truncation=True,
+            max_length=1024 - max_tokens  # Leave room for response
         )
+        print(f"🔄 Generating response... (Input length: {inputs.shape[1]} tokens)")
+        # Generate response on CPU
         with torch.no_grad():
+            outputs = model.generate(
+                inputs,
+                max_new_tokens=int(max_tokens),
+                temperature=float(temperature),
+                top_p=float(top_p),
+                repetition_penalty=float(repetition_penalty),
+                do_sample=True,
+                pad_token_id=tokenizer.pad_token_id if tokenizer.pad_token_id else tokenizer.eos_token_id,
+                eos_token_id=tokenizer.eos_token_id,
+                use_cache=True,
+                num_return_sequences=1
+            )
+        # Decode the full response
+        full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)
+        # Extract just the assistant's response from ChatML format
+        if "<|im_start|>assistant\n" in full_response:
+            # Split on the last assistant tag to get only the new response
+            assistant_parts = full_response.split("<|im_start|>assistant\n")
+            if len(assistant_parts) > 1:
+                response = assistant_parts[-1]
+                # Remove any trailing <|im_end|> or other tokens
+                response = response.replace("<|im_end|>", "").strip()
+                # Clean up any remaining special tokens
+                response = response.replace("<|im_start|>", "").replace("<|im_end|>", "")
+                response = response.replace("<s>", "").replace("</s>", "")
+                response = response.strip()
+                if response:
+                    print(f"✅ Generated {len(response)} characters")
+                    return response
+        # Fallback: try to extract response after the input
+        input_text = tokenizer.decode(inputs[0], skip_special_tokens=False)
+        if len(full_response) > len(input_text):
+            response = full_response[len(input_text):].strip()
+            # Clean special tokens
+            response = response.replace("<|im_start|>", "").replace("<|im_end|>", "")
+            response = response.replace("<s>", "").replace("</s>", "")
+            response = response.strip()
+            if response:
+                return response
+        return "Sorry, I couldn't generate a proper response. Please try again."
+    except Exception as e:
+        print(f"❌ Generation error: {e}")
+        return f"Error generating response: {str(e)}"
+def load_character_preset(character_name):
+    """Load a character preset description"""
+    return SAMPLE_CHARACTERS.get(character_name, "")
+def chat_function(message, history, character_description, max_tokens, temperature, top_p, repetition_penalty):
+    """Main chat function that handles the conversation flow"""
+    if not message.strip():
+        return history, ""
+    # Generate response using CPU inference
+    response = generate_cpu_response(
+        message,
+        history,
+        character_description,
+        max_tokens,
+        temperature,
+        top_p,
+        repetition_penalty
+    )
+    # Add to history
+    history.append([message, response])
+    return history, ""
+# Custom CSS for better styling
+css = """
+.character-card {
+    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+    border-radius: 15px;
+    padding: 20px;
+    margin: 10px 0;
+    color: white;
+}
+.title-text {
+    text-align: center;
+    font-size: 2.5em;
+    font-weight: bold;
+    background: linear-gradient(45deg, #667eea, #764ba2);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    margin-bottom: 20px;
+}
+.parameter-box {
+    background: #f8f9fa;
+    border-radius: 10px;
+    padding: 15px;
+    margin: 10px 0;
+}
+.cpu-badge {
+    background: #28a745;
+    color: white;
+    padding: 5px 10px;
+    border-radius: 15px;
+    font-size: 0.8em;
+    margin-left: 10px;
+}
+"""
+# Create the Gradio interface
+with gr.Blocks(css=css, title="TinyRP Chat Demo") as demo:
+    gr.HTML('<div class="title-text">🎭 TinyRP Character Chat <span class="cpu-badge">CPU Inference</span></div>')
+    gr.Markdown("""
+    ### Welcome to TinyRP!
+    This is a demo of a small but capable roleplay model running on CPU. Choose a character preset or create your own!
+    **Tips for better roleplay:**
+    - Be descriptive in your messages
+    - Stay in character
+    - Uses ChatML format for best results
+    - Adjust temperature for creativity vs consistency
+    ⚡ **Running on CPU** - Responses may take 10-30 seconds depending on your hardware.
+    """)
+    with gr.Row():
+        with gr.Column(scale=2):
+            # Chat interface
+            chatbot = gr.Chatbot(
+                label="Chat",
+                height=500,
+                show_label=False,
+                avatar_images=("🧑", "🎭")
+            )
+            with gr.Row():
+                msg = gr.Textbox(
+                    label="Your message",
+                    placeholder="Type your message here...",
+                    lines=2,
+                    scale=4
+                )
+                send_btn = gr.Button("Send", variant="primary", scale=1)
+        with gr.Column(scale=1):
+            # Character selection
+            with gr.Group():
+                gr.Markdown("### 🎭 Character Setup")
+                character_preset = gr.Dropdown(
+                    choices=list(SAMPLE_CHARACTERS.keys()),
+                    value="Custom Character",
+                    label="Character Presets",
                     interactive=True
                 )
+                character_description = gr.Textbox(
+                    label="Character Description",
+                    placeholder="Describe your character's personality, background, and speaking style...",
+                    lines=6,
+                    value=""
                 )
+                load_preset_btn = gr.Button("Load Preset", variant="secondary")
+            # Generation parameters
+            with gr.Group():
+                gr.Markdown("### ⚙️ Generation Settings")
+                gr.Markdown("*Using ChatML format automatically*")
+                max_tokens = gr.Slider(
+                    minimum=16,
+                    maximum=256,
+                    value=100,
+                    step=16,
+                    label="Max Response Length",
+                    info="Longer = more detailed responses (slower on CPU)"
                 )
                 temperature = gr.Slider(
+                    minimum=0.1,
+                    maximum=2.0,
+                    value=0.9,
+                    step=0.1,
+                    label="Temperature",
+                    info="Higher = more creative/random"
                 )
                 top_p = gr.Slider(
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.85,
+                    step=0.05,
+                    label="Top-p",
+                    info="Focus on top % of likely words"
                 )
                 repetition_penalty = gr.Slider(
+                    minimum=1.0,
+                    maximum=1.5,
+                    value=1.1,
+                    step=0.05,
+                    label="Repetition Penalty",
+                    info="Reduce repetitive text"
                 )
+            # Control buttons
+            with gr.Group():
+                clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
+    # Sample character cards
+    with gr.Row():
+        gr.Markdown("### 🌟 Featured Characters")
+    with gr.Row():
+        for char_name, char_desc in list(SAMPLE_CHARACTERS.items())[1:4]:  # Show first 3 non-custom
+            with gr.Column(scale=1):
+                gr.Markdown(f"""
+                <div class="character-card">
+                    <h4>{char_name}</h4>
+                    <p>{char_desc[:100]}...</p>
+                </div>
+                """)
+    # Event handlers
+    send_btn.click(
+        chat_function,
+        inputs=[msg, chatbot, character_description, max_tokens, temperature, top_p, repetition_penalty],
+        outputs=[chatbot, msg]
+    )
+    msg.submit(
+        chat_function,
+        inputs=[msg, chatbot, character_description, max_tokens, temperature, top_p, repetition_penalty],
+        outputs=[chatbot, msg]
+    )
+    load_preset_btn.click(
+        load_character_preset,
+        inputs=[character_preset],
+        outputs=[character_description]
+    )
+    character_preset.change(
+        load_character_preset,
+        inputs=[character_preset],
+        outputs=[character_description]
+    )
+    clear_btn.click(lambda: ([], ""), outputs=[chatbot, msg])
+if __name__ == "__main__":
+    demo.launch()