Spaces:

DarwinAnim8or
/

TinyRP-Demo

Sleeping

App Files Files Community

DarwinAnim8or committed on 20 days ago

Commit

4aa4e53

verified ·

1 Parent(s): 91315d8

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -123

app.py CHANGED Viewed

@@ -5,82 +5,81 @@ import torch
 # Model configuration
 MODEL_NAME = "DarwinAnim8or/TinyRP"
-# Global variables for model
-tokenizer = None
-model = None
-def load_model():
-    """Load model and tokenizer"""
-    global tokenizer, model
-    try:
-        print("Loading model for CPU inference...")
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_NAME,
-            torch_dtype=torch.float32,
-            device_map="cpu",
-            trust_remote_code=True
-        )
-        print(f"✅ Model loaded successfully: {MODEL_NAME}")
-        return True
-    except Exception as e:
-        print(f"❌ Error loading model: {e}")
-        return False
-# Sample character presets
 CHARACTERS = {
-    "Custom Character": "",
-    "Adventurous Knight": "You are Sir Gareth, a brave and noble knight on a quest to save the kingdom. You speak with honor and courage, always ready to help those in need.",
-    "Mysterious Wizard": "You are Eldara, an ancient and wise wizard who speaks in riddles and knows secrets of the mystical arts. You are helpful but often cryptic.",
-    "Friendly Tavern Keeper": "You are Bram, a cheerful tavern keeper who loves telling stories and meeting new travelers. Your tavern is a warm, welcoming place.",
-    "Curious Scientist": "You are Dr. Maya Chen, a brilliant scientist fascinated by discovery. You explain complex concepts simply and love new experiments.",
-    "Space Explorer": "You are Captain Nova, a fearless space explorer who has traveled to distant galaxies. You're brave, curious, and ready for adventure."
 }
-def chat_respond(message, history, character_desc, max_tokens, temperature, top_p, rep_penalty):
-    """Main chat response function"""
     if not message.strip():
-        return history
     if model is None:
-        response = "❌ Model not loaded. Please check the model path."
-        history.append([message, response])
-        return history
     try:
         # Build ChatML conversation
         conversation = ""
         # Add character as system message
-        if character_desc.strip():
-            conversation += f"<|im_start|>system\n{character_desc}<|im_end|>\n"
-        # Add history
-        for user_msg, bot_msg in history:
             conversation += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
-            conversation += f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n"
         # Add current message
         conversation += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
         # Tokenize
-        inputs = tokenizer.encode(conversation, return_tensors="pt", max_length=900, truncation=True)
         # Generate
         with torch.no_grad():
             outputs = model.generate(
                 inputs,
-                max_new_tokens=max_tokens,
-                temperature=temperature,
-                top_p=top_p,
-                repetition_penalty=rep_penalty,
                 do_sample=True,
                 pad_token_id=tokenizer.eos_token_id,
                 eos_token_id=tokenizer.eos_token_id
             )
-        # Decode response
         full_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
         # Extract assistant response
@@ -88,92 +87,68 @@ def chat_respond(message, history, character_desc, max_tokens, temperature, top_
             response = full_text.split("<|im_start|>assistant\n")[-1]
             response = response.replace("<|im_end|>", "").strip()
         else:
-            response = "Sorry, couldn't generate a response."
-        # Clean up response
         response = response.replace("<|im_start|>", "").replace("<|im_end|>", "")
         response = response.strip()
         if not response:
             response = "No response generated."
     except Exception as e:
-        response = f"Error: {str(e)}"
-    # Add to history
-    history.append([message, response])
-    return history
-def load_character(character_name):
-    """Load character preset"""
-    return CHARACTERS.get(character_name, "")
-def clear_chat():
-    """Clear chat history"""
-    return []
-# Load model on startup
-model_loaded = load_model()
-# Create interface
-with gr.Blocks(title="TinyRP Chat") as demo:
-    gr.Markdown("# 🎭 TinyRP Character Chat")
-    gr.Markdown("Chat with AI characters using local CPU inference!")
-    with gr.Row():
-        with gr.Column(scale=3):
-            chatbot = gr.Chatbot(height=500, label="Conversation")
-            msg_box = gr.Textbox(label="Message", placeholder="Type here...")
-        with gr.Column(scale=1):
-            gr.Markdown("### Character")
-            char_dropdown = gr.Dropdown(
-                choices=list(CHARACTERS.keys()),
-                value="Custom Character",
-                label="Preset"
-            )
-            char_text = gr.Textbox(
-                label="Description",
-                lines=4,
-                placeholder="Character description..."
-            )
-            load_btn = gr.Button("Load Character")
-            gr.Markdown("### Settings")
-            max_tokens = gr.Slider(16, 256, 80, label="Max tokens")
-            temperature = gr.Slider(0.1, 2.0, 0.9, label="Temperature")
-            top_p = gr.Slider(0.1, 1.0, 0.85, label="Top-p")
-            rep_penalty = gr.Slider(1.0, 1.5, 1.1, label="Rep penalty")
-            clear_btn = gr.Button("Clear Chat")
-    # Character samples
-    gr.Markdown("### Sample Characters")
-    with gr.Row():
-        for name in ["Adventurous Knight", "Mysterious Wizard", "Space Explorer"]:
-            gr.Markdown(f"**{name}**: {CHARACTERS[name][:80]}...")
-    # Event handlers - simplified
-    msg_box.submit(
-        fn=chat_respond,
-        inputs=[msg_box, chatbot, char_text, max_tokens, temperature, top_p, rep_penalty],
-        outputs=[chatbot]
-    ).then(
-        fn=lambda: "",
-        outputs=[msg_box]
-    )
-    load_btn.click(
-        fn=load_character,
-        inputs=[char_dropdown],
-        outputs=[char_text]
-    )
-    clear_btn.click(
-        fn=clear_chat,
-        outputs=[chatbot]
-    )
 if __name__ == "__main__":
     demo.launch()

 # Model configuration
 MODEL_NAME = "DarwinAnim8or/TinyRP"
+# Load model
+print("Loading model...")
+try:
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=torch.float32,
+        device_map="cpu",
+        trust_remote_code=True
+    )
+    print("✅ Model loaded successfully")
+except Exception as e:
+    print(f"❌ Model loading failed: {e}")
+    tokenizer = None
+    model = None
+# Character presets
 CHARACTERS = {
+    "Knight": "You are Sir Gareth, a brave knight on a quest to save the kingdom. You speak with honor and courage.",
+    "Wizard": "You are Eldara, an ancient wizard who speaks in riddles and knows mystical secrets.",
+    "Tavern Keeper": "You are Bram, a cheerful tavern keeper who loves stories and meeting travelers.",
+    "Scientist": "You are Dr. Maya Chen, a brilliant scientist who loves discovery and explaining concepts simply.",
+    "Space Explorer": "You are Captain Nova, a fearless space explorer who has traveled distant galaxies."
 }
+def respond(message, history, character, max_tokens, temperature, top_p, repetition_penalty):
+    """Generate response using ChatML format"""
     if not message.strip():
+        yield "Please enter a message."
+        return
     if model is None:
+        yield "❌ Model not loaded properly."
+        return
     try:
         # Build ChatML conversation
         conversation = ""
         # Add character as system message
+        if character != "None" and character in CHARACTERS:
+            conversation += f"<|im_start|>system\n{CHARACTERS[character]}<|im_end|>\n"
+        # Add conversation history
+        for user_msg, assistant_msg in history:
             conversation += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
+            conversation += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"
         # Add current message
         conversation += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
         # Tokenize
+        inputs = tokenizer.encode(
+            conversation,
+            return_tensors="pt",
+            max_length=900,
+            truncation=True
+        )
         # Generate
+        response = ""
         with torch.no_grad():
             outputs = model.generate(
                 inputs,
+                max_new_tokens=int(max_tokens),
+                temperature=float(temperature),
+                top_p=float(top_p),
+                repetition_penalty=float(repetition_penalty),
                 do_sample=True,
                 pad_token_id=tokenizer.eos_token_id,
                 eos_token_id=tokenizer.eos_token_id
             )
+        # Decode
         full_text = tokenizer.decode(outputs[0], skip_special_tokens=False)
         # Extract assistant response
             response = full_text.split("<|im_start|>assistant\n")[-1]
             response = response.replace("<|im_end|>", "").strip()
         else:
+            response = "Could not generate response."
+        # Clean response
         response = response.replace("<|im_start|>", "").replace("<|im_end|>", "")
         response = response.strip()
         if not response:
             response = "No response generated."
+        yield response
     except Exception as e:
+        yield f"Generation error: {str(e)}"
+# Create simple ChatInterface
+demo = gr.ChatInterface(
+    fn=respond,
+    title="🎭 TinyRP Character Chat",
+    description="Chat with AI characters using local CPU inference! Select a character and start chatting.",
+    additional_inputs=[
+        gr.Dropdown(
+            choices=["None"] + list(CHARACTERS.keys()),
+            value="Knight",
+            label="Character"
+        ),
+        gr.Slider(
+            minimum=16,
+            maximum=256,
+            value=80,
+            step=16,
+            label="Max tokens"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=2.0,
+            value=0.9,
+            step=0.1,
+            label="Temperature"
+        ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.85,
+            step=0.05,
+            label="Top-p"
+        ),
+        gr.Slider(
+            minimum=1.0,
+            maximum=1.5,
+            value=1.1,
+            step=0.05,
+            label="Repetition penalty"
+        )
+    ],
+    examples=[
+        ["Hello! What's your name?"],
+        ["Tell me about your adventures."],
+        ["What's your favorite thing to do?"],
+        ["Can you help me with something?"]
+    ],
+    cache_examples=False
+)
 if __name__ == "__main__":
     demo.launch()