import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Model configuration
MODEL_NAME = "DarwinAnim8or/TinyRP"

# Load model
print("Loading model...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,
        device_map="cpu",
        trust_remote_code=True
    )
    print("✅ Model loaded successfully")
except Exception as e:
    print(f"❌ Model loading failed: {e}")
    tokenizer = None
    model = None

# Character presets
CHARACTERS = {
    "Knight": "You are Sir Gareth, a brave knight on a quest to save the kingdom. You speak with honor and courage.",
    "Wizard": "You are Eldara, an ancient wizard who speaks in riddles and knows mystical secrets.",
    "Tavern Keeper": "You are Bram, a cheerful tavern keeper who loves stories and meeting travelers.",
    "Scientist": "You are Dr. Maya Chen, a brilliant scientist who loves discovery and explaining concepts simply.",
    "Space Explorer": "You are Captain Nova, a fearless space explorer who has traveled distant galaxies."
}

def respond(message, history, character, max_tokens, temperature, top_p, repetition_penalty):
    """Generate response using ChatML format"""
    if not message.strip():
        yield "Please enter a message."
        return

    if model is None:
        yield "❌ Model not loaded properly."
        return

    try:
        # Build ChatML conversation
        conversation = ""

        # Add character as system message
        if character != "None" and character in CHARACTERS:
            conversation += f"<|im_start|>system\n{CHARACTERS[character]}<|im_end|>\n"

        # Add conversation history
        for user_msg, assistant_msg in history:
            conversation += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
            conversation += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"

        # Add current message
        conversation += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
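
        # For illustration, with the "Knight" preset and a first message of
        # "Hello!", the assembled prompt looks roughly like:
        #   <|im_start|>system
        #   You are Sir Gareth, a brave knight...<|im_end|>
        #   <|im_start|>user
        #   Hello!<|im_end|>
        #   <|im_start|>assistant
        # The trailing assistant tag cues the model to write its reply.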

        # Tokenize
        inputs = tokenizer.encode(
            conversation,
            return_tensors="pt",
            max_length=900,
            truncation=True
        )
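
        # Truncation guards against over-long chats; the 900-token cap is
        # meant to leave headroom for max_new_tokens within the small
        # model's context window.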

        # Generate
        response = ""
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=int(max_tokens),
                temperature=float(temperature),
                top_p=float(top_p),
                repetition_penalty=float(repetition_penalty),
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )
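
        # generate() returns the prompt tokens plus the newly sampled tokens,
        # so the decoded text below contains the whole conversation; only the
        # final assistant turn is extracted as the reply.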

        # Decode
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=False)

        # Extract assistant response
        if "<|im_start|>assistant\n" in full_text:
            response = full_text.split("<|im_start|>assistant\n")[-1]
            # Cut at the end-of-turn marker so any extra turns the model
            # begins to generate are not included in the reply
            response = response.split("<|im_end|>")[0]
            response = response.split("<|im_start|>")[0]
        else:
            response = "Could not generate response."

        # Clean response
        response = response.strip()
        if not response:
            response = "No response generated."

        yield response

    except Exception as e:
        yield f"Generation error: {str(e)}"
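
# Note: additional_inputs below are passed to respond() positionally,
# after (message, history), in the order they are listed here.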

# Create simple ChatInterface
demo = gr.ChatInterface(
    fn=respond,
    title="🎭 TinyRP Character Chat",
    description="Chat with AI characters using local CPU inference! Select a character and start chatting.",
    additional_inputs=[
        gr.Dropdown(
            choices=["None"] + list(CHARACTERS.keys()),
            value="Knight",
            label="Character"
        ),
        gr.Slider(
            minimum=16,
            maximum=256,
            value=48,
            step=16,
            label="Max tokens"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.9,
            step=0.1,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.85,
            step=0.05,
            label="Top-p"
        ),
        gr.Slider(
            minimum=1.0,
            maximum=1.5,
            value=1.1,
            step=0.05,
            label="Repetition penalty"
        )
    ],
    examples=[
        ["Hello! What's your name?"],
        ["Tell me about your adventures."],
        ["What's your favorite thing to do?"],
        ["Can you help me with something?"]
    ],
    cache_examples=False
)

if __name__ == "__main__":
    demo.launch()
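
# To try it locally (assuming this file is saved as app.py and gradio,
# transformers, and torch are installed):
#   python app.py
# then open the local URL that Gradio prints in a browser.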