import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Model configuration
MODEL_NAME = "DarwinAnim8or/TinyRP"
# Load model
print("Loading model...")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,
        device_map="cpu",
        trust_remote_code=True
    )
print("β
Model loaded successfully")
except Exception as e:
print(f"β Model loading failed: {e}")
tokenizer = None
model = None
# Character presets
CHARACTERS = {
"Knight": "You are Sir Gareth, a brave knight on a quest to save the kingdom. You speak with honor and courage.",
"Wizard": "You are Eldara, an ancient wizard who speaks in riddles and knows mystical secrets.",
"Tavern Keeper": "You are Bram, a cheerful tavern keeper who loves stories and meeting travelers.",
"Scientist": "You are Dr. Maya Chen, a brilliant scientist who loves discovery and explaining concepts simply.",
"Space Explorer": "You are Captain Nova, a fearless space explorer who has traveled distant galaxies."
}
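
# Each persona above becomes the ChatML system message when that character is selected.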
def respond(message, history, character, max_tokens, temperature, top_p, repetition_penalty):
"""Generate response using ChatML format"""
if not message.strip():
yield "Please enter a message."
return
if model is None:
yield "β Model not loaded properly."
return
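
    # The prompt is assembled in ChatML, the turn format this app assumes the
    # model expects, e.g.:
    #   <|im_start|>system\n{persona}<|im_end|>
    #   <|im_start|>user\n{message}<|im_end|>
    #   <|im_start|>assistant\n  <- generation continues from here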
    try:
        # Build ChatML conversation
        conversation = ""

        # Add character persona as the system message
        if character != "None" and character in CHARACTERS:
            conversation += f"<|im_start|>system\n{CHARACTERS[character]}<|im_end|>\n"

        # Add conversation history
        for user_msg, assistant_msg in history:
            conversation += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
            conversation += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"

        # Add current message and open the assistant turn
        conversation += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

        # Tokenize, truncating long histories to fit the context window
        inputs = tokenizer.encode(
            conversation,
            return_tensors="pt",
            max_length=900,
            truncation=True
        )

        # Generate
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_new_tokens=int(max_tokens),
                temperature=float(temperature),
                top_p=float(top_p),
                repetition_penalty=float(repetition_penalty),
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        # Decode, keeping special tokens so the assistant turn can be located
        full_text = tokenizer.decode(outputs[0], skip_special_tokens=False)

        # Extract the text after the last assistant tag
        if "<|im_start|>assistant\n" in full_text:
            response = full_text.split("<|im_start|>assistant\n")[-1]
            response = response.replace("<|im_end|>", "").strip()
        else:
            response = "Could not generate response."

        # Strip any leftover ChatML markers
        response = response.replace("<|im_start|>", "").replace("<|im_end|>", "").strip()

        if not response:
            response = "No response generated."

        yield response
    except Exception as e:
        yield f"Generation error: {str(e)}"
# Create the Gradio ChatInterface
demo = gr.ChatInterface(
    fn=respond,
    title="🎭 TinyRP Character Chat",
    description="Chat with AI characters using local CPU inference! Select a character and start chatting.",
    additional_inputs=[
        gr.Dropdown(
            choices=["None"] + list(CHARACTERS.keys()),
            value="Knight",
            label="Character"
        ),
        gr.Slider(
            minimum=16,
            maximum=256,
            value=48,
            step=16,
            label="Max tokens"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.9,
            step=0.1,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.85,
            step=0.05,
            label="Top-p"
        ),
        gr.Slider(
            minimum=1.0,
            maximum=1.5,
            value=1.1,
            step=0.05,
            label="Repetition penalty"
        )
    ],
    examples=[
        ["Hello! What's your name?"],
        ["Tell me about your adventures."],
        ["What's your favorite thing to do?"],
        ["Can you help me with something?"]
    ],
    cache_examples=False
)
if __name__ == "__main__":
    demo.launch()