import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer with CPU-compatible settings
model_name = "davnas/Italian_Cousine_2.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
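# Many causal-LM tokenizers ship without a dedicated pad token; generate()
# below passes pad_token_id, so fall back to the EOS token when none is set.
# This is a common convention, assumed (not documented) for this model.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token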
# bitsandbytes quantization requires a CUDA GPU, so no quantization_config is
# passed here; the model is loaded in full float32 precision on the CPU.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",  # explicitly set to CPU
    torch_dtype=torch.float32,
    use_safetensors=True,
    low_cpu_mem_usage=True,
)
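# Inference only: eval() disables dropout and other train-time behaviour.
model.eval()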

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Format the conversation, starting with the system prompt
    messages = [{"role": "system", "content": system_message}]

    # Add prior turns (Gradio's ChatInterface passes tuple-style history
    # as (user, assistant) pairs)
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current message
    messages.append({"role": "user", "content": message})

    # Build the prompt using the tokenizer's chat template
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    )
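    # apply_chat_template returns only the input_ids tensor here; build an
    # explicit all-ones attention mask (a single unpadded sequence) so that
    # generate() does not have to infer one.
    attention_mask = torch.ones_like(input_ids)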
    # Generate the response
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated tokens, skipping the prompt
    response = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
    return response
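
# Quick smoke test bypassing the UI (hypothetical example values):
#   print(respond("How do I make carbonara?", [],
#                 "You are a professional chef assistant.", 128, 0.7, 0.95))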

# Create the interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a professional chef assistant who provides accurate and detailed recipes.",
            label="System message"
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max new tokens"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        ),
    ],
    title="Italian Cuisine Chatbot",
    description="Ask me anything about Italian cuisine or cooking!"
)
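
# Generation on CPU can be slow; routing requests through Gradio's queue is a
# common safeguard against request timeouts on Spaces (assumes a Gradio
# version where Blocks.queue() is available).
demo.queue()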

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)