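"""Gradio chat app serving the davnas/Italian_Cousine_2.1 model on CPU.

Builds prompts with the tokenizer's chat template and serves the model
through gr.ChatInterface with adjustable sampling parameters.
"""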
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Load model and tokenizer with CPU-compatible settings
model_name = "davnas/Italian_Cousine_2.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# No quantization config: bitsandbytes requires a CUDA GPU, so the model
# is loaded in full precision for CPU inference.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",  # Explicitly set to CPU
    torch_dtype=torch.float32,
    use_safetensors=True,
    low_cpu_mem_usage=True,
)
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Format the conversation, starting with the system prompt
    messages = [{"role": "system", "content": system_message}]

    # Add history (ChatInterface passes it as (user, assistant) pairs)
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current message
    messages.append({"role": "user", "content": message})

    # Create the prompt using the tokenizer's chat template
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    # Generate the response; fall back to the EOS token for padding, since
    # many causal LM tokenizers define no dedicated pad token
    pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=pad_id,
        )
    # Decode only the newly generated tokens, skipping the prompt
    response = tokenizer.decode(output_ids[0][len(input_ids[0]):], skip_special_tokens=True)
    return response
# Create the interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a professional chef assistant who provides accurate and detailed recipes.",
            label="System message",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max new tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    title="Italian Cuisine Chatbot",
    description="Ask me anything about Italian cuisine or cooking!",
)
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)