# Hugging Face Space status: Runtime error
# File size: 2,498 Bytes
# Blame revisions: 1b43757 8c45748 da721b5 c0b7ba2 7d3f0d0 d7828e5
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Hub repository that supplies both the tokenizer and the causal-LM weights.
model_name = "davnas/Italian_Cousine_2.1"

# The tokenizer is loaded first; `respond` relies on its chat template.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Loading options kept in one place so they are easy to audit.
_model_load_options = dict(
    torch_dtype=torch.float32,  # full precision, intended for CPU execution
    low_cpu_mem_usage=True,
    device_map="auto",  # NOTE(review): presumably requires `accelerate` — confirm
)
model = AutoModelForCausalLM.from_pretrained(model_name, **_model_load_options)
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate the assistant's reply for a single chat turn.

    Args:
        message: The user's newest message.
        history: Earlier turns as an iterable of ``(user, assistant)`` pairs.
            Entries that are ``None`` are skipped.
        system_message: Text placed in the leading ``system`` role message.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature passed to ``generate``.
        top_p: Nucleus-sampling probability mass passed to ``generate``.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        stripped and special tokens are removed).
    """
    # Rebuild the whole conversation in the role/content format expected by
    # the tokenizer's chat template.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg is not None:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg is not None:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Ask for a dict so the attention mask travels with the input ids, and
    # move both onto the device the model was placed on.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # Some tokenizers define no pad token; fall back to EOS so generation
    # does not receive ``pad_token_id=None``.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # NOTE: `streaming=True` was removed here. It is not a `generate()`
    # argument, and unused model kwargs are rejected with a ValueError.
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=pad_token_id,
        )

    # Drop the prompt tokens so only the model's continuation is returned.
    response = tokenizer.decode(
        output_ids[0][prompt_length:],
        skip_special_tokens=True,
    )
    return response
# Build the chat UI. The extra widgets are created in the same order as the
# trailing parameters of `respond` (system_message, max_tokens, temperature,
# top_p), since they are passed to it as additional inputs.
_system_message_box = gr.Textbox(
    value="You are a professional chef assistant who provides accurate and detailed recipes.",
    label="System message",
)
_max_tokens_slider = gr.Slider(
    minimum=1,
    maximum=2048,
    value=512,
    step=1,
    label="Max new tokens",
)
_temperature_slider = gr.Slider(
    minimum=0.1,
    maximum=4.0,
    value=0.7,
    step=0.1,
    label="Temperature",
)
_top_p_slider = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
    title="Italian Cuisine Chatbot",
    description="Ask me anything about Italian cuisine or cooking!",
)
if __name__ == "__main__":
    # Listen on all network interfaces on port 7860 so the app is reachable
    # from outside the local host.
    demo.launch(server_name="0.0.0.0", server_port=7860)