# app.py
# =======
# Complete, corrected version of the application code for text generation with
# Gradio 4.44.1 and the Qwen/Qwen2.5-Coder-0.5B-Instruct model.
# Imports
# =======
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
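# Note: `accelerate` (listed under Dependencies below) is required at runtime
# because from_pretrained is called with device_map.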
# Constants
# =========
MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
SYSTEM_MESSAGE = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."
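# Qwen2.5-Coder-0.5B-Instruct is a small (~0.5B-parameter) coding model, so it
# runs on CPU as well as GPU; larger Qwen2.5-Coder instruct checkpoints can be
# swapped in by changing MODEL_NAME.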
# Load Model and Tokenizer
# ========================
def load_model_and_tokenizer():
"""
Load the model and tokenizer from Hugging Face.
"""
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
MODEL_NAME,
torch_dtype=torch.float16 if device == "cuda" else torch.float32,
device_map=device
)
return model, tokenizer
# Ensure the model and tokenizer are loaded
model, tokenizer = load_model_and_tokenizer()
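# Loading at import time means the weights are read from disk once; every
# Gradio request then reuses the same model and tokenizer objects.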
# Generate Response
# =================
def generate_response(prompt, chat_history, max_new_tokens, temperature):
"""
Generate a response from the model based on the user prompt and chat history.
"""
messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + chat_history + [{"role": "user", "content": prompt}]
# Concatenate messages into a single string for the model
text = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)
model_inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
generated_ids = model.generate(
**model_inputs,
max_new_tokens=max_new_tokens,
do_sample=True,
top_k=50,
top_p=0.95,
temperature=temperature
)
response = tokenizer.decode(generated_ids[0][model_inputs.input_ids.shape[1]:], skip_special_tokens=True)
return response
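# Example (standalone, outside the UI):
#   generate_response("Write a Python hello world", [], max_new_tokens=128, temperature=0.7)
# returns only the assistant's reply text, with the prompt stripped.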
# Clear Chat History
# ==================
def clear_chat():
"""
Clear the chat history.
"""
return [], ""
# Gradio Interface
# =================
def gradio_interface():
"""
Create and launch the Gradio interface.
"""
with gr.Blocks() as demo:
with gr.Row():
with gr.Column(scale=3):
chatbot = gr.Chatbot(label="Chat with Qwen/Qwen2.5-Coder-0.5B-Instruct", type="messages")
msg = gr.Textbox(label="User Input")
with gr.Row():
submit = gr.Button("Submit")
clear = gr.Button("Clear Chat")
with gr.Column(scale=1):
with gr.Group():
gr.Markdown("### Settings")
max_new_tokens = gr.Slider(50, 1024, value=512, step=1, label="Max New Tokens")
temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="Temperature")
        def respond(message, chat_history, max_new_tokens, temperature):
            if not message.strip():
                return chat_history, ""
            # Generate before appending: generate_response already adds the
            # new user message to the prompt, so passing the prior history
            # here keeps it from appearing twice.
            response = generate_response(message, chat_history, max_new_tokens, temperature)
            chat_history = chat_history + [
                {"role": "user", "content": message},
                {"role": "assistant", "content": response},
            ]
            return chat_history, ""
        submit.click(respond, [msg, chatbot, max_new_tokens, temperature], [chatbot, msg])
        msg.submit(respond, [msg, chatbot, max_new_tokens, temperature], [chatbot, msg])
        clear.click(clear_chat, None, [chatbot, msg])
    demo.launch()
# Main
# ====
if __name__ == "__main__":
    gradio_interface()
# Dependencies
# =============
# pip install transformers gradio==4.44.1 torch accelerate
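# Run with `python app.py`; Gradio serves the UI at http://127.0.0.1:7860 by
# default (pass server_name/server_port to demo.launch() to change this).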