# app.py
# =======
# Complete, corrected version of a text-generation app built with Gradio 4.44.1
# and the Qwen/Qwen2.5-Coder-0.5B-Instruct model.

# Imports
# =======
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Constants
# =========
MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
SYSTEM_MESSAGE = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."

# Load Model and Tokenizer
# ========================
def load_model_and_tokenizer():
    """
    Load the model and tokenizer from Hugging Face.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map=device
    )
    return model, tokenizer

# Ensure the model and tokenizer are loaded
model, tokenizer = load_model_and_tokenizer()

# Generate Response
# =================
def generate_response(prompt, chat_history, max_new_tokens, temperature):
    """
    Generate a response from the model based on the user prompt and chat history.
    """
    messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + chat_history + [{"role": "user", "content": prompt}]
    # Render the conversation with the model's chat template so the prompt
    # matches the format Qwen2.5 was instruction-tuned on (plain role-prefixed
    # concatenation produces noticeably worse completions).
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024).to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=temperature
    )

    response = tokenizer.decode(generated_ids[0][model_inputs.input_ids.shape[1]:], skip_special_tokens=True)
    return response
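
# Usage example (a sketch): generate_response can be smoke-tested from a
# Python REPL without launching the UI; output varies because sampling is on.
#   >>> print(generate_response("Write hello world in Python.", [], 64, 0.7))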

# Clear Chat History
# ==================
def clear_chat():
    """
    Clear the chat history.
    """
    return [], ""

# Gradio Interface
# =================
def gradio_interface():
    """
    Create and launch the Gradio interface.
    """
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(label="Chat with Qwen/Qwen2.5-Coder-0.5B-Instruct", type="messages")
                msg = gr.Textbox(label="User Input")
                with gr.Row():
                    submit = gr.Button("Submit")
                    clear = gr.Button("Clear Chat")
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### Settings")
                    max_new_tokens = gr.Slider(50, 1024, value=512, step=1, label="Max New Tokens")
                    temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="Temperature")

        def respond(message, chat_history, max_new_tokens, temperature):
            if not message.strip():
                return chat_history, ""
            # Generate first: generate_response appends the user prompt to
            # the message list itself, so appending to chat_history before
            # the call would duplicate the user turn in the prompt.
            response = generate_response(message, chat_history, max_new_tokens, temperature)
            chat_history.append({"role": "user", "content": message})
            chat_history.append({"role": "assistant", "content": response})
            return chat_history, ""

        submit.click(respond, [msg, chatbot, max_new_tokens, temperature], [chatbot, msg])
        msg.submit(respond, [msg, chatbot, max_new_tokens, temperature], [chatbot, msg])
        clear.click(clear_chat, None, [chatbot, msg])

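    # Optional (assumption): demo.queue() can be called here, before launch,
    # so concurrent users are handled through Gradio's request queue.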
    demo.launch()

# Main
# ====
if __name__ == "__main__":
    gradio_interface()

# Dependencies
# =============
# pip install transformers gradio==4.44.1 torch accelerate
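#
# Equivalent requirements.txt (a sketch; only gradio is pinned, matching the
# command above):
#   transformers
#   gradio==4.44.1
#   torch
#   accelerate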