# app.py
# =============
# A chat-style text generation app built on the Qwen/Qwen2.5-Coder-0.5B-Instruct model.
# The app uses the Gradio library to provide a web interface for chatting with the model.
# Imports
# =======
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
# Constants
# =========
MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
SYSTEM_MESSAGE = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."
# Load Model and Tokenizer
# ========================
def load_model_and_tokenizer():
    """
    Load the model and tokenizer from Hugging Face.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype="auto",
        device_map="cpu"  # Ensure the model runs on the CPU
    )
    return model, tokenizer

# Load the model and tokenizer once at startup
model, tokenizer = load_model_and_tokenizer()
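# Note (assumption): device_map="cpu" above forces CPU inference. On a machine
# with a GPU, device_map="auto" (backed by the accelerate library already in
# the dependency list) would place the model automatically:
#   model = AutoModelForCausalLM.from_pretrained(
#       MODEL_NAME, torch_dtype="auto", device_map="auto"
#   )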
# Generate Response
# =================
def generate_response(prompt, chat_history, max_new_tokens, temperature):
    """
    Generate a response from the model based on the user prompt and chat history.

    Yields the partial response as it grows, so the caller can stream it.
    """
    messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + chat_history + [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
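    # For reference, Qwen chat models use a ChatML-style template, so `text`
    # looks roughly like this (illustrative; the exact tokens come from the
    # tokenizer's chat template config):
    #   <|im_start|>system
    #   You are Qwen, created by Alibaba Cloud. ...<|im_end|>
    #   <|im_start|>user
    #   {prompt}<|im_end|>
    #   <|im_start|>assistant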
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=temperature,
        output_scores=True,
        return_dict_in_generate=True,
        return_legacy_cache=True  # Ensure legacy format is returned
    )
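    # Sampling notes: do_sample=True enables stochastic decoding. top_k=50 and
    # top_p=0.95 restrict candidates to the 50 most likely tokens within the
    # 0.95 cumulative-probability nucleus, while lower temperature values make
    # the output more deterministic.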
response = ""
for token_id in generated_ids.sequences[0][len(model_inputs.input_ids[0]):]:
response += tokenizer.decode([token_id], skip_special_tokens=True)
yield response
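# Example (illustrative) of consuming the generator directly, outside Gradio:
#   for partial in generate_response("Write a haiku about code", [], 128, 0.7):
#       print(partial)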
# Clear Chat History
# ==================
def clear_chat():
    """
    Clear the chat history.
    """
    return []
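# Note: with type="messages", the Chatbot component holds a list of
# {"role": ..., "content": ...} dicts, so returning an empty list resets it.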
# Gradio Interface
# =================
def gradio_interface():
    """
    Create and launch the Gradio interface.
    """
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(label="Chat with Qwen/Qwen2.5-Coder-0.5B-Instruct", type="messages")
                msg = gr.Textbox(label="User Input")
                with gr.Row():
                    submit = gr.Button("Submit")
                    clear = gr.Button("Clear Chat")
            with gr.Column(scale=1):
                with gr.Group():
                    gr.Markdown("### Settings")
                    max_new_tokens = gr.Slider(50, 1024, value=512, step=1, label="Max New Tokens")
                    temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="Temperature")
        def respond(message, chat_history, max_new_tokens, temperature):
            # Append the user turn, then stream the growing assistant reply
            # back to the Chatbot component.
            chat_history.append({"role": "user", "content": message})
            response = ""
            for chunk in generate_response(message, chat_history, max_new_tokens, temperature):
                response = chunk
                yield chat_history + [{"role": "assistant", "content": response}]
            chat_history.append({"role": "assistant", "content": response})

        submit.click(respond, [msg, chatbot, max_new_tokens, temperature], [chatbot])
        clear.click(clear_chat, None, [chatbot])
    demo.launch()
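# Note (assumption, standard Gradio launch options): to expose the app beyond
# localhost or create a temporary public link, launch() also accepts e.g.
#   demo.launch(server_name="0.0.0.0", share=True)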
# Main
# ====
if __name__ == "__main__":
gradio_interface()
# Dependencies
# =============
# The following dependencies are required to run this app:
# - transformers
# - gradio
# - torch
# - accelerate
#
# You can install these dependencies using pip:
# pip install transformers gradio torch accelerate
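#
# Then run the app with:
# python app.py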