Update app.py
app.py CHANGED
@@ -1,12 +1,11 @@
 # app.py
-#
-#
-#
+# =============
+# This is a complete app.py file for a text generation app using the Qwen/Qwen2.5-Coder-0.5B-Instruct model.
+# The app uses the Gradio library to create a web interface for interacting with the model.

 # Imports
 # =======
 import gradio as gr
-import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer

 # Constants
@@ -20,12 +19,11 @@ def load_model_and_tokenizer():
     """
     Load the model and tokenizer from Hugging Face.
     """
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
-        torch_dtype=
-        device_map=
+        torch_dtype="auto",
+        device_map="cpu"  # Ensure the model runs on the CPU
     )
     return model, tokenizer

@@ -39,9 +37,12 @@ def generate_response(prompt, chat_history, max_new_tokens, temperature):
     Generate a response from the model based on the user prompt and chat history.
     """
     messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + chat_history + [{"role": "user", "content": prompt}]
-
-
-
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

     generated_ids = model.generate(
         **model_inputs,
@@ -49,11 +50,16 @@ def generate_response(prompt, chat_history, max_new_tokens, temperature):
         do_sample=True,
         top_k=50,
         top_p=0.95,
-        temperature=temperature
+        temperature=temperature,
+        output_scores=True,
+        return_dict_in_generate=True,
+        return_legacy_cache=True  # Ensure legacy format is returned
     )

-    response =
-
+    response = ""
+    for token_id in generated_ids.sequences[0][len(model_inputs.input_ids[0]):]:
+        response += tokenizer.decode([token_id], skip_special_tokens=True)
+        yield chat_history + [{"role": "assistant", "content": response}]

 # Clear Chat History
 # ==================
@@ -84,12 +90,13 @@ def gradio_interface():
         temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="Temperature")

         def respond(message, chat_history, max_new_tokens, temperature):
-            if not message.strip():
-                return chat_history, ""
             chat_history.append({"role": "user", "content": message})
-            response =
+            response = ""
+            for chunk in generate_response(message, chat_history, max_new_tokens, temperature):
+                response = chunk[-1]["content"]
+                yield chat_history, ""
             chat_history.append({"role": "assistant", "content": response})
-
+            yield chat_history, ""

         submit.click(respond, [msg, chatbot, max_new_tokens, temperature], [chatbot, msg])
         msg.submit(respond, [msg, chatbot, max_new_tokens, temperature], [chatbot, msg])
@@ -104,4 +111,11 @@ if __name__ == "__main__":

 # Dependencies
 # =============
-#
+# The following dependencies are required to run this app:
+# - transformers
+# - gradio
+# - torch
+# - accelerate
+#
+# You can install these dependencies using pip:
+# pip install transformers gradio torch accelerate