Ivan000 committed on
Commit 15051ef · verified · 1 Parent(s): 2269308

Update app.py

Files changed (1)
  1. app.py +18 -32
app.py CHANGED
@@ -1,11 +1,12 @@
 # app.py
-# =============
-# This is a complete app.py file for a text generation app using the Qwen/Qwen2.5-Coder-0.5B-Instruct model.
-# The app uses the Gradio library to create a web interface for interacting with the model.
+# =======
+# Complete version of the corrected text-generation app code, using Gradio 4.44.1
+# and the Qwen/Qwen2.5-Coder-0.5B-Instruct model.
 
 # Imports
 # =======
 import gradio as gr
+import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Constants
@@ -19,11 +20,12 @@ def load_model_and_tokenizer():
     """
     Load the model and tokenizer from Hugging Face.
     """
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
-        torch_dtype="auto",
-        device_map="cpu"  # Ensure the model runs on the CPU
+        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+        device_map=device
     )
     return model, tokenizer
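
Note: assembled from the hunk above, the loader now auto-selects its device. A sketch of the full function; the MODEL_NAME constant is defined in the Constants section outside this diff, so its exact value is assumed here:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"  # assumed; the constant itself is not shown in this diff

    def load_model_and_tokenizer():
        """Load the model and tokenizer from Hugging Face."""
        # Prefer the GPU when one is visible; otherwise stay on CPU.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            # float16 halves memory on GPU; CPU inference generally needs float32.
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
            device_map=device,
        )
        return model, tokenizer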
 
@@ -37,12 +39,9 @@ def generate_response(prompt, chat_history, max_new_tokens, temperature):
     Generate a response from the model based on the user prompt and chat history.
     """
     messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + chat_history + [{"role": "user", "content": prompt}]
-    text = tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    # Concatenate messages into a single string for the model
+    text = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)
+    model_inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
 
     generated_ids = model.generate(
         **model_inputs,
@@ -50,16 +49,11 @@ def generate_response(prompt, chat_history, max_new_tokens, temperature):
         do_sample=True,
         top_k=50,
         top_p=0.95,
-        temperature=temperature,
-        output_scores=True,
-        return_dict_in_generate=True,
-        return_legacy_cache=True  # Ensure legacy format is returned
+        temperature=temperature
     )
 
-    response = ""
-    for token_id in generated_ids.sequences[0][len(model_inputs.input_ids[0]):]:
-        response += tokenizer.decode([token_id], skip_special_tokens=True)
-        yield chat_history + [{"role": "assistant", "content": response}]
+    response = tokenizer.decode(generated_ids[0][model_inputs.input_ids.shape[1]:], skip_special_tokens=True)
+    return response
 
 # Clear Chat History
 # ==================
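
Note: taken together, the two hunks above make generate_response return one complete string instead of streaming partial chats, and they swap tokenizer.apply_chat_template for plain role-prefixed concatenation, which drops Qwen's special chat tokens. A sketch of the assembled function; the max_new_tokens argument falls between the hunks and is assumed from the signature:

    def generate_response(prompt, chat_history, max_new_tokens, temperature):
        """Generate a response from the model based on the user prompt and chat history."""
        messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + chat_history + [{"role": "user", "content": prompt}]
        # Flatten the chat into "role: content" lines, e.g. "system: ...\nuser: ...".
        text = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)
        model_inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_new_tokens,  # assumed: this line sits outside both hunks
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=temperature,
        )
        # Decode only the newly generated tokens, skipping the echoed prompt.
        return tokenizer.decode(generated_ids[0][model_inputs.input_ids.shape[1]:], skip_special_tokens=True)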
@@ -90,13 +84,12 @@ def gradio_interface():
         temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="Temperature")
 
         def respond(message, chat_history, max_new_tokens, temperature):
+            if not message.strip():
+                return chat_history, ""
             chat_history.append({"role": "user", "content": message})
-            response = ""
-            for chunk in generate_response(message, chat_history, max_new_tokens, temperature):
-                response = chunk[-1]["content"]
-                yield chat_history, ""
+            response = generate_response(message, chat_history, max_new_tokens, temperature)
             chat_history.append({"role": "assistant", "content": response})
-            yield chat_history, ""
+            return chat_history, ""
 
         submit.click(respond, [msg, chatbot, max_new_tokens, temperature], [chatbot, msg])
         msg.submit(respond, [msg, chatbot, max_new_tokens, temperature], [chatbot, msg])
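
Note: respond is now a regular function, so each submit blocks until the full reply is ready rather than streaming tokens into the Chatbot. The history entries stay as role/content dicts, which presumes the Chatbot is configured for message dicts (e.g. type="messages"); its constructor is outside this diff. Assembled sketch:

    def respond(message, chat_history, max_new_tokens, temperature):
        # Skip blank submissions instead of invoking the model.
        if not message.strip():
            return chat_history, ""
        chat_history.append({"role": "user", "content": message})
        response = generate_response(message, chat_history, max_new_tokens, temperature)
        chat_history.append({"role": "assistant", "content": response})
        # The updated history feeds the Chatbot; "" clears the textbox.
        return chat_history, ""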
@@ -111,11 +104,4 @@ if __name__ == "__main__":
 
 # Dependencies
 # =============
-# The following dependencies are required to run this app:
-# - transformers
-# - gradio
-# - torch
-# - accelerate
-#
-# You can install these dependencies using pip:
-# pip install transformers gradio torch accelerate
+# pip install transformers gradio==4.44.1 torch accelerate
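
Note: if the Space pins its dependencies in a requirements.txt rather than the comment above, the equivalent file would be roughly as follows (only the Gradio pin comes from this commit; the other entries are left unversioned):

    transformers
    gradio==4.44.1
    torch
    accelerate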
 