Ivan000 committed
Commit 5f33c5c · verified · 1 Parent(s): 15051ef

Update app.py

Files changed (1): app.py (+32 -18)
app.py CHANGED
@@ -1,12 +1,11 @@
 # app.py
-# =======
-# Complete version of the corrected application code for text generation using Gradio 4.44.1
-# and the Qwen/Qwen2.5-Coder-0.5B-Instruct model.
+# =============
+# This is a complete app.py file for a text generation app using the Qwen/Qwen2.5-Coder-0.5B-Instruct model.
+# The app uses the Gradio library to create a web interface for interacting with the model.
 
 # Imports
 # =======
 import gradio as gr
-import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Constants
@@ -20,12 +19,11 @@ def load_model_and_tokenizer():
     """
     Load the model and tokenizer from Hugging Face.
     """
-    device = "cuda" if torch.cuda.is_available() else "cpu"
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
-        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-        device_map=device
+        torch_dtype="auto",
+        device_map="cpu"  # Ensure the model runs on the CPU
     )
     return model, tokenizer
 
@@ -39,9 +37,12 @@ def generate_response(prompt, chat_history, max_new_tokens, temperature):
     Generate a response from the model based on the user prompt and chat history.
     """
     messages = [{"role": "system", "content": SYSTEM_MESSAGE}] + chat_history + [{"role": "user", "content": prompt}]
-    # Concatenate messages into a single string for the model
-    text = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)
-    model_inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=1024).to(model.device)
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
 
     generated_ids = model.generate(
         **model_inputs,
@@ -49,11 +50,16 @@ def generate_response(prompt, chat_history, max_new_tokens, temperature):
         do_sample=True,
         top_k=50,
         top_p=0.95,
-        temperature=temperature
+        temperature=temperature,
+        output_scores=True,
+        return_dict_in_generate=True,
+        return_legacy_cache=True  # Ensure legacy format is returned
     )
 
-    response = tokenizer.decode(generated_ids[0][model_inputs.input_ids.shape[1]:], skip_special_tokens=True)
-    return response
+    response = ""
+    for token_id in generated_ids.sequences[0][len(model_inputs.input_ids[0]):]:
+        response += tokenizer.decode([token_id], skip_special_tokens=True)
+        yield chat_history + [{"role": "assistant", "content": response}]
 
 # Clear Chat History
 # ==================
@@ -84,12 +90,13 @@ def gradio_interface():
         temperature = gr.Slider(0.1, 1.0, value=0.7, step=0.05, label="Temperature")
 
         def respond(message, chat_history, max_new_tokens, temperature):
-            if not message.strip():
-                return chat_history, ""
             chat_history.append({"role": "user", "content": message})
-            response = generate_response(message, chat_history, max_new_tokens, temperature)
+            response = ""
+            for chunk in generate_response(message, chat_history, max_new_tokens, temperature):
+                response = chunk[-1]["content"]
+                yield chat_history, ""
             chat_history.append({"role": "assistant", "content": response})
-            return chat_history, ""
+            yield chat_history, ""
 
         submit.click(respond, [msg, chatbot, max_new_tokens, temperature], [chatbot, msg])
         msg.submit(respond, [msg, chatbot, max_new_tokens, temperature], [chatbot, msg])
@@ -104,4 +111,11 @@ if __name__ == "__main__":
 
 # Dependencies
 # =============
-# pip install transformers gradio==4.44.1 torch accelerate
+# The following dependencies are required to run this app:
+# - transformers
+# - gradio
+# - torch
+# - accelerate
+#
+# You can install these dependencies using pip:
+# pip install transformers gradio torch accelerate
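
For readers trying the new loading arguments in isolation: torch_dtype="auto" takes the dtype recorded in the checkpoint config, and device_map requires the accelerate package that the dependency comment lists. A minimal smoke test, assuming only the model id already used in the diff (the file name load_check.py is hypothetical, not part of the commit):

# load_check.py -- minimal sketch, not part of the commit
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype="auto",   # dtype comes from the checkpoint config
    device_map="cpu",     # pin every module to the CPU
)

print(next(model.parameters()).device)  # expected: cpu
print(next(model.parameters()).dtype)   # whatever the checkpoint stores

This removes the need for the deleted torch.cuda.is_available() branch on CPU-only hardware; if the checkpoint stores bfloat16 and CPU throughput suffers, passing an explicit float32 dtype is the usual fallback.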
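The switch from manual "role: content" concatenation to tokenizer.apply_chat_template means the prompt is rendered in the chat format the Qwen2.5 checkpoints were trained on. A standalone sketch of just that step (the message contents are illustrative, not from the commit):

# template_check.py -- sketch of the templating step only
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-0.5B-Instruct")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},  # illustrative
    {"role": "user", "content": "Write hello world in Python."},    # illustrative
]

# tokenize=False returns the rendered prompt string;
# add_generation_prompt=True appends the assistant header so the
# model continues as the assistant rather than extending the user turn.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
print(text)  # shows the <|im_start|>-delimited prompt Qwen expects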
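On the streaming change: the new generate_response yields decoded text token by token, but only after model.generate() has already finished, so the UI fills in gradually without generation itself being streamed. True incremental streaming is usually done with TextIteratorStreamer from Transformers; the sketch below shows that alternative, which is not what this commit implements, and the helper name stream_reply is hypothetical:

# streaming_sketch.py -- alternative approach, not the commit's implementation
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto", device_map="cpu")

def stream_reply(prompt, max_new_tokens=256):
    """Yield the response-so-far while generation is still running."""
    model_inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks, so run it in a thread and consume the streamer here.
    thread = Thread(
        target=model.generate,
        kwargs=dict(**model_inputs, streamer=streamer, max_new_tokens=max_new_tokens),
    )
    thread.start()
    response = ""
    for piece in streamer:
        response += piece
        yield response
    thread.join()

A respond handler shaped like the one in this commit could consume stream_reply exactly as it consumes generate_response, since both yield progressively longer responses.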