Ivan000 committed on
Commit
baecb1b
·
verified ·
1 Parent(s): a1689f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -52
app.py CHANGED
@@ -1,69 +1,104 @@
1
  # app.py
2
  # =============
3
- # This is a complete app.py file for a Gradio app using the meta-llama/Llama-3.2-3B-Instruct model.
4
- # The app allows users to input a message and receive a response from the model.
5
 
6
- # Dependencies
7
- # =============
8
- # The following dependencies are required to run this app:
9
- # - transformers
10
- # - gradio
11
- # - torch
12
- #
13
- # You can install these dependencies using pip:
14
- # pip install transformers gradio torch
15
 
16
- import torch
17
- from transformers import pipeline
18
- import gradio as gr # Import gradio
 
19
 
20
- # Load the model and tokenizer
21
- model_id = "meta-llama/Llama-3.2-3B-Instruct"
22
- device = "cpu" # Use CPU for inference
 
 
 
 
 
 
 
 
 
 
23
 
24
- # Initialize the pipeline
25
- pipe = pipeline(
26
- "text-generation",
27
- model=model_id,
28
- torch_dtype=torch.bfloat16,
29
- device_map="auto",
30
- )
31
 
32
def generate_response(prompt):
    """
    Generate a response from the model based on the given prompt.

    Args:
        prompt (str): The input message from the user.

    Returns:
        str: The text of the assistant's reply.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant!"},
        {"role": "user", "content": prompt},
    ]
    outputs = pipe(
        messages,
        max_new_tokens=256,
    )
    # With chat-style input, "generated_text" holds the whole conversation as
    # a list of role/content dicts; the last entry is the new assistant turn.
    # Return only its text so the Gradio "text" output shows the reply rather
    # than the repr of a dict.
    return outputs[0]["generated_text"][-1]["content"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
# Define the Gradio interface
def gradio_interface():
    """
    Define the Gradio interface for the app.

    Returns:
        gr.Interface: An interface that sends the text typed by the user to
        generate_response and displays the returned text. It is not launched
        here; the caller is expected to call launch() on it.
    """
    iface = gr.Interface(
        fn=generate_response,
        # gr.Textbox is the top-level component; the older gr.inputs
        # namespace is no longer available in current Gradio releases.
        inputs=gr.Textbox(lines=2, placeholder="Enter your message here..."),
        outputs="text",
        title="Llama-3.2-3B-Instruct Chatbot",
        description="Chat with the Llama-3.2-3B-Instruct model. Enter your message and get a response!",
    )
    return iface
 
 
65
 
66
- # Launch the Gradio app
 
 
 
 
 
 
67
  if __name__ == "__main__":
68
- iface = gradio_interface()
69
- iface.launch()
 
 
 
 
 
 
 
 
 
 
1
  # app.py
2
  # =============
3
+ # This is a complete app.py file for a text generation app using the Qwen/Qwen2.5-Coder-0.5B-Instruct model.
4
+ # The app uses the Gradio library to create a web interface for interacting with the model.
5
 
6
+ # Imports
7
+ # =======
8
+ import gradio as gr
9
+ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 
 
 
10
 
11
+ # Constants
12
+ # =========
13
+ MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct"
14
+ SYSTEM_MESSAGE = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."
15
 
16
+ # Load Model and Tokenizer
17
+ # ========================
18
def load_model_and_tokenizer():
    """
    Fetch the tokenizer and the causal language model named by MODEL_NAME
    from Hugging Face.

    Returns:
        tuple: (model, tokenizer), in that order.
    """
    model_options = {
        "torch_dtype": "auto",
        "device_map": "cpu",  # Ensure the model runs on the CPU
    }
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    loaded_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_options)
    return loaded_model, loaded_tokenizer
29
 
30
+ model, tokenizer = load_model_and_tokenizer()
 
 
 
 
 
 
31
 
32
+ # Generate Response
33
+ # =================
34
def generate_response(prompt, chat_history):
    """
    Stream a response from the model based on the user prompt and chat history.

    Args:
        prompt (str): The latest user message.
        chat_history (list[dict] | None): Earlier turns as dicts with "role"
            and "content" keys. None is treated as an empty history. If the
            history already ends with this same user prompt, it is not added
            a second time.

    Yields:
        str: The response accumulated so far; each yielded value extends the
        previous one, and the last value is the full reply.

    Raises:
        Exception: Any error raised by model.generate in the worker thread is
        re-raised here once the stream has been drained.
    """
    # Local imports keep this block self-contained; both names come from
    # packages the file already depends on.
    from threading import Thread

    from transformers import TextIteratorStreamer

    # Keep only the keys the chat template needs, so that extra per-message
    # fields a UI may attach do not affect the duplicate check below.
    history = [
        {"role": turn["role"], "content": turn["content"]}
        for turn in (chat_history or [])
    ]
    user_turn = {"role": "user", "content": prompt}
    # Some callers append the user message before calling; avoid sending two
    # identical consecutive user turns to the model.
    if not history or history[-1] != user_turn:
        history.append(user_turn)

    messages = [{"role": "system", "content": SYSTEM_MESSAGE}, *history]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # generate() has no `stream` argument. Streaming is done by handing it a
    # streamer object and consuming that streamer from another thread. The
    # streamer also decodes text in printable chunks, so characters that span
    # several tokens are not broken apart.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )
    generation_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=512,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
    )
    errors = []

    def _run_generation():
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:  # re-raised in the consumer below
            errors.append(exc)
            # Signal end-of-stream so the consumer loop does not block forever.
            streamer.end()

    worker = Thread(target=_run_generation, daemon=True)
    worker.start()

    response = ""
    for new_text in streamer:
        response += new_text
        yield response

    worker.join()
    if errors:
        raise errors[0]
60
+
61
+ # Clear Chat History
62
+ # ==================
63
def clear_chat():
    """
    Reset the conversation.

    Returns:
        tuple: Two separate, empty lists.
    """
    return tuple([] for _ in range(2))
68
 
69
# Gradio Interface
# =================
def gradio_interface():
    """
    Create and launch the Gradio interface.

    The UI has a chat window, a text box for the user's message and a button
    that clears the conversation. Submitting the text box streams the model's
    reply into the chat window and empties the text box.
    """
    with gr.Blocks() as demo:
        # The history is exchanged as role/content dicts, so the chatbot must
        # be put in "messages" mode.
        chatbot = gr.Chatbot(
            label="Chat with Qwen/Qwen2.5-Coder-0.5B-Instruct",
            type="messages",
        )
        msg = gr.Textbox(label="User Input")
        clear = gr.Button("Clear Chat")

        def respond(message, chat_history):
            """Yield (textbox value, chat history) pairs as the reply grows."""
            # Copy so the component's value is not mutated in place, and pass
            # only the earlier turns on: generate_response adds the new user
            # prompt itself.
            earlier_turns = list(chat_history or [])
            conversation = earlier_turns + [
                {"role": "user", "content": message},
                {"role": "assistant", "content": ""},
            ]
            # Show the user's message (and clear the text box) right away.
            yield "", conversation
            # generate_response is a generator of progressively longer
            # strings; iterate it rather than storing the generator object.
            for partial_reply in generate_response(message, earlier_turns):
                conversation[-1] = {"role": "assistant", "content": partial_reply}
                yield "", conversation

        def reset_chat():
            """Return an empty history for the chat window."""
            # clear_chat returns two empty lists; only one output is wired.
            cleared_history, _ = clear_chat()
            return cleared_history

        msg.submit(respond, [msg, chatbot], [msg, chatbot])
        clear.click(reset_chat, None, chatbot)

    demo.launch()
90
+
91
+ # Main
92
+ # ====
93
  if __name__ == "__main__":
94
+ gradio_interface()
95
+
96
+ # Dependencies
97
+ # =============
98
+ # The following dependencies are required to run this app:
99
+ # - transformers
100
+ # - gradio
101
+ # - torch
102
+ #
103
+ # You can install these dependencies using pip:
104
+ # pip install transformers gradio torch