wuhp committed · Commit d858dc3 · verified · 1 Parent(s): b27451f

Update app.py

Files changed (1): app.py (+63 -39)
app.py CHANGED
@@ -3,13 +3,14 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces # Import the spaces library

- # Model IDs from Hugging Face Hub (now only 1.5B and 7B)
model_ids = {
    "1.5B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "7B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
}

- # Revised Default Prompts
default_prompt_1_5b = """**Code Analysis Task**
As a Senior Code Analyst, analyze this programming problem:

@@ -22,12 +23,12 @@ As a Senior Code Analyst, analyze this programming problem:
**Analysis Required:**
1. Briefly break down the problem, including key constraints and edge cases.
2. Suggest 2-3 potential approach options (algorithms/data structures).
- 3. Recommend a primary strategy and explain your reasoning concisely.
- 4. Provide a very brief initial pseudocode sketch of the core logic."""


default_prompt_7b = """**Code Implementation Task**
- As a Principal Software Engineer, develop a solution based on this analysis:

**Initial Analysis:**
{response_1_5b}
@@ -35,11 +36,11 @@ As a Principal Software Engineer, develop a solution based on this analysis:
**Relevant Context:**
{context_7b}

- **Solution Development Requirements:**
- 1. Present an optimized solution approach, justifying your algorithm choices.
- 2. Provide production-grade code in [Python/JS/etc.] (infer language). Include error handling and comments.
- 3. Outline a testing plan with key test cases.
- 4. Briefly suggest optimization opportunities and debugging tips."""


# Function to load model and tokenizer (same)
@@ -53,7 +54,7 @@ def load_model_and_tokenizer(model_id):
    )
    return model, tokenizer

- # Load the selected models and tokenizers (same)
models = {}
tokenizers = {}
for size, model_id in model_ids.items():
@@ -83,16 +84,16 @@ def retrieve_from_memory(query, top_k=2):
    return relevant_memories[:top_k]


- # --- Swarm Agent Function with Shared Memory (RAG) - DECORATED with @spaces.GPU ---
@spaces.GPU # <---- GPU DECORATOR ADDED HERE!
- def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_template, temperature=0.5, top_p=0.9, max_new_tokens=300): # Lowered default temperature
    global shared_memory
    shared_memory = [] # Clear memory for each new request

-     print("\n--- Swarm Agent Processing with Shared Memory (RAG) - GPU ACCELERATED ---") # Updated message

    # 1.5B Model - Brainstorming/Initial Draft (same logic)
-     print("\n[1.5B Model - Brainstorming] - GPU Accelerated") # Added GPU indication
    retrieved_memory_1_5b = retrieve_from_memory(user_prompt)
    context_1_5b = "\n".join([f"- {mem}" for mem in retrieved_memory_1_5b]) if retrieved_memory_1_5b else "No relevant context found in memory."

@@ -111,37 +112,59 @@ def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_temp
    print(f"1.5B Response:\n{response_1_5b}")
    store_in_memory(f"1.5B Model Initial Response: {response_1_5b[:200]}...")

-     # 7B Model - Elaboration and Detail (same logic)
-     print("\n[7B Model - Elaboration] - GPU Accelerated") # Added GPU indication
-     retrieved_memory_7b = retrieve_from_memory(response_1_5b)
-     context_7b = "\n".join([f"- {mem}" for mem in retrieved_memory_7b]) if retrieved_memory_7b else "No relevant context found in memory."
-
-     # Use user-provided prompt template for 7B model
-     prompt_7b = prompt_7b_template.format(response_1_5b=response_1_5b, context_7b=context_7b)
-
-     input_ids_7b = tokenizers["7B"].encode(prompt_7b, return_tensors="pt").to(models["7B"].device)
-     output_7b = models["7B"].generate(
-         input_ids_7b,
-         max_new_tokens=max_new_tokens + 100, # Slightly more tokens for 7B
-         temperature=temperature, # Use user-defined temperature
-         top_p=top_p, # Use user-defined top_p
        do_sample=True
    )
-     response_7b = tokenizers["7B"].decode(output_7b[0], skip_special_tokens=True)
-     print(f"7B Response:\n{response_7b}")
-     store_in_memory(f"7B Model Elaborated Response: {response_7b[:200]}...")

-     return response_7b # Now returns the 7B model's response as final


- # --- Gradio ChatInterface --- (same interface definition)
- def gradio_interface(message, history, temp, top_p, max_tokens, prompt_1_5b_text, prompt_7b_text): # Accept prompt textboxes
    # history is automatically managed by ChatInterface
    response = swarm_agent_sequential_rag(
        message,
        prompt_1_5b_template=prompt_1_5b_text, # Pass prompt templates
        prompt_7b_template=prompt_7b_text,
        temperature=temp,
        top_p=top_p,
        max_new_tokens=int(max_tokens) # Ensure max_tokens is an integer
@@ -150,16 +173,17 @@ def gradio_interface(message, history, temp, top_p, max_tokens, prompt_1_5b_text

iface = gr.ChatInterface( # Using ChatInterface now
    fn=gradio_interface,
-     # Define additional inputs for settings and prompts
    additional_inputs=[
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="Temperature"), # Lowered default temp to 0.5
        gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P"),
        gr.Number(value=300, label="Max Tokens", precision=0), # Use Number for integer tokens
        gr.Textbox(value=default_prompt_1_5b, lines=10, label="1.5B Model Prompt Template"), # Textbox for 1.5B prompt
        gr.Textbox(value=default_prompt_7b, lines=10, label="7B Model Prompt Template"), # Textbox for 7B prompt
    ],
-     title="DeepSeek Agent Swarm Chat (ZeroGPU Demo - 2 Models) - PROMPT CUSTOMIZATION", # Updated title
-     description="Chat with a DeepSeek agent swarm (1.5B, 7B) with shared memory, adjustable settings, **and customizable prompts!** **GPU accelerated using ZeroGPU!** (Requires Pro Space)", # Updated description
)

if __name__ == "__main__":
 
import torch
import spaces # Import the spaces library

+ # Model IDs from Hugging Face Hub (now 1.5B, 7B, and 14B)
model_ids = {
    "1.5B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "7B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
+     "14B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", # Added 14B back
}

+ # Revised Default Prompts (as defined above)
default_prompt_1_5b = """**Code Analysis Task**
As a Senior Code Analyst, analyze this programming problem:

**Analysis Required:**
1. Briefly break down the problem, including key constraints and edge cases.
2. Suggest 2-3 potential approach options (algorithms/data structures).
+ 3. Recommend ONE primary strategy and briefly justify your choice.
+ 4. Provide a very brief initial pseudocode sketch of the core logic."""


default_prompt_7b = """**Code Implementation Task**
+ As a Principal Software Engineer, provide production-ready Streamlit/Python code based on this analysis:

**Initial Analysis:**
{response_1_5b}

**Relevant Context:**
{context_7b}

+ **Code Requirements:**
+ 1. Generate concise, production-grade Python code for a Streamlit app.
+ 2. Include necessary imports, UI elements, and basic functionality.
+ 3. Add comments for clarity.
+ """
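The final-stage template above is filled in later with str.format (see prompt_final in the swarm function below), so the {response_1_5b} and {context_7b} placeholders must keep exactly those names. A minimal illustration of that contract, using made-up sample values rather than real app output:

# Toy values standing in for the 1.5B analysis and the retrieved memory context.
sample_analysis = "Use a hash map to count occurrences; watch the empty-input edge case."
sample_context = "- 1.5B Model Initial Response: Use a hash map..."

prompt_final = default_prompt_7b.format(
    response_1_5b=sample_analysis,
    context_7b=sample_context,
)
print(prompt_final)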
 
# Function to load model and tokenizer (same)

    )
    return model, tokenizer

+ # Load the selected models and tokenizers (now loads 1.5B, 7B, 14B)
models = {}
tokenizers = {}
for size, model_id in model_ids.items():
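The body of load_model_and_tokenizer is mostly truncated in this diff (only the closing parenthesis and the return are visible), so the exact from_pretrained arguments are not shown. A minimal sketch of such a loader, with the dtype and device placement marked as assumptions rather than what app.py necessarily uses:

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

def load_model_and_tokenizer(model_id):
    # Sketch only: the real keyword arguments are cut off in this diff.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,  # assumption; the app may use a different dtype
        device_map="auto",           # assumption; lets Accelerate place the weights
    )
    return model, tokenizer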
 
    return relevant_memories[:top_k]
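store_in_memory and retrieve_from_memory appear in this diff only at their call sites and final return, so the ranking logic itself is not shown. A clearly hypothetical keyword-overlap sketch that is consistent with the visible pieces (a module-level shared_memory list and a top_k slice); the real app.py may score matches differently:

shared_memory = []  # cleared at the start of each swarm_agent_sequential_rag call

def store_in_memory(memory_item):
    # Keep a short text snippet for later retrieval.
    shared_memory.append(memory_item)

def retrieve_from_memory(query, top_k=2):
    # Hypothetical ranking: count shared lowercase words between query and each memory.
    query_words = set(query.lower().split())
    scored = [(len(query_words & set(mem.lower().split())), mem) for mem in shared_memory]
    relevant_memories = [mem for score, mem in sorted(scored, key=lambda pair: pair[0], reverse=True) if score > 0]
    return relevant_memories[:top_k]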
 
+ # --- Swarm Agent Function with Model Swapping ---
@spaces.GPU # <---- GPU DECORATOR ADDED HERE!
+ def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_template, final_model_size="7B", temperature=0.5, top_p=0.9, max_new_tokens=300): # Added final_model_size
    global shared_memory
    shared_memory = [] # Clear memory for each new request

+     print(f"\n--- Swarm Agent Processing with Shared Memory (RAG) - GPU ACCELERATED - Final Model: {final_model_size} ---") # Updated message

    # 1.5B Model - Brainstorming/Initial Draft (same logic)
+     print("\n[1.5B Model - Brainstorming] - GPU Accelerated")
    retrieved_memory_1_5b = retrieve_from_memory(user_prompt)
    context_1_5b = "\n".join([f"- {mem}" for mem in retrieved_memory_1_5b]) if retrieved_memory_1_5b else "No relevant context found in memory."

    print(f"1.5B Response:\n{response_1_5b}")
    store_in_memory(f"1.5B Model Initial Response: {response_1_5b[:200]}...")

+     # Final Stage Model Selection (7B or 14B)
+     if final_model_size == "7B":
+         final_model = models["7B"]
+         final_tokenizer = tokenizers["7B"]
+         print("\n[7B Model - Final Code Generation] - GPU Accelerated") # Model-specific message
+         model_stage_name = "7B Model - Final Code"
+         final_max_new_tokens = max_new_tokens + 100 # Slightly more tokens for 7B
+
+     elif final_model_size == "14B":
+         final_model = models["14B"]
+         final_tokenizer = tokenizers["14B"]
+         print("\n[14B Model - Final Code Generation] - GPU Accelerated") # Model-specific message
+         model_stage_name = "14B Model - Final Code"
+         final_max_new_tokens = max_new_tokens + 200 # Even more tokens for 14B
+
+     else: # Default to 7B if selection is somehow invalid
+         final_model = models["7B"]
+         final_tokenizer = tokenizers["7B"]
+         print("\n[7B Model - Final Code Generation] - GPU Accelerated (Default)")
+         model_stage_name = "7B Model - Final Code (Default)"
+         final_max_new_tokens = max_new_tokens + 100
+
+     retrieved_memory_final = retrieve_from_memory(response_1_5b)
+     context_final = "\n".join([f"- {mem}" for mem in retrieved_memory_final]) if retrieved_memory_final else "No relevant context found in memory."
+
+     # Use user-provided prompt template for final model (currently using 7B prompt for both 7B and 14B for simplicity, you can create a separate 14B prompt if needed)
+     prompt_final = prompt_7b_template.format(response_1_5b=response_1_5b, context_7b=context_final) # Using prompt_7b_template for final stage for now
+
+     input_ids_final = final_tokenizer.encode(prompt_final, return_tensors="pt").to(final_model.device)
+     output_final = final_model.generate(
+         input_ids_final,
+         max_new_tokens=final_max_new_tokens,
+         temperature=temperature,
+         top_p=top_p,
        do_sample=True
    )
+     response_final = final_tokenizer.decode(output_final[0], skip_special_tokens=True)
+     print(f"{model_stage_name} Response:\n{response_final}")
+     store_in_memory(f"{model_stage_name} Response: {response_final[:200]}...")

+     return response_final # Returns final model's response
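A note on the selection block above: the three branches differ only in the model key, the stage label, and the token budget, so the same behavior could be table-driven. A sketch of that alternative (a possible refactor, not what this commit does; the names below are hypothetical):

# Hypothetical refactor of the if/elif/else: map each size to its stage settings,
# falling back to "7B" when the dropdown value is unexpected.
FINAL_STAGE_SETTINGS = {
    "7B":  {"label": "7B Model - Final Code",  "extra_tokens": 100},
    "14B": {"label": "14B Model - Final Code", "extra_tokens": 200},
}

def pick_final_stage(final_model_size, max_new_tokens):
    size = final_model_size if final_model_size in FINAL_STAGE_SETTINGS else "7B"
    settings = FINAL_STAGE_SETTINGS[size]
    return (
        models[size],                               # final_model
        tokenizers[size],                           # final_tokenizer
        settings["label"],                          # model_stage_name
        max_new_tokens + settings["extra_tokens"],  # final_max_new_tokens
    )

This keeps the defaulting behavior of the else branch while removing the duplicated assignments.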
 
+ # --- Gradio ChatInterface --- (with Model Selection Dropdown)
+ def gradio_interface(message, history, temp, top_p, max_tokens, prompt_1_5b_text, prompt_7b_text, final_model_selector): # Added final_model_selector
    # history is automatically managed by ChatInterface
    response = swarm_agent_sequential_rag(
        message,
        prompt_1_5b_template=prompt_1_5b_text, # Pass prompt templates
        prompt_7b_template=prompt_7b_text,
+         final_model_size=final_model_selector, # Pass model selection
        temperature=temp,
        top_p=top_p,
        max_new_tokens=int(max_tokens) # Ensure max_tokens is an integer

iface = gr.ChatInterface( # Using ChatInterface now
    fn=gradio_interface,
+     # Define additional inputs for settings, prompts, and model selection
    additional_inputs=[
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="Temperature"), # Lowered default temp to 0.5
        gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P"),
        gr.Number(value=300, label="Max Tokens", precision=0), # Use Number for integer tokens
        gr.Textbox(value=default_prompt_1_5b, lines=10, label="1.5B Model Prompt Template"), # Textbox for 1.5B prompt
        gr.Textbox(value=default_prompt_7b, lines=10, label="7B Model Prompt Template"), # Textbox for 7B prompt
+         gr.Dropdown(choices=["7B", "14B"], value="7B", label="Final Stage Model (7B or 14B)") # Model selection dropdown
    ],
+     title="DeepSeek Agent Swarm Chat (ZeroGPU Demo - 2 Models + Model Swap)", # Updated title
+     description="Chat with a DeepSeek agent swarm (1.5B + 7B/14B selectable) with shared memory, adjustable settings, **customizable prompts, and model swapping!** **GPU accelerated using ZeroGPU!** (Requires Pro Space)", # Updated description
)

if __name__ == "__main__":
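One wiring detail worth calling out: gr.ChatInterface passes each additional_inputs value to fn positionally, after message and history, in the order the components are listed, which is why gradio_interface's signature ends with final_model_selector to line up with the new Dropdown. A self-contained toy example of that contract (not the app's actual interface):

import gradio as gr

def toy_fn(message, history, temperature, final_model_size):
    # Arguments arrive in the same order as the additional_inputs list below.
    return f"final model: {final_model_size}, temperature: {temperature}, you said: {message}"

demo = gr.ChatInterface(
    fn=toy_fn,
    additional_inputs=[
        gr.Slider(minimum=0.1, maximum=1.0, value=0.5, label="Temperature"),
        gr.Dropdown(choices=["7B", "14B"], value="7B", label="Final Stage Model"),
    ],
)

if __name__ == "__main__":
    demo.launch()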