wuhp committed on
Commit b27451f · verified · 1 Parent(s): 7714f74

Update app.py

Files changed (1)
  1. app.py +25 -56
app.py CHANGED
@@ -9,71 +9,40 @@ model_ids = {
  "7B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
  }

- # Default Prompts - User can override these in the UI
+ # Revised Default Prompts
 default_prompt_1_5b = """**Code Analysis Task**
- As a Senior Code Analyst, process this programming problem:
+ As a Senior Code Analyst, analyze this programming problem:

- **User Request**
+ **User Request:**
 {user_prompt}

- **Context from Memory**
+ **Relevant Context:**
 {context_1_5b}

- **Required Output Format**
- 1. Problem Breakdown:
- - Input/Output requirements
- - Key constraints
- - Edge cases to consider
+ **Analysis Required:**
+ 1. Briefly break down the problem, including key constraints and edge cases.
+ 2. Suggest 2-3 potential approach options (algorithms/data structures).
+ 3. Recommend a primary strategy and explain your reasoning concisely.
+ 4. Provide a very brief initial pseudocode sketch of the core logic."""

- 2. Approach Options:
- - [Option 1] Algorithm/data structure choices
- - [Option 2] Alternative solutions
- - Time/space complexity analysis
-
- 3. Recommended Strategy:
- - Best approach selection rationale
- - Potential pitfalls to avoid
-
- 4. Initial Pseudocode Sketch:
- - High-level structure
- - Critical function definitions"""

 default_prompt_7b = """**Code Implementation Task**
- As a Principal Software Engineer, finalize this solution:
+ As a Principal Software Engineer, develop a solution based on this analysis:

- **Initial Analysis**
+ **Initial Analysis:**
 {response_1_5b}

- **Context from Memory**
+ **Relevant Context:**
 {context_7b}

- **Required Output Format**
- 1. Optimized Solution:
- - Final algorithm choice justification
- - Complexity analysis (Big O)
-
- 2. Production-Grade Code:
- - Clean, modular implementation
- - Language: [Python/JS/etc] (infer from question)
- - Error handling
- - Documentation
-
- 3. Testing Plan:
- - Sample test cases (normal/edge cases)
- - Potential failure points
-
- 4. Optimization Opportunities:
- - Alternative approaches for different constraints
- - Parallelization/performance tips
- - Memory management considerations
-
- 5. Debugging Guide:
- - Common mistakes
- - Logging suggestions
- - Step-through example"""
+ **Solution Development Requirements:**
+ 1. Present an optimized solution approach, justifying your algorithm choices.
+ 2. Provide production-grade code in [Python/JS/etc.] (infer language). Include error handling and comments.
+ 3. Outline a testing plan with key test cases.
+ 4. Briefly suggest optimization opportunities and debugging tips."""


- # Function to load model and tokenizer (slightly adjusted device_map)
+ # Function to load model and tokenizer (same)
 def load_model_and_tokenizer(model_id):
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
  model = AutoModelForCausalLM.from_pretrained(
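Note: both templates are plain Python format strings; the swarm function presumably fills them with str.format and chains the 1.5B output into the 7B prompt. A minimal sketch of that hand-off (the example values are invented, not from the app):

# Sketch only: illustrates the template chaining; the literal strings are hypothetical.
prompt_1_5b = default_prompt_1_5b.format(
    user_prompt="Merge overlapping intervals in a list.",
    context_1_5b="No relevant context found in memory.",
)
# ... the 1.5B model generates response_1_5b from prompt_1_5b ...
response_1_5b = "Sort by start, then sweep and merge; O(n log n)."
prompt_7b = default_prompt_7b.format(
    response_1_5b=response_1_5b,
    context_7b="No relevant context found in memory.",
)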
@@ -84,7 +53,7 @@ def load_model_and_tokenizer(model_id):
  )
  return model, tokenizer

- # Load the selected models and tokenizers
+ # Load the selected models and tokenizers (same)
 models = {}
 tokenizers = {}
 for size, model_id in model_ids.items():
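The body of load_model_and_tokenizer is mostly outside this hunk. For reference, a typical loader for these checkpoints looks roughly like the sketch below; the dtype is an assumption, and the removed comment only suggests that device_map is set here.

# Sketch under assumptions; not the exact code hidden by the diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_model_and_tokenizer_sketch(model_id):
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,  # assumed dtype, not visible in the diff
        device_map="auto",           # the old comment mentions an adjusted device_map
        trust_remote_code=True,
    )
    return model, tokenizer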
@@ -92,7 +61,7 @@ for size, model_id in model_ids.items():
  models[size], tokenizers[size] = load_model_and_tokenizer(model_id)
  print(f"Loaded {size} model.")

- # --- Shared Memory Implementation --- (Same as before)
+ # --- Shared Memory Implementation --- (Same)
 shared_memory = []

 def store_in_memory(memory_item):
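The memory helpers are unchanged and largely outside this diff; only store_in_memory's signature and the retrieve_from_memory(query, top_k=2) signature are visible. A minimal keyword-overlap version consistent with those calls might look like this (an assumption, not the app's actual implementation):

# Sketch: simple word-overlap retrieval over an in-memory list.
shared_memory_sketch = []

def store_in_memory_sketch(memory_item):
    shared_memory_sketch.append(memory_item)

def retrieve_from_memory_sketch(query, top_k=2):
    # Rank stored items by how many words they share with the query, best first.
    query_words = set(query.lower().split())
    scored = [(len(query_words & set(item.lower().split())), item)
              for item in shared_memory_sketch]
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [item for score, item in scored[:top_k] if score > 0]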
@@ -116,13 +85,13 @@ def retrieve_from_memory(query, top_k=2):

 # --- Swarm Agent Function with Shared Memory (RAG) - DECORATED with @spaces.GPU ---
 @spaces.GPU # <---- GPU DECORATOR ADDED HERE!
- def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_template, temperature=0.7, top_p=0.9, max_new_tokens=300): # Added prompt templates as arguments
+ def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_template, temperature=0.5, top_p=0.9, max_new_tokens=300): # Lowered default temperature
  global shared_memory
  shared_memory = [] # Clear memory for each new request

  print("\n--- Swarm Agent Processing with Shared Memory (RAG) - GPU ACCELERATED ---") # Updated message

- # 1.5B Model - Brainstorming/Initial Draft
+ # 1.5B Model - Brainstorming/Initial Draft (same logic)
  print("\n[1.5B Model - Brainstorming] - GPU Accelerated") # Added GPU indication
  retrieved_memory_1_5b = retrieve_from_memory(user_prompt)
  context_1_5b = "\n".join([f"- {mem}" for mem in retrieved_memory_1_5b]) if retrieved_memory_1_5b else "No relevant context found in memory."
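The generation step that follows context_1_5b is not shown in this hunk. Inside swarm_agent_sequential_rag it presumably formats the template and calls generate() with the function's sampling arguments; a sketch of that step, assuming standard transformers usage and that the models/tokenizers dicts are keyed "1.5B"/"7B":

# Sketch (assumption): the elided 1.5B generation step.
prompt_1_5b = prompt_1_5b_template.format(user_prompt=user_prompt, context_1_5b=context_1_5b)
inputs_1_5b = tokenizers["1.5B"](prompt_1_5b, return_tensors="pt").to(models["1.5B"].device)
output_ids = models["1.5B"].generate(
    **inputs_1_5b,
    max_new_tokens=max_new_tokens,
    temperature=temperature,  # default lowered from 0.7 to 0.5 in this commit
    top_p=top_p,
    do_sample=True,
)
# Decode only the newly generated tokens, skipping the echoed prompt.
response_1_5b = tokenizers["1.5B"].decode(
    output_ids[0][inputs_1_5b["input_ids"].shape[1]:], skip_special_tokens=True
)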
@@ -142,7 +111,7 @@ def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_temp
  print(f"1.5B Response:\n{response_1_5b}")
  store_in_memory(f"1.5B Model Initial Response: {response_1_5b[:200]}...")

- # 7B Model - Elaboration and Detail
+ # 7B Model - Elaboration and Detail (same logic)
  print("\n[7B Model - Elaboration] - GPU Accelerated") # Added GPU indication
  retrieved_memory_7b = retrieve_from_memory(response_1_5b)
  context_7b = "\n".join([f"- {mem}" for mem in retrieved_memory_7b]) if retrieved_memory_7b else "No relevant context found in memory."
@@ -166,7 +135,7 @@ def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_temp
  return response_7b # Now returns the 7B model's response as final


- # --- Gradio ChatInterface ---
+ # --- Gradio ChatInterface --- (same interface definition)
 def gradio_interface(message, history, temp, top_p, max_tokens, prompt_1_5b_text, prompt_7b_text): # Accept prompt textboxes
  # history is automatically managed by ChatInterface
  response = swarm_agent_sequential_rag(
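The rest of this call is cut off by the hunk; given the parameter names, the wiring between the Gradio inputs and the swarm function is presumably along these lines (a sketch, with keyword names mirroring swarm_agent_sequential_rag's signature):

# Sketch of the assumed wiring from ChatInterface inputs to the swarm function.
def gradio_interface_sketch(message, history, temp, top_p, max_tokens, prompt_1_5b_text, prompt_7b_text):
    response = swarm_agent_sequential_rag(
        message,
        prompt_1_5b_template=prompt_1_5b_text,
        prompt_7b_template=prompt_7b_text,
        temperature=temp,
        top_p=top_p,
        max_new_tokens=int(max_tokens),
    )
    return response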
@@ -183,7 +152,7 @@ iface = gr.ChatInterface( # Using ChatInterface now
  fn=gradio_interface,
  # Define additional inputs for settings and prompts
  additional_inputs=[
-  gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature"),
+  gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="Temperature"), # Lowered default temp to 0.5
  gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P"),
  gr.Number(value=300, label="Max Tokens", precision=0), # Use Number for integer tokens
  gr.Textbox(value=default_prompt_1_5b, lines=10, label="1.5B Model Prompt Template"), # Textbox for 1.5B prompt
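The tail of the interface definition (the 7B prompt textbox and the launch call) falls outside the visible hunk. A complete assembly consistent with the arguments shown above would be roughly the following; the 7B textbox and the launch block are assumptions, not part of this diff:

# Sketch of the full ChatInterface assembly under the assumptions noted above.
iface = gr.ChatInterface(
    fn=gradio_interface,
    additional_inputs=[
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="Temperature"),
        gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P"),
        gr.Number(value=300, label="Max Tokens", precision=0),
        gr.Textbox(value=default_prompt_1_5b, lines=10, label="1.5B Model Prompt Template"),
        gr.Textbox(value=default_prompt_7b, lines=10, label="7B Model Prompt Template"),  # assumed counterpart textbox
    ],
)

if __name__ == "__main__":
    iface.launch()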
 