Spaces:

wuhp
/

r1-agents

Running on Zero

App Files Files Community

wuhp committed on Feb 3

Commit

b27451f

verified ·

1 Parent(s): 7714f74

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -56

app.py CHANGED Viewed

@@ -9,71 +9,40 @@ model_ids = {
     "7B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
 }
-# Default Prompts - User can override these in the UI
 default_prompt_1_5b = """**Code Analysis Task**
-As a Senior Code Analyst, process this programming problem:
-**User Request**
 {user_prompt}
-**Context from Memory**
 {context_1_5b}
-**Required Output Format**
-1. Problem Breakdown:
-   - Input/Output requirements
-   - Key constraints
-   - Edge cases to consider
-2. Approach Options:
-   - [Option 1] Algorithm/data structure choices
-   - [Option 2] Alternative solutions
-   - Time/space complexity analysis
-3. Recommended Strategy:
-   - Best approach selection rationale
-   - Potential pitfalls to avoid
-4. Initial Pseudocode Sketch:
-   - High-level structure
-   - Critical function definitions"""
 default_prompt_7b = """**Code Implementation Task**
-As a Principal Software Engineer, finalize this solution:
-**Initial Analysis**
 {response_1_5b}
-**Context from Memory**
 {context_7b}
-**Required Output Format**
-1. Optimized Solution:
-   - Final algorithm choice justification
-   - Complexity analysis (Big O)
-2. Production-Grade Code:
-   - Clean, modular implementation
-   - Language: [Python/JS/etc] (infer from question)
-   - Error handling
-   - Documentation
-3. Testing Plan:
-   - Sample test cases (normal/edge cases)
-   - Potential failure points
-4. Optimization Opportunities:
-   - Alternative approaches for different constraints
-   - Parallelization/performance tips
-   - Memory management considerations
-5. Debugging Guide:
-   - Common mistakes
-   - Logging suggestions
-   - Step-through example"""
-# Function to load model and tokenizer (slightly adjusted device_map)
 def load_model_and_tokenizer(model_id):
     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
@@ -84,7 +53,7 @@ def load_model_and_tokenizer(model_id):
     )
     return model, tokenizer
-# Load the selected models and tokenizers
 models = {}
 tokenizers = {}
 for size, model_id in model_ids.items():
@@ -92,7 +61,7 @@ for size, model_id in model_ids.items():
     models[size], tokenizers[size] = load_model_and_tokenizer(model_id)
     print(f"Loaded {size} model.")
-# --- Shared Memory Implementation --- (Same as before)
 shared_memory = []
 def store_in_memory(memory_item):
@@ -116,13 +85,13 @@ def retrieve_from_memory(query, top_k=2):
 # --- Swarm Agent Function with Shared Memory (RAG) - DECORATED with @spaces.GPU ---
 @spaces.GPU  # <----  GPU DECORATOR ADDED HERE!
-def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_template, temperature=0.7, top_p=0.9, max_new_tokens=300): # Added prompt templates as arguments
     global shared_memory
     shared_memory = [] # Clear memory for each new request
     print("\n--- Swarm Agent Processing with Shared Memory (RAG) - GPU ACCELERATED ---") # Updated message
-    # 1.5B Model - Brainstorming/Initial Draft
     print("\n[1.5B Model - Brainstorming] - GPU Accelerated") # Added GPU indication
     retrieved_memory_1_5b = retrieve_from_memory(user_prompt)
     context_1_5b = "\n".join([f"- {mem}" for mem in retrieved_memory_1_5b]) if retrieved_memory_1_5b else "No relevant context found in memory."
@@ -142,7 +111,7 @@ def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_temp
     print(f"1.5B Response:\n{response_1_5b}")
     store_in_memory(f"1.5B Model Initial Response: {response_1_5b[:200]}...")
-    # 7B Model - Elaboration and Detail
     print("\n[7B Model - Elaboration] - GPU Accelerated") # Added GPU indication
     retrieved_memory_7b = retrieve_from_memory(response_1_5b)
     context_7b = "\n".join([f"- {mem}" for mem in retrieved_memory_7b]) if retrieved_memory_7b else "No relevant context found in memory."
@@ -166,7 +135,7 @@ def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_temp
     return response_7b # Now returns the 7B model's response as final
-# --- Gradio ChatInterface ---
 def gradio_interface(message, history, temp, top_p, max_tokens, prompt_1_5b_text, prompt_7b_text): # Accept prompt textboxes
     # history is automatically managed by ChatInterface
     response = swarm_agent_sequential_rag(
@@ -183,7 +152,7 @@ iface = gr.ChatInterface( # Using ChatInterface now
     fn=gradio_interface,
     # Define additional inputs for settings and prompts
     additional_inputs=[
-        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature"),
         gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P"),
         gr.Number(value=300, label="Max Tokens", precision=0), # Use Number for integer tokens
         gr.Textbox(value=default_prompt_1_5b, lines=10, label="1.5B Model Prompt Template"), # Textbox for 1.5B prompt

     "7B": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
 }
+# Revised Default Prompts
 default_prompt_1_5b = """**Code Analysis Task**
+As a Senior Code Analyst, analyze this programming problem:
+**User Request:**
 {user_prompt}
+**Relevant Context:**
 {context_1_5b}
+**Analysis Required:**
+1. Briefly break down the problem, including key constraints and edge cases.
+2. Suggest 2-3 potential approach options (algorithms/data structures).
+3. Recommend a primary strategy and explain your reasoning concisely.
+4. Provide a very brief initial pseudocode sketch of the core logic."""
 default_prompt_7b = """**Code Implementation Task**
+As a Principal Software Engineer, develop a solution based on this analysis:
+**Initial Analysis:**
 {response_1_5b}
+**Relevant Context:**
 {context_7b}
+**Solution Development Requirements:**
+1. Present an optimized solution approach, justifying your algorithm choices.
+2. Provide production-grade code in [Python/JS/etc.] (infer language). Include error handling and comments.
+3. Outline a testing plan with key test cases.
+4. Briefly suggest optimization opportunities and debugging tips."""
+# Function to load model and tokenizer (same)
 def load_model_and_tokenizer(model_id):
     tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
     model = AutoModelForCausalLM.from_pretrained(
     )
     return model, tokenizer
+# Load the selected models and tokenizers (same)
 models = {}
 tokenizers = {}
 for size, model_id in model_ids.items():
     models[size], tokenizers[size] = load_model_and_tokenizer(model_id)
     print(f"Loaded {size} model.")
+# --- Shared Memory Implementation --- (Same)
 shared_memory = []
 def store_in_memory(memory_item):
 # --- Swarm Agent Function with Shared Memory (RAG) - DECORATED with @spaces.GPU ---
 @spaces.GPU  # <----  GPU DECORATOR ADDED HERE!
+def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_template, temperature=0.5, top_p=0.9, max_new_tokens=300): # Lowered default temperature
     global shared_memory
     shared_memory = [] # Clear memory for each new request
     print("\n--- Swarm Agent Processing with Shared Memory (RAG) - GPU ACCELERATED ---") # Updated message
+    # 1.5B Model - Brainstorming/Initial Draft (same logic)
     print("\n[1.5B Model - Brainstorming] - GPU Accelerated") # Added GPU indication
     retrieved_memory_1_5b = retrieve_from_memory(user_prompt)
     context_1_5b = "\n".join([f"- {mem}" for mem in retrieved_memory_1_5b]) if retrieved_memory_1_5b else "No relevant context found in memory."
     print(f"1.5B Response:\n{response_1_5b}")
     store_in_memory(f"1.5B Model Initial Response: {response_1_5b[:200]}...")
+    # 7B Model - Elaboration and Detail (same logic)
     print("\n[7B Model - Elaboration] - GPU Accelerated") # Added GPU indication
     retrieved_memory_7b = retrieve_from_memory(response_1_5b)
     context_7b = "\n".join([f"- {mem}" for mem in retrieved_memory_7b]) if retrieved_memory_7b else "No relevant context found in memory."
     return response_7b # Now returns the 7B model's response as final
+# --- Gradio ChatInterface --- (same interface definition)
 def gradio_interface(message, history, temp, top_p, max_tokens, prompt_1_5b_text, prompt_7b_text): # Accept prompt textboxes
     # history is automatically managed by ChatInterface
     response = swarm_agent_sequential_rag(
     fn=gradio_interface,
     # Define additional inputs for settings and prompts
     additional_inputs=[
+        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="Temperature"), # Lowered default temp to 0.5
         gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P"),
         gr.Number(value=300, label="Max Tokens", precision=0), # Use Number for integer tokens
         gr.Textbox(value=default_prompt_1_5b, lines=10, label="1.5B Model Prompt Template"), # Textbox for 1.5B prompt