wuhp committed on
Commit ccc6355 · verified · 1 Parent(s): 361c4d3

Update app.py

Files changed (1)
  1. app.py +291 -171
app.py CHANGED
@@ -5,210 +5,273 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
 from threading import Thread
 
-# Model IDs from Hugging Face Hub (Fixed to Unsloth 7B and 32B Unsloth 4bit)
-model_ids = {
-    "7B-Unsloth": "unsloth/DeepSeek-R1-Distill-Qwen-7B-unsloth-bnb-4bit", # Unsloth 7B model
-    "32B-Unsloth": "unsloth/DeepSeek-R1-Distill-Qwen-32B-bnb-4bit", # Unsloth 32B model
-}
+# --- Model & Quantization Settings ---
+MODEL_ID = "unsloth/DeepSeek-R1-Distill-Qwen-7B-unsloth-bnb-4bit"
 
-models = {} # Keep models as a dictionary, but initially empty
-tokenizers = {} # Keep tokenizers as a dictionary, initially empty
+# Dictionaries to store the loaded model and tokenizer
+models = {}
+tokenizers = {}
 
-# BitsAndBytesConfig for 4-bit quantization (for BOTH models now)
 bnb_config_4bit = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16, # Or torch.float16 if needed
+    bnb_4bit_compute_dtype=torch.bfloat16, # Or torch.float16 if needed
 )
 
-
-def get_model_and_tokenizer(size): # Function to load model on demand
-    if size not in models: # Load only if not already loaded
-        model_id = model_ids[size]
-        print(f"Loading {size} model: {model_id} on demand")
-        tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+def get_model_and_tokenizer():
+    """Lazy-load the model and tokenizer if not already loaded."""
+    if "7B" not in models:
+        print(f"Loading 7B model: {MODEL_ID} on demand")
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
         model = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            quantization_config=bnb_config_4bit, # Apply 4-bit config for BOTH models
-            torch_dtype=torch.bfloat16, # Or torch.float16 if needed
+            MODEL_ID,
+            quantization_config=bnb_config_4bit,
+            torch_dtype=torch.bfloat16, # Or torch.float16 if needed
             device_map='auto',
-            trust_remote_code=True
+            trust_remote_code=True,
         )
-        models[size] = model
-        tokenizers[size] = tokenizer
-        print(f"Loaded {size} model on demand.")
-    return models[size], tokenizers[size]
+        models["7B"] = model
+        tokenizers["7B"] = tokenizer
+        print("Loaded 7B model on demand.")
+    return models["7B"], tokenizers["7B"]
 
-
-# Revised Default Prompts (as defined previously - these are still good)
-default_prompt_1_5b = """**Code Analysis Task**
-As a Senior Code Analyst, analyze this programming problem:
+# --- Default Prompt Templates ---
+default_prompt_brainstorm = """**Brainstorming Task (Round 1)**
+As a Senior Code Analyst, provide an initial analysis of the problem below.
 
 **User Request:**
 {user_prompt}
 
-**Relevant Context:**
-{context_1_5b}
-
-**Analysis Required:**
-1. Briefly break down the problem, including key constraints and edge cases.
-2. Suggest 2-3 potential approach options (algorithms/data structures).
-3. Recommend ONE primary strategy and briefly justify your choice.
-4. Provide a very brief initial pseudocode sketch of the core logic."""
-
+**Guidelines:**
+1. Identify key challenges and constraints.
+2. Suggest multiple potential approaches.
+3. Outline any potential edge cases or critical considerations.
+"""
 
-default_prompt_7b = """**Code Implementation Task**
-As a Principal Software Engineer, provide production-ready Streamlit/Python code based on this analysis:
+default_prompt_code_generation = """**Advanced Reasoning & Code Generation (Round 2)**
+Based on the initial analysis below:
 
 **Initial Analysis:**
-{response_1_5b}
+{brainstorm_response}
 
-**Relevant Context:**
-{context_7b}
+**User Request:**
+{user_prompt}
 
-**Code Requirements:**
-1. Generate concise, production-grade Python code for a Streamlit app.
-2. Include necessary imports, UI elements, and basic functionality.
-3. Add comments for clarity.
-"""
+**Task:**
+1. Develop a detailed solution that includes production-ready code.
+2. Explain the reasoning behind the chosen approach.
+3. Incorporate advanced reasoning to handle edge cases.
+4. Provide commented code that is clear and maintainable.
+"""
 
+default_prompt_synthesis = """**Synthesis & Final Refinement (Round 3)**
+Review the detailed code generation and reasoning below, and produce a final, refined response that:
+1. Synthesizes the brainstorming insights and advanced reasoning.
+2. Provides a concise summary of the solution.
+3. Highlights any potential improvements or considerations.
 
-# --- Shared Memory Implementation --- (Same)
+**Detailed Response:**
+{code_response}
+"""
+
+# --- Shared Memory for Rounds ---
 shared_memory = []
 
 def store_in_memory(memory_item):
+    """Store a memory item and log an excerpt."""
     shared_memory.append(memory_item)
     print(f"\n[Memory Stored]: {memory_item[:50]}...")
 
 def retrieve_from_memory(query, top_k=2):
+    """
+    Retrieve memory items that contain the query text (case-insensitive).
+    Returns up to top_k items.
+    """
     relevant_memories = []
     query_lower = query.lower()
     for memory_item in shared_memory:
         if query_lower in memory_item.lower():
             relevant_memories.append(memory_item)
-
     if not relevant_memories:
         print("\n[Memory Retrieval]: No relevant memories found.")
         return []
-
     print(f"\n[Memory Retrieval]: Found {len(relevant_memories)} relevant memories.")
     return relevant_memories[:top_k]
 
-
-# --- Streaming Swarm Agent Function - Fixed Models (Unsloth 7B and 32B Unsloth) ---
-@spaces.GPU(duration=120) # Added duration
-def swarm_agent_sequential_rag(user_prompt, prompt_1_5b_template, prompt_7b_template, temperature=0.5, top_p=0.9, max_new_tokens=300): # Removed final_model_size
+# --- Multi-Round Swarm Agent Function ---
+@spaces.GPU(duration=180) # Adjust duration as needed
+def swarm_agent_iterative(user_prompt, temp, top_p, max_new_tokens, memory_top_k,
+                          prompt_brainstorm_text, prompt_code_generation_text, prompt_synthesis_text):
+    """
+    A three-round iterative process that uses the provided prompt templates:
+      - Round 1: Brainstorming.
+      - Round 2: Advanced reasoning & code generation.
+      - Round 3: Synthesis & refinement.
+    This generator yields the response from the final round as it is produced.
+    """
     global shared_memory
-    shared_memory = [] # Clear memory for each new request
-
-    print(f"\n--- Swarm Agent Processing with Shared Memory (RAG) - GPU ACCELERATED - Final Model: 32B Unsloth ---") # Updated message
-
-    # 7B Unsloth Model - Brainstorming/Initial Draft (Lazy Load and get model)
-    print("\n[7B Unsloth Model - Brainstorming] - GPU Accelerated") # Now 7B Unsloth is brainstorming
-    model_7b, tokenizer_7b = get_model_and_tokenizer("7B-Unsloth") # Lazy load 7B Unsloth
-    retrieved_memory_7b = retrieve_from_memory(user_prompt)
-    context_7b = "\n".join([f"- {mem}" for mem in retrieved_memory_7b]) if retrieved_memory_7b else "No relevant context found in memory."
-
-    # Use user-provided prompt template for 7B model (as brainstorming model now)
-    prompt_7b_brainstorm = prompt_1_5b_template.format(user_prompt=user_prompt, context_1_5b=context_7b) # Reusing 1.5B template - adjust if needed
-
-    input_ids_7b = tokenizer_7b.encode(prompt_7b_brainstorm, return_tensors="pt").to(model_7b.device)
-    streamer_7b = TextIteratorStreamer(tokenizer_7b, timeout=10.0, skip_prompt=True, skip_special_tokens=True) # Streamer for 7B
-
-    generate_kwargs_7b = dict( # Generation kwargs for 7B
-        input_ids=input_ids_7b,
-        streamer=streamer_7b,
-        max_new_tokens=max_new_tokens, # Use user-defined max_new_tokens
+    shared_memory = [] # Clear shared memory for each new request
+
+    model, tokenizer = get_model_and_tokenizer()
+
+    # ----- Round 1: Brainstorming -----
+    print("\n--- Round 1: Brainstorming ---")
+    prompt_round1 = prompt_brainstorm_text.format(user_prompt=user_prompt)
+    input_ids_r1 = tokenizer.encode(prompt_round1, return_tensors="pt").to(model.device)
+    streamer_r1 = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+    kwargs_r1 = dict(
+        input_ids=input_ids_r1,
+        streamer=streamer_r1,
+        max_new_tokens=max_new_tokens,
         do_sample=True,
-        temperature=temperature,
+        temperature=temp,
         top_p=top_p,
-        # eos_token_id=tokenizer_7b.eos_token_id, # Not strictly needed as streamer handles EOS
     )
-
-    thread_7b = Thread(target=model_7b.generate, kwargs=generate_kwargs_7b) # Thread for 7B generation
-    thread_7b.start()
-
-    response_7b_stream = "" # Accumulate streamed 7B response
-    print(f"7B Unsloth Response (Brainstorming):\n", end="")
-    for text in streamer_7b: # Stream and print 7B response
-        print(text, end="", flush=True) # Print in place
-        response_7b_stream += text
-        yield response_7b_stream # Yield intermediate 7B response
-
-    store_in_memory(f"7B Unsloth Model Initial Response: {response_7b_stream[:200]}...") # Store accumulated 7B response
-
-    # 32B Unsloth Model - Final Code Generation (Lazy Load and get model)
-    final_model, final_tokenizer = get_model_and_tokenizer("32B-Unsloth") # Lazy load 32B Unsloth
-    print("\n[32B Unsloth Model - Final Code Generation] - GPU Accelerated") # Model-specific message
-    model_stage_name = "32B Unsloth Model - Final Code"
-    final_max_new_tokens = max_new_tokens + 200 # More tokens for 32B model
-
-    retrieved_memory_final = retrieve_from_memory(response_7b_stream) # Memory from streamed 7B response
-    context_final = "\n".join([f"- {mem}" for mem in retrieved_memory_final]) if retrieved_memory_final else "No relevant context found in memory."
-
-    # Use user-provided prompt template for final model (using 7B template)
-    prompt_final = prompt_7b_template.format(response_1_5b=response_7b_stream, context_7b=context_final) # Using prompt_7b_template for final stage
-
-    input_ids_final = final_tokenizer.encode(prompt_final, return_tensors="pt").to(final_model.device)
-    streamer_final = TextIteratorStreamer(final_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True) # Streamer for 32B
-
-    generate_kwargs_final = dict( # Generation kwargs for 32B
-        input_ids=input_ids_final,
-        streamer=streamer_final,
-        max_new_tokens=final_max_new_tokens,
-        temperature=temperature,
+    thread_r1 = Thread(target=model.generate, kwargs=kwargs_r1)
+    thread_r1.start()
+
+    brainstorm_response = ""
+    for text in streamer_r1:
+        print(text, end="", flush=True)
+        brainstorm_response += text
+    store_in_memory(f"Brainstorm Response: {brainstorm_response[:200]}...")
+
+    # ----- Round 2: Code Generation -----
+    print("\n\n--- Round 2: Code Generation ---")
+    prompt_round2 = prompt_code_generation_text.format(
+        brainstorm_response=brainstorm_response,
+        user_prompt=user_prompt
+    )
+    input_ids_r2 = tokenizer.encode(prompt_round2, return_tensors="pt").to(model.device)
+    streamer_r2 = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+    kwargs_r2 = dict(
+        input_ids=input_ids_r2,
+        streamer=streamer_r2,
+        max_new_tokens=max_new_tokens + 100, # extra tokens for detail
+        temperature=temp,
         top_p=top_p,
-        # eos_token_id=final_tokenizer.eos_token_id, # Not strictly needed as streamer handles EOS
     )
+    thread_r2 = Thread(target=model.generate, kwargs=kwargs_r2)
+    thread_r2.start()
+
+    code_response = ""
+    for text in streamer_r2:
+        print(text, end="", flush=True)
+        code_response += text
+    store_in_memory(f"Code Generation Response: {code_response[:200]}...")
+
+    # ----- Round 3: Synthesis & Refinement -----
+    print("\n\n--- Round 3: Synthesis & Refinement ---")
+    prompt_round3 = prompt_synthesis_text.format(code_response=code_response)
+    input_ids_r3 = tokenizer.encode(prompt_round3, return_tensors="pt").to(model.device)
+    streamer_r3 = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+    kwargs_r3 = dict(
+        input_ids=input_ids_r3,
+        streamer=streamer_r3,
+        max_new_tokens=max_new_tokens // 2,
+        temperature=temp,
+        top_p=top_p,
+    )
+    thread_r3 = Thread(target=model.generate, kwargs=kwargs_r3)
+    thread_r3.start()
 
-    thread_final = Thread(target=final_model.generate, kwargs=generate_kwargs_final) # Thread for 32B generation
-    thread_final.start()
-
-    response_final_stream = "" # Accumulate streamed 32B response
-    print(f"\n{model_stage_name} Response:\n", end="")
-    for text in streamer_final: # Stream and print 32B response
-        print(text, end="", flush=True) # Print in place
-        response_final_stream += text
-        yield response_final_stream # Yield intermediate 32B response
-
-    store_in_memory(f"{model_stage_name} Response: {response_final_stream[:200]}...") # Store accumulated 32B response
-
-    return response_final_stream # Returns final streamed response
+    final_response = ""
+    for text in streamer_r3:
+        print(text, end="", flush=True)
+        final_response += text
+        yield final_response # yield progressive updates
 
+    store_in_memory(f"Final Synthesis Response: {final_response[:200]}...")
 
-# --- Gradio ChatInterface --- (No Model Selection Dropdown anymore)
-def gradio_interface(message, history, temp, top_p, max_tokens, prompt_1_5b_text, prompt_7b_text): # Removed final_model_selector
-    # history is automatically managed by ChatInterface
-    full_response = "" # Accumulate full response from generator
-    for partial_response in swarm_agent_sequential_rag( # Iterate through generator
-        message,
-        prompt_1_5b_template=prompt_1_5b_text, # Pass prompt templates
-        prompt_7b_template=prompt_7b_text,
-        temperature=temp,
+# --- Helper to Format History ---
+def format_history(history):
+    """
+    Convert history (which might be a list of [user, assistant] pairs or already formatted dictionaries)
+    into a list of OpenAI-style message dictionaries.
+    """
+    messages = []
+    for item in history:
+        # If item is a list or tuple, try to unpack it if it has exactly 2 elements.
+        if isinstance(item, (list, tuple)):
+            if len(item) == 2:
+                user_msg, assistant_msg = item
+                messages.append({"role": "user", "content": user_msg})
+                if assistant_msg:
+                    messages.append({"role": "assistant", "content": assistant_msg})
+            else:
+                # If it doesn't have exactly two items, skip it.
+                continue
+        elif isinstance(item, dict):
+            # Already formatted message dictionary.
+            messages.append(item)
+        else:
+            continue
+    return messages
+
+# --- Gradio Chat Interface Function ---
+def gradio_interface(message, history, param_state, prompt_state):
+    """
+    This function is called by Gradio's ChatInterface.
+    It uses the current saved generation parameters and prompt templates.
+    """
+    # Unpack parameter state (with fallback defaults)
+    try:
+        temp = float(param_state.get("temperature", 0.5))
+        top_p = float(param_state.get("top_p", 0.9))
+        max_new_tokens = int(param_state.get("max_new_tokens", 300))
+        memory_top_k = int(param_state.get("memory_top_k", 2))
+    except Exception:
+        temp, top_p, max_new_tokens, memory_top_k = 0.5, 0.9, 300, 2
+
+    # Unpack prompt state (with fallback defaults)
+    prompt_brainstorm_text = prompt_state.get("prompt_brainstorm", default_prompt_brainstorm)
+    prompt_code_generation_text = prompt_state.get("prompt_code_generation", default_prompt_code_generation)
+    prompt_synthesis_text = prompt_state.get("prompt_synthesis", default_prompt_synthesis)
+
+    # Append the new user message with an empty assistant reply (as a two-item list)
+    history = history + [[message, ""]]
+
+    # Call the multi-round agent as a generator (for streaming)
+    for partial_response in swarm_agent_iterative(
+        user_prompt=message,
+        temp=temp,
         top_p=top_p,
-        max_new_tokens=int(max_tokens) # Ensure max_tokens is an integer
+        max_new_tokens=max_new_tokens,
+        memory_top_k=memory_top_k,
+        prompt_brainstorm_text=prompt_brainstorm_text,
+        prompt_code_generation_text=prompt_code_generation_text,
+        prompt_synthesis_text=prompt_synthesis_text
     ):
-        full_response = partial_response # Update full response with partial response
-        yield full_response # Yield intermediate full response
-
+        # Update the last assistant message with the new partial response.
+        history[-1][1] = partial_response
+        # Yield the history formatted as OpenAI-style messages.
+        yield format_history(history)
 
-DESCRIPTION = '''
+# --- UI Settings & Styling ---
+ui_description = '''
 <div>
-<h1 style="text-align: center;">DeepSeek Agent Swarm Chat (Unsloth 7B + 32B) - Streaming Demo</h1>
-<p style="text-align: center;">Agent swarm using Unsloth DeepSeek-R1-Distill models (7B + 32B) with shared memory, adjustable settings, and customizable prompts. GPU accelerated using ZeroGPU! (Requires Pro Space)</p>
+<h1 style="text-align: center;">DeepSeek Agent Swarm Chat</h1>
+<p style="text-align: center;">
+  Multi-round agent:
+  <br>- Brainstorming
+  <br>- Advanced reasoning & code generation
+  <br>- Synthesis & refinement
+</p>
 </div>
 '''
 
-LICENSE = """
+ui_license = """
 <p/>
 ---
 """
 
-PLACEHOLDER = """
-Ask me anything...
+ui_placeholder = """
+<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
+   <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">DeepSeek Agent Swarm</h1>
+   <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Ask me anything...</p>
+</div>
 """
 
-
 css = """
 h1 {
   text-align: center;
@@ -221,35 +284,92 @@ h1 {
   border-radius: 100vh;
 }
 """
-# Gradio ChatInterface with streaming
-chatbot=gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='Agent Swarm Output')
-
-with gr.Blocks(fill_height=True, css=css) as demo:
-
-    gr.Markdown(DESCRIPTION)
-    gr.ChatInterface(
-        fn=gradio_interface,
-        chatbot=chatbot,
-        fill_height=True,
-        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False), # Accordion for params
-        additional_inputs=[
-            gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="Temperature"),
-            gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P"),
-            gr.Number(value=300, label="Max Tokens", precision=0),
-            gr.Textbox(value=default_prompt_1_5b, lines=7, label="Brainstorming Model Prompt Template (Unsloth 7B)"),
-            gr.Textbox(value=default_prompt_7b, lines=7, label="Code Generation Prompt Template (Unsloth 32B)"),
-        ],
-        examples=[
-            ['How to setup a human base on Mars? Give short answer.'],
-            ['Explain theory of relativity to me like I’m 8 years old.'],
-            ['Write a streamlit app to track my finances'],
-            ['Write a pun-filled happy birthday message to my friend Alex.'],
-            ['Justify why a penguin might make a good king of the jungle.']
-        ],
-        cache_examples=False,
-    )
 
-    gr.Markdown(LICENSE)
+# --- Gradio UI ---
+with gr.Blocks(css=css, title="DeepSeek Agent Swarm Chat") as demo:
+    gr.Markdown(ui_description)
+
+    # Hidden States to hold parameters and prompt configuration
+    param_state = gr.State({
+        "temperature": 0.5,
+        "top_p": 0.9,
+        "max_new_tokens": 300,
+        "memory_top_k": 2,
+    })
+    prompt_state = gr.State({
+        "prompt_brainstorm": default_prompt_brainstorm,
+        "prompt_code_generation": default_prompt_code_generation,
+        "prompt_synthesis": default_prompt_synthesis,
+    })
+
+    # Create top-level Tabs
+    with gr.Tabs():
+        # --- Chat Tab ---
+        with gr.Tab("Chat"):
+            # Set type="messages" for OpenAI-style message dictionaries
+            chatbot = gr.Chatbot(height=450, placeholder=ui_placeholder, label="Agent Swarm Output", type="messages")
+            # Use ChatInterface and pass the hidden states as additional inputs.
+            gr.ChatInterface(
+                fn=gradio_interface,
+                chatbot=chatbot,
+                additional_inputs=[param_state, prompt_state],
+                examples=[
+                    ['How can we build a robust web service that scales efficiently under load?'],
+                    ['Explain how to design a fault-tolerant distributed system.'],
+                    ['Develop a streamlit app that visualizes real-time financial data.'],
+                    ['Create a pun-filled birthday message with a coding twist.'],
+                    ['Design a system that uses machine learning to optimize resource allocation.']
+                ],
+                cache_examples=False,
+                type="messages",
+            )
+
+        # --- Parameters Tab ---
+        with gr.Tab("Parameters"):
+            gr.Markdown("### Generation Parameters")
+            temp_slider = gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="Temperature")
+            top_p_slider = gr.Slider(minimum=0.01, maximum=1.0, step=0.05, value=0.9, label="Top P")
+            max_tokens_num = gr.Number(value=300, label="Max new tokens", precision=0)
+            memory_topk_slider = gr.Slider(minimum=1, maximum=5, step=1, value=2, label="Memory Retrieval Top K")
+            save_params_btn = gr.Button("Save Parameters")
+            # When the user clicks Save, update the param_state
+            save_params_btn.click(
+                lambda t, p, m, k: {"temperature": t, "top_p": p, "max_new_tokens": m, "memory_top_k": k},
+                inputs=[temp_slider, top_p_slider, max_tokens_num, memory_topk_slider],
+                outputs=param_state,
+            )
+
+        # --- Prompt Config Tab ---
+        with gr.Tab("Prompt Config"):
+            gr.Markdown("### Configure Prompt Templates")
+            prompt_brainstorm_box = gr.Textbox(
+                value=default_prompt_brainstorm,
+                label="Brainstorm Prompt",
+                lines=8,
+            )
+            prompt_code_generation_box = gr.Textbox(
+                value=default_prompt_code_generation,
+                label="Code Generation Prompt",
+                lines=8,
+            )
+            prompt_synthesis_box = gr.Textbox(
+                value=default_prompt_synthesis,
+                label="Synthesis Prompt",
+                lines=8,
+            )
+            save_prompts_btn = gr.Button("Save Prompts")
+            # When clicked, update the prompt_state with new values
+            save_prompts_btn.click(
+                lambda b, c, s: {
+                    "prompt_brainstorm": b,
+                    "prompt_code_generation": c,
+                    "prompt_synthesis": s,
+                },
+                inputs=[prompt_brainstorm_box, prompt_code_generation_box, prompt_synthesis_box],
+                outputs=prompt_state,
+            )
+
+    gr.Markdown(ui_license)
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
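
Below is a minimal, hypothetical smoke-test driver for the new multi-round generator; it is not part of this commit. It assumes a CUDA GPU with enough memory for the 4-bit 7B model, that app.py is importable from the working directory, and that the spaces.GPU decorator degrades to a no-op outside a ZeroGPU Space.

# Hypothetical local test; not part of the commit above.
from app import (
    swarm_agent_iterative,
    default_prompt_brainstorm,
    default_prompt_code_generation,
    default_prompt_synthesis,
)

final_text = ""
# Consume the generator the same way gradio_interface does; the last
# yielded value holds the complete Round 3 (synthesis) output.
for partial in swarm_agent_iterative(
    user_prompt="Write a Streamlit app that tracks daily expenses.",
    temp=0.5,
    top_p=0.9,
    max_new_tokens=300,
    memory_top_k=2,
    prompt_brainstorm_text=default_prompt_brainstorm,
    prompt_code_generation_text=default_prompt_code_generation,
    prompt_synthesis_text=default_prompt_synthesis,
):
    final_text = partial

print("\n\nFinal synthesis:\n" + final_text)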