Spaces:

pyvene
/

AxBench-ReFT-cr1-16K

Sleeping

App Files Files Community

frankaging committed on Jan 27

Commit

0fb9f4b

1 Parent(s): f9cd90a

rebuild

Browse files

Files changed (1) hide show

app.py +57 -42

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
 login(token=HF_TOKEN)
 MAX_MAX_NEW_TOKENS = 2048
-DEFAULT_MAX_NEW_TOKENS = 256  # smaller default to save memory
 MAX_INPUT_TOKEN_LENGTH = 4096
 css = """
@@ -27,6 +27,13 @@ css = """
     border-radius: 4px;
     font-weight: 500;
 }
 """
 def load_jsonl(jsonl_path):
@@ -212,7 +219,6 @@ def generate(
             }
         ] if steering_list else None, # if steering is not provided, we do not steer.
         "streamer": streamer,
-        "repetition_penalty": 1.5,
         "do_sample": True
     }
@@ -252,87 +258,96 @@ def add_concept_to_list(selected_concept, user_slider_val, current_list):
     current_list = [new_entry]
     return current_list
-def update_dropdown_choices(search_text):
     filtered = filter_concepts(search_text)
     if not filtered or len(filtered) == 0:
-        return gr.update(choices=[f"[New] {search_text}"], value=f"[New] {search_text}", interactive=True), gr.Textbox(
-        label="No matching existing concepts were found!",
-        value="Good news! Based on the concept you provided, we will automatically generate a steering vector. Try it out by starting a chat!",
-        lines=3,
-        interactive=False,
-        visible=True,
-        elem_id="alert-message"
-    )
-    # Automatically select the first matching concept
     return gr.update(
         choices=filtered,
-        value=filtered[0],  # Select the first match
-        interactive=True, visible=True
     ), gr.Textbox(visible=False)
 with gr.Blocks(css=css, fill_height=True) as demo:
-    # States for both detection and steering
     selected_detection = gr.State([])
     selected_subspaces = gr.State([])
-    with gr.Row(min_height=1000):
         # Left side: chat area
         with gr.Column(scale=7):
             chat_interface = gr.ChatInterface(
                 fn=generate,
-                title="Chat with a Concept Steering Model",
-                description="""You can only steer the model when a concept is detected internally. Select concepts on the right →\n\nWe intervene on Gemma-2-2B-it by adding steering vectors to the residual stream at layer 20.""",
-                type="messages",
                 additional_inputs=[selected_detection, selected_subspaces],
                 fill_height=True,
-                css=".gradio-chatbot {min-height: 1500px;}"
             )
         # Right side: concept detection and steering
         with gr.Column(scale=3):
-            # Concept Detection Panel
-            # gr.Markdown("## Detect then Steer")
-            gr.Markdown("Select a concept to detect. We will only steer the model when this concept is detected internally.")
-            with gr.Group():
                 detect_search = gr.Textbox(
-                    label="Search Detection Concepts",
-                    placeholder="Find concepts to detect (e.g. 'Google')",
                     lines=1,
                 )
                 detect_msg = gr.TextArea(visible=False)
                 detect_dropdown = gr.Dropdown(
-                    label="Select concept to detect",
                     interactive=True,
                     allow_custom_value=False,
                 )
                 detect_threshold = gr.Slider(
-                    label="Detection Threshold",
                     minimum=0,
                     maximum=1,
-                    step=0.01,
                     value=0.5,
                 )
-            # Divider
-            # gr.Markdown("---")
-            # Steering Panel (existing)
-            # gr.Markdown("## Steer Response")
-            gr.Markdown("Select a concept to steer when detection occurs.")
-            with gr.Group():
                 search_box = gr.Textbox(
-                    label="Search Steering Concepts",
-                    placeholder="Find concepts to steer the model (e.g. 'ethics and morality')",
                     lines=1,
                 )
                 msg = gr.TextArea(visible=False)
                 concept_dropdown = gr.Dropdown(
-                    label="Select concept to steer",
                     interactive=True,
                     allow_custom_value=False,
                 )
                 concept_magnitude = gr.Slider(
-                    label="Steering Intensity",
                     minimum=-5,
                     maximum=5,
                     step=0.1,
@@ -341,7 +356,7 @@ with gr.Blocks(css=css, fill_height=True) as demo:
     # Wire up events for detection
     detect_search.input(
-        update_dropdown_choices,
         [detect_search],
         [detect_dropdown, detect_msg]
     ).then(
@@ -362,9 +377,9 @@ with gr.Blocks(css=css, fill_height=True) as demo:
         [selected_detection]
     )
-    # Wire up events for steering (existing)
     search_box.input(
-        update_dropdown_choices,
         [search_box],
         [concept_dropdown, msg]
     ).then(

 login(token=HF_TOKEN)
 MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 128  # smaller default to save memory
 MAX_INPUT_TOKEN_LENGTH = 4096
 css = """
     border-radius: 4px;
     font-weight: 500;
 }
+.concept-help {
+    font-size: 0.9em;
+    color: #666;
+    margin-top: 4px;
+    font-style: italic;
+}
 """
 def load_jsonl(jsonl_path):
             }
         ] if steering_list else None, # if steering is not provided, we do not steer.
         "streamer": streamer,
         "do_sample": True
     }
     current_list = [new_entry]
     return current_list
+def update_dropdown_choices(search_text, is_detection=False):
     filtered = filter_concepts(search_text)
     if not filtered or len(filtered) == 0:
+        alert_message = (
+            "Good news! Based on the topic you provided, we will automatically generate a detector for you!"
+        ) if is_detection else (
+            "Good news! Based on the topic you provided, we will automatically generate a steering vector. Try it out by starting a chat!"
+        )
+        return gr.update(
+            choices=[],
+            value=None,
+            interactive=True
+        ), gr.Textbox(
+            label="No matching topics found",
+            value=alert_message,
+            lines=3,
+            interactive=False,
+            visible=True,
+            elem_id="alert-message"
+        )
     return gr.update(
         choices=filtered,
+        value=filtered[0],
+        interactive=True,
+        visible=True
     ), gr.Textbox(visible=False)
 with gr.Blocks(css=css, fill_height=True) as demo:
     selected_detection = gr.State([])
     selected_subspaces = gr.State([])
+    with gr.Row(min_height=500, equal_height=True):
         # Left side: chat area
         with gr.Column(scale=7):
             chat_interface = gr.ChatInterface(
                 fn=generate,
+                title="Conditionally Steer AI Responses Based on Topics",
+                description="""This is an experimental chatbot that you can steer using topics you care about:
+Step 1: Choose a topic to detect (e.g., "Google")
+Step 2: Choose a topic you want the model to discuss when the previous topic comes up (e.g., "ethics")
+Try it out! For example, set it to detect "Google" topics and steer toward discussing "ethics". We intervene on Gemma-2-2B-it by adding steering vectors to the residual stream at layer 20.""",
                 additional_inputs=[selected_detection, selected_subspaces],
                 fill_height=True,
             )
         # Right side: concept detection and steering
         with gr.Column(scale=3):
+            gr.Markdown("""#### Step 1: Choose a topic you want to recognize.""")
+            with gr.Group():
                 detect_search = gr.Textbox(
+                    label="Search for topics to detect",
+                    placeholder="Try: 'Google'",
                     lines=1,
                 )
                 detect_msg = gr.TextArea(visible=False)
                 detect_dropdown = gr.Dropdown(
+                    label="Choose a topic to detect (Click to see more!)",
                     interactive=True,
                     allow_custom_value=False,
                 )
                 detect_threshold = gr.Slider(
+                    label="Detection sensitivity",
                     minimum=0,
                     maximum=1,
+                    step=0.1,
                     value=0.5,
                 )
+            gr.Markdown("---")
+            gr.Markdown("""#### Step 2: Choose another topic you want to discuss when it detects the chosen topic above.""")
+            with gr.Group():
                 search_box = gr.Textbox(
+                    label="Search topics to steer",
+                    placeholder="Try: 'ethics'",
                     lines=1,
                 )
                 msg = gr.TextArea(visible=False)
                 concept_dropdown = gr.Dropdown(
+                    label="Choose a topic to steer the model (Click to see more!)",
                     interactive=True,
                     allow_custom_value=False,
                 )
                 concept_magnitude = gr.Slider(
+                    label="Steering intensity",
                     minimum=-5,
                     maximum=5,
                     step=0.1,
     # Wire up events for detection
     detect_search.input(
+        lambda x: update_dropdown_choices(x, is_detection=True),
         [detect_search],
         [detect_dropdown, detect_msg]
     ).then(
         [selected_detection]
     )
+    # Wire up events for steering
     search_box.input(
+        lambda x: update_dropdown_choices(x, is_detection=False),
         [search_box],
         [concept_dropdown, msg]
     ).then(