Spaces:

pyvene
/

AxBench-ReFT-r1-16K

Running on Zero

App Files Files Community

frankaging committed on Jan 25

Commit

e39562b

1 Parent(s): 1baa5c3

o1 impl

Browse files

Files changed (1) hide show

app.py +84 -135

app.py CHANGED Viewed

@@ -12,22 +12,8 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
 login(token=HF_TOKEN)
 MAX_MAX_NEW_TOKENS = 2048
-DEFAULT_MAX_NEW_TOKENS = 1024
-MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
-DESCRIPTION = """\
-# Model Steering with Supervised Dictionary Learning (SDL)
-### What's Model Steering with SDL?
-This is a demo of model steering with AxBench-ReFT-r1-16K, ...
-"""
-LICENSE = """
-<p/>
----
-Please refer to the specific licensing and use policy of the underlying model.
-"""
 def load_jsonl(jsonl_path):
     jsonl_data = []
@@ -38,41 +24,41 @@ def load_jsonl(jsonl_path):
     return jsonl_data
 class Steer(pv.SourcelessIntervention):
-    """Steer model via activation addition"""
     def __init__(self, **kwargs):
         super().__init__(**kwargs, keep_last_dim=True)
         self.proj = torch.nn.Linear(self.embed_dim, kwargs["latent_dim"], bias=False)
     def forward(self, base, source=None, subspaces=None):
-        # subspaces is a list of dicts:
-        #   each has {"idx": int, "internal_mag": float, "text": str, ...}
         steer_vec = base
         if subspaces is not None:
             for sp in subspaces:
                 idx = sp["idx"]
-                mag = sp["internal_mag"]  # the true scaling factor
                 steering_vec = mag * self.proj.weight[idx].unsqueeze(dim=0)
                 steer_vec = steer_vec + steering_vec
         return steer_vec
-# ------------------------------------------
-# Load the Model & Dictionary if GPU exists
-# ------------------------------------------
 if not torch.cuda.is_available():
-    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo won't perform well on CPU.</p>"
 if torch.cuda.is_available():
-    model_id = "google/gemma-2-2b-it"
     model = AutoModelForCausalLM.from_pretrained(
         model_id, device_map="cuda", torch_dtype=torch.bfloat16
     )
     tokenizer = AutoTokenizer.from_pretrained(model_id)
-    path_to_params = hf_hub_download(repo_id="pyvene/gemma-reft-2b-it-res", filename="l20/weight.pt")
-    path_to_md = hf_hub_download(repo_id="pyvene/gemma-reft-2b-it-res", filename="l20/metadata.jsonl")
-    params = torch.load(path_to_params).cuda()
-    md = load_jsonl(path_to_md)
     concept_list = [item["concept"] for item in md]
     concept_id_map = {item["concept"]: item["concept_id"] for item in md}
@@ -88,12 +74,8 @@ if torch.cuda.is_available():
         model=model,
     )
-terminators = [tokenizer.eos_token_id]
-# --------------------------------------------
-# Main generation function: keep last 3 turns
-# --------------------------------------------
 @spaces.GPU
 def generate(
     message: str,
@@ -101,37 +83,28 @@ def generate(
     max_new_tokens: int,
     subspaces_list: list[dict],
 ) -> Iterator[str]:
-    # Restrict to the last 3 turns only
     start_idx = max(0, len(chat_history) - 3)
     recent_history = chat_history[start_idx:]
-    # Convert (user_msg, model_msg) => list of messages
     messages = []
     for user_msg, model_msg in recent_history:
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "model", "content": model_msg})
-    # Add the new user message
     messages.append({"role": "user", "content": message})
-    # Apply the chat template (some HF models expect "assistant" instead of "model")
-    # but let's keep "model" to match your code, if that is required.
-    prompt_dict = tokenizer.apply_chat_template(
-        messages, tokenize=True, add_generation_prompt=True
-    )
-    input_ids = torch.tensor([prompt_dict["input_ids"]]).cuda()
-    attention_mask = torch.tensor([prompt_dict["attention_mask"]]).cuda()
-    # Possibly trim if too long
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
-        attention_mask = attention_mask[:, -MAX_INPUT_TOKEN_LENGTH:]
-        yield "\n[Warning: Truncated conversation exceeds max allowed input tokens]\n"
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = {
-        "base": {"input_ids": input_ids, "attention_mask": attention_mask},
         "unit_locations": None,
         "max_new_tokens": max_new_tokens,
         "intervene_on_prompt": True,
@@ -150,29 +123,20 @@ def generate(
         partial_text.append(token_str)
         yield "".join(partial_text)
-# ----------------
-# UI Callbacks
-# ----------------
 def filter_concepts(search_text: str):
-    """Return the first 500 concepts that match (case-insensitive)."""
     if not search_text.strip():
         return concept_list[:500]
     filtered = [c for c in concept_list if search_text.lower() in c.lower()]
     return filtered[:500]
 def add_concept_to_list(selected_concept, user_slider_val, current_list):
-    """
-    user_slider_val is from [-5..5]. We multiply by 50 internally to get the real magnitude.
-    """
     if not selected_concept:
         return current_list, _build_table_data(current_list), gr.update(choices=_build_remove_choices(current_list))
-    concept_idx = concept_id_map[selected_concept]
-    internal_mag = user_slider_val * 50  # scale by 50
     new_entry = {
         "text": selected_concept,
-        "idx": concept_idx,
         "display_mag": user_slider_val,
         "internal_mag": internal_mag,
     }
@@ -183,14 +147,10 @@ def add_concept_to_list(selected_concept, user_slider_val, current_list):
         gr.update(choices=_build_remove_choices(updated_list))
     )
-def remove_concept_from_list(concept_to_remove, current_list):
-    """
-    Remove the chosen concept name from the list.
-    """
-    if not concept_to_remove:
         return current_list, _build_table_data(current_list), gr.update(choices=_build_remove_choices(current_list))
-    updated_list = [x for x in current_list if x["text"] != concept_to_remove]
     return (
         updated_list,
         _build_table_data(updated_list),
@@ -198,115 +158,104 @@ def remove_concept_from_list(concept_to_remove, current_list):
     )
 def _build_table_data(subspaces):
-    """Return [[concept_name, scaled_mag], ...] for display."""
     return [[x["text"], x["display_mag"]] for x in subspaces]
 def _build_remove_choices(subspaces):
-    """Return concept names for the remove dropdown."""
     return [x["text"] for x in subspaces]
 def update_dropdown_choices(search_text):
     filtered = filter_concepts(search_text)
     return gr.update(choices=filtered)
-# --------------------------------------------------------------------
-# Build the Interface
-# --------------------------------------------------------------------
 with gr.Blocks(css="style.css") as demo:
-    gr.Markdown(DESCRIPTION)
-    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
-    # If GPU is available, pick a random concept as default
     default_subspaces = []
-    if torch.cuda.is_available() and len(concept_list) > 0:
         default_concept = random.choice(concept_list)
         default_subspaces = [{
             "text": default_concept,
             "idx": concept_id_map[default_concept],
-            "display_mag": 3,       # user sees 3
-            "internal_mag": 150.0,  # actual factor
         }]
     selected_subspaces = gr.State(default_subspaces)
     with gr.Row():
-        with gr.Column(scale=5):
-            # Use type="messages" to avoid tuple-format deprecation warnings
             chat_interface = gr.ChatInterface(
                 fn=generate,
-                additional_inputs=[
-                    gr.Slider(
-                        label="Max new tokens",
-                        minimum=1,
-                        maximum=MAX_MAX_NEW_TOKENS,
-                        step=1,
-                        value=DEFAULT_MAX_NEW_TOKENS,
-                    ),
-                    selected_subspaces
-                ],
-                title="Model Steering with ReFT-r1 (16K concepts)",
-                type="messages",  # <--- uses openai-style 'role' and 'content'
             )
-        with gr.Column(scale=4):
-            gr.Markdown("## Steering Concepts")
             search_box = gr.Textbox(
                 label="Search concepts",
-                placeholder="Type text to filter concepts (e.g. 'sports')"
             )
             concept_dropdown = gr.Dropdown(
                 label="Filtered Concepts",
-                choices=[],  # dynamically populated
-                multiselect=False
             )
             concept_magnitude = gr.Slider(
-                label="Scaled Magnitude (×50 internally)",
                 minimum=-5,
                 maximum=5,
                 step=1,
                 value=3
             )
             add_button = gr.Button("Add Concept")
-            # Show the table of active subspaces
             active_subspaces_table = gr.Dataframe(
-                headers=["Concept", "Magnitude (scaled)"],
                 datatype=["str", "number"],
                 value=_build_table_data(default_subspaces),
                 interactive=False,
-                label="Active Concept Subspaces"
             )
-            # Remove concept by name
-            remove_dropdown = gr.Dropdown(
-                label="Remove a concept",
-                choices=_build_remove_choices(default_subspaces),
-                multiselect=False
-            )
-            remove_button = gr.Button("Remove Selected Concept")
-    gr.Markdown(LICENSE)
     # Wire up events
-    # Update concept dropdown when user types in search
-    search_box.change(
-        fn=update_dropdown_choices,
-        inputs=[search_box],
-        outputs=[concept_dropdown]
-    )
-    # Add concept
     add_button.click(
-        fn=add_concept_to_list,
-        inputs=[concept_dropdown, concept_magnitude, selected_subspaces],
-        outputs=[selected_subspaces, active_subspaces_table, remove_dropdown],
     )
-    # Remove a concept
     remove_button.click(
-        fn=remove_concept_from_list,
-        inputs=[remove_dropdown, selected_subspaces],
-        outputs=[selected_subspaces, active_subspaces_table, remove_dropdown],
     )
-    demo.queue(max_size=20).launch()

 login(token=HF_TOKEN)
 MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 512  # smaller default to save memory
+MAX_INPUT_TOKEN_LENGTH = 4096
 def load_jsonl(jsonl_path):
     jsonl_data = []
     return jsonl_data
 class Steer(pv.SourcelessIntervention):
     """Intervention that shifts activations along rows of a learned projection.
     ``forward`` adds a scaled copy of selected rows of ``proj.weight`` to the
     incoming activations; which rows, and by how much, comes from ``subspaces``.
     """
     def __init__(self, **kwargs):
         super().__init__(**kwargs, keep_last_dim=True)
         # Bias-free linear map; forward() only indexes rows of its weight matrix.
         self.proj = torch.nn.Linear(
             self.embed_dim, kwargs["latent_dim"], bias=False
         )
     def forward(self, base, source=None, subspaces=None):
         """Return ``base`` plus one scaled weight row per entry of ``subspaces``.
         Each entry is a dict read through the keys ``"idx"`` (row of
         ``proj.weight``) and ``"internal_mag"`` (multiplier for that row).
         When ``subspaces`` is None, ``base`` is returned unchanged.
         ``source`` is accepted but not used.
         """
         if subspaces is None:
             return base
         shifted = base
         for entry in subspaces:
             direction = self.proj.weight[entry["idx"]].unsqueeze(dim=0)
             shifted = shifted + entry["internal_mag"] * direction
         return shifted
+# Check GPU
 if not torch.cuda.is_available():
+    print("Warning: Running on CPU, may be slow.")
+# Load model & dictionary
+model_id = "google/gemma-2-2b-it"
+pv_model = None
+tokenizer = None
+concept_list = []
+concept_id_map = {}
 if torch.cuda.is_available():
     model = AutoModelForCausalLM.from_pretrained(
         model_id, device_map="cuda", torch_dtype=torch.bfloat16
     )
     tokenizer = AutoTokenizer.from_pretrained(model_id)
+    # Download dictionary
+    weight_path = hf_hub_download(repo_id="pyvene/gemma-reft-2b-it-res", filename="l20/weight.pt")
+    meta_path = hf_hub_download(repo_id="pyvene/gemma-reft-2b-it-res", filename="l20/metadata.jsonl")
+    params = torch.load(weight_path).cuda()
+    md = load_jsonl(meta_path)
     concept_list = [item["concept"] for item in md]
     concept_id_map = {item["concept"]: item["concept_id"] for item in md}
         model=model,
     )
+terminators = [tokenizer.eos_token_id] if tokenizer else []
 @spaces.GPU
 def generate(
     message: str,
     max_new_tokens: int,
     subspaces_list: list[dict],
 ) -> Iterator[str]:
+    # limit to last 3 turns
     start_idx = max(0, len(chat_history) - 3)
     recent_history = chat_history[start_idx:]
+    # build list of messages
     messages = []
     for user_msg, model_msg in recent_history:
         messages.append({"role": "user", "content": user_msg})
         messages.append({"role": "model", "content": model_msg})
     messages.append({"role": "user", "content": message})
+    input_ids = torch.tensor([tokenizer.apply_chat_template(
+        messages, tokenize=True, add_generation_prompt=True)]).cuda()
+    # trim if needed
     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+        yield "[Truncated prior text]\n"
     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = {
+        "base": {"input_ids": input_ids},
         "unit_locations": None,
         "max_new_tokens": max_new_tokens,
         "intervene_on_prompt": True,
         partial_text.append(token_str)
         yield "".join(partial_text)
 def filter_concepts(search_text: str):
     """Return up to 500 entries of ``concept_list`` that contain ``search_text``.
     Matching is a case-insensitive substring test. A blank or whitespace-only
     query returns the first 500 concepts unfiltered.
     """
     if search_text.strip():
         needle = search_text.lower()
         matches = [name for name in concept_list if needle in name.lower()]
         return matches[:500]
     return concept_list[:500]
 def add_concept_to_list(selected_concept, user_slider_val, current_list):
     if not selected_concept:
         return current_list, _build_table_data(current_list), gr.update(choices=_build_remove_choices(current_list))
+    idx = concept_id_map[selected_concept]
+    internal_mag = user_slider_val * 50
     new_entry = {
         "text": selected_concept,
+        "idx": idx,
         "display_mag": user_slider_val,
         "internal_mag": internal_mag,
     }
         gr.update(choices=_build_remove_choices(updated_list))
     )
+def remove_concept_from_list(selected_text, current_list):
+    if not selected_text:
         return current_list, _build_table_data(current_list), gr.update(choices=_build_remove_choices(current_list))
+    updated_list = [x for x in current_list if x["text"] != selected_text]
     return (
         updated_list,
         _build_table_data(updated_list),
     )
 def _build_table_data(subspaces):
     return [[x["text"], x["display_mag"]] for x in subspaces]
 def _build_remove_choices(subspaces):
     return [x["text"] for x in subspaces]
 def update_dropdown_choices(search_text):
     """Return a ``gr.update`` whose choices are the concepts matching ``search_text``."""
     return gr.update(choices=filter_concepts(search_text))
 # Build the UI: chat panel on the left, steering-concept controls on the right.
 with gr.Blocks(css="style.css") as demo:
     # A short title only
     gr.Markdown("## Model Steering with ReFT-r1 (16K concepts)")
     # Pre-populate with a random concept if the model and dictionary were loaded
     default_subspaces = []
     if pv_model is not None and concept_list:
         default_concept = random.choice(concept_list)
         default_subspaces = [{
             "text": default_concept,
             "idx": concept_id_map[default_concept],
             "display_mag": 3,
             "internal_mag": 150.0,  # display_mag * 50, same scaling as add_concept_to_list
         }]
     selected_subspaces = gr.State(default_subspaces)
     # Created unrendered so ChatInterface can place it in its additional-inputs
     # accordion under the chat. ChatInterface has no configure() method, so
     # extra inputs must be supplied through additional_inputs at construction.
     max_token_slider = gr.Slider(
         label="Max New Tokens",
         minimum=1,
         maximum=MAX_MAX_NEW_TOKENS,
         step=1,
         value=DEFAULT_MAX_NEW_TOKENS,
         render=False,
     )
     with gr.Row():
         # Left side: bigger chat area
         with gr.Column(scale=7):
             chat_interface = gr.ChatInterface(
                 fn=generate,
                 # Passed to generate() after (message, history), in this order.
                 additional_inputs=[max_token_slider, selected_subspaces],
                 # Height belongs to the Chatbot component, not to ChatInterface.
                 chatbot=gr.Chatbot(height=550, type="messages", render=False),
                 title="",
                 type="messages",
             )
         # Right side: concept management
         with gr.Column(scale=3):
             gr.Markdown("### Steering Concepts")
             search_box = gr.Textbox(
                 label="Search concepts",
                 placeholder="e.g. 'time travel'"
             )
             concept_dropdown = gr.Dropdown(
                 label="Filtered Concepts",
                 choices=[]
             )
             concept_magnitude = gr.Slider(
                 label="Scaled Factor",
                 minimum=-5,
                 maximum=5,
                 step=1,
                 value=3
             )
             add_button = gr.Button("Add Concept")
             active_subspaces_table = gr.Dataframe(
                 headers=["Concept", "Mag (scaled)"],
                 datatype=["str", "number"],
                 value=_build_table_data(default_subspaces),
                 interactive=False,
                 label="Active Concept Subspaces",
                 height=170  # give it a bit more room
             )
             # Row with the remove dropdown + button
             with gr.Row():
                 remove_dropdown = gr.Dropdown(
                     label="Remove concept",
                     choices=_build_remove_choices(default_subspaces),
                     multiselect=False
                 )
                 remove_button = gr.Button("Remove", variant="secondary")
     # Wire up events
     search_box.change(update_dropdown_choices, [search_box], [concept_dropdown])
     add_button.click(
         add_concept_to_list,
         [concept_dropdown, concept_magnitude, selected_subspaces],
         [selected_subspaces, active_subspaces_table, remove_dropdown]
     )
     remove_button.click(
         remove_concept_from_list,
         [remove_dropdown, selected_subspaces],
         [selected_subspaces, active_subspaces_table, remove_dropdown]
     )
     demo.launch()