frankaging committed
Commit f860e61
1 Parent(s): 7065c79
Files changed (1)
  1. app.py +192 -69
app.py CHANGED
@@ -1,37 +1,32 @@
- # login as a privileged user.
  import os, json
- HF_TOKEN = os.environ.get("HF_TOKEN")
-
- from huggingface_hub import login, hf_hub_download
- login(token=HF_TOKEN)
-
- from threading import Thread
- from typing import Iterator
-
  import gradio as gr
  import spaces
- import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
  import pyvene as pv

  MAX_MAX_NEW_TOKENS = 2048
  DEFAULT_MAX_NEW_TOKENS = 1024
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

-
  DESCRIPTION = """\
  # Model Steering with Supervised Dictionary Learning (SDL)

  ### What's Model Steering with SDL?
- This is a demo of model steering with Supervised Dictionary Learning (SDL) using AxBench-ReFT-r1-16K, which hosts steering vectors for 16K concepts. We evaluate various steering methods, including ReFT-r1, a novel weakly-supervised dictionary learning method. ReFT-r1 demonstrates competitive steering capabilities compared to finetuning and prompting baselines.
  """

  LICENSE = """
  <p/>

  ---
- This demo is governed by the original license and acceptable use policy of the model it is derived from. Please refer to the specific licensing and use policy of the underlying model.
  """

  def load_jsonl(jsonl_path):
@@ -39,81 +34,112 @@ def load_jsonl(jsonl_path):
      with open(jsonl_path, 'r') as f:
          for line in f:
              data = json.loads(line)
-             jsonl_data += [data]
      return jsonl_data

-
  class Steer(pv.SourcelessIntervention):
      """Steer model via activation addition"""
      def __init__(self, **kwargs):
          super().__init__(**kwargs, keep_last_dim=True)
-         self.proj = torch.nn.Linear(
-             self.embed_dim, kwargs["latent_dim"], bias=False)
-     def forward(self, base, source=None, subspaces=None):
-         steering_vec = torch.tensor(subspaces["mag"]) * \
-             self.proj.weight[subspaces["idx"]].unsqueeze(dim=0)
-         return base + steering_vec

  if not torch.cuda.is_available():
-     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
-

  if torch.cuda.is_available():
-     # load the LLM
      model_id = "google/gemma-2-2b-it"
      model = AutoModelForCausalLM.from_pretrained(
          model_id, device_map="cuda", torch_dtype=torch.bfloat16
      )
      tokenizer = AutoTokenizer.from_pretrained(model_id)

-     # load the dictionary
-     path_to_params = hf_hub_download(repo_id="pyvene/gemma-reft-2b-it-res", filename="l20/weight.pt", force_download=False)
-     path_to_md = hf_hub_download(repo_id="pyvene/gemma-reft-2b-it-res", filename="l20/metadata.jsonl", force_download=False)
      params = torch.load(path_to_params).cuda()
      md = load_jsonl(path_to_md)
-     id_to_concept = {item["concept_id"]: item["concept"] for item in md}
      concept_list = [item["concept"] for item in md]

      steer = Steer(embed_dim=params.shape[0], latent_dim=params.shape[1])
      steer.proj.weight.data = params.float()

-     # Mount the encoder to the model
-     pv_model = pv.IntervenableModel({
-         "component": f"model.layers[20].output",
-         "intervention": steer}, model=model)

-     terminators = [
-         tokenizer.eos_token_id,
-     ]

  @spaces.GPU
  def generate(
      message: str,
      chat_history: list[tuple[str, str]],
-     max_new_tokens: int = 1024,
  ) -> Iterator[str]:

-     # tokenize and prepare the input
-     prompt = torch.tensor([tokenizer.apply_chat_template(
-         [{"role": "user", "content": message}], tokenize=True, add_generation_prompt=True)]).cuda()
-
-     input_ids = prompt["input_ids"]
-     attention_mask = prompt["attention_mask"]
-
      if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
          input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
          attention_mask = attention_mask[:, -MAX_INPUT_TOKEN_LENGTH:]
-         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
-
      streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
      generate_kwargs = {
          "base": {"input_ids": input_ids, "attention_mask": attention_mask},
          "unit_locations": None,
          "max_new_tokens": max_new_tokens,
          "intervene_on_prompt": True,
-         "subspaces": [{"idx": 1795, "mag": 150.0}],
          "streamer": streamer,
          "eos_token_id": terminators,
          "early_stopping": True,
@@ -123,33 +149,130 @@ def generate(
      t = Thread(target=pv_model.generate, kwargs=generate_kwargs)
      t.start()

-     outputs = []
-     for text in streamer:
-         outputs.append(text)
-         yield "".join(outputs)
-
-
- chat_interface = gr.ChatInterface(
-     fn=generate,
-     additional_inputs=[
-         gr.Slider(
-             label="Max new tokens",
-             minimum=1,
-             maximum=MAX_MAX_NEW_TOKENS,
-             step=1,
-             value=DEFAULT_MAX_NEW_TOKENS,
-         )
-     ],
-     stop_btn=None,
-     title="Model Steering with ReFT-r1 (16K concepts)",
- )

  with gr.Blocks(css="style.css") as demo:
      gr.Markdown(DESCRIPTION)
      gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
-     chat_interface.render()
      gr.Markdown(LICENSE)

- if __name__ == "__main__":
-     demo.queue(max_size=20).launch()

  import os, json
+ import torch
  import gradio as gr
  import spaces
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+ from huggingface_hub import login, hf_hub_download
  import pyvene as pv
+ from threading import Thread
+ from typing import Iterator

+ HF_TOKEN = os.environ.get("HF_TOKEN")
+ login(token=HF_TOKEN)

  MAX_MAX_NEW_TOKENS = 2048
  DEFAULT_MAX_NEW_TOKENS = 1024
  MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

  DESCRIPTION = """\
  # Model Steering with Supervised Dictionary Learning (SDL)

  ### What's Model Steering with SDL?
+ This is a demo of model steering with AxBench-ReFT-r1-16K, ...
  """
  LICENSE = """
  <p/>

  ---
+ Please refer to the specific licensing and use policy of the underlying model.
  """

  def load_jsonl(jsonl_path):
      jsonl_data = []
      with open(jsonl_path, 'r') as f:
          for line in f:
              data = json.loads(line)
+             jsonl_data.append(data)
      return jsonl_data

  class Steer(pv.SourcelessIntervention):
      """Steer model via activation addition"""
      def __init__(self, **kwargs):
          super().__init__(**kwargs, keep_last_dim=True)
+         self.proj = torch.nn.Linear(self.embed_dim, kwargs["latent_dim"], bias=False)

+     def forward(self, base, source=None, subspaces=None):
+         # subspaces is a list of dicts: each has {"idx": int, "mag": float}
+         steer_vec = base
+         if subspaces is not None:
+             for sp in subspaces:
+                 idx = sp["idx"]
+                 mag = sp["mag"]
+                 # each idx selects a row of self.proj.weight
+                 steering_vec = mag * self.proj.weight[idx].unsqueeze(dim=0)
+                 steer_vec = steer_vec + steering_vec
+         return steer_vec
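+ # For example, two hypothetical active concepts {"idx": i, "mag": 150.0} and
+ # {"idx": j, "mag": -80.0} compose additively into
+ #     base + 150.0 * proj.weight[i] - 80.0 * proj.weight[j]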

+ # ---------------------------------------------------
+ # Load model & dictionary if a GPU is available
+ # ---------------------------------------------------
  if not torch.cuda.is_available():
+     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo won't perform well on CPU.</p>"

  if torch.cuda.is_available():
      model_id = "google/gemma-2-2b-it"
      model = AutoModelForCausalLM.from_pretrained(
          model_id, device_map="cuda", torch_dtype=torch.bfloat16
      )
      tokenizer = AutoTokenizer.from_pretrained(model_id)

+     path_to_params = hf_hub_download(repo_id="pyvene/gemma-reft-2b-it-res", filename="l20/weight.pt")
+     path_to_md = hf_hub_download(repo_id="pyvene/gemma-reft-2b-it-res", filename="l20/metadata.jsonl")
      params = torch.load(path_to_params).cuda()
      md = load_jsonl(path_to_md)
+
      concept_list = [item["concept"] for item in md]
+     concept_id_map = {item["concept"]: item["concept_id"] for item in md}

      steer = Steer(embed_dim=params.shape[0], latent_dim=params.shape[1])
      steer.proj.weight.data = params.float()

+     pv_model = pv.IntervenableModel(
+         {
+             "component": "model.layers[20].output",
+             "intervention": steer,
+         },
+         model=model,
+     )
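+     # The intervention is mounted on the output of decoder layer 20; the
+     # l20/ dictionary files downloaded above are keyed to this layer.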
 
+     terminators = [tokenizer.eos_token_id]


+ # ---------------------------------------------------------------------
+ # Main generation function: keep only the last 3 conversation turns,
+ # then build the prompt with apply_chat_template
+ # ---------------------------------------------------------------------
  @spaces.GPU
  def generate(
      message: str,
      chat_history: list[tuple[str, str]],
+     max_new_tokens: int,
+     subspaces_list: list[dict],
  ) -> Iterator[str]:

+     # Restrict to the last 3 turns only
+     start_idx = max(0, len(chat_history) - 3)
+     recent_history = chat_history[start_idx:]
+
+     # Build a list of messages;
+     # each history tuple is (user_message, assistant_message)
+     messages = []
+     for user_msg, assistant_msg in recent_history:
+         messages.append({"role": "user", "content": user_msg})
+         messages.append({"role": "assistant", "content": assistant_msg})
+
+     # Now append the new user message
+     messages.append({"role": "user", "content": message})
+
+     # Convert messages into model input tokens with a generation prompt
+     prompt = tokenizer.apply_chat_template(
+         messages,
+         tokenize=True,
+         add_generation_prompt=True,  # appends the assistant-turn header for the model to continue
+         return_dict=True,  # return input_ids and attention_mask rather than a bare id list
+     )
+
+     # Retrieve input_ids and mask
+     input_ids = torch.tensor([prompt["input_ids"]]).cuda()
+     attention_mask = torch.tensor([prompt["attention_mask"]]).cuda()
+
+     # Possibly trim if over max length
      if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
          input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
          attention_mask = attention_mask[:, -MAX_INPUT_TOKEN_LENGTH:]
+         yield "\n[Warning: Truncated conversation exceeds max allowed input tokens]\n"
+
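+     # Note that left-truncation keeps the most recent MAX_INPUT_TOKEN_LENGTH
+     # tokens, so the oldest context is dropped first.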
      streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
      generate_kwargs = {
          "base": {"input_ids": input_ids, "attention_mask": attention_mask},
          "unit_locations": None,
          "max_new_tokens": max_new_tokens,
          "intervene_on_prompt": True,
+         "subspaces": subspaces_list,
          "streamer": streamer,
          "eos_token_id": terminators,
          "early_stopping": True,
      }

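+     # pyvene passes "subspaces" through pv_model.generate to the mounted
+     # Steer intervention's forward() on each decoding step.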
      t = Thread(target=pv_model.generate, kwargs=generate_kwargs)
      t.start()

+     partial_text = []
+     for token_str in streamer:
+         partial_text.append(token_str)
+         yield "".join(partial_text)

+ # --------------
+ # UI Callbacks
+ # --------------
+ def filter_concepts(search_text: str):
+     if not search_text.strip():
+         return concept_list[:500]
+     filtered = [c for c in concept_list if search_text.lower() in c.lower()]
+     return filtered[:500]
+
+ def add_concept_to_list(selected_concept, magnitude, current_list):
+     """When 'Add Concept' is clicked, add the chosen concept and magnitude to subspaces."""
+     if not selected_concept:
+         table_data = [[x["idx"], x["mag"]] for x in current_list]
+         return current_list, table_data, gr.update(choices=[str(x["idx"]) for x in current_list])
+     concept_idx = concept_id_map[selected_concept]
+     new_entry = {"idx": concept_idx, "mag": magnitude}
+     updated_list = current_list + [new_entry]

+     remove_choices = [str(x["idx"]) for x in updated_list]
+     table_data = [[x["idx"], x["mag"]] for x in updated_list]
+     return updated_list, table_data, gr.update(choices=remove_choices)
+
+ def remove_concept_from_list(rem_concept_idx_str, current_list):
+     """Remove the chosen concept from the list. Index is a string from remove_dropdown."""
+     if not rem_concept_idx_str:
+         table_data = [[x["idx"], x["mag"]] for x in current_list]
+         return current_list, table_data, gr.update()
+     rem_idx = int(rem_concept_idx_str)
+     updated_list = [x for x in current_list if x["idx"] != rem_idx]
+     remove_choices = [str(x["idx"]) for x in updated_list]
+     table_data = [[x["idx"], x["mag"]] for x in updated_list]
+     return updated_list, table_data, gr.update(choices=remove_choices)
+
+ def update_dropdown_choices(search_text):
+     filtered = filter_concepts(search_text)
+     return gr.update(choices=filtered)


+ # -------------------------
+ # Build the Gradio Blocks
+ # -------------------------
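+ # The gr.State created below holds the active [{"idx": ..., "mag": ...}] list
+ # that the callbacks above edit and generate() consumes.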
  with gr.Blocks(css="style.css") as demo:
      gr.Markdown(DESCRIPTION)
      gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
+
+     selected_subspaces = gr.State([])
+
+     with gr.Row():
+         with gr.Column():
+             # Searching / selecting a concept
+             search_box = gr.Textbox(
+                 label="Search concepts",
+                 placeholder="Type text to filter concepts (e.g. 'sports')"
+             )
+             concept_dropdown = gr.Dropdown(
+                 label="Filtered Concepts",
+                 choices=[],
+                 multiselect=False
+             )
+             concept_magnitude = gr.Slider(
+                 label="Magnitude",
+                 minimum=-300.0,
+                 maximum=300.0,
+                 step=1.0,
+                 value=150.0
+             )
+             add_button = gr.Button("Add Concept")
+
+             # Removal
+             remove_dropdown = gr.Dropdown(
+                 label="Remove from active list",
+                 choices=[],
+                 multiselect=False
+             )
+             remove_button = gr.Button("Remove Selected")
+
+         with gr.Column():
+             # Display currently active subspaces
+             active_subspaces_table = gr.Dataframe(
+                 headers=["idx", "magnitude"],
+                 datatype=["number", "number"],
+                 interactive=False,
+                 label="Active Concept Subspaces"
+             )
+
+     # The Chat Interface
+     chat_interface = gr.ChatInterface(
+         fn=generate,
+         additional_inputs=[
+             gr.Slider(
+                 label="Max new tokens",
+                 minimum=1,
+                 maximum=MAX_MAX_NEW_TOKENS,
+                 step=1,
+                 value=DEFAULT_MAX_NEW_TOKENS,
+             ),
+             selected_subspaces
+         ],
+         title="Model Steering with ReFT-r1 (16K concepts)",
+     )
+
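+     # Passing the gr.State via additional_inputs hands its current value
+     # (the active subspace list) to generate() on each chat turn.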
      gr.Markdown(LICENSE)

+     # Wire up events
+     search_box.change(
+         fn=update_dropdown_choices,
+         inputs=[search_box],
+         outputs=[concept_dropdown]
+     )
+
+     add_button.click(
+         fn=add_concept_to_list,
+         inputs=[concept_dropdown, concept_magnitude, selected_subspaces],
+         outputs=[selected_subspaces, active_subspaces_table, remove_dropdown],
+     )

+     remove_button.click(
+         fn=remove_concept_from_list,
+         inputs=[remove_dropdown, selected_subspaces],
+         outputs=[selected_subspaces, active_subspaces_table, remove_dropdown],
+     )
+
+ demo.queue(max_size=20).launch()