Spaces:
Running
on
Zero
Running
on
Zero
frankaging
committed on
Commit
·
e3ab52c
1
Parent(s):
98bf5cc
o1 impl
Browse files
app.py
CHANGED
@@ -25,19 +25,15 @@ def load_jsonl(jsonl_path):
|
|
25 |
return jsonl_data
|
26 |
|
27 |
class Steer(pv.SourcelessIntervention):
|
|
|
28 |
def __init__(self, **kwargs):
|
29 |
super().__init__(**kwargs, keep_last_dim=True)
|
30 |
-
self.proj = torch.nn.Linear(
|
31 |
-
|
32 |
def forward(self, base, source=None, subspaces=None):
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
idx = sp["idx"]
|
37 |
-
mag = sp["internal_mag"] # scaled by 50
|
38 |
-
steering_vec = mag * self.proj.weight[idx].unsqueeze(dim=0)
|
39 |
-
steer_vec = steer_vec + steering_vec
|
40 |
-
return steer_vec
|
41 |
|
42 |
# Check GPU
|
43 |
if not torch.cuda.is_available():
|
@@ -67,13 +63,9 @@ if torch.cuda.is_available():
|
|
67 |
steer = Steer(embed_dim=params.shape[0], latent_dim=params.shape[1])
|
68 |
steer.proj.weight.data = params.float()
|
69 |
|
70 |
-
pv_model = pv.IntervenableModel(
|
71 |
-
|
72 |
-
|
73 |
-
"intervention": steer,
|
74 |
-
},
|
75 |
-
model=model,
|
76 |
-
)
|
77 |
|
78 |
terminators = [tokenizer.eos_token_id] if tokenizer else []
|
79 |
|
@@ -171,7 +163,7 @@ with gr.Blocks(css="style.css") as demo:
|
|
171 |
# Pre-populate with a random concept if available
|
172 |
default_subspaces = []
|
173 |
if pv_model and concept_list:
|
174 |
-
default_concept =
|
175 |
default_subspaces = [{
|
176 |
"text": default_concept,
|
177 |
"idx": concept_id_map[default_concept],
|
@@ -191,7 +183,7 @@ with gr.Blocks(css="style.css") as demo:
|
|
191 |
)
|
192 |
# Right side: concept management
|
193 |
with gr.Column(scale=3):
|
194 |
-
gr.Markdown("
|
195 |
search_box = gr.Textbox(
|
196 |
label="Search concepts",
|
197 |
placeholder="e.g. 'time travel'"
|
|
|
25 |
return jsonl_data
|
26 |
|
27 |
class Steer(pv.SourcelessIntervention):
|
28 |
+
"""Steer model via activation addition"""
|
29 |
def __init__(self, **kwargs):
|
30 |
super().__init__(**kwargs, keep_last_dim=True)
|
31 |
+
self.proj = torch.nn.Linear(
|
32 |
+
self.embed_dim, kwargs["latent_dim"], bias=False)
|
33 |
def forward(self, base, source=None, subspaces=None):
|
34 |
+
steering_vec = torch.tensor(subspaces["mag"]) * \
|
35 |
+
self.proj.weight[subspaces["idx"]].unsqueeze(dim=0)
|
36 |
+
return base + steering_vec
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
# Check GPU
|
39 |
if not torch.cuda.is_available():
|
|
|
63 |
steer = Steer(embed_dim=params.shape[0], latent_dim=params.shape[1])
|
64 |
steer.proj.weight.data = params.float()
|
65 |
|
66 |
+
pv_model = pv.IntervenableModel({
|
67 |
+
"component": f"model.layers[20].output",
|
68 |
+
"intervention": steer}, model=model)
|
|
|
|
|
|
|
|
|
69 |
|
70 |
terminators = [tokenizer.eos_token_id] if tokenizer else []
|
71 |
|
|
|
163 |
# Pre-populate with a random concept if available
|
164 |
default_subspaces = []
|
165 |
if pv_model and concept_list:
|
166 |
+
default_concept = "words related to time travel and its consequences"
|
167 |
default_subspaces = [{
|
168 |
"text": default_concept,
|
169 |
"idx": concept_id_map[default_concept],
|
|
|
183 |
)
|
184 |
# Right side: concept management
|
185 |
with gr.Column(scale=3):
|
186 |
+
gr.Markdown("# Steering Concepts")
|
187 |
search_box = gr.Textbox(
|
188 |
label="Search concepts",
|
189 |
placeholder="e.g. 'time travel'"
|