frankaging committed on
Commit
3d0e95b
·
1 Parent(s): de8f900

claude impl

Browse files
Files changed (1) hide show
  1. app.py +44 -54
app.py CHANGED
@@ -13,7 +13,7 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
13
  login(token=HF_TOKEN)
14
 
15
  MAX_MAX_NEW_TOKENS = 2048
16
- DEFAULT_MAX_NEW_TOKENS = 128 # smaller default to save memory
17
  MAX_INPUT_TOKEN_LENGTH = 4096
18
 
19
  def load_jsonl(jsonl_path):
@@ -133,6 +133,9 @@ def generate(
133
  partial_text.append(token_str)
134
  yield "".join(partial_text)
135
 
 
 
 
136
  def filter_concepts(search_text: str):
137
  if not search_text.strip():
138
  return concept_list[:500]
@@ -140,11 +143,6 @@ def filter_concepts(search_text: str):
140
  return filtered[:500]
141
 
142
  def add_concept_to_list(selected_concept, user_slider_val, current_list):
143
- """
144
- Return exactly 2 values:
145
- 1) The updated list of concepts (list of dicts).
146
- 2) A Gradio update for the removal dropdown’s choices.
147
- """
148
  if not selected_concept:
149
  return current_list, gr.update(choices=_build_remove_choices(current_list))
150
 
@@ -156,78 +154,69 @@ def add_concept_to_list(selected_concept, user_slider_val, current_list):
156
  "display_mag": user_slider_val,
157
  "internal_mag": internal_mag,
158
  }
159
- updated_list = current_list + [new_entry]
 
160
  return updated_list, gr.update(choices=_build_remove_choices(updated_list))
161
 
162
  def remove_concept_from_list(selected_text, current_list):
163
- """
164
- Return exactly 2 values:
165
- 1) The updated list of concepts (list of dicts).
166
- 2) A Gradio update for the removal dropdown’s choices.
167
- """
168
  if not selected_text:
169
  return current_list, gr.update(choices=_build_remove_choices(current_list))
170
- updated_list = [x for x in current_list if x["text"] != selected_text]
 
 
171
  return updated_list, gr.update(choices=_build_remove_choices(updated_list))
172
 
173
- def _build_remove_choices(subspaces):
174
- return [x["text"] for x in subspaces]
175
-
176
  def update_dropdown_choices(search_text):
177
  filtered = filter_concepts(search_text)
178
  return gr.update(choices=filtered)
179
 
180
  with gr.Blocks(css="style.css") as demo:
181
-
182
- # Pre-populate with a random concept if available
183
- default_subspaces = []
184
- if pv_model and concept_list:
185
- default_concept = "words related to time travel and its consequences"
186
- default_subspaces = [{
187
- "text": default_concept,
188
- "idx": concept_id_map[default_concept],
189
- "display_mag": 3,
190
- "internal_mag": 150.0,
191
- }]
192
-
193
- selected_subspaces = gr.State(default_subspaces)
194
  with gr.Row():
195
  # Left side: bigger chat area
196
  with gr.Column(scale=7):
197
  chat_interface = gr.ChatInterface(
198
  fn=generate,
199
- title="LM Steering with ReFT-r1 (16K concepts)",
 
200
  type="messages",
201
  additional_inputs=[selected_subspaces],
202
  )
 
203
  # Right side: concept management
204
  with gr.Column(scale=3):
205
- gr.Markdown("# Steering Concepts")
206
- search_box = gr.Textbox(
207
- label="Search concepts",
208
- placeholder="e.g. 'time travel'"
209
- )
210
- concept_dropdown = gr.Dropdown(
211
- label="Filtered Concepts",
212
- choices=[]
213
- )
214
- concept_magnitude = gr.Slider(
215
- label="Steering Factor",
216
- minimum=-5,
217
- maximum=5,
218
- step=1,
219
- value=3
220
- )
221
- add_button = gr.Button("Add Concept")
 
 
 
222
 
223
- # Row with the remove dropdown + button
224
- with gr.Row():
 
225
  remove_dropdown = gr.Dropdown(
226
- label="Remove concept",
227
- choices=_build_remove_choices(default_subspaces),
228
- multiselect=False
229
  )
230
- remove_button = gr.Button("Remove", variant="secondary")
231
 
232
  # Wire up events
233
  # When the search box changes, update the concept dropdown choices:
@@ -253,4 +242,5 @@ with gr.Blocks(css="style.css") as demo:
253
  [selected_subspaces, remove_dropdown]
254
  )
255
 
256
- demo.launch()
 
 
13
  login(token=HF_TOKEN)
14
 
15
  MAX_MAX_NEW_TOKENS = 2048
16
+ DEFAULT_MAX_NEW_TOKENS = 256 # smaller default to save memory
17
  MAX_INPUT_TOKEN_LENGTH = 4096
18
 
19
  def load_jsonl(jsonl_path):
 
133
  partial_text.append(token_str)
134
  yield "".join(partial_text)
135
 
136
+ def _build_remove_choices(subspaces):
137
+ return [f"(+{x['display_mag']:.1f}*) {x['text']}" for x in subspaces]
138
+
139
  def filter_concepts(search_text: str):
140
  if not search_text.strip():
141
  return concept_list[:500]
 
143
  return filtered[:500]
144
 
145
  def add_concept_to_list(selected_concept, user_slider_val, current_list):
 
 
 
 
 
146
  if not selected_concept:
147
  return current_list, gr.update(choices=_build_remove_choices(current_list))
148
 
 
154
  "display_mag": user_slider_val,
155
  "internal_mag": internal_mag,
156
  }
157
+ # Add to the beginning of the list
158
+ updated_list = [new_entry] + current_list
159
  return updated_list, gr.update(choices=_build_remove_choices(updated_list))
160
 
161
  def remove_concept_from_list(selected_text, current_list):
 
 
 
 
 
162
  if not selected_text:
163
  return current_list, gr.update(choices=_build_remove_choices(current_list))
164
+
165
+ # Remove based on the full formatted text
166
+ updated_list = [x for x in current_list if f"(+{x['display_mag']:.1f}*) {x['text']}" != selected_text]
167
  return updated_list, gr.update(choices=_build_remove_choices(updated_list))
168
 
 
 
 
169
  def update_dropdown_choices(search_text):
170
  filtered = filter_concepts(search_text)
171
  return gr.update(choices=filtered)
172
 
173
  with gr.Blocks(css="style.css") as demo:
174
+ # Remove default subspaces
175
+ selected_subspaces = gr.State([])
176
+
 
 
 
 
 
 
 
 
 
 
177
  with gr.Row():
178
  # Left side: bigger chat area
179
  with gr.Column(scale=7):
180
  chat_interface = gr.ChatInterface(
181
  fn=generate,
182
+ title="Language Model Concept Steering",
183
+ description="Steer responses by selecting concepts on the right →",
184
  type="messages",
185
  additional_inputs=[selected_subspaces],
186
  )
187
+
188
  # Right side: concept management
189
  with gr.Column(scale=3):
190
+ gr.Markdown("## Steer Model Responses")
191
+
192
+ # Concept Search and Selection
193
+ with gr.Group():
194
+ search_box = gr.Textbox(
195
+ label="Search Concepts",
196
+ placeholder="Find concepts to steer the model (e.g. 'time travel')",
197
+ )
198
+ concept_dropdown = gr.Dropdown(
199
+ label="Select a Concept",
200
+ interactive=True,
201
+ )
202
+ concept_magnitude = gr.Slider(
203
+ label="Steering Intensity",
204
+ minimum=-5,
205
+ maximum=5,
206
+ step=0.1, # Allow 1 decimal point
207
+ value=3,
208
+ )
209
+ add_button = gr.Button("Add Concept to Steering")
210
 
211
+ # Current Steering Concepts
212
+ gr.Markdown("## Current Steering Concepts")
213
+ with gr.Group():
214
  remove_dropdown = gr.Dropdown(
215
+ label="Select a Current Steering Concept to Stop",
216
+ choices=[],
217
+ multiselect=False,
218
  )
219
+ remove_button = gr.Button("Remove Current Steering Concept", variant="secondary")
220
 
221
  # Wire up events
222
  # When the search box changes, update the concept dropdown choices:
 
242
  [selected_subspaces, remove_dropdown]
243
  )
244
 
245
+ demo.launch(share=True)
246
+