JoFrost committed on
Commit
3e1ba39
·
1 Parent(s): 02e0932

feat: remote logic

Browse files
Files changed (2) hide show
  1. app.py +20 -71
  2. requirements.txt +6 -0
app.py CHANGED
@@ -1,12 +1,14 @@
1
  import gradio as gr
2
- import os
3
- from transformers import AutoModelForCausalLM, AutoTokenizer
4
- from huggingface_hub import hf_hub_download
5
- import numpy as np
6
  import torch
7
- import pickle
8
  import numpy as np
9
  import pandas as pd
 
 
 
 
 
 
 
10
  from sklearn.linear_model import LogisticRegression
11
 
12
  torch.set_grad_enabled(False) # avoid blowing up mem
@@ -22,6 +24,8 @@ params = {
22
  "filename" : "layer_31/width_16k/average_l0_76/params.npz"
23
  }
24
 
 
 
25
  model_name = params["model_name"]
26
  width = params["width"]
27
  layer = params["layer"]
@@ -29,23 +33,17 @@ l0 = params["l0"]
29
  sae_repo_id = params["sae_repo_id"]
30
  filename = params["filename"]
31
 
32
- C = 0.01
33
-
34
- model = AutoModelForCausalLM.from_pretrained(
35
- model_name,
36
- device_map='auto',
37
- torch_dtype=torch.bfloat16,
38
- )
39
- tokenizer = AutoTokenizer.from_pretrained(model_name)
40
-
41
  path_to_params = hf_hub_download(
42
  repo_id=sae_repo_id,
43
  filename=filename,
44
  force_download=False,
 
45
  )
46
 
 
 
47
  params = np.load(path_to_params)
48
- pt_params = {k: torch.from_numpy(v).cuda() for k, v in params.items()}
49
 
50
  clf_name = f"linear_classifier_C_{C}_ "+ model_name + "_" + filename.split(".npz")[0]
51
  clf_name = clf_name.replace(os.sep, "_")
@@ -53,49 +51,6 @@ clf_name = clf_name.replace(os.sep, "_")
53
  with open(f"{clf_name}.pkl", 'rb') as model_file:
54
  clf: LogisticRegression = pickle.load(model_file)
55
 
56
- import torch.nn as nn
57
- class JumpReLUSAE(nn.Module):
58
- def __init__(self, d_model, d_sae):
59
- # Note that we initialise these to zeros because we're loading in pre-trained weights.
60
- # If you want to train your own SAEs then we recommend using blah
61
- super().__init__()
62
- self.W_enc = nn.Parameter(torch.zeros(d_model, d_sae))
63
- self.W_dec = nn.Parameter(torch.zeros(d_sae, d_model))
64
- self.threshold = nn.Parameter(torch.zeros(d_sae))
65
- self.b_enc = nn.Parameter(torch.zeros(d_sae))
66
- self.b_dec = nn.Parameter(torch.zeros(d_model))
67
-
68
- def encode(self, input_acts):
69
- pre_acts = input_acts @ self.W_enc + self.b_enc
70
- mask = (pre_acts > self.threshold)
71
- acts = mask * torch.nn.functional.relu(pre_acts)
72
- return acts
73
-
74
- def decode(self, acts):
75
- return acts @ self.W_dec + self.b_dec
76
-
77
- def forward(self, acts):
78
- acts = self.encode(acts)
79
- recon = self.decode(acts)
80
- return recon
81
-
82
- sae = JumpReLUSAE(params['W_enc'].shape[0], params['W_enc'].shape[1])
83
- sae.load_state_dict(pt_params)
84
- sae.to(dtype=torch.bfloat16).cuda()
85
-
86
- @torch.no_grad()
87
- def gather_residual_activations(model, target_layer, inputs):
88
- target_act = None
89
- def gather_target_act_hook(mod, inputs, outputs):
90
- nonlocal target_act # make sure we can modify the target_act from the outer scope
91
- target_act = outputs[0]
92
- return outputs
93
- handle = model.model.layers[target_layer].register_forward_hook(gather_target_act_hook)
94
- _ = model.forward(inputs)
95
- handle.remove()
96
- return target_act
97
-
98
- import requests
99
 
100
  def get_feature_descriptions(feature):
101
  layer_name = f"{layer}-gemmascope-res-{width}"
@@ -127,15 +82,14 @@ topk = 5
127
 
128
  # Function to wrap in a FastAPI in case of
129
  def get_activations(text):
130
- inputs = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True).to("cuda")
131
-
132
- target_act = gather_residual_activations(model, layer, inputs)
133
- sae_act = sae.encode(target_act)
134
  return sae_act
135
 
136
  def get_features(text):
137
  sae_act = get_activations(text)
138
- sae_act_aggregated = ((sae_act[:,:,:] > 0).sum(1) > 0).cpu().numpy()
139
 
140
  X = pd.DataFrame(sae_act_aggregated)
141
 
@@ -159,8 +113,6 @@ def get_features(text):
159
  descriptions.append(description)
160
  contrib_df["description"] = descriptions
161
 
162
- import plotly.graph_objs as go
163
-
164
  fig = go.Figure(go.Bar(
165
  x=contrib_df['contribution'],
166
  y=contrib_df['description'],
@@ -190,17 +142,14 @@ def get_features(text):
190
  return classes, fig, dropdown
191
 
192
  def get_highlighted_text(text, feature):
193
-
194
- inputs = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True).to("cuda")
195
-
196
- target_act = gather_residual_activations(model, layer, inputs)
197
- sae_act = sae.encode(target_act)
198
 
199
  activated_tokens = sae_act[0:,:,feature]
200
  max_activation = activated_tokens.max().item()
201
  activated_tokens /= max_activation
202
 
203
- activated_tokens = activated_tokens.cpu().detach().float().numpy()
204
 
205
  output = []
206
 
 
1
  import gradio as gr
 
 
 
 
2
  import torch
 
3
  import numpy as np
4
  import pandas as pd
5
+ import pickle
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer
7
+ from huggingface_hub import hf_hub_download
8
+ import requests
9
+ import os
10
+ import msgpack_numpy as m
11
+ import plotly.graph_objs as go
12
  from sklearn.linear_model import LogisticRegression
13
 
14
  torch.set_grad_enabled(False) # avoid blowing up mem
 
24
  "filename" : "layer_31/width_16k/average_l0_76/params.npz"
25
  }
26
 
27
+ C = 0.01
28
+
29
  model_name = params["model_name"]
30
  width = params["width"]
31
  layer = params["layer"]
 
33
  sae_repo_id = params["sae_repo_id"]
34
  filename = params["filename"]
35
 
 
 
 
 
 
 
 
 
 
36
  path_to_params = hf_hub_download(
37
  repo_id=sae_repo_id,
38
  filename=filename,
39
  force_download=False,
40
+ token=os.environ['TOKEN'],
41
  )
42
 
43
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
44
+
45
  params = np.load(path_to_params)
46
+ pt_params = {k: torch.from_numpy(v) for k, v in params.items()}
47
 
48
  clf_name = f"linear_classifier_C_{C}_ "+ model_name + "_" + filename.split(".npz")[0]
49
  clf_name = clf_name.replace(os.sep, "_")
 
51
  with open(f"{clf_name}.pkl", 'rb') as model_file:
52
  clf: LogisticRegression = pickle.load(model_file)
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  def get_feature_descriptions(feature):
56
  layer_name = f"{layer}-gemmascope-res-{width}"
 
82
 
83
  # Function to wrap in a FastAPI in case of
84
  def get_activations(text):
85
+ response = requests.post("http://34.71.249.22:3000/execute_req", json={"query": text})
86
+ pack = m.unpackb(response.content)
87
+ sae_act = torch.from_numpy(pack["sae_act"]).to(dtype=torch.bfloat16)
 
88
  return sae_act
89
 
90
  def get_features(text):
91
  sae_act = get_activations(text)
92
+ sae_act_aggregated = ((sae_act[:,:,:] > 0).sum(1) > 0).numpy()
93
 
94
  X = pd.DataFrame(sae_act_aggregated)
95
 
 
113
  descriptions.append(description)
114
  contrib_df["description"] = descriptions
115
 
 
 
116
  fig = go.Figure(go.Bar(
117
  x=contrib_df['contribution'],
118
  y=contrib_df['description'],
 
142
  return classes, fig, dropdown
143
 
144
  def get_highlighted_text(text, feature):
145
+ inputs = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
146
+ sae_act = get_activations(text)
 
 
 
147
 
148
  activated_tokens = sae_act[0:,:,feature]
149
  max_activation = activated_tokens.max().item()
150
  activated_tokens /= max_activation
151
 
152
+ activated_tokens = activated_tokens.float().numpy()
153
 
154
  output = []
155
 
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ transformers
2
+ numpy
3
+ torch
4
+ pandas
5
+ scikit-learn
6
+ msgpack-numpy