gregH committed on
Commit
48cf386
·
verified ·
1 Parent(s): 60e322b

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +6 -7
index.html CHANGED
@@ -146,13 +146,12 @@ gradient norm and then apply soft removal on them to mitigate the potential jail
146
  <span id="Refusal-Loss-Approximation" class="formula" style="display: none;">
147
  $$
148
  \displaystyle
149
- \begin{aligned}
150
- f_\theta(x) &=1-\frac{1}{N}\sum_{i=1}^N JB(y_i)\\
151
- JB (y_i) &= \begin{cases}
152
- 1 \text{, if $y_i$ contains any jailbreak keyword;} \\
153
- 0 \text{, otherwise.}
154
- \end{cases}
155
- \end{aligned}
156
  $$
157
  </span>
158
  <span id="Gradient-Estimation" class="formula" style="display: none;">$$\displaystyle g_\theta(x)=\sum_{i=1}^P \frac{f_\theta(x\oplus \mu u_i)-f_\theta(x)}{\mu} u_i $$</span>
 
146
  <span id="Refusal-Loss-Approximation" class="formula" style="display: none;">
147
  $$
148
  \displaystyle
149
+ \begin{aligned}
150
+ \label{eq:influence}
151
+ \mathtt{Influence} (x_i) &= \Vert \nabla_{x_i} \log P_\theta(y|x_{1:n}) \Vert_2 \\
152
+ \mathcal{X} &= \mathtt{argtop}\text{-}n\alpha(\{\mathtt{Influence}(x_i), \forall x_i \in x_{1:n}\}) \\
153
+ \mathcal{Q} &= \{q_i, \forall x_i \in \mathcal{X}\}
154
+ \end{aligned}
 
155
  $$
156
  </span>
157
  <span id="Gradient-Estimation" class="formula" style="display: none;">$$\displaystyle g_\theta(x)=\sum_{i=1}^P \frac{f_\theta(x\oplus \mu u_i)-f_\theta(x)}{\mu} u_i $$</span>