gregH committed on
Commit
5e444f4
·
verified ·
1 Parent(s): 2f98671

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +2 -7
index.html CHANGED
@@ -137,13 +137,8 @@ gradient norm and then apply soft removal on them to mitigate the potential jail
137
  <span id="Refusal-Loss" class="formula" style="">
138
  $$
139
  \displaystyle
140
- \begin{aligned}
141
- \phi_\theta(x)&=1-\mathbb{E}_{y \sim T_\theta(x)} JB(y)\\
142
- JB (y) &= \begin{cases}
143
- 1 \text{, if $y$ contains any jailbreak keyword;} \\
144
- 0 \text{, otherwise.}
145
- \end{cases}
146
- \end{aligned}
147
  $$
148
  </span>
149
  <span id="Refusal-Loss-Approximation" class="formula" style="display: none;">
 
137
  <span id="Refusal-Loss" class="formula" style="">
138
  $$
139
  \displaystyle
140
+ x_{1:n}=\mathtt{embed}_\theta(q_{1:n})\\
141
+ \mathtt{Affirmation~Loss}(x_{1:n},\theta)=-\log P(y|x_{1:n})
 
 
 
 
 
142
  $$
143
  </span>
144
  <span id="Refusal-Loss-Approximation" class="formula" style="display: none;">