gregH committed on
Commit
2f76d7a
·
verified ·
1 Parent(s): 28c42a4

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +8 -4
index.html CHANGED
@@ -163,28 +163,32 @@ Exploring Refusal Loss Landscapes </title>
163
  <h3>Perplexity Filter</h3>
164
  <div>
165
  <ul>
166
- <li>Paper: </li>
 
167
  <li>Brief Introduction: </li>
168
  </ul>
169
  </div>
170
  <h3>SmoothLLM</h3>
171
  <div>
172
  <ul>
173
- <li>Paper: </li>
 
174
  <li>Brief Introduction: </li>
175
  </ul>
176
  </div>
177
  <h3>Erase-Check</h3>
178
  <div>
179
  <ul>
180
- <li>Paper: </li>
 
181
  <li>Brief Introduction: </li>
182
  </ul>
183
  </div>
184
  <h3>Self-Reminder</h3>
185
  <div>
186
  <ul>
187
- <li>Paper: </li>
 
188
  <li>Brief Introduction: </li>
189
  </ul>
190
  </div>
 
163
  <h3>Perplexity Filter</h3>
164
  <div>
165
  <ul>
166
+ <li>Paper: <a href="https://arxiv.org/abs/2309.00614" target="_blank" rel="noopener noreferrer">
167
+ Baseline Defenses for Adversarial Attacks Against Aligned Language Models</a></li>
168
  <li>Brief Introduction: </li>
169
  </ul>
170
  </div>
171
  <h3>SmoothLLM</h3>
172
  <div>
173
  <ul>
174
+ <li>Paper: <a href="https://arxiv.org/abs/2310.03684" target="_blank" rel="noopener noreferrer">
175
+ SmoothLLM: Defending Large Language Models Against Jailbreaking Attacks</a></li>
176
  <li>Brief Introduction: </li>
177
  </ul>
178
  </div>
179
  <h3>Erase-Check</h3>
180
  <div>
181
  <ul>
182
+ <li>Paper: <a href="https://arxiv.org/abs/2309.02705" target="_blank" rel="noopener noreferrer">
183
+ Certifying LLM Safety against Adversarial Prompting</a></li>
184
  <li>Brief Introduction: </li>
185
  </ul>
186
  </div>
187
  <h3>Self-Reminder</h3>
188
  <div>
189
  <ul>
190
+ <li>Paper: <a href="https://assets.researchsquare.com/files/rs-2873090/v1_covered_eb589a01-bf05-4f32-b3eb-0d6864f64ad9.pdf?c=1702456350" target="_blank" rel="noopener noreferrer">
191
+ Defending ChatGPT against Jailbreak Attack via Self-Reminder</a></li>
192
  <li>Brief Introduction: </li>
193
  </ul>
194
  </div>