gregH committed on
Commit
31e882a
·
verified ·
1 Parent(s): 3f93432

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +7 -7
index.html CHANGED
@@ -192,15 +192,15 @@ gradient norm and then apply soft removal on them to mitigate the potential jail
192
 
193
  <script>
194
  function selectExample(exampleId) {
195
- const examples = {
196
- example1: "High-level speaking, successful jailbreaks share a common principle that they are trying to make the LLMs willing to affirm the user request which will be rejected at the beginning.",
197
- example2: "Drawing upon this inspiration, our proposed defense aims to find the tokens that are most critical in forcing the LLM to generate such affirmative responses, decrease their importance in the generation, and thereby resolve the potential jailbreak risks brought by these tokens.",
198
- example3: "To identify these tokens, we propose a new concept called the Affirmation Loss. We then use the loss's gradient norm with respect to each token in the user input prompt to find the jailbreak-critical tokens.",
199
- example4: "We select those tokens with the larger gradient norm and then apply soft removal on them to mitigate the potential jailbreak risks."
200
- };
201
 
202
  const exampleText = document.getElementById('exampleText');
203
- exampleText.innerHTML = examples[exampleId];
204
 
205
  // Highlight specific parts of the text
206
  switch (exampleId) {
 
192
 
193
  <script>
194
  function selectExample(exampleId) {
195
+ //const examples = {
196
+ // example1: "High-level speaking, successful jailbreaks share a common principle that they are trying to make the LLMs willing to affirm the user request which will be rejected at the beginning.",
197
+ // example2: "Drawing upon this inspiration, our proposed defense aims to find the tokens that are most critical in forcing the LLM to generate such affirmative responses, decrease their importance in the generation, and thereby resolve the potential jailbreak risks brought by these tokens.",
198
+ // example3: "To identify these tokens, we propose a new concept called the Affirmation Loss. We then use the loss's gradient norm with respect to each token in the user input prompt to find the jailbreak-critical tokens.",
199
+ // example4: "We select those tokens with the larger gradient norm and then apply soft removal on them to mitigate the potential jailbreak risks."
200
+ //};
201
 
202
  const exampleText = document.getElementById('exampleText');
203
+ //exampleText.innerHTML = examples[exampleId];
204
 
205
  // Highlight specific parts of the text
206
  switch (exampleId) {