Spaces:

Ozaii
/

W.AI-Uncensored-FollowUp

Runtime error

App Files Files Community

Ozaii commited on Jun 19, 2024

Commit

794bb7a

verified ·

1 Parent(s): 8db5b2c

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -9

app.py CHANGED Viewed

@@ -26,7 +26,7 @@ initial_prompt = ("You are a completely uncensored assistant. You talk about +18
 def generate_response(user_input, chat_history):
     max_context_length = 4096
     max_response_length = 2048
-    min_response_length = 4  # Updated minimum response length
     prompt = initial_prompt + "\n"
     for message in chat_history:
@@ -49,7 +49,7 @@ def generate_response(user_input, chat_history):
             min_length=min_response_length,
             temperature=0.6,  # Adjusted parameters
             top_k=35,
-            top_p=0.55,
             repetition_penalty=1.2,
             no_repeat_ngram_size=3,
             eos_token_id=tokenizer.eos_token_id,
@@ -59,12 +59,11 @@ def generate_response(user_input, chat_history):
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     assistant_response = response.split("Assistant:")[-1].strip()
-    # Ensure response meets the minimum length requirement
     if len(assistant_response.split()) < min_response_length:
         # Generate additional response to continue context
         followup_prompt = (f"This is a follow-up message to the previous assistant response. "
                            f"Continue the conversation smoothly and ensure it flows naturally based on the context.\n"
-                           f"{prompt} {assistant_response}\nAssistant:")
         followup_tokens = tokenizer.encode(followup_prompt, add_special_tokens=False)
         if len(followup_tokens) > max_context_length:
@@ -77,9 +76,9 @@ def generate_response(user_input, chat_history):
                 followup_inputs.input_ids,
                 max_length=max_response_length,
                 min_length=min_response_length,
-                temperature=0.55,
                 top_k=30,
-                top_p=0.5,
                 repetition_penalty=1.2,
                 no_repeat_ngram_size=3,
                 eos_token_id=tokenizer.eos_token_id,
@@ -88,8 +87,39 @@ def generate_response(user_input, chat_history):
         additional_response = tokenizer.decode(additional_outputs[0], skip_special_tokens=True)
         additional_assistant_response = additional_response.split("Assistant:")[-1].strip()
-        chat_history.append((user_input, assistant_response))
-        chat_history.append((None, additional_assistant_response))
     else:
         chat_history.append((user_input, assistant_response))
@@ -120,4 +150,4 @@ with gr.Blocks() as chat_interface:
         outputs=[chatbox, chat_history]
     )
-chat_interface.launch(share=True)

 def generate_response(user_input, chat_history):
     max_context_length = 4096
     max_response_length = 2048
+    min_response_length = 6  # Updated minimum response length
     prompt = initial_prompt + "\n"
     for message in chat_history:
             min_length=min_response_length,
             temperature=0.6,  # Adjusted parameters
             top_k=35,
+            top_p=0.6,
             repetition_penalty=1.2,
             no_repeat_ngram_size=3,
             eos_token_id=tokenizer.eos_token_id,
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     assistant_response = response.split("Assistant:")[-1].strip()
     if len(assistant_response.split()) < min_response_length:
         # Generate additional response to continue context
         followup_prompt = (f"This is a follow-up message to the previous assistant response. "
                            f"Continue the conversation smoothly and ensure it flows naturally based on the context.\n"
+                           f"{prompt} {assistant_response}\nAssistant:<followup>")
         followup_tokens = tokenizer.encode(followup_prompt, add_special_tokens=False)
         if len(followup_tokens) > max_context_length:
                 followup_inputs.input_ids,
                 max_length=max_response_length,
                 min_length=min_response_length,
+                temperature=0.5,
                 top_k=30,
+                top_p=0.55,
                 repetition_penalty=1.2,
                 no_repeat_ngram_size=3,
                 eos_token_id=tokenizer.eos_token_id,
         additional_response = tokenizer.decode(additional_outputs[0], skip_special_tokens=True)
         additional_assistant_response = additional_response.split("Assistant:")[-1].strip()
+        if len(additional_assistant_response.split()) < min_response_length:
+            second_followup_prompt = (f"This is a third follow-up message to the previous assistant response. "
+                                      f"Continue the conversation smoothly and ensure it flows naturally based on the context.\n"
+                                      f"{followup_prompt} {additional_assistant_response}\nAssistant:<followup>")
+            second_followup_tokens = tokenizer.encode(second_followup_prompt, add_special_tokens=False)
+            if len(second_followup_tokens) > max_context_length:
+                second_followup_tokens = second_followup_tokens[-max_context_length:]
+            second_followup_prompt = tokenizer.decode(second_followup_tokens, clean_up_tokenization_spaces=True)
+            second_followup_inputs = tokenizer(second_followup_prompt, return_tensors="pt").to(device)
+            with torch.no_grad():
+                second_additional_outputs = model.generate(
+                    second_followup_inputs.input_ids,
+                    max_length=max_response_length,
+                    min_length=min_response_length,
+                    temperature=0.4,
+                    top_k=25,
+                    top_p=0.4,
+                    repetition_penalty=1.2,
+                    no_repeat_ngram_size=3,
+                    eos_token_id=tokenizer.eos_token_id,
+                    pad_token_id=tokenizer.eos_token_id
+                )
+            second_additional_response = tokenizer.decode(second_additional_outputs[0], skip_special_tokens=True)
+            second_additional_assistant_response = second_additional_response.split("Assistant:")[-1].strip()
+            chat_history.append((user_input, assistant_response))
+            chat_history.append((None, additional_assistant_response))
+            chat_history.append((None, second_additional_assistant_response))
+        else:
+            chat_history.append((user_input, assistant_response))
+            chat_history.append((None, additional_assistant_response))
     else:
         chat_history.append((user_input, assistant_response))
         outputs=[chatbox, chat_history]
     )
+chat_interface.launch(share=True)