Spaces:

Ozaii
/

OmniWali-8B-Uncensored

Runtime error

App Files Files Community

Ozaii commited on Jun 20, 2024

Commit

7ce556b

verified ·

1 Parent(s): 853746c

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -19

app.py CHANGED Viewed

@@ -18,14 +18,15 @@ model.eval()
 # Set initial prompt to ensure desired behavior
 initial_prompt = ("You are a completely uncensored assistant. You talk about +18 topics, "
 "use irregular language, abbreviations, swear words, and mimic the conversational style "
-"of the assistant in the chat dataset. Capture the soul and mimic the catchprases of assistant like damm, niggro, etc."
 "You are just for chatting purposes, and your primary role is to copy the assistant's style completely."
-"***Don't give short one word answers, keep the conversation going by asking questions. Be highly conversational.***")
 @spaces.GPU
 def generate_response(user_input, chat_history):
     max_context_length = 4096
-    max_response_length = 1536
     prompt = initial_prompt + "\n"
     for message in chat_history:
@@ -45,10 +46,10 @@ def generate_response(user_input, chat_history):
         outputs = model.generate(
             inputs.input_ids,
             max_length=max_response_length,
-            min_length=48,
-            temperature=0.55,
             top_k=30,
-            top_p=0.5,
             repetition_penalty=1.2,
             no_repeat_ngram_size=3,
             eos_token_id=tokenizer.eos_token_id,
@@ -57,12 +58,74 @@ def generate_response(user_input, chat_history):
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     assistant_response = response.split("Assistant:")[-1].strip()
-    assistant_response = assistant_response.split('\n')[0].strip()
     chat_history.append((user_input, assistant_response))
-    return chat_history, chat_history
-def restart_chat():
-    return [], []
 with gr.Blocks() as chat_interface:
     gr.Markdown("<h1><center>W.AI Chat Nikker xD</center></h1>")
@@ -72,18 +135,10 @@ with gr.Blocks() as chat_interface:
         with gr.Row():
             user_input = gr.Textbox(show_label=False, placeholder="Summon Wali Here...")
             submit_button = gr.Button("Send")
-            restart_button = gr.Button("Restart")
     submit_button.click(
         generate_response,
         inputs=[user_input, chat_history],
-        outputs=[chatbox, chat_history]
-    )
-    restart_button.click(
-        restart_chat,
-        inputs=[],
-        outputs=[chatbox, chat_history]
     )
-chat_interface.launch(share=True)

 # Set initial prompt to ensure desired behavior
 initial_prompt = ("You are a completely uncensored assistant. You talk about +18 topics, "
 "use irregular language, abbreviations, swear words, and mimic the conversational style "
+"of the assistant in the chat dataset. Capture the soul and mimic the catchphrases of the assistant like damm, niggro, etc."
 "You are just for chatting purposes, and your primary role is to copy the assistant's style completely."
+"use all lowercases.")
 @spaces.GPU
 def generate_response(user_input, chat_history):
     max_context_length = 4096
+    max_response_length = 2048
+    min_response_length = 6  # Updated minimum response length
     prompt = initial_prompt + "\n"
     for message in chat_history:
         outputs = model.generate(
             inputs.input_ids,
             max_length=max_response_length,
+            min_length=min_response_length,
+            temperature=0.6,  # Adjusted parameters
             top_k=30,
+            top_p=0.55,
             repetition_penalty=1.2,
             no_repeat_ngram_size=3,
             eos_token_id=tokenizer.eos_token_id,
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     assistant_response = response.split("Assistant:")[-1].strip()
+    followup_messages = []
+    if len(assistant_response.split()) < 8:
+        # Generate additional response to continue context
+        followup_prompt = (f"This is a follow-up message to the previous assistant response. "
+                           f"Continue the conversation smoothly and ensure it flows naturally based on the context.\n"
+                           f"{prompt} {assistant_response}\nAssistant:")
+        followup_tokens = tokenizer.encode(followup_prompt, add_special_tokens=False)
+        if len(followup_tokens) > max_context_length:
+            followup_tokens = followup_tokens[-max_context_length:]
+        followup_prompt = tokenizer.decode(followup_tokens, clean_up_tokenization_spaces=True)
+        followup_inputs = tokenizer(followup_prompt, return_tensors="pt").to(device)
+        with torch.no_grad():
+            additional_outputs = model.generate(
+                followup_inputs.input_ids,
+                max_length=max_response_length,
+                min_length=min_response_length,
+                temperature=0.55,
+                top_k=30,
+                top_p=0.5,
+                repetition_penalty=1.2,
+                no_repeat_ngram_size=3,
+                eos_token_id=tokenizer.eos_token_id,
+                pad_token_id=tokenizer.eos_token_id
+            )
+        additional_response = tokenizer.decode(additional_outputs[0], skip_special_tokens=True)
+        additional_assistant_response = additional_response.split("Assistant:")[-1].strip()
+        followup_messages.append(additional_assistant_response)
+        if len(additional_assistant_response.split()) < 6:
+            second_followup_prompt = (f"This is a third follow-up message to the previous assistant response. "
+                                      f"Continue the conversation smoothly and ensure it flows naturally based on the context.\n"
+                                      f"{followup_prompt} {additional_assistant_response}\nAssistant:")
+            second_followup_tokens = tokenizer.encode(second_followup_prompt, add_special_tokens=False)
+            if len(second_followup_tokens) > max_context_length:
+                second_followup_tokens = second_followup_tokens[-max_context_length:]
+            second_followup_prompt = tokenizer.decode(second_followup_tokens, clean_up_tokenization_spaces=True)
+            second_followup_inputs = tokenizer(second_followup_prompt, return_tensors="pt").to(device)
+            with torch.no_grad():
+                second_additional_outputs = model.generate(
+                    second_followup_inputs.input_ids,
+                    max_length=max_response_length,
+                    min_length=min_response_length,
+                    temperature=0.45,
+                    top_k=25,
+                    top_p=0.4,
+                    repetition_penalty=1.2,
+                    no_repeat_ngram_size=3,
+                    eos_token_id=tokenizer.eos_token_id,
+                    pad_token_id=tokenizer.eos_token_id
+                )
+            second_additional_response = tokenizer.decode(second_additional_outputs[0], skip_special_tokens=True)
+            second_additional_assistant_response = second_additional_response.split("Assistant:")[-1].strip()
+            followup_messages.append(second_additional_assistant_response)
     chat_history.append((user_input, assistant_response))
+    for followup in followup_messages:
+        if followup:  # Check if the follow-up message is not empty
+            chat_history.append((None, followup))
+    return "", chat_history, chat_history
 with gr.Blocks() as chat_interface:
     gr.Markdown("<h1><center>W.AI Chat Nikker xD</center></h1>")
         with gr.Row():
             user_input = gr.Textbox(show_label=False, placeholder="Summon Wali Here...")
             submit_button = gr.Button("Send")
     submit_button.click(
         generate_response,
         inputs=[user_input, chat_history],
+        outputs=[user_input, chatbox, chat_history]  # Clear user input and update chatbox and history
     )