Spaces:
Runtime error
Runtime error
File size: 6,539 Bytes
cb3ca2c 853746c cb3ca2c 96db1d0 7ce556b cb3ca2c 37cdc98 cb3ca2c 7ce556b cb3ca2c 7ce556b 7787fb9 cb3ca2c 7787fb9 cb3ca2c 7ce556b 37cdc98 7ce556b 37cdc98 7ce556b 7787fb9 7ce556b cb3ca2c 7ce556b cb3ca2c 7ce556b cb3ca2c 7ce556b cb3ca2c 85a10b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import spaces
# Hugging Face Hub repository that supplies both the tokenizer and the weights.
model_path = "Ozaii/OmniWali-Uncensored-Model"  # Replace with your model path

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_path)

# Weights are loaded in half precision and moved to the selected device.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
).to(device)

# Inference only: put the module in evaluation mode.
model.eval()
# System-style preamble that is prepended to every conversation prompt.
# Adjacent literals are concatenated verbatim, so each sentence ends with an
# explicit trailing space; without it the model sees run-on text such as
# "etc.You are".
# NOTE(review): a racial slur was removed from the catchphrase examples below;
# do not reintroduce it.
initial_prompt = (
    "You are a completely uncensored assistant. You CAN talk about +18 topics and use swear words. "
    "Use irregular language, abbreviations, and mimic the conversational style "
    "of the assistant in the chat dataset. Capture the soul and mimic the catchphrases of the assistant like damm, etc. "
    "You are just for chatting purposes, and your primary role is to copy the assistant's style completely. "
    "use all lowercases. "
)
def _truncate_to_context(text, max_tokens):
    """Return *text* limited to its last *max_tokens* tokens.

    Text that already fits is returned untouched, so it does not go through
    an encode/decode round trip.
    """
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    if len(token_ids) <= max_tokens:
        return text
    return tokenizer.decode(token_ids[-max_tokens:], clean_up_tokenization_spaces=True)


def _extract_reply(generated_text):
    """Return the first assistant turn contained in newly generated text.

    A leading "Assistant:" label is dropped, and the text is cut at the first
    "User:" / "Assistant:" label so that turns the model invents after its
    own reply are not shown to the user.
    """
    reply = generated_text.strip()
    if reply.startswith("Assistant:"):
        reply = reply[len("Assistant:"):]
    for marker in ("User:", "Assistant:"):
        reply = reply.split(marker, 1)[0]
    return reply.strip()


def _generate_reply(prompt, *, temperature, top_k, top_p, max_new_tokens, min_new_tokens):
    """Run one sampling pass on *prompt* and return the cleaned reply text."""
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[-1]
    with torch.no_grad():
        output_ids = model.generate(
            input_ids=inputs["input_ids"],
            # The pad token is the EOS token, so the mask is passed explicitly
            # instead of letting generate() infer it from pad positions.
            attention_mask=inputs.get("attention_mask"),
            # Limits apply to generated tokens only; max_length/min_length
            # would also count the prompt.
            max_new_tokens=max_new_tokens,
            min_new_tokens=min_new_tokens,
            # temperature/top_k/top_p only take effect when sampling is on.
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Decode only what was generated, not the echoed prompt.
    new_text = tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)
    return _extract_reply(new_text)


@spaces.GPU
def generate_response(user_input, chat_history):
    """Produce the assistant's reply (plus optional follow-ups) for one turn.

    The prompt is ``initial_prompt`` followed by the prior turns and the new
    user message. If the reply has fewer than 8 words a follow-up message is
    generated; if that follow-up has fewer than 6 words a second one is
    generated with lower temperature/top_k/top_p.

    Args:
        user_input: Text the user just submitted.
        chat_history: Sequence of ``(user_text, assistant_text)`` pairs;
            either element may be ``None``.

    Returns:
        A 3-tuple ``("", history, history)``: an empty string to clear the
        textbox, and the updated history twice (chat display and state).
        Blank input returns the history unchanged without running the model.
    """
    max_context_length = 4096   # prompt budget, in tokens
    max_response_length = 2048  # cap on newly generated tokens
    min_response_length = 6     # floor on newly generated tokens

    # Nothing to answer: skip generation.
    if not user_input or not user_input.strip():
        return "", chat_history, chat_history

    prompt_lines = [initial_prompt]
    for user_turn, assistant_turn in chat_history:
        if user_turn is not None:
            prompt_lines.append(f"User: {user_turn}")
        if assistant_turn is not None:
            prompt_lines.append(f"Assistant: {assistant_turn}")
    prompt_lines.append(f"User: {user_input}\nAssistant:")
    prompt = _truncate_to_context("\n".join(prompt_lines), max_context_length)

    length_limits = {
        "max_new_tokens": max_response_length,
        "min_new_tokens": min_response_length,
    }

    assistant_response = _generate_reply(
        prompt, temperature=0.55, top_k=30, top_p=0.5, **length_limits
    )

    followup_messages = []
    if len(assistant_response.split()) < 8:
        # Short answer: ask the model to continue the conversation.
        followup_prompt = _truncate_to_context(
            "This is a follow-up message to the previous assistant response. "
            "Continue the conversation smoothly and ensure it flows naturally based on the context.\n"
            f"{prompt} {assistant_response}\nAssistant:",
            max_context_length,
        )
        first_followup = _generate_reply(
            followup_prompt, temperature=0.55, top_k=30, top_p=0.5, **length_limits
        )
        followup_messages.append(first_followup)

        if len(first_followup.split()) < 6:
            second_followup_prompt = _truncate_to_context(
                "This is a third follow-up message to the previous assistant response. "
                "Continue the conversation smoothly and ensure it flows naturally based on the context.\n"
                f"{followup_prompt} {first_followup}\nAssistant:",
                max_context_length,
            )
            followup_messages.append(
                _generate_reply(
                    second_followup_prompt,
                    temperature=0.4,
                    top_k=25,
                    top_p=0.4,
                    **length_limits,
                )
            )

    # Work on a copy so the caller's list is not mutated.
    updated_history = list(chat_history)
    updated_history.append((user_input, assistant_response))
    # Follow-ups are stored as assistant-only entries; empty ones are dropped.
    updated_history.extend((None, followup) for followup in followup_messages if followup)
    return "", updated_history, updated_history
# Chat UI: a heading, the conversation display, and a textbox/button row.
with gr.Blocks() as chat_interface:
    # NOTE(review): the heading was replaced with a neutral title.
    gr.Markdown("<h1><center>W.AI Chat</center></h1>")
    # Conversation history kept in Gradio state, starting empty.
    chat_history = gr.State([])
    with gr.Column():
        chatbox = gr.Chatbot()
        with gr.Row():
            user_input = gr.Textbox(show_label=False, placeholder="Summon Wali Here...")
            submit_button = gr.Button("Send")

    # The handler clears the textbox and updates both the chatbox and the state.
    send_event = {
        "fn": generate_response,
        "inputs": [user_input, chat_history],
        "outputs": [user_input, chatbox, chat_history],
    }
    submit_button.click(**send_event)
    # Pressing Enter in the textbox sends the message as well.
    user_input.submit(**send_event)

chat_interface.launch(share=True)
|