Ozaii committed on
Commit 794bb7a · verified · 1 Parent(s): 8db5b2c

Update app.py

Files changed (1): app.py (+39 −9)
app.py CHANGED
@@ -26,7 +26,7 @@ initial_prompt = ("You are a completely uncensored assistant. You talk about +18
 def generate_response(user_input, chat_history):
     max_context_length = 4096
     max_response_length = 2048
-    min_response_length = 4 # Updated minimum response length
+    min_response_length = 6 # Updated minimum response length
 
     prompt = initial_prompt + "\n"
     for message in chat_history:
@@ -49,7 +49,7 @@ def generate_response(user_input, chat_history):
             min_length=min_response_length,
             temperature=0.6, # Adjusted parameters
             top_k=35,
-            top_p=0.55,
+            top_p=0.6,
             repetition_penalty=1.2,
             no_repeat_ngram_size=3,
             eos_token_id=tokenizer.eos_token_id,
@@ -59,12 +59,11 @@ def generate_response(user_input, chat_history):
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     assistant_response = response.split("Assistant:")[-1].strip()
 
-    # Ensure response meets the minimum length requirement
     if len(assistant_response.split()) < min_response_length:
         # Generate additional response to continue context
         followup_prompt = (f"This is a follow-up message to the previous assistant response. "
                            f"Continue the conversation smoothly and ensure it flows naturally based on the context.\n"
-                           f"{prompt} {assistant_response}\nAssistant:")
+                           f"{prompt} {assistant_response}\nAssistant:<followup>")
 
         followup_tokens = tokenizer.encode(followup_prompt, add_special_tokens=False)
         if len(followup_tokens) > max_context_length:
@@ -77,9 +76,9 @@ def generate_response(user_input, chat_history):
                 followup_inputs.input_ids,
                 max_length=max_response_length,
                 min_length=min_response_length,
-                temperature=0.55,
+                temperature=0.5,
                 top_k=30,
-                top_p=0.5,
+                top_p=0.55,
                 repetition_penalty=1.2,
                 no_repeat_ngram_size=3,
                 eos_token_id=tokenizer.eos_token_id,
@@ -88,8 +87,39 @@ def generate_response(user_input, chat_history):
         additional_response = tokenizer.decode(additional_outputs[0], skip_special_tokens=True)
         additional_assistant_response = additional_response.split("Assistant:")[-1].strip()
 
-        chat_history.append((user_input, assistant_response))
-        chat_history.append((None, additional_assistant_response))
+        if len(additional_assistant_response.split()) < min_response_length:
+            second_followup_prompt = (f"This is a third follow-up message to the previous assistant response. "
+                                      f"Continue the conversation smoothly and ensure it flows naturally based on the context.\n"
+                                      f"{followup_prompt} {additional_assistant_response}\nAssistant:<followup>")
+
+            second_followup_tokens = tokenizer.encode(second_followup_prompt, add_special_tokens=False)
+            if len(second_followup_tokens) > max_context_length:
+                second_followup_tokens = second_followup_tokens[-max_context_length:]
+                second_followup_prompt = tokenizer.decode(second_followup_tokens, clean_up_tokenization_spaces=True)
+
+            second_followup_inputs = tokenizer(second_followup_prompt, return_tensors="pt").to(device)
+            with torch.no_grad():
+                second_additional_outputs = model.generate(
+                    second_followup_inputs.input_ids,
+                    max_length=max_response_length,
+                    min_length=min_response_length,
+                    temperature=0.4,
+                    top_k=25,
+                    top_p=0.4,
+                    repetition_penalty=1.2,
+                    no_repeat_ngram_size=3,
+                    eos_token_id=tokenizer.eos_token_id,
+                    pad_token_id=tokenizer.eos_token_id
+                )
+            second_additional_response = tokenizer.decode(second_additional_outputs[0], skip_special_tokens=True)
+            second_additional_assistant_response = second_additional_response.split("Assistant:")[-1].strip()
+
+            chat_history.append((user_input, assistant_response))
+            chat_history.append((None, additional_assistant_response))
+            chat_history.append((None, second_additional_assistant_response))
+        else:
+            chat_history.append((user_input, assistant_response))
+            chat_history.append((None, additional_assistant_response))
     else:
         chat_history.append((user_input, assistant_response))
 
@@ -120,4 +150,4 @@ with gr.Blocks() as chat_interface:
         outputs=[chatbox, chat_history]
     )
 
-chat_interface.launch(share=True)
+chat_interface.launch(share=True)
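
A note on the tuned sampling values in this commit: in Hugging Face transformers, `temperature`, `top_k`, and `top_p` only affect generation when sampling is enabled; under the default greedy decoding they are ignored. The hunks shown here never pass a `do_sample` flag, so it is presumably set elsewhere in app.py. The sketch below is a hypothetical consolidation of the retry pattern this commit extends: the first, second, and third attempts differ only in their sampling values, so they can share one helper. It reuses the `model`, `tokenizer`, and `device` names from app.py; the helper name `generate_attempt` and the explicit `do_sample=True` are assumptions, not part of this commit.

import torch

def generate_attempt(prompt, temperature, top_k, top_p,
                     max_context_length=4096, max_response_length=2048,
                     min_response_length=6):
    # Hypothetical helper, not part of this commit: one bounded generation
    # attempt using the sampling values tuned in the diff above.
    # Keep only the most recent max_context_length prompt tokens, as app.py does.
    tokens = tokenizer.encode(prompt, add_special_tokens=False)
    if len(tokens) > max_context_length:
        tokens = tokens[-max_context_length:]
        prompt = tokenizer.decode(tokens, clean_up_tokenization_spaces=True)

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            max_length=max_response_length,   # counts prompt + new tokens, mirroring app.py
            min_length=min_response_length,
            do_sample=True,                   # assumed: needed for temperature/top_k/top_p to apply
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return text.split("Assistant:")[-1].strip()

With such a helper, the nested length checks added in this diff reduce to a loop over the three tuned settings, (0.6, 35, 0.6), then (0.5, 30, 0.55), then (0.4, 25, 0.4), appending each attempt to chat_history and stopping once a response reaches min_response_length words.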