Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -107,12 +107,44 @@ def apply_replacements(text):
|
|
107 |
text = text.replace(phrase, replacement)
|
108 |
return text
|
109 |
|
110 |
-
|
111 |
def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int, fast_mode: bool = False):
|
112 |
"""
|
113 |
Call the OpenAI ChatCompletion endpoint using the new client and yield streaming responses.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
"""
|
115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
else:
|
117 |
yield "HealthAssistant is Thinking! Please wait, your response will output shortly...\n\n"
|
118 |
|
|
|
107 |
text = text.replace(phrase, replacement)
|
108 |
return text
|
109 |
|
|
|
110 |
def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int, fast_mode: bool = False):
|
111 |
"""
|
112 |
Call the OpenAI ChatCompletion endpoint using the new client and yield streaming responses.
|
113 |
+
|
114 |
+
Implements <think> logic and retries if the full response is blank.
|
115 |
+
|
116 |
+
Args:
|
117 |
+
message (str): The latest user message.
|
118 |
+
history (list): Conversation history as a list of (user, assistant) tuples.
|
119 |
+
temperature (float): Sampling temperature.
|
120 |
+
max_new_tokens (int): Maximum tokens to generate.
|
121 |
+
|
122 |
+
Yields:
|
123 |
+
str: Partial cumulative output from the assistant.
|
124 |
"""
|
125 |
+
|
126 |
+
conversation = []
|
127 |
+
|
128 |
+
if (not history and message.startswith("Start a talk therapy session with me.")) or \
|
129 |
+
any(user_msg.startswith("Start a talk therapy session with me.") for user_msg, _ in history):
|
130 |
+
fast_mode = True
|
131 |
+
|
132 |
+
if not history:
|
133 |
+
# Initialize with system prompt and assistant confirmation.
|
134 |
+
conversation.append({"role": "system", "content": SYSTEM_PROMPT})
|
135 |
+
conversation.append({"role": "assistant", "content": "Understood! I will act as the user's healthcare provider..."})
|
136 |
+
|
137 |
+
for user_msg, assistant_msg in history:
|
138 |
+
conversation.append({"role": "user", "content": user_msg})
|
139 |
+
conversation.append({"role": "assistant", "content": assistant_msg})
|
140 |
+
|
141 |
+
conversation.append({"role": "user", "content": message})
|
142 |
+
|
143 |
+
if not fast_mode:
|
144 |
+
# Indicate that the assistant is thinking.
|
145 |
+
yield "HealthAssistant is Thinking! Please wait, your response will output shortly. This may take 10-30 seconds...\n\n"
|
146 |
+
think_result = think(conversation)
|
147 |
+
conversation.append({"role": "assistant", "content": "<think>\n" + think_result + "\n</think> I will now respond to the user's message:\n\n"})
|
148 |
else:
|
149 |
yield "HealthAssistant is Thinking! Please wait, your response will output shortly...\n\n"
|
150 |
|