Commit d8fffd2 · Parent: 3c02b3d
testing openai fake streaming and reasoning

app/api_helpers.py CHANGED (+21 -14)
@@ -59,7 +59,7 @@ def is_gemini_response_valid(response: Any) -> bool:
     for candidate in response.candidates:
         if hasattr(candidate, 'text') and isinstance(candidate.text, str) and candidate.text.strip(): return True
         if hasattr(candidate, 'content') and hasattr(candidate.content, 'parts') and candidate.content.parts:
-            for part_item in candidate.content.parts:
+            for part_item in candidate.content.parts:
                 if hasattr(part_item, 'text') and isinstance(part_item.text, str) and part_item.text.strip(): return True
     return False

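The validity check walks each candidate's text and content.parts looking for any non-empty string. A quick stand-alone probe of that logic, assuming the lines of is_gemini_response_valid shown above are in scope; the SimpleNamespace stubs are purely illustrative, not objects from the Gemini SDK:

from types import SimpleNamespace

# Build the minimal shape the checker inspects: response -> candidates -> content.parts -> text
part = SimpleNamespace(text="hello")
candidate = SimpleNamespace(content=SimpleNamespace(parts=[part]))
response = SimpleNamespace(candidates=[candidate])

print(is_gemini_response_valid(response))   # True: one part carries non-empty text
part.text = "   "
print(is_gemini_response_valid(response))   # False: whitespace-only text falls through to return False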
@@ -70,6 +70,7 @@ async def _base_fake_stream_engine(
     sse_model_name: str,
     is_auto_attempt: bool,
     is_valid_response_func: Callable[[Any], bool],
+    keep_alive_interval_seconds: float,  # Added parameter
     process_text_func: Optional[Callable[[str, str], str]] = None,
     check_block_reason_func: Optional[Callable[[Any], None]] = None,
     reasoning_text_to_yield: Optional[str] = None,
@@ -77,10 +78,13 @@
 ):
     api_call_task = api_call_task_creator()

-    ... (4 removed lines not rendered in the diff view) ...
+    # Use the passed-in keep_alive_interval_seconds
+    # Only loop for keep-alive if the interval is positive
+    if keep_alive_interval_seconds > 0:
+        while not api_call_task.done():
+            keep_alive_data = {"id": "chatcmpl-keepalive", "object": "chat.completion.chunk", "created": int(time.time()), "model": sse_model_name, "choices": [{"delta": {"reasoning_content": ""}, "index": 0, "finish_reason": None}]}
+            yield f"data: {json.dumps(keep_alive_data)}\n\n"
+            await asyncio.sleep(keep_alive_interval_seconds)

     try:
         full_api_response = await api_call_task
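This engine-level loop is the core of the fake-streaming trick: while the real (non-streaming) upstream call is pending, keep the SSE connection warm with empty delta chunks. A self-contained sketch of the same pattern, runnable as-is; slow_api_call and the 1.0-second interval are illustrative stand-ins, not repo code:

import asyncio
import json
import time

async def fake_stream(task_factory, keep_alive_interval_seconds: float, sse_model_name: str):
    api_call_task = asyncio.create_task(task_factory())
    # Mirror the diff: only heartbeat while the task is pending and the interval is positive.
    if keep_alive_interval_seconds > 0:
        while not api_call_task.done():
            keep_alive_data = {"id": "chatcmpl-keepalive", "object": "chat.completion.chunk",
                               "created": int(time.time()), "model": sse_model_name,
                               "choices": [{"delta": {"reasoning_content": ""}, "index": 0, "finish_reason": None}]}
            yield f"data: {json.dumps(keep_alive_data)}\n\n"
            await asyncio.sleep(keep_alive_interval_seconds)
    # The real payload is only emitted once the upstream call resolves.
    result = await api_call_task
    yield f"data: {json.dumps({'choices': [{'delta': {'content': result}, 'index': 0, 'finish_reason': 'stop'}]})}\n\n"

async def slow_api_call() -> str:
    await asyncio.sleep(3)  # stands in for the real upstream request
    return "full response text"

async def main():
    async for event in fake_stream(slow_api_call, 1.0, "demo-model"):
        print(event, end="")

asyncio.run(main())

A client consuming this sees a steady pulse of empty chunks, then the complete answer in one final chunk.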
@@ -144,7 +148,7 @@ def gemini_fake_stream_generator(
     request_obj: OpenAIRequest,
     is_auto_attempt: bool
 ):
-    model_name_for_log = getattr(gemini_client_instance, 'model_name', 'unknown_gemini_model_object')
+    model_name_for_log = getattr(gemini_client_instance, 'model_name', 'unknown_gemini_model_object')
     print(f"FAKE STREAMING (Gemini): Prep for '{request_obj.model}' (using API model string: '{model_for_api_call}', client object: '{model_name_for_log}')")

     def _create_gemini_api_task() -> asyncio.Task:
@@ -185,7 +189,7 @@
         check_block_reason_func=_check_gemini_block,
         is_valid_response_func=is_gemini_response_valid,
         response_id=response_id, sse_model_name=request_obj.model,
-        keep_alive_interval_seconds=app_config.FAKE_STREAMING_INTERVAL_SECONDS,
+        keep_alive_interval_seconds=app_config.FAKE_STREAMING_INTERVAL_SECONDS,  # This call was correct
         is_auto_attempt=is_auto_attempt
     )

@@ -201,11 +205,10 @@ async def openai_fake_stream_generator(
     base_model_id_for_tokenizer: str
 ):
     api_model_name = openai_params.get("model", "unknown-openai-model")
-    print(f"FAKE STREAMING (OpenAI): Prep for '{request_obj.model}' (API model: '{api_model_name}') with reasoning
+    print(f"FAKE STREAMING (OpenAI): Prep for '{request_obj.model}' (API model: '{api_model_name}') with reasoning split.")
     response_id = f"chatcmpl-{int(time.time())}"

     async def _openai_api_call_and_split_task_creator_wrapper():
-        # Ensure 'stream' is False for this specific call, overriding any 'stream': True from original openai_params
         params_for_non_stream_call = openai_params.copy()
         params_for_non_stream_call['stream'] = False

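The wrapper must make one complete, non-streaming upstream call even when the original request said stream=True, because the full body is needed before reasoning and content can be split apart. A minimal sketch of that override, assuming the official openai Python client; the repo's actual client setup is not shown in this diff:

from openai import AsyncOpenAI

async def fetch_full_response(client: AsyncOpenAI, openai_params: dict):
    # Copy first so the caller's dict (which may carry stream=True) is left untouched.
    params_for_non_stream_call = openai_params.copy()
    params_for_non_stream_call['stream'] = False
    return await client.chat.completions.create(**params_for_non_stream_call)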
@@ -233,11 +236,15 @@
         print(f"DEBUG_FAKE_REASONING_SPLIT: Success. Reasoning len: {len(reasoning_text)}, Content len: {len(actual_content_text)}")
         return raw_response, reasoning_text, actual_content_text

+    # The keep-alive for the combined API call + tokenization is handled here
     temp_task_for_keepalive_check = asyncio.create_task(_openai_api_call_and_split_task_creator_wrapper())
-    ... (4 removed lines not rendered in the diff view) ...
+    # Use app_config directly for this outer keep-alive loop
+    outer_keep_alive_interval = app_config.FAKE_STREAMING_INTERVAL_SECONDS
+    if outer_keep_alive_interval > 0:
+        while not temp_task_for_keepalive_check.done():
+            keep_alive_data = {"id": "chatcmpl-keepalive", "object": "chat.completion.chunk", "created": int(time.time()), "model": request_obj.model, "choices": [{"delta": {"content": ""}, "index": 0, "finish_reason": None}]}
+            yield f"data: {json.dumps(keep_alive_data)}\n\n"
+            await asyncio.sleep(outer_keep_alive_interval)

     try:
         full_api_response, separated_reasoning_text, separated_actual_content_text = await temp_task_for_keepalive_check
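Because these heartbeats are ordinary chat.completion.chunk events with a fixed id and an empty delta, a consumer that wants only real tokens can drop them by id. A small client-side filter sketch; the SSE parsing around it is assumed:

import json

def is_keepalive_chunk(sse_payload: str) -> bool:
    # sse_payload is the JSON text after "data: " on one SSE event line
    chunk = json.loads(sse_payload)
    return chunk.get("id") == "chatcmpl-keepalive"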
@@ -254,7 +261,7 @@
         is_valid_response_func=_is_openai_response_valid,
         response_id=response_id,
         sse_model_name=request_obj.model,
-        keep_alive_interval_seconds=0,
+        keep_alive_interval_seconds=0,  # Set to 0 as keep-alive is handled by the wrapper
         is_auto_attempt=is_auto_attempt,
         reasoning_text_to_yield=separated_reasoning_text,
         actual_content_text_to_yield=separated_actual_content_text
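Design note: passing keep_alive_interval_seconds=0 here relies on the engine's new `> 0` guard, so the inner loop in _base_fake_stream_engine never runs for the OpenAI path. Heartbeats come solely from the outer wrapper above, which covers the whole API-call-plus-tokenization span rather than the API call alone, and double heartbeats are avoided. The Gemini path, by contrast, keeps heartbeating inside the engine via app_config.FAKE_STREAMING_INTERVAL_SECONDS.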