bibibi12345 committed
Commit d8fffd2 · 1 Parent(s): 3c02b3d

testing openai fake streaming and reasoning

Files changed (1): app/api_helpers.py (+21 -14)
app/api_helpers.py CHANGED
@@ -59,7 +59,7 @@ def is_gemini_response_valid(response: Any) -> bool:
     for candidate in response.candidates:
         if hasattr(candidate, 'text') and isinstance(candidate.text, str) and candidate.text.strip(): return True
         if hasattr(candidate, 'content') and hasattr(candidate.content, 'parts') and candidate.content.parts:
-            for part_item in candidate.content.parts: # Renamed part to part_item
+            for part_item in candidate.content.parts:
                 if hasattr(part_item, 'text') and isinstance(part_item.text, str) and part_item.text.strip(): return True
     return False
 
@@ -70,6 +70,7 @@ async def _base_fake_stream_engine(
     sse_model_name: str,
     is_auto_attempt: bool,
     is_valid_response_func: Callable[[Any], bool],
+    keep_alive_interval_seconds: float, # Added parameter
     process_text_func: Optional[Callable[[str, str], str]] = None,
     check_block_reason_func: Optional[Callable[[Any], None]] = None,
     reasoning_text_to_yield: Optional[str] = None,
@@ -77,10 +78,13 @@
 ):
     api_call_task = api_call_task_creator()
 
-    while not api_call_task.done():
-        keep_alive_data = {"id": "chatcmpl-keepalive", "object": "chat.completion.chunk", "created": int(time.time()), "model": sse_model_name, "choices": [{"delta": {"reasoning_content": ""}, "index": 0, "finish_reason": None}]}
-        yield f"data: {json.dumps(keep_alive_data)}\n\n"
-        await asyncio.sleep(app_config.FAKE_STREAMING_INTERVAL_SECONDS)
+    # Use the passed-in keep_alive_interval_seconds
+    # Only loop for keep-alive if the interval is positive
+    if keep_alive_interval_seconds > 0:
+        while not api_call_task.done():
+            keep_alive_data = {"id": "chatcmpl-keepalive", "object": "chat.completion.chunk", "created": int(time.time()), "model": sse_model_name, "choices": [{"delta": {"reasoning_content": ""}, "index": 0, "finish_reason": None}]}
+            yield f"data: {json.dumps(keep_alive_data)}\n\n"
+            await asyncio.sleep(keep_alive_interval_seconds)
 
     try:
         full_api_response = await api_call_task
@@ -144,7 +148,7 @@ def gemini_fake_stream_generator(
     request_obj: OpenAIRequest,
     is_auto_attempt: bool
 ):
-    model_name_for_log = getattr(gemini_client_instance, 'model_name', 'unknown_gemini_model_object') # Use a default if no model_name
+    model_name_for_log = getattr(gemini_client_instance, 'model_name', 'unknown_gemini_model_object')
     print(f"FAKE STREAMING (Gemini): Prep for '{request_obj.model}' (using API model string: '{model_for_api_call}', client object: '{model_name_for_log}')")
 
     def _create_gemini_api_task() -> asyncio.Task:
@@ -185,7 +189,7 @@ def gemini_fake_stream_generator(
         check_block_reason_func=_check_gemini_block,
         is_valid_response_func=is_gemini_response_valid,
         response_id=response_id, sse_model_name=request_obj.model,
-        keep_alive_interval_seconds=app_config.FAKE_STREAMING_INTERVAL_SECONDS,
+        keep_alive_interval_seconds=app_config.FAKE_STREAMING_INTERVAL_SECONDS, # This call was correct
         is_auto_attempt=is_auto_attempt
     )
 
@@ -201,11 +205,10 @@ async def openai_fake_stream_generator(
     base_model_id_for_tokenizer: str
 ):
     api_model_name = openai_params.get("model", "unknown-openai-model")
-    print(f"FAKE STREAMING (OpenAI): Prep for '{request_obj.model}' (API model: '{api_model_name}') with reasoning spli t.")
+    print(f"FAKE STREAMING (OpenAI): Prep for '{request_obj.model}' (API model: '{api_model_name}') with reasoning split.")
     response_id = f"chatcmpl-{int(time.time())}"
 
     async def _openai_api_call_and_split_task_creator_wrapper():
-        # Ensure 'stream' is False for this specific call, overriding any 'stream': True from original openai_params
        params_for_non_stream_call = openai_params.copy()
        params_for_non_stream_call['stream'] = False
 
@@ -233,11 +236,15 @@
             print(f"DEBUG_FAKE_REASONING_SPLIT: Success. Reasoning len: {len(reasoning_text)}, Content len: {len(actual_content_text)}")
             return raw_response, reasoning_text, actual_content_text
 
+    # The keep-alive for the combined API call + tokenization is handled here
     temp_task_for_keepalive_check = asyncio.create_task(_openai_api_call_and_split_task_creator_wrapper())
-    while not temp_task_for_keepalive_check.done():
-        keep_alive_data = {"id": "chatcmpl-keepalive", "object": "chat.completion.chunk", "created": int(time.time()), "model": request_obj.model, "choices": [{"delta": {"content": ""}, "index": 0, "finish_reason": None}]}
-        yield f"data: {json.dumps(keep_alive_data)}\n\n"
-        await asyncio.sleep(app_config.FAKE_STREAMING_INTERVAL_SECONDS)
+    # Use app_config directly for this outer keep-alive loop
+    outer_keep_alive_interval = app_config.FAKE_STREAMING_INTERVAL_SECONDS
+    if outer_keep_alive_interval > 0:
+        while not temp_task_for_keepalive_check.done():
+            keep_alive_data = {"id": "chatcmpl-keepalive", "object": "chat.completion.chunk", "created": int(time.time()), "model": request_obj.model, "choices": [{"delta": {"content": ""}, "index": 0, "finish_reason": None}]}
+            yield f"data: {json.dumps(keep_alive_data)}\n\n"
+            await asyncio.sleep(outer_keep_alive_interval)
 
     try:
         full_api_response, separated_reasoning_text, separated_actual_content_text = await temp_task_for_keepalive_check
@@ -254,7 +261,7 @@
         is_valid_response_func=_is_openai_response_valid,
         response_id=response_id,
         sse_model_name=request_obj.model,
-        keep_alive_interval_seconds=0,
+        keep_alive_interval_seconds=0, # Set to 0 as keep-alive is handled by the wrapper
         is_auto_attempt=is_auto_attempt,
         reasoning_text_to_yield=separated_reasoning_text,
         actual_content_text_to_yield=separated_actual_content_text
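Below is a minimal, runnable sketch of the keep-alive pattern this commit parameterizes: while a non-streaming API call runs in the background, the generator emits empty SSE chunks so the client connection stays open, and the heartbeat loop is skipped entirely when the interval is 0. Only the control flow and the chunk shape mirror the diff; the names keep_alive_stream and fake_api_call are illustrative assumptions, not code from the repository.

    import asyncio
    import json
    import time
    from typing import Callable

    async def keep_alive_stream(
        api_call_task_creator: Callable[[], "asyncio.Task[str]"],
        sse_model_name: str,
        keep_alive_interval_seconds: float,  # 0 disables the heartbeat loop, as in the diff
    ):
        api_call_task = api_call_task_creator()
        if keep_alive_interval_seconds > 0:
            while not api_call_task.done():
                # An empty reasoning_content delta: a no-op for clients,
                # but it keeps the SSE connection from timing out.
                keep_alive_data = {"id": "chatcmpl-keepalive", "object": "chat.completion.chunk",
                                   "created": int(time.time()), "model": sse_model_name,
                                   "choices": [{"delta": {"reasoning_content": ""}, "index": 0, "finish_reason": None}]}
                yield f"data: {json.dumps(keep_alive_data)}\n\n"
                await asyncio.sleep(keep_alive_interval_seconds)
        full_text = await api_call_task
        final_chunk = {"id": "chatcmpl-demo", "object": "chat.completion.chunk",
                       "created": int(time.time()), "model": sse_model_name,
                       "choices": [{"delta": {"content": full_text}, "index": 0, "finish_reason": "stop"}]}
        yield f"data: {json.dumps(final_chunk)}\n\n"
        yield "data: [DONE]\n\n"

    async def fake_api_call() -> str:
        await asyncio.sleep(3)  # stands in for the real, slow non-streaming call
        return "final answer"

    async def main():
        async for chunk in keep_alive_stream(lambda: asyncio.create_task(fake_api_call()), "demo-model", 1.0):
            print(chunk, end="")

    asyncio.run(main())

Passing the interval in as a parameter, rather than reading app_config inside the engine, is what lets the OpenAI path below disable the inner loop by calling with keep_alive_interval_seconds=0.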
 
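For the OpenAI path, the division of labor the diff establishes is: the wrapper owns the heartbeat for the combined API-call-plus-reasoning-split task, then hands the already-separated texts to the engine with keep_alive_interval_seconds=0 so a second heartbeat loop never runs. A sketch of that arrangement under stated assumptions: split_reasoning is a hypothetical stand-in for the repository's tokenizer-based split, and the hard-coded interval replaces app_config.FAKE_STREAMING_INTERVAL_SECONDS.

    import asyncio
    import json
    import time

    FAKE_STREAMING_INTERVAL_SECONDS = 1.0  # assumed stand-in for app_config

    def split_reasoning(full_text: str) -> tuple:
        # Hypothetical splitter: the real code separates reasoning from content
        # with a tokenizer; cutting on a marker suffices for illustration.
        reasoning, sep, content = full_text.partition("</think>")
        return (reasoning, content) if sep else ("", full_text)

    async def openai_fake_stream(model: str):
        async def call_and_split():
            await asyncio.sleep(2)  # stands in for the non-streaming round-trip (stream=False)
            raw = "let me think...</think>final answer"
            return (raw, *split_reasoning(raw))

        task = asyncio.create_task(call_and_split())
        # Outer keep-alive: covers both the API call and the split work.
        if FAKE_STREAMING_INTERVAL_SECONDS > 0:
            while not task.done():
                beat = {"id": "chatcmpl-keepalive", "object": "chat.completion.chunk",
                        "created": int(time.time()), "model": model,
                        "choices": [{"delta": {"content": ""}, "index": 0, "finish_reason": None}]}
                yield f"data: {json.dumps(beat)}\n\n"
                await asyncio.sleep(FAKE_STREAMING_INTERVAL_SECONDS)
        _raw, reasoning, content = await task
        # The inner engine would now be invoked with keep_alive_interval_seconds=0;
        # here we simply emit the reasoning delta followed by the content delta.
        for delta in ({"reasoning_content": reasoning}, {"content": content}):
            chunk = {"id": "chatcmpl-demo", "object": "chat.completion.chunk",
                     "created": int(time.time()), "model": model,
                     "choices": [{"delta": delta, "index": 0, "finish_reason": None}]}
            yield f"data: {json.dumps(chunk)}\n\n"
        yield "data: [DONE]\n\n"

Iterating it with async for chunk in openai_fake_stream("demo-model") yields roughly one heartbeat per second while the fake call runs, then the reasoning chunk, the content chunk, and the terminating [DONE] sentinel.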