bibibi12345 committed
Commit 3c02b3d · 1 Parent(s): 312f276

testing openai fake streaming and reasoning

Files changed (1)
  1. app/api_helpers.py +24 -27
app/api_helpers.py CHANGED
@@ -59,8 +59,8 @@ def is_gemini_response_valid(response: Any) -> bool:
     for candidate in response.candidates:
         if hasattr(candidate, 'text') and isinstance(candidate.text, str) and candidate.text.strip(): return True
         if hasattr(candidate, 'content') and hasattr(candidate.content, 'parts') and candidate.content.parts:
-            for part in candidate.content.parts:
-                if hasattr(part, 'text') and isinstance(part.text, str) and part.text.strip(): return True
+            for part_item in candidate.content.parts: # Renamed part to part_item
+                if hasattr(part_item, 'text') and isinstance(part_item.text, str) and part_item.text.strip(): return True
     return False
 
 async def _base_fake_stream_engine(
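The hunk above only renames the loop variable; the validity logic is unchanged. As a quick sanity check, a sketch (not part of the commit) that exercises is_gemini_response_valid with SimpleNamespace stand-ins for the SDK's response objects:

# Assumes: from app.api_helpers import is_gemini_response_valid
from types import SimpleNamespace

good = SimpleNamespace(candidates=[
    SimpleNamespace(content=SimpleNamespace(parts=[SimpleNamespace(text="hello")]))
])
bad = SimpleNamespace(candidates=[
    SimpleNamespace(content=SimpleNamespace(parts=[SimpleNamespace(text="   ")]))  # whitespace-only text fails .strip()
])

assert is_gemini_response_valid(good) is True
assert is_gemini_response_valid(bad) is False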
@@ -137,23 +137,22 @@ async def _base_fake_stream_engine(
         raise
 
 def gemini_fake_stream_generator(
-    gemini_client_instance: Any, # Changed name to reflect it's the client object
-    model_for_api_call: str, # The model string to pass to the API
+    gemini_client_instance: Any,
+    model_for_api_call: str,
     prompt_for_api_call: Union[types.Content, List[types.Content]],
     gen_config_for_api_call: Dict[str, Any],
     request_obj: OpenAIRequest,
     is_auto_attempt: bool
 ):
-    # model_for_api_call is the string (e.g. "gemini-pro") to be used in client.aio.models.generate_content
-    print(f"FAKE STREAMING (Gemini): Prep for '{request_obj.model}' (using API model string: '{model_for_api_call}')")
+    model_name_for_log = getattr(gemini_client_instance, 'model_name', 'unknown_gemini_model_object') # Use a default if no model_name
+    print(f"FAKE STREAMING (Gemini): Prep for '{request_obj.model}' (using API model string: '{model_for_api_call}', client object: '{model_name_for_log}')")
 
     def _create_gemini_api_task() -> asyncio.Task:
-        # Using current_client.aio.models.generate_content as per user feedback pattern
         return asyncio.create_task(
             gemini_client_instance.aio.models.generate_content(
-                model=model_for_api_call, # Pass the model string here
+                model=model_for_api_call,
                 contents=prompt_for_api_call,
-                config=gen_config_for_api_call # Renamed from generation_config for consistency
+                config=gen_config_for_api_call
             )
         )
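gemini_fake_stream_generator implements the fake-streaming half of this commit: the Gemini call runs once, non-streaming, inside an asyncio task, and the completed text is later replayed to the client as OpenAI-style SSE chunks. A self-contained sketch of that replay step, using only the standard library; fake_stream, _sse, and chunk_size are illustrative names, not taken from this repo:

import asyncio, json, time

def _sse(payload: dict) -> str:
    # Serialize one OpenAI-style chunk as a server-sent event.
    return f"data: {json.dumps(payload)}\n\n"

async def fake_stream(full_text: str, model: str, chunk_size: int = 64):
    # Replay an already-complete response as incremental deltas.
    response_id = f"chatcmpl-{int(time.time())}"
    for i in range(0, len(full_text), chunk_size):
        yield _sse({
            "id": response_id,
            "object": "chat.completion.chunk",
            "model": model,
            "choices": [{"index": 0, "delta": {"content": full_text[i:i + chunk_size]}, "finish_reason": None}],
        })
        await asyncio.sleep(0)  # hand control back to the event loop between chunks
    yield "data: [DONE]\n\n"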
 
@@ -164,7 +163,7 @@ def gemini_fake_stream_generator(
         candidate = response.candidates[0]
         if hasattr(candidate, 'text') and candidate.text is not None: full_text = candidate.text
         elif hasattr(candidate, 'content') and hasattr(candidate.content, 'parts') and candidate.content.parts:
-            texts = [part.text for part in candidate.content.parts if hasattr(part, 'text') and part.text is not None]
+            texts = [part_item.text for part_item in candidate.content.parts if hasattr(part_item, 'text') and part_item.text is not None]
             full_text = "".join(texts)
         return full_text
 
@@ -202,12 +201,16 @@ async def openai_fake_stream_generator(
     base_model_id_for_tokenizer: str
 ):
     api_model_name = openai_params.get("model", "unknown-openai-model")
-    print(f"FAKE STREAMING (OpenAI): Prep for '{request_obj.model}' (API model: '{api_model_name}') with reasoning split.")
+    print(f"FAKE STREAMING (OpenAI): Prep for '{request_obj.model}' (API model: '{api_model_name}') with reasoning split.")
     response_id = f"chatcmpl-{int(time.time())}"
 
     async def _openai_api_call_and_split_task_creator_wrapper():
+        # Ensure 'stream' is False for this specific call, overriding any 'stream': True from original openai_params
+        params_for_non_stream_call = openai_params.copy()
+        params_for_non_stream_call['stream'] = False
+
         _api_call_task = asyncio.create_task(
-            openai_client.chat.completions.create(**openai_params, extra_body=openai_extra_body, stream=False)
+            openai_client.chat.completions.create(**params_for_non_stream_call, extra_body=openai_extra_body)
         )
         raw_response = await _api_call_task
         full_content_from_api = ""
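The copy-and-override in this hunk matters for more than style: the old call spread **openai_params and also passed stream=False, which raises a TypeError whenever openai_params already carries a 'stream' key. A minimal illustration; the create stub and parameter values are placeholders, not the real client:

def create(**kwargs):  # stand-in for openai_client.chat.completions.create
    return kwargs

openai_params = {"model": "some-openai-model", "stream": True}

try:
    create(**openai_params, stream=False)  # old call shape
except TypeError as exc:
    print(exc)  # create() got multiple values for keyword argument 'stream'

params_for_non_stream_call = openai_params.copy()
params_for_non_stream_call["stream"] = False  # new call shape: no collision
assert create(**params_for_non_stream_call)["stream"] is False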
@@ -270,27 +273,23 @@ async def openai_fake_stream_generator(
     yield "data: [DONE]\n\n"
 
 async def execute_gemini_call(
-    current_client: Any, # This IS the model object for Gemini API calls
-    model_to_call: str, # The specific model string for the API (e.g., "gemini-pro")
+    current_client: Any,
+    model_to_call: str,
     prompt_func: Callable[[List[OpenAIMessage]], Union[types.Content, List[types.Content]]],
     gen_config_for_call: Dict[str, Any],
     request_obj: OpenAIRequest,
     is_auto_attempt: bool = False
 ):
     actual_prompt_for_call = prompt_func(request_obj.messages)
-
-    # current_client is used directly as per user's explicit SDK usage pattern
-    # model_to_call is the string to be passed to the SDK method
-
     client_model_name_for_log = getattr(current_client, 'model_name', 'unknown_direct_client_object')
     print(f"INFO: execute_gemini_call for requested API model '{model_to_call}', using client object with internal name '{client_model_name_for_log}'. Original request model: '{request_obj.model}'")
 
     if request_obj.stream:
         if app_config.FAKE_STREAMING_ENABLED:
             return StreamingResponse(
-                gemini_fake_stream_generator( # Pass current_client and model_to_call
+                gemini_fake_stream_generator(
                     current_client,
-                    model_to_call, # This is the model string for client.aio.models.generate_content
+                    model_to_call,
                     actual_prompt_for_call,
                     gen_config_for_call,
                     request_obj,
@@ -304,11 +303,10 @@ async def execute_gemini_call(
 
         async def _gemini_real_stream_generator_inner():
             try:
-                # Using current_client.aio.models.generate_content_stream as per explicit user feedback
                 async for chunk_item_call in await current_client.aio.models.generate_content_stream(
-                    model=model_to_call, # Pass the model string here
+                    model=model_to_call,
                     contents=actual_prompt_for_call,
-                    config=gen_config_for_call # Renamed from generation_config for consistency
+                    config=gen_config_for_call
                 ):
                     yield convert_chunk_to_openai(chunk_item_call, request_obj.model, response_id_for_stream, 0)
                 yield create_final_chunk(request_obj.model, response_id_for_stream, cand_count_stream)
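For reference, a standalone sketch of the real-streaming call shape above, assuming the google-genai SDK (pip install google-genai): generate_content_stream is awaited once to obtain an async iterator, which is then consumed chunk by chunk. The model string and prompt are placeholders:

import asyncio
from google import genai

async def main() -> None:
    client = genai.Client()  # API key is read from the environment
    stream = await client.aio.models.generate_content_stream(
        model="gemini-2.0-flash",  # placeholder model string
        contents="Say hello.",
    )
    async for chunk in stream:
        if chunk.text:
            print(chunk.text, end="", flush=True)

asyncio.run(main())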
@@ -324,12 +322,11 @@ async def execute_gemini_call(
                 yield "data: [DONE]\n\n"
                 raise e_stream_call
         return StreamingResponse(_gemini_real_stream_generator_inner(), media_type="text/event-stream")
-    else: # Non-streaming
-        # Using current_client.aio.models.generate_content as per explicit user feedback pattern
+    else:
         response_obj_call = await current_client.aio.models.generate_content(
-            model=model_to_call, # Pass the model string here
+            model=model_to_call,
             contents=actual_prompt_for_call,
-            config=gen_config_for_call # Renamed from generation_config
+            config=gen_config_for_call
         )
         if hasattr(response_obj_call, 'prompt_feedback') and hasattr(response_obj_call.prompt_feedback, 'block_reason') and response_obj_call.prompt_feedback.block_reason:
             block_msg = f"Blocked (Gemini): {response_obj_call.prompt_feedback.block_reason}"
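The chained hasattr guard in the last two lines can be restated more compactly with getattr defaults; a hypothetical helper (not in the repo), equivalent in behavior to the check above:

from typing import Any, Optional

def gemini_block_message(response_obj_call: Any) -> Optional[str]:
    # Same truthiness semantics as the chained hasattr checks above.
    feedback = getattr(response_obj_call, 'prompt_feedback', None)
    if feedback is not None and getattr(feedback, 'block_reason', None):
        return f"Blocked (Gemini): {feedback.block_reason}"
    return None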
 