Commit 3c02b3d · 1 Parent(s): 312f276
testing openai fake streaming and reasoning

app/api_helpers.py CHANGED (+24 −27)
@@ -59,8 +59,8 @@ def is_gemini_response_valid(response: Any) -> bool:
     for candidate in response.candidates:
         if hasattr(candidate, 'text') and isinstance(candidate.text, str) and candidate.text.strip(): return True
         if hasattr(candidate, 'content') and hasattr(candidate.content, 'parts') and candidate.content.parts:
-            for part in candidate.content.parts:
-                if hasattr(part, 'text') and isinstance(part.text, str) and part.text.strip(): return True
+            for part_item in candidate.content.parts: # Renamed part to part_item
+                if hasattr(part_item, 'text') and isinstance(part_item.text, str) and part_item.text.strip(): return True
     return False
 
 async def _base_fake_stream_engine(
@@ -137,23 +137,22 @@ async def _base_fake_stream_engine(
         raise
 
 def gemini_fake_stream_generator(
-    gemini_client_instance: Any,
-    model_for_api_call: str,
+    gemini_client_instance: Any,
+    model_for_api_call: str,
     prompt_for_api_call: Union[types.Content, List[types.Content]],
     gen_config_for_api_call: Dict[str, Any],
     request_obj: OpenAIRequest,
     is_auto_attempt: bool
 ):
-
-    print(f"FAKE STREAMING (Gemini): Prep for '{request_obj.model}' (using API model string: '{model_for_api_call}')")
+    model_name_for_log = getattr(gemini_client_instance, 'model_name', 'unknown_gemini_model_object') # Use a default if no model_name
+    print(f"FAKE STREAMING (Gemini): Prep for '{request_obj.model}' (using API model string: '{model_for_api_call}', client object: '{model_name_for_log}')")
 
     def _create_gemini_api_task() -> asyncio.Task:
-        # Using current_client.aio.models.generate_content as per user feedback pattern
         return asyncio.create_task(
             gemini_client_instance.aio.models.generate_content(
-                model=model_for_api_call,
+                model=model_for_api_call,
                 contents=prompt_for_api_call,
-                config=gen_config_for_api_call
+                config=gen_config_for_api_call
             )
         )
 
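Note: `_base_fake_stream_engine` (not shown in this diff) is what consumes the task created here. A minimal sketch of that buffer-then-replay pattern, with all names and the keep-alive interval assumed rather than taken from this module:

import asyncio
import json

async def fake_stream_sketch(api_task: asyncio.Task, model: str, response_id: str, keep_alive_s: float = 5.0):
    # Emit SSE comment lines while the real (non-streaming) call runs,
    # so clients and proxies do not time the connection out.
    while not api_task.done():
        yield ": keep-alive\n\n"
        await asyncio.sleep(keep_alive_s)
    response = await api_task
    # Replay the buffered result as a single OpenAI-style chunk.
    chunk = {
        "id": response_id,
        "object": "chat.completion.chunk",
        "model": model,
        "choices": [{"index": 0, "delta": {"content": getattr(response, "text", "")}, "finish_reason": "stop"}],
    }
    yield f"data: {json.dumps(chunk)}\n\n"
    yield "data: [DONE]\n\n"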
@@ -164,7 +163,7 @@ def gemini_fake_stream_generator(
         candidate = response.candidates[0]
         if hasattr(candidate, 'text') and candidate.text is not None: full_text = candidate.text
         elif hasattr(candidate, 'content') and hasattr(candidate.content, 'parts') and candidate.content.parts:
-            texts = [part.text for part in candidate.content.parts if hasattr(part, 'text') and part.text is not None]
+            texts = [part_item.text for part_item in candidate.content.parts if hasattr(part_item, 'text') and part_item.text is not None]
             full_text = "".join(texts)
         return full_text
 
@@ -202,12 +201,16 @@ async def openai_fake_stream_generator(
     base_model_id_for_tokenizer: str
 ):
     api_model_name = openai_params.get("model", "unknown-openai-model")
-    print(f"FAKE STREAMING (OpenAI): Prep for '{request_obj.model}' (API model: '{api_model_name}') with reasoning split.")
+    print(f"FAKE STREAMING (OpenAI): Prep for '{request_obj.model}' (API model: '{api_model_name}') with reasoning split.")
     response_id = f"chatcmpl-{int(time.time())}"
 
     async def _openai_api_call_and_split_task_creator_wrapper():
+        # Ensure 'stream' is False for this specific call, overriding any 'stream': True from original openai_params
+        params_for_non_stream_call = openai_params.copy()
+        params_for_non_stream_call['stream'] = False
+
         _api_call_task = asyncio.create_task(
-            openai_client.chat.completions.create(**openai_params, extra_body=openai_extra_body)
+            openai_client.chat.completions.create(**params_for_non_stream_call, extra_body=openai_extra_body)
         )
         raw_response = await _api_call_task
         full_content_from_api = ""
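The `stream=False` override matters because `openai_params` is built from the client's original request, which had `stream=True`; without the copy-and-override, `create(...)` would return a stream object instead of the complete response this wrapper needs. A hedged sketch of the reasoning/content split the print statement refers to; the `reasoning_content` field name is a vendor-specific assumption, not confirmed by this diff:

def split_reasoning_sketch(message) -> tuple[str, str]:
    # Some OpenAI-compatible backends put thinking tokens in a separate
    # attribute; fall back to empty strings if either field is absent.
    reasoning = getattr(message, "reasoning_content", None) or ""
    content = getattr(message, "content", None) or ""
    return reasoning, content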
@@ -270,27 +273,23 @@ async def openai_fake_stream_generator(
             yield "data: [DONE]\n\n"
 
 async def execute_gemini_call(
-    current_client: Any,
-    model_to_call: str,
+    current_client: Any,
+    model_to_call: str,
     prompt_func: Callable[[List[OpenAIMessage]], Union[types.Content, List[types.Content]]],
    gen_config_for_call: Dict[str, Any],
     request_obj: OpenAIRequest,
     is_auto_attempt: bool = False
 ):
     actual_prompt_for_call = prompt_func(request_obj.messages)
-
-    # current_client is used directly as per user's explicit SDK usage pattern
-    # model_to_call is the string to be passed to the SDK method
-
     client_model_name_for_log = getattr(current_client, 'model_name', 'unknown_direct_client_object')
     print(f"INFO: execute_gemini_call for requested API model '{model_to_call}', using client object with internal name '{client_model_name_for_log}'. Original request model: '{request_obj.model}'")
 
     if request_obj.stream:
         if app_config.FAKE_STREAMING_ENABLED:
             return StreamingResponse(
-                gemini_fake_stream_generator(
+                gemini_fake_stream_generator(
                     current_client,
-                    model_to_call,
+                    model_to_call,
                     actual_prompt_for_call,
                     gen_config_for_call,
                     request_obj,
@@ -304,11 +303,10 @@ async def execute_gemini_call(
 
         async def _gemini_real_stream_generator_inner():
             try:
-                # Using current_client.aio.models.generate_content_stream as per explicit user feedback
                 async for chunk_item_call in await current_client.aio.models.generate_content_stream(
-                    model=model_to_call,
+                    model=model_to_call,
                     contents=actual_prompt_for_call,
-                    config=gen_config_for_call
+                    config=gen_config_for_call
                 ):
                     yield convert_chunk_to_openai(chunk_item_call, request_obj.model, response_id_for_stream, 0)
                 yield create_final_chunk(request_obj.model, response_id_for_stream, cand_count_stream)
@@ -324,12 +322,11 @@ async def execute_gemini_call(
                 yield "data: [DONE]\n\n"
                 raise e_stream_call
         return StreamingResponse(_gemini_real_stream_generator_inner(), media_type="text/event-stream")
-    else:
-        # Using current_client.aio.models.generate_content as per explicit user feedback pattern
+    else:
         response_obj_call = await current_client.aio.models.generate_content(
-            model=model_to_call,
+            model=model_to_call,
             contents=actual_prompt_for_call,
-            config=gen_config_for_call
+            config=gen_config_for_call
         )
         if hasattr(response_obj_call, 'prompt_feedback') and hasattr(response_obj_call.prompt_feedback, 'block_reason') and response_obj_call.prompt_feedback.block_reason:
             block_msg = f"Blocked (Gemini): {response_obj_call.prompt_feedback.block_reason}"
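When `prompt_feedback.block_reason` is set, the response carries no usable candidates, so the helper presumably short-circuits into an error payload. A sketch of one plausible OpenAI-style error shape; the module's own `create_openai_error_response` is not shown in this diff, so the structure below is illustrative only:

def block_to_openai_error_sketch(block_msg: str) -> dict:
    # Map a Gemini safety block onto an OpenAI-style error body.
    return {
        "error": {
            "message": block_msg,
            "type": "invalid_request_error",
            "code": "content_blocked",
        }
    }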