Commit 01aa095 (1 parent: c096220): bug fix

Files changed:
- app/direct_vertex_client.py (+61, -19)
- app/message_processing.py (+7, -4)
app/direct_vertex_client.py CHANGED

@@ -349,32 +349,74 @@ class DirectVertexClient:
                     error_msg = error_data.get("error", {}).get("message", f"HTTP {response.status}") if isinstance(error_data, dict) else str(error_data)
                     raise Exception(f"Vertex AI API error: {error_msg}")

-                # The Vertex AI streaming endpoint returns
-                # We need to parse these
+                # The Vertex AI streaming endpoint returns JSON array elements
+                # We need to parse these as they arrive
                 buffer = ""
+
                 async for chunk in response.content.iter_any():
+                    decoded_chunk = chunk.decode('utf-8')
+                    buffer += decoded_chunk

+                    # Try to extract complete JSON objects from the buffer
+                    while True:
+                        # Skip whitespace and array brackets
+                        buffer = buffer.lstrip()
+                        if buffer.startswith('['):
+                            buffer = buffer[1:].lstrip()
+                            continue
+                        if buffer.startswith(']'):
+                            # End of array
+                            return

+                        # Skip comma and whitespace between objects
+                        if buffer.startswith(','):
+                            buffer = buffer[1:].lstrip()
                             continue

+                        # Look for a complete JSON object
+                        if buffer.startswith('{'):
+                            # Find the matching closing brace
+                            brace_count = 0
+                            in_string = False
+                            escape_next = False

+                            for i, char in enumerate(buffer):
+                                if escape_next:
+                                    escape_next = False
+                                    continue
+
+                                if char == '\\' and in_string:
+                                    escape_next = True
+                                    continue
+
+                                if char == '"' and not in_string:
+                                    in_string = True
+                                elif char == '"' and in_string:
+                                    in_string = False
+                                elif char == '{' and not in_string:
+                                    brace_count += 1
+                                elif char == '}' and not in_string:
+                                    brace_count -= 1
+
+                                if brace_count == 0:
+                                    # Found complete object
+                                    obj_str = buffer[:i+1]
+                                    buffer = buffer[i+1:]
+
+                                    try:
+                                        chunk_data = json.loads(obj_str)
+                                        converted_obj = self._dict_to_obj(chunk_data)
+                                        yield converted_obj
+                                    except json.JSONDecodeError as e:
+                                        print(f"ERROR: DirectVertexClient - Failed to parse JSON: {e}")
+
+                                    break
+                            else:
+                                # No complete object found, need more data
+                                break
+                        else:
+                            # No more objects to process in current buffer
+                            break

             except Exception as e:
                 print(f"ERROR: Direct Vertex streaming API call failed: {e}")
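The core of the fix is easier to see outside the aiohttp plumbing. Below is a minimal, synchronous sketch of the same brace-matching technique; iter_json_array and the sample parts list are illustrative stand-ins, not part of the commit. The parser tracks string and escape state so braces inside string values do not confuse the depth counter, and it only yields an element once a balanced object has accumulated, no matter where the network splits the bytes.

import json

def iter_json_array(chunks):
    """Yield each object of a JSON array that arrives in arbitrary fragments.

    Synchronous sketch of the commit's approach; `chunks` stands in for the
    decoded pieces produced by response.content.iter_any().
    """
    buffer = ""
    for chunk in chunks:
        buffer += chunk
        while True:
            buffer = buffer.lstrip()
            if buffer.startswith('['):        # opening bracket of the array
                buffer = buffer[1:].lstrip()
                continue
            if buffer.startswith(']'):        # closing bracket: array finished
                return
            if buffer.startswith(','):        # separator between elements
                buffer = buffer[1:].lstrip()
                continue
            if not buffer.startswith('{'):    # need more data before parsing
                break
            # Scan for the matching closing brace, honoring strings/escapes.
            depth, in_string, escape, end = 0, False, False, None
            for i, ch in enumerate(buffer):
                if escape:
                    escape = False
                elif ch == '\\' and in_string:
                    escape = True
                elif ch == '"':
                    in_string = not in_string
                elif ch == '{' and not in_string:
                    depth += 1
                elif ch == '}' and not in_string:
                    depth -= 1
                    if depth == 0:
                        end = i
                        break
            if end is None:                   # object still incomplete
                break
            yield json.loads(buffer[:end + 1])
            buffer = buffer[end + 1:]

# The array can be split anywhere, even inside a string value:
parts = ['[{"text": "he', 'llo"},', ' {"text": "wor', 'ld"}]']
print(list(iter_json_array(parts)))  # [{'text': 'hello'}, {'text': 'world'}]

Splitting mid-string is exactly why the in_string bookkeeping matters: a naive check like buffer.count('{') == buffer.count('}') would fire early on payloads whose text contains literal braces.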
app/message_processing.py CHANGED

@@ -241,7 +241,7 @@ def parse_gemini_response_for_reasoning_and_content(gemini_response_candidate: A
                reasoning_text_parts.append(part_text)
            else:
                normal_text_parts.append(part_text)
+    elif candidate_part_text: # Candidate had text but no parts and was not a thought itself
        normal_text_parts.append(candidate_part_text)
    # If no parts and no direct text on candidate, both lists remain empty.

@@ -291,10 +291,14 @@ def convert_to_openai_format(gemini_response: Any, model: str) -> Dict[str, Any]
 def convert_chunk_to_openai(chunk: Any, model: str, response_id: str, candidate_index: int = 0) -> str:
     is_encrypt_full = model.endswith("-encrypt-full")
     delta_payload = {}
-    finish_reason = None
+    finish_reason = None

     if hasattr(chunk, 'candidates') and chunk.candidates:
-        candidate = chunk.candidates[0]
+        candidate = chunk.candidates[0]
+
+        # Check for finish reason
+        if hasattr(candidate, 'finishReason') and candidate.finishReason:
+            finish_reason = "stop" # Convert Gemini finish reasons to OpenAI format

        # For a streaming chunk, candidate might be simpler, or might have candidate.content with parts.
        # parse_gemini_response_for_reasoning_and_content is designed to handle both candidate and candidate.content

@@ -308,7 +312,6 @@ def convert_chunk_to_openai(chunk: Any, model: str, response_id: str, candidate_
     if normal_text or (not reasoning_text and not delta_payload): # Ensure content key if nothing else
         delta_payload['content'] = normal_text if normal_text else ""

-
     chunk_data = {
         "id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model,
         "choices": [{"index": candidate_index, "delta": delta_payload, "finish_reason": finish_reason}]