bibibi12345 committed on
Commit
01aa095
·
1 Parent(s): c096220
app/direct_vertex_client.py CHANGED
@@ -349,32 +349,74 @@ class DirectVertexClient:
349
  error_msg = error_data.get("error", {}).get("message", f"HTTP {response.status}") if isinstance(error_data, dict) else str(error_data)
350
  raise Exception(f"Vertex AI API error: {error_msg}")
351
 
352
- # The Vertex AI streaming endpoint returns Server-Sent Events
353
- # We need to parse these and yield them as objects
354
  buffer = ""
 
355
  async for chunk in response.content.iter_any():
356
- buffer += chunk.decode('utf-8')
 
357
 
358
- # Process complete SSE messages
359
- while '\n\n' in buffer:
360
- message, buffer = buffer.split('\n\n', 1)
 
 
 
 
 
 
 
361
 
362
- if not message.strip():
 
 
363
  continue
364
 
365
- # Parse SSE format
366
- if message.startswith('data: '):
367
- data_str = message[6:]
368
-
369
- if data_str.strip() == '[DONE]':
370
- return
371
 
372
- try:
373
- # Parse JSON and convert to object
374
- chunk_data = json.loads(data_str)
375
- yield self._dict_to_obj(chunk_data)
376
- except json.JSONDecodeError:
377
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
 
379
  except Exception as e:
380
  print(f"ERROR: Direct Vertex streaming API call failed: {e}")
 
349
  error_msg = error_data.get("error", {}).get("message", f"HTTP {response.status}") if isinstance(error_data, dict) else str(error_data)
350
  raise Exception(f"Vertex AI API error: {error_msg}")
351
 
352
+ # The Vertex AI streaming endpoint returns JSON array elements
353
+ # We need to parse these as they arrive
354
  buffer = ""
355
+
356
  async for chunk in response.content.iter_any():
357
+ decoded_chunk = chunk.decode('utf-8')
358
+ buffer += decoded_chunk
359
 
360
+ # Try to extract complete JSON objects from the buffer
361
+ while True:
362
+ # Skip whitespace and array brackets
363
+ buffer = buffer.lstrip()
364
+ if buffer.startswith('['):
365
+ buffer = buffer[1:].lstrip()
366
+ continue
367
+ if buffer.startswith(']'):
368
+ # End of array
369
+ return
370
 
371
+ # Skip comma and whitespace between objects
372
+ if buffer.startswith(','):
373
+ buffer = buffer[1:].lstrip()
374
  continue
375
 
376
+ # Look for a complete JSON object
377
+ if buffer.startswith('{'):
378
+ # Find the matching closing brace
379
+ brace_count = 0
380
+ in_string = False
381
+ escape_next = False
382
 
383
+ for i, char in enumerate(buffer):
384
+ if escape_next:
385
+ escape_next = False
386
+ continue
387
+
388
+ if char == '\\' and in_string:
389
+ escape_next = True
390
+ continue
391
+
392
+ if char == '"' and not in_string:
393
+ in_string = True
394
+ elif char == '"' and in_string:
395
+ in_string = False
396
+ elif char == '{' and not in_string:
397
+ brace_count += 1
398
+ elif char == '}' and not in_string:
399
+ brace_count -= 1
400
+
401
+ if brace_count == 0:
402
+ # Found complete object
403
+ obj_str = buffer[:i+1]
404
+ buffer = buffer[i+1:]
405
+
406
+ try:
407
+ chunk_data = json.loads(obj_str)
408
+ converted_obj = self._dict_to_obj(chunk_data)
409
+ yield converted_obj
410
+ except json.JSONDecodeError as e:
411
+ print(f"ERROR: DirectVertexClient - Failed to parse JSON: {e}")
412
+
413
+ break
414
+ else:
415
+ # No complete object found, need more data
416
+ break
417
+ else:
418
+ # No more objects to process in current buffer
419
+ break
420
 
421
  except Exception as e:
422
  print(f"ERROR: Direct Vertex streaming API call failed: {e}")
app/message_processing.py CHANGED
@@ -241,7 +241,7 @@ def parse_gemini_response_for_reasoning_and_content(gemini_response_candidate: A
241
  reasoning_text_parts.append(part_text)
242
  else:
243
  normal_text_parts.append(part_text)
244
- if candidate_part_text: # Candidate had text but no parts and was not a thought itself
245
  normal_text_parts.append(candidate_part_text)
246
  # If no parts and no direct text on candidate, both lists remain empty.
247
 
@@ -291,10 +291,14 @@ def convert_to_openai_format(gemini_response: Any, model: str) -> Dict[str, Any]
291
  def convert_chunk_to_openai(chunk: Any, model: str, response_id: str, candidate_index: int = 0) -> str:
292
  is_encrypt_full = model.endswith("-encrypt-full")
293
  delta_payload = {}
294
- finish_reason = None
295
 
296
  if hasattr(chunk, 'candidates') and chunk.candidates:
297
- candidate = chunk.candidates[0]
 
 
 
 
298
 
299
  # For a streaming chunk, candidate might be simpler, or might have candidate.content with parts.
300
  # parse_gemini_response_for_reasoning_and_content is designed to handle both candidate and candidate.content
@@ -308,7 +312,6 @@ def convert_chunk_to_openai(chunk: Any, model: str, response_id: str, candidate_
308
  if normal_text or (not reasoning_text and not delta_payload): # Ensure content key if nothing else
309
  delta_payload['content'] = normal_text if normal_text else ""
310
 
311
-
312
  chunk_data = {
313
  "id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model,
314
  "choices": [{"index": candidate_index, "delta": delta_payload, "finish_reason": finish_reason}]
 
241
  reasoning_text_parts.append(part_text)
242
  else:
243
  normal_text_parts.append(part_text)
244
+ elif candidate_part_text: # Candidate had text but no parts and was not a thought itself
245
  normal_text_parts.append(candidate_part_text)
246
  # If no parts and no direct text on candidate, both lists remain empty.
247
 
 
291
  def convert_chunk_to_openai(chunk: Any, model: str, response_id: str, candidate_index: int = 0) -> str:
292
  is_encrypt_full = model.endswith("-encrypt-full")
293
  delta_payload = {}
294
+ finish_reason = None
295
 
296
  if hasattr(chunk, 'candidates') and chunk.candidates:
297
+ candidate = chunk.candidates[0]
298
+
299
+ # Check for finish reason
300
+ if hasattr(candidate, 'finishReason') and candidate.finishReason:
301
+ finish_reason = "stop" # Convert Gemini finish reasons to OpenAI format
302
 
303
  # For a streaming chunk, candidate might be simpler, or might have candidate.content with parts.
304
  # parse_gemini_response_for_reasoning_and_content is designed to handle both candidate and candidate.content
 
312
  if normal_text or (not reasoning_text and not delta_payload): # Ensure content key if nothing else
313
  delta_payload['content'] = normal_text if normal_text else ""
314
 
 
315
  chunk_data = {
316
  "id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model,
317
  "choices": [{"index": candidate_index, "delta": delta_payload, "finish_reason": finish_reason}]