bibibi12345 committed on
Commit
b4fffbe
·
1 Parent(s): 0c1db3e

added express mode

Browse files
Files changed (1) hide show
  1. app/main.py +84 -14
app/main.py CHANGED
@@ -22,6 +22,13 @@ from google.genai import types
22
 
23
  from google import genai
24
  import math
 
 
 
 
 
 
 
25
 
26
  client = None
27
 
@@ -1423,6 +1430,51 @@ async def list_models(api_key: str = Depends(get_api_key)):
1423
  "root": "gemini-2.5-pro-exp-03-25", # Underlying model
1424
  "parent": None,
1425
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1426
  {
1427
  "id": "gemini-2.5-pro-preview-05-06",
1428
  "object": "model",
@@ -1450,6 +1502,15 @@ async def list_models(api_key: str = Depends(get_api_key)):
1450
  "root": "gemini-2.5-pro-preview-05-06",
1451
  "parent": None,
1452
  },
 
 
 
 
 
 
 
 
 
1453
  {
1454
  "id": "gemini-2.5-pro-preview-05-06-auto", # New auto model
1455
  "object": "model",
@@ -1824,7 +1885,6 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1824
  is_nothinking_model = True
1825
  base_model_name = request.model.replace("-nothinking","")
1826
  # Specific check for the flash model requiring budget
1827
- # Specific check for the flash model requiring budget
1828
  if base_model_name != "gemini-2.5-flash-preview-04-17":
1829
  error_response = create_openai_error_response(
1830
  400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
@@ -1834,41 +1894,51 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
1834
  is_max_thinking_model = True
1835
  base_model_name = request.model.replace("-max","")
1836
  # Specific check for the flash model requiring budget
1837
- # Specific check for the flash model requiring budget
1838
  if base_model_name != "gemini-2.5-flash-preview-04-17":
1839
  error_response = create_openai_error_response(
1840
  400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
1841
  )
1842
  return JSONResponse(status_code=400, content=error_response)
1843
  else:
1844
- base_model_name = request.model
1845
 
1846
  # Create generation config
1847
  generation_config = create_generation_config(request)
1848
 
1849
- # --- Determine which client to use (Rotation or Fallback) ---
1850
  client_to_use = None
1851
- rotated_credentials, rotated_project_id = credential_manager.get_next_credentials()
1852
 
1853
- if rotated_credentials and rotated_project_id:
 
1854
  try:
1855
- # Create a request-specific client using the rotated credentials
1856
- client_to_use = genai.Client(vertexai=True, credentials=rotated_credentials, project=rotated_project_id, location="us-central1")
1857
- print(f"INFO: Using rotated credential for project: {rotated_project_id} (Index: {credential_manager.current_index -1 if credential_manager.current_index > 0 else len(credential_manager.credentials_files) - 1})") # Log which credential was used
1858
  except Exception as e:
1859
- print(f"ERROR: Failed to create client from rotated credential: {e}. Will attempt fallback.")
1860
- client_to_use = None # Ensure it's None if creation failed
 
 
 
 
 
 
 
 
 
 
 
1861
 
1862
- # If rotation failed or wasn't possible, try the fallback client
1863
  if client_to_use is None:
1864
  global client # Access the fallback client initialized at startup
1865
  if client is not None:
1866
  client_to_use = client
1867
  print("INFO: Using fallback Vertex AI client.")
1868
  else:
1869
- # Critical error: No rotated client AND no fallback client
1870
  error_response = create_openai_error_response(
1871
- 500, "Vertex AI client not available (Rotation failed and no fallback)", "server_error"
1872
  )
1873
  return JSONResponse(status_code=500, content=error_response)
1874
  # --- Client determined ---
 
22
 
23
  from google import genai
24
  import math
25
+ VERTEX_EXPRESS_API_KEY_ENV_VAR = "VERTEX_EXPRESS_API_KEY"
26
+ VERTEX_EXPRESS_MODELS = [
27
+ "gemini-2.0-flash-001",
28
+ "gemini-2.0-flash-lite-001",
29
+ "gemini-2.5-pro-preview-03-25",
30
+ "gemini-2.5-flash-preview-04-17",
31
+ ]
32
 
33
  client = None
34
 
 
1430
  "root": "gemini-2.5-pro-exp-03-25", # Underlying model
1431
  "parent": None,
1432
  },
1433
+ {
1434
+ "id": "gemini-2.5-pro-preview-03-25",
1435
+ "object": "model",
1436
+ "created": int(time.time()),
1437
+ "owned_by": "google",
1438
+ "permission": [],
1439
+ "root": "gemini-2.5-pro-preview-05-06",
1440
+ "parent": None,
1441
+ },
1442
+ {
1443
+ "id": "gemini-2.5-pro-preview-03-25-search",
1444
+ "object": "model",
1445
+ "created": int(time.time()),
1446
+ "owned_by": "google",
1447
+ "permission": [],
1448
+ "root": "gemini-2.5-pro-preview-03-25",
1449
+ "parent": None,
1450
+ },
1451
+ {
1452
+ "id": "gemini-2.5-pro-preview-03-25-encrypt",
1453
+ "object": "model",
1454
+ "created": int(time.time()),
1455
+ "owned_by": "google",
1456
+ "permission": [],
1457
+ "root": "gemini-2.5-pro-preview-03-25",
1458
+ "parent": None,
1459
+ },
1460
+ {
1461
+ "id": "gemini-2.5-pro-preview-03-25-encrypt-full",
1462
+ "object": "model",
1463
+ "created": int(time.time()),
1464
+ "owned_by": "google",
1465
+ "permission": [],
1466
+ "root": "gemini-2.5-pro-preview-03-25",
1467
+ "parent": None,
1468
+ },
1469
+ {
1470
+ "id": "gemini-2.5-pro-preview-03-25-auto", # New auto model
1471
+ "object": "model",
1472
+ "created": int(time.time()),
1473
+ "owned_by": "google",
1474
+ "permission": [],
1475
+ "root": "gemini-2.5-pro-preview-03-25",
1476
+ "parent": None,
1477
+ },
1478
  {
1479
  "id": "gemini-2.5-pro-preview-05-06",
1480
  "object": "model",
 
1502
  "root": "gemini-2.5-pro-preview-05-06",
1503
  "parent": None,
1504
  },
1505
+ {
1506
+ "id": "gemini-2.5-pro-preview-05-06-encrypt-full",
1507
+ "object": "model",
1508
+ "created": int(time.time()),
1509
+ "owned_by": "google",
1510
+ "permission": [],
1511
+ "root": "gemini-2.5-pro-preview-05-06",
1512
+ "parent": None,
1513
+ },
1514
  {
1515
  "id": "gemini-2.5-pro-preview-05-06-auto", # New auto model
1516
  "object": "model",
 
1885
  is_nothinking_model = True
1886
  base_model_name = request.model.replace("-nothinking","")
1887
  # Specific check for the flash model requiring budget
 
1888
  if base_model_name != "gemini-2.5-flash-preview-04-17":
1889
  error_response = create_openai_error_response(
1890
  400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
 
1894
  is_max_thinking_model = True
1895
  base_model_name = request.model.replace("-max","")
1896
  # Specific check for the flash model requiring budget
 
1897
  if base_model_name != "gemini-2.5-flash-preview-04-17":
1898
  error_response = create_openai_error_response(
1899
  400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
1900
  )
1901
  return JSONResponse(status_code=400, content=error_response)
1902
  else:
1903
+ base_model_name = request.model # This ensures base_model_name is set if no suffix matches
1904
 
1905
  # Create generation config
1906
  generation_config = create_generation_config(request)
1907
 
1908
+ # --- Determine which client to use (Express, Rotation, or Fallback) ---
1909
  client_to_use = None
1910
+ express_api_key = os.environ.get(VERTEX_EXPRESS_API_KEY_ENV_VAR)
1911
 
1912
+ if express_api_key and base_model_name in VERTEX_EXPRESS_MODELS:
1913
+ print(f"INFO: Attempting to use Vertex Express Mode for model {base_model_name} with API Key.")
1914
  try:
1915
+ client_to_use = genai.Client(vertexai=True, api_key=express_api_key)
1916
+ print(f"INFO: Successfully initialized Vertex AI client in Express Mode for model {base_model_name}.")
 
1917
  except Exception as e:
1918
+ print(f"ERROR: Failed to initialize Vertex AI client in Express Mode: {e}. Falling back to other methods.")
1919
+ client_to_use = None # Ensure client_to_use is None if express mode fails
1920
+
1921
+ if client_to_use is None: # If Express Mode was not used or failed
1922
+ rotated_credentials, rotated_project_id = credential_manager.get_next_credentials()
1923
+ if rotated_credentials and rotated_project_id:
1924
+ try:
1925
+ # Create a request-specific client using the rotated credentials
1926
+ client_to_use = genai.Client(vertexai=True, credentials=rotated_credentials, project=rotated_project_id, location="us-central1")
1927
+ print(f"INFO: Using rotated credential for project: {rotated_project_id} (Index: {credential_manager.current_index -1 if credential_manager.current_index > 0 else credential_manager.get_total_credentials() - 1})") # Log which credential was used
1928
+ except Exception as e:
1929
+ print(f"ERROR: Failed to create client from rotated credential: {e}. Will attempt fallback.")
1930
+ client_to_use = None # Ensure it's None if creation failed
1931
 
1932
+ # If express and rotation failed or weren't possible, try the fallback client
1933
  if client_to_use is None:
1934
  global client # Access the fallback client initialized at startup
1935
  if client is not None:
1936
  client_to_use = client
1937
  print("INFO: Using fallback Vertex AI client.")
1938
  else:
1939
+ # Critical error: No express, rotated, AND no fallback client
1940
  error_response = create_openai_error_response(
1941
+ 500, "Vertex AI client not available (Express, Rotation failed and no fallback)", "server_error"
1942
  )
1943
  return JSONResponse(status_code=500, content=error_response)
1944
  # --- Client determined ---