Spaces:
Running
Running
Commit
·
b4fffbe
1
Parent(s):
0c1db3e
added express mode
Browse files- app/main.py +84 -14
app/main.py
CHANGED
@@ -22,6 +22,13 @@ from google.genai import types
|
|
22 |
|
23 |
from google import genai
|
24 |
import math
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
client = None
|
27 |
|
@@ -1423,6 +1430,51 @@ async def list_models(api_key: str = Depends(get_api_key)):
|
|
1423 |
"root": "gemini-2.5-pro-exp-03-25", # Underlying model
|
1424 |
"parent": None,
|
1425 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1426 |
{
|
1427 |
"id": "gemini-2.5-pro-preview-05-06",
|
1428 |
"object": "model",
|
@@ -1450,6 +1502,15 @@ async def list_models(api_key: str = Depends(get_api_key)):
|
|
1450 |
"root": "gemini-2.5-pro-preview-05-06",
|
1451 |
"parent": None,
|
1452 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1453 |
{
|
1454 |
"id": "gemini-2.5-pro-preview-05-06-auto", # New auto model
|
1455 |
"object": "model",
|
@@ -1824,7 +1885,6 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
|
|
1824 |
is_nothinking_model = True
|
1825 |
base_model_name = request.model.replace("-nothinking","")
|
1826 |
# Specific check for the flash model requiring budget
|
1827 |
-
# Specific check for the flash model requiring budget
|
1828 |
if base_model_name != "gemini-2.5-flash-preview-04-17":
|
1829 |
error_response = create_openai_error_response(
|
1830 |
400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
|
@@ -1834,41 +1894,51 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
|
|
1834 |
is_max_thinking_model = True
|
1835 |
base_model_name = request.model.replace("-max","")
|
1836 |
# Specific check for the flash model requiring budget
|
1837 |
-
# Specific check for the flash model requiring budget
|
1838 |
if base_model_name != "gemini-2.5-flash-preview-04-17":
|
1839 |
error_response = create_openai_error_response(
|
1840 |
400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
|
1841 |
)
|
1842 |
return JSONResponse(status_code=400, content=error_response)
|
1843 |
else:
|
1844 |
-
base_model_name = request.model
|
1845 |
|
1846 |
# Create generation config
|
1847 |
generation_config = create_generation_config(request)
|
1848 |
|
1849 |
-
# --- Determine which client to use (Rotation or Fallback) ---
|
1850 |
client_to_use = None
|
1851 |
-
|
1852 |
|
1853 |
-
if
|
|
|
1854 |
try:
|
1855 |
-
|
1856 |
-
|
1857 |
-
print(f"INFO: Using rotated credential for project: {rotated_project_id} (Index: {credential_manager.current_index -1 if credential_manager.current_index > 0 else len(credential_manager.credentials_files) - 1})") # Log which credential was used
|
1858 |
except Exception as e:
|
1859 |
-
print(f"ERROR: Failed to
|
1860 |
-
client_to_use = None # Ensure
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1861 |
|
1862 |
-
# If rotation failed or
|
1863 |
if client_to_use is None:
|
1864 |
global client # Access the fallback client initialized at startup
|
1865 |
if client is not None:
|
1866 |
client_to_use = client
|
1867 |
print("INFO: Using fallback Vertex AI client.")
|
1868 |
else:
|
1869 |
-
# Critical error: No rotated
|
1870 |
error_response = create_openai_error_response(
|
1871 |
-
500, "Vertex AI client not available (Rotation failed and no fallback)", "server_error"
|
1872 |
)
|
1873 |
return JSONResponse(status_code=500, content=error_response)
|
1874 |
# --- Client determined ---
|
|
|
22 |
|
23 |
from google import genai
|
24 |
import math
|
25 |
+
VERTEX_EXPRESS_API_KEY_ENV_VAR = "VERTEX_EXPRESS_API_KEY"
|
26 |
+
VERTEX_EXPRESS_MODELS = [
|
27 |
+
"gemini-2.0-flash-001",
|
28 |
+
"gemini-2.0-flash-lite-001",
|
29 |
+
"gemini-2.5-pro-preview-03-25",
|
30 |
+
"gemini-2.5-flash-preview-04-17",
|
31 |
+
]
|
32 |
|
33 |
client = None
|
34 |
|
|
|
1430 |
"root": "gemini-2.5-pro-exp-03-25", # Underlying model
|
1431 |
"parent": None,
|
1432 |
},
|
1433 |
+
{
|
1434 |
+
"id": "gemini-2.5-pro-preview-03-25",
|
1435 |
+
"object": "model",
|
1436 |
+
"created": int(time.time()),
|
1437 |
+
"owned_by": "google",
|
1438 |
+
"permission": [],
|
1439 |
+
"root": "gemini-2.5-pro-preview-05-06",
|
1440 |
+
"parent": None,
|
1441 |
+
},
|
1442 |
+
{
|
1443 |
+
"id": "gemini-2.5-pro-preview-03-25-search",
|
1444 |
+
"object": "model",
|
1445 |
+
"created": int(time.time()),
|
1446 |
+
"owned_by": "google",
|
1447 |
+
"permission": [],
|
1448 |
+
"root": "gemini-2.5-pro-preview-03-25",
|
1449 |
+
"parent": None,
|
1450 |
+
},
|
1451 |
+
{
|
1452 |
+
"id": "gemini-2.5-pro-preview-03-25-encrypt",
|
1453 |
+
"object": "model",
|
1454 |
+
"created": int(time.time()),
|
1455 |
+
"owned_by": "google",
|
1456 |
+
"permission": [],
|
1457 |
+
"root": "gemini-2.5-pro-preview-03-25",
|
1458 |
+
"parent": None,
|
1459 |
+
},
|
1460 |
+
{
|
1461 |
+
"id": "gemini-2.5-pro-preview-03-25-encrypt-full",
|
1462 |
+
"object": "model",
|
1463 |
+
"created": int(time.time()),
|
1464 |
+
"owned_by": "google",
|
1465 |
+
"permission": [],
|
1466 |
+
"root": "gemini-2.5-pro-preview-03-25",
|
1467 |
+
"parent": None,
|
1468 |
+
},
|
1469 |
+
{
|
1470 |
+
"id": "gemini-2.5-pro-preview-03-25-auto", # New auto model
|
1471 |
+
"object": "model",
|
1472 |
+
"created": int(time.time()),
|
1473 |
+
"owned_by": "google",
|
1474 |
+
"permission": [],
|
1475 |
+
"root": "gemini-2.5-pro-preview-03-25",
|
1476 |
+
"parent": None,
|
1477 |
+
},
|
1478 |
{
|
1479 |
"id": "gemini-2.5-pro-preview-05-06",
|
1480 |
"object": "model",
|
|
|
1502 |
"root": "gemini-2.5-pro-preview-05-06",
|
1503 |
"parent": None,
|
1504 |
},
|
1505 |
+
{
|
1506 |
+
"id": "gemini-2.5-pro-preview-05-06-encrypt-full",
|
1507 |
+
"object": "model",
|
1508 |
+
"created": int(time.time()),
|
1509 |
+
"owned_by": "google",
|
1510 |
+
"permission": [],
|
1511 |
+
"root": "gemini-2.5-pro-preview-05-06",
|
1512 |
+
"parent": None,
|
1513 |
+
},
|
1514 |
{
|
1515 |
"id": "gemini-2.5-pro-preview-05-06-auto", # New auto model
|
1516 |
"object": "model",
|
|
|
1885 |
is_nothinking_model = True
|
1886 |
base_model_name = request.model.replace("-nothinking","")
|
1887 |
# Specific check for the flash model requiring budget
|
|
|
1888 |
if base_model_name != "gemini-2.5-flash-preview-04-17":
|
1889 |
error_response = create_openai_error_response(
|
1890 |
400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
|
|
|
1894 |
is_max_thinking_model = True
|
1895 |
base_model_name = request.model.replace("-max","")
|
1896 |
# Specific check for the flash model requiring budget
|
|
|
1897 |
if base_model_name != "gemini-2.5-flash-preview-04-17":
|
1898 |
error_response = create_openai_error_response(
|
1899 |
400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
|
1900 |
)
|
1901 |
return JSONResponse(status_code=400, content=error_response)
|
1902 |
else:
|
1903 |
+
base_model_name = request.model # This ensures base_model_name is set if no suffix matches
|
1904 |
|
1905 |
# Create generation config
|
1906 |
generation_config = create_generation_config(request)
|
1907 |
|
1908 |
+
# --- Determine which client to use (Express, Rotation, or Fallback) ---
|
1909 |
client_to_use = None
|
1910 |
+
express_api_key = os.environ.get(VERTEX_EXPRESS_API_KEY_ENV_VAR)
|
1911 |
|
1912 |
+
if express_api_key and base_model_name in VERTEX_EXPRESS_MODELS:
|
1913 |
+
print(f"INFO: Attempting to use Vertex Express Mode for model {base_model_name} with API Key.")
|
1914 |
try:
|
1915 |
+
client_to_use = genai.Client(vertexai=True, api_key=express_api_key)
|
1916 |
+
print(f"INFO: Successfully initialized Vertex AI client in Express Mode for model {base_model_name}.")
|
|
|
1917 |
except Exception as e:
|
1918 |
+
print(f"ERROR: Failed to initialize Vertex AI client in Express Mode: {e}. Falling back to other methods.")
|
1919 |
+
client_to_use = None # Ensure client_to_use is None if express mode fails
|
1920 |
+
|
1921 |
+
if client_to_use is None: # If Express Mode was not used or failed
|
1922 |
+
rotated_credentials, rotated_project_id = credential_manager.get_next_credentials()
|
1923 |
+
if rotated_credentials and rotated_project_id:
|
1924 |
+
try:
|
1925 |
+
# Create a request-specific client using the rotated credentials
|
1926 |
+
client_to_use = genai.Client(vertexai=True, credentials=rotated_credentials, project=rotated_project_id, location="us-central1")
|
1927 |
+
print(f"INFO: Using rotated credential for project: {rotated_project_id} (Index: {credential_manager.current_index -1 if credential_manager.current_index > 0 else credential_manager.get_total_credentials() - 1})") # Log which credential was used
|
1928 |
+
except Exception as e:
|
1929 |
+
print(f"ERROR: Failed to create client from rotated credential: {e}. Will attempt fallback.")
|
1930 |
+
client_to_use = None # Ensure it's None if creation failed
|
1931 |
|
1932 |
+
# If express and rotation failed or weren't possible, try the fallback client
|
1933 |
if client_to_use is None:
|
1934 |
global client # Access the fallback client initialized at startup
|
1935 |
if client is not None:
|
1936 |
client_to_use = client
|
1937 |
print("INFO: Using fallback Vertex AI client.")
|
1938 |
else:
|
1939 |
+
# Critical error: No express, rotated, AND no fallback client
|
1940 |
error_response = create_openai_error_response(
|
1941 |
+
500, "Vertex AI client not available (Express, Rotation failed and no fallback)", "server_error"
|
1942 |
)
|
1943 |
return JSONResponse(status_code=500, content=error_response)
|
1944 |
# --- Client determined ---
|