Commit a8e6344 (parent: d67404e): dynamic model list

Files changed:
- app/config.py +3 -0
- app/main.py +2 -2
- app/requirements.txt +2 -1
- app/routes/chat_api.py +69 -23
- app/routes/models_api.py +48 -42
- app/vertex_ai_init.py +18 -11
app/config.py (CHANGED)

```diff
@@ -19,4 +19,7 @@ VERTEX_EXPRESS_API_KEY_VAL = os.environ.get("VERTEX_EXPRESS_API_KEY")
 FAKE_STREAMING_ENABLED = os.environ.get("FAKE_STREAMING", "false").lower() == "true"
 FAKE_STREAMING_INTERVAL_SECONDS = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1.0"))
 
+# URL for the remote JSON file containing model lists
+MODELS_CONFIG_URL = os.environ.get("MODELS_CONFIG_URL", "https://gist.githubusercontent.com/gzzhongqi/e0b684f319437a859bcf5bd6203fd1f6/raw")
+
 # Validation logic moved to app/auth.py
```
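The gist behind MODELS_CONFIG_URL is not part of this diff, so its exact schema is not visible here. Judging from the loader functions that consume it (get_vertex_models and get_vertex_express_models, used in the files below), a plausible shape would be the following; the two key names and the model IDs are assumptions for illustration:

```json
{
  "vertex_models": [
    "gemini-2.5-pro-preview-05-06",
    "gemini-2.5-flash-preview-04-17"
  ],
  "vertex_express_models": [
    "gemini-2.0-flash-lite-001",
    "gemini-2.5-pro-preview-03-25",
    "gemini-2.5-flash-preview-04-17",
    "gemini-2.5-pro-preview-05-06"
  ]
}
```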
app/main.py (CHANGED)

```diff
@@ -35,8 +35,8 @@ app.include_router(chat_api.router)
 
 @app.on_event("startup")
 async def startup_event():
-    if init_vertex_ai(credential_manager):
-        print("INFO:
+    if await init_vertex_ai(credential_manager): # Added await
+        print("INFO: Vertex AI credential and model config initialization check completed successfully.")
     else:
         print("ERROR: Failed to initialize a fallback Vertex AI client. API will likely fail.")
 
```
app/requirements.txt (CHANGED)

```diff
@@ -3,4 +3,5 @@ uvicorn==0.27.1
 google-auth==2.38.0
 google-cloud-aiplatform==1.86.0
 pydantic==2.6.1
-google-genai==1.13.0
+google-genai==1.13.0
+httpx>=0.25.0
```
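httpx is presumably what the new model_loader module (imported in the files below but absent from this diff view) uses to fetch MODELS_CONFIG_URL asynchronously; a hedged sketch of that module appears at the end of this page.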
app/routes/chat_api.py (CHANGED)

```diff
@@ -1,6 +1,6 @@
 import asyncio
 import json # Needed for error streaming
-from fastapi import APIRouter, Depends, Request
+from fastapi import APIRouter, Depends, Request
 from fastapi.responses import JSONResponse, StreamingResponse
 from typing import List, Dict, Any
 
@@ -8,12 +8,12 @@ from typing import List, Dict, Any
 from google.genai import types
 from google import genai
 
-# Local module imports
+# Local module imports
 from models import OpenAIRequest, OpenAIMessage
 from auth import get_api_key
-
+from main import credential_manager # Accessing the instance from main.py
 import config as app_config
-from
+from model_loader import get_vertex_models, get_vertex_express_models # Import from model_loader
 from message_processing import (
     create_gemini_prompt,
     create_encrypted_gemini_prompt,
@@ -27,12 +27,38 @@ from api_helpers import (
 
 router = APIRouter()
 
-
 @router.post("/v1/chat/completions")
 async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api_key: str = Depends(get_api_key)):
     try:
-        # Access credential_manager from app state
         credential_manager_instance = fastapi_request.app.state.credential_manager
+
+        # Dynamically fetch allowed models for validation
+        vertex_model_ids = await get_vertex_models()
+        # Suffixes that can be appended to base models.
+        # The remote model config should ideally be the source of truth for all valid permutations.
+        standard_suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
+        special_suffix_map = { # For models with unique suffixes not covered by standard ones
+            "gemini-2.5-flash-preview-04-17": ["-nothinking", "-max"]
+        }
+
+        all_allowed_model_ids = set(vertex_model_ids) # Start with base models from config
+        for base_id in vertex_model_ids: # Iterate over base models to add suffixed versions
+            for suffix in standard_suffixes:
+                all_allowed_model_ids.add(f"{base_id}{suffix}")
+            if base_id in special_suffix_map:
+                for special_suffix in special_suffix_map[base_id]:
+                    all_allowed_model_ids.add(f"{base_id}{special_suffix}")
+
+        # Add express models to the allowed list as well, as they are distinct
+        # and might not be covered by the base vertex_models list from remote config.
+        # Alternatively, the remote config's vertex_models should include express models if they are also usable as base.
+        vertex_express_model_ids = await get_vertex_express_models()
+        all_allowed_model_ids.update(vertex_express_model_ids)
+
+
+        if not request.model or request.model not in all_allowed_model_ids:
+            return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' not found or not supported by this adapter. Valid models are: {sorted(list(all_allowed_model_ids))}", "invalid_request_error"))
+
         is_auto_model = request.model.endswith("-auto")
         is_grounded_search = request.model.endswith("-search")
         is_encrypted_model = request.model.endswith("-encrypt")
@@ -41,18 +67,28 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
         is_max_thinking_model = request.model.endswith("-max")
         base_model_name = request.model
 
-
-
-
-        elif
-        elif
-        elif
+        # Determine base_model_name by stripping known suffixes
+        # This order matters if a model could have multiple (e.g. -encrypt-auto, though not currently a pattern)
+        if is_auto_model: base_model_name = request.model[:-len("-auto")]
+        elif is_grounded_search: base_model_name = request.model[:-len("-search")]
+        elif is_encrypted_full_model: base_model_name = request.model[:-len("-encrypt-full")] # Must be before -encrypt
+        elif is_encrypted_model: base_model_name = request.model[:-len("-encrypt")]
+        elif is_nothinking_model: base_model_name = request.model[:-len("-nothinking")]
+        elif is_max_thinking_model: base_model_name = request.model[:-len("-max")]
+
+        # Specific model variant checks (if any remain exclusive and not covered dynamically)
+        if is_nothinking_model and base_model_name != "gemini-2.5-flash-preview-04-17":
+            return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for 'gemini-2.5-flash-preview-04-17'.", "invalid_request_error"))
+        if is_max_thinking_model and base_model_name != "gemini-2.5-flash-preview-04-17":
+            return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-max) is only supported for 'gemini-2.5-flash-preview-04-17'.", "invalid_request_error"))
+
         generation_config = create_generation_config(request)
 
         client_to_use = None
         express_api_key_val = app_config.VERTEX_EXPRESS_API_KEY_VAL
-
-
+
+        # Use dynamically fetched express models list for this check
+        if express_api_key_val and base_model_name in vertex_express_model_ids: # Check against base_model_name
             try:
                 client_to_use = genai.Client(vertexai=True, api_key=express_api_key_val)
                 print(f"INFO: Using Vertex Express Mode for model {base_model_name}.")
@@ -74,28 +110,28 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
             print("ERROR: No Vertex AI client could be initialized via Express Mode or Rotated Credentials.")
             return JSONResponse(status_code=500, content=create_openai_error_response(500, "Vertex AI client not available. Ensure credentials are set up correctly (env var or files).", "server_error"))
 
-
+        encryption_instructions_placeholder = ["// Protocol Instructions Placeholder //"] # Actual instructions are in message_processing
 
         if is_auto_model:
             print(f"Processing auto model: {request.model}")
             attempts = [
                 {"name": "base", "model": base_model_name, "prompt_func": create_gemini_prompt, "config_modifier": lambda c: c},
-                {"name": "encrypt", "model": base_model_name, "prompt_func": create_encrypted_gemini_prompt, "config_modifier": lambda c: {**c, "system_instruction":
+                {"name": "encrypt", "model": base_model_name, "prompt_func": create_encrypted_gemini_prompt, "config_modifier": lambda c: {**c, "system_instruction": encryption_instructions_placeholder}},
                 {"name": "old_format", "model": base_model_name, "prompt_func": create_encrypted_full_gemini_prompt, "config_modifier": lambda c: c}
             ]
             last_err = None
             for attempt in attempts:
-                print(f"Auto-mode attempting: '{attempt['name']}'")
+                print(f"Auto-mode attempting: '{attempt['name']}' for model {attempt['model']}")
                 current_gen_config = attempt["config_modifier"](generation_config.copy())
                 try:
                     return await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request)
                 except Exception as e_auto:
                     last_err = e_auto
-                    print(f"Auto-attempt '{attempt['name']}' failed: {e_auto}")
+                    print(f"Auto-attempt '{attempt['name']}' for model {attempt['model']} failed: {e_auto}")
                     await asyncio.sleep(1)
 
             print(f"All auto attempts failed. Last error: {last_err}")
-            err_msg = f"All auto-mode attempts failed for {request.model}. Last error: {str(last_err)}"
+            err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
             if not request.stream and last_err:
                 return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
             elif request.stream:
@@ -106,23 +142,33 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
                 return StreamingResponse(final_error_stream(), media_type="text/event-stream")
             return JSONResponse(status_code=500, content=create_openai_error_response(500, "All auto-mode attempts failed without specific error.", "server_error"))
 
-        else:
+        else: # Not an auto model
            current_prompt_func = create_gemini_prompt
+            # Determine the actual model string to call the API with (e.g., "gemini-1.5-pro-search")
+            api_model_string = request.model
+
            if is_grounded_search:
                search_tool = types.Tool(google_search=types.GoogleSearch())
                generation_config["tools"] = [search_tool]
            elif is_encrypted_model:
-                generation_config["system_instruction"] =
+                generation_config["system_instruction"] = encryption_instructions_placeholder
                current_prompt_func = create_encrypted_gemini_prompt
            elif is_encrypted_full_model:
-                generation_config["system_instruction"] =
+                generation_config["system_instruction"] = encryption_instructions_placeholder
                current_prompt_func = create_encrypted_full_gemini_prompt
            elif is_nothinking_model:
                generation_config["thinking_config"] = {"thinking_budget": 0}
            elif is_max_thinking_model:
                generation_config["thinking_config"] = {"thinking_budget": 24576}
 
-
+            # For non-auto models, the 'base_model_name' might have suffix stripped.
+            # We should use the original 'request.model' for API call if it's a suffixed one,
+            # or 'base_model_name' if it's truly a base model without suffixes.
+            # The current logic uses 'base_model_name' for the API call in the 'else' block.
+            # This means if `request.model` was "gemini-1.5-pro-search", `base_model_name` becomes "gemini-1.5-pro"
+            # but the API call might need the full "gemini-1.5-pro-search".
+            # Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
+            return await execute_gemini_call(client_to_use, api_model_string, current_prompt_func, generation_config, request)
 
     except Exception as e:
         error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
```
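To see what the new validation accepts, the suffix-expansion logic from chat_completions can be exercised in isolation. A minimal standalone sketch, with illustrative base model IDs standing in for the remotely fetched list:

```python
# Sketch of the allowed-model expansion performed in chat_completions.
# Base IDs are illustrative; in the app they come from the remote config.
vertex_model_ids = ["gemini-2.5-pro-preview-05-06", "gemini-2.5-flash-preview-04-17"]

standard_suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
special_suffix_map = {"gemini-2.5-flash-preview-04-17": ["-nothinking", "-max"]}

all_allowed_model_ids = set(vertex_model_ids)
for base_id in vertex_model_ids:
    for suffix in standard_suffixes:
        all_allowed_model_ids.add(f"{base_id}{suffix}")
    for special_suffix in special_suffix_map.get(base_id, []):
        all_allowed_model_ids.add(f"{base_id}{special_suffix}")

# Note the stripping order in the handler: "-encrypt-full" must be checked
# before "-encrypt", since str.endswith("-encrypt") is also true for
# "...-encrypt-full" and would leave "-full" dangling on the base name.
assert "gemini-2.5-flash-preview-04-17-max" in all_allowed_model_ids
assert "gemini-2.5-pro-preview-05-06-max" not in all_allowed_model_ids
```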
app/routes/models_api.py (CHANGED)

```diff
@@ -1,49 +1,55 @@
 import time
 from fastapi import APIRouter, Depends
-
-
-from
+from typing import List, Dict, Any # Will be needed for constructing model dicts
+from auth import get_api_key
+from ..model_loader import get_vertex_models, get_vertex_express_models, refresh_models_config_cache
 
 router = APIRouter()
 
 @router.get("/v1/models")
 async def list_models(api_key: str = Depends(get_api_key)):
-    # [previous hard-coded model list implementation, 39 lines, removed]
+    # Attempt to refresh the cache. If it fails, getters will use the old cache.
+    await refresh_models_config_cache()
+
+    vertex_model_ids = await get_vertex_models()
+    vertex_express_model_ids = await get_vertex_express_models()
+
+    # Combine and unique model IDs.
+    # We should also consider creating the OpenAI model suffixes (-search, -encrypt, -auto)
+    # based on the base models available, similar to how chat_api.py currently does.
+    # For simplicity here, we'll list all unique base models from the config
+    # and then also list the specific variations.
+
+    all_model_ids = set(vertex_model_ids + vertex_express_model_ids)
+
+    # Create extended model list with variations (search, encrypt, auto etc.)
+    # This logic might need to be more sophisticated based on actual supported features per base model.
+    # For now, let's assume for each base model, we might have these variations.
+    # A better approach would be if the remote config specified these variations.
+
+    dynamic_models_data: List[Dict[str, Any]] = []
+    current_time = int(time.time())
+
+    # Add base models
+    for model_id in sorted(list(all_model_ids)):
+        dynamic_models_data.append({
+            "id": model_id, "object": "model", "created": current_time, "owned_by": "google",
+            "permission": [], "root": model_id, "parent": None
+        })
+        # Add common variations if not already present directly in fetched list (more robust if config provides these)
+        # This part is a simplification and might create non-existent model permutations
+        # if not all base models support all suffixes.
+        suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
+        # Special suffixes like -nothinking, -max are very model specific, harder to generalize here
+        for suffix in suffixes:
+            suffixed_id = f"{model_id}{suffix}"
+            if suffixed_id not in all_model_ids: # Avoid duplicates if config already lists them
+                dynamic_models_data.append({
+                    "id": suffixed_id, "object": "model", "created": current_time, "owned_by": "google",
+                    "permission": [], "root": model_id, "parent": None
+                })
+
+    # Ensure uniqueness again after adding suffixes (in case some suffixed models were also in base lists)
+    final_models_data_map = {m["id"]: m for m in dynamic_models_data}
+
+    return {"object": "list", "data": list(final_models_data_map.values())}
```
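For reference, the response produced by this handler follows the OpenAI model-object layout built above: each base model appears once, and each generated suffix variant carries the base model in its "root" field. An illustrative excerpt (the created timestamp is whatever int(time.time()) returned):

```json
{
  "object": "list",
  "data": [
    {
      "id": "gemini-2.5-pro-preview-05-06",
      "object": "model",
      "created": 1715000000,
      "owned_by": "google",
      "permission": [],
      "root": "gemini-2.5-pro-preview-05-06",
      "parent": null
    },
    {
      "id": "gemini-2.5-pro-preview-05-06-search",
      "object": "model",
      "created": 1715000000,
      "owned_by": "google",
      "permission": [],
      "root": "gemini-2.5-pro-preview-05-06",
      "parent": null
    }
  ]
}
```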
app/vertex_ai_init.py (CHANGED)

```diff
@@ -1,20 +1,17 @@
 import json
+import asyncio # Added for await
 from google import genai
-from credentials_manager import CredentialManager, parse_multiple_json_credentials
-import config as app_config
+from credentials_manager import CredentialManager, parse_multiple_json_credentials
+import config as app_config
+from model_loader import refresh_models_config_cache # Import new model loader function
 
-#
-VERTEX_EXPRESS_MODELS
-
-    "gemini-2.0-flash-lite-001",
-    "gemini-2.5-pro-preview-03-25",
-    "gemini-2.5-flash-preview-04-17",
-    "gemini-2.5-pro-preview-05-06",
-]
+# VERTEX_EXPRESS_MODELS list is now dynamically loaded via model_loader
+# The constant VERTEX_EXPRESS_MODELS previously defined here is removed.
+# Consumers should use get_vertex_express_models() from model_loader.
 
 # Global 'client' and 'get_vertex_client()' are removed.
 
-def init_vertex_ai(credential_manager_instance: CredentialManager) -> bool:
+async def init_vertex_ai(credential_manager_instance: CredentialManager) -> bool: # Made async
     """
     Initializes the credential manager with credentials from GOOGLE_CREDENTIALS_JSON (if provided)
     and verifies if any credentials (environment or file-based through the manager) are available.
@@ -65,6 +62,16 @@ def init_vertex_ai(credential_manager_instance: CredentialManager) -> bool:
     else:
         print("INFO: GOOGLE_CREDENTIALS_JSON environment variable not found.")
 
+    # Attempt to pre-warm the model configuration cache
+    print("INFO: Attempting to pre-warm model configuration cache during startup...")
+    models_loaded_successfully = await refresh_models_config_cache()
+    if models_loaded_successfully:
+        print("INFO: Model configuration cache pre-warmed successfully.")
+    else:
+        print("WARNING: Failed to pre-warm model configuration cache during startup. It will be loaded lazily on first request.")
+    # We don't necessarily fail the entire init_vertex_ai if model list fetching fails,
+    # as credential validation might still be important, and model list can be fetched later.
+
     # CredentialManager's __init__ calls load_credentials_list() for files.
     # refresh_credentials_list() re-scans files and combines with in-memory (already includes env creds if loaded above).
     # The return value of refresh_credentials_list indicates if total > 0
```
|