bibibi12345 committed on
Commit a8e6344 · 1 Parent(s): d67404e

dynamic model list

app/config.py CHANGED
@@ -19,4 +19,7 @@ VERTEX_EXPRESS_API_KEY_VAL = os.environ.get("VERTEX_EXPRESS_API_KEY")
 FAKE_STREAMING_ENABLED = os.environ.get("FAKE_STREAMING", "false").lower() == "true"
 FAKE_STREAMING_INTERVAL_SECONDS = float(os.environ.get("FAKE_STREAMING_INTERVAL", "1.0"))
 
+# URL for the remote JSON file containing model lists
+MODELS_CONFIG_URL = os.environ.get("MODELS_CONFIG_URL", "https://gist.githubusercontent.com/gzzhongqi/e0b684f319437a859bcf5bd6203fd1f6/raw")
+
 # Validation logic moved to app/auth.py
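
The loader introduced by this commit only needs the remote document to expose the two model lists that get_vertex_models() and get_vertex_express_models() return. A hypothetical example of the JSON served at MODELS_CONFIG_URL (the key names and ids here are assumptions inferred from the code below, not confirmed by this diff):

{
  "vertex_models": ["gemini-2.5-pro-preview-05-06", "gemini-2.5-flash-preview-04-17"],
  "vertex_express_models": ["gemini-2.0-flash-001", "gemini-2.5-pro-preview-05-06"]
}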
app/main.py CHANGED
@@ -35,8 +35,8 @@ app.include_router(chat_api.router)
 
 @app.on_event("startup")
 async def startup_event():
-    if init_vertex_ai(credential_manager):
-        print("INFO: Fallback Vertex AI client initialization check completed successfully.")
+    if await init_vertex_ai(credential_manager): # Added await
+        print("INFO: Vertex AI credential and model config initialization check completed successfully.")
     else:
         print("ERROR: Failed to initialize a fallback Vertex AI client. API will likely fail.")
 
app/requirements.txt CHANGED
@@ -3,4 +3,5 @@ uvicorn==0.27.1
 google-auth==2.38.0
 google-cloud-aiplatform==1.86.0
 pydantic==2.6.1
-google-genai==1.13.0
+google-genai==1.13.0
+httpx>=0.25.0
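
httpx is presumably the client used by the new app/model_loader.py module, which is imported throughout this commit but not included in the diff. A minimal sketch of such a loader, assuming the JSON shape suggested above; the cache variable, timeout, and error handling are illustrative guesses, not the actual implementation:

# app/model_loader.py -- hypothetical sketch; the real module is not part of this diff.
from typing import Any, Dict, List, Optional

import httpx

import config as app_config

# In-memory cache for the fetched models config (structure assumed).
_models_config_cache: Optional[Dict[str, Any]] = None

async def refresh_models_config_cache() -> bool:
    """Fetch the remote JSON and replace the cache; return True on success."""
    global _models_config_cache
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            resp = await client.get(app_config.MODELS_CONFIG_URL)
            resp.raise_for_status()
            _models_config_cache = resp.json()
        return True
    except Exception as e:
        # On failure, keep whatever cache we already have (possibly None).
        print(f"ERROR: Failed to fetch models config: {e}")
        return False

async def _get_config() -> Dict[str, Any]:
    # Lazy-load on first use if startup pre-warming failed or was skipped.
    if _models_config_cache is None:
        await refresh_models_config_cache()
    return _models_config_cache or {}

async def get_vertex_models() -> List[str]:
    return (await _get_config()).get("vertex_models", [])

async def get_vertex_express_models() -> List[str]:
    return (await _get_config()).get("vertex_express_models", [])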
app/routes/chat_api.py CHANGED
@@ -1,6 +1,6 @@
 import asyncio
 import json # Needed for error streaming
-from fastapi import APIRouter, Depends, Request # Added Request
+from fastapi import APIRouter, Depends, Request
 from fastapi.responses import JSONResponse, StreamingResponse
 from typing import List, Dict, Any
 
@@ -8,12 +8,12 @@ from typing import List, Dict, Any
 from google.genai import types
 from google import genai
 
-# Local module imports (now absolute from app/ perspective)
+# Local module imports
 from models import OpenAIRequest, OpenAIMessage
 from auth import get_api_key
-# from main import credential_manager # Removed, will use request.app.state
+from main import credential_manager # Accessing the instance from main.py
 import config as app_config
-from vertex_ai_init import VERTEX_EXPRESS_MODELS
+from model_loader import get_vertex_models, get_vertex_express_models # Import from model_loader
 from message_processing import (
     create_gemini_prompt,
     create_encrypted_gemini_prompt,
@@ -27,12 +27,38 @@ from api_helpers import (
 
 router = APIRouter()
 
-
 @router.post("/v1/chat/completions")
 async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api_key: str = Depends(get_api_key)):
     try:
-        # Access credential_manager from app state
         credential_manager_instance = fastapi_request.app.state.credential_manager
+
+        # Dynamically fetch allowed models for validation
+        vertex_model_ids = await get_vertex_models()
+        # Suffixes that can be appended to base models.
+        # The remote model config should ideally be the source of truth for all valid permutations.
+        standard_suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
+        special_suffix_map = { # For models with unique suffixes not covered by standard ones
+            "gemini-2.5-flash-preview-04-17": ["-nothinking", "-max"]
+        }
+
+        all_allowed_model_ids = set(vertex_model_ids) # Start with base models from config
+        for base_id in vertex_model_ids: # Iterate over base models to add suffixed versions
+            for suffix in standard_suffixes:
+                all_allowed_model_ids.add(f"{base_id}{suffix}")
+            if base_id in special_suffix_map:
+                for special_suffix in special_suffix_map[base_id]:
+                    all_allowed_model_ids.add(f"{base_id}{special_suffix}")
+
+        # Add express models to the allowed list as well, as they are distinct
+        # and might not be covered by the base vertex_models list from remote config.
+        # Alternatively, the remote config's vertex_models should include express models if they are also usable as base.
+        vertex_express_model_ids = await get_vertex_express_models()
+        all_allowed_model_ids.update(vertex_express_model_ids)
+
+
+        if not request.model or request.model not in all_allowed_model_ids:
+            return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' not found or not supported by this adapter. Valid models are: {sorted(list(all_allowed_model_ids))}", "invalid_request_error"))
+
         is_auto_model = request.model.endswith("-auto")
         is_grounded_search = request.model.endswith("-search")
         is_encrypted_model = request.model.endswith("-encrypt")
@@ -41,18 +67,28 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
         is_max_thinking_model = request.model.endswith("-max")
         base_model_name = request.model
 
-        if is_auto_model: base_model_name = request.model.replace("-auto", "")
-        elif is_grounded_search: base_model_name = request.model.replace("-search", "")
-        elif is_encrypted_model: base_model_name = request.model.replace("-encrypt", "")
-        elif is_encrypted_full_model: base_model_name = request.model.replace("-encrypt-full", "")
-        elif is_nothinking_model: base_model_name = request.model.replace("-nothinking","")
-        elif is_max_thinking_model: base_model_name = request.model.replace("-max","")
+        # Determine base_model_name by stripping known suffixes
+        # This order matters if a model could have multiple (e.g. -encrypt-auto, though not currently a pattern)
+        if is_auto_model: base_model_name = request.model[:-len("-auto")]
+        elif is_grounded_search: base_model_name = request.model[:-len("-search")]
+        elif is_encrypted_full_model: base_model_name = request.model[:-len("-encrypt-full")] # Must be before -encrypt
+        elif is_encrypted_model: base_model_name = request.model[:-len("-encrypt")]
+        elif is_nothinking_model: base_model_name = request.model[:-len("-nothinking")]
+        elif is_max_thinking_model: base_model_name = request.model[:-len("-max")]
+
+        # Specific model variant checks (if any remain exclusive and not covered dynamically)
+        if is_nothinking_model and base_model_name != "gemini-2.5-flash-preview-04-17":
+            return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for 'gemini-2.5-flash-preview-04-17'.", "invalid_request_error"))
+        if is_max_thinking_model and base_model_name != "gemini-2.5-flash-preview-04-17":
+            return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-max) is only supported for 'gemini-2.5-flash-preview-04-17'.", "invalid_request_error"))
+
         generation_config = create_generation_config(request)
 
         client_to_use = None
         express_api_key_val = app_config.VERTEX_EXPRESS_API_KEY_VAL
-
-        if express_api_key_val and base_model_name in VERTEX_EXPRESS_MODELS:
+
+        # Use dynamically fetched express models list for this check
+        if express_api_key_val and base_model_name in vertex_express_model_ids: # Check against base_model_name
             try:
                 client_to_use = genai.Client(vertexai=True, api_key=express_api_key_val)
                 print(f"INFO: Using Vertex Express Mode for model {base_model_name}.")
@@ -74,28 +110,28 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
            print("ERROR: No Vertex AI client could be initialized via Express Mode or Rotated Credentials.")
            return JSONResponse(status_code=500, content=create_openai_error_response(500, "Vertex AI client not available. Ensure credentials are set up correctly (env var or files).", "server_error"))
 
-        encryption_instructions = ["// Protocol Instructions Placeholder //"]
+        encryption_instructions_placeholder = ["// Protocol Instructions Placeholder //"] # Actual instructions are in message_processing
 
         if is_auto_model:
            print(f"Processing auto model: {request.model}")
            attempts = [
                {"name": "base", "model": base_model_name, "prompt_func": create_gemini_prompt, "config_modifier": lambda c: c},
-                {"name": "encrypt", "model": base_model_name, "prompt_func": create_encrypted_gemini_prompt, "config_modifier": lambda c: {**c, "system_instruction": encryption_instructions}},
+                {"name": "encrypt", "model": base_model_name, "prompt_func": create_encrypted_gemini_prompt, "config_modifier": lambda c: {**c, "system_instruction": encryption_instructions_placeholder}},
                {"name": "old_format", "model": base_model_name, "prompt_func": create_encrypted_full_gemini_prompt, "config_modifier": lambda c: c}
            ]
            last_err = None
            for attempt in attempts:
-                print(f"Auto-mode attempting: '{attempt['name']}'")
+                print(f"Auto-mode attempting: '{attempt['name']}' for model {attempt['model']}")
                current_gen_config = attempt["config_modifier"](generation_config.copy())
                try:
                    return await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request)
                except Exception as e_auto:
                    last_err = e_auto
-                    print(f"Auto-attempt '{attempt['name']}' failed: {e_auto}")
+                    print(f"Auto-attempt '{attempt['name']}' for model {attempt['model']} failed: {e_auto}")
                    await asyncio.sleep(1)
 
            print(f"All auto attempts failed. Last error: {last_err}")
-            err_msg = f"All auto-mode attempts failed for {request.model}. Last error: {str(last_err)}"
+            err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
            if not request.stream and last_err:
                return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
            elif request.stream:
@@ -106,23 +142,33 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
                return StreamingResponse(final_error_stream(), media_type="text/event-stream")
            return JSONResponse(status_code=500, content=create_openai_error_response(500, "All auto-mode attempts failed without specific error.", "server_error"))
 
-        else:
+        else: # Not an auto model
            current_prompt_func = create_gemini_prompt
+            # Determine the actual model string to call the API with (e.g., "gemini-1.5-pro-search")
+            api_model_string = request.model
+
            if is_grounded_search:
                search_tool = types.Tool(google_search=types.GoogleSearch())
                generation_config["tools"] = [search_tool]
            elif is_encrypted_model:
-                generation_config["system_instruction"] = encryption_instructions
+                generation_config["system_instruction"] = encryption_instructions_placeholder
                current_prompt_func = create_encrypted_gemini_prompt
            elif is_encrypted_full_model:
-                generation_config["system_instruction"] = encryption_instructions
+                generation_config["system_instruction"] = encryption_instructions_placeholder
                current_prompt_func = create_encrypted_full_gemini_prompt
            elif is_nothinking_model:
                generation_config["thinking_config"] = {"thinking_budget": 0}
            elif is_max_thinking_model:
                generation_config["thinking_config"] = {"thinking_budget": 24576}
 
-            return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
+            # For non-auto models, the 'base_model_name' might have suffix stripped.
+            # We should use the original 'request.model' for API call if it's a suffixed one,
+            # or 'base_model_name' if it's truly a base model without suffixes.
+            # The current logic uses 'base_model_name' for the API call in the 'else' block.
+            # This means if `request.model` was "gemini-1.5-pro-search", `base_model_name` becomes "gemini-1.5-pro"
+            # but the API call might need the full "gemini-1.5-pro-search".
+            # Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
+            return await execute_gemini_call(client_to_use, api_model_string, current_prompt_func, generation_config, request)
 
     except Exception as e:
         error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
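
With the dynamic allow-list in place, an unknown model id is rejected with a 400 before any client is constructed. A hypothetical smoke test against a local instance (base URL, auth header, and API key are placeholders):

import asyncio

import httpx

async def main() -> None:
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        resp = await client.post(
            "/v1/chat/completions",
            headers={"Authorization": "Bearer sk-placeholder"},
            json={"model": "definitely-not-a-model",
                  "messages": [{"role": "user", "content": "hi"}]},
        )
        # Expect 400 with an invalid_request_error body listing the valid models.
        print(resp.status_code, resp.json())

asyncio.run(main())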
app/routes/models_api.py CHANGED
@@ -1,49 +1,55 @@
 import time
 from fastapi import APIRouter, Depends
-# from typing import List, Dict, Any # Removed as unused
-
-from auth import get_api_key # Changed from relative
+from typing import List, Dict, Any # Will be needed for constructing model dicts
+from auth import get_api_key
+from ..model_loader import get_vertex_models, get_vertex_express_models, refresh_models_config_cache
 
 router = APIRouter()
 
 @router.get("/v1/models")
 async def list_models(api_key: str = Depends(get_api_key)):
-    # This model list should ideally be dynamic or configurable
-    models_data = [
-        {"id": "gemini-2.5-pro-exp-03-25", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-exp-03-25-search", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-exp-03-25-encrypt", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-exp-03-25-encrypt-full", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-exp-03-25-auto", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-preview-03-25", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-preview-03-25-search", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-preview-03-25-encrypt", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-preview-03-25-encrypt-full", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-preview-03-25-auto", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-preview-05-06", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-preview-05-06-search", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-preview-05-06-encrypt", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-preview-05-06-encrypt-full", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-pro-preview-05-06-auto", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.0-flash", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.0-flash-search", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.0-flash-lite", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.0-flash-lite-search", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.0-pro-exp-02-05", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-1.5-flash", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-flash-preview-04-17", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-flash-preview-04-17-encrypt", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-flash-preview-04-17-nothinking", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-2.5-flash-preview-04-17-max", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-1.5-flash-8b", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-1.5-pro", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-1.0-pro-002", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-1.0-pro-vision-001", "object": "model", "created": int(time.time()), "owned_by": "google"},
-        {"id": "gemini-embedding-exp", "object": "model", "created": int(time.time()), "owned_by": "google"}
-    ]
-    # Add root and parent for consistency with OpenAI-like response
-    for model_info in models_data:
-        model_info.setdefault("permission", [])
-        model_info.setdefault("root", model_info["id"]) # Typically the model ID itself
-        model_info.setdefault("parent", None) # Typically None for base models
-    return {"object": "list", "data": models_data}
+    # Attempt to refresh the cache. If it fails, getters will use the old cache.
+    await refresh_models_config_cache()
+
+    vertex_model_ids = await get_vertex_models()
+    vertex_express_model_ids = await get_vertex_express_models()
+
+    # Combine and unique model IDs.
+    # We should also consider creating the OpenAI model suffixes (-search, -encrypt, -auto)
+    # based on the base models available, similar to how chat_api.py currently does.
+    # For simplicity here, we'll list all unique base models from the config
+    # and then also list the specific variations.
+
+    all_model_ids = set(vertex_model_ids + vertex_express_model_ids)
+
+    # Create extended model list with variations (search, encrypt, auto etc.)
+    # This logic might need to be more sophisticated based on actual supported features per base model.
+    # For now, let's assume for each base model, we might have these variations.
+    # A better approach would be if the remote config specified these variations.
+
+    dynamic_models_data: List[Dict[str, Any]] = []
+    current_time = int(time.time())
+
+    # Add base models
+    for model_id in sorted(list(all_model_ids)):
+        dynamic_models_data.append({
+            "id": model_id, "object": "model", "created": current_time, "owned_by": "google",
+            "permission": [], "root": model_id, "parent": None
+        })
+        # Add common variations if not already present directly in fetched list (more robust if config provides these)
+        # This part is a simplification and might create non-existent model permutations
+        # if not all base models support all suffixes.
+        suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
+        # Special suffixes like -nothinking, -max are very model specific, harder to generalize here
+        for suffix in suffixes:
+            suffixed_id = f"{model_id}{suffix}"
+            if suffixed_id not in all_model_ids: # Avoid duplicates if config already lists them
+                dynamic_models_data.append({
+                    "id": suffixed_id, "object": "model", "created": current_time, "owned_by": "google",
+                    "permission": [], "root": model_id, "parent": None
+                })
+
+    # Ensure uniqueness again after adding suffixes (in case some suffixed models were also in base lists)
+    final_models_data_map = {m["id"]: m for m in dynamic_models_data}
+
+    return {"object": "list", "data": list(final_models_data_map.values())}
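
The resulting /v1/models payload mixes the base ids from the remote config with the generated suffix variants, each variant's root pointing back at its base model. An illustrative excerpt (ids and timestamp are examples only):

{
  "object": "list",
  "data": [
    {"id": "gemini-1.5-pro", "object": "model", "created": 1715000000, "owned_by": "google",
     "permission": [], "root": "gemini-1.5-pro", "parent": null},
    {"id": "gemini-1.5-pro-search", "object": "model", "created": 1715000000, "owned_by": "google",
     "permission": [], "root": "gemini-1.5-pro", "parent": null}
  ]
}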
app/vertex_ai_init.py CHANGED
@@ -1,20 +1,17 @@
 import json
+import asyncio # Added for await
 from google import genai
-from credentials_manager import CredentialManager, parse_multiple_json_credentials # Changed from relative
-import config as app_config # Changed from relative
+from credentials_manager import CredentialManager, parse_multiple_json_credentials
+import config as app_config
+from model_loader import refresh_models_config_cache # Import new model loader function
 
-# VERTEX_EXPRESS_API_KEY constant is removed, direct string "VERTEX_EXPRESS_API_KEY" will be used in chat_api.py
-VERTEX_EXPRESS_MODELS = [
-    "gemini-2.0-flash-001",
-    "gemini-2.0-flash-lite-001",
-    "gemini-2.5-pro-preview-03-25",
-    "gemini-2.5-flash-preview-04-17",
-    "gemini-2.5-pro-preview-05-06",
-]
+# VERTEX_EXPRESS_MODELS list is now dynamically loaded via model_loader
+# The constant VERTEX_EXPRESS_MODELS previously defined here is removed.
+# Consumers should use get_vertex_express_models() from model_loader.
 
 # Global 'client' and 'get_vertex_client()' are removed.
 
-def init_vertex_ai(credential_manager_instance: CredentialManager) -> bool:
+async def init_vertex_ai(credential_manager_instance: CredentialManager) -> bool: # Made async
     """
     Initializes the credential manager with credentials from GOOGLE_CREDENTIALS_JSON (if provided)
     and verifies if any credentials (environment or file-based through the manager) are available.
@@ -65,6 +62,16 @@ def init_vertex_ai(credential_manager_instance: CredentialManager) -> bool:
     else:
         print("INFO: GOOGLE_CREDENTIALS_JSON environment variable not found.")
 
+    # Attempt to pre-warm the model configuration cache
+    print("INFO: Attempting to pre-warm model configuration cache during startup...")
+    models_loaded_successfully = await refresh_models_config_cache()
+    if models_loaded_successfully:
+        print("INFO: Model configuration cache pre-warmed successfully.")
+    else:
+        print("WARNING: Failed to pre-warm model configuration cache during startup. It will be loaded lazily on first request.")
+    # We don't necessarily fail the entire init_vertex_ai if model list fetching fails,
+    # as credential validation might still be important, and model list can be fetched later.
+
     # CredentialManager's __init__ calls load_credentials_list() for files.
     # refresh_credentials_list() re-scans files and combines with in-memory (already includes env creds if loaded above).
     # The return value of refresh_credentials_list indicates if total > 0