Spaces:
Running
Running
Commit
·
11cdcf6
1
Parent(s):
52d9215
Added OpenAI mode for Express-mode models
Browse files
- app/model_loader.py +1 -3
- app/routes/models_api.py +46 -113
app/model_loader.py
CHANGED
@@ -33,11 +33,9 @@ async def fetch_and_parse_models_config() -> Optional[Dict[str, List[str]]]:
|
|
33 |
print("Successfully fetched and parsed model configuration.")
|
34 |
|
35 |
# Add [EXPRESS] prefix to express models
|
36 |
-
prefixed_express_models = [f"[EXPRESS] {model_name}" for model_name in data["vertex_express_models"]]
|
37 |
-
|
38 |
return {
|
39 |
"vertex_models": data["vertex_models"],
|
40 |
-
"vertex_express_models":
|
41 |
}
|
42 |
else:
|
43 |
print(f"ERROR: Fetched model configuration has an invalid structure: {data}")
|
|
|
33 |
print("Successfully fetched and parsed model configuration.")
|
34 |
|
35 |
# Add [EXPRESS] prefix to express models
|
|
|
|
|
36 |
return {
|
37 |
"vertex_models": data["vertex_models"],
|
38 |
+
"vertex_express_models": data["vertex_express_models"]
|
39 |
}
|
40 |
else:
|
41 |
print(f"ERROR: Fetched model configuration has an invalid structure: {data}")
|
app/routes/models_api.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
import time
|
2 |
-
from fastapi import APIRouter, Depends, Request
|
3 |
-
from typing import List, Dict, Any
|
4 |
from auth import get_api_key
|
5 |
from model_loader import get_vertex_models, get_vertex_express_models, refresh_models_config_cache
|
6 |
-
import config as app_config
|
7 |
-
from credentials_manager import CredentialManager
|
8 |
|
9 |
router = APIRouter()
|
10 |
|
@@ -12,10 +12,10 @@ router = APIRouter()
|
|
12 |
async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_key)):
|
13 |
await refresh_models_config_cache()
|
14 |
|
15 |
-
OPENAI_DIRECT_SUFFIX = "-openai"
|
16 |
-
EXPERIMENTAL_MARKER = "-exp-"
|
17 |
PAY_PREFIX = "[PAY]"
|
18 |
-
|
|
|
|
|
19 |
credential_manager_instance: CredentialManager = fastapi_request.app.state.credential_manager
|
20 |
express_key_manager_instance = fastapi_request.app.state.express_key_manager
|
21 |
|
@@ -25,116 +25,49 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
|
|
25 |
raw_vertex_models = await get_vertex_models()
|
26 |
raw_express_models = await get_vertex_express_models()
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
if has_express_key:
|
32 |
-
candidate_model_ids.update(raw_express_models)
|
33 |
-
# If *only* express key is available, only express models (and their variants) should be listed.
|
34 |
-
# The current `vertex_model_ids` from remote config might contain non-express models.
|
35 |
-
# The `get_vertex_express_models()` should be the source of truth for express-eligible base models.
|
36 |
-
if not has_sa_creds:
|
37 |
-
# Only list models that are explicitly in the express list.
|
38 |
-
# Suffix generation will apply only to these if they are not gemini-2.0
|
39 |
-
all_model_ids = set(raw_express_models)
|
40 |
-
else:
|
41 |
-
# Both SA and Express are available, combine all known models
|
42 |
-
all_model_ids = set(raw_vertex_models + raw_express_models)
|
43 |
-
elif has_sa_creds:
|
44 |
-
# Only SA creds available, use all vertex_models (which might include express-eligible ones)
|
45 |
-
all_model_ids = set(raw_vertex_models)
|
46 |
-
else:
|
47 |
-
# No credentials available
|
48 |
-
all_model_ids = set()
|
49 |
-
|
50 |
-
# Create extended model list with variations (search, encrypt, auto etc.)
|
51 |
-
# This logic might need to be more sophisticated based on actual supported features per base model.
|
52 |
-
# For now, let's assume for each base model, we might have these variations.
|
53 |
-
# A better approach would be if the remote config specified these variations.
|
54 |
-
|
55 |
-
dynamic_models_data: List[Dict[str, Any]] = []
|
56 |
current_time = int(time.time())
|
57 |
|
58 |
-
|
59 |
-
|
60 |
-
current_display_prefix = ""
|
61 |
-
# Only add PAY_PREFIX if the model is not already an EXPRESS model (which has its own prefix)
|
62 |
-
# Apply PAY_PREFIX if SA creds are present, it's a model from raw_vertex_models,
|
63 |
-
# it's not experimental, and not already an EXPRESS model.
|
64 |
-
if has_sa_creds and \
|
65 |
-
original_model_id in raw_vertex_models_set and \
|
66 |
-
EXPERIMENTAL_MARKER not in original_model_id and \
|
67 |
-
not original_model_id.startswith("[EXPRESS]"):
|
68 |
-
current_display_prefix = PAY_PREFIX
|
69 |
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
-
|
73 |
-
|
74 |
-
"permission": [], "root": original_model_id, "parent": None
|
75 |
-
})
|
76 |
-
|
77 |
-
# Conditionally add common variations (standard suffixes)
|
78 |
-
if not original_model_id.startswith("gemini-2.0"): # Suffix rules based on original_model_id
|
79 |
-
standard_suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
|
80 |
-
for suffix in standard_suffixes:
|
81 |
-
# Suffix is applied to the original model ID part
|
82 |
-
suffixed_model_part = f"{original_model_id}{suffix}"
|
83 |
-
# Then the whole thing is prefixed
|
84 |
-
final_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
|
85 |
-
|
86 |
-
# Check if this suffixed ID is already in all_model_ids (unlikely with prefix) or already added
|
87 |
-
if final_suffixed_display_id not in all_model_ids and not any(m['id'] == final_suffixed_display_id for m in dynamic_models_data):
|
88 |
-
dynamic_models_data.append({
|
89 |
-
"id": final_suffixed_display_id, "object": "model", "created": current_time, "owned_by": "google",
|
90 |
-
"permission": [], "root": original_model_id, "parent": None
|
91 |
-
})
|
92 |
-
|
93 |
-
# Apply special suffixes for models starting with "gemini-2.5-flash" or containing "gemini-2.5-pro"
|
94 |
-
# This includes both regular and EXPRESS versions
|
95 |
-
if "gemini-2.5-flash" in original_model_id or "gemini-2.5-pro" in original_model_id: # Suffix rules based on original_model_id
|
96 |
-
special_thinking_suffixes = ["-nothinking", "-max"]
|
97 |
-
for special_suffix in special_thinking_suffixes:
|
98 |
-
suffixed_model_part = f"{original_model_id}{special_suffix}"
|
99 |
-
final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
|
100 |
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
if display_model_id and not any(m['id'] == display_model_id for m in dynamic_models_data):
|
119 |
-
dynamic_models_data.append({
|
120 |
-
"id": display_model_id, "object": "model", "created": current_time, "owned_by": "google",
|
121 |
-
"permission": [], "root": base_model_id_for_openai, "parent": None
|
122 |
-
})
|
123 |
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
})
|
136 |
-
# final_models_data_map = {m["id"]: m for m in dynamic_models_data}
|
137 |
-
# model_list = list(final_models_data_map.values())
|
138 |
-
# model_list.sort()
|
139 |
-
|
140 |
-
return {"object": "list", "data": sorted(dynamic_models_data, key=lambda x: x['id'])}
|
|
|
1 |
import time
|
2 |
+
from fastapi import APIRouter, Depends, Request
|
3 |
+
from typing import List, Dict, Any, Set
|
4 |
from auth import get_api_key
|
5 |
from model_loader import get_vertex_models, get_vertex_express_models, refresh_models_config_cache
|
6 |
+
import config as app_config
|
7 |
+
from credentials_manager import CredentialManager
|
8 |
|
9 |
router = APIRouter()
|
10 |
|
|
|
12 |
async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_key)):
|
13 |
await refresh_models_config_cache()
|
14 |
|
|
|
|
|
15 |
PAY_PREFIX = "[PAY]"
|
16 |
+
EXPRESS_PREFIX = "[EXPRESS] "
|
17 |
+
OPENAI_DIRECT_SUFFIX = "-openai"
|
18 |
+
|
19 |
credential_manager_instance: CredentialManager = fastapi_request.app.state.credential_manager
|
20 |
express_key_manager_instance = fastapi_request.app.state.express_key_manager
|
21 |
|
|
|
25 |
raw_vertex_models = await get_vertex_models()
|
26 |
raw_express_models = await get_vertex_express_models()
|
27 |
|
28 |
+
final_model_list: List[Dict[str, Any]] = []
|
29 |
+
processed_ids: Set[str] = set()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
current_time = int(time.time())
|
31 |
|
32 |
+
def add_model_and_variants(base_id: str, prefix: str):
|
33 |
+
"""Adds a model and its variants to the list if not already present."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
+
# Define all possible suffixes for a given model
|
36 |
+
suffixes = [""] # For the base model itself
|
37 |
+
if not base_id.startswith("gemini-2.0"):
|
38 |
+
suffixes.extend(["-search", "-encrypt", "-encrypt-full", "-auto"])
|
39 |
+
if "gemini-2.5-flash" in base_id or "gemini-2.5-pro" in base_id:
|
40 |
+
suffixes.extend(["-nothinking", "-max"])
|
41 |
|
42 |
+
# Add the openai variant for all models
|
43 |
+
suffixes.append(OPENAI_DIRECT_SUFFIX)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
+
for suffix in suffixes:
|
46 |
+
model_id_with_suffix = f"{base_id}{suffix}"
|
47 |
+
|
48 |
+
# Experimental models have no prefix
|
49 |
+
final_id = f"{prefix}{model_id_with_suffix}" if "-exp-" not in base_id else model_id_with_suffix
|
50 |
|
51 |
+
if final_id not in processed_ids:
|
52 |
+
final_model_list.append({
|
53 |
+
"id": final_id,
|
54 |
+
"object": "model",
|
55 |
+
"created": current_time,
|
56 |
+
"owned_by": "google",
|
57 |
+
"permission": [],
|
58 |
+
"root": base_id,
|
59 |
+
"parent": None
|
60 |
+
})
|
61 |
+
processed_ids.add(final_id)
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
+
# Process Express Key models first
|
64 |
+
if has_express_key:
|
65 |
+
for model_id in raw_express_models:
|
66 |
+
add_model_and_variants(model_id, EXPRESS_PREFIX)
|
67 |
+
|
68 |
+
# Process Service Account (PAY) models, they have lower priority
|
69 |
+
if has_sa_creds:
|
70 |
+
for model_id in raw_vertex_models:
|
71 |
+
add_model_and_variants(model_id, PAY_PREFIX)
|
72 |
+
|
73 |
+
return {"object": "list", "data": sorted(final_model_list, key=lambda x: x['id'])}
|
|
|
|
|
|
|
|
|
|
|
|