Spaces:
Running
Running
Commit
·
294201c
1
Parent(s):
7fe287f
Added back thinking config for Gemini 2.5 Flash
Browse files
- app/main.py +52 -0
app/main.py
CHANGED
@@ -1248,6 +1248,33 @@ async def list_models(api_key: str = Depends(get_api_key)):
|
|
1248 |
"root": "gemini-2.5-flash-preview-04-17",
|
1249 |
"parent": None,
|
1250 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1251 |
{
|
1252 |
"id": "gemini-1.5-flash-8b",
|
1253 |
"object": "model",
|
@@ -1326,6 +1353,8 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
|
|
1326 |
is_grounded_search = request.model.endswith("-search")
|
1327 |
is_encrypted_model = request.model.endswith("-encrypt")
|
1328 |
is_encrypted_full_model = request.model.endswith("-encrypt-full")
|
|
|
|
|
1329 |
|
1330 |
if is_auto_model:
|
1331 |
base_model_name = request.model.replace("-auto", "")
|
@@ -1335,6 +1364,22 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
|
|
1335 |
base_model_name = request.model.replace("-encrypt", "")
|
1336 |
elif is_encrypted_full_model:
|
1337 |
base_model_name = request.model.replace("-encrypt-full", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1338 |
else:
|
1339 |
base_model_name = request.model
|
1340 |
|
@@ -1585,6 +1630,13 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
|
|
1585 |
]
|
1586 |
current_config["system_instruction"] = encryption_instructions
|
1587 |
current_prompt_func = create_encrypted_full_gemini_prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1588 |
|
1589 |
|
1590 |
try:
|
|
|
1248 |
"root": "gemini-2.5-flash-preview-04-17",
|
1249 |
"parent": None,
|
1250 |
},
|
1251 |
+
{
|
1252 |
+
"id": "gemini-2.5-flash-preview-04-17-encrypt",
|
1253 |
+
"object": "model",
|
1254 |
+
"created": int(time.time()),
|
1255 |
+
"owned_by": "google",
|
1256 |
+
"permission": [],
|
1257 |
+
"root": "gemini-2.5-flash-preview-04-17",
|
1258 |
+
"parent": None,
|
1259 |
+
},
|
1260 |
+
{
|
1261 |
+
"id": "gemini-2.5-flash-preview-04-17-nothinking",
|
1262 |
+
"object": "model",
|
1263 |
+
"created": int(time.time()),
|
1264 |
+
"owned_by": "google",
|
1265 |
+
"permission": [],
|
1266 |
+
"root": "gemini-2.5-flash-preview-04-17",
|
1267 |
+
"parent": None,
|
1268 |
+
},
|
1269 |
+
{
|
1270 |
+
"id": "gemini-2.5-flash-preview-04-17-max",
|
1271 |
+
"object": "model",
|
1272 |
+
"created": int(time.time()),
|
1273 |
+
"owned_by": "google",
|
1274 |
+
"permission": [],
|
1275 |
+
"root": "gemini-2.5-flash-preview-04-17",
|
1276 |
+
"parent": None,
|
1277 |
+
},
|
1278 |
{
|
1279 |
"id": "gemini-1.5-flash-8b",
|
1280 |
"object": "model",
|
|
|
1353 |
is_grounded_search = request.model.endswith("-search")
|
1354 |
is_encrypted_model = request.model.endswith("-encrypt")
|
1355 |
is_encrypted_full_model = request.model.endswith("-encrypt-full")
|
1356 |
+
is_nothinking_model = request.model.endswith("-nothinking")
|
1357 |
+
is_max_thinking_model = request.model.endswith("-max")
|
1358 |
|
1359 |
if is_auto_model:
|
1360 |
base_model_name = request.model.replace("-auto", "")
|
|
|
1364 |
base_model_name = request.model.replace("-encrypt", "")
|
1365 |
elif is_encrypted_full_model:
|
1366 |
base_model_name = request.model.replace("-encrypt-full", "")
|
1367 |
+
elif is_nothinking_model:
|
1368 |
+
base_model_name = request.model.replace("-nothinking","")
|
1369 |
+
# Specific check for the flash model requiring budget
|
1370 |
+
if base_model_name != "gemini-2.5-flash-preview-04-17":
|
1371 |
+
error_response = create_openai_error_response(
|
1372 |
+
400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
|
1373 |
+
)
|
1374 |
+
return JSONResponse(status_code=400, content=error_response)
|
1375 |
+
elif is_max_thinking_model:
|
1376 |
+
base_model_name = request.model.replace("-max","")
|
1377 |
+
# Specific check for the flash model requiring budget
|
1378 |
+
if base_model_name != "gemini-2.5-flash-preview-04-17":
|
1379 |
+
error_response = create_openai_error_response(
|
1380 |
+
400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
|
1381 |
+
)
|
1382 |
+
return JSONResponse(status_code=400, content=error_response)
|
1383 |
else:
|
1384 |
base_model_name = request.model
|
1385 |
|
|
|
1630 |
]
|
1631 |
current_config["system_instruction"] = encryption_instructions
|
1632 |
current_prompt_func = create_encrypted_full_gemini_prompt
|
1633 |
+
elif is_nothinking_model:
|
1634 |
+
print(f"Using no thinking budget for model: {request.model}")
|
1635 |
+
current_config["thinking_config"] = {"thinking_budget": 0}
|
1636 |
+
|
1637 |
+
elif is_max_thinking_model:
|
1638 |
+
print(f"Using max thinking budget for model: {request.model}")
|
1639 |
+
current_config["thinking_config"] = {"thinking_budget": 24576}
|
1640 |
|
1641 |
|
1642 |
try:
|