bibibi12345 committed
Commit 294201c · 1 Parent(s): 7fe287f

added back thinking config for 2.5f
Files changed (1): app/main.py (+52 -0)
app/main.py CHANGED
@@ -1248,6 +1248,33 @@ async def list_models(api_key: str = Depends(get_api_key)):
         "root": "gemini-2.5-flash-preview-04-17",
         "parent": None,
     },
+    {
+        "id": "gemini-2.5-flash-preview-04-17-encrypt",
+        "object": "model",
+        "created": int(time.time()),
+        "owned_by": "google",
+        "permission": [],
+        "root": "gemini-2.5-flash-preview-04-17",
+        "parent": None,
+    },
+    {
+        "id": "gemini-2.5-flash-preview-04-17-nothinking",
+        "object": "model",
+        "created": int(time.time()),
+        "owned_by": "google",
+        "permission": [],
+        "root": "gemini-2.5-flash-preview-04-17",
+        "parent": None,
+    },
+    {
+        "id": "gemini-2.5-flash-preview-04-17-max",
+        "object": "model",
+        "created": int(time.time()),
+        "owned_by": "google",
+        "permission": [],
+        "root": "gemini-2.5-flash-preview-04-17",
+        "parent": None,
+    },
     {
         "id": "gemini-1.5-flash-8b",
         "object": "model",
 
@@ -1326,6 +1353,8 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
     is_grounded_search = request.model.endswith("-search")
     is_encrypted_model = request.model.endswith("-encrypt")
     is_encrypted_full_model = request.model.endswith("-encrypt-full")
+    is_nothinking_model = request.model.endswith("-nothinking")
+    is_max_thinking_model = request.model.endswith("-max")
 
     if is_auto_model:
         base_model_name = request.model.replace("-auto", "")
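
The two new flags are plain endswith() checks alongside the existing ones, so any model id ending in "-nothinking" or "-max" matches; the validation in the next hunk is what narrows them to the one supported base model. The same routing, condensed into a standalone sketch (split_variant is a hypothetical helper, not a function in main.py):

# Hypothetical helper mirroring the endswith() routing above.
SUFFIXES = ("-auto", "-search", "-encrypt-full", "-encrypt", "-nothinking", "-max")

def split_variant(model: str) -> tuple[str, str | None]:
    """Return (base_model, suffix) for a variant id such as 'foo-max'."""
    for suffix in SUFFIXES:
        if model.endswith(suffix):
            return model[: -len(suffix)], suffix
    return model, None

assert split_variant("gemini-2.5-flash-preview-04-17-max") == (
    "gemini-2.5-flash-preview-04-17",
    "-max",
)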
 
@@ -1335,6 +1364,22 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
         base_model_name = request.model.replace("-encrypt", "")
     elif is_encrypted_full_model:
         base_model_name = request.model.replace("-encrypt-full", "")
+    elif is_nothinking_model:
+        base_model_name = request.model.replace("-nothinking", "")
+        # Specific check for the flash model requiring budget
+        if base_model_name != "gemini-2.5-flash-preview-04-17":
+            error_response = create_openai_error_response(
+                400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
+            )
+            return JSONResponse(status_code=400, content=error_response)
+    elif is_max_thinking_model:
+        base_model_name = request.model.replace("-max", "")
+        # Specific check for the flash model requiring budget
+        if base_model_name != "gemini-2.5-flash-preview-04-17":
+            error_response = create_openai_error_response(
+                400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
+            )
+            return JSONResponse(status_code=400, content=error_response)
     else:
         base_model_name = request.model
 
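
The guard above means only gemini-2.5-flash-preview-04-17 accepts the thinking-budget suffixes; any other base model gets a 400 in the OpenAI error envelope. A test sketch of the rejection path, assuming the FastAPI app is importable as app.main.app and that create_openai_error_response emits the usual {"error": {"message": ...}} shape (both are assumptions about this repo, not verified here):

# Sketch: exercising the 400 path with FastAPI's TestClient.
from fastapi.testclient import TestClient

from app.main import app  # assumed import path

client = TestClient(app)
resp = client.post(
    "/v1/chat/completions",
    headers={"Authorization": "Bearer my-api-key"},  # placeholder credential
    json={
        "model": "gemini-1.5-pro-nothinking",  # base model without budget support
        "messages": [{"role": "user", "content": "hi"}],
    },
)
assert resp.status_code == 400
assert "does not support -nothinking" in resp.json()["error"]["message"]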
 
 
@@ -1585,6 +1630,13 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_api_key)):
             ]
             current_config["system_instruction"] = encryption_instructions
             current_prompt_func = create_encrypted_full_gemini_prompt
+        elif is_nothinking_model:
+            print(f"Using no thinking budget for model: {request.model}")
+            current_config["thinking_config"] = {"thinking_budget": 0}
+
+        elif is_max_thinking_model:
+            print(f"Using max thinking budget for model: {request.model}")
+            current_config["thinking_config"] = {"thinking_budget": 24576}
 
 
     try:
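
A thinking_budget of 0 disables thinking entirely, while 24576 is the documented ceiling for the 2.5 Flash preview, so -nothinking and -max pin the two ends of the range. If current_config is ultimately handed to the google-genai SDK, the typed equivalent of the dict written above would look roughly like this (a sketch, not the proxy's actual call site):

# Sketch: the typed google-genai form of {"thinking_budget": 0}.
from google import genai
from google.genai import types

client = genai.Client(api_key="...")  # placeholder credential

response = client.models.generate_content(
    model="gemini-2.5-flash-preview-04-17",
    contents="Explain quicksort briefly.",
    config=types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_budget=0),  # the -nothinking case
    ),
)
print(response.text)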