|
import json |
|
from fastapi import APIRouter, Depends, HTTPException, Request, Response |
|
from fastapi.responses import StreamingResponse |
|
from slowapi import Limiter |
|
from slowapi.util import get_remote_address |
|
from slowapi.errors import RateLimitExceeded |
|
from api.auth import verify_app_secret |
|
from api.config import ALLOWED_MODELS |
|
from api.models import ChatRequest |
|
from api.utils import process_non_streaming_response, process_streaming_response |
|
from api.logger import setup_logger |
|
from api.rpmlimits import check_rate_limit |
|
|
|
|
|
# Module-level logger, named after this module.
logger = setup_logger(__name__)

# Router on which every endpoint in this module is registered.
router = APIRouter()

# slowapi limiter that buckets requests by the client's remote address.
limiter = Limiter(key_func=get_remote_address)
|
|
|
|
|
async def rate_limit_exceeded_handler(request: Request, exc: RateLimitExceeded) -> Response:
    """Build the HTTP 429 JSON response sent when a rate limit is exceeded.

    ``APIRouter`` does not provide an ``exception_handler`` decorator
    (exception handlers live on the application object), so decorating this
    function with ``@router.exception_handler`` fails with ``AttributeError``
    as soon as the module is imported. The function is therefore left
    undecorated and must be registered where the application is created::

        app.add_exception_handler(RateLimitExceeded, rate_limit_exceeded_handler)

    Args:
        request: The request that triggered the limit (not inspected).
        exc: The ``RateLimitExceeded`` exception being handled (not inspected).

    Returns:
        A ``Response`` with status 429 and a JSON ``error`` object.
    """
    error_payload = {
        "error": {
            "message": "Rate limit exceeded. Please wait and try again.",
            "type": "rate_limit",
        }
    }
    return Response(
        status_code=429,
        content=json.dumps(error_payload),
        media_type="application/json",
    )
|
|
|
@router.options("/v1/chat/completions")
@router.options("/api/v1/chat/completions")
async def chat_completions_options():
    """Answer an OPTIONS request on the chat-completions paths.

    Replies with an empty 200 response carrying permissive CORS headers:
    any origin, the POST and OPTIONS methods, and the Content-Type and
    Authorization request headers.
    """
    cors_headers = {
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "POST, OPTIONS",
        "Access-Control-Allow-Headers": "Content-Type, Authorization",
    }
    return Response(status_code=200, headers=cors_headers)
|
|
|
@router.get("/v1/models")
@router.get("/api/v1/models")
async def list_models():
    """Return the configured ``ALLOWED_MODELS`` wrapped in a list envelope."""
    return dict(object="list", data=ALLOWED_MODELS)
|
|
|
@router.post("/v1/chat/completions")
@router.post("/api/v1/chat/completions")
@limiter.limit("10/minute")
async def chat_completions(
    request: Request,
    app_secret: str = Depends(verify_app_secret),
):
    """Handle a chat-completion request, streaming or non-streaming.

    The body is read and validated manually rather than declared as a
    parameter, so malformed input is translated into client errors here
    instead of surfacing as an unhandled exception (HTTP 500).

    Args:
        request: Incoming request; its JSON body is parsed into a
            ``ChatRequest``.
        app_secret: Value produced by the ``verify_app_secret`` dependency.
            It is not used in the body; declaring it makes the dependency
            run for every call (presumably enforcing authentication --
            confirm in ``api.auth``).

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream`` when
        ``chat_request.stream`` is truthy, otherwise whatever
        ``process_non_streaming_response`` returns.

    Raises:
        HTTPException: 400 if the body is not valid JSON, is not a JSON
            object, or names a model that is not in ``ALLOWED_MODELS``;
            422 if the body cannot be turned into a ``ChatRequest``.
    """
    logger.info("Entering chat_completions route")

    # json.JSONDecodeError and UnicodeDecodeError are both ValueError
    # subclasses, so this covers empty, malformed and badly-encoded bodies.
    try:
        request_body = await request.json()
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail="Request body must be valid JSON.",
        ) from exc

    # ``**request_body`` below requires a mapping; reject arrays/scalars early.
    if not isinstance(request_body, dict):
        raise HTTPException(
            status_code=400,
            detail="Request body must be a JSON object.",
        )

    # NOTE(review): assumes ChatRequest signals bad input with ValueError
    # (pydantic's ValidationError is a ValueError subclass) or TypeError
    # (e.g. unexpected keyword arguments) -- confirm against api.models.
    try:
        chat_request = ChatRequest(**request_body)
    except (TypeError, ValueError) as exc:
        raise HTTPException(
            status_code=422,
            detail=f"Invalid chat request: {exc}",
        ) from exc

    logger.info(f"Processing chat completion request for model: {chat_request.model}")

    # Build the ID list once; it serves both the membership test and the
    # error message.
    allowed_ids = [model["id"] for model in ALLOWED_MODELS]
    if chat_request.model not in allowed_ids:
        raise HTTPException(
            status_code=400,
            detail=f"Model {chat_request.model} is not allowed. Allowed models are: {', '.join(allowed_ids)}",
        )

    if chat_request.stream:
        logger.info("Streaming response")
        return StreamingResponse(
            process_streaming_response(chat_request, request),
            media_type="text/event-stream",
        )

    logger.info("Non-streaming response")
    return await process_non_streaming_response(chat_request, request)
|
|
|
@router.route('/')
@router.route('/healthz')
@router.route('/ready')
@router.route('/alive')
@router.route('/status')
@router.get("/health")
def health_check(request: Request):
    """Return a constant ``{"status": "ok"}`` JSON response.

    The same handler is registered under several probe paths; the incoming
    request is accepted but not inspected.
    """
    body = json.dumps({"status": "ok"})
    return Response(content=body, media_type="application/json")
|
|