Spaces:
Sleeping
Sleeping
from fastapi import FastAPI, HTTPException, Depends, Header, Request | |
from fastapi.responses import StreamingResponse | |
from pydantic import BaseModel | |
from typing import List | |
from g4f import ChatCompletion | |
from slowapi import Limiter | |
from slowapi.util import get_remote_address | |
app = FastAPI() | |
# Initialize the rate limiter | |
limiter = Limiter(key_func=get_remote_address) | |
# List of available models | |
models = [ | |
"gpt-4o", "gpt-4o-mini", "gpt-4", | |
"gpt-4-turbo", "gpt-3.5-turbo", | |
"claude-3.7-sonnet", "o3-mini", "o1", "claude-3.5", "llama-3.1-405b", "gemini-flash", "blackboxai-pro", "openchat-3.5", "glm-4-9B", "blackboxai" | |
] | |
# Request model | |
class Message(BaseModel): | |
role: str | |
content: str | |
class ChatRequest(BaseModel): | |
model: str | |
messages: List[Message] | |
streaming: bool = True # Add streaming support | |
class ChatResponse(BaseModel): | |
role: str | |
content: str | |
# Dependency to check API key | |
async def verify_api_key(x_api_key: str = Header(...)): | |
if x_api_key != "vs-5wEvIw6vfLKIypGm7uiNoWuXrJcg4vAL": # Replace with your actual API key | |
raise HTTPException(status_code=403, detail="Invalid API key") | |
async def get_models(): | |
"""Endpoint to get the list of available models.""" | |
return {"models": models} | |
# Rate limit to 10 requests per minute | |
async def chat_completion( | |
request: Request, | |
chat_request: ChatRequest, | |
api_key: str = Depends(verify_api_key) | |
): | |
# Validate model | |
if chat_request.model not in models: | |
raise HTTPException(status_code=400, detail="Invalid model selected.") | |
# Check if messages are provided | |
if not chat_request.messages: | |
raise HTTPException(status_code=400, detail="Messages cannot be empty.") | |
# Convert messages to the format expected by ChatCompletion | |
formatted_messages = [{"role": msg.role, "content": msg.content} for msg in chat_request.messages] | |
try: | |
if chat_request.streaming: | |
# Stream the response | |
def event_stream(): | |
response = ChatCompletion.create( | |
model=chat_request.model, | |
messages=formatted_messages, | |
stream=True # Enable streaming | |
) | |
for chunk in response: | |
if isinstance(chunk, dict) and 'choices' in chunk: | |
for choice in chunk['choices']: | |
if 'message' in choice: | |
yield f"data: {choice['message']['content']}\n\n" | |
else: | |
yield f"data: {chunk}\n\n" # Fallback if chunk is not as expected | |
return StreamingResponse(event_stream(), media_type="text/event-stream") | |
else: | |
# Non-streaming response | |
response = ChatCompletion.create( | |
model=chat_request.model, | |
messages=formatted_messages | |
) | |
if isinstance(response, str): | |
response_content = response # Directly use if it's a string | |
else: | |
try: | |
response_content = response['choices'][0]['message']['content'] | |
except (IndexError, KeyError): | |
raise HTTPException(status_code=500, detail="Unexpected response structure.") | |
return ChatResponse(role="assistant", content=response_content) | |
except Exception as e: | |
raise HTTPException(status_code=500, detail=str(e)) | |
if __name__ == "__main__": | |
import uvicorn | |
uvicorn.run(app, host="0.0.0.0", port=7860) |