Spaces:
Sleeping
Sleeping
import asyncio
import hmac
import json
import os
import random
import time
from typing import List, Optional, Type

from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security.api_key import APIKeyHeader
from openai import OpenAI
from pydantic import BaseModel, Field, field_validator
from starlette.responses import StreamingResponse
# Load environment variables (python-dotenv) before the os.getenv() lookups
# below are evaluated.
load_dotenv()

# Upstream API keys; get_openai_client() picks one of these at random.
# An entry is None when the corresponding environment variable is unset.
API_KEYS = [
    os.getenv(env_name)
    for env_name in ("API_GEMINI_1", "API_GEMINI_2", "API_GEMINI_3")
]

# Base URL passed to the OpenAI client; overridable through the environment.
BASE_URL = os.getenv(
    "BASE_URL",
    "https://generativelanguage.googleapis.com/v1beta/openai/",
)

# Secret that verify_api_key() expects after the "Bearer " prefix.
# None when API_HUGGINGFACE is unset.
EXPECTED_API_KEY = os.getenv("API_HUGGINGFACE")

# Name of the HTTP header that carries the caller's credential.
API_KEY_NAME = "Authorization"
class Message(BaseModel):
    # One chat message; both fields are required strings and are forwarded
    # to the upstream API through model_dump().
    role: str = Field(...)
    content: str = Field(...)
class ChatCompletionRequest(BaseModel):
    # Request body for a chat completion. Only `messages` is required; the
    # remaining fields fall back to the defaults declared here.
    model: str = Field(default="gemini-2.0-flash")
    messages: List[Message]
    # NOTE(review): 8196 may be a typo for 8192 — confirm against the
    # upstream model's output-token limit.
    max_tokens: Optional[int] = Field(default=8196)
    temperature: Optional[float] = Field(default=0.8)
    # When true, chat_completions() answers with a streamed response.
    stream: Optional[bool] = Field(default=False)
# Extracts the raw Authorization header. auto_error=False is set so that
# verify_api_key() can handle a missing header itself.
api_key_header = APIKeyHeader(auto_error=False, name=API_KEY_NAME)

app = FastAPI(
    title="OpenAI-SDK-compatible API",
    version="1.0.0",
    description="Un wrapper FastAPI compatibile con le specifiche dell'API OpenAI.",
)

# NOTE(review): wildcard origins together with allow_credentials=True is a
# very permissive CORS policy — confirm that credentialed cross-origin
# requests are really required.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],
)
def verify_api_key(api_key: str = Depends(api_key_header)):
    """Validate the caller's ``Authorization`` header.

    Args:
        api_key: Raw header value supplied by ``api_key_header``; falsy when
            the header is absent.

    Returns:
        The header value unchanged when it equals ``Bearer <EXPECTED_API_KEY>``.

    Raises:
        HTTPException: 403 when the header is missing or does not match,
            500 when the server has no expected key configured.
    """
    if not api_key:
        raise HTTPException(status_code=403, detail="API key mancante")

    if not EXPECTED_API_KEY:
        # Without this guard an unset API_HUGGINGFACE would make the expected
        # value the literal string "Bearer None", which any caller could send.
        raise HTTPException(
            status_code=500, detail="API key del server non configurata"
        )

    expected = f"Bearer {EXPECTED_API_KEY}"
    # Constant-time comparison; compared as bytes because compare_digest
    # rejects non-ASCII str arguments.
    if not hmac.compare_digest(api_key.encode("utf-8"), expected.encode("utf-8")):
        raise HTTPException(status_code=403, detail="API key non valida")
    return api_key
def get_openai_client():
    """Build an OpenAI client for ``BASE_URL`` using a randomly chosen key.

    Only entries of ``API_KEYS`` that are actually set are eligible, so a
    partially configured environment no longer yields a client with a
    ``None`` key at random.

    Returns:
        A new ``OpenAI`` client instance.
    """
    configured_keys = [key for key in API_KEYS if key]
    # With no key configured, keep passing None as before and let the OpenAI
    # constructor apply its own handling of a missing key.
    api_key = random.choice(configured_keys) if configured_keys else None
    return OpenAI(api_key=api_key, base_url=BASE_URL)
def call_api_sync(
    params: ChatCompletionRequest,
    *,
    max_retries: int = 5,
    retry_delay: float = 2.0,
):
    """Send a chat-completion request, retrying on rate-limit errors.

    A fresh client (and therefore a freshly chosen API key) is created for
    every attempt. Retries are bounded: the previous implementation recursed
    without limit on every 429, which could loop forever or end in a
    ``RecursionError``.

    Args:
        params: The validated request to forward upstream.
        max_retries: Maximum number of retries after a rate-limit error.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        Whatever ``client.chat.completions.create`` returns.

    Raises:
        Exception: The upstream error, re-raised unchanged, when it is not a
            rate-limit error or when the retries are exhausted.
    """
    # NOTE(review): this prints the full request, including message
    # contents, to stdout — confirm that is intended outside debugging.
    print(params)

    messages = [m.model_dump() for m in params.messages]
    retries_done = 0
    while True:
        try:
            client = get_openai_client()
            return client.chat.completions.create(
                model=params.model,
                messages=messages,
                max_tokens=params.max_tokens,
                temperature=params.temperature,
                stream=params.stream,
            )
        except Exception as e:
            # Prefer a status_code attribute when the exception has one; the
            # substring test is kept as a fallback for other error types.
            rate_limited = (
                getattr(e, "status_code", None) == 429 or "429" in str(e)
            )
            if not rate_limited or retries_done >= max_retries:
                raise
            retries_done += 1
            time.sleep(retry_delay)
async def _resp_async_generator(params: ChatCompletionRequest):
    """Yield a streamed chat completion as ``data: ...`` framed events.

    The OpenAI client used here is synchronous, so both the initial request
    and every ``next()`` on the stream run in the default executor; iterating
    directly inside this coroutine would block the event loop while waiting
    on the network.

    Args:
        params: The validated request; streaming is always forced on.

    Yields:
        One ``data: <json>`` event per upstream chunk followed by
        ``data: [DONE]``, or a single ``data: {"error": ...}`` event if
        anything fails (including client construction).
    """
    loop = asyncio.get_running_loop()
    # Sentinel returned by next() at exhaustion, so StopIteration never has
    # to travel through the executor future.
    exhausted = object()
    try:
        # Built inside the try so that a client-construction failure is also
        # reported to the caller as an error event.
        client = get_openai_client()
        stream = await loop.run_in_executor(
            None,
            lambda: client.chat.completions.create(
                model=params.model,
                messages=[m.model_dump() for m in params.messages],
                max_tokens=params.max_tokens,
                temperature=params.temperature,
                stream=True,
            ),
        )
        chunks = iter(stream)
        while True:
            chunk = await loop.run_in_executor(None, next, chunks, exhausted)
            if chunk is exhausted:
                break
            chunk_data = chunk.to_dict() if hasattr(chunk, "to_dict") else chunk
            yield f"data: {json.dumps(chunk_data)}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as e:
        error_data = {"error": str(e)}
        yield f"data: {json.dumps(error_data)}\n\n"
# --------------------------------------------------------------------------------------- | |
def read_general():
    """Return the fixed welcome payload."""
    # NOTE(review): no route decorator is visible on this function in this
    # view — confirm how it is registered on `app`.
    return dict(response="Benvenuto")
async def health_check():
    """Return a constant payload signalling that the service is up."""
    # NOTE(review): no route decorator is visible on this function in this
    # view — confirm how it is registered on `app`.
    status = dict(message="success")
    return status
# ---------------------------------- Text generation ---------------------------------------
async def chat_completions(req: ChatCompletionRequest):
    """Handle a chat-completion request, streamed or not.

    Args:
        req: The validated request body.

    Returns:
        A ``StreamingResponse`` wrapping ``_resp_async_generator`` when
        ``req.stream`` is true, otherwise the result of ``call_api_sync``.

    Raises:
        HTTPException: 400 when no messages are supplied, 500 when the
            non-streaming upstream call fails.
    """
    # NOTE(review): neither a route decorator nor a verify_api_key dependency
    # is visible on this function in this view — confirm both exist.
    if not req.messages:
        raise HTTPException(status_code=400, detail="Nessun messaggio fornito")

    if req.stream:
        # The generator emits "data: ...\n\n" frames, i.e. server-sent
        # events, so the response is labelled accordingly rather than as
        # NDJSON.
        return StreamingResponse(
            _resp_async_generator(req),
            media_type="text/event-stream",
        )

    try:
        # call_api_sync blocks (synchronous HTTP plus time.sleep on retries),
        # so it runs in the default executor to keep the event loop free.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, call_api_sync, req)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e