|
import json |
|
import os |
|
import glob |
|
import time |
|
|
|
from fastapi import FastAPI |
|
import hashlib |
|
import asyncio |
|
|
|
REFRESH_INTERVAL_SEC = 60 |
|
LOG_DIR_LIST = [] |
|
|
|
|
|
|
|
class Monitor: |
|
"""Monitor the number of calls to each model.""" |
|
|
|
def __init__(self, log_dir_list: list): |
|
self.log_dir_list = log_dir_list |
|
self.model_call = {} |
|
self.user_call = {} |
|
self.model_call_limit_global = { |
|
"gpt-4-1106-preview": 100, |
|
"gpt-4-0125-preview": 100, |
|
} |
|
self.model_call_day_limit_per_user = { |
|
"gpt-4-1106-preview": 5, |
|
"gpt-4-0125-preview": 5, |
|
} |
|
|
|
async def update_stats(self, num_file=1) -> None: |
|
while True: |
|
|
|
json_files = [] |
|
for log_dir in self.log_dir_list: |
|
json_files_per_server = glob.glob(os.path.join(log_dir, "*.json")) |
|
json_files_per_server.sort(key=os.path.getctime, reverse=True) |
|
json_files += json_files_per_server[:num_file] |
|
model_call = {} |
|
user_call = {} |
|
for json_file in json_files: |
|
for line in open(json_file, "r", encoding="utf-8"): |
|
obj = json.loads(line) |
|
if obj["type"] != "chat": |
|
continue |
|
if obj["model"] not in model_call: |
|
model_call[obj["model"]] = [] |
|
model_call[obj["model"]].append( |
|
{"tstamp": obj["tstamp"], "user_id": obj["ip"]} |
|
) |
|
if obj["ip"] not in user_call: |
|
user_call[obj["ip"]] = [] |
|
user_call[obj["ip"]].append( |
|
{"tstamp": obj["tstamp"], "model": obj["model"]} |
|
) |
|
|
|
self.model_call = model_call |
|
self.model_call_stats_hour = self.get_model_call_stats(top_k=None) |
|
self.model_call_stats_day = self.get_model_call_stats( |
|
top_k=None, most_recent_min=24 * 60 |
|
) |
|
|
|
self.user_call = user_call |
|
self.user_call_stats_hour = self.get_user_call_stats(top_k=None) |
|
self.user_call_stats_day = self.get_user_call_stats( |
|
top_k=None, most_recent_min=24 * 60 |
|
) |
|
await asyncio.sleep(REFRESH_INTERVAL_SEC) |
|
|
|
def get_model_call_limit(self, model: str) -> int: |
|
if model not in self.model_call_limit_global: |
|
return -1 |
|
return self.model_call_limit_global[model] |
|
|
|
def update_model_call_limit(self, model: str, limit: int) -> bool: |
|
if model not in self.model_call_limit_global: |
|
return False |
|
self.model_call_limit_global[model] = limit |
|
return True |
|
|
|
def is_model_limit_reached(self, model: str) -> bool: |
|
if model not in self.model_call_limit_global: |
|
return False |
|
if model not in self.model_call_stats_hour: |
|
return False |
|
|
|
if self.model_call_stats_hour[model] >= self.model_call_limit_global[model]: |
|
return True |
|
return False |
|
|
|
def is_user_limit_reached(self, model: str, user_id: str) -> bool: |
|
if model not in self.model_call_day_limit_per_user: |
|
return False |
|
if user_id not in self.user_call_stats_day: |
|
return False |
|
if model not in self.user_call_stats_day[user_id]["call_dict"]: |
|
return False |
|
|
|
if ( |
|
self.user_call_stats_day[user_id]["call_dict"][model] |
|
>= self.model_call_day_limit_per_user[model] |
|
): |
|
return True |
|
return False |
|
|
|
def get_model_call_stats( |
|
self, target_model=None, most_recent_min: int = 60, top_k: int = 20 |
|
) -> dict: |
|
model_call_stats = {} |
|
for model, reqs in self.model_call.items(): |
|
if target_model is not None and model != target_model: |
|
continue |
|
model_call = [] |
|
for req in reqs: |
|
if req["tstamp"] < time.time() - most_recent_min * 60: |
|
continue |
|
model_call.append(req["tstamp"]) |
|
model_call_stats[model] = len(model_call) |
|
if top_k is not None: |
|
top_k_model = sorted( |
|
model_call_stats, key=lambda x: model_call_stats[x], reverse=True |
|
)[:top_k] |
|
model_call_stats = {model: model_call_stats[model] for model in top_k_model} |
|
return model_call_stats |
|
|
|
def get_user_call_stats( |
|
self, target_model=None, most_recent_min: int = 60, top_k: int = 20 |
|
) -> dict: |
|
user_call_stats = {} |
|
for user_id, reqs in self.user_call.items(): |
|
user_model_call = {"call_dict": {}} |
|
for req in reqs: |
|
if req["tstamp"] < time.time() - most_recent_min * 60: |
|
continue |
|
if target_model is not None and req["model"] != target_model: |
|
continue |
|
if req["model"] not in user_model_call["call_dict"]: |
|
user_model_call["call_dict"][req["model"]] = 0 |
|
user_model_call["call_dict"][req["model"]] += 1 |
|
|
|
user_model_call["total_calls"] = sum(user_model_call["call_dict"].values()) |
|
if user_model_call["total_calls"] > 0: |
|
user_call_stats[user_id] = user_model_call |
|
if top_k is not None: |
|
top_k_user = sorted( |
|
user_call_stats, |
|
key=lambda x: user_call_stats[x]["total_calls"], |
|
reverse=True, |
|
)[:top_k] |
|
user_call_stats = { |
|
user_id: user_call_stats[user_id] for user_id in top_k_user |
|
} |
|
return user_call_stats |
|
|
|
def get_num_users(self, most_recent_min: int = 60) -> int: |
|
user_call_stats = self.get_user_call_stats( |
|
most_recent_min=most_recent_min, top_k=None |
|
) |
|
return len(user_call_stats) |
|
|
|
|
|
monitor = Monitor(log_dir_list=LOG_DIR_LIST) |
|
app = FastAPI() |
|
|
|
|
|
@app.on_event("startup") |
|
async def app_startup(): |
|
asyncio.create_task(monitor.update_stats(2)) |
|
|
|
|
|
@app.get("/get_model_call_limit/{model}") |
|
async def get_model_call_limit(model: str): |
|
return {"model_call_limit": {model: monitor.get_model_call_limit(model)}} |
|
|
|
|
|
@app.get("/update_model_call_limit/{model}/{limit}") |
|
async def update_model_call_limit(model: str, limit: int): |
|
if not monitor.update_model_call_limit(model, limit): |
|
return {"success": False} |
|
return {"success": True} |
|
|
|
|
|
@app.get("/is_limit_reached") |
|
async def is_limit_reached(model: str, user_id: str): |
|
if monitor.is_model_limit_reached(model): |
|
return { |
|
"is_limit_reached": True, |
|
"reason": f"MODEL_HOURLY_LIMIT ({model}): {monitor.get_model_call_limit(model)}", |
|
} |
|
if monitor.is_user_limit_reached(model, user_id): |
|
return { |
|
"is_limit_reached": True, |
|
"reason": f"USER_DAILY_LIMIT ({model}): {monitor.model_call_day_limit_per_user[model]}", |
|
} |
|
return {"is_limit_reached": False} |
|
|
|
|
|
@app.get("/get_num_users_hr") |
|
async def get_num_users(): |
|
return {"num_users": len(monitor.user_call_stats_hour)} |
|
|
|
|
|
@app.get("/get_num_users_day") |
|
async def get_num_users_day(): |
|
return {"num_users": len(monitor.user_call_stats_day)} |
|
|
|
|
|
@app.get("/get_user_call_stats") |
|
async def get_user_call_stats( |
|
model: str = None, most_recent_min: int = 60, top_k: int = None |
|
): |
|
return { |
|
"user_call_stats": monitor.get_user_call_stats(model, most_recent_min, top_k) |
|
} |
|
|
|
|
|
@app.get("/get_model_call_stats") |
|
async def get_model_call_stats( |
|
model: str = None, most_recent_min: int = 60, top_k: int = None |
|
): |
|
return { |
|
"model_call_stats": monitor.get_model_call_stats(model, most_recent_min, top_k) |
|
} |
|
|