from fastapi import APIRouter, HTTPException
import logging

from lm_eval import evaluator, utils
from svc.schemas import LMHarnessTaskRequest, LMHarnessTaskResponse

router = APIRouter()

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)


@router.post("/chat", response_model=LMHarnessTaskResponse)
def inference_model(request: LMHarnessTaskRequest):
    """Run an lm-eval-harness evaluation for the requested model and tasks.

    Delegates to ``evaluator.simple_evaluate`` with the parameters carried by
    the request and wraps the raw results dict in an ``LMHarnessTaskResponse``.

    Raises:
        HTTPException: 500 when the harness run fails for any reason; the
            original exception is chained and logged for diagnosis.
    """
    try:
        results = evaluator.simple_evaluate(
            model=request.model,
            model_args=request.model_args,
            tasks=request.tasks,
            num_fewshot=request.num_fewshot,
            batch_size=request.batch_size,
            device=request.device,
            limit=request.limit,
            # Whether to write out an example document and model input,
            # for checking task integrity.
            write_out=request.write_out,
        )
    except Exception as e:
        # Broad catch is intentional at this service boundary, but the cause
        # must be logged and chained — otherwise the 500 is undebuggable.
        logger.exception("lm-harness task execution failed for model: %s", request.model_args)
        raise HTTPException(
            status_code=500,
            detail=f"lm-harness task execution failed for model: {request.model_args} ({e})",
        ) from e
    logger.info(results)
    return LMHarnessTaskResponse(results=results)