# Two server routes that OctoAI containers should have:
# - a route for inference requests (e.g. "/predict") that accepts JSON inputs
#   and returns JSON outputs.
# - a route for health checks (e.g. "/healthcheck").
# Number of workers (optional). A typical best practice is to derive this number
# from the number of CPU cores the server has access to and should use.
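# The CustomModel class imported below is assumed (its definition is not shown
# in this file) to expose roughly this interface:
#
#   class CustomModel:
#       def __init__(self, ckpt: str): ...            # load weights from a checkpoint path
#       def predict(self, inputs: dict) -> dict: ...  # JSON-in, JSON-out inference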
"""HTTP Inference serving interface using sanic."""
import os

from sanic import Request, Sanic, response

from custommodel import CustomModel

_DEFAULT_PORT = 8000
"""Default port to serve inference on."""

# Load and initialize the model on startup globally, so it can be reused.
model_instance = CustomModel(ckpt="models/distil-240709-tot-model_epoch_20.pth")
"""Global instance of the model to serve."""

server = Sanic("server")
"""Global instance of the web server."""
@server.route("/healthcheck", methods=["GET"])
def healthcheck(_: Request) -> response.JSONResponse:
"""Responds to healthcheck requests.
:param request: the incoming healthcheck request.
:return: json responding to the healthcheck.
"""
return response.json({"healthy": "yes"})
@server.route("/reconstruction/predict", methods=["POST"])
def predict(request: Request) -> response.JSONResponse:
"""Responds to inference/prediction requests.
:param request: the incoming request containing inputs for the model.
:return: json containing the inference results.
"""
try:
inputs = request.json
output = model_instance.predict(inputs)
return response.json(output)
except Exception as e:
return response.json({'error': str(e)}, status=500)
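

# Example request (a sketch; the exact JSON schema depends on what
# CustomModel.predict expects, which is not shown in this file):
#   curl -X POST http://localhost:8000/reconstruction/predict \
#        -H "Content-Type: application/json" \
#        -d '{"input": [0.1, 0.2, 0.3]}'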


def main():
    """Entry point for the server."""
    port = int(os.environ.get("SERVING_PORT", _DEFAULT_PORT))
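    # workers=1 because the model is loaded once at module scope; each extra
    # worker process would load its own copy of the checkpoint. A common
    # sizing heuristic (a sketch, not something this file requires) is to
    # scale with available cores, e.g.:
    #   workers = max(1, (os.cpu_count() or 1) // 2)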
    server.run(host="0.0.0.0", port=port, workers=1)


if __name__ == "__main__":
    main()