mxbai-rerank-base-v1 / handler.py
Debanjum
Add handler, dependencies to setup custom inference endpoint handler
3e769d6
import logging
from datetime import datetime
from typing import Dict, List, AnyStr
from sentence_transformers import CrossEncoder
import torch
logger = logging.getLogger(__name__)
class EndpointHandler():
def __init__(self, path=""):
device = "cuda" if torch.cuda.is_available() else "cpu"
self.cross_encoder = CrossEncoder(path, device=device)
def __call__(self, data: Dict[str, AnyStr]) -> Dict[str, List[float]]:
"""
Args:
data (Dict[str, AnyStr]): A dictionary containing the input data and parameters for inference.
The input data should include a "query" and a list of "passages".
Return:
Dict[str, List[float]]: A dictionary with a single key "scores", containing a list of floating point numbers.
Each number represents the score of a passage for the given query. The order of the scores matches the order of the passages.
"""
inputs = data.get("inputs")
query = inputs.get("query")
passages = inputs.get("passages")
logger.info(f"Query: {query}")
logger.info(f"N. of passages: {len(passages)}")
start_time = datetime.now()
scores = self.cross_encoder.predict([(query, passage) for passage in passages], activation_fct=torch.nn.Sigmoid())
logger.info(f"Time to run cross-encoder for query '{query}' with {len(passages)} passages: {datetime.now() - start_time}")
logger.info(f"Scores: {scores}")
return {
"scores": scores.tolist()
}