Add handler and requirements.txt to set up our custom inference endpoint handler

Files changed (2) hide show

handler.py +41 -0
requirements.txt +23 -0

handler.py ADDED Viewed

	@@ -0,0 +1,41 @@

+import logging
+from datetime import datetime
+from typing import Any, AnyStr, Dict, List
+
+import torch
+from sentence_transformers import CrossEncoder
+logger = logging.getLogger(__name__)
+class EndpointHandler():
+    """Inference endpoint handler that scores passages against a query with a cross-encoder."""
+
+    def __init__(self, path=""):
+        """Load the cross-encoder found at ``path``, on CUDA when available and on CPU otherwise."""
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.cross_encoder = CrossEncoder(path, device=device)
+
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, List[float]]:
+        """
+        Score every passage against the query.
+
+        Args:
+            data (Dict[str, Any]): A dictionary containing the input data and parameters for inference.
+                ``data["inputs"]`` must be a dictionary with a "query" and a list of "passages".
+        Return:
+            Dict[str, List[float]]: A dictionary with a single key "scores", containing a list of floating point numbers.
+                Each number represents the score of a passage for the given query. The order of the scores matches the order of the passages.
+                An empty list of passages yields an empty list of scores.
+        Raises:
+            ValueError: If "inputs" is not a dictionary, or if "query" or "passages" is missing.
+        """
+        inputs = data.get("inputs")
+        # Fail with an explicit message instead of an AttributeError on None.
+        if not isinstance(inputs, dict):
+            raise ValueError("Expected 'inputs' to be a dictionary with 'query' and 'passages' keys.")
+        query = inputs.get("query")
+        passages = inputs.get("passages")
+        if query is None or passages is None:
+            raise ValueError("Both 'query' and 'passages' must be provided in 'inputs'.")
+        logger.info("Query: %s", query)
+        logger.info("N. of passages: %d", len(passages))
+        # Nothing to score: skip the model call entirely.
+        if not passages:
+            return {"scores": []}
+        start_time = datetime.now()
+        pairs = [(query, passage) for passage in passages]
+        scores = self.cross_encoder.predict(pairs, activation_fct=torch.nn.Sigmoid())
+        logger.info(
+            "Time to run cross-encoder for query '%s' with %d passages: %s",
+            query,
+            len(passages),
+            datetime.now() - start_time,
+        )
+        logger.info("Scores: %s", scores)
+        # predict() hands back an array-like by default; convert it to plain Python
+        # floats so the response matches the declared List[float] and is JSON-serializable.
+        score_list = scores.tolist() if hasattr(scores, "tolist") else list(scores)
+        return {
+            "scores": score_list
+        }

requirements.txt ADDED Viewed

	@@ -0,0 +1,23 @@

+certifi==2023.11.17
+charset-normalizer==3.3.2
+filelock==3.13.1
+fsspec==2023.12.2
+huggingface-hub==0.20.2
+idna==3.6
+Jinja2==3.1.3
+MarkupSafe==2.1.3
+mpmath==1.3.0
+networkx==3.2.1
+numpy==1.26.3
+packaging==23.2
+PyYAML==6.0.1
+regex==2023.12.25
+requests==2.31.0
+safetensors==0.4.1
+sympy==1.12
+tokenizers==0.15.0
+torch==2.1.2
+tqdm==4.66.1
+transformers==4.36.2
+typing_extensions==4.9.0
+urllib3==2.1.0