Spaces:

lamhieu
/

lightweight-embeddings

Running

App Files Files Community

lamhieu committed on Jan 5

Commit

b6efbf5

1 Parent(s): 7234381

chore: update something

Browse files

Files changed (3) hide show

lightweight_embeddings/__init__.py +0 -21
lightweight_embeddings/router.py +4 -23
lightweight_embeddings/service.py +122 -154

lightweight_embeddings/__init__.py CHANGED Viewed

@@ -1,24 +1,3 @@
-# filename: __init__.py
-"""
-LightweightEmbeddings - FastAPI Application Entry Point
-This application provides text and image embeddings using multiple text models and one image model.
-Supported text model IDs:
-- "multilingual-e5-small"
-- "multilingual-e5-base"
-- "multilingual-e5-large"
-- "snowflake-arctic-embed-l-v2.0"
-- "paraphrase-multilingual-MiniLM-L12-v2"
-- "paraphrase-multilingual-mpnet-base-v2"
-- "bge-m3"
-- "gte-multilingual-base"
-Supported image model ID:
-- "siglip-base-patch16-256-multilingual"
-"""
 import gradio as gr
 import requests
 import json

 import gradio as gr
 import requests
 import json

lightweight_embeddings/router.py CHANGED Viewed

@@ -1,22 +1,3 @@
-"""
-FastAPI Router for Embeddings Service (Revised & Simplified)
-Exposes the EmbeddingsService methods via a RESTful API.
-Supported Text Model IDs:
-- "multilingual-e5-small"
-- "multilingual-e5-base"
-- "multilingual-e5-large"
-- "snowflake-arctic-embed-l-v2.0"
-- "paraphrase-multilingual-MiniLM-L12-v2"
-- "paraphrase-multilingual-mpnet-base-v2"
-- "bge-m3"
-- "gte-multilingual-base"
-Supported Image Model IDs:
-- "siglip-base-patch16-256-multilingual"
-"""
 from __future__ import annotations
 import logging
@@ -158,10 +139,6 @@ async def create_embeddings(
             },
         }
-        background_tasks.add_task(
-            analytics.access, request.model, resp["usage"]["total_tokens"]
-        )
         for idx, emb in enumerate(embeddings):
             resp["data"].append(
                 {
@@ -171,6 +148,10 @@ async def create_embeddings(
                 }
             )
         return resp
     except Exception as e:

 from __future__ import annotations
 import logging
             },
         }
         for idx, emb in enumerate(embeddings):
             resp["data"].append(
                 {
                 }
             )
+        background_tasks.add_task(
+            analytics.access, request.model, resp["usage"]["total_tokens"]
+        )
         return resp
     except Exception as e:

lightweight_embeddings/service.py CHANGED Viewed

@@ -1,29 +1,3 @@
-"""
-Lightweight Embeddings Service Module (Revised & Simplified)
-This module provides a service for generating and comparing embeddings from text and images
-using state-of-the-art transformer models. It supports both CPU and GPU inference.
-Features:
-- Text and image embedding generation
-- Cross-modal similarity ranking
-- Batch processing support
-- Asynchronous API support
-Supported Text Model IDs:
-- "multilingual-e5-small"
-- "multilingual-e5-base"
-- "multilingual-e5-large"
-- "snowflake-arctic-embed-l-v2.0"
-- "paraphrase-multilingual-MiniLM-L12-v2"
-- "paraphrase-multilingual-mpnet-base-v2"
-- "bge-m3"
-- "gte-multilingual-base"
-Supported Image Model IDs:
-- "google/siglip-base-patch16-256-multilingual" (default, but extensible)
-"""
 from __future__ import annotations
 import logging
@@ -49,7 +23,6 @@ logging.basicConfig(level=logging.INFO)
 class TextModelType(str, Enum):
     """
     Enumeration of supported text models.
-    Adjust as needed for your environment.
     """
     MULTILINGUAL_E5_SMALL = "multilingual-e5-small"
@@ -72,7 +45,7 @@ class ImageModelType(str, Enum):
 class ModelInfo(NamedTuple):
     """
-    Simple container that maps an enum to:
       - model_id: Hugging Face model ID (or local path)
       - onnx_file: Path to ONNX file (if available)
     """
@@ -91,14 +64,12 @@ class ModelConfig:
     image_model_type: ImageModelType = (
         ImageModelType.SIGLIP_BASE_PATCH16_256_MULTILINGUAL
     )
-    # If you need extra parameters like `logit_scale`, etc., keep them here
-    logit_scale: float = 4.60517
     @property
     def text_model_info(self) -> ModelInfo:
         """
-        Return ModelInfo for the configured text_model_type.
         """
         text_configs = {
             TextModelType.MULTILINGUAL_E5_SMALL: ModelInfo(
@@ -139,7 +110,7 @@ class ModelConfig:
     @property
     def image_model_info(self) -> ModelInfo:
         """
-        Return ModelInfo for the configured image_model_type.
         """
         image_configs = {
             ImageModelType.SIGLIP_BASE_PATCH16_256_MULTILINGUAL: ModelInfo(
@@ -156,8 +127,8 @@ class ModelKind(str, Enum):
 def detect_model_kind(model_id: str) -> ModelKind:
     """
-    Detect whether model_id is for a text or an image model.
-    Raises ValueError if unrecognized.
     """
     if model_id in [m.value for m in TextModelType]:
         return ModelKind.TEXT
@@ -173,21 +144,21 @@ def detect_model_kind(model_id: str) -> ModelKind:
 class EmbeddingsService:
     """
-    Service for generating text/image embeddings and performing ranking.
     """
     def __init__(self, config: Optional[ModelConfig] = None):
-        self.lru_cache = LRUCache(maxsize=10_000)  # Approximate for ~100MB usage
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.config = config or ModelConfig()
-        # Preloaded text & image models
         self.text_models: Dict[TextModelType, SentenceTransformer] = {}
         self.image_models: Dict[ImageModelType, AutoModel] = {}
         self.image_processors: Dict[ImageModelType, AutoProcessor] = {}
-        # Load all models
         self._load_all_models()
     def _load_all_models(self) -> None:
@@ -195,40 +166,37 @@ class EmbeddingsService:
         Pre-load all known text and image models for quick switching.
         """
         try:
             for t_model_type in TextModelType:
                 info = ModelConfig(text_model_type=t_model_type).text_model_info
                 logger.info("Loading text model: %s", info.model_id)
-                # If you have an ONNX file AND your SentenceTransformer supports ONNX
                 if info.onnx_file:
                     logger.info("Using ONNX file: %s", info.onnx_file)
-                    # The following 'backend' & 'model_kwargs' parameters
-                    # are recognized only in special/certain distributions of SentenceTransformer
                     self.text_models[t_model_type] = SentenceTransformer(
                         info.model_id,
                         device=self.device,
-                        backend="onnx",  # or "ort" in some custom forks
                         model_kwargs={
-                            "provider": "CPUExecutionProvider",  # or "CUDAExecutionProvider"
                             "file_name": info.onnx_file,
                         },
                         trust_remote_code=True,
                     )
                 else:
-                    # Fallback: standard HF loading
                     self.text_models[t_model_type] = SentenceTransformer(
                         info.model_id,
                         device=self.device,
                         trust_remote_code=True,
                     )
             for i_model_type in ImageModelType:
                 model_id = ModelConfig(
                     image_model_type=i_model_type
                 ).image_model_info.model_id
                 logger.info("Loading image model: %s", model_id)
-                # Typically, for CLIP-like models:
                 model = AutoModel.from_pretrained(model_id).to(self.device)
                 processor = AutoProcessor.from_pretrained(model_id)
@@ -242,9 +210,10 @@ class EmbeddingsService:
             raise RuntimeError(msg) from e
     @staticmethod
-    def _validate_text_input(input_text: Union[str, List[str]]) -> List[str]:
         """
-        Ensure input_text is a non-empty string or list of strings.
         """
         if isinstance(input_text, str):
             if not input_text.strip():
@@ -262,27 +231,42 @@ class EmbeddingsService:
         return input_text
     @staticmethod
-    def _validate_modality(modality: str) -> None:
-        if modality not in ("text", "image"):
-            raise ValueError("Unsupported modality. Must be 'text' or 'image'.")
-    def _process_image(self, path_or_url: Union[str, Path]) -> torch.Tensor:
         """
-        Download/Load image from path/URL and apply transformations.
         """
         try:
-            if isinstance(path_or_url, Path) or not path_or_url.startswith("http"):
-                # Local file path
-                img = Image.open(path_or_url).convert("RGB")
-            else:
-                # URL
                 resp = requests.get(path_or_url, timeout=10)
                 resp.raise_for_status()
                 img = Image.open(BytesIO(resp.content)).convert("RGB")
-            proc = self.image_processors[self.config.image_model_type]
-            data = proc(images=img, return_tensors="pt").to(self.device)
-            return data
         except Exception as e:
             raise ValueError(f"Error processing image '{path_or_url}': {str(e)}") from e
@@ -292,145 +276,125 @@ class EmbeddingsService:
         texts: List[str],
     ) -> np.ndarray:
         """
-        Generate text embeddings using the currently configured text model
-        with an LRU cache for single-text requests.
         """
         try:
             if len(texts) == 1:
-                key = md5(texts[0].encode("utf-8")).hexdigest()
                 if key in self.lru_cache:
                     return self.lru_cache[key]
             model = self.text_models[model_id]
-            embeddings = model.encode(texts)
-            if len(texts) == 1:
-                self.lru_cache[key] = embeddings
-            return embeddings
         except Exception as e:
             raise RuntimeError(
-                f"Error generating text embeddings for model '{self.config.text_model_type}': {e}"
             ) from e
     def _generate_image_embeddings(
         self,
         model_id: ImageModelType,
-        images: Union[str, List[str]],
-        batch_size: Optional[int] = None,
     ) -> np.ndarray:
         """
-        Generate image embeddings using the currently configured image model.
-        If `batch_size` is None, all images are processed at once.
         """
         try:
             model = self.image_models[model_id]
-            # Single image
-            if isinstance(images, str):
-                processed = self._process_image(images)
-                with torch.no_grad():
-                    emb = model.get_image_features(**processed)
-                return emb.cpu().numpy()
-            # Multiple images
-            if batch_size is None:
-                # Process them all in one batch
-                tensors = []
-                for img_path in images:
-                    tensors.append(self._process_image(img_path))
-                # Concatenate
-                keys = tensors[0].keys()
-                combined = {k: torch.cat([t[k] for t in tensors], dim=0) for k in keys}
-                with torch.no_grad():
-                    emb = model.get_image_features(**combined)
-                return emb.cpu().numpy()
-            # Process in smaller batches
-            all_embeddings = []
-            for i in range(0, len(images), batch_size):
-                batch_images = images[i : i + batch_size]
-                # Process each sub-batch
-                tensors = []
-                for img_path in batch_images:
-                    tensors.append(self._process_image(img_path))
-                keys = tensors[0].keys()
-                combined = {k: torch.cat([t[k] for t in tensors], dim=0) for k in keys}
-                with torch.no_grad():
-                    emb = model.get_image_features(**combined)
-                all_embeddings.append(emb.cpu().numpy())
-            return np.vstack(all_embeddings)
         except Exception as e:
             raise RuntimeError(
-                f"Error generating image embeddings for model '{self.config.image_model_type}': {e}"
             ) from e
     async def generate_embeddings(
         self,
         model: str,
         inputs: Union[str, List[str]],
-        batch_size: Optional[int] = None,
     ) -> np.ndarray:
         """
-        Asynchronously generate embeddings for text or image.
         """
-        # Determine if it's text or image
         modality = detect_model_kind(model)
-        model_id = (
-            TextModelType(model)
-            if modality == ModelKind.TEXT
-            else ImageModelType(model)
-        )
-        self._validate_modality(modality)
-        if modality == "text" and isinstance(model_id, TextModelType):
-            text_list = self._validate_text_input(inputs)
-            return self._generate_text_embeddings(model_id=model_id, texts=text_list)
-        elif modality == "image" and isinstance(model_id, ImageModelType):
-            return self._generate_image_embeddings(
-                model_id=model_id, images=inputs, batch_size=batch_size
-            )
     async def rank(
         self,
         model: str,
         queries: Union[str, List[str]],
-        candidates: List[str],
-        batch_size: Optional[int] = None,
     ) -> Dict[str, Any]:
         """
-        Rank candidates (always text) against the queries, which may be text or image.
-        Returns dict of { probabilities, cosine_similarities, usage }.
         """
-        # Determine if it's text or image
         modality = detect_model_kind(model)
-        model_id = (
-            TextModelType(model)
-            if modality == ModelKind.TEXT
-            else ImageModelType(model)
-        )
         # 1) Generate embeddings for queries
-        query_embeds = await self.generate_embeddings(
-            model=model_id, inputs=queries, batch_size=batch_size
-        )
-        # 2) Generate embeddings for text candidates
-        candidate_embeds = await self.generate_embeddings(
-            model=model_id, inputs=candidates, batch_size=batch_size
-        )
         # 3) Compute cosine similarity
         sim_matrix = self.cosine_similarity(query_embeds, candidate_embeds)
-        # 4) Apply logit scale + softmax
         scaled = np.exp(self.config.logit_scale) * sim_matrix
         probs = self.softmax(scaled)
-        # 5) Compute usage (similar to embeddings)
-        query_tokens = self.estimate_tokens(queries) if modality == "text" else 0
-        candidate_tokens = self.estimate_tokens(candidates) if modality == "text" else 0
-        total_tokens = query_tokens + candidate_tokens
         usage = {
             "prompt_tokens": total_tokens,
             "total_tokens": total_tokens,
@@ -444,27 +408,31 @@ class EmbeddingsService:
     def estimate_tokens(self, input_data: Union[str, List[str]]) -> int:
         """
-        Estimate token count using the model's tokenizer.
         """
-        texts = self._validate_text_input(input_data)
         model = self.text_models[self.config.text_model_type]
         tokenized = model.tokenize(texts)
         return sum(len(ids) for ids in tokenized["input_ids"])
     @staticmethod
     def softmax(scores: np.ndarray) -> np.ndarray:
         """
-        Standard softmax along the last dimension.
         """
         exps = np.exp(scores - np.max(scores, axis=-1, keepdims=True))
         return exps / np.sum(exps, axis=-1, keepdims=True)
     @staticmethod
     def cosine_similarity(a: np.ndarray, b: np.ndarray) -> np.ndarray:
         """
         a: (N, D)
         b: (M, D)
-        Return: (N, M) of cos sim
         """
         a_norm = a / (np.linalg.norm(a, axis=1, keepdims=True) + 1e-9)
         b_norm = b / (np.linalg.norm(b, axis=1, keepdims=True) + 1e-9)

 from __future__ import annotations
 import logging
 class TextModelType(str, Enum):
     """
     Enumeration of supported text models.
     """
     MULTILINGUAL_E5_SMALL = "multilingual-e5-small"
 class ModelInfo(NamedTuple):
     """
+    This container maps an enum to:
       - model_id: Hugging Face model ID (or local path)
       - onnx_file: Path to ONNX file (if available)
     """
     image_model_type: ImageModelType = (
         ImageModelType.SIGLIP_BASE_PATCH16_256_MULTILINGUAL
     )
+    logit_scale: float = 4.60517  # Example scale used in cross-modal similarity
     @property
     def text_model_info(self) -> ModelInfo:
         """
+        Returns ModelInfo for the configured text_model_type.
         """
         text_configs = {
             TextModelType.MULTILINGUAL_E5_SMALL: ModelInfo(
     @property
     def image_model_info(self) -> ModelInfo:
         """
+        Returns ModelInfo for the configured image_model_type.
         """
         image_configs = {
             ImageModelType.SIGLIP_BASE_PATCH16_256_MULTILINGUAL: ModelInfo(
 def detect_model_kind(model_id: str) -> ModelKind:
     """
+    Detect whether model_id belongs to a text or an image model.
+    Raises ValueError if the model is not recognized.
     """
     if model_id in [m.value for m in TextModelType]:
         return ModelKind.TEXT
 class EmbeddingsService:
     """
+    Service for generating text/image embeddings and performing similarity ranking.
+    Batch size has been removed. Single or multiple inputs are handled uniformly.
     """
     def __init__(self, config: Optional[ModelConfig] = None):
         """
         Prepare the cache, device and model registries, then load the models.

         Args:
             config: Optional configuration; ``ModelConfig()`` is used when the
                 argument is omitted or falsy.
         """
         # Registries for the preloaded models, keyed by model enum.
         self.text_models: Dict[TextModelType, SentenceTransformer] = {}
         self.image_models: Dict[ImageModelType, AutoModel] = {}
         self.image_processors: Dict[ImageModelType, AutoProcessor] = {}

         self.config = config or ModelConfig()

         # Run on the GPU whenever torch reports CUDA as available.
         if torch.cuda.is_available():
             self.device = "cuda"
         else:
             self.device = "cpu"

         # Bounded LRU cache holding at most 10,000 entries.
         self.lru_cache = LRUCache(maxsize=10_000)

         self._load_all_models()
     def _load_all_models(self) -> None:
         Pre-load all known text and image models for quick switching.
         """
         try:
+            # Preload text models
             for t_model_type in TextModelType:
                 info = ModelConfig(text_model_type=t_model_type).text_model_info
                 logger.info("Loading text model: %s", info.model_id)
                 if info.onnx_file:
                     logger.info("Using ONNX file: %s", info.onnx_file)
                     self.text_models[t_model_type] = SentenceTransformer(
                         info.model_id,
                         device=self.device,
+                        backend="onnx",
                         model_kwargs={
+                            "provider": "CPUExecutionProvider",
                             "file_name": info.onnx_file,
                         },
                         trust_remote_code=True,
                     )
                 else:
                     self.text_models[t_model_type] = SentenceTransformer(
                         info.model_id,
                         device=self.device,
                         trust_remote_code=True,
                     )
+            # Preload image models
             for i_model_type in ImageModelType:
                 model_id = ModelConfig(
                     image_model_type=i_model_type
                 ).image_model_info.model_id
                 logger.info("Loading image model: %s", model_id)
                 model = AutoModel.from_pretrained(model_id).to(self.device)
                 processor = AutoProcessor.from_pretrained(model_id)
             raise RuntimeError(msg) from e
     @staticmethod
+    def _validate_text_list(input_text: Union[str, List[str]]) -> List[str]:
         """
+        Convert text input into a non-empty list of strings.
+        Raises ValueError if the input is invalid.
         """
         if isinstance(input_text, str):
             if not input_text.strip():
         return input_text
     @staticmethod
+    def _validate_image_list(input_images: Union[str, List[str]]) -> List[str]:
+        """
+        Convert image input into a non-empty list of image paths/URLs.
+        Raises ValueError if the input is invalid.
+        """
+        if isinstance(input_images, str):
+            if not input_images.strip():
+                raise ValueError("Image input cannot be empty.")
+            return [input_images]
+        if not isinstance(input_images, list) or not all(
+            isinstance(x, str) for x in input_images
+        ):
+            raise ValueError("Image input must be a string or a list of strings.")
+        if len(input_images) == 0:
+            raise ValueError("Image input list cannot be empty.")
+        return input_images
+    def _process_image(self, path_or_url: str) -> Dict[str, torch.Tensor]:
         """
+        Loads and processes a single image from local path or URL.
+        Returns a dictionary of tensors ready for the model.
         """
         try:
+            if path_or_url.startswith("http"):
                 resp = requests.get(path_or_url, timeout=10)
                 resp.raise_for_status()
                 img = Image.open(BytesIO(resp.content)).convert("RGB")
+            else:
+                img = Image.open(Path(path_or_url)).convert("RGB")
+            processor = self.image_processors[self.config.image_model_type]
+            processed_data = processor(images=img, return_tensors="pt").to(self.device)
+            return processed_data
         except Exception as e:
             raise ValueError(f"Error processing image '{path_or_url}': {str(e)}") from e
         texts: List[str],
     ) -> np.ndarray:
         """
+        Generates text embeddings using the SentenceTransformer-based model.
+        Utilizes an LRU cache for single-input scenarios.
         """
         try:
             if len(texts) == 1:
+                single_text = texts[0]
+                key = md5(single_text.encode("utf-8")).hexdigest()
                 if key in self.lru_cache:
                     return self.lru_cache[key]
+                model = self.text_models[model_id]
+                emb = model.encode([single_text])
+                self.lru_cache[key] = emb
+                return emb
+            # For multiple texts, no LRU cache is used
             model = self.text_models[model_id]
+            return model.encode(texts)
         except Exception as e:
             raise RuntimeError(
+                f"Error generating text embeddings with model '{model_id}': {e}"
             ) from e
     def _generate_image_embeddings(
         self,
         model_id: ImageModelType,
+        images: List[str],
     ) -> np.ndarray:
         """
+        Generates image embeddings using the CLIP-like transformer model.
+        Handles single or multiple images uniformly (no batch size parameter).
         """
         try:
             model = self.image_models[model_id]
+            # Collect processed inputs in a single batch
+            processed_tensors = []
+            for img_path in images:
+                processed_tensors.append(self._process_image(img_path))
+            # Keys should be the same for all processed outputs
+            keys = processed_tensors[0].keys()
+            # Concatenate along the batch dimension
+            combined = {
+                k: torch.cat([pt[k] for pt in processed_tensors], dim=0) for k in keys
+            }
+            with torch.no_grad():
+                embeddings = model.get_image_features(**combined)
+            return embeddings.cpu().numpy()
         except Exception as e:
             raise RuntimeError(
+                f"Error generating image embeddings with model '{model_id}': {e}"
             ) from e
     async def generate_embeddings(
         self,
         model: str,
         inputs: Union[str, List[str]],
     ) -> np.ndarray:
         """
         Produce embeddings for ``inputs`` with the model named by ``model``.

         The model name is classified with ``detect_model_kind``. Text models go
         through ``_validate_text_list`` and ``_generate_text_embeddings``; image
         models go through ``_validate_image_list`` and
         ``_generate_image_embeddings``.
         """
         kind = detect_model_kind(model)

         if kind == ModelKind.TEXT:
             text_enum = TextModelType(model)
             cleaned_texts = self._validate_text_list(inputs)
             return self._generate_text_embeddings(text_enum, cleaned_texts)

         if kind == ModelKind.IMAGE:
             image_enum = ImageModelType(model)
             image_refs = self._validate_image_list(inputs)
             return self._generate_image_embeddings(image_enum, image_refs)

         # No branch matched: the result is None.
         return None
     async def rank(
         self,
         model: str,
         queries: Union[str, List[str]],
+        candidates: Union[str, List[str]],
     ) -> Dict[str, Any]:
         """
+        Ranks text `candidates` given `queries`, which can be text or images.
+        Always returns a dictionary of { probabilities, cosine_similarities, usage }.
+        Note: This implementation uses the same model for both queries and candidates.
+              For true cross-modal ranking, you might need separate models or a shared model.
         """
         modality = detect_model_kind(model)
+        # Convert the string model to the appropriate enum
+        if modality == ModelKind.TEXT:
+            model_enum = TextModelType(model)
+        else:
+            model_enum = ImageModelType(model)
         # 1) Generate embeddings for queries
+        query_embeds = await self.generate_embeddings(model_enum.value, queries)
+        # 2) Generate embeddings for candidates (assumed text if queries are text;
+        #    or if queries are images, also use the image model for candidates).
+        candidate_embeds = await self.generate_embeddings(model_enum.value, candidates)
         # 3) Compute cosine similarity
         sim_matrix = self.cosine_similarity(query_embeds, candidate_embeds)
+        # 4) Apply logit scale + softmax to obtain probabilities
         scaled = np.exp(self.config.logit_scale) * sim_matrix
         probs = self.softmax(scaled)
+        # 5) Estimate token usage if we're dealing with text
+        if modality == ModelKind.TEXT:
+            query_tokens = self.estimate_tokens(queries)
+            candidate_tokens = self.estimate_tokens(candidates)
+            total_tokens = query_tokens + candidate_tokens
+        else:
+            total_tokens = 0
         usage = {
             "prompt_tokens": total_tokens,
             "total_tokens": total_tokens,
     def estimate_tokens(self, input_data: Union[str, List[str]]) -> int:
         """
+        Estimates token count using the SentenceTransformer tokenizer.
+        Only applicable if the current configured model is a text model.
         """
+        texts = self._validate_text_list(input_data)
         model = self.text_models[self.config.text_model_type]
         tokenized = model.tokenize(texts)
+        # Summing over the lengths of input_ids for each example
         return sum(len(ids) for ids in tokenized["input_ids"])
     @staticmethod
     def softmax(scores: np.ndarray) -> np.ndarray:
         """
+        Applies the standard softmax function along the last dimension.
         """
+        # Stabilize scores by subtracting max
         exps = np.exp(scores - np.max(scores, axis=-1, keepdims=True))
         return exps / np.sum(exps, axis=-1, keepdims=True)
     @staticmethod
     def cosine_similarity(a: np.ndarray, b: np.ndarray) -> np.ndarray:
         """
+        Computes the pairwise cosine similarity between all rows of a and b.
         a: (N, D)
         b: (M, D)
+        Return: (N, M) matrix of cosine similarities
         """
         a_norm = a / (np.linalg.norm(a, axis=1, keepdims=True) + 1e-9)
         b_norm = b / (np.linalg.norm(b, axis=1, keepdims=True) + 1e-9)