taylorj94
/

Llama-3.2-1B

Text Generation

text-generation-inference

Model card Files Files and versions

taylorj94 committed on Dec 26, 2024

Commit

c572e06

·

verified ·

1 Parent(s): c3e7e6d

Update handler.py

Files changed (1) hide show

handler.py +5 -11

handler.py CHANGED Viewed

@@ -29,25 +29,19 @@ class EndpointHandler:
                 allowed_ids.add(token_id)
         return allowed_ids
-    def filter_allowed_tokens(input_ids: torch.Tensor, scores: np.ndarray, allowed_token_ids: set[int]) -> np.ndarray:
         """
-        Modify scores to allow only tokens in the allowed_token_ids set, except for tokens in the range 128000-128255.
         Handles both 1D and 2D scores arrays.
         """
-        # Define the range of exempt tokens
-        exempt_range = set(range(128000, 128256))
-        # Combine exempt tokens with allowed tokens
-        effective_allowed_ids = allowed_token_ids.union(exempt_range)
         if scores.ndim == 1:
             # 1D case: Apply mask directly
-            mask = np.isin(np.arange(scores.shape[0]), list(effective_allowed_ids))
             scores[~mask] = float('-inf')
         elif scores.ndim == 2:
             # 2D case: Apply mask across each row
             for i in range(scores.shape[0]):
-                mask = np.isin(np.arange(scores.shape[1]), list(effective_allowed_ids))
                 scores[i, ~mask] = float('-inf')
         else:
             raise ValueError(f"Unsupported scores dimension: {scores.ndim}")
@@ -95,4 +89,4 @@ class EndpointHandler:
         # Decode the output
         generated_text = response["choices"][0]["message"]["content"]
-        return [{"generated_text": generated_text}]

                 allowed_ids.add(token_id)
         return allowed_ids
+    def filter_allowed_tokens(self, input_ids: torch.Tensor, scores: np.ndarray, allowed_token_ids: set[int]) -> np.ndarray:
         """
+        Modify scores to allow only tokens in the allowed_token_ids set.
         Handles both 1D and 2D scores arrays.
         """
         if scores.ndim == 1:
             # 1D case: Apply mask directly
+            mask = np.isin(np.arange(scores.shape[0]), list(allowed_token_ids))
             scores[~mask] = float('-inf')
         elif scores.ndim == 2:
             # 2D case: Apply mask across each row
             for i in range(scores.shape[0]):
+                mask = np.isin(np.arange(scores.shape[1]), list(allowed_token_ids))
                 scores[i, ~mask] = float('-inf')
         else:
             raise ValueError(f"Unsupported scores dimension: {scores.ndim}")
         # Decode the output
         generated_text = response["choices"][0]["message"]["content"]
+        return [{"generated_text": generated_text}]