camillebrl committed
Commit f5ac2a0 · verified · 1 Parent(s): a66df45

Update tasks/text.py

Files changed (1):
  tasks/text.py  +35 -27
tasks/text.py CHANGED
@@ -21,6 +21,28 @@ router = APIRouter()
 DESCRIPTION = "Random Baseline"
 ROUTE = "/text"
 
+class TextClassifier:
+    def __init__(self):
+        self.config = AutoConfig.from_pretrained("camillebrl/ModernBERT-envclaims-overfit")
+        self.label2id = self.config.label2id
+        self.classifier = pipeline(
+            "text-classification",
+            "camillebrl/ModernBERT-envclaims-overfit",
+            device="cpu",
+            batch_size=16
+        )
+
+    def process_batch(self, batch: List[str]) -> List[int]:
+        """
+        Process a batch of texts and return their predictions
+        """
+        try:
+            batch_preds = self.classifier(list(batch))
+            return [self.label2id[pred[0]["label"]] for pred in batch_preds]
+        except Exception as e:
+            print(f"Error processing batch: {e}")
+            return []
+
 @router.post(ROUTE, tags=["Text Task"],
              description=DESCRIPTION)
 async def evaluate_text(request: TextEvaluationRequest):
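The new TextClassifier bundles the pipeline and the config's label2id mapping into a single object, so worker threads no longer need them passed in as separate arguments. A minimal sketch of exercising the class on its own, assuming the repo's import path and a made-up sample sentence (neither is part of this commit):

    from tasks.text import TextClassifier  # import path assumed from this repo's layout

    clf = TextClassifier()

    # One call per batch; each input text maps to one integer label id.
    sample = ["The company claims its packaging is fully recyclable."]
    print(clf.process_batch(sample))  # e.g. [2], depending on config.label2id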
@@ -83,12 +105,7 @@ async def evaluate_text(request: TextEvaluationRequest):
     # print(predictions)
     # print("final predictions : ", predictions)
     # Initialize the model once
-    classifier = pipeline(
-        "text-classification",
-        "camillebrl/ModernBERT-envclaims-overfit",
-        device="cpu", # Explicitly set device
-        batch_size=16 # Set batch size for pipeline
-    )
+    classifier = TextClassifier()
 
     # Prepare batches
     batch_size = 32
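The pipeline's internal batch_size stays at 16, while the request texts are chunked into outer batches of 32 for the thread pool. The chunking itself sits outside this hunk; a plausible sketch, assuming the inputs live in a test_dataset["quote"] column (field name assumed, not shown in the diff):

    batch_size = 32
    texts = test_dataset["quote"]  # field name assumed
    batches = [texts[i:i + batch_size] for i in range(0, len(texts), batch_size)]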
@@ -105,27 +122,18 @@ async def evaluate_text(request: TextEvaluationRequest):
 
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
         # Submit all batches for processing
-        future_to_batch = {
-            executor.submit(
-                process_batch,
-                batch,
-                classifier,
-                label2id
-            ): i for i, batch in enumerate(batches)
-        }
-
-        # Collect results in order
-        batch_predictions = [[] for _ in range(len(batches))]
-        for future in future_to_batch:
-            batch_idx = future_to_batch[future]
-            try:
-                batch_predictions[batch_idx] = future.result()
-            except Exception as e:
-                print(f"Batch {batch_idx} generated an exception: {e}")
-                batch_predictions[batch_idx] = []
-
-        # Flatten predictions
-        predictions = [pred for batch in batch_predictions for pred in batch]
+        futures = [
+            executor.submit(classifier.process_batch, batch)
+            for batch in batches
+        ]
+
+        # Collect results in order
+        for future in futures:
+            try:
+                batch_preds = future.result()
+                predictions.extend(batch_preds)
+            except Exception as e:
+                print(f"Batch processing failed: {e}")
 
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
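The futures list is iterated in submission order, so result() yields batch outputs already aligned with the inputs; the per-batch index map and the final flatten step from the old version become unnecessary. A self-contained sketch of the ordering guarantee the new code relies on (square_batch is a toy stand-in for process_batch):

    from concurrent.futures import ThreadPoolExecutor

    def square_batch(batch):
        return [x * x for x in batch]

    batches = [[1, 2], [3, 4], [5, 6]]
    predictions = []
    with ThreadPoolExecutor(max_workers=2) as executor:
        futures = [executor.submit(square_batch, b) for b in batches]
        for future in futures:  # submission order, not completion order
            predictions.extend(future.result())

    print(predictions)  # [1, 4, 9, 16, 25, 36]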
 