camillebrl committed (verified)
Commit 93741cc · 1 Parent(s): f5ac2a0

Update tasks/text.py

Files changed (1)
  1. tasks/text.py +38 -17
tasks/text.py CHANGED
@@ -32,16 +32,26 @@ class TextClassifier:
             batch_size=16
         )
 
-    def process_batch(self, batch: List[str]) -> List[int]:
+    def process_batch(self, batch: List[str], batch_idx: int) -> Tuple[List[int], int]:
         """
-        Process a batch of texts and return their predictions
+        Process a batch of texts and return their predictions along with batch index
+
+        Args:
+            batch: List of texts to process
+            batch_idx: Index of the current batch
+
+        Returns:
+            Tuple containing list of predictions and batch index
         """
         try:
+            print(f"Processing batch {batch_idx} with {len(batch)} items")
             batch_preds = self.classifier(list(batch))
-            return [self.label2id[pred[0]["label"]] for pred in batch_preds]
+            predictions = [self.label2id[pred[0]["label"]] for pred in batch_preds]
+            print(f"Completed batch {batch_idx} with {len(predictions)} predictions")
+            return predictions, batch_idx
         except Exception as e:
-            print(f"Error processing batch: {e}")
-            return []
+            print(f"Error in batch {batch_idx}: {str(e)}")
+            return [], batch_idx
 
 @router.post(ROUTE, tags=["Text Task"],
              description=DESCRIPTION)
@@ -122,18 +132,29 @@ async def evaluate_text(request: TextEvaluationRequest):
 
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
         # Submit all batches for processing
-        futures = [
-            executor.submit(classifier.process_batch, batch)
-            for batch in batches
-        ]
-
-        # Collect results in order
-        for future in futures:
-            try:
-                batch_preds = future.result()
-                predictions.extend(batch_preds)
-            except Exception as e:
-                print(f"Batch processing failed: {e}")
+        future_to_batch = {
+            executor.submit(
+                classifier.process_batch,
+                batch,
+                idx
+            ): idx for idx, batch in enumerate(batches)
+        }
+
+        # Collect results in order
+        for future in future_to_batch:
+            batch_idx = future_to_batch[future]
+            try:
+                predictions, idx = future.result()
+                batch_results[idx] = predictions
+                print(f"Stored results for batch {idx}")
+            except Exception as e:
+                print(f"Failed to get results for batch {batch_idx}: {e}")
+                batch_results[batch_idx] = []
+
+        # Flatten predictions while maintaining order
+        all_predictions = [pred for batch_preds in batch_results for pred in batch_preds]
+        print(f"Total predictions collected: {len(all_predictions)}")
+
 
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
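
For context, the change keys each submitted future by its batch index so results can be written back into a per-batch slot and flattened in submission order, instead of extending a shared list as futures return. Below is a minimal, self-contained sketch of that pattern; fake_classify, the sample texts, and the up-front allocation of batch_results are placeholders for illustration and are not taken from tasks/text.py (the real code calls the transformers pipeline held in self.classifier and maps labels through label2id).

# Illustrative sketch only -- fake_classify and the sample data are stand-ins,
# not part of tasks/text.py.
from concurrent.futures import ThreadPoolExecutor
from typing import List, Tuple

def fake_classify(batch: List[str], batch_idx: int) -> Tuple[List[int], int]:
    # Stand-in for TextClassifier.process_batch: one label id per input text.
    return [len(text) % 2 for text in batch], batch_idx

texts = [f"sample text {i}" for i in range(10)]
batch_size = 4
batches = [texts[i:i + batch_size] for i in range(0, len(texts), batch_size)]

# One result slot per batch, so out-of-order completion cannot scramble output.
batch_results: List[List[int]] = [[] for _ in batches]

with ThreadPoolExecutor(max_workers=2) as executor:
    # Key each future by its batch index, as the commit does.
    future_to_batch = {
        executor.submit(fake_classify, batch, idx): idx
        for idx, batch in enumerate(batches)
    }
    for future in future_to_batch:
        batch_idx = future_to_batch[future]
        try:
            preds, idx = future.result()
            batch_results[idx] = preds
        except Exception as exc:
            print(f"Batch {batch_idx} failed: {exc}")
            batch_results[batch_idx] = []

# Flatten per-batch lists back into a single ordered prediction list.
all_predictions = [pred for preds in batch_results for pred in preds]
print(all_predictions)  # 10 predictions, in the original text order

Iterating over the future_to_batch keys walks the futures in submission order; future.result() then blocks until that batch finishes, but because every worker is already running, overall throughput is unaffected and the final ordering stays deterministic.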