camillebrl committed
Commit 92fa037 · verified · 1 Parent(s): b134905

Update tasks/text.py

Files changed (1)
  1. tasks/text.py +7 -21
tasks/text.py CHANGED
@@ -97,23 +97,8 @@ async def evaluate_text(request: TextEvaluationRequest):
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
 
-    # Make random predictions (placeholder for actual model inference)
     true_labels = test_dataset["label"]
-    config = AutoConfig.from_pretrained("camillebrl/ModernBERT-envclaims-overfit")
-    label2id = config.label2id
-    # classifier = pipeline(
-    #     "text-classification",
-    #     "camillebrl/ModernBERT-envclaims-overfit",
-    #     device="cpu"
-    # )
-    # print("len dataset : ", len(test_dataset["quote"]))
-    # predictions = []
-    # for batch in range(0, len(test_dataset["quote"]), 32):  # Adjust the batch size
-    #     batch_quotes = test_dataset["quote"][batch:batch + 32]
-    #     batch_predictions = classifier(batch_quotes)
-    #     predictions.extend([label2id[pred["label"]] for pred in batch_predictions])
-    # print(predictions)
-    # print("final predictions : ", predictions)
+
     # Initialize the model once
     classifier = TextClassifier()
 
@@ -126,6 +111,9 @@ async def evaluate_text(request: TextEvaluationRequest):
         for i in range(num_batches)
     ]
 
+    # Initialize batch_results before parallel processing
+    batch_results = [[] for _ in range(num_batches)]
+
     # Process batches in parallel
     max_workers = min(os.cpu_count(), 4)  # Limit to 4 workers or CPU count
     print(f"Processing with {max_workers} workers")
@@ -152,14 +140,12 @@ async def evaluate_text(request: TextEvaluationRequest):
             batch_results[batch_idx] = []
 
     # Flatten predictions while maintaining order
-    all_predictions = [pred for batch_preds in batch_results for pred in batch_preds]
-    print(f"Total predictions collected: {len(all_predictions)}")
-
+    predictions = [pred for batch_preds in batch_results for pred in batch_preds]
+    print(f"Total predictions collected: {len(predictions)}")
 
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------
-
 
     # Stop tracking emissions
     emissions_data = tracker.stop_task()
@@ -188,4 +174,4 @@ async def evaluate_text(request: TextEvaluationRequest):
 
     print("results : ", results)
 
-    return results
+    return results
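
The substantive change in this commit is the pre-allocated batch_results list: every worker writes into its own slot, so batches that finish out of order cannot scramble the flattened predictions. Below is a minimal sketch of that pattern, assuming the surrounding code in tasks/text.py uses concurrent.futures for the parallel pass; process_batch is a hypothetical stand-in for whatever method TextClassifier actually exposes, and only the pre-allocation, worker-count, and flatten lines appear in the diff itself.

import os
from concurrent.futures import ThreadPoolExecutor, as_completed

def run_parallel_inference(classifier, batches):
    # Initialize batch_results before parallel processing: one slot per
    # batch, so out-of-order completion cannot reorder the output.
    batch_results = [[] for _ in range(len(batches))]

    # Limit to 4 workers or CPU count ("or 1" guards the None cpu_count case)
    max_workers = min(os.cpu_count() or 1, 4)
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Key each future by the index of the batch it processes.
        future_to_idx = {
            executor.submit(classifier.process_batch, batch): i
            for i, batch in enumerate(batches)
        }
        for future in as_completed(future_to_idx):
            batch_idx = future_to_idx[future]
            try:
                batch_results[batch_idx] = future.result()
            except Exception as e:
                # As in the file: a failed batch leaves an empty slot.
                print(f"Batch {batch_idx} failed: {e}")
                batch_results[batch_idx] = []

    # Flatten predictions while maintaining order
    return [pred for batch_preds in batch_results for pred in batch_preds]

Keying futures by batch index and writing into a pre-sized list is the standard way to recover submission order from as_completed, which otherwise yields results in completion order.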