aweber committed on
Commit
10b182c
·
verified ·
1 Parent(s): 9e5baf4

Initiate audio challenge with 1st model

Browse files
Files changed (1) hide show
  1. tasks/audio.py +26 -30
tasks/audio.py CHANGED
@@ -2,9 +2,9 @@ from fastapi import APIRouter
2
  from datetime import datetime
3
  from datasets import load_dataset
4
  from sklearn.metrics import accuracy_score
5
- import random
6
  import os
7
  import joblib
 
8
 
9
  from .utils.evaluation import AudioEvaluationRequest
10
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
@@ -18,13 +18,11 @@ DESCRIPTION = "Random Forest"
18
  ROUTE = "/audio"
19
 
20
 
21
-
22
- @router.post(ROUTE, tags=["Audio Task"],
23
- description=DESCRIPTION)
24
  async def evaluate_audio(request: AudioEvaluationRequest):
25
  """
26
  Evaluate audio classification for rainforest sound detection.
27
-
28
  Current Model: Random Baseline
29
  - Makes random predictions from the label space (0-1)
30
  - Used as a baseline for comparison
@@ -33,47 +31,45 @@ async def evaluate_audio(request: AudioEvaluationRequest):
33
  username, space_url = get_space_info()
34
 
35
  # Define the label mapping
36
- LABEL_MAPPING = {
37
- "chainsaw": 0,
38
- "environment": 1
39
- }
40
  # Load and prepare the dataset
41
  # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
42
- dataset = load_dataset(request.dataset_name,token=os.getenv("HF_TOKEN"))
43
-
44
  # Split dataset
45
- train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
 
 
46
  test_dataset = train_test["test"]
47
-
48
  # Start tracking emissions
49
  tracker.start()
50
  tracker.start_task("inference")
51
-
52
- #--------------------------------------------------------------------------------------------
53
  # YOUR MODEL INFERENCE CODE HERE
54
  # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
55
- #--------------------------------------------------------------------------------------------
56
-
57
  # Make random predictions (placeholder for actual model inference)
58
- true_labels = np.array(test_dataset["label"])
59
 
60
  # Extract audio samples from test_dataset
61
  x_test = [sample["audio"]["array"] for sample in test_dataset]
62
-
63
- clf = joblib.load()
64
  predictions = clf.predict(x_test)
65
 
66
-
67
- #--------------------------------------------------------------------------------------------
68
  # YOUR MODEL INFERENCE STOPS HERE
69
- #--------------------------------------------------------------------------------------------
70
-
71
  # Stop tracking emissions
72
  emissions_data = tracker.stop_task()
73
-
74
  # Calculate accuracy
75
  accuracy = accuracy_score(true_labels, predictions)
76
-
77
  # Prepare results dictionary
78
  results = {
79
  "username": username,
@@ -88,8 +84,8 @@ async def evaluate_audio(request: AudioEvaluationRequest):
88
  "dataset_config": {
89
  "dataset_name": request.dataset_name,
90
  "test_size": request.test_size,
91
- "test_seed": request.test_seed
92
- }
93
  }
94
-
95
- return results
 
2
  from datetime import datetime
3
  from datasets import load_dataset
4
  from sklearn.metrics import accuracy_score
 
5
  import os
6
  import joblib
7
+ from pathlib import Path
8
 
9
  from .utils.evaluation import AudioEvaluationRequest
10
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
 
18
  ROUTE = "/audio"
19
 
20
 
21
+ @router.post(ROUTE, tags=["Audio Task"], description=DESCRIPTION)
 
 
22
  async def evaluate_audio(request: AudioEvaluationRequest):
23
  """
24
  Evaluate audio classification for rainforest sound detection.
25
+
26
  Current Model: Random Baseline
27
  - Makes random predictions from the label space (0-1)
28
  - Used as a baseline for comparison
 
31
  username, space_url = get_space_info()
32
 
33
  # Define the label mapping
34
+ LABEL_MAPPING = {"chainsaw": 0, "environment": 1}
 
 
 
35
  # Load and prepare the dataset
36
  # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
37
+ dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
38
+
39
  # Split dataset
40
+ train_test = dataset["train"].train_test_split(
41
+ test_size=request.test_size, seed=request.test_seed
42
+ )
43
  test_dataset = train_test["test"]
44
+
45
  # Start tracking emissions
46
  tracker.start()
47
  tracker.start_task("inference")
48
+
49
+ # --------------------------------------------------------------------------------------------
50
  # YOUR MODEL INFERENCE CODE HERE
51
  # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
52
+ # --------------------------------------------------------------------------------------------
53
+
54
  # Make random predictions (placeholder for actual model inference)
55
+ true_labels = test_dataset["label"]
56
 
57
  # Extract audio samples from test_dataset
58
  x_test = [sample["audio"]["array"] for sample in test_dataset]
59
+
60
+ clf = joblib.load(Path("audio_models") / "RandomForestClassifier_withScaler.pkl")
61
  predictions = clf.predict(x_test)
62
 
63
+ # --------------------------------------------------------------------------------------------
 
64
  # YOUR MODEL INFERENCE STOPS HERE
65
+ # --------------------------------------------------------------------------------------------
66
+
67
  # Stop tracking emissions
68
  emissions_data = tracker.stop_task()
69
+
70
  # Calculate accuracy
71
  accuracy = accuracy_score(true_labels, predictions)
72
+
73
  # Prepare results dictionary
74
  results = {
75
  "username": username,
 
84
  "dataset_config": {
85
  "dataset_name": request.dataset_name,
86
  "test_size": request.test_size,
87
+ "test_seed": request.test_seed,
88
+ },
89
  }
90
+
91
+ return results