aweber committed on
Commit
10b182c
·
verified ·
1 Parent(s): 9e5baf4

Initiate audio challenge with 1st model

Browse files
Files changed (1) hide show
  1. tasks/audio.py +26 -30
tasks/audio.py CHANGED
@@ -2,9 +2,9 @@ from fastapi import APIRouter
2
  from datetime import datetime
3
  from datasets import load_dataset
4
  from sklearn.metrics import accuracy_score
5
- import random
6
  import os
7
  import joblib
 
8
 
9
  from .utils.evaluation import AudioEvaluationRequest
10
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
@@ -18,13 +18,11 @@ DESCRIPTION = "Random Forest"
18
  ROUTE = "/audio"
19
 
20
 
21
-
22
- @router.post(ROUTE, tags=["Audio Task"],
23
- description=DESCRIPTION)
24
  async def evaluate_audio(request: AudioEvaluationRequest):
25
  """
26
  Evaluate audio classification for rainforest sound detection.
27
-
28
  Current Model: Random Baseline
29
  - Makes random predictions from the label space (0-1)
30
  - Used as a baseline for comparison
@@ -33,47 +31,45 @@ async def evaluate_audio(request: AudioEvaluationRequest):
33
  username, space_url = get_space_info()
34
 
35
  # Define the label mapping
36
- LABEL_MAPPING = {
37
- "chainsaw": 0,
38
- "environment": 1
39
- }
40
  # Load and prepare the dataset
41
  # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
42
- dataset = load_dataset(request.dataset_name,token=os.getenv("HF_TOKEN"))
43
-
44
  # Split dataset
45
- train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
 
 
46
  test_dataset = train_test["test"]
47
-
48
  # Start tracking emissions
49
  tracker.start()
50
  tracker.start_task("inference")
51
-
52
- #--------------------------------------------------------------------------------------------
53
  # YOUR MODEL INFERENCE CODE HERE
54
  # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
55
- #--------------------------------------------------------------------------------------------
56
-
57
  # Make random predictions (placeholder for actual model inference)
58
- true_labels = np.array(test_dataset["label"])
59
 
60
  # Extract audio samples from test_dataset
61
  x_test = [sample["audio"]["array"] for sample in test_dataset]
62
-
63
- clf = joblib.load()
64
  predictions = clf.predict(x_test)
65
 
66
-
67
- #--------------------------------------------------------------------------------------------
68
  # YOUR MODEL INFERENCE STOPS HERE
69
- #--------------------------------------------------------------------------------------------
70
-
71
  # Stop tracking emissions
72
  emissions_data = tracker.stop_task()
73
-
74
  # Calculate accuracy
75
  accuracy = accuracy_score(true_labels, predictions)
76
-
77
  # Prepare results dictionary
78
  results = {
79
  "username": username,
@@ -88,8 +84,8 @@ async def evaluate_audio(request: AudioEvaluationRequest):
88
  "dataset_config": {
89
  "dataset_name": request.dataset_name,
90
  "test_size": request.test_size,
91
- "test_seed": request.test_seed
92
- }
93
  }
94
-
95
- return results
 
2
  from datetime import datetime
3
  from datasets import load_dataset
4
  from sklearn.metrics import accuracy_score
 
5
  import os
6
  import joblib
7
+ from pathlib import Path
8
 
9
  from .utils.evaluation import AudioEvaluationRequest
10
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
 
18
  ROUTE = "/audio"
19
 
20
 
21
+ @router.post(ROUTE, tags=["Audio Task"], description=DESCRIPTION)
 
 
22
  async def evaluate_audio(request: AudioEvaluationRequest):
23
  """
24
  Evaluate audio classification for rainforest sound detection.
25
+
26
  Current Model: Random Baseline
27
  - Makes random predictions from the label space (0-1)
28
  - Used as a baseline for comparison
 
31
  username, space_url = get_space_info()
32
 
33
  # Define the label mapping
34
+ LABEL_MAPPING = {"chainsaw": 0, "environment": 1}
 
 
 
35
  # Load and prepare the dataset
36
  # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
37
+ dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
38
+
39
  # Split dataset
40
+ train_test = dataset["train"].train_test_split(
41
+ test_size=request.test_size, seed=request.test_seed
42
+ )
43
  test_dataset = train_test["test"]
44
+
45
  # Start tracking emissions
46
  tracker.start()
47
  tracker.start_task("inference")
48
+
49
+ # --------------------------------------------------------------------------------------------
50
  # YOUR MODEL INFERENCE CODE HERE
51
  # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
52
+ # --------------------------------------------------------------------------------------------
53
+
54
  # Make random predictions (placeholder for actual model inference)
55
+ true_labels = test_dataset["label"]
56
 
57
  # Extract audio samples from test_dataset
58
  x_test = [sample["audio"]["array"] for sample in test_dataset]
59
+
60
+ clf = joblib.load(Path("audio_models") / "RandomForestClassifier_withScaler.pkl")
61
  predictions = clf.predict(x_test)
62
 
63
+ # --------------------------------------------------------------------------------------------
 
64
  # YOUR MODEL INFERENCE STOPS HERE
65
+ # --------------------------------------------------------------------------------------------
66
+
67
  # Stop tracking emissions
68
  emissions_data = tracker.stop_task()
69
+
70
  # Calculate accuracy
71
  accuracy = accuracy_score(true_labels, predictions)
72
+
73
  # Prepare results dictionary
74
  results = {
75
  "username": username,
 
84
  "dataset_config": {
85
  "dataset_name": request.dataset_name,
86
  "test_size": request.test_size,
87
+ "test_seed": request.test_seed,
88
+ },
89
  }
90
+
91
+ return results