submission-template

Sleeping

App Files Files Community

vlaurent17 committed on Jan 29

Commit

0ff353f

verified ·

1 Parent(s): 3c1b5c3

Upload 3 files

Browse files

Files changed (3) hide show

H.npy +3 -0
audio.py +130 -0
model.joblib +3 -0

H.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e5b7bf9ff7240c43f532650b661e94d16f6da2d0be2c6c583a5b6cc0da226b87
+size 34688

audio.py ADDED Viewed

	@@ -0,0 +1,130 @@

+from fastapi import APIRouter
+from datetime import datetime
+from datasets import load_dataset
+from sklearn.metrics import accuracy_score
+import random
+import os
+import numpy as np
+import librosa
+import joblib
+from .utils.evaluation import AudioEvaluationRequest
+from .utils.emissions import tracker, clean_emissions_data, get_space_info
+from dotenv import load_dotenv
+# Load variables from a .env file (if present) into the process environment;
+# HF_TOKEN is read from the environment further down when the dataset is loaded.
+load_dotenv()
+# Router on which this module's endpoint is registered.
+router = APIRouter()
+# Used as the endpoint's description and reported back as "model_description".
+# NOTE(review): still the template's "Random Baseline" although the endpoint
+# below predicts with a stored model - confirm and update.
+DESCRIPTION = "Random Baseline"
+# URL path of the evaluation endpoint.
+ROUTE = "/audio"
+def create_spec(dataset, target_sampling_rate=3000):
+    spectograms = []
+    audio_length = int(36000/(12000/target_sampling_rate))
+    for d in dataset:
+        audio_sample = librosa.resample(
+            d["audio"]["array"],
+            orig_sr= d["audio"]["sampling_rate"],
+            target_sr=target_sampling_rate
+        )
+        if len(audio_sample) == 0:
+            continue
+        if len(audio_sample) < audio_length:
+            padding_needed = audio_length - len(audio_sample)
+            repeats = (padding_needed // len(audio_sample)) + 1
+            audio_sample = np.concatenate([audio_sample] + [audio_sample[:padding_needed]] * repeats)[:audio_length]
+        elif len(audio_sample) > audio_length:
+            audio_sample = audio_sample[:audio_length]
+        rms = np.sqrt(np.mean(np.square(audio_sample)))
+        scalar = 10 ** (-20 / 20) / (rms + 1e-8)
+        mel = librosa.feature.melspectrogram(
+            y=audio_sample*scalar,
+            sr=12000,
+            n_fft=2048,
+            hop_length=1024,
+            n_mels=12,
+            power=2.0,
+        )
+        mel_db = librosa.power_to_db(mel, ref=np.max)
+        mel_db_normalized = (mel_db - mel_db.mean()) / (mel_db.std() + 1e-8)
+        spectograms.append(mel_db_normalized.T.flatten())
+    return np.stack(spectograms)
+@router.post(ROUTE, tags=["Audio Task"],
+             description=DESCRIPTION)
+async def evaluate_audio(request: AudioEvaluationRequest):
+    """
+    Evaluate audio classification for rainforest sound detection.
+    Current Model: Random Baseline
+    - Makes random predictions from the label space (0-1)
+    - Used as a baseline for comparison
+    """
+    # Get space info
+    username, space_url = get_space_info()
+    # Define the label mapping
+    LABEL_MAPPING = {
+        "chainsaw": 0,
+        "environment": 1
+    }
+    # Load and prepare the dataset
+    # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
+    dataset = load_dataset(request.dataset_name,token=os.getenv("HF_TOKEN"))
+    # Split dataset
+    train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
+    test_dataset = train_test["test"]
+    # Start tracking emissions
+    tracker.start()
+    tracker.start_task("inference")
+    test_spec = create_spec(test_dataset)
+    H = np.load("H.npy")
+    W_test = np.dot(test_spec, H)
+    model = joblib.load('model.joblib')
+    #--------------------------------------------------------------------------------------------
+    # YOUR MODEL INFERENCE CODE HERE
+    # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
+    #--------------------------------------------------------------------------------------------
+    # Make random predictions (placeholder for actual model inference)
+    true_labels = test_dataset["label"]
+    predictions = model.predict(W_test)
+    #--------------------------------------------------------------------------------------------
+    # YOUR MODEL INFERENCE STOPS HERE
+    #--------------------------------------------------------------------------------------------
+    # Stop tracking emissions
+    emissions_data = tracker.stop_task()
+    # Calculate accuracy
+    accuracy = accuracy_score(true_labels, predictions)
+    # Prepare results dictionary
+    results = {
+        "username": username,
+        "space_url": space_url,
+        "submission_timestamp": datetime.now().isoformat(),
+        "model_description": DESCRIPTION,
+        "accuracy": float(accuracy),
+        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
+        "emissions_gco2eq": emissions_data.emissions * 1000,
+        "emissions_data": clean_emissions_data(emissions_data),
+        "api_route": ROUTE,
+        "dataset_config": {
+            "dataset_name": request.dataset_name,
+            "test_size": request.test_size,
+            "test_seed": request.test_seed
+        }
+    }
+    return results

model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:33688213987985901a75886484a540a423d9e0d5967fd4a49a79c74aadb8697a
+size 1350138