submission-template

Sleeping

App Files Files

CindyDelage committed on Jan 28

Commit

e9b633a

verified ·

1 Parent(s): abbb8ef

Upload audio.py

Browse files

Files changed (1) hide show

tasks/audio.py +118 -0

tasks/audio.py ADDED Viewed

	@@ -0,0 +1,118 @@

+from fastapi import APIRouter
+from datetime import datetime
+from datasets import load_dataset
+from sklearn.metrics import accuracy_score
+import random
+import os
+from .utils.evaluation import AudioEvaluationRequest
+from .utils.emissions import tracker, clean_emissions_data, get_space_info
+from dotenv import load_dotenv
+# Load settings from a .env file, if present, before anything reads the environment
+# (HF_TOKEN is read via os.getenv inside the endpoint below)
+load_dotenv()
+# Router on which the audio evaluation endpoint is registered
+router = APIRouter()
+# Used as the endpoint description and echoed back as "model_description" in the results
+# NOTE(review): the endpoint runs a transformers pipeline, not a random baseline - confirm this text is still intended
+DESCRIPTION = "Random Baseline"
+# Path of the endpoint; also echoed back as "api_route" in the results
+ROUTE = "/audio"
+@router.post(ROUTE, tags=["Audio Task"],
+             description=DESCRIPTION)
+async def evaluate_audio(request: AudioEvaluationRequest):
+    """
+    Evaluate audio classification for rainforest sound detection.
+    Current Model: Random Baseline
+    - Makes random predictions from the label space (0-1)
+    - Used as a baseline for comparison
+    """
+    # Get space info
+    username, space_url = get_space_info()
+    # Define the label mapping
+    LABEL_MAPPING = {
+        "chainsaw": 0,
+        "environment": 1
+    }
+    # Load and prepare the dataset
+    # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
+    dataset = load_dataset(request.dataset_name,token=dataset_name,token=os.getenv("HF_TOKEN"))
+    # Split dataset
+    train_test = dataset["train"]
+    test_dataset = dataset["test"]
+    # Start tracking emissions
+    tracker.start()
+    tracker.start_task("inference")
+    #--------------------------------------------------------------------------------------------
+    # YOUR MODEL INFERENCE CODE HERE
+    # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
+    #--------------------------------------------------------------------------------------------
+    # Make random predictions (placeholder for actual model inference)
+    true_labels = test_dataset["label"]
+    import torch
+    from transformers import pipeline
+    from sklearn import preprocessing
+    #encoded_data_fine_tuned_model = train_test["train"].map(preprocess_function, remove_columns="audio", batched=True)
+    from datasets import Dataset
+    # Utilisation du pipeline directement sur le dataset
+    classifier = pipeline("audio-classification", model="CindyDelage/Challenge_HuggingFace_DFG_FrugalAI", feature_extractor=feature_extractor)
+    # Correctly access the audio data
+    audio_data = [example["array"] for example in dataset["test"]["audio"]]
+    # Prédiction sur tout le dataset
+    results = classifier(audio_data, batch_size=8)
+    predictions = []
+    for result in results:
+        # Check if result is a dictionary
+        if isinstance(result, dict):
+            # Get the label with the highest score
+            predicted_label = result['label']
+        else:
+            # If result is not a dictionary, access it as a list
+            predicted_label = result[0]['label']  # Assuming the dictionary is the first element
+        # Assign 1 for "environment", 0 for "chainsaw"
+        if predicted_label == 'environment':
+            predictions.append(1)
+        else:
+            predictions.append(0)
+    #--------------------------------------------------------------------------------------------
+    # YOUR MODEL INFERENCE STOPS HERE
+    #--------------------------------------------------------------------------------------------
+    # Stop tracking emissions
+    emissions_data = tracker.stop_task()
+    # Calculate accuracy
+    accuracy = accuracy_score(true_labels, predictions)
+    # Prepare results dictionary
+    results = {
+        "username": username,
+        "space_url": space_url,
+        "submission_timestamp": datetime.now().isoformat(),
+        "model_description": DESCRIPTION,
+        "accuracy": float(accuracy),
+        "energy_consumed_wh": emissions_data.energy_consumed * 1000,
+        "emissions_gco2eq": emissions_data.emissions * 1000,
+        "emissions_data": clean_emissions_data(emissions_data),
+        "api_route": ROUTE,
+        "dataset_config": {
+            "dataset_name": request.dataset_name,
+            "test_size": request.test_size,
+            "test_seed": request.test_seed
+        }
+    }
+    return results