# Example: evaluating an audio classification model on the test and validation splits

In [1]:
# Model identifier passed to `from_pretrained` for both the classifier and its feature extractor.
MODEL_NAME = "gaepago-20"
# Dataset identifier passed to `load_dataset`; its "test" and "validation" splits are evaluated below.
DATASET_NAME = "Gae8J/modeling_v1"

## load dataset (test data)

In [2]:
from datasets import Audio, load_dataset

# Load the dataset and re-declare its "audio" column so clips are decoded at 16 kHz.
# NOTE(review): 16 kHz presumably matches what the model's feature extractor expects — confirm.
dataset = load_dataset(DATASET_NAME).cast_column("audio", Audio(sampling_rate=16000))

# Test split, plus the sampling rate recorded on its (re-cast) audio feature.
test_data = dataset["test"]
sampling_rate = test_data.features["audio"].sampling_rate

Found cached dataset parquet (/home/jovyan/.cache/huggingface/datasets/Gae8J___parquet/Gae8J--modeling_v1-b480c78c61a26816/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


  0%|          | 0/3 [00:00<?, ?it/s]

## run inference (test data)

In [5]:
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
import torch

# Load the classifier and its feature extractor from the same checkpoint.
model = AutoModelForAudioClassification.from_pretrained(MODEL_NAME)
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)

# Predicted and ground-truth label names, one entry per example, in dataset order.
preds = []
gts = []
# Iterate rows directly: indexing `test_data[i]` separately for the audio and for the
# label would fetch (and decode) the same row twice per example.
for example in test_data:
    inputs = feature_extractor(
        example["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt"
    )
    with torch.no_grad():  # inference only; no gradients needed
        logits = model(**inputs).logits
    # One clip per forward pass, so an argmax over the class axis yields a single id.
    # Being explicit about the axis avoids a silently wrong flat index if batching is added.
    predicted_class_ids = logits.argmax(dim=-1).item()
    predicted_label = model.config.id2label[predicted_class_ids]
    preds.append(predicted_label)
    # NOTE(review): assumes the dataset's integer labels use the same id order as
    # model.config.id2label — confirm against the training setup.
    gts.append(model.config.id2label[example["label"]])

## performance (test data)

In [6]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 text report comparing the ground-truth label
# names with the predicted ones collected by the inference loop.
test_performance = classification_report(y_true=gts, y_pred=preds)
print(test_performance)

              precision    recall  f1-score   support

        bark       0.56      0.62      0.59         8
    growling       1.00      0.83      0.91         6
        howl       0.75      0.86      0.80         7
     panting       1.00      0.80      0.89        10
     whimper       0.38      0.43      0.40         7

    accuracy                           0.71        38
   macro avg       0.74      0.71      0.72        38
weighted avg       0.75      0.71      0.72        38



## load dataset (validation data)

In [7]:
from datasets import Audio, load_dataset

# Reload the dataset and re-declare its "audio" column so clips are decoded at 16 kHz.
# NOTE(review): 16 kHz presumably matches what the model's feature extractor expects — confirm.
dataset = load_dataset(DATASET_NAME).cast_column("audio", Audio(sampling_rate=16000))

# NOTE: despite its name, `test_data` is rebound to the *validation* split here so the
# evaluation cell that follows can be reused unchanged; the test split is no longer
# reachable through this name after this cell runs.
test_data = dataset["validation"]
sampling_rate = test_data.features["audio"].sampling_rate



  0%|          | 0/3 [00:00<?, ?it/s]

## run inference (validation data)

In [9]:
# AutoFeatureExtractor is imported here as well so this cell does not depend on an
# earlier cell having brought it into the kernel namespace.
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
import torch

# Load the classifier and its feature extractor from the same checkpoint.
model = AutoModelForAudioClassification.from_pretrained(MODEL_NAME)
feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)

# Predicted and ground-truth label names, one entry per example, in dataset order.
# `test_data` holds the validation split at this point (rebound in the preceding cell).
preds = []
gts = []
# Iterate rows directly: indexing `test_data[i]` separately for the audio and for the
# label would fetch (and decode) the same row twice per example.
for example in test_data:
    inputs = feature_extractor(
        example["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt"
    )
    with torch.no_grad():  # inference only; no gradients needed
        logits = model(**inputs).logits
    # One clip per forward pass, so an argmax over the class axis yields a single id.
    # Being explicit about the axis avoids a silently wrong flat index if batching is added.
    predicted_class_ids = logits.argmax(dim=-1).item()
    predicted_label = model.config.id2label[predicted_class_ids]
    preds.append(predicted_label)
    # NOTE(review): assumes the dataset's integer labels use the same id order as
    # model.config.id2label — confirm against the training setup.
    gts.append(model.config.id2label[example["label"]])

## performance (validation data)

In [10]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 text report comparing the ground-truth label
# names with the predicted ones collected by the inference loop.
valid_performance = classification_report(y_true=gts, y_pred=preds)
print(valid_performance)

              precision    recall  f1-score   support

        bark       0.75      0.67      0.71         9
    growling       1.00      0.71      0.83         7
        howl       0.86      0.86      0.86         7
     panting       1.00      0.70      0.82        10
     whimper       0.54      1.00      0.70         7

    accuracy                           0.78        40
   macro avg       0.83      0.79      0.78        40
weighted avg       0.84      0.78      0.78        40

