Spaces:
Running
Running
File size: 1,088 Bytes
99bbd30 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import logging
import os
from collections import defaultdict
from pathlib import Path
from typing import Union
import pandas as pd
import torch
from torch.utils.data.dataset import Dataset
log = logging.getLogger()
class AudioCapsData(Dataset):
def __init__(self, audio_path: Union[str, Path], csv_path: Union[str, Path]):
df = pd.read_csv(csv_path).to_dict(orient='records')
audio_files = sorted(os.listdir(audio_path))
audio_files = set(
[Path(f).stem for f in audio_files if f.endswith('.wav') or f.endswith('.flac')])
self.data = []
for row in df:
self.data.append({
'name': row['name'],
'caption': row['caption'],
})
self.audio_path = Path(audio_path)
self.csv_path = Path(csv_path)
log.info(f'Found {len(self.data)} matching audio files in {self.audio_path}')
def __getitem__(self, idx: int) -> torch.Tensor:
return self.data[idx]
def __len__(self):
return len(self.data)
|