File size: 1,195 Bytes
557fb53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a0f0a5
557fb53
28d5117
 
557fb53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a0f0a5
 
 
 
 
557fb53
3a0f0a5
 
 
557fb53
3a0f0a5
28d5117
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Run-level settings.
# Dotted path naming the training entry point.
# NOTE(review): presumably resolved to a callable by the launcher — confirm.
training_fn: audio_spectrogram_transformer.train_lightning_ast
# NOTE(review): this is "mps" while `trainer.accelerator` is "gpu" — confirm
# both settings are meant to target the same hardware.
device: mps
# Fixed seed value.
# NOTE(review): presumably used for reproducibility — confirm where it is applied.
seed: 42
# Dance style codes. The `&dance_ids` anchor lets each entry under `datasets`
# reuse this exact list through `class_list: *dance_ids`.
# NOTE(review): list order presumably determines class indices — confirm
# before reordering or inserting entries.
dance_ids: &dance_ids
  - BCH
  - CHA
  - JIV
  - ECS
  - QST
  - RMB
  - SFT
  - SLS
  - SMB
  - SWZ
  - TGO
  - VWZ
  - WCS

# Data loading settings.
data_module:
  batch_size: 64
  # Reduced to avoid opening too many files at once.
  num_workers: 7
  # Disabled option, kept for reference:
  # data_subset: 0.001
  test_proportion: 0.2

# Dataset sources, keyed by dotted class path.
# NOTE(review): the nested keys are presumably passed to each class as
# constructor arguments — confirm against the loader.
# Both entries take their label set from the `dance_ids` anchor.
datasets:
  preprocessing.dataset.BestBallroomDataset:
    audio_dir: data/ballroom-songs
    class_list: *dance_ids
    audio_window_jitter: 0.7

  preprocessing.dataset.Music4DanceDataset:
    song_data_path: data/songs_cleaned.csv
    song_audio_path: data/samples
    class_list: *dance_ids
    multi_label: false
    min_votes: 1
    audio_window_jitter: 0.7

# Model settings.
model:
  # NOTE(review): what "channels" refers to is not shown in this file
  # (input feature channels vs. hidden width) — confirm against the model code.
  n_channels: 128

# Feature extractor settings (masking and noise).
feature_extractor:
  # Don't mask the data.
  mask_count: 0
  # Pretty much eliminate the noise.
  snr_mean: 15.0
  # NOTE(review): with mask_count at 0 these mask sizes presumably have no
  # effect — confirm.
  freq_mask_size: 10
  time_mask_size: 80

# Trainer settings.
# NOTE(review): the key names match PyTorch Lightning `Trainer` arguments and
# are presumably forwarded to it — confirm.
trainer:
  log_every_n_steps: 15
  # NOTE(review): top-level `device` is "mps" while this is "gpu" — confirm the
  # two settings agree on the intended hardware.
  accelerator: gpu
  max_epochs: 50
  min_epochs: 7
  fast_dev_run: false
  # Disabled options, kept for reference:
  # gradient_clip_val: 0.5
  # overfit_batches: 1

# Training environment settings.
training_environment:
  learning_rate: 0.000053
  # Disabled logger configuration, kept for reference:
  # loggers:
  #   models.training_environment.SpectrogramLogger:
  #     frequency: 100