# @package __global__

# Execute-only setup for audio generation on AudioCaps:
# evaluation with objective metrics.
# execute_only=evaluate

dataset:
  max_audio_duration: null
  # Ensure the proper values are broadcast here for evaluate.
  # NOTE(review): `evaluate` and `generate` are assumed to be per-stage
  # overrides nested under `dataset` - confirm against the base config.
  evaluate:
    min_audio_duration: 1.0  # some metrics require a minimum audio length
    max_audio_duration: null  # all samples from AudioCaps should be ~10s
    num_samples: null
    segment_duration: null
  generate:
    min_audio_duration: 1.0
    max_audio_duration: null
    num_samples: 500

evaluate:
  metrics:
    fad: true
    kld: true
    text_consistency: true

metrics:
  kld:
    passt:
      pretrained_length: 10  # similar to results reported in the AudioGen paper