# Conditioning configuration: classifier-free guidance, attribute dropout,
# condition fusing, conditioner models, and training-dataset options.
#
# NOTE(review): the nesting below was reconstructed from key order and
# blank-line grouping; confirm it against the consumer's schema. If this file
# is loaded through Hydra, also confirm whether a package header comment
# belongs at the very top of the file.

# Classifier-free guidance: dropout used in training and the coefficient
# used at inference.
classifier_free_guidance:
  training_dropout: 0.2
  inference_coef: 3.0

# Attribute dropout, grouped by condition type (`text`, `wav`).
# Values are presumably per-attribute drop probabilities; verify.
attribute_dropout:
  args:
    active_on_eval: false
  text: {}
  wav:
    self_wav: 0.5

# Fusing of conditions: each list names the conditions handled by that
# method. Both conditions defined below are listed under `prepend`.
fuser:
  cross_attention_pos_emb: false
  cross_attention_pos_emb_scale: 1
  sum: []
  prepend: [self_wav, description]
  cross: []
  input_interpolate: []

# One entry per condition name. `model` presumably selects the sibling
# parameter block of the same name (`chroma_stem`, `t5`); verify.
conditioners:
  self_wav:
    model: chroma_stem
    chroma_stem:
      # Interpolated from a `sample_rate` key that is not defined in this
      # section.
      sample_rate: ${sample_rate}
      n_chroma: 12
      radix2_exp: 14
      argmax: true
      match_len_on_eval: false
      eval_wavs: null
      n_eval_wavs: 100
      cache_path: null
  description:
    model: t5
    t5:
      name: t5-base
      finetune: false
      word_dropout: 0.2
      normalize_text: false

# Training-split dataset options (the `_p` suffix presumably marks
# probabilities; verify).
dataset:
  train:
    merge_text_p: 0.25
    drop_desc_p: 0.5
    drop_other_p: 0.5