cocktailpeanut committed on
Commit
de3c23d
·
1 Parent(s): 38449a6
diffrhythm/infer/infer_utils.py CHANGED
@@ -19,7 +19,8 @@ def prepare_model(device):
19
  model_config = json.load(f)
20
  dit_model_cls = DiT
21
  cfm = CFM(
22
- transformer=dit_model_cls(**model_config["model"], use_style_prompt=True, max_pos=6144),
 
23
  num_channels=model_config["model"]['mel_dim'],
24
  use_style_prompt=True
25
  )
 
19
  model_config = json.load(f)
20
  dit_model_cls = DiT
21
  cfm = CFM(
22
+ #transformer=dit_model_cls(**model_config["model"], use_style_prompt=True, max_pos=6144),
23
+ transformer=dit_model_cls(**model_config["model"], use_style_prompt=True, max_pos=2048),
24
  num_channels=model_config["model"]['mel_dim'],
25
  use_style_prompt=True
26
  )
diffrhythm/model/cfm.py CHANGED
@@ -111,8 +111,8 @@ class CFM(nn.Module):
111
  cfg_strength=4.0,
112
  sway_sampling_coef=None,
113
  seed: int | None = None,
114
- #max_duration=4096,
115
- max_duration=6144,
116
  vocoder: Callable[[float["b d n"]], float["b nw"]] | None = None, # noqa: F722
117
  no_ref_audio=False,
118
  duplicate_test=False,
 
111
  cfg_strength=4.0,
112
  sway_sampling_coef=None,
113
  seed: int | None = None,
114
+ max_duration=4096,
115
+ #max_duration=6144,
116
  vocoder: Callable[[float["b d n"]], float["b nw"]] | None = None, # noqa: F722
117
  no_ref_audio=False,
118
  duplicate_test=False,