Sayoyo committed on
Commit
de3773c
·
2 Parent(s): 851cdce 1005670

Merge branch 'main' of hf.co:spaces/ACE-Step/ACE-Step

Browse files
Files changed (2) hide show
  1. pipeline_ace_step.py +5 -4
  2. ui/components.py +5 -0
pipeline_ace_step.py CHANGED
@@ -24,6 +24,7 @@ from models.ace_step_transformer import ACEStepTransformer2DModel
24
  from models.lyrics_utils.lyric_tokenizer import VoiceBpeTokenizer
25
  from apg_guidance import apg_forward, MomentumBuffer, cfg_forward, cfg_zero_star, cfg_double_condition_forward
26
  import torchaudio
 
27
 
28
 
29
  torch.backends.cudnn.benchmark = False
@@ -917,7 +918,7 @@ class ACEStepPipeline:
917
  target_latents = torch.cat([to_right_pad_gt_latents, target_latents], dim=0)
918
  return target_latents
919
 
920
- def latents2audio(self, latents, target_wav_duration_second=30, sample_rate=48000, save_path=None, format="flac"):
921
  output_audio_paths = []
922
  bs = latents.shape[0]
923
  audio_lengths = [target_wav_duration_second * sample_rate] * bs
@@ -930,7 +931,7 @@ class ACEStepPipeline:
930
  output_audio_paths.append(output_audio_path)
931
  return output_audio_paths
932
 
933
- def save_wav_file(self, target_wav, idx, save_path=None, sample_rate=48000, format="flac"):
934
  if save_path is None:
935
  logger.warning("save_path is None, using default path ./outputs/")
936
  base_path = f"./outputs"
@@ -941,7 +942,7 @@ class ACEStepPipeline:
941
 
942
  output_path_flac = f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
943
  target_wav = target_wav.float()
944
- torchaudio.save(output_path_flac, target_wav, sample_rate=sample_rate, format=format)
945
  return output_path_flac
946
 
947
  def infer_latents(self, input_audio_path):
@@ -986,7 +987,7 @@ class ACEStepPipeline:
986
  edit_n_max: float = 1.0,
987
  edit_n_avg: int = 1,
988
  save_path: str = None,
989
- format: str = "flac",
990
  batch_size: int = 1,
991
  debug: bool = False,
992
  ):
 
24
  from models.lyrics_utils.lyric_tokenizer import VoiceBpeTokenizer
25
  from apg_guidance import apg_forward, MomentumBuffer, cfg_forward, cfg_zero_star, cfg_double_condition_forward
26
  import torchaudio
27
+ import torio
28
 
29
 
30
  torch.backends.cudnn.benchmark = False
 
918
  target_latents = torch.cat([to_right_pad_gt_latents, target_latents], dim=0)
919
  return target_latents
920
 
921
+ def latents2audio(self, latents, target_wav_duration_second=30, sample_rate=48000, save_path=None, format="mp3"):
922
  output_audio_paths = []
923
  bs = latents.shape[0]
924
  audio_lengths = [target_wav_duration_second * sample_rate] * bs
 
931
  output_audio_paths.append(output_audio_path)
932
  return output_audio_paths
933
 
934
+ def save_wav_file(self, target_wav, idx, save_path=None, sample_rate=48000, format="mp3"):
935
  if save_path is None:
936
  logger.warning("save_path is None, using default path ./outputs/")
937
  base_path = f"./outputs"
 
942
 
943
  output_path_flac = f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
944
  target_wav = target_wav.float()
945
+ torchaudio.save(output_path_flac, target_wav, sample_rate=sample_rate, format=format, compression=torio.io.CodecConfig(bit_rate=320000))
946
  return output_path_flac
947
 
948
  def infer_latents(self, input_audio_path):
 
987
  edit_n_max: float = 1.0,
988
  edit_n_avg: int = 1,
989
  save_path: str = None,
990
+ format: str = "mp3",
991
  batch_size: int = 1,
992
  debug: bool = False,
993
  ):
ui/components.py CHANGED
@@ -594,6 +594,11 @@ def create_main_demo_ui(
594
  gr.Markdown(
595
  """
596
  <h1 style="text-align: center;">ACE-Step: A Step Towards Music Generation Foundation Model</h1>
 
 
 
 
 
597
  """)
598
 
599
  with gr.Tab("text2music"):
 
594
  gr.Markdown(
595
  """
596
  <h1 style="text-align: center;">ACE-Step: A Step Towards Music Generation Foundation Model</h1>
597
+ <p>
598
+ <a href="https://ace-step.github.io/">Project</a> |
599
+ <a href="https://huggingface.co/ACE-Step/ACE-Step-v1-3.5B">Checkpoints</a> |
600
+ <a href="https://discord.gg/rjAZz2xBdG">Discord</a>
601
+ </p>
602
  """)
603
 
604
  with gr.Tab("text2music"):