CSH-1220 committed on
Commit
075c9a6
·
1 Parent(s): 4a1c63d

Update requirement

Browse files
Files changed (2) hide show
  1. app.py +29 -23
  2. requirements.txt +1 -0
app.py CHANGED
@@ -2,19 +2,24 @@ import os
2
  import gradio as gr
3
  import torchaudio
4
  import torch
 
5
  from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline
6
 
 
7
  pipeline = AudioLDM2MorphPipeline.from_pretrained("cvssp/audioldm2-large", torch_dtype=torch.float32)
8
  pipeline.to("cuda")
9
 
 
10
  def morph_audio(audio_file1, audio_file2, prompt1, prompt2, negative_prompt1="Low quality", negative_prompt2="Low quality"):
11
  save_lora_dir = "output"
12
  os.makedirs(save_lora_dir, exist_ok=True)
13
 
 
14
  waveform, sample_rate = torchaudio.load(audio_file1)
15
  duration = waveform.shape[1] / sample_rate
16
  duration = int(duration)
17
 
 
18
  _ = pipeline(
19
  audio_file=audio_file1,
20
  audio_file2=audio_file2,
@@ -27,7 +32,7 @@ def morph_audio(audio_file1, audio_file2, prompt1, prompt2, negative_prompt1="Lo
27
  negative_prompt_2=negative_prompt2,
28
  save_lora_dir=save_lora_dir,
29
  use_adain=True,
30
- use_reschedule=True,
31
  num_inference_steps=50,
32
  lamd=0.6,
33
  output_path=save_lora_dir,
@@ -41,32 +46,33 @@ def morph_audio(audio_file1, audio_file2, prompt1, prompt2, negative_prompt1="Lo
41
  guidance_scale=7.5,
42
  )
43
 
 
44
  output_paths = [os.path.join(save_lora_dir, file) for file in os.listdir(save_lora_dir) if file.endswith(".wav")]
45
  return output_paths
46
 
 
47
  def interface(audio1, audio2, prompt1, prompt2):
48
  output_paths = morph_audio(audio1, audio2, prompt1, prompt2)
49
- return output_paths
50
 
51
- # Gradio UI
52
- with gr.Blocks() as demo:
53
- gr.Markdown("### Audio Morphing Demo with AudioLDM2")
54
-
55
- with gr.Row():
56
- audio_file1 = gr.Audio(label="Upload Audio File 1", type="filepath")
57
- audio_file2 = gr.Audio(label="Upload Audio File 2", type="filepath")
58
-
59
- with gr.Row():
60
- prompt1 = gr.Textbox(label="Prompt for Audio File 1")
61
- prompt2 = gr.Textbox(label="Prompt for Audio File 2")
62
-
63
- output_audios = gr.Audio(label="Generated Morphing Audios", type="filepath", interactive=False)
64
- morph_button = gr.Button("Generate Morphing Audio")
65
-
66
- morph_button.click(
67
- interface,
68
- inputs=[audio_file1, audio_file2, prompt1, prompt2],
69
- outputs=[output_audios]
70
- )
71
 
72
- demo.launch()
 
 
2
  import gradio as gr
3
  import torchaudio
4
  import torch
5
+ import numpy as np
6
  from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline
7
 
8
+ # Initialize AudioLDM2 Pipeline
9
  pipeline = AudioLDM2MorphPipeline.from_pretrained("cvssp/audioldm2-large", torch_dtype=torch.float32)
10
  pipeline.to("cuda")
11
 
12
+ # Audio morphing function
13
  def morph_audio(audio_file1, audio_file2, prompt1, prompt2, negative_prompt1="Low quality", negative_prompt2="Low quality"):
14
  save_lora_dir = "output"
15
  os.makedirs(save_lora_dir, exist_ok=True)
16
 
17
+ # Load audio and compute duration
18
  waveform, sample_rate = torchaudio.load(audio_file1)
19
  duration = waveform.shape[1] / sample_rate
20
  duration = int(duration)
21
 
22
+ # Perform morphing using the pipeline
23
  _ = pipeline(
24
  audio_file=audio_file1,
25
  audio_file2=audio_file2,
 
32
  negative_prompt_2=negative_prompt2,
33
  save_lora_dir=save_lora_dir,
34
  use_adain=True,
35
+ use_reschedule=False,
36
  num_inference_steps=50,
37
  lamd=0.6,
38
  output_path=save_lora_dir,
 
46
  guidance_scale=7.5,
47
  )
48
 
49
+ # Collect the output file paths
50
  output_paths = [os.path.join(save_lora_dir, file) for file in os.listdir(save_lora_dir) if file.endswith(".wav")]
51
  return output_paths
52
 
53
+ # Gradio interface function
54
  def interface(audio1, audio2, prompt1, prompt2):
55
  output_paths = morph_audio(audio1, audio2, prompt1, prompt2)
56
+ return output_paths
57
 
58
+ # Gradio Interface
59
+ demo = gr.Interface(
60
+ fn=interface,
61
+ inputs=[
62
+ gr.Audio(label="Upload Audio File 1", type="filepath"),
63
+ gr.Audio(label="Upload Audio File 2", type="filepath"),
64
+ # gr.Slider(4, 6, step=1, label="Octave 1"),
65
+ gr.Textbox(label="Prompt for Audio File 1"),
66
+ gr.Textbox(label="Prompt for Audio File 2")
67
+ ],
68
+ outputs=[
69
+ gr.Audio(label="Generated Tone 1"),
70
+ gr.Audio(label="Generated Tone 2"),
71
+ gr.Audio(label="Generated Tone 3"),
72
+ gr.Audio(label="Generated Tone 4"),
73
+ gr.Audio(label="Generated Tone 5"),
74
+ ],
75
+ )
 
 
76
 
77
+ if __name__ == "__main__":
78
+ demo.launch()
requirements.txt CHANGED
@@ -76,3 +76,4 @@ uvicorn==0.32.1
76
  wavaugment==0.2
77
  websockets==12.0
78
  zstandard==0.23.0
 
 
76
  wavaugment==0.2
77
  websockets==12.0
78
  zstandard==0.23.0
79
+ timm