Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +3 -17
  3. requirements.txt +1 -1
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 👂
4
  colorFrom: green
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 5.33.0
8
  app_file: app.py
9
  pinned: false
10
  short_description: Generates audio environment from an image
 
4
  colorFrom: green
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 5.0.1
8
  app_file: app.py
9
  pinned: false
10
  short_description: Generates audio environment from an image
app.py CHANGED
@@ -184,20 +184,6 @@ def get_ezaudio(prompt):
184
  raise gr.Error("EzAudio space API is not ready, please try again in few minutes ")
185
 
186
  def infer(image_in, chosen_model):
187
- """
188
- Generate an audio clip (sound effect) from an input image using the selected generative model.
189
-
190
- This function first generates a caption from the provided image using a vision-language model.
191
- The caption is then used as a text prompt for various audio generation models.
192
-
193
- Args:
194
- image_in (str): File path to the input image. The image will be processed to generate a descriptive caption.
195
- chosen_model (str): The name of the audio generation model to use. Supported options include: "AudioLDM-2", "Tango", "Stable Audio Open".
196
-
197
- Returns:
198
- str | dict: The path or result object of the generated audio clip, depending on the model used.
199
-
200
- """
201
  caption = get_caption_from_kosmos(image_in)
202
  if chosen_model == "MAGNet" :
203
  magnet_result = get_magnet(caption)
@@ -247,9 +233,9 @@ with gr.Blocks(css=css) as demo:
247
  "AudioLDM-2",
248
  #"AudioGen",
249
  "Tango",
250
- #"Tango 2",
251
  "Stable Audio Open",
252
- #"EzAudio"
253
  ], value="AudioLDM-2")
254
  submit_btn = gr.Button("Submit")
255
  with gr.Column():
@@ -266,4 +252,4 @@ with gr.Blocks(css=css) as demo:
266
  outputs=[audio_o],
267
  )
268
 
269
- demo.queue(max_size=10).launch(debug=True, show_error=True, ssr_mode=False, mcp_server=True)
 
184
  raise gr.Error("EzAudio space API is not ready, please try again in few minutes ")
185
 
186
  def infer(image_in, chosen_model):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  caption = get_caption_from_kosmos(image_in)
188
  if chosen_model == "MAGNet" :
189
  magnet_result = get_magnet(caption)
 
233
  "AudioLDM-2",
234
  #"AudioGen",
235
  "Tango",
236
+ "Tango 2",
237
  "Stable Audio Open",
238
+ "EzAudio"
239
  ], value="AudioLDM-2")
240
  submit_btn = gr.Button("Submit")
241
  with gr.Column():
 
252
  outputs=[audio_o],
253
  )
254
 
255
+ demo.queue(max_size=10).launch(debug=True, show_error=True)
requirements.txt CHANGED
@@ -1 +1 @@
1
- moviepy<2
 
1
+ moviepy