#7 opened by hualing0222
- README.md +1 -1
- app.py +3 -17
- requirements.txt +1 -1
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: π
 colorFrom: green
 colorTo: pink
 sdk: gradio
-sdk_version: 5.
+sdk_version: 5.0.1
 app_file: app.py
 pinned: false
 short_description: Generates audio environment from an image
app.py
CHANGED
@@ -184,20 +184,6 @@ def get_ezaudio(prompt):
         raise gr.Error("EzAudio space API is not ready, please try again in few minutes ")
 
 def infer(image_in, chosen_model):
-    """
-    Generate an audio clip (sound effect) from an input image using the selected generative model.
-
-    This function first generates a caption from the provided image using a vision-language model.
-    The caption is then used as a text prompt for various audio generation models.
-
-    Args:
-        image_in (str): File path to the input image. The image will be processed to generate a descriptive caption.
-        chosen_model (str): The name of the audio generation model to use. Supported options include: "AudioLDM-2", "Tango", "Stable Audio Open".
-
-    Returns:
-        str | dict: The path or result object of the generated audio clip, depending on the model used.
-
-    """
     caption = get_caption_from_kosmos(image_in)
     if chosen_model == "MAGNet" :
         magnet_result = get_magnet(caption)
@@ -247,9 +233,9 @@ with gr.Blocks(css=css) as demo:
                     "AudioLDM-2",
                     #"AudioGen",
                     "Tango",
-
+                    "Tango 2",
                     "Stable Audio Open",
-
+                    "EzAudio"
                 ], value="AudioLDM-2")
                 submit_btn = gr.Button("Submit")
             with gr.Column():
@@ -266,4 +252,4 @@ with gr.Blocks(css=css) as demo:
         outputs=[audio_o],
     )
 
-demo.queue(max_size=10).launch(debug=True, show_error=True
+demo.queue(max_size=10).launch(debug=True, show_error=True)
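For context, the hunks above only touch the top of infer() and the Radio choices; the per-model dispatch that consumes the caption is outside the diff. Below is a minimal, non-authoritative sketch of that caption-then-dispatch flow, reusing the helper names that do appear in the hunks (get_caption_from_kosmos, get_magnet, get_ezaudio) and a hypothetical get_tango2 for the new "Tango 2" choice:

import gradio as gr

# Sketch only -- not the Space's actual code. It mirrors the flow implied by
# the diff: caption the image first, then route the caption to the chosen
# audio backend. get_caption_from_kosmos, get_magnet and get_ezaudio appear
# in the hunks above; get_tango2 is a hypothetical placeholder.
def infer(image_in, chosen_model):
    # Step 1: describe the image with a vision-language model.
    caption = get_caption_from_kosmos(image_in)

    # Step 2: turn the caption into audio with the selected backend.
    if chosen_model == "MAGNet":
        return get_magnet(caption)
    elif chosen_model == "Tango 2":
        return get_tango2(caption)   # hypothetical helper for the new choice
    elif chosen_model == "EzAudio":
        return get_ezaudio(caption)
    raise gr.Error(f"Unsupported model: {chosen_model}")

The real infer presumably also covers "AudioLDM-2", "Tango" and "Stable Audio Open"; only the branches relevant to this PR are sketched here.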
requirements.txt
CHANGED
@@ -1 +1 @@
-moviepy
+moviepy
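As a closing note on the "EzAudio space API is not ready" error path visible in the app.py context above: calling another Space from app.py is commonly done with gradio_client, with failures converted to gr.Error so they surface in the UI. A rough sketch under that assumption (the Space id "user/EzAudio" and the api_name are placeholders, not values taken from this PR):

import gradio as gr
from gradio_client import Client

# Sketch only: the generic pattern for calling a remote Space and reporting
# an unavailable API back to the Gradio UI.
def get_ezaudio(prompt):
    try:
        client = Client("user/EzAudio")  # remote Space id (placeholder)
        return client.predict(prompt, api_name="/predict")
    except Exception:
        raise gr.Error("EzAudio space API is not ready, please try again in few minutes ")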