Spaces: Running on Zero
Update app.py
app.py (CHANGED)
@@ -11,7 +11,7 @@ from PIL import Image
 import io
 from pydub import AudioSegment
 from typing import List
-import
+from functools import lru_cache
 
 # Load environment variables
 load_dotenv()
@@ -20,8 +20,8 @@ HF_TOKEN = os.getenv("HF_TKN")
 # Device configuration
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-# Initialize models
-@
+# Initialize models with caching
+@lru_cache(maxsize=None)
 def load_caption_model():
     return pipeline(
         "image-to-text",
@@ -29,7 +29,7 @@ def load_caption_model():
         device=device
     )
 
-@
+@lru_cache(maxsize=None)
 def load_audio_model():
     pipe = DiffusionPipeline.from_pretrained(
         "cvssp/audioldm2",
@@ -40,7 +40,6 @@ def load_audio_model():
 caption_pipe = load_caption_model()
 audio_pipe = load_audio_model().to(device)
 
-@spaces.GPU(duration=120)
 def analyze_image(image_file):
     """Generate caption from image with validation"""
     try:
@@ -65,7 +64,6 @@ def analyze_image(image_file):
     except Exception as e:
         raise gr.Error(f"Image processing error: {str(e)}")
 
-@spaces.GPU(duration=120)
 def generate_audio(prompt: str, num_steps=100, guidance_scale=7.5):
     """Generate audio from single prompt"""
     try:
@@ -87,7 +85,6 @@ def generate_audio(prompt: str, num_steps=100, guidance_scale=7.5):
     except Exception as e:
         raise gr.Error(f"Audio generation error: {str(e)}")
 
-@spaces.GPU(duration=120)
 def blend_audios(audio_files: List[str]) -> str:
     """Mix multiple audio files into one"""
     try:
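The `@lru_cache(maxsize=None)` decorators introduced above memoize the loader functions, so each pipeline is built at most once per process and later calls return the cached object. A minimal sketch of the pattern, using a toy `expensive_load` stand-in (not part of this Space) instead of the real pipelines:

```python
from functools import lru_cache

@lru_cache(maxsize=None)
def expensive_load(name: str) -> dict:
    # Stand-in for a slow model download/initialization;
    # the body runs only once per distinct argument.
    print(f"loading {name} ...")
    return {"model": name}

first = expensive_load("cvssp/audioldm2")   # body runs, prints once
second = expensive_load("cvssp/audioldm2")  # served from the cache
assert first is second                      # the very same object
```

Since `load_caption_model()` and `load_audio_model()` take no arguments, each cache holds a single entry, making the loaders behave like lazy singletons.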
@@ -246,6 +243,8 @@ with gr.Blocks(css=css, theme=gr.themes.Default(primary_hue="emerald")) as app:
     # Footer
     gr.Markdown("""
     ---
+    *Powered by [BLIP](https://huggingface.co/Salesforce/blip-image-captioning-base) and
+    [AudioLDM 2](https://huggingface.co/cvssp/audioldm2) •
     [GitHub Repository](https://github.com/bilsimaging/Imaginesound)*
     """)
 
@@ -256,5 +255,8 @@ with gr.Blocks(css=css, theme=gr.themes.Default(primary_hue="emerald")) as app:
         outputs=[prompt_display, final_audio, *track_components]
     )
 
+# Enable queuing for concurrent processing
+app.queue(concurrency_count=3)
+
 if __name__ == "__main__":
     app.launch(debug=True, share=True)
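The final hunk enables Gradio's request queue before launch so long-running generation calls wait in line instead of overwhelming the worker. A standalone sketch of the same call pattern; the `echo` handler and the `gr.Interface` wiring are placeholders, and `concurrency_count=3` is the keyword used in this commit (newer Gradio releases rename this setting):

```python
import gradio as gr

def echo(text: str) -> str:
    # Placeholder handler; the real app captions images and generates audio.
    return text

demo = gr.Interface(fn=echo, inputs="text", outputs="text")

# Queue incoming requests; at most three jobs run at the same time,
# the rest wait their turn instead of timing out.
demo.queue(concurrency_count=3)

if __name__ == "__main__":
    demo.launch(debug=True, share=True)
```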