Bils committed on
Commit
8a09658
·
verified ·
1 Parent(s): fa05f3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -11,7 +11,7 @@ from PIL import Image
11
  import io
12
  from pydub import AudioSegment
13
  from typing import List
14
- import spaces
15
 
16
  # Load environment variables
17
  load_dotenv()
@@ -20,8 +20,8 @@ HF_TOKEN = os.getenv("HF_TKN")
20
  # Device configuration
21
  device = "cuda" if torch.cuda.is_available() else "cpu"
22
 
23
- # Initialize models
24
- @gr.cache()
25
  def load_caption_model():
26
  return pipeline(
27
  "image-to-text",
@@ -29,7 +29,7 @@ def load_caption_model():
29
  device=device
30
  )
31
 
32
- @gr.cache()
33
  def load_audio_model():
34
  pipe = DiffusionPipeline.from_pretrained(
35
  "cvssp/audioldm2",
@@ -40,7 +40,6 @@ def load_audio_model():
40
  caption_pipe = load_caption_model()
41
  audio_pipe = load_audio_model().to(device)
42
 
43
- @spaces.GPU(duration=120)
44
  def analyze_image(image_file):
45
  """Generate caption from image with validation"""
46
  try:
@@ -65,7 +64,6 @@ def analyze_image(image_file):
65
  except Exception as e:
66
  raise gr.Error(f"Image processing error: {str(e)}")
67
 
68
- @spaces.GPU(duration=120)
69
  def generate_audio(prompt: str, num_steps=100, guidance_scale=7.5):
70
  """Generate audio from single prompt"""
71
  try:
@@ -87,7 +85,6 @@ def generate_audio(prompt: str, num_steps=100, guidance_scale=7.5):
87
  except Exception as e:
88
  raise gr.Error(f"Audio generation error: {str(e)}")
89
 
90
- @spaces.GPU(duration=120)
91
  def blend_audios(audio_files: List[str]) -> str:
92
  """Mix multiple audio files into one"""
93
  try:
@@ -246,6 +243,8 @@ with gr.Blocks(css=css, theme=gr.themes.Default(primary_hue="emerald")) as app:
246
  # Footer
247
  gr.Markdown("""
248
  ---
 
 
249
  [GitHub Repository](https://github.com/bilsimaging/Imaginesound)*
250
  """)
251
 
@@ -256,5 +255,8 @@ with gr.Blocks(css=css, theme=gr.themes.Default(primary_hue="emerald")) as app:
256
  outputs=[prompt_display, final_audio, *track_components]
257
  )
258
 
 
 
 
259
  if __name__ == "__main__":
260
  app.launch(debug=True, share=True)
 
11
  import io
12
  from pydub import AudioSegment
13
  from typing import List
14
+ from functools import lru_cache
15
 
16
  # Load environment variables
17
  load_dotenv()
 
20
  # Device configuration
21
  device = "cuda" if torch.cuda.is_available() else "cpu"
22
 
23
+ # Initialize models with caching
24
+ @lru_cache(maxsize=None)
25
  def load_caption_model():
26
  return pipeline(
27
  "image-to-text",
 
29
  device=device
30
  )
31
 
32
+ @lru_cache(maxsize=None)
33
  def load_audio_model():
34
  pipe = DiffusionPipeline.from_pretrained(
35
  "cvssp/audioldm2",
 
40
  caption_pipe = load_caption_model()
41
  audio_pipe = load_audio_model().to(device)
42
 
 
43
  def analyze_image(image_file):
44
  """Generate caption from image with validation"""
45
  try:
 
64
  except Exception as e:
65
  raise gr.Error(f"Image processing error: {str(e)}")
66
 
 
67
  def generate_audio(prompt: str, num_steps=100, guidance_scale=7.5):
68
  """Generate audio from single prompt"""
69
  try:
 
85
  except Exception as e:
86
  raise gr.Error(f"Audio generation error: {str(e)}")
87
 
 
88
  def blend_audios(audio_files: List[str]) -> str:
89
  """Mix multiple audio files into one"""
90
  try:
 
243
  # Footer
244
  gr.Markdown("""
245
  ---
246
+ *Powered by [BLIP](https://huggingface.co/Salesforce/blip-image-captioning-base) and
247
+ [AudioLDM 2](https://huggingface.co/cvssp/audioldm2) •
248
  [GitHub Repository](https://github.com/bilsimaging/Imaginesound)*
249
  """)
250
 
 
255
  outputs=[prompt_display, final_audio, *track_components]
256
  )
257
 
258
+ # Enable queuing for concurrent processing
259
+ app.queue(concurrency_count=3)
260
+
261
  if __name__ == "__main__":
262
  app.launch(debug=True, share=True)