Julian-Hans committed on
Commit
a15bc9b
·
1 Parent(s): 9b3501f

removed force push, load models separately to optimize RAM usage

Browse files
Files changed (3) hide show
  1. .github/workflows/sync.yml +1 -1
  2. .gitignore +2 -1
  3. app.py +50 -5
.github/workflows/sync.yml CHANGED
@@ -17,4 +17,4 @@ jobs:
17
  - name: Push to hub
18
  env:
19
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
- run: git push https://oxmraz-mldo:$HF_TOKEN@huggingface.co/spaces/Group17WPIMLDO24/Case-Study-1 main --force
 
17
  - name: Push to hub
18
  env:
19
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
+ run: git push https://oxmraz-mldo:$HF_TOKEN@huggingface.co/spaces/Group17WPIMLDO24/Case-Study-1 main
.gitignore CHANGED
@@ -1 +1,2 @@
1
- /__pycache__
 
 
1
+ /__pycache__
2
+ *.wav
app.py CHANGED
@@ -1,7 +1,10 @@
1
  # external imports
 
 
2
  import time
3
  import uuid
4
  import gradio as gr
 
5
 
6
  # local imports
7
  from blip_image_caption_large import Blip_Image_Caption_Large
@@ -9,12 +12,11 @@ from phi3_mini_4k_instruct import Phi3_Mini_4k_Instruct
9
  from musicgen_small import Musicgen_Small
10
  import config
11
 
 
 
 
12
  class Image_To_Music:
13
  def __init__(self):
14
- self.image_caption_model = Blip_Image_Caption_Large()
15
- self.text_generation_model = Phi3_Mini_4k_Instruct()
16
- self.music_generation_model = Musicgen_Small()
17
-
18
  self.image_path = None
19
  self.generated_caption = None
20
  self.generated_description = None
@@ -23,28 +25,71 @@ class Image_To_Music:
23
  self.caption_generation_duration = -1
24
  self.description_generation_duration = -1
25
  self.music_generation_duration = -1
26
-
 
 
 
 
 
 
 
 
 
 
27
  def caption_image(self, image_path):
 
28
  caption_start_time = time.time()
 
 
 
 
29
  self.image_path = image_path
30
  self.generated_caption = self.image_caption_model.caption_image_local_pipeline(self.image_path)[0]["generated_text"]
 
 
 
 
 
31
  self.caption_generation_duration = time.time() - caption_start_time
 
32
  return self.generated_caption
33
 
34
  def generate_description(self):
 
35
  description_start_time = time.time()
 
 
 
 
36
  messages = [
37
  {"role": "system", "content": "You are an image caption to song description converter with a deep understanding of Music and Art. You are given the caption of an image. Your task is to generate a textual description of a musical piece that fits the caption. The description should be detailed and vivid, and should include the genre, mood, instruments, tempo, and other relevant information about the music. You should also use your knowledge of art and visual aesthetics to create a musical piece that complements the image. Only output the description of the music, without any explanation or introduction. Be concise."},
38
  {"role": "user", "content": self.generated_caption},
39
  ]
40
  self.generated_description = self.text_generation_model.generate_text_local_pipeline(messages)[-1]['generated_text'][-1]['content']
 
 
 
 
 
41
  self.description_generation_duration = time.time() - description_start_time
 
42
  return self.generated_description
43
 
44
  def generate_music(self):
 
45
  music_start_time = time.time()
 
 
 
 
46
  self.music_generation_model.generate_music_local_pipeline(self.generated_description, self.audio_path)
 
 
 
 
 
47
  self.music_generation_duration = time.time() - music_start_time
 
48
  return self.audio_path
49
 
50
  def get_durations(self):
 
1
  # external imports
2
+ import gc
3
+ import logging as log
4
  import time
5
  import uuid
6
  import gradio as gr
7
+ import os
8
 
9
  # local imports
10
  from blip_image_caption_large import Blip_Image_Caption_Large
 
12
  from musicgen_small import Musicgen_Small
13
  import config
14
 
15
+ log.basicConfig(level=log.INFO)
16
+
17
+
18
  class Image_To_Music:
19
  def __init__(self):
 
 
 
 
20
  self.image_path = None
21
  self.generated_caption = None
22
  self.generated_description = None
 
25
  self.caption_generation_duration = -1
26
  self.description_generation_duration = -1
27
  self.music_generation_duration = -1
28
+ self.create_output_folder()
29
+
30
+
31
+ # ----ATTRIBUTION-START----
32
+ # LLM: Github Copilot
33
+ # PROMPT: create an output folder for the generated audio files
34
+ # EDITS: /
35
+ def create_output_folder(self):
36
+ os.makedirs(config.AUDIO_DIR, exist_ok=True)
37
+ # -----ATTRIBUTION-END-----
38
+
39
  def caption_image(self, image_path):
40
+ log.info("Captioning Image...")
41
  caption_start_time = time.time()
42
+
43
+ # load model
44
+ self.image_caption_model = Blip_Image_Caption_Large()
45
+
46
  self.image_path = image_path
47
  self.generated_caption = self.image_caption_model.caption_image_local_pipeline(self.image_path)[0]["generated_text"]
48
+
49
+ # delete model to free up ram
50
+ del self.image_caption_model
51
+ gc.collect()
52
+
53
  self.caption_generation_duration = time.time() - caption_start_time
54
+ log.info(f"Captioning Complete in {self.caption_generation_duration:.2f} seconds: {self.generated_caption}")
55
  return self.generated_caption
56
 
57
  def generate_description(self):
58
+ log.info("Generating Music Description...")
59
  description_start_time = time.time()
60
+
61
+ # load model
62
+ self.text_generation_model = Phi3_Mini_4k_Instruct()
63
+
64
  messages = [
65
  {"role": "system", "content": "You are an image caption to song description converter with a deep understanding of Music and Art. You are given the caption of an image. Your task is to generate a textual description of a musical piece that fits the caption. The description should be detailed and vivid, and should include the genre, mood, instruments, tempo, and other relevant information about the music. You should also use your knowledge of art and visual aesthetics to create a musical piece that complements the image. Only output the description of the music, without any explanation or introduction. Be concise."},
66
  {"role": "user", "content": self.generated_caption},
67
  ]
68
  self.generated_description = self.text_generation_model.generate_text_local_pipeline(messages)[-1]['generated_text'][-1]['content']
69
+
70
+ # delete model to free up ram
71
+ del self.text_generation_model
72
+ gc.collect()
73
+
74
  self.description_generation_duration = time.time() - description_start_time
75
+ log.info(f"Description Generation Complete in {self.description_generation_duration:.2f} seconds: {self.generated_description}")
76
  return self.generated_description
77
 
78
  def generate_music(self):
79
+ log.info("Generating Music...")
80
  music_start_time = time.time()
81
+
82
+ # load model
83
+ self.music_generation_model = Musicgen_Small()
84
+
85
  self.music_generation_model.generate_music_local_pipeline(self.generated_description, self.audio_path)
86
+
87
+ # delete model to free up ram
88
+ del self.music_generation_model
89
+ gc.collect()
90
+
91
  self.music_generation_duration = time.time() - music_start_time
92
+ log.info(f"Music Generation Complete in {self.music_generation_duration:.2f} seconds: {self.audio_path}")
93
  return self.audio_path
94
 
95
  def get_durations(self):