MusicGen

Running

adefossez committed on Jun 13, 2023

Commit

fad2862

1 Parent(s): 6457900

adding support for cpu

Files changed (3) hide show

audiocraft/models/loaders.py CHANGED Viewed

@@ -80,8 +80,6 @@ def load_lm_model(file_or_url_or_id: tp.Union[Path, str], device='cpu', cache_di
     cfg = OmegaConf.create(pkg['xp.cfg'])
     cfg.device = str(device)
     if cfg.device == 'cpu':
-        cfg.transformer_lm.memory_efficient = False
-        cfg.transformer_lm.custom = True
         cfg.dtype = 'float32'
     else:
         cfg.dtype = 'float16'

     cfg = OmegaConf.create(pkg['xp.cfg'])
     cfg.device = str(device)
     if cfg.device == 'cpu':
         cfg.dtype = 'float32'
     else:
         cfg.dtype = 'float16'

audiocraft/models/musicgen.py CHANGED Viewed

@@ -68,7 +68,7 @@ class MusicGen:
         return self.compression_model.channels
     @staticmethod
-    def get_pretrained(name: str = 'melody', device='cuda'):
         """Return pretrained model, we provide four models:
         - small (300M), text to music, # see: https://huggingface.co/facebook/musicgen-small
         - medium (1.5B), text to music, # see: https://huggingface.co/facebook/musicgen-medium
@@ -76,11 +76,17 @@ class MusicGen:
         - large (3.3B), text to music, # see: https://huggingface.co/facebook/musicgen-large
         """
         if name == 'debug':
             # used only for unit tests
             compression_model = get_debug_compression_model(device)
             lm = get_debug_lm_model(device)
-            return MusicGen(name, compression_model, lm, max_duration=3.)
         if name not in HF_MODEL_CHECKPOINTS_MAP:
             raise ValueError(
@@ -313,7 +319,6 @@ class MusicGen:
                 all_tokens.append(prompt_tokens)
                 prompt_length = prompt_tokens.shape[-1]
             stride_tokens = int(self.frame_rate * self.extend_stride)
             while current_gen_offset + prompt_length < total_gen_len:

         return self.compression_model.channels
     @staticmethod
+    def get_pretrained(name: str = 'melody', device=None):
         """Return pretrained model, we provide four models:
         - small (300M), text to music, # see: https://huggingface.co/facebook/musicgen-small
         - medium (1.5B), text to music, # see: https://huggingface.co/facebook/musicgen-medium
         - large (3.3B), text to music, # see: https://huggingface.co/facebook/musicgen-large
         """
+        if device is None:
+            if torch.cuda.device_count():
+                device = 'cuda'
+            else:
+                device = 'cpu'
         if name == 'debug':
             # used only for unit tests
             compression_model = get_debug_compression_model(device)
             lm = get_debug_lm_model(device)
+            return MusicGen(name, compression_model, lm)
         if name not in HF_MODEL_CHECKPOINTS_MAP:
             raise ValueError(
                 all_tokens.append(prompt_tokens)
                 prompt_length = prompt_tokens.shape[-1]
             stride_tokens = int(self.frame_rate * self.extend_stride)
             while current_gen_offset + prompt_length < total_gen_len:

tests/models/test_musicgen.py CHANGED Viewed

@@ -51,6 +51,7 @@ class TestSEANetModel:
     def test_generate_long(self):
         mg = self.get_musicgen()
         mg.set_generation_params(duration=4., stride_extend=2.)
         wav = mg.generate(
             ['youpi', 'lapin dort'])

     def test_generate_long(self):
         mg = self.get_musicgen()
+        mg.max_duration = 3.
         mg.set_generation_params(duration=4., stride_extend=2.)
         wav = mg.generate(
             ['youpi', 'lapin dort'])