lym0302 committed on
Commit
0321bb5
·
1 Parent(s): c7895e7
app.py CHANGED
@@ -41,7 +41,7 @@ os.makedirs("pretrained/v2a/mmaudio", exist_ok=True)
41
  setup_eval_logging()
42
  pipeline = Pipeline(
43
  step0_model_dir=repo_local_path,
44
- step1_mode='mmaudio_medium_44k',
45
  step2_model_dir=repo_local_path,
46
  step2_mode='cot',
47
  step3_mode='bs_roformer',
 
41
  setup_eval_logging()
42
  pipeline = Pipeline(
43
  step0_model_dir=repo_local_path,
44
+ step1_mode='mmaudio_small_44k',
45
  step2_model_dir=repo_local_path,
46
  step2_mode='cot',
47
  step3_mode='bs_roformer',
third_party/VideoLLaMA2/videollama2/model/__init__.py CHANGED
@@ -76,7 +76,8 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
76
  bnb_4bit_quant_type='nf4'
77
  )
78
  else:
79
- kwargs['torch_dtype'] = torch.float16
 
80
 
81
  if use_flash_attn:
82
  kwargs['attn_implementation'] = 'flash_attention_2'
@@ -181,7 +182,7 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
181
  elif model_type in ['videollama2_mixtral']:
182
  model = Videollama2MixtralForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, config=config, **kwargs)
183
  elif model_type in ['videollama2_qwen2']:
184
- model = Videollama2Qwen2ForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16, config=config, **kwargs)
185
  elif model_type in ['videollama2_gemma2']:
186
  model = Videollama2Gemma2ForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, config=config, **kwargs)
187
  elif model_type in ['videollama2_phi3']:
 
76
  bnb_4bit_quant_type='nf4'
77
  )
78
  else:
79
+ # kwargs['torch_dtype'] = torch.float16
80
+ kwargs['torch_dtype'] = torch.bfloat16
81
 
82
  if use_flash_attn:
83
  kwargs['attn_implementation'] = 'flash_attention_2'
 
182
  elif model_type in ['videollama2_mixtral']:
183
  model = Videollama2MixtralForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, config=config, **kwargs)
184
  elif model_type in ['videollama2_qwen2']:
185
+ model = Videollama2Qwen2ForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, config=config, **kwargs)
186
  elif model_type in ['videollama2_gemma2']:
187
  model = Videollama2Gemma2ForCausalLM.from_pretrained(model_path, low_cpu_mem_usage=True, config=config, **kwargs)
188
  elif model_type in ['videollama2_phi3']: