Spaces:
Running
on
Zero
Running
on
Zero
wenxiang guo
committed on
Update ldm/modules/encoders/modules.py
Browse files
ldm/modules/encoders/modules.py
CHANGED
|
@@ -56,6 +56,7 @@ class FrozenFLANEmbedder(AbstractEncoder):
|
|
| 56 |
|
| 57 |
def __init__(self, version="google/flan-t5-large", device="cuda", max_length=77,
|
| 58 |
freeze=True): # others are google/t5-v1_1-xl and google/t5-v1_1-xxl
|
|
|
|
| 59 |
super().__init__()
|
| 60 |
|
| 61 |
self.tokenizer = T5Tokenizer.from_pretrained(version)
|
|
@@ -88,8 +89,8 @@ class FrozenCLAPEmbedder(AbstractEncoder):
|
|
| 88 |
"""Uses the CLAP transformer encoder for text from microsoft"""
|
| 89 |
|
| 90 |
def __init__(self, weights_path, freeze=True, device="cuda", max_length=77): # clip-vit-base-patch32
|
|
|
|
| 91 |
super().__init__()
|
| 92 |
-
|
| 93 |
model_state_dict = torch.load(weights_path, map_location=torch.device('cpu'))['model']
|
| 94 |
match_params = dict()
|
| 95 |
for key in list(model_state_dict.keys()):
|
|
@@ -103,7 +104,7 @@ class FrozenCLAPEmbedder(AbstractEncoder):
|
|
| 103 |
self.caption_encoder = TextEncoder(
|
| 104 |
args.d_proj, args.text_model, args.transformer_embed_dim
|
| 105 |
)
|
| 106 |
-
|
| 107 |
self.max_length = max_length
|
| 108 |
self.device = device
|
| 109 |
if freeze: self.freeze()
|
|
@@ -130,6 +131,7 @@ class FrozenCLAPFLANEmbedder(AbstractEncoder):
|
|
| 130 |
|
| 131 |
def __init__(self, weights_path, t5version="google/t5-v1_1-large", freeze=True, device="cuda",
|
| 132 |
max_length=77): # clip-vit-base-patch32
|
|
|
|
| 133 |
super().__init__()
|
| 134 |
|
| 135 |
model_state_dict = torch.load(weights_path, map_location=torch.device('cpu'))['model']
|
|
|
|
| 56 |
|
| 57 |
def __init__(self, version="google/flan-t5-large", device="cuda", max_length=77,
|
| 58 |
freeze=True): # others are google/t5-v1_1-xl and google/t5-v1_1-xxl
|
| 59 |
+
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
| 60 |
super().__init__()
|
| 61 |
|
| 62 |
self.tokenizer = T5Tokenizer.from_pretrained(version)
|
|
|
|
| 89 |
"""Uses the CLAP transformer encoder for text from microsoft"""
|
| 90 |
|
| 91 |
def __init__(self, weights_path, freeze=True, device="cuda", max_length=77): # clip-vit-base-patch32
|
| 92 |
+
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
| 93 |
super().__init__()
|
|
|
|
| 94 |
model_state_dict = torch.load(weights_path, map_location=torch.device('cpu'))['model']
|
| 95 |
match_params = dict()
|
| 96 |
for key in list(model_state_dict.keys()):
|
|
|
|
| 104 |
self.caption_encoder = TextEncoder(
|
| 105 |
args.d_proj, args.text_model, args.transformer_embed_dim
|
| 106 |
)
|
| 107 |
+
|
| 108 |
self.max_length = max_length
|
| 109 |
self.device = device
|
| 110 |
if freeze: self.freeze()
|
|
|
|
| 131 |
|
| 132 |
def __init__(self, weights_path, t5version="google/t5-v1_1-large", freeze=True, device="cuda",
|
| 133 |
max_length=77): # clip-vit-base-patch32
|
| 134 |
+
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
|
| 135 |
super().__init__()
|
| 136 |
|
| 137 |
model_state_dict = torch.load(weights_path, map_location=torch.device('cpu'))['model']
|