Phoenixak99 committed on
Commit
30b75e1
1 Parent(s): 247afcc

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +67 -33
handler.py CHANGED
@@ -1,7 +1,12 @@
 
1
  from typing import Dict, Any
2
  from transformers import AutoProcessor, MusicgenForConditionalGeneration
3
  import torch
4
 
 
 
 
 
5
  class EndpointHandler:
6
  def __init__(self, path=""):
7
  # Load the processor and model from the specified path
@@ -11,45 +16,74 @@ class EndpointHandler:
11
  ).to("cuda")
12
  self.sampling_rate = self.model.config.audio_encoder.sampling_rate
13
 
14
- def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
15
  """
16
  Args:
17
  data (dict): The payload with the text prompt and generation parameters.
18
  """
19
- # Extract inputs and parameters from the payload
20
- inputs = data.get("inputs", {})
21
- prompt = inputs.get("prompt", "")
22
- duration = inputs.get("duration", 10)
23
- parameters = data.get("parameters", {})
24
-
25
- # Validate the prompt
26
- if not prompt:
27
- return {"error": "No prompt provided."}
28
-
29
- # Preprocess the prompt
30
- input_ids = self.processor(
31
- text=[prompt],
32
- padding=True,
33
- return_tensors="pt",
34
- ).to("cuda")
35
 
36
- # Set generation parameters
37
- gen_kwargs = {
38
- "max_new_tokens": int(duration * 50), # MusicGen uses 50 tokens per second
39
- **parameters,
40
- }
41
 
42
- # Generate audio
43
- with torch.autocast("cuda"):
44
- outputs = self.model.generate(**input_ids, **gen_kwargs)
45
 
46
- # Convert the output audio tensor to a list of lists (channel-wise)
47
- audio_tensor = outputs[0].cpu() # Shape: [num_channels, seq_len]
48
- audio_list = audio_tensor.numpy().tolist() # [[channel1_data], [channel2_data]]
 
 
 
49
 
50
- return [
51
- {
52
- "generated_audio": audio_list,
53
- "sample_rate": self.sampling_rate,
54
  }
55
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
  from typing import Dict, Any
3
  from transformers import AutoProcessor, MusicgenForConditionalGeneration
4
  import torch
5
 
6
+ # Set up logging
7
+ logging.basicConfig(level=logging.INFO)
8
+ logger = logging.getLogger(__name__)
9
+
10
  class EndpointHandler:
11
  def __init__(self, path=""):
12
  # Load the processor and model from the specified path
 
16
  ).to("cuda")
17
  self.sampling_rate = self.model.config.audio_encoder.sampling_rate
18
 
19
+ def __call__(self, data: Dict[str, Any]) -> Any:
20
  """
21
  Args:
22
  data (dict): The payload with the text prompt and generation parameters.
23
  """
24
+ try:
25
+ # Extract inputs and parameters from the payload
26
+ inputs = data.get("inputs", data)
27
+ parameters = data.get("parameters", {})
28
+
29
+ # Handle inputs
30
+ if isinstance(inputs, str):
31
+ prompt = inputs
32
+ duration = 10 # Default duration
33
+ elif isinstance(inputs, dict):
34
+ prompt = inputs.get("text") or inputs.get("prompt")
35
+ duration = inputs.get("duration", 10)
36
+ else:
37
+ prompt = None
38
+ duration = 10
 
39
 
40
+ # Override duration if provided in parameters
41
+ if 'duration' in parameters:
42
+ duration = parameters.pop('duration')
 
 
43
 
44
+ # Validate the prompt
45
+ if not prompt:
46
+ return {"error": "No prompt provided."}
47
 
48
+ # Preprocess the prompt
49
+ input_ids = self.processor(
50
+ text=[prompt],
51
+ padding=True,
52
+ return_tensors="pt",
53
+ ).to("cuda")
54
 
55
+ # Set generation parameters
56
+ gen_kwargs = {
57
+ "max_new_tokens": int(duration * 50), # MusicGen uses 50 tokens per second
 
58
  }
59
+
60
+ # Filter out unsupported parameters
61
+ supported_params = [
62
+ "max_length", "min_length", "do_sample", "early_stopping", "num_beams",
63
+ "temperature", "top_k", "top_p", "repetition_penalty", "bad_words_ids",
64
+ "num_return_sequences", "attention_mask"
65
+ ]
66
+ for param in supported_params:
67
+ if param in parameters:
68
+ gen_kwargs[param] = parameters[param]
69
+
70
+ logger.info(f"Received prompt: {prompt}")
71
+ logger.info(f"Generation parameters: {gen_kwargs}")
72
+
73
+ # Generate audio
74
+ with torch.autocast("cuda"):
75
+ outputs = self.model.generate(**input_ids, **gen_kwargs)
76
+
77
+ # Convert the output audio tensor to a list of lists (channel-wise)
78
+ audio_tensor = outputs[0].cpu() # Shape: [num_channels, seq_len]
79
+ audio_list = audio_tensor.numpy().tolist() # [[channel1_data], [channel2_data]]
80
+
81
+ return [
82
+ {
83
+ "generated_audio": audio_list,
84
+ "sample_rate": self.sampling_rate,
85
+ }
86
+ ]
87
+ except Exception as e:
88
+ logger.error(f"Exception during generation: {e}")
89
+ return {"error": str(e)}