Spaces:

jacob-c
/

fyp_start_space

Running

App Files Files Community

jacob-c committed on Jan 6

Commit

3dc83fd

1 Parent(s): 46e5e67

.

Browse files

Files changed (2) hide show

README.md +16 -1
src/classifier.py +41 -6

README.md CHANGED Viewed

@@ -75,10 +75,24 @@ python app.py
 ## Models Used
 - **Genre Classification**:
-  - Audio: `superb/wav2vec2-base-superb-gc` (Pre-trained on music genre classification)
   - Text: `facebook/bart-large-mnli` (Zero-shot classification)
 - **Lyric Generation**: `gpt2-medium`
 ## Contributing
 Contributions are welcome! Please feel free to submit a Pull Request.
@@ -89,6 +103,7 @@ This project is licensed under the MIT License - see the LICENSE file for detail
 ## Acknowledgments
 - Hugging Face for providing the pre-trained models
 - Gradio for the web interface framework
 - The open-source community for various audio processing libraries

 ## Models Used
 - **Genre Classification**:
+  - Audio: `mit/ast-finetuned-audioset-10-10-0.4593` (MIT's Audio Spectrogram Transformer)
   - Text: `facebook/bart-large-mnli` (Zero-shot classification)
 - **Lyric Generation**: `gpt2-medium`
+## Supported Genres
+The system supports classification and generation for the following genres:
+- Rock
+- Pop
+- Hip Hop
+- Country
+- Jazz
+- Classical
+- Electronic
+- Blues
+- Reggae
+- Metal
 ## Contributing
 Contributions are welcome! Please feel free to submit a Pull Request.
 ## Acknowledgments
+- MIT for the Audio Spectrogram Transformer model
 - Hugging Face for providing the pre-trained models
 - Gradio for the web interface framework
 - The open-source community for various audio processing libraries

src/classifier.py CHANGED Viewed

@@ -13,16 +13,32 @@ class MusicGenreClassifier:
             model="facebook/bart-large-mnli"
         )
-        # For audio classification, we'll use a different pre-trained model
         self.audio_classifier = pipeline(
             "audio-classification",
-            model="superb/wav2vec2-base-superb-gc"
         )
         self.genres = [
             "rock", "pop", "hip hop", "country", "jazz",
             "classical", "electronic", "blues", "reggae", "metal"
         ]
     def process_audio(self, audio_path: str) -> torch.Tensor:
         """Process audio file to match model requirements."""
@@ -37,16 +53,35 @@ class MusicGenreClassifier:
         except Exception as e:
             raise ValueError(f"Error processing audio file: {str(e)}")
     def classify_audio(self, audio_path: str) -> Tuple[str, float]:
         """Classify genre from audio file."""
         try:
             waveform = self.process_audio(audio_path)
-            predictions = self.audio_classifier(waveform, top_k=1)
-            # Get the top prediction
             if isinstance(predictions, list):
                 predictions = predictions[0]
-            top_pred = max(predictions, key=lambda x: x['score'])
-            return top_pred['label'], top_pred['score']
         except Exception as e:
             raise ValueError(f"Audio classification failed: {str(e)}")

             model="facebook/bart-large-mnli"
         )
+        # For audio classification, we use MIT's Audio Spectrogram Transformer (fine-tuned on AudioSet, a general audio-event model, not a dedicated genre classifier)
         self.audio_classifier = pipeline(
             "audio-classification",
+            model="mit/ast-finetuned-audioset-10-10-0.4593"
         )
+        # Define standard genres for classification
         self.genres = [
             "rock", "pop", "hip hop", "country", "jazz",
             "classical", "electronic", "blues", "reggae", "metal"
         ]
+        # Mapping from model output labels to our standard genres
+        self.label_mapping = {
+            "Music": "pop",  # Default mapping
+            "Rock music": "rock",
+            "Pop music": "pop",
+            "Hip hop music": "hip hop",
+            "Country": "country",
+            "Jazz": "jazz",
+            "Classical music": "classical",
+            "Electronic music": "electronic",
+            "Blues": "blues",
+            "Reggae": "reggae",
+            "Heavy metal": "metal"
+        }
     def process_audio(self, audio_path: str) -> torch.Tensor:
         """Process audio file to match model requirements."""
         except Exception as e:
             raise ValueError(f"Error processing audio file: {str(e)}")
+    def map_label_to_genre(self, label: str) -> str:
+        """Map model output label to standard genre."""
+        return self.label_mapping.get(label, "pop")  # Default to pop if unknown
     def classify_audio(self, audio_path: str) -> Tuple[str, float]:
         """Classify genre from audio file."""
         try:
             waveform = self.process_audio(audio_path)
+            predictions = self.audio_classifier(waveform, top_k=3)
+            # Process predictions
             if isinstance(predictions, list):
                 predictions = predictions[0]
+            # Find the highest scoring music-related prediction
+            music_preds = [
+                (self.map_label_to_genre(p['label']), p['score'])
+                for p in predictions
+                if p['label'] in self.label_mapping
+            ]
+            if not music_preds:
+                # If no music genres found, return default
+                return "pop", 0.5
+            # Get the highest scoring genre
+            genre, score = max(music_preds, key=lambda x: x[1])
+            return genre, score
         except Exception as e:
             raise ValueError(f"Audio classification failed: {str(e)}")