Spaces:
Running
Running
- README.md +80 -0
- requirements.txt +9 -4
- src/classifier.py +90 -0
- src/lyric_generator.py +129 -0
README.md
CHANGED
@@ -12,3 +12,83 @@ short_description: create this first space for getting familiar with space
|
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
15 |
+
|
16 |
+
# Music Genre Classifier + Lyric Stylist 🎵
|
17 |
+
|
18 |
+
A powerful web application that combines music genre classification with AI-powered lyric generation. This tool can analyze both audio files and text lyrics to determine the genre, then generate new lyrics in that style or transform existing lyrics into different genres.
|
19 |
+
|
20 |
+
## Features
|
21 |
+
|
22 |
+
- **Dual Input Support**:
|
23 |
+
- Audio file analysis for genre detection
|
24 |
+
- Text-based lyrics analysis
|
25 |
+
- **Genre Classification**:
|
26 |
+
- Accurate genre detection using state-of-the-art models
|
27 |
+
- Supports multiple popular music genres
|
28 |
+
- **Lyric Generation**:
|
29 |
+
- Genre-aware lyric generation
|
30 |
+
- Theme-based content creation
|
31 |
+
- Multiple generation options (temperature, length, versions)
|
32 |
+
- **Style Transfer**:
|
33 |
+
- Transform existing lyrics into different genres
|
34 |
+
- Preserve core message while adapting style
|
35 |
+
|
36 |
+
## Installation
|
37 |
+
|
38 |
+
1. Clone the repository:
|
39 |
+
```bash
|
40 |
+
git clone [your-repo-url]
|
41 |
+
cd music-genre-classifier-lyric-stylist
|
42 |
+
```
|
43 |
+
|
44 |
+
2. Create a virtual environment (recommended):
|
45 |
+
```bash
|
46 |
+
python -m venv venv
|
47 |
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
48 |
+
```
|
49 |
+
|
50 |
+
3. Install dependencies:
|
51 |
+
```bash
|
52 |
+
pip install -r requirements.txt
|
53 |
+
```
|
54 |
+
|
55 |
+
## Usage
|
56 |
+
|
57 |
+
1. Start the application:
|
58 |
+
```bash
|
59 |
+
python app.py
|
60 |
+
```
|
61 |
+
|
62 |
+
2. Open your web browser and navigate to the provided URL (typically http://localhost:7860)
|
63 |
+
|
64 |
+
3. Choose your input method:
|
65 |
+
- Upload an audio file
|
66 |
+
- Enter lyrics text
|
67 |
+
|
68 |
+
4. Adjust generation parameters:
|
69 |
+
- Temperature (controls randomness)
|
70 |
+
- Maximum length
|
71 |
+
- Number of versions
|
72 |
+
|
73 |
+
5. Click "Detect Genre & Generate Lyrics" or use the "Style Transfer" tab for existing lyrics
|
74 |
+
|
75 |
+
## Models Used
|
76 |
+
|
77 |
+
- **Genre Classification**:
|
78 |
+
- Audio: `anton-l/wav2vec2-base-superb-gc`
|
79 |
+
- Text: `facebook/bart-large-mnli` (Zero-shot classification)
|
80 |
+
- **Lyric Generation**: `gpt2-medium`
|
81 |
+
|
82 |
+
## Contributing
|
83 |
+
|
84 |
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
85 |
+
|
86 |
+
## License
|
87 |
+
|
88 |
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
89 |
+
|
90 |
+
## Acknowledgments
|
91 |
+
|
92 |
+
- Hugging Face for providing the pre-trained models
|
93 |
+
- Gradio for the web interface framework
|
94 |
+
- The open-source community for various audio processing libraries
|
requirements.txt
CHANGED
@@ -1,4 +1,9 @@
|
|
1 |
-
gradio
|
2 |
-
transformers
|
3 |
-
torch
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==4.12.0
|
2 |
+
transformers==4.36.2
|
3 |
+
torch==2.1.2
|
4 |
+
torchaudio==2.1.2
|
5 |
+
numpy==1.26.2
|
6 |
+
datasets==2.15.0
|
7 |
+
soundfile==0.12.1
|
8 |
+
librosa==0.10.1
|
9 |
+
python-dotenv==1.0.0
|
src/classifier.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torchaudio
|
3 |
+
import librosa
|
4 |
+
import numpy as np
|
5 |
+
from transformers import pipeline
|
6 |
+
from typing import Union, Tuple, List
|
7 |
+
|
8 |
+
class MusicGenreClassifier:
    """Classify the music genre of an audio file or of lyrics text.

    Two Hugging Face pipelines are created on construction: a zero-shot text
    classifier scored against ``self.genres`` and an audio classifier.
    """

    # Sample rate (Hz) that audio is resampled to when it is loaded.
    SAMPLE_RATE = 16000

    # File extensions that make ``predict`` auto-detect its input as audio.
    AUDIO_EXTENSIONS = ('.mp3', '.wav', '.ogg', '.flac')

    def __init__(self):
        """Load the text and audio classification pipelines and the genre list."""
        # Zero-shot classifier used for lyrics text
        self.text_classifier = pipeline(
            "zero-shot-classification",
            model="facebook/bart-large-mnli"
        )

        # Pre-trained model used for audio classification
        self.audio_classifier = pipeline(
            "audio-classification",
            model="anton-l/wav2vec2-base-superb-gc"
        )

        # Candidate labels offered to the zero-shot text classifier
        self.genres = [
            "rock", "pop", "hip hop", "country", "jazz",
            "classical", "electronic", "blues", "reggae", "metal"
        ]

    def process_audio(self, audio_path: str) -> torch.Tensor:
        """Load an audio file as a waveform tensor resampled to SAMPLE_RATE.

        Args:
            audio_path: Path to the audio file to load.

        Returns:
            The waveform returned by ``librosa.load``, wrapped in a tensor.

        Raises:
            ValueError: If the file cannot be loaded or converted.
        """
        try:
            # Load audio using librosa (handles more formats)
            waveform, _ = librosa.load(audio_path, sr=self.SAMPLE_RATE)
            return torch.from_numpy(waveform)
        except Exception as e:
            raise ValueError(f"Error processing audio file: {str(e)}") from e

    def classify_audio(self, audio_path: str) -> Tuple[str, float]:
        """Classify genre from an audio file.

        Args:
            audio_path: Path to the audio file to classify.

        Returns:
            ``(label, score)`` of the highest-scoring prediction.

        Raises:
            ValueError: If loading or classification fails.
        """
        try:
            waveform = self.process_audio(audio_path)
            # The audio-classification pipeline takes raw audio as a NumPy
            # array (or a path/bytes/dict); a torch.Tensor is rejected by the
            # pinned transformers release, so convert before calling it.
            if isinstance(waveform, torch.Tensor):
                waveform = waveform.detach().cpu().numpy()
            predictions = self.audio_classifier(waveform)
            # Get the top prediction
            top_pred = max(predictions, key=lambda pred: pred['score'])
            return top_pred['label'], top_pred['score']
        except Exception as e:
            raise ValueError(f"Audio classification failed: {str(e)}") from e

    def classify_text(self, lyrics: str) -> Tuple[str, float]:
        """Classify genre from lyrics text.

        Args:
            lyrics: The lyrics to classify.

        Returns:
            ``(label, score)`` for the first entry of the classifier's
            ``labels`` and ``scores`` lists.

        Raises:
            ValueError: If classification fails.
        """
        try:
            # Hypothesis template for zero-shot classification; "{}" is
            # filled with each candidate genre.
            hypothesis_template = "This text contains {} music lyrics."

            result = self.text_classifier(
                lyrics,
                candidate_labels=self.genres,
                hypothesis_template=hypothesis_template
            )

            return result['labels'][0], result['scores'][0]
        except Exception as e:
            raise ValueError(f"Text classification failed: {str(e)}") from e

    def predict(self, input_data: str, input_type: str = None) -> dict:
        """
        Main prediction method that handles both audio and text inputs.

        Args:
            input_data: Path to audio file or lyrics text
            input_type: Optional, 'audio' or 'text'. If None, it is inferred
                from the file extension of ``input_data``; any value other
                than 'audio' is handled as text.

        Returns:
            dict with keys 'genre', 'confidence' (float) and 'input_type'

        Raises:
            ValueError: If ``input_data`` is not a string or classification
                fails.
        """
        if not isinstance(input_data, str):
            # Fail with the documented exception type instead of an
            # AttributeError from the extension check below.
            raise ValueError(
                f"Prediction failed: input_data must be a string, got {type(input_data).__name__}"
            )

        # Try to auto-detect input type if not specified
        if input_type is None:
            is_audio_path = input_data.lower().endswith(self.AUDIO_EXTENSIONS)
            input_type = 'audio' if is_audio_path else 'text'

        try:
            if input_type == 'audio':
                genre, confidence = self.classify_audio(input_data)
            else:
                genre, confidence = self.classify_text(input_data)

            return {
                'genre': genre,
                'confidence': float(confidence),
                'input_type': input_type
            }
        except Exception as e:
            raise ValueError(f"Prediction failed: {str(e)}") from e
|
src/lyric_generator.py
ADDED
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
from typing import Dict, List, Optional
|
3 |
+
|
4 |
+
class LyricGenerator:
    """Generate song lyrics, or restyle existing ones, with a text-generation pipeline."""

    # GPT-2's end-of-text token ID, passed to the pipeline as pad_token_id.
    _PAD_TOKEN_ID = 50256

    # End-of-text marker removed from generated output.
    _END_OF_TEXT = '<|endoftext|>'

    # Number of new tokens style_transfer may generate after its prompt.
    _STYLE_TRANSFER_NEW_TOKENS = 200

    def __init__(self, model_name: str = "gpt2-medium"):
        """
        Initialize the lyric generator with a specified language model.

        Args:
            model_name: The name of the pre-trained model to use
        """
        # Imported locally because this module does not import torch at file
        # level. ``pipeline`` is a factory function and has no ``device``
        # attribute, so GPU availability has to be asked of torch.
        import torch

        self.generator = pipeline(
            "text-generation",
            model=model_name,
            device=0 if torch.cuda.is_available() else -1
        )

        # Genre-specific prompts to guide generation
        self.genre_prompts = {
            "rock": "Write energetic rock lyrics about",
            "pop": "Create catchy pop lyrics about",
            "hip hop": "Write hip hop verses about",
            "country": "Write country music lyrics about",
            "jazz": "Compose smooth jazz lyrics about",
            "classical": "Write classical music lyrics about",
            "electronic": "Create electronic dance music lyrics about",
            "blues": "Write soulful blues lyrics about",
            "reggae": "Write laid-back reggae lyrics about",
            "metal": "Write intense metal lyrics about"
        }

    @classmethod
    def _clean_completion(cls, generated_text: str, prompt: str) -> str:
        """Drop the echoed prompt and end-of-text markers from generated text."""
        if generated_text.startswith(prompt):
            generated_text = generated_text[len(prompt):]
        return generated_text.replace(cls._END_OF_TEXT, '').strip()

    def generate_lyrics(
        self,
        genre: str,
        theme: str,
        max_length: int = 200,
        num_return_sequences: int = 1,
        temperature: float = 0.9,
        top_p: float = 0.9,
        top_k: int = 50
    ) -> List[str]:
        """
        Generate lyrics based on genre and theme.

        Args:
            genre: The music genre to generate lyrics for (case-insensitive);
                genres without a dedicated prompt use a generic one
            theme: The theme or topic for the lyrics
            max_length: Maximum length of generated text
            num_return_sequences: Number of different lyrics to generate
            temperature: Controls randomness (higher = more random)
            top_p: Nucleus sampling parameter
            top_k: Top-k sampling parameter

        Returns:
            List of generated lyrics, with the prompt removed

        Raises:
            ValueError: If generation fails
        """
        try:
            # Get genre-specific prompt or use default
            base_prompt = self.genre_prompts.get(
                genre.lower(),
                "Write song lyrics about"
            )

            # Construct full prompt
            prompt = f"{base_prompt} {theme}:\n\n"

            # Generate lyrics
            outputs = self.generator(
                prompt,
                max_length=max_length,
                num_return_sequences=num_return_sequences,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                do_sample=True,
                pad_token_id=self._PAD_TOKEN_ID
            )

            # Remove the prompt and clean up each generated text
            return [
                self._clean_completion(output['generated_text'], prompt)
                for output in outputs
            ]

        except Exception as e:
            raise ValueError(f"Lyric generation failed: {str(e)}") from e

    def style_transfer(
        self,
        original_lyrics: str,
        target_genre: str,
        temperature: float = 0.9
    ) -> str:
        """
        Attempt to transfer the style of existing lyrics to a target genre.

        Args:
            original_lyrics: The original lyrics to restyle
            target_genre: The target genre for the style transfer
            temperature: Controls randomness of generation

        Returns:
            Restyled lyrics in the target genre

        Raises:
            ValueError: If generation fails
        """
        try:
            prompt = f"Rewrite these lyrics in {target_genre} style:\n\n{original_lyrics}\n\nNew version:\n"

            output = self.generator(
                prompt,
                # Budget the continuation in tokens. The prompt's character
                # count is not a token count, so it must not be added to a
                # token limit.
                max_new_tokens=self._STYLE_TRANSFER_NEW_TOKENS,
                temperature=temperature,
                top_p=0.9,
                do_sample=True,
                num_return_sequences=1,
                pad_token_id=self._PAD_TOKEN_ID
            )[0]

            # Extract the new version only
            generated_text = output['generated_text']
            if not generated_text.startswith(prompt):
                # Prompt was not echoed verbatim: fall back to the marker.
                generated_text = generated_text.split("New version:\n")[-1]
            return self._clean_completion(generated_text, prompt)

        except Exception as e:
            raise ValueError(f"Style transfer failed: {str(e)}") from e
|