jacob-c committed on
Commit c31b527 · 1 Parent(s): 3bc2967
Files changed (1)
  1. app.py +99 -33
app.py CHANGED
@@ -6,6 +6,7 @@ import json
 import time
 import tempfile
 import shutil
+import librosa
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 # Check if CUDA is available and set the device accordingly
@@ -16,12 +17,46 @@ AUDIO_API_URL = "https://api-inference.huggingface.co/models/MIT/ast-finetuned-a
 LYRICS_API_URL = "https://api-inference.huggingface.co/models/gpt2-xl"
 headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN')}"}
 
-def format_error(message):
-    """Helper function to format error messages as JSON"""
-    return {"error": message}
+def get_audio_duration(audio_path):
+    """Get the duration of the audio file in seconds"""
+    try:
+        duration = librosa.get_duration(path=audio_path)
+        return duration
+    except Exception as e:
+        print(f"Error getting audio duration: {e}")
+        return None
+
+def calculate_song_structure(duration):
+    """Calculate song structure based on audio duration"""
+    if duration is None:
+        return {"verses": 2, "choruses": 1, "tokens": 200}  # Default structure
+
+    # Basic rules for song structure:
+    # - Short clips (< 30s): 1 verse, 1 chorus
+    # - Medium clips (30s-2min): 2 verses, 1-2 choruses
+    # - Longer clips (>2min): 3 verses, 2-3 choruses
+
+    if duration < 30:
+        return {
+            "verses": 1,
+            "choruses": 1,
+            "tokens": 150
+        }
+    elif duration < 120:
+        return {
+            "verses": 2,
+            "choruses": 2,
+            "tokens": 200
+        }
+    else:
+        return {
+            "verses": 3,
+            "choruses": 3,
+            "tokens": 300
+        }
 
-def create_lyrics_prompt(classification_results):
-    """Create a prompt for lyrics generation based on classification results"""
+def create_lyrics_prompt(classification_results, song_structure):
+    """Create a prompt for lyrics generation based on classification results and desired structure"""
     # Get the top genre and its characteristics
     top_result = classification_results[0]
     genre = top_result['label']
@@ -30,14 +65,57 @@ def create_lyrics_prompt(classification_results):
     # Get additional musical elements
     additional_elements = [r['label'] for r in classification_results[1:3]]
 
-    # Create a more focused prompt for GPT2-XL
+    # Create a structured prompt based on song length
     prompt = f"""Write song lyrics in the style of {genre}.
 Theme: A {genre} song with elements of {' and '.join(additional_elements)}
+Structure: {song_structure['verses']} verses and {song_structure['choruses']} choruses
+
+Format the lyrics with [Verse 1], [Chorus], [Verse 2], etc.
+Make each verse 4-6 lines and chorus 4 lines.
 
 [Verse 1]"""
     return prompt
 
-def generate_lyrics_with_retry(prompt, max_retries=5, initial_wait=2):
+def format_lyrics(generated_text, song_structure):
+    """Format the generated lyrics according to desired structure"""
+    lines = generated_text.split('\n')
+    cleaned_lines = []
+    current_section = "[Verse 1]"
+    verse_count = 0
+    chorus_count = 0
+
+    for line in lines:
+        line = line.strip()
+        if not line or line.startswith('###') or line.startswith('```'):
+            continue
+
+        # Handle section markers
+        if line.lower().startswith('[verse'):
+            if verse_count < song_structure['verses']:
+                verse_count += 1
+                current_section = f"[Verse {verse_count}]"
+                cleaned_lines.append(f"\n{current_section}")
+            continue
+        elif line.lower().startswith('[chorus'):
+            if chorus_count < song_structure['choruses']:
+                chorus_count += 1
+                current_section = f"[Chorus {chorus_count}]"
+                cleaned_lines.append(f"\n{current_section}")
+            continue
+
+        # Add the line if we haven't exceeded our structure limits
+        if (current_section.startswith('[Verse') and verse_count <= song_structure['verses']) or \
+           (current_section.startswith('[Chorus') and chorus_count <= song_structure['choruses']):
+            cleaned_lines.append(line)
+
+    # Add chorus after first verse if not present
+    if len(cleaned_lines) == 5 and chorus_count == 0:  # After 4 lines of verse + section header
+        chorus_count += 1
+        cleaned_lines.append(f"\n[Chorus 1]")
+
+    return "\n".join(cleaned_lines)
+
+def generate_lyrics_with_retry(prompt, song_structure, max_retries=5, initial_wait=2):
     """Generate lyrics using GPT2-XL with retry logic"""
    wait_time = initial_wait
 
@@ -49,7 +127,7 @@ def generate_lyrics_with_retry(prompt, max_retries=5, initial_wait=2):
            json={
                "inputs": prompt,
                "parameters": {
-                    "max_new_tokens": 200,
+                    "max_new_tokens": song_structure['tokens'],
                    "temperature": 0.9,
                    "top_p": 0.95,
                    "do_sample": True,
@@ -66,23 +144,7 @@ def generate_lyrics_with_retry(prompt, max_retries=5, initial_wait=2):
            result = response.json()
            if isinstance(result, list) and len(result) > 0:
                generated_text = result[0].get("generated_text", "")
-                # Clean up and format the generated text
-                lines = generated_text.split('\n')
-                cleaned_lines = []
-                current_section = "[Verse 1]"
-
-                for line in lines:
-                    line = line.strip()
-                    if line and not line.startswith('###') and not line.startswith('```'):
-                        if line.lower().startswith('[verse') or line.lower().startswith('[chorus'):
-                            current_section = line
-                        cleaned_lines.append(line)
-
-                # Add chorus after first verse if not present
-                if len(cleaned_lines) == 4 and current_section == "[Verse 1]":
-                    cleaned_lines.append("\n[Chorus]")
-
-                return "\n".join(cleaned_lines)
+                return format_lyrics(generated_text, song_structure)
            return "Error: No text generated"
        elif response.status_code == 503:
            print(f"Model loading, attempt {attempt + 1}/{max_retries}. Waiting {wait_time} seconds...")
@@ -127,14 +189,18 @@ def classify_and_generate(audio_file):
    if not token:
        return "Error: HF_TOKEN environment variable is not set. Please set your Hugging Face API token."
 
+    # Get audio duration and calculate structure
+    if isinstance(audio_file, tuple):
+        audio_path = audio_file[0]
+    else:
+        audio_path = audio_file
+
+    duration = get_audio_duration(audio_path)
+    song_structure = calculate_song_structure(duration)
+    print(f"Audio duration: {duration:.2f}s, Structure: {song_structure}")
+
    # Create a temporary file to handle the audio data
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio:
-        # If audio_file is a tuple (file path and sampling rate)
-        if isinstance(audio_file, tuple):
-            audio_path = audio_file[0]
-        else:
-            audio_path = audio_file
-
        # Copy the audio file to our temporary file
        shutil.copy2(audio_path, temp_audio.name)
 
@@ -163,8 +229,8 @@
 
    # Generate lyrics based on classification with retry logic
    print("Generating lyrics based on classification...")
-    prompt = create_lyrics_prompt(formatted_results)
-    lyrics = generate_lyrics_with_retry(prompt)
+    prompt = create_lyrics_prompt(formatted_results, song_structure)
+    lyrics = generate_lyrics_with_retry(prompt, song_structure)
 
    # Format and return results
    return format_results(formatted_results, lyrics, prompt)
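
The commit chains the new helpers as get_audio_duration → calculate_song_structure → create_lyrics_prompt → generate_lyrics_with_retry, so the requested verse/chorus counts and max_new_tokens scale with clip length, and format_lyrics trims the generated text back to that structure. Below is a minimal usage sketch, not part of the commit, assuming it runs next to app.py with librosa installed and HF_TOKEN set; "sample.mp3" and the classification labels are placeholder values for illustration.

# Usage sketch (not in the commit): exercise the duration-aware lyric pipeline directly.
from app import (
    get_audio_duration,
    calculate_song_structure,
    create_lyrics_prompt,
    generate_lyrics_with_retry,
)

# Classifier output in the shape create_lyrics_prompt reads: dicts with a 'label' key,
# ordered from most to least likely (labels here are placeholders).
classification_results = [
    {"label": "Rock music"},
    {"label": "Electric guitar"},
    {"label": "Drum kit"},
]

duration = get_audio_duration("sample.mp3")           # None if librosa cannot read the file
song_structure = calculate_song_structure(duration)   # e.g. a 90 s clip -> {"verses": 2, "choruses": 2, "tokens": 200}
prompt = create_lyrics_prompt(classification_results, song_structure)
lyrics = generate_lyrics_with_retry(prompt, song_structure)
print(lyrics)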