root
committed on
Commit
·
14555be
1
Parent(s):
0750c9c
ss
Browse files- DEPLOYMENT.md +0 -42
- app.py +261 -377
- example.py +0 -49
DEPLOYMENT.md
DELETED
@@ -1,42 +0,0 @@
|
|
1 |
-
# Deploying to Hugging Face Spaces
|
2 |
-
|
3 |
-
This guide explains how to deploy the Music Genre Classifier & Lyrics Generator to Hugging Face Spaces.
|
4 |
-
|
5 |
-
## Prerequisites
|
6 |
-
|
7 |
-
1. A Hugging Face account
|
8 |
-
2. Access to the Llama 3.1 8B Instruct model (requires acceptance of the model license)
|
9 |
-
3. A Hugging Face API token
|
10 |
-
|
11 |
-
## Deployment Steps
|
12 |
-
|
13 |
-
### 1. Create a New Space
|
14 |
-
|
15 |
-
1. Go to the Hugging Face website and log in
|
16 |
-
2. Navigate to "Spaces" in the top navigation
|
17 |
-
3. Click "Create new Space"
|
18 |
-
4. Choose "Gradio" as the SDK
|
19 |
-
5. Give your Space a name and description
|
20 |
-
6. Select "T4 GPU" as the hardware
|
21 |
-
|
22 |
-
### 2. Set up Environment Variables
|
23 |
-
|
24 |
-
Set up your Hugging Face access token as an environment variable:
|
25 |
-
|
26 |
-
1. Go to your profile settings in Hugging Face
|
27 |
-
2. Navigate to "Access Tokens" and create a new token with "write" access
|
28 |
-
3. In your Space settings, under "Repository secrets", add a new secret:
|
29 |
-
- Name: `HF_TOKEN`
|
30 |
-
- Value: Your Hugging Face access token
|
31 |
-
|
32 |
-
### 3. Upload the Files
|
33 |
-
|
34 |
-
Upload all the files from this repository to your Space.
|
35 |
-
|
36 |
-
### 4. Wait for Deployment
|
37 |
-
|
38 |
-
Hugging Face will automatically build and deploy your Space. This may take a few minutes, especially since it needs to download the models.
|
39 |
-
|
40 |
-
### 5. Access Your Application
|
41 |
-
|
42 |
-
Once deployed, you can access your application on your Hugging Face Space URL.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -236,126 +236,113 @@ def generate_lyrics(music_analysis, genre, duration):
|
|
236 |
lyric_templates = music_analysis.get("lyric_templates", [])
|
237 |
|
238 |
# Define num_phrases here to ensure it's available in all code paths
|
239 |
-
|
240 |
-
|
241 |
-
# Verify LLM is loaded
|
242 |
-
if llm_model is None or llm_tokenizer is None:
|
243 |
-
return "Error: LLM model not properly loaded"
|
244 |
-
|
245 |
-
# If no templates, fall back to original method
|
246 |
if not lyric_templates:
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
-
|
256 |
-
-
|
257 |
-
|
258 |
-
|
259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
else:
|
261 |
# Calculate the typical syllable range for this genre
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
min_syllables = min([t.get('min_expected', 2) for t in lyric_templates]) if lyric_templates[0].get('min_expected') else 2
|
266 |
-
avg_syllables = (min_syllables + max_syllables) // 2
|
267 |
-
else:
|
268 |
-
min_syllables = 2
|
269 |
-
max_syllables = 7
|
270 |
-
avg_syllables = 4
|
271 |
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
CRITICAL INSTRUCTIONS:
|
321 |
-
1. EXTREMELY SHORT LINES: Each line MUST be between {min_syllables}-{max_syllables} syllables MAXIMUM
|
322 |
-
2. ENFORCE BREVITY: NO exceptions to the syllable limit - not a single line should exceed {max_syllables} syllables
|
323 |
-
3. FRAGMENT STYLE: Use sentence fragments and short phrases instead of complete sentences
|
324 |
-
4. CONNECTED THOUGHTS: Use prepositions and conjunctions at the start of lines to connect ideas
|
325 |
-
5. SIMPLE WORDS: Choose one or two-syllable words whenever possible
|
326 |
-
6. CONCRETE IMAGERY: Use specific, tangible details rather than abstract concepts
|
327 |
-
7. NO CLICHÉS: Avoid common phrases like "time slips away" or "memories fade"
|
328 |
-
8. ONE THOUGHT PER LINE: Express just one simple idea in each line
|
329 |
-
9. EMOTION BLEND: Blend both {primary_emotion} and {secondary_emotion} emotions naturally
|
330 |
-
10. THEME WEAVING: Weave both {primary_theme} and {secondary_theme} themes together
|
331 |
-
|
332 |
-
FORMAT:
|
333 |
-
- Write exactly {num_phrases} short text lines
|
334 |
-
- No annotations, explanations, or line numbers
|
335 |
-
- Do not count syllables in the output
|
336 |
-
|
337 |
-
IMPORTANT: If you can't express an idea in {max_syllables} or fewer syllables, break it across two lines or choose a simpler way to express it.
|
338 |
-
|
339 |
-
===== EXAMPLES OF CORRECT LENGTH =====
|
340 |
-
|
341 |
-
Example 1 (short fragments connected by flow):
|
342 |
-
Cold tea cup (3 syllables)
|
343 |
-
on windowsill (3 syllables)
|
344 |
-
cat watches rain (3 syllables)
|
345 |
-
through foggy glass (3 syllables)
|
346 |
-
|
347 |
-
Example 2 (prepositional connections):
|
348 |
-
Keys dropped here (3 syllables)
|
349 |
-
by the front door (3 syllables)
|
350 |
-
where shoes pile up (3 syllables)
|
351 |
-
since you moved in (3 syllables)
|
352 |
-
|
353 |
-
DO NOT copy my examples. Create ENTIRELY NEW lyrics that blend {primary_emotion} and {secondary_emotion} emotions while exploring {primary_theme} and {secondary_theme} themes.
|
354 |
-
|
355 |
-
REMEMBER: NO LINE SHOULD EXCEED {max_syllables} SYLLABLES - this is the most important rule!
|
356 |
-
"""
|
357 |
-
|
358 |
-
# Generate lyrics using the LLM model
|
359 |
messages = [
|
360 |
{"role": "user", "content": prompt}
|
361 |
]
|
@@ -370,14 +357,15 @@ REMEMBER: NO LINE SHOULD EXCEED {max_syllables} SYLLABLES - this is the most imp
|
|
370 |
# Tokenize and move to model device
|
371 |
model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
|
372 |
|
373 |
-
# Generate with optimized parameters
|
374 |
generated_ids = llm_model.generate(
|
375 |
**model_inputs,
|
376 |
-
max_new_tokens=1024
|
377 |
do_sample=True,
|
378 |
-
temperature=0.
|
379 |
-
top_p=0.
|
380 |
-
|
|
|
381 |
pad_token_id=llm_tokenizer.eos_token_id
|
382 |
)
|
383 |
|
@@ -385,303 +373,199 @@ REMEMBER: NO LINE SHOULD EXCEED {max_syllables} SYLLABLES - this is the most imp
|
|
385 |
output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
|
386 |
lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
|
387 |
|
388 |
-
#
|
389 |
-
#
|
390 |
|
391 |
-
#
|
392 |
-
divider_patterns = [
|
393 |
-
r'Here are the lyrics:',
|
394 |
-
r'Here is my song:',
|
395 |
-
r'The lyrics:',
|
396 |
-
r'My lyrics:',
|
397 |
-
r'Song lyrics:',
|
398 |
-
r'\*\*\*+',
|
399 |
-
r'===+',
|
400 |
-
r'---+',
|
401 |
-
r'```',
|
402 |
-
r'Lyrics:'
|
403 |
-
]
|
404 |
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
lyrics = lyrics[match.end():].strip()
|
410 |
|
411 |
-
# 2.
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
418 |
|
419 |
-
# 3. Split
|
420 |
lines = lyrics.strip().split('\n')
|
421 |
clean_lines = []
|
422 |
|
423 |
-
# 4.
|
424 |
-
non_lyric_patterns = [
|
425 |
-
# Meta-commentary
|
426 |
-
r'^(note|thinking|thoughts|let me|i will|i am going|i would|i can|i need to|i have to|i should|let\'s|here|now)',
|
427 |
-
r'^(first|second|third|next|finally|importantly|remember|so|ok|okay|as requested|as asked|considering)',
|
428 |
-
# Explanations
|
429 |
-
r'syllable[s]?|phrase|rhythm|beats?|tempo|bpm|instruction|follow|alignment|match|corresponding',
|
430 |
-
r'verses?|chorus|bridge|section|stanza|part|template|format|pattern|example',
|
431 |
-
r'requirements?|guidelines?|song structure|stressed|unstressed',
|
432 |
-
# Technical language
|
433 |
-
r'generated|output|result|provide|create|write|draft|version',
|
434 |
-
# Annotations and numbering
|
435 |
-
r'^line \d+|^\d+[\.\):]|^\[\w+\]|^[\*\-\+] ',
|
436 |
-
# Questions or analytical statements
|
437 |
-
r'\?$|analysis|evaluate|review|check|ensure',
|
438 |
-
# Instruction-like statements
|
439 |
-
r'make sure|please note|important|notice|pay attention'
|
440 |
-
]
|
441 |
-
|
442 |
-
# 5. Identify which lines are likely actual lyrics vs non-lyrics
|
443 |
for line in lines:
|
444 |
line = line.strip()
|
445 |
-
|
446 |
-
# Skip empty lines or lines with just spaces/tabs
|
447 |
if not line or line.isspace():
|
448 |
continue
|
449 |
|
450 |
-
#
|
451 |
-
|
452 |
-
for pattern in non_lyric_patterns:
|
453 |
-
if re.search(pattern, line.lower()):
|
454 |
-
should_skip = True
|
455 |
-
break
|
456 |
-
|
457 |
-
if should_skip:
|
458 |
-
continue
|
459 |
-
|
460 |
-
# Skip section headers
|
461 |
-
if (line.startswith('[') and ']' in line) or (line.startswith('(') and ')' in line and len(line) < 20):
|
462 |
-
continue
|
463 |
-
|
464 |
-
# Skip lines that look like annotations (not prose-like)
|
465 |
-
if ':' in line and not any(word in line.lower() for word in ['like', 'when', 'where', 'how', 'why', 'what']):
|
466 |
-
if len(line.split(':')[0]) < 15: # Short prefixes followed by colon are likely annotations
|
467 |
-
continue
|
468 |
|
469 |
-
|
470 |
-
if len(line) < 3:
|
471 |
-
continue
|
472 |
|
473 |
-
#
|
474 |
-
if re.match(r'^\
|
475 |
continue
|
476 |
|
477 |
-
# Skip
|
478 |
-
if
|
|
|
|
|
|
|
|
|
479 |
continue
|
480 |
|
481 |
-
# Skip
|
482 |
-
if
|
483 |
continue
|
484 |
-
|
485 |
-
# Add this line as it passed all filters
|
486 |
-
clean_lines.append(line)
|
487 |
-
|
488 |
-
# 6. Additional block-level filters for common patterns
|
489 |
-
# Check beginning of lyrics for common prefixes
|
490 |
-
if clean_lines and any(clean_lines[0].lower().startswith(prefix) for prefix in
|
491 |
-
['here are', 'these are', 'below are', 'following are']):
|
492 |
-
clean_lines = clean_lines[1:] # Skip the first line
|
493 |
-
|
494 |
-
# 7. Process blocks of lines to detect explanation blocks
|
495 |
-
if len(clean_lines) > 3:
|
496 |
-
# Check for explanation blocks at the beginning
|
497 |
-
first_three = ' '.join(clean_lines[:3]).lower()
|
498 |
-
if any(term in first_three for term in ['i will', 'i have created', 'i\'ll provide', 'i\'ll write']):
|
499 |
-
# This looks like an explanation, skip the first few lines
|
500 |
-
start_idx = 0
|
501 |
-
for i, line in enumerate(clean_lines):
|
502 |
-
if i >= 3 and not any(term in line.lower() for term in ['i will', 'created', 'write', 'provide']):
|
503 |
-
start_idx = i
|
504 |
-
break
|
505 |
-
clean_lines = clean_lines[start_idx:]
|
506 |
|
507 |
-
#
|
508 |
-
|
509 |
-
if
|
510 |
-
|
511 |
-
|
512 |
-
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
-
|
517 |
-
|
518 |
-
|
519 |
-
# 8. Cleanup - Remove remaining annotations or thinking
|
520 |
-
for i in range(len(clean_lines)):
|
521 |
# Remove trailing thoughts/annotations
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
# Remove thinking tags completely
|
526 |
-
clean_lines[i] = re.sub(r'<think>.*?</think>', '', clean_lines[i], flags=re.DOTALL)
|
527 |
-
clean_lines[i] = re.sub(r'\[thinking\].*?\[/thinking\]', '', clean_lines[i], flags=re.DOTALL)
|
528 |
-
clean_lines[i] = re.sub(r'<think>', '', clean_lines[i])
|
529 |
-
clean_lines[i] = re.sub(r'</think>', '', clean_lines[i])
|
530 |
-
clean_lines[i] = re.sub(r'\[thinking\]', '', clean_lines[i])
|
531 |
-
clean_lines[i] = re.sub(r'\[/thinking\]', '', clean_lines[i])
|
532 |
|
533 |
# Remove syllable count annotations
|
534 |
-
|
|
|
|
|
|
|
|
|
535 |
|
536 |
-
|
537 |
-
clean_lines = [line for line in clean_lines if line.strip() and not line.isspace()]
|
538 |
|
539 |
-
#
|
540 |
-
# This is a critical step to ensure no line exceeds our max syllable count
|
541 |
if lyric_templates:
|
542 |
-
max_allowed_syllables =
|
|
|
543 |
else:
|
544 |
max_allowed_syllables = 6
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
cliched_patterns = [
|
550 |
-
r'moonlight (shimmers?|falls?|dances?)',
|
551 |
-
r'shadows? (dance|play|fall|stretch)',
|
552 |
-
r'time slips? away',
|
553 |
-
r'whispers? (fade|in the)',
|
554 |
-
r'silence speaks',
|
555 |
-
r'stars? shine',
|
556 |
-
r'hearts? beat',
|
557 |
-
r'footsteps (fade|echo)',
|
558 |
-
r'gentle wind',
|
559 |
-
r'(old|empty) (roads?|chair)',
|
560 |
-
r'night (holds?|falls?)',
|
561 |
-
r'memories fade',
|
562 |
-
r'dreams (linger|drift)'
|
563 |
-
]
|
564 |
-
|
565 |
-
cliche_count = 0
|
566 |
for line in clean_lines:
|
567 |
-
|
568 |
-
|
569 |
-
cliche_count += 1
|
570 |
-
break
|
571 |
-
|
572 |
-
# Calculate percentage of clichéd lines
|
573 |
-
if clean_lines:
|
574 |
-
cliche_percentage = (cliche_count / len(clean_lines)) * 100
|
575 |
-
else:
|
576 |
-
cliche_percentage = 0
|
577 |
|
578 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
579 |
if lyric_templates:
|
580 |
num_required = len(lyric_templates)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
581 |
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
# If we don't have enough lines, generate placeholders that fit the syllable count
|
588 |
-
while len(clean_lines) < num_required:
|
589 |
-
i = len(clean_lines)
|
590 |
-
if i < len(lyric_templates):
|
591 |
-
template = lyric_templates[i]
|
592 |
-
target_syllables = min(max_allowed_syllables - 1, (template.get('min_expected', 2) + template.get('max_expected', 6)) // 2)
|
593 |
|
594 |
-
|
595 |
-
|
596 |
-
specific_placeholders = {
|
597 |
-
# 2-3 syllables - specific, concrete phrases
|
598 |
-
2: [
|
599 |
-
"Phone rings twice",
|
600 |
-
"Dogs bark loud",
|
601 |
-
"Keys dropped here",
|
602 |
-
"Train rolls by",
|
603 |
-
"Birds take flight"
|
604 |
-
],
|
605 |
-
# 3-4 syllables - specific contexts
|
606 |
-
3: [
|
607 |
-
"Coffee gets cold",
|
608 |
-
"Fan blades spin",
|
609 |
-
"Pages turn slow",
|
610 |
-
"Neighbors talk",
|
611 |
-
"Radio hums soft"
|
612 |
-
],
|
613 |
-
# 4-5 syllables - specific details
|
614 |
-
4: [
|
615 |
-
"Fingers tap table",
|
616 |
-
"Taxi waits in rain",
|
617 |
-
"Laptop screen blinks",
|
618 |
-
"Ring left on sink",
|
619 |
-
"Church bells ring loud"
|
620 |
-
],
|
621 |
-
# 5-6 syllables - context rich
|
622 |
-
5: [
|
623 |
-
"Letters with no stamps",
|
624 |
-
"Watch shows wrong time",
|
625 |
-
"Jeans with torn knees",
|
626 |
-
"Dog barks next door",
|
627 |
-
"Smoke alarm beeps"
|
628 |
-
]
|
629 |
-
}
|
630 |
|
631 |
-
|
632 |
-
|
633 |
-
if primary_theme.lower() in ["love", "relationship", "romance"]:
|
634 |
-
theme_specific = ["Lipstick on glass", "Text left on read", "Scent on your coat"]
|
635 |
-
elif primary_theme.lower() in ["loss", "grief", "sadness"]:
|
636 |
-
theme_specific = ["Chair sits empty", "Photos face down", "Clothes in closet"]
|
637 |
-
elif primary_theme.lower() in ["hope", "inspiration", "triumph"]:
|
638 |
-
theme_specific = ["Seeds start to grow", "Finish line waits", "New day breaks through"]
|
639 |
|
640 |
-
#
|
641 |
-
|
642 |
-
|
643 |
-
|
644 |
-
all_placeholders = specific_placeholders[closest_group] + theme_specific
|
645 |
-
|
646 |
-
# Choose a placeholder that hasn't been used yet
|
647 |
-
available_placeholders = [p for p in all_placeholders if p not in clean_lines]
|
648 |
-
|
649 |
-
if available_placeholders:
|
650 |
-
# Use modulo for more variation
|
651 |
-
idx = (i * 17 + len(clean_lines) * 13) % len(available_placeholders)
|
652 |
-
placeholder = available_placeholders[idx]
|
653 |
else:
|
654 |
-
|
655 |
-
|
656 |
-
verbs = ["waits", "moves", "stops", "falls", "breaks", "turns", "sleeps"]
|
657 |
-
|
658 |
-
# Ensure randomness with seed that changes with each call
|
659 |
-
import random
|
660 |
-
random.seed(len(clean_lines) * 27 + i * 31)
|
661 |
-
|
662 |
-
subj = random.choice(subjects)
|
663 |
-
verb = random.choice(verbs)
|
664 |
-
|
665 |
-
placeholder = f"{subj} {verb}"
|
666 |
else:
|
667 |
-
|
668 |
-
|
669 |
-
|
670 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
671 |
# Assemble final lyrics
|
672 |
final_lyrics = '\n'.join(clean_lines)
|
673 |
-
|
674 |
-
# Add a warning if we detected too many clichés
|
675 |
-
if cliche_percentage >= 40:
|
676 |
-
final_lyrics = f"""WARNING: These lyrics contain several overused phrases and clichés.
|
677 |
-
Try regenerating for more original content.
|
678 |
|
679 |
-
|
680 |
-
|
681 |
-
|
682 |
-
|
683 |
-
return "The model generated only thinking content but no actual lyrics. Please try again."
|
684 |
-
|
685 |
return final_lyrics
|
686 |
|
687 |
except Exception as e:
|
|
|
236 |
lyric_templates = music_analysis.get("lyric_templates", [])
|
237 |
|
238 |
# Define num_phrases_for_prompt here to ensure it's available in all code paths
|
239 |
+
# Also define syllable limits for the prompt
|
|
|
|
|
|
|
|
|
|
|
|
|
240 |
if not lyric_templates:
|
241 |
+
num_phrases_for_prompt = 4 # Default if no templates
|
242 |
+
min_syl_for_prompt = 2
|
243 |
+
max_syl_for_prompt = 7
|
244 |
+
|
245 |
+
prompt = (f'''You are a professional songwriter. Write song lyrics for a {genre} song.
|
246 |
+
|
247 |
+
SONG DETAILS:
|
248 |
+
- Key: {key} {mode}
|
249 |
+
- Tempo: {tempo} BPM
|
250 |
+
- Primary emotion: {primary_emotion}
|
251 |
+
- Secondary emotion: {secondary_emotion}
|
252 |
+
- Primary theme: {primary_theme}
|
253 |
+
- Secondary theme: {secondary_theme}
|
254 |
+
|
255 |
+
CRITICAL REQUIREMENTS (MOST IMPORTANT):
|
256 |
+
- You MUST write EXACTLY {num_phrases_for_prompt} lines of lyrics.
|
257 |
+
- Number each lyric line starting from 1 up to {num_phrases_for_prompt}. For example:
|
258 |
+
1. First lyric line.
|
259 |
+
2. Second lyric line.
|
260 |
+
...
|
261 |
+
{num_phrases_for_prompt}. The final lyric line.
|
262 |
+
- Each numbered line (after removing the number and period) MUST be {min_syl_for_prompt}-{max_syl_for_prompt} syllables MAXIMUM.
|
263 |
+
- NO line's content (after removing the number) can exceed {max_syl_for_prompt} syllables. This is EXTREMELY IMPORTANT.
|
264 |
+
- Count syllables carefully for the content of each numbered line.
|
265 |
+
- Use SHORT WORDS and SHORT PHRASES for the content of each numbered line.
|
266 |
+
- Break long thoughts into multiple numbered lines.
|
267 |
+
|
268 |
+
CREATIVITY GUIDELINES:
|
269 |
+
- Create original, vivid imagery that captures the emotions.
|
270 |
+
- Use concrete, sensory details (what you see, hear, feel, touch).
|
271 |
+
- Avoid clichés and common phrases.
|
272 |
+
- Draw inspiration from the specific themes and emotions listed above.
|
273 |
+
- Think about unique moments, specific objects, or personal details.
|
274 |
+
- Use unexpected word combinations.
|
275 |
+
- Focus on the particular mood created by {primary_emotion} and {secondary_emotion}.
|
276 |
+
|
277 |
+
STYLE FOR SHORT LINES (for the content of each numbered line):
|
278 |
+
- Use brief, impactful phrases.
|
279 |
+
- Focus on single images or moments per line.
|
280 |
+
- Choose simple, everyday words.
|
281 |
+
- Let each line paint one clear picture.
|
282 |
+
|
283 |
+
ABSOLUTELY NO placeholders like [line], [moment], [breath], [phrase], [word], etc.
|
284 |
+
|
285 |
+
OUTPUT FORMAT:
|
286 |
+
Under the "LYRICS:" heading, provide exactly {num_phrases_for_prompt} numbered lyric lines.
|
287 |
+
|
288 |
+
LYRICS:
|
289 |
+
(Your {num_phrases_for_prompt} numbered lyric lines go here, each starting with its number, a period, and a space)
|
290 |
+
|
291 |
+
Remember: Output EXACTLY {num_phrases_for_prompt} numbered lyric lines. Each line's content (after removing the number) must be {min_syl_for_prompt}-{max_syl_for_prompt} syllables.''')
|
292 |
else:
|
293 |
# Calculate the typical syllable range for this genre
|
294 |
+
num_phrases_for_prompt = len(lyric_templates)
|
295 |
+
max_syl_for_prompt = max([t.get('max_expected', 7) for t in lyric_templates]) if lyric_templates and lyric_templates[0].get('max_expected') else 7
|
296 |
+
min_syl_for_prompt = min([t.get('min_expected', 2) for t in lyric_templates]) if lyric_templates and lyric_templates[0].get('min_expected') else 2
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
|
298 |
+
prompt = (f'''You are a professional songwriter. Write song lyrics for a {genre} song.
|
299 |
+
|
300 |
+
SONG DETAILS:
|
301 |
+
- Key: {key} {mode}
|
302 |
+
- Tempo: {tempo} BPM
|
303 |
+
- Primary emotion: {primary_emotion}
|
304 |
+
- Secondary emotion: {secondary_emotion}
|
305 |
+
- Primary theme: {primary_theme}
|
306 |
+
- Secondary theme: {secondary_theme}
|
307 |
+
|
308 |
+
CRITICAL REQUIREMENTS (MOST IMPORTANT):
|
309 |
+
- You MUST write EXACTLY {num_phrases_for_prompt} lines of lyrics.
|
310 |
+
- Number each lyric line starting from 1 up to {num_phrases_for_prompt}. For example:
|
311 |
+
1. First lyric line.
|
312 |
+
2. Second lyric line.
|
313 |
+
...
|
314 |
+
{num_phrases_for_prompt}. The final lyric line.
|
315 |
+
- Each numbered line (after removing the number and period) MUST be {min_syl_for_prompt}-{max_syl_for_prompt} syllables MAXIMUM.
|
316 |
+
- NO line's content (after removing the number) can exceed {max_syl_for_prompt} syllables. This is EXTREMELY IMPORTANT.
|
317 |
+
- Count syllables carefully for the content of each numbered line.
|
318 |
+
- Use SHORT WORDS and SHORT PHRASES for the content of each numbered line.
|
319 |
+
- Break long thoughts into multiple numbered lines.
|
320 |
+
|
321 |
+
CREATIVITY GUIDELINES:
|
322 |
+
- Create original, vivid imagery that captures the emotions.
|
323 |
+
- Use concrete, sensory details (what you see, hear, feel, touch).
|
324 |
+
- Avoid clichés and common phrases.
|
325 |
+
- Draw inspiration from the specific themes and emotions listed above.
|
326 |
+
- Think about unique moments, specific objects, or personal details.
|
327 |
+
- Use unexpected word combinations.
|
328 |
+
- Focus on the particular mood created by {primary_emotion} and {secondary_emotion}.
|
329 |
+
|
330 |
+
STYLE FOR SHORT LINES (for the content of each numbered line):
|
331 |
+
- Use brief, impactful phrases.
|
332 |
+
- Focus on single images or moments per line.
|
333 |
+
- Choose simple, everyday words.
|
334 |
+
- Let each line paint one clear picture.
|
335 |
+
|
336 |
+
ABSOLUTELY NO placeholders like [line], [moment], [breath], [phrase], [word], etc.
|
337 |
+
|
338 |
+
OUTPUT FORMAT:
|
339 |
+
Under the "LYRICS:" heading, provide exactly {num_phrases_for_prompt} numbered lyric lines.
|
340 |
+
|
341 |
+
LYRICS:
|
342 |
+
(Your {num_phrases_for_prompt} numbered lyric lines go here, each starting with its number, a period, and a space)
|
343 |
+
|
344 |
+
Remember: Output EXACTLY {num_phrases_for_prompt} numbered lyric lines. Each line's content (after removing the number) must be {min_syl_for_prompt}-{max_syl_for_prompt} syllables.''')
|
345 |
+
# Wrap the prompt in a single user chat message for the QwQ model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
messages = [
|
347 |
{"role": "user", "content": prompt}
|
348 |
]
|
|
|
357 |
# Tokenize and move to model device
|
358 |
model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
|
359 |
|
360 |
+
# Generate with optimized parameters for QwQ model
|
361 |
generated_ids = llm_model.generate(
|
362 |
**model_inputs,
|
363 |
+
max_new_tokens=2048, # Increased from 1024 to give QwQ more room
|
364 |
do_sample=True,
|
365 |
+
temperature=0.6, # QwQ recommended setting
|
366 |
+
top_p=0.95, # QwQ recommended setting
|
367 |
+
top_k=30, # QwQ recommended range 20-40
|
368 |
+
repetition_penalty=1.1, # Reduced to allow some repetition if needed
|
369 |
pad_token_id=llm_tokenizer.eos_token_id
|
370 |
)
|
371 |
|
|
|
373 |
output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
|
374 |
lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
|
375 |
|
376 |
+
# ENHANCED CLEANING FOR QWQ MODEL - IMPROVED APPROACH
|
377 |
+
# ---------------------------------------------------
|
378 |
|
379 |
+
# QwQ often includes thinking process - we need to extract only the final lyrics
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
380 |
|
381 |
+
# 1. First, remove any thinking tags completely (QwQ specific)
|
382 |
+
lyrics = re.sub(r'<think>.*?</think>', '', lyrics, flags=re.DOTALL | re.IGNORECASE)
|
383 |
+
lyrics = re.sub(r'<think>', '', lyrics, flags=re.IGNORECASE)
|
384 |
+
lyrics = re.sub(r'</think>', '', lyrics, flags=re.IGNORECASE)
|
|
|
385 |
|
386 |
+
# 2. Look for the LYRICS: section specifically
|
387 |
+
lyrics_section_match = re.search(r'LYRICS:\s*\n(.*?)(?:\n\n|\Z)', lyrics, re.DOTALL | re.IGNORECASE)
|
388 |
+
if lyrics_section_match:
|
389 |
+
lyrics = lyrics_section_match.group(1).strip()
|
390 |
+
else:
|
391 |
+
# Fallback: look for other common transitions that indicate the start of actual lyrics
|
392 |
+
lyric_start_patterns = [
|
393 |
+
r'(?:here (?:are )?(?:the )?lyrics?:?|lyrics?:?|my lyrics?:?|song lyrics?:?)\s*',
|
394 |
+
r'(?:here (?:is )?(?:a )?song:?|here (?:is )?my song:?)\s*',
|
395 |
+
r'(?:\*{3,}|\={3,}|\-{3,})\s*',
|
396 |
+
r'(?:final lyrics?:?|the lyrics?:?)\s*',
|
397 |
+
r'```\s*'
|
398 |
+
]
|
399 |
+
|
400 |
+
# Try to find where actual lyrics start
|
401 |
+
lyrics_start_pos = 0
|
402 |
+
for pattern in lyric_start_patterns:
|
403 |
+
match = re.search(pattern, lyrics, re.IGNORECASE)
|
404 |
+
if match:
|
405 |
+
lyrics_start_pos = max(lyrics_start_pos, match.end())
|
406 |
+
|
407 |
+
# Keep content from the identified start position
|
408 |
+
if lyrics_start_pos > 0:
|
409 |
+
lyrics = lyrics[lyrics_start_pos:].strip()
|
410 |
|
411 |
+
# 3. Split into lines and apply basic filtering
|
412 |
lines = lyrics.strip().split('\n')
|
413 |
clean_lines = []
|
414 |
|
415 |
+
# 4. Simple filtering - keep only actual lyric lines
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
416 |
for line in lines:
|
417 |
line = line.strip()
|
|
|
|
|
418 |
if not line or line.isspace():
|
419 |
continue
|
420 |
|
421 |
+
# Strip leading numbers like "1. ", "2. ", etc.
|
422 |
+
line = re.sub(r'^\d+\.\s*', '', line)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
423 |
|
424 |
+
line_lower = line.lower()
|
|
|
|
|
425 |
|
426 |
+
# Remove placeholder lines - more comprehensive pattern
|
427 |
+
if re.match(r'^\[ *(line|moment|breath|phrase|word|sound) *\]$', line_lower):
|
428 |
continue
|
429 |
|
430 |
+
# Skip lines that are clearly not lyrics (simplified filtering)
|
431 |
+
if any(phrase in line_lower for phrase in [
|
432 |
+
'line 1', 'line 2', 'line 3',
|
433 |
+
'thinking', 'lyrics:', 'format:', 'etc...', 'commentary',
|
434 |
+
'syllables', 'requirements', 'output', 'provide'
|
435 |
+
]):
|
436 |
continue
|
437 |
|
438 |
+
# Skip numbered annotations
|
439 |
+
if re.match(r'^\d+[\.\):]|^\[.*\]$', line):
|
440 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
441 |
|
442 |
+
# Keep lines that look like actual lyrics (not too long, not too technical)
|
443 |
+
words = line.split()
|
444 |
+
if 1 <= len(words) <= 8 and not any(tech_word in line_lower for tech_word in [
|
445 |
+
'syllable', 'beat', 'tempo', 'analysis', 'format', 'section'
|
446 |
+
]):
|
447 |
+
clean_lines.append(line)
|
448 |
+
|
449 |
+
# 5. Additional cleanup for QwQ-specific issues
|
450 |
+
# Remove any remaining thinking fragments
|
451 |
+
final_clean_lines = []
|
452 |
+
for line in clean_lines:
|
|
|
|
|
|
|
453 |
# Remove trailing thoughts/annotations
|
454 |
+
line = re.sub(r'\s+//.*$', '', line)
|
455 |
+
line = re.sub(r'\s+\(.*?\)$', '', line)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
456 |
|
457 |
# Remove syllable count annotations
|
458 |
+
line = re.sub(r'\s*\(\d+\s*syllables?\)', '', line, flags=re.IGNORECASE)
|
459 |
+
|
460 |
+
# Skip if the line became empty after cleaning
|
461 |
+
if line.strip():
|
462 |
+
final_clean_lines.append(line.strip())
|
463 |
|
464 |
+
clean_lines = final_clean_lines
|
|
|
465 |
|
466 |
+
# AGGRESSIVE SYLLABLE ENFORCEMENT - This is critical for beat matching
|
|
|
467 |
if lyric_templates:
|
468 |
+
max_allowed_syllables = max([t.get('max_expected', 6) for t in lyric_templates])
|
469 |
+
min_allowed_syllables = min([t.get('min_expected', 2) for t in lyric_templates])
|
470 |
else:
|
471 |
max_allowed_syllables = 6
|
472 |
+
min_allowed_syllables = 2
|
473 |
+
|
474 |
+
# Enforce syllable limits on every line
|
475 |
+
syllable_enforced_lines = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
476 |
for line in clean_lines:
|
477 |
+
words = line.split()
|
478 |
+
current_syllables = sum(beat_analyzer.count_syllables(word) for word in words)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
479 |
|
480 |
+
# If line is within limits, keep it
|
481 |
+
if min_allowed_syllables <= current_syllables <= max_allowed_syllables:
|
482 |
+
syllable_enforced_lines.append(line)
|
483 |
+
# If line is too long, we need to split it intelligently
|
484 |
+
elif current_syllables > max_allowed_syllables:
|
485 |
+
# Try to split into multiple shorter lines
|
486 |
+
current_line = []
|
487 |
+
current_count = 0
|
488 |
+
|
489 |
+
for word in words:
|
490 |
+
word_syllables = beat_analyzer.count_syllables(word)
|
491 |
+
|
492 |
+
# If adding this word would exceed limit, start new line
|
493 |
+
if current_count + word_syllables > max_allowed_syllables and current_line:
|
494 |
+
syllable_enforced_lines.append(" ".join(current_line))
|
495 |
+
current_line = [word]
|
496 |
+
current_count = word_syllables
|
497 |
+
else:
|
498 |
+
# Add the word to the current line
|
499 |
+
current_line.append(word)
|
500 |
+
current_count += word_syllables
|
501 |
+
|
502 |
+
# Add the remaining words as final line
|
503 |
+
if current_line and current_count >= min_allowed_syllables:
|
504 |
+
syllable_enforced_lines.append(" ".join(current_line))
|
505 |
+
# Skip lines that are too short
|
506 |
+
|
507 |
+
clean_lines = syllable_enforced_lines
|
508 |
+
|
509 |
+
# Get required number of lines
|
510 |
if lyric_templates:
|
511 |
num_required = len(lyric_templates)
|
512 |
+
else:
|
513 |
+
num_required = 4
|
514 |
+
|
515 |
+
# IMPORTANT: Adjust line count to match requirement
|
516 |
+
if len(clean_lines) > num_required:
|
517 |
+
# Too many lines - try to merge adjacent short lines first
|
518 |
+
merged_lines = []
|
519 |
+
i = 0
|
520 |
|
521 |
+
while i < len(clean_lines) and len(merged_lines) < num_required:
|
522 |
+
if i + 1 < len(clean_lines) and len(merged_lines) < num_required - 1:
|
523 |
+
# Check if we can merge current and next line
|
524 |
+
line1 = clean_lines[i]
|
525 |
+
line2 = clean_lines[i + 1]
|
|
|
|
|
|
|
|
|
|
|
|
|
526 |
|
527 |
+
words1 = line1.split()
|
528 |
+
words2 = line2.split()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
529 |
|
530 |
+
syllables1 = sum(beat_analyzer.count_syllables(word) for word in words1)
|
531 |
+
syllables2 = sum(beat_analyzer.count_syllables(word) for word in words2)
|
|
|
|
|
|
|
|
|
|
|
|
|
532 |
|
533 |
+
# If merging would stay within limits, merge them
|
534 |
+
if syllables1 + syllables2 <= max_allowed_syllables:
|
535 |
+
merged_lines.append(line1 + " " + line2)
|
536 |
+
i += 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
537 |
else:
|
538 |
+
merged_lines.append(line1)
|
539 |
+
i += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
540 |
else:
|
541 |
+
merged_lines.append(clean_lines[i])
|
542 |
+
i += 1
|
543 |
+
|
544 |
+
# If still too many, truncate to required number
|
545 |
+
clean_lines = merged_lines[:num_required]
|
546 |
+
|
547 |
+
elif len(clean_lines) < num_required:
|
548 |
+
# Too few lines - this is a generation failure
|
549 |
+
# Ideally we would pad with empty lines or regenerate instead of failing
|
550 |
+
# For now, let's return an error message
|
551 |
+
return f"Error: The model generated {len(clean_lines)} lines but {num_required} were required. Please try again."
|
552 |
+
|
553 |
+
# Final check - ensure we have exactly the required number
|
554 |
+
if len(clean_lines) != num_required:
|
555 |
+
# If we still don't have the right number, truncate or return an error
|
556 |
+
if len(clean_lines) > num_required:
|
557 |
+
clean_lines = clean_lines[:num_required]
|
558 |
+
else:
|
559 |
+
# This shouldn't happen with the above logic, but just in case
|
560 |
+
return f"Error: Could not generate exactly {num_required} lines. Please try again."
|
561 |
+
|
562 |
# Assemble final lyrics
|
563 |
final_lyrics = '\n'.join(clean_lines)
|
|
|
|
|
|
|
|
|
|
|
564 |
|
565 |
+
# Final sanity check - if we have nothing or very little, return an error
|
566 |
+
if not final_lyrics or len(final_lyrics.strip()) < 15:
|
567 |
+
return "The model output appears to be mostly thinking content. Please try regenerating for cleaner lyrics."
|
568 |
+
|
|
|
|
|
569 |
return final_lyrics
|
570 |
|
571 |
except Exception as e:
|
example.py
DELETED
@@ -1,49 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import sys
|
3 |
-
from app import process_audio, music_analyzer
|
4 |
-
|
5 |
-
def main():
|
6 |
-
"""
|
7 |
-
Example function to demonstrate the application with a sample audio file.
|
8 |
-
|
9 |
-
Usage:
|
10 |
-
python example.py <path_to_audio_file>
|
11 |
-
"""
|
12 |
-
if len(sys.argv) != 2:
|
13 |
-
print("Usage: python example.py <path_to_audio_file>")
|
14 |
-
return
|
15 |
-
|
16 |
-
audio_file = sys.argv[1]
|
17 |
-
if not os.path.exists(audio_file):
|
18 |
-
print(f"Error: File {audio_file} does not exist.")
|
19 |
-
return
|
20 |
-
|
21 |
-
print(f"Processing audio file: {audio_file}")
|
22 |
-
|
23 |
-
# Call the main processing function
|
24 |
-
genre_results, lyrics = process_audio(audio_file)
|
25 |
-
|
26 |
-
# Get emotion analysis results
|
27 |
-
emotion_results = music_analyzer.analyze_music(audio_file)
|
28 |
-
|
29 |
-
# Print results
|
30 |
-
print("\n" + "="*50)
|
31 |
-
print("GENRE CLASSIFICATION RESULTS:")
|
32 |
-
print("="*50)
|
33 |
-
print(genre_results)
|
34 |
-
|
35 |
-
print("\n" + "="*50)
|
36 |
-
print("EMOTION ANALYSIS RESULTS:")
|
37 |
-
print("="*50)
|
38 |
-
print(f"Tempo: {emotion_results['summary']['tempo']:.1f} BPM")
|
39 |
-
print(f"Key: {emotion_results['summary']['key']} {emotion_results['summary']['mode']}")
|
40 |
-
print(f"Primary Emotion: {emotion_results['summary']['primary_emotion']}")
|
41 |
-
print(f"Primary Theme: {emotion_results['summary']['primary_theme']}")
|
42 |
-
|
43 |
-
print("\n" + "="*50)
|
44 |
-
print("GENERATED LYRICS:")
|
45 |
-
print("="*50)
|
46 |
-
print(lyrics)
|
47 |
-
|
48 |
-
if __name__ == "__main__":
|
49 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|