root committed on
Commit
14555be
·
1 Parent(s): 0750c9c
Files changed (3) hide show
  1. DEPLOYMENT.md +0 -42
  2. app.py +261 -377
  3. example.py +0 -49
DEPLOYMENT.md DELETED
@@ -1,42 +0,0 @@
1
- # Deploying to Hugging Face Spaces
2
-
3
- This guide explains how to deploy the Music Genre Classifier & Lyrics Generator to Hugging Face Spaces.
4
-
5
- ## Prerequisites
6
-
7
- 1. A Hugging Face account
8
- 2. Access to the Llama 3.1 8B Instruct model (requires acceptance of the model license)
9
- 3. A Hugging Face API token
10
-
11
- ## Deployment Steps
12
-
13
- ### 1. Create a New Space
14
-
15
- 1. Go to the Hugging Face website and log in
16
- 2. Navigate to "Spaces" in the top navigation
17
- 3. Click "Create new Space"
18
- 4. Choose "Gradio" as the SDK
19
- 5. Give your Space a name and description
20
- 6. Select "T4 GPU" as the hardware
21
-
22
- ### 2. Set up Environment Variables
23
-
24
- Set up your Hugging Face access token as an environment variable:
25
-
26
- 1. Go to your profile settings in Hugging Face
27
- 2. Navigate to "Access Tokens" and create a new token with "write" access
28
- 3. In your Space settings, under "Repository secrets", add a new secret:
29
- - Name: `HF_TOKEN`
30
- - Value: Your Hugging Face access token
31
-
32
- ### 3. Upload the Files
33
-
34
- Upload all the files from this repository to your Space.
35
-
36
- ### 4. Wait for Deployment
37
-
38
- Hugging Face will automatically build and deploy your Space. This may take a few minutes, especially since it needs to download the models.
39
-
40
- ### 5. Access Your Application
41
-
42
- Once deployed, you can access your application on your Hugging Face Space URL.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -236,126 +236,113 @@ def generate_lyrics(music_analysis, genre, duration):
236
  lyric_templates = music_analysis.get("lyric_templates", [])
237
 
238
  # Define num_phrases here to ensure it's available in all code paths
239
- num_phrases = len(lyric_templates) if lyric_templates else 4
240
-
241
- # Verify LLM is loaded
242
- if llm_model is None or llm_tokenizer is None:
243
- return "Error: LLM model not properly loaded"
244
-
245
- # If no templates, fall back to original method
246
  if not lyric_templates:
247
- # Enhanced prompt with both emotions and themes
248
- prompt = f"""Write song lyrics for a {genre} song in {key} {mode} with tempo {tempo} BPM.
249
-
250
- EMOTIONS:
251
- - Primary: {primary_emotion}
252
- - Secondary: {secondary_emotion}
253
-
254
- THEMES:
255
- - Primary: {primary_theme}
256
- - Secondary: {secondary_theme}
257
-
258
- ONLY WRITE THE ACTUAL LYRICS. NO EXPLANATIONS OR META-TEXT.
259
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  else:
261
  # Calculate the typical syllable range for this genre
262
- if num_phrases > 0:
263
- # Get max syllables per line from templates
264
- max_syllables = max([t.get('max_expected', 7) for t in lyric_templates]) if lyric_templates[0].get('max_expected') else 7
265
- min_syllables = min([t.get('min_expected', 2) for t in lyric_templates]) if lyric_templates[0].get('min_expected') else 2
266
- avg_syllables = (min_syllables + max_syllables) // 2
267
- else:
268
- min_syllables = 2
269
- max_syllables = 7
270
- avg_syllables = 4
271
 
272
- # Create random examples based on the song's themes and emotions
273
- # to avoid the LLM copying our examples directly
274
- example_themes = [
275
- {"emotion": "love", "fragments": ["I see your face", "across the room", "my heart beats fast", "can't look away"]},
276
- {"emotion": "sadness", "fragments": ["tears fall like rain", "on empty streets", "memories fade", "into the dark"]},
277
- {"emotion": "nostalgia", "fragments": ["old photographs", "dusty and worn", "remind me of when", "we were young"]},
278
- {"emotion": "hope", "fragments": ["dawn breaks through clouds", "new day begins", "darkness recedes", "light fills my soul"]},
279
- {"emotion": "longing", "fragments": ["miles apart now", "under same stars", "thinking of you", "across the distance"]}
280
- ]
281
-
282
- # Select a theme that doesn't match the song's emotions to avoid copying
283
- selected_themes = [t for t in example_themes if t["emotion"].lower() not in [primary_emotion.lower(), secondary_emotion.lower()]]
284
- if not selected_themes:
285
- selected_themes = example_themes
286
-
287
- import random
288
- example_theme = random.choice(selected_themes)
289
- example_fragments = example_theme["fragments"]
290
- random.shuffle(example_fragments) # Randomize order
291
-
292
- # Create example 1 - grammatical connection with conjunction
293
- ex1_line1 = example_fragments[0] if len(example_fragments) > 0 else "The morning sun"
294
- ex1_line2 = example_fragments[1] if len(example_fragments) > 1 else "breaks through clouds"
295
- ex1_line3 = example_fragments[2] if len(example_fragments) > 2 else "as birds begin"
296
- ex1_line4 = example_fragments[3] if len(example_fragments) > 3 else "their dawn chorus"
297
-
298
- # Create example 2 - prepositional connection
299
- ex2_fragments = [
300
- "She walks alone",
301
- "through crowded streets",
302
- "with memories",
303
- "of better days"
304
- ]
305
- random.shuffle(ex2_fragments)
306
-
307
- # Create a more direct prompt with examples and specific syllable count guidance
308
- prompt = f"""Write song lyrics for a {genre} song in {key} {mode} with tempo {tempo} BPM.
309
-
310
- EMOTIONS:
311
- - Primary: {primary_emotion}
312
- - Secondary: {secondary_emotion}
313
-
314
- THEMES:
315
- - Primary: {primary_theme}
316
- - Secondary: {secondary_theme}
317
-
318
- I need EXACTLY {num_phrases} lines of lyrics with these STRICT requirements:
319
-
320
- CRITICAL INSTRUCTIONS:
321
- 1. EXTREMELY SHORT LINES: Each line MUST be between {min_syllables}-{max_syllables} syllables MAXIMUM
322
- 2. ENFORCE BREVITY: NO exceptions to the syllable limit - not a single line should exceed {max_syllables} syllables
323
- 3. FRAGMENT STYLE: Use sentence fragments and short phrases instead of complete sentences
324
- 4. CONNECTED THOUGHTS: Use prepositions and conjunctions at the start of lines to connect ideas
325
- 5. SIMPLE WORDS: Choose one or two-syllable words whenever possible
326
- 6. CONCRETE IMAGERY: Use specific, tangible details rather than abstract concepts
327
- 7. NO CLICHÉS: Avoid common phrases like "time slips away" or "memories fade"
328
- 8. ONE THOUGHT PER LINE: Express just one simple idea in each line
329
- 9. EMOTION BLEND: Blend both {primary_emotion} and {secondary_emotion} emotions naturally
330
- 10. THEME WEAVING: Weave both {primary_theme} and {secondary_theme} themes together
331
-
332
- FORMAT:
333
- - Write exactly {num_phrases} short text lines
334
- - No annotations, explanations, or line numbers
335
- - Do not count syllables in the output
336
-
337
- IMPORTANT: If you can't express an idea in {max_syllables} or fewer syllables, break it across two lines or choose a simpler way to express it.
338
-
339
- ===== EXAMPLES OF CORRECT LENGTH =====
340
-
341
- Example 1 (short fragments connected by flow):
342
- Cold tea cup (3 syllables)
343
- on windowsill (3 syllables)
344
- cat watches rain (3 syllables)
345
- through foggy glass (3 syllables)
346
-
347
- Example 2 (prepositional connections):
348
- Keys dropped here (3 syllables)
349
- by the front door (3 syllables)
350
- where shoes pile up (3 syllables)
351
- since you moved in (3 syllables)
352
-
353
- DO NOT copy my examples. Create ENTIRELY NEW lyrics that blend {primary_emotion} and {secondary_emotion} emotions while exploring {primary_theme} and {secondary_theme} themes.
354
-
355
- REMEMBER: NO LINE SHOULD EXCEED {max_syllables} SYLLABLES - this is the most important rule!
356
- """
357
-
358
- # Generate lyrics using the LLM model
359
  messages = [
360
  {"role": "user", "content": prompt}
361
  ]
@@ -370,14 +357,15 @@ REMEMBER: NO LINE SHOULD EXCEED {max_syllables} SYLLABLES - this is the most imp
370
  # Tokenize and move to model device
371
  model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
372
 
373
- # Generate with optimized parameters
374
  generated_ids = llm_model.generate(
375
  **model_inputs,
376
- max_new_tokens=1024,
377
  do_sample=True,
378
- temperature=0.7,
379
- top_p=0.9,
380
- repetition_penalty=1.2,
 
381
  pad_token_id=llm_tokenizer.eos_token_id
382
  )
383
 
@@ -385,303 +373,199 @@ REMEMBER: NO LINE SHOULD EXCEED {max_syllables} SYLLABLES - this is the most imp
385
  output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
386
  lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
387
 
388
- # ULTRA AGGRESSIVE CLEANING - COMPLETELY REVISED
389
- # ------------------------------------------------
390
 
391
- # 1. First, look for any standard dividers that might separate thinking from lyrics
392
- divider_patterns = [
393
- r'Here are the lyrics:',
394
- r'Here is my song:',
395
- r'The lyrics:',
396
- r'My lyrics:',
397
- r'Song lyrics:',
398
- r'\*\*\*+',
399
- r'===+',
400
- r'---+',
401
- r'```',
402
- r'Lyrics:'
403
- ]
404
 
405
- for pattern in divider_patterns:
406
- matches = re.finditer(pattern, lyrics, re.IGNORECASE)
407
- for match in matches:
408
- # Keep only content after the divider
409
- lyrics = lyrics[match.end():].strip()
410
 
411
- # 2. Remove thinking tags completely before splitting into lines
412
- lyrics = re.sub(r'<think>.*?</think>', '', lyrics, flags=re.DOTALL)
413
- lyrics = re.sub(r'\[thinking\].*?\[/thinking\]', '', lyrics, flags=re.DOTALL)
414
- lyrics = re.sub(r'<think>', '', lyrics, flags=re.DOTALL)
415
- lyrics = re.sub(r'</think>', '', lyrics, flags=re.DOTALL)
416
- lyrics = re.sub(r'\[thinking\]', '', lyrics, flags=re.DOTALL)
417
- lyrics = re.sub(r'\[/thinking\]', '', lyrics, flags=re.DOTALL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
418
 
419
- # 3. Split text into lines for aggressive line-by-line filtering
420
  lines = lyrics.strip().split('\n')
421
  clean_lines = []
422
 
423
- # 4. Define comprehensive patterns for non-lyrical content
424
- non_lyric_patterns = [
425
- # Meta-commentary
426
- r'^(note|thinking|thoughts|let me|i will|i am going|i would|i can|i need to|i have to|i should|let\'s|here|now)',
427
- r'^(first|second|third|next|finally|importantly|remember|so|ok|okay|as requested|as asked|considering)',
428
- # Explanations
429
- r'syllable[s]?|phrase|rhythm|beats?|tempo|bpm|instruction|follow|alignment|match|corresponding',
430
- r'verses?|chorus|bridge|section|stanza|part|template|format|pattern|example',
431
- r'requirements?|guidelines?|song structure|stressed|unstressed',
432
- # Technical language
433
- r'generated|output|result|provide|create|write|draft|version',
434
- # Annotations and numbering
435
- r'^line \d+|^\d+[\.\):]|^\[\w+\]|^[\*\-\+] ',
436
- # Questions or analytical statements
437
- r'\?$|analysis|evaluate|review|check|ensure',
438
- # Instruction-like statements
439
- r'make sure|please note|important|notice|pay attention'
440
- ]
441
-
442
- # 5. Identify which lines are likely actual lyrics vs non-lyrics
443
  for line in lines:
444
  line = line.strip()
445
-
446
- # Skip empty lines or lines with just spaces/tabs
447
  if not line or line.isspace():
448
  continue
449
 
450
- # Skip lines that match any non-lyric pattern
451
- should_skip = False
452
- for pattern in non_lyric_patterns:
453
- if re.search(pattern, line.lower()):
454
- should_skip = True
455
- break
456
-
457
- if should_skip:
458
- continue
459
-
460
- # Skip section headers
461
- if (line.startswith('[') and ']' in line) or (line.startswith('(') and ')' in line and len(line) < 20):
462
- continue
463
-
464
- # Skip lines that look like annotations (not prose-like)
465
- if ':' in line and not any(word in line.lower() for word in ['like', 'when', 'where', 'how', 'why', 'what']):
466
- if len(line.split(':')[0]) < 15: # Short prefixes followed by colon are likely annotations
467
- continue
468
 
469
- # Skip very short lines that aren't likely to be lyrics (unless it's just a few words which could be valid)
470
- if len(line) < 3:
471
- continue
472
 
473
- # Skip lines that are numbered or bulleted
474
- if re.match(r'^\d+\.|\(#\d+\)|\d+\)', line):
475
  continue
476
 
477
- # Skip markdown-style emphasis or headers
478
- if re.match(r'^#{1,6} |^\*\*|^__', line):
 
 
 
 
479
  continue
480
 
481
- # Skip lines with think tags
482
- if '<think>' in line.lower() or '</think>' in line.lower() or '[thinking]' in line.lower() or '[/thinking]' in line.lower():
483
  continue
484
-
485
- # Add this line as it passed all filters
486
- clean_lines.append(line)
487
-
488
- # 6. Additional block-level filters for common patterns
489
- # Check beginning of lyrics for common prefixes
490
- if clean_lines and any(clean_lines[0].lower().startswith(prefix) for prefix in
491
- ['here are', 'these are', 'below are', 'following are']):
492
- clean_lines = clean_lines[1:] # Skip the first line
493
-
494
- # 7. Process blocks of lines to detect explanation blocks
495
- if len(clean_lines) > 3:
496
- # Check for explanation blocks at the beginning
497
- first_three = ' '.join(clean_lines[:3]).lower()
498
- if any(term in first_three for term in ['i will', 'i have created', 'i\'ll provide', 'i\'ll write']):
499
- # This looks like an explanation, skip the first few lines
500
- start_idx = 0
501
- for i, line in enumerate(clean_lines):
502
- if i >= 3 and not any(term in line.lower() for term in ['i will', 'created', 'write', 'provide']):
503
- start_idx = i
504
- break
505
- clean_lines = clean_lines[start_idx:]
506
 
507
- # Check for explanation blocks at the end
508
- last_three = ' '.join(clean_lines[-3:]).lower()
509
- if any(term in last_three for term in ['hope this', 'these lyrics', 'as you can see', 'this song', 'i have']):
510
- # This looks like an explanation at the end, truncate
511
- end_idx = len(clean_lines)
512
- for i in range(len(clean_lines) - 1, max(0, len(clean_lines) - 4), -1):
513
- if i < len(clean_lines) and not any(term in clean_lines[i].lower() for term in
514
- ['hope', 'these lyrics', 'as you can see', 'this song']):
515
- end_idx = i + 1
516
- break
517
- clean_lines = clean_lines[:end_idx]
518
-
519
- # 8. Cleanup - Remove remaining annotations or thinking
520
- for i in range(len(clean_lines)):
521
  # Remove trailing thoughts/annotations
522
- clean_lines[i] = re.sub(r'\s+//.*$', '', clean_lines[i])
523
- clean_lines[i] = re.sub(r'\s+\(.*?\)$', '', clean_lines[i])
524
-
525
- # Remove thinking tags completely
526
- clean_lines[i] = re.sub(r'<think>.*?</think>', '', clean_lines[i], flags=re.DOTALL)
527
- clean_lines[i] = re.sub(r'\[thinking\].*?\[/thinking\]', '', clean_lines[i], flags=re.DOTALL)
528
- clean_lines[i] = re.sub(r'<think>', '', clean_lines[i])
529
- clean_lines[i] = re.sub(r'</think>', '', clean_lines[i])
530
- clean_lines[i] = re.sub(r'\[thinking\]', '', clean_lines[i])
531
- clean_lines[i] = re.sub(r'\[/thinking\]', '', clean_lines[i])
532
 
533
  # Remove syllable count annotations
534
- clean_lines[i] = re.sub(r'\s*\(\d+\s*syllables?\)', '', clean_lines[i])
 
 
 
 
535
 
536
- # 9. Filter out any remaining empty lines after tag removal
537
- clean_lines = [line for line in clean_lines if line.strip() and not line.isspace()]
538
 
539
- # 10. NEW: Apply strict syllable enforcement - split or truncate lines that are too long
540
- # This is a critical step to ensure no line exceeds our max syllable count
541
  if lyric_templates:
542
- max_allowed_syllables = min(7, max([t.get('max_expected', 6) for t in lyric_templates]))
 
543
  else:
544
  max_allowed_syllables = 6
545
-
546
- clean_lines = enforce_syllable_limits(clean_lines, max_allowed_syllables)
547
-
548
- # 11. NEW: Check for template copying or clichéd phrases
549
- cliched_patterns = [
550
- r'moonlight (shimmers?|falls?|dances?)',
551
- r'shadows? (dance|play|fall|stretch)',
552
- r'time slips? away',
553
- r'whispers? (fade|in the)',
554
- r'silence speaks',
555
- r'stars? shine',
556
- r'hearts? beat',
557
- r'footsteps (fade|echo)',
558
- r'gentle wind',
559
- r'(old|empty) (roads?|chair)',
560
- r'night (holds?|falls?)',
561
- r'memories fade',
562
- r'dreams (linger|drift)'
563
- ]
564
-
565
- cliche_count = 0
566
  for line in clean_lines:
567
- for pattern in cliched_patterns:
568
- if re.search(pattern, line.lower()):
569
- cliche_count += 1
570
- break
571
-
572
- # Calculate percentage of clichéd lines
573
- if clean_lines:
574
- cliche_percentage = (cliche_count / len(clean_lines)) * 100
575
- else:
576
- cliche_percentage = 0
577
 
578
- # 12. If we have lyric templates, ensure we have the correct number of lines
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
579
  if lyric_templates:
580
  num_required = len(lyric_templates)
 
 
 
 
 
 
 
 
581
 
582
- # If we have too many lines, keep just the best ones
583
- if len(clean_lines) > num_required:
584
- # Keep the first num_required lines
585
- clean_lines = clean_lines[:num_required]
586
-
587
- # If we don't have enough lines, generate placeholders that fit the syllable count
588
- while len(clean_lines) < num_required:
589
- i = len(clean_lines)
590
- if i < len(lyric_templates):
591
- template = lyric_templates[i]
592
- target_syllables = min(max_allowed_syllables - 1, (template.get('min_expected', 2) + template.get('max_expected', 6)) // 2)
593
 
594
- # Generate more creative, contextual placeholders with specificity
595
- # Avoid clichés like "moonlight shimmers" or "time slips away"
596
- specific_placeholders = {
597
- # 2-3 syllables - specific, concrete phrases
598
- 2: [
599
- "Phone rings twice",
600
- "Dogs bark loud",
601
- "Keys dropped here",
602
- "Train rolls by",
603
- "Birds take flight"
604
- ],
605
- # 3-4 syllables - specific contexts
606
- 3: [
607
- "Coffee gets cold",
608
- "Fan blades spin",
609
- "Pages turn slow",
610
- "Neighbors talk",
611
- "Radio hums soft"
612
- ],
613
- # 4-5 syllables - specific details
614
- 4: [
615
- "Fingers tap table",
616
- "Taxi waits in rain",
617
- "Laptop screen blinks",
618
- "Ring left on sink",
619
- "Church bells ring loud"
620
- ],
621
- # 5-6 syllables - context rich
622
- 5: [
623
- "Letters with no stamps",
624
- "Watch shows wrong time",
625
- "Jeans with torn knees",
626
- "Dog barks next door",
627
- "Smoke alarm beeps"
628
- ]
629
- }
630
 
631
- # Make theme and emotion specific placeholders to add to the list
632
- theme_specific = []
633
- if primary_theme.lower() in ["love", "relationship", "romance"]:
634
- theme_specific = ["Lipstick on glass", "Text left on read", "Scent on your coat"]
635
- elif primary_theme.lower() in ["loss", "grief", "sadness"]:
636
- theme_specific = ["Chair sits empty", "Photos face down", "Clothes in closet"]
637
- elif primary_theme.lower() in ["hope", "inspiration", "triumph"]:
638
- theme_specific = ["Seeds start to grow", "Finish line waits", "New day breaks through"]
639
 
640
- # Get the closest matching syllable group
641
- closest_group = min(specific_placeholders.keys(), key=lambda k: abs(k - target_syllables))
642
-
643
- # Create pool of available placeholders from both specific and theme specific options
644
- all_placeholders = specific_placeholders[closest_group] + theme_specific
645
-
646
- # Choose a placeholder that hasn't been used yet
647
- available_placeholders = [p for p in all_placeholders if p not in clean_lines]
648
-
649
- if available_placeholders:
650
- # Use modulo for more variation
651
- idx = (i * 17 + len(clean_lines) * 13) % len(available_placeholders)
652
- placeholder = available_placeholders[idx]
653
  else:
654
- # If we've used all placeholders, create something random and specific
655
- subjects = ["Car", "Dog", "Kid", "Clock", "Phone", "Tree", "Book", "Door", "Light"]
656
- verbs = ["waits", "moves", "stops", "falls", "breaks", "turns", "sleeps"]
657
-
658
- # Ensure randomness with seed that changes with each call
659
- import random
660
- random.seed(len(clean_lines) * 27 + i * 31)
661
-
662
- subj = random.choice(subjects)
663
- verb = random.choice(verbs)
664
-
665
- placeholder = f"{subj} {verb}"
666
  else:
667
- placeholder = "Page turns slow"
668
-
669
- clean_lines.append(placeholder)
670
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
671
  # Assemble final lyrics
672
  final_lyrics = '\n'.join(clean_lines)
673
-
674
- # Add a warning if we detected too many clichés
675
- if cliche_percentage >= 40:
676
- final_lyrics = f"""WARNING: These lyrics contain several overused phrases and clichés.
677
- Try regenerating for more original content.
678
 
679
- {final_lyrics}"""
680
-
681
- # 13. Final sanity check - if we have nothing or garbage, return an error
682
- if not final_lyrics or len(final_lyrics) < 10:
683
- return "The model generated only thinking content but no actual lyrics. Please try again."
684
-
685
  return final_lyrics
686
 
687
  except Exception as e:
 
236
  lyric_templates = music_analysis.get("lyric_templates", [])
237
 
238
  # Define num_phrases here to ensure it's available in all code paths
239
+ # Also define syllable limits for the prompt
 
 
 
 
 
 
240
  if not lyric_templates:
241
+ num_phrases_for_prompt = 4 # Default if no templates
242
+ min_syl_for_prompt = 2
243
+ max_syl_for_prompt = 7
244
+
245
+ prompt = (f'''You are a professional songwriter. Write song lyrics for a {genre} song.
246
+
247
+ SONG DETAILS:
248
+ - Key: {key} {mode}
249
+ - Tempo: {tempo} BPM
250
+ - Primary emotion: {primary_emotion}
251
+ - Secondary emotion: {secondary_emotion}
252
+ - Primary theme: {primary_theme}
253
+ - Secondary theme: {secondary_theme}
254
+
255
+ CRITICAL REQUIREMENTS (MOST IMPORTANT):
256
+ - You MUST write EXACTLY {num_phrases_for_prompt} lines of lyrics.
257
+ - Number each lyric line starting from 1 up to {num_phrases_for_prompt}. For example:
258
+ 1. First lyric line.
259
+ 2. Second lyric line.
260
+ ...
261
+ {num_phrases_for_prompt}. The final lyric line.
262
+ - Each numbered line (after removing the number and period) MUST be {min_syl_for_prompt}-{max_syl_for_prompt} syllables MAXIMUM.
263
+ - NO line's content (after removing the number) can exceed {max_syl_for_prompt} syllables. This is EXTREMELY IMPORTANT.
264
+ - Count syllables carefully for the content of each numbered line.
265
+ - Use SHORT WORDS and SHORT PHRASES for the content of each numbered line.
266
+ - Break long thoughts into multiple numbered lines.
267
+
268
+ CREATIVITY GUIDELINES:
269
+ - Create original, vivid imagery that captures the emotions.
270
+ - Use concrete, sensory details (what you see, hear, feel, touch).
271
+ - Avoid clichés and common phrases.
272
+ - Draw inspiration from the specific themes and emotions listed above.
273
+ - Think about unique moments, specific objects, or personal details.
274
+ - Use unexpected word combinations.
275
+ - Focus on the particular mood created by {primary_emotion} and {secondary_emotion}.
276
+
277
+ STYLE FOR SHORT LINES (for the content of each numbered line):
278
+ - Use brief, impactful phrases.
279
+ - Focus on single images or moments per line.
280
+ - Choose simple, everyday words.
281
+ - Let each line paint one clear picture.
282
+
283
+ ABSOLUTELY NO placeholders like [line], [moment], [breath], [phrase], [word], etc.
284
+
285
+ OUTPUT FORMAT:
286
+ Under the "LYRICS:" heading, provide exactly {num_phrases_for_prompt} numbered lyric lines.
287
+
288
+ LYRICS:
289
+ (Your {num_phrases_for_prompt} numbered lyric lines go here, each starting with its number, a period, and a space)
290
+
291
+ Remember: Output EXACTLY {num_phrases_for_prompt} numbered lyric lines. Each line's content (after removing the number) must be {min_syl_for_prompt}-{max_syl_for_prompt} syllables.''')
292
  else:
293
  # Calculate the typical syllable range for this genre
294
+ num_phrases_for_prompt = len(lyric_templates)
295
+ max_syl_for_prompt = max([t.get('max_expected', 7) for t in lyric_templates]) if lyric_templates and lyric_templates[0].get('max_expected') else 7
296
+ min_syl_for_prompt = min([t.get('min_expected', 2) for t in lyric_templates]) if lyric_templates and lyric_templates[0].get('min_expected') else 2
 
 
 
 
 
 
297
 
298
+ prompt = (f'''You are a professional songwriter. Write song lyrics for a {genre} song.
299
+
300
+ SONG DETAILS:
301
+ - Key: {key} {mode}
302
+ - Tempo: {tempo} BPM
303
+ - Primary emotion: {primary_emotion}
304
+ - Secondary emotion: {secondary_emotion}
305
+ - Primary theme: {primary_theme}
306
+ - Secondary theme: {secondary_theme}
307
+
308
+ CRITICAL REQUIREMENTS (MOST IMPORTANT):
309
+ - You MUST write EXACTLY {num_phrases_for_prompt} lines of lyrics.
310
+ - Number each lyric line starting from 1 up to {num_phrases_for_prompt}. For example:
311
+ 1. First lyric line.
312
+ 2. Second lyric line.
313
+ ...
314
+ {num_phrases_for_prompt}. The final lyric line.
315
+ - Each numbered line (after removing the number and period) MUST be {min_syl_for_prompt}-{max_syl_for_prompt} syllables MAXIMUM.
316
+ - NO line's content (after removing the number) can exceed {max_syl_for_prompt} syllables. This is EXTREMELY IMPORTANT.
317
+ - Count syllables carefully for the content of each numbered line.
318
+ - Use SHORT WORDS and SHORT PHRASES for the content of each numbered line.
319
+ - Break long thoughts into multiple numbered lines.
320
+
321
+ CREATIVITY GUIDELINES:
322
+ - Create original, vivid imagery that captures the emotions.
323
+ - Use concrete, sensory details (what you see, hear, feel, touch).
324
+ - Avoid clichés and common phrases.
325
+ - Draw inspiration from the specific themes and emotions listed above.
326
+ - Think about unique moments, specific objects, or personal details.
327
+ - Use unexpected word combinations.
328
+ - Focus on the particular mood created by {primary_emotion} and {secondary_emotion}.
329
+
330
+ STYLE FOR SHORT LINES (for the content of each numbered line):
331
+ - Use brief, impactful phrases.
332
+ - Focus on single images or moments per line.
333
+ - Choose simple, everyday words.
334
+ - Let each line paint one clear picture.
335
+
336
+ ABSOLUTELY NO placeholders like [line], [moment], [breath], [phrase], [word], etc.
337
+
338
+ OUTPUT FORMAT:
339
+ Under the "LYRICS:" heading, provide exactly {num_phrases_for_prompt} numbered lyric lines.
340
+
341
+ LYRICS:
342
+ (Your {num_phrases_for_prompt} numbered lyric lines go here, each starting with its number, a period, and a space)
343
+
344
+ Remember: Output EXACTLY {num_phrases_for_prompt} numbered lyric lines. Each line's content (after removing the number) must be {min_syl_for_prompt}-{max_syl_for_prompt} syllables.''')
345
+ # Wrap the prompt in a single user-role chat message for the LLM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  messages = [
347
  {"role": "user", "content": prompt}
348
  ]
 
357
  # Tokenize and move to model device
358
  model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
359
 
360
+ # Generate with optimized parameters for QwQ model
361
  generated_ids = llm_model.generate(
362
  **model_inputs,
363
+ max_new_tokens=2048, # Increased from 1024 to give QwQ more room
364
  do_sample=True,
365
+ temperature=0.6, # QwQ recommended setting
366
+ top_p=0.95, # QwQ recommended setting
367
+ top_k=30, # QwQ recommended range 20-40
368
+ repetition_penalty=1.1, # Reduced to allow some repetition if needed
369
  pad_token_id=llm_tokenizer.eos_token_id
370
  )
371
 
 
373
  output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
374
  lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
375
 
376
+ # ENHANCED CLEANING FOR QWQ MODEL - IMPROVED APPROACH
377
+ # ---------------------------------------------------
378
 
379
+ # QwQ often includes thinking process - we need to extract only the final lyrics
 
 
 
 
 
 
 
 
 
 
 
 
380
 
381
+ # 1. First, remove any thinking tags completely (QwQ specific)
382
+ lyrics = re.sub(r'<think>.*?</think>', '', lyrics, flags=re.DOTALL | re.IGNORECASE)
383
+ lyrics = re.sub(r'<think>', '', lyrics, flags=re.IGNORECASE)
384
+ lyrics = re.sub(r'</think>', '', lyrics, flags=re.IGNORECASE)
 
385
 
386
+ # 2. Look for the LYRICS: section specifically
387
+ lyrics_section_match = re.search(r'LYRICS:\s*\n(.*?)(?:\n\n|\Z)', lyrics, re.DOTALL | re.IGNORECASE)
388
+ if lyrics_section_match:
389
+ lyrics = lyrics_section_match.group(1).strip()
390
+ else:
391
+ # Fallback: look for other common transitions that indicate the start of actual lyrics
392
+ lyric_start_patterns = [
393
+ r'(?:here (?:are )?(?:the )?lyrics?:?|lyrics?:?|my lyrics?:?|song lyrics?:?)\s*',
394
+ r'(?:here (?:is )?(?:a )?song:?|here (?:is )?my song:?)\s*',
395
+ r'(?:\*{3,}|\={3,}|\-{3,})\s*',
396
+ r'(?:final lyrics?:?|the lyrics?:?)\s*',
397
+ r'```\s*'
398
+ ]
399
+
400
+ # Try to find where actual lyrics start
401
+ lyrics_start_pos = 0
402
+ for pattern in lyric_start_patterns:
403
+ match = re.search(pattern, lyrics, re.IGNORECASE)
404
+ if match:
405
+ lyrics_start_pos = max(lyrics_start_pos, match.end())
406
+
407
+ # Keep content from the identified start position
408
+ if lyrics_start_pos > 0:
409
+ lyrics = lyrics[lyrics_start_pos:].strip()
410
 
411
+ # 3. Split into lines and apply basic filtering
412
  lines = lyrics.strip().split('\n')
413
  clean_lines = []
414
 
415
+ # 4. Simple filtering - keep only actual lyric lines
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  for line in lines:
417
  line = line.strip()
 
 
418
  if not line or line.isspace():
419
  continue
420
 
421
+ # Strip leading numbers like "1. ", "2. ", etc.
422
+ line = re.sub(r'^\d+\.\s*', '', line)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
 
424
+ line_lower = line.lower()
 
 
425
 
426
+ # Remove placeholder lines - more comprehensive pattern
427
+ if re.match(r'^\[ *(line|moment|breath|phrase|word|sound) *\]$', line_lower):
428
  continue
429
 
430
+ # Skip lines that are clearly not lyrics (simplified filtering)
431
+ if any(phrase in line_lower for phrase in [
432
+ 'line 1', 'line 2', 'line 3',
433
+ 'thinking', 'lyrics:', 'format:', 'etc...', 'commentary',
434
+ 'syllables', 'requirements', 'output', 'provide'
435
+ ]):
436
  continue
437
 
438
+ # Skip numbered annotations and lines that are entirely bracketed (e.g. "[Verse]")
439
+ if re.match(r'^\d+[\.\):]|^\[.*\]$', line):
440
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
 
442
+ # Keep lines that look like actual lyrics (not too long, not too technical)
443
+ words = line.split()
444
+ if 1 <= len(words) <= 8 and not any(tech_word in line_lower for tech_word in [
445
+ 'syllable', 'beat', 'tempo', 'analysis', 'format', 'section'
446
+ ]):
447
+ clean_lines.append(line)
448
+
449
+ # 5. Additional cleanup for QwQ-specific issues
450
+ # Remove any remaining thinking fragments
451
+ final_clean_lines = []
452
+ for line in clean_lines:
 
 
 
453
  # Remove trailing thoughts/annotations
454
+ line = re.sub(r'\s+//.*$', '', line)
455
+ line = re.sub(r'\s+\(.*?\)$', '', line)
 
 
 
 
 
 
 
 
456
 
457
  # Remove syllable count annotations
458
+ line = re.sub(r'\s*\(\d+\s*syllables?\)', '', line, flags=re.IGNORECASE)
459
+
460
+ # Skip if the line became empty after cleaning
461
+ if line.strip():
462
+ final_clean_lines.append(line.strip())
463
 
464
+ clean_lines = final_clean_lines
 
465
 
466
+ # AGGRESSIVE SYLLABLE ENFORCEMENT - This is critical for beat matching
 
467
  if lyric_templates:
468
+ max_allowed_syllables = max([t.get('max_expected', 6) for t in lyric_templates])
469
+ min_allowed_syllables = min([t.get('min_expected', 2) for t in lyric_templates])
470
  else:
471
  max_allowed_syllables = 6
472
+ min_allowed_syllables = 2
473
+
474
+ # Enforce syllable limits on every line
475
+ syllable_enforced_lines = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
476
  for line in clean_lines:
477
+ words = line.split()
478
+ current_syllables = sum(beat_analyzer.count_syllables(word) for word in words)
 
 
 
 
 
 
 
 
479
 
480
+ # If line is within limits, keep it
481
+ if min_allowed_syllables <= current_syllables <= max_allowed_syllables:
482
+ syllable_enforced_lines.append(line)
483
+ # If line is too long, we need to split it intelligently
484
+ elif current_syllables > max_allowed_syllables:
485
+ # Try to split into multiple shorter lines
486
+ current_line = []
487
+ current_count = 0
488
+
489
+ for word in words:
490
+ word_syllables = beat_analyzer.count_syllables(word)
491
+
492
+ # If adding this word would exceed limit, start new line
493
+ if current_count + word_syllables > max_allowed_syllables and current_line:
494
+ syllable_enforced_lines.append(" ".join(current_line))
495
+ current_line = [word]
496
+ current_count = word_syllables
497
+ else:
498
+ # Add the word to the current line
499
+ current_line.append(word)
500
+ current_count += word_syllables
501
+
502
+ # Add the remaining words as final line
503
+ if current_line and current_count >= min_allowed_syllables:
504
+ syllable_enforced_lines.append(" ".join(current_line))
505
+ # Skip lines that are too short
506
+
507
+ clean_lines = syllable_enforced_lines
508
+
509
+ # Get required number of lines
510
  if lyric_templates:
511
  num_required = len(lyric_templates)
512
+ else:
513
+ num_required = 4
514
+
515
+ # IMPORTANT: Adjust line count to match requirement
516
+ if len(clean_lines) > num_required:
517
+ # Too many lines - try to merge adjacent short lines first
518
+ merged_lines = []
519
+ i = 0
520
 
521
+ while i < len(clean_lines) and len(merged_lines) < num_required:
522
+ if i + 1 < len(clean_lines) and len(merged_lines) < num_required - 1:
523
+ # Check if we can merge current and next line
524
+ line1 = clean_lines[i]
525
+ line2 = clean_lines[i + 1]
 
 
 
 
 
 
526
 
527
+ words1 = line1.split()
528
+ words2 = line2.split()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529
 
530
+ syllables1 = sum(beat_analyzer.count_syllables(word) for word in words1)
531
+ syllables2 = sum(beat_analyzer.count_syllables(word) for word in words2)
 
 
 
 
 
 
532
 
533
+ # If merging would stay within limits, merge them
534
+ if syllables1 + syllables2 <= max_allowed_syllables:
535
+ merged_lines.append(line1 + " " + line2)
536
+ i += 2
 
 
 
 
 
 
 
 
 
537
  else:
538
+ merged_lines.append(line1)
539
+ i += 1
 
 
 
 
 
 
 
 
 
 
540
  else:
541
+ merged_lines.append(clean_lines[i])
542
+ i += 1
543
+
544
+ # If still too many, truncate to required number
545
+ clean_lines = merged_lines[:num_required]
546
+
547
+ elif len(clean_lines) < num_required:
548
+ # Too few lines - this is a generation failure
549
+ # Instead of error, try to pad with empty lines or regenerate
550
+ # For now, let's return an error message
551
+ return f"Error: The model generated {len(clean_lines)} lines but {num_required} were required. Please try again."
552
+
553
+ # Final check - ensure we have exactly the required number
554
+ if len(clean_lines) != num_required:
555
+ # If we still don't have the right number, truncate extras or report an error
556
+ if len(clean_lines) > num_required:
557
+ clean_lines = clean_lines[:num_required]
558
+ else:
559
+ # This shouldn't happen with the above logic, but just in case
560
+ return f"Error: Could not generate exactly {num_required} lines. Please try again."
561
+
562
  # Assemble final lyrics
563
  final_lyrics = '\n'.join(clean_lines)
 
 
 
 
 
564
 
565
+ # Final sanity check - if we have nothing or very little, return an error
566
+ if not final_lyrics or len(final_lyrics.strip()) < 15:
567
+ return "The model output appears to be mostly thinking content. Please try regenerating for cleaner lyrics."
568
+
 
 
569
  return final_lyrics
570
 
571
  except Exception as e:
example.py DELETED
@@ -1,49 +0,0 @@
1
- import os
2
- import sys
3
- from app import process_audio, music_analyzer
4
-
5
- def main():
6
- """
7
- Example function to demonstrate the application with a sample audio file.
8
-
9
- Usage:
10
- python example.py <path_to_audio_file>
11
- """
12
- if len(sys.argv) != 2:
13
- print("Usage: python example.py <path_to_audio_file>")
14
- return
15
-
16
- audio_file = sys.argv[1]
17
- if not os.path.exists(audio_file):
18
- print(f"Error: File {audio_file} does not exist.")
19
- return
20
-
21
- print(f"Processing audio file: {audio_file}")
22
-
23
- # Call the main processing function
24
- genre_results, lyrics = process_audio(audio_file)
25
-
26
- # Get emotion analysis results
27
- emotion_results = music_analyzer.analyze_music(audio_file)
28
-
29
- # Print results
30
- print("\n" + "="*50)
31
- print("GENRE CLASSIFICATION RESULTS:")
32
- print("="*50)
33
- print(genre_results)
34
-
35
- print("\n" + "="*50)
36
- print("EMOTION ANALYSIS RESULTS:")
37
- print("="*50)
38
- print(f"Tempo: {emotion_results['summary']['tempo']:.1f} BPM")
39
- print(f"Key: {emotion_results['summary']['key']} {emotion_results['summary']['mode']}")
40
- print(f"Primary Emotion: {emotion_results['summary']['primary_emotion']}")
41
- print(f"Primary Theme: {emotion_results['summary']['primary_theme']}")
42
-
43
- print("\n" + "="*50)
44
- print("GENERATED LYRICS:")
45
- print("="*50)
46
- print(lyrics)
47
-
48
- if __name__ == "__main__":
49
- main()