Spaces:

jacob-c
/

largermodel_lyrics_generation

Paused

App Files Files Community

root committed on May 28

Commit

14555be

1 Parent(s): 0750c9c

ss

Browse files

Files changed (3) hide show

DEPLOYMENT.md +0 -42
app.py +261 -377
example.py +0 -49

DEPLOYMENT.md DELETED Viewed

@@ -1,42 +0,0 @@
-# Deploying to Hugging Face Spaces
-This guide explains how to deploy the Music Genre Classifier & Lyrics Generator to Hugging Face Spaces.
-## Prerequisites
-1. A Hugging Face account
-2. Access to the Llama 3.1 8B Instruct model (requires acceptance of the model license)
-3. A Hugging Face API token
-## Deployment Steps
-### 1. Create a New Space
-1. Go to the Hugging Face website and log in
-2. Navigate to "Spaces" in the top navigation
-3. Click "Create new Space"
-4. Choose "Gradio" as the SDK
-5. Give your Space a name and description
-6. Select "T4 GPU" as the hardware
-### 2. Set up Environment Variables
-Set up your Hugging Face access token as an environment variable:
-1. Go to your profile settings in Hugging Face
-2. Navigate to "Access Tokens" and create a new token with "write" access
-3. In your Space settings, under "Repository secrets", add a new secret:
-   - Name: `HF_TOKEN`
-   - Value: Your Hugging Face access token
-### 3. Upload the Files
-Upload all the files from this repository to your Space.
-### 4. Wait for Deployment
-Hugging Face will automatically build and deploy your Space. This may take a few minutes, especially since it needs to download the models.
-### 5. Access Your Application
-Once deployed, you can access your application on your Hugging Face Space URL.

app.py CHANGED Viewed

@@ -236,126 +236,113 @@ def generate_lyrics(music_analysis, genre, duration):
         lyric_templates = music_analysis.get("lyric_templates", [])
         # Define num_phrases here to ensure it's available in all code paths
-        num_phrases = len(lyric_templates) if lyric_templates else 4
-        # Verify LLM is loaded
-        if llm_model is None or llm_tokenizer is None:
-            return "Error: LLM model not properly loaded"
-        # If no templates, fall back to original method
         if not lyric_templates:
-            # Enhanced prompt with both emotions and themes
-            prompt = f"""Write song lyrics for a {genre} song in {key} {mode} with tempo {tempo} BPM.
-EMOTIONS:
-- Primary: {primary_emotion}
-- Secondary: {secondary_emotion}
-THEMES:
-- Primary: {primary_theme}
-- Secondary: {secondary_theme}
-ONLY WRITE THE ACTUAL LYRICS. NO EXPLANATIONS OR META-TEXT.
-"""
         else:
             # Calculate the typical syllable range for this genre
-            if num_phrases > 0:
-                # Get max syllables per line from templates
-                max_syllables = max([t.get('max_expected', 7) for t in lyric_templates]) if lyric_templates[0].get('max_expected') else 7
-                min_syllables = min([t.get('min_expected', 2) for t in lyric_templates]) if lyric_templates[0].get('min_expected') else 2
-                avg_syllables = (min_syllables + max_syllables) // 2
-            else:
-                min_syllables = 2
-                max_syllables = 7
-                avg_syllables = 4
-            # Create random examples based on the song's themes and emotions
-            # to avoid the LLM copying our examples directly
-            example_themes = [
-                {"emotion": "love", "fragments": ["I see your face", "across the room", "my heart beats fast", "can't look away"]},
-                {"emotion": "sadness", "fragments": ["tears fall like rain", "on empty streets", "memories fade", "into the dark"]},
-                {"emotion": "nostalgia", "fragments": ["old photographs", "dusty and worn", "remind me of when", "we were young"]},
-                {"emotion": "hope", "fragments": ["dawn breaks through clouds", "new day begins", "darkness recedes", "light fills my soul"]},
-                {"emotion": "longing", "fragments": ["miles apart now", "under same stars", "thinking of you", "across the distance"]}
-            ]
-            # Select a theme that doesn't match the song's emotions to avoid copying
-            selected_themes = [t for t in example_themes if t["emotion"].lower() not in [primary_emotion.lower(), secondary_emotion.lower()]]
-            if not selected_themes:
-                selected_themes = example_themes
-            import random
-            example_theme = random.choice(selected_themes)
-            example_fragments = example_theme["fragments"]
-            random.shuffle(example_fragments)  # Randomize order
-            # Create example 1 - grammatical connection with conjunction
-            ex1_line1 = example_fragments[0] if len(example_fragments) > 0 else "The morning sun"
-            ex1_line2 = example_fragments[1] if len(example_fragments) > 1 else "breaks through clouds"
-            ex1_line3 = example_fragments[2] if len(example_fragments) > 2 else "as birds begin"
-            ex1_line4 = example_fragments[3] if len(example_fragments) > 3 else "their dawn chorus"
-            # Create example 2 - prepositional connection
-            ex2_fragments = [
-                "She walks alone",
-                "through crowded streets",
-                "with memories",
-                "of better days"
-            ]
-            random.shuffle(ex2_fragments)
-            # Create a more direct prompt with examples and specific syllable count guidance
-            prompt = f"""Write song lyrics for a {genre} song in {key} {mode} with tempo {tempo} BPM.
-EMOTIONS:
-- Primary: {primary_emotion}
-- Secondary: {secondary_emotion}
-THEMES:
-- Primary: {primary_theme}
-- Secondary: {secondary_theme}
-I need EXACTLY {num_phrases} lines of lyrics with these STRICT requirements:
-CRITICAL INSTRUCTIONS:
-1. EXTREMELY SHORT LINES: Each line MUST be between {min_syllables}-{max_syllables} syllables MAXIMUM
-2. ENFORCE BREVITY: NO exceptions to the syllable limit - not a single line should exceed {max_syllables} syllables
-3. FRAGMENT STYLE: Use sentence fragments and short phrases instead of complete sentences
-4. CONNECTED THOUGHTS: Use prepositions and conjunctions at the start of lines to connect ideas
-5. SIMPLE WORDS: Choose one or two-syllable words whenever possible
-6. CONCRETE IMAGERY: Use specific, tangible details rather than abstract concepts
-7. NO CLICHÉS: Avoid common phrases like "time slips away" or "memories fade"
-8. ONE THOUGHT PER LINE: Express just one simple idea in each line
-9. EMOTION BLEND: Blend both {primary_emotion} and {secondary_emotion} emotions naturally
-10. THEME WEAVING: Weave both {primary_theme} and {secondary_theme} themes together
-FORMAT:
-- Write exactly {num_phrases} short text lines
-- No annotations, explanations, or line numbers
-- Do not count syllables in the output
-IMPORTANT: If you can't express an idea in {max_syllables} or fewer syllables, break it across two lines or choose a simpler way to express it.
-===== EXAMPLES OF CORRECT LENGTH =====
-Example 1 (short fragments connected by flow):
-Cold tea cup (3 syllables)
-on windowsill (3 syllables)
-cat watches rain (3 syllables)
-through foggy glass (3 syllables)
-Example 2 (prepositional connections):
-Keys dropped here (3 syllables)
-by the front door (3 syllables)
-where shoes pile up (3 syllables)
-since you moved in (3 syllables)
-DO NOT copy my examples. Create ENTIRELY NEW lyrics that blend {primary_emotion} and {secondary_emotion} emotions while exploring {primary_theme} and {secondary_theme} themes.
-REMEMBER: NO LINE SHOULD EXCEED {max_syllables} SYLLABLES - this is the most important rule!
-"""
-        # Generate lyrics using the LLM model
         messages = [
             {"role": "user", "content": prompt}
         ]
@@ -370,14 +357,15 @@ REMEMBER: NO LINE SHOULD EXCEED {max_syllables} SYLLABLES - this is the most imp
         # Tokenize and move to model device
         model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
-        # Generate with optimized parameters
         generated_ids = llm_model.generate(
             **model_inputs,
-            max_new_tokens=1024,
             do_sample=True,
-            temperature=0.7,
-            top_p=0.9,
-            repetition_penalty=1.2,
             pad_token_id=llm_tokenizer.eos_token_id
         )
@@ -385,303 +373,199 @@ REMEMBER: NO LINE SHOULD EXCEED {max_syllables} SYLLABLES - this is the most imp
         output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
         lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
-        # ULTRA AGGRESSIVE CLEANING - COMPLETELY REVISED
-        # ------------------------------------------------
-        # 1. First, look for any standard dividers that might separate thinking from lyrics
-        divider_patterns = [
-            r'Here are the lyrics:',
-            r'Here is my song:',
-            r'The lyrics:',
-            r'My lyrics:',
-            r'Song lyrics:',
-            r'\*\*\*+',
-            r'===+',
-            r'---+',
-            r'```',
-            r'Lyrics:'
-        ]
-        for pattern in divider_patterns:
-            matches = re.finditer(pattern, lyrics, re.IGNORECASE)
-            for match in matches:
-                # Keep only content after the divider
-                lyrics = lyrics[match.end():].strip()
-        # 2. Remove thinking tags completely before splitting into lines
-        lyrics = re.sub(r'<think>.*?</think>', '', lyrics, flags=re.DOTALL)
-        lyrics = re.sub(r'\[thinking\].*?\[/thinking\]', '', lyrics, flags=re.DOTALL)
-        lyrics = re.sub(r'<think>', '', lyrics, flags=re.DOTALL)
-        lyrics = re.sub(r'</think>', '', lyrics, flags=re.DOTALL)
-        lyrics = re.sub(r'\[thinking\]', '', lyrics, flags=re.DOTALL)
-        lyrics = re.sub(r'\[/thinking\]', '', lyrics, flags=re.DOTALL)
-        # 3. Split text into lines for aggressive line-by-line filtering
         lines = lyrics.strip().split('\n')
         clean_lines = []
-        # 4. Define comprehensive patterns for non-lyrical content
-        non_lyric_patterns = [
-            # Meta-commentary
-            r'^(note|thinking|thoughts|let me|i will|i am going|i would|i can|i need to|i have to|i should|let\'s|here|now)',
-            r'^(first|second|third|next|finally|importantly|remember|so|ok|okay|as requested|as asked|considering)',
-            # Explanations
-            r'syllable[s]?|phrase|rhythm|beats?|tempo|bpm|instruction|follow|alignment|match|corresponding',
-            r'verses?|chorus|bridge|section|stanza|part|template|format|pattern|example',
-            r'requirements?|guidelines?|song structure|stressed|unstressed',
-            # Technical language
-            r'generated|output|result|provide|create|write|draft|version',
-            # Annotations and numbering
-            r'^line \d+|^\d+[\.\):]|^\[\w+\]|^[\*\-\+] ',
-            # Questions or analytical statements
-            r'\?$|analysis|evaluate|review|check|ensure',
-            # Instruction-like statements
-            r'make sure|please note|important|notice|pay attention'
-        ]
-        # 5. Identify which lines are likely actual lyrics vs non-lyrics
         for line in lines:
             line = line.strip()
-            # Skip empty lines or lines with just spaces/tabs
             if not line or line.isspace():
                 continue
-            # Skip lines that match any non-lyric pattern
-            should_skip = False
-            for pattern in non_lyric_patterns:
-                if re.search(pattern, line.lower()):
-                    should_skip = True
-                    break
-            if should_skip:
-                continue
-            # Skip section headers
-            if (line.startswith('[') and ']' in line) or (line.startswith('(') and ')' in line and len(line) < 20):
-                continue
-            # Skip lines that look like annotations (not prose-like)
-            if ':' in line and not any(word in line.lower() for word in ['like', 'when', 'where', 'how', 'why', 'what']):
-                if len(line.split(':')[0]) < 15:  # Short prefixes followed by colon are likely annotations
-                    continue
-            # Skip very short lines that aren't likely to be lyrics (unless it's just a few words which could be valid)
-            if len(line) < 3:
-                continue
-            # Skip lines that are numbered or bulleted
-            if re.match(r'^\d+\.|\(#\d+\)|\d+\)', line):
                 continue
-            # Skip markdown-style emphasis or headers
-            if re.match(r'^#{1,6} |^\*\*|^__', line):
                 continue
-            # Skip lines with think tags
-            if '<think>' in line.lower() or '</think>' in line.lower() or '[thinking]' in line.lower() or '[/thinking]' in line.lower():
                 continue
-            # Add this line as it passed all filters
-            clean_lines.append(line)
-        # 6. Additional block-level filters for common patterns
-        # Check beginning of lyrics for common prefixes
-        if clean_lines and any(clean_lines[0].lower().startswith(prefix) for prefix in
-                            ['here are', 'these are', 'below are', 'following are']):
-            clean_lines = clean_lines[1:]  # Skip the first line
-        # 7. Process blocks of lines to detect explanation blocks
-        if len(clean_lines) > 3:
-            # Check for explanation blocks at the beginning
-            first_three = ' '.join(clean_lines[:3]).lower()
-            if any(term in first_three for term in ['i will', 'i have created', 'i\'ll provide', 'i\'ll write']):
-                # This looks like an explanation, skip the first few lines
-                start_idx = 0
-                for i, line in enumerate(clean_lines):
-                    if i >= 3 and not any(term in line.lower() for term in ['i will', 'created', 'write', 'provide']):
-                        start_idx = i
-                        break
-                clean_lines = clean_lines[start_idx:]
-            # Check for explanation blocks at the end
-            last_three = ' '.join(clean_lines[-3:]).lower()
-            if any(term in last_three for term in ['hope this', 'these lyrics', 'as you can see', 'this song', 'i have']):
-                # This looks like an explanation at the end, truncate
-                end_idx = len(clean_lines)
-                for i in range(len(clean_lines) - 1, max(0, len(clean_lines) - 4), -1):
-                    if i < len(clean_lines) and not any(term in clean_lines[i].lower() for term in
-                                                    ['hope', 'these lyrics', 'as you can see', 'this song']):
-                        end_idx = i + 1
-                        break
-                clean_lines = clean_lines[:end_idx]
-        # 8. Cleanup - Remove remaining annotations or thinking
-        for i in range(len(clean_lines)):
             # Remove trailing thoughts/annotations
-            clean_lines[i] = re.sub(r'\s+//.*$', '', clean_lines[i])
-            clean_lines[i] = re.sub(r'\s+\(.*?\)$', '', clean_lines[i])
-            # Remove thinking tags completely
-            clean_lines[i] = re.sub(r'<think>.*?</think>', '', clean_lines[i], flags=re.DOTALL)
-            clean_lines[i] = re.sub(r'\[thinking\].*?\[/thinking\]', '', clean_lines[i], flags=re.DOTALL)
-            clean_lines[i] = re.sub(r'<think>', '', clean_lines[i])
-            clean_lines[i] = re.sub(r'</think>', '', clean_lines[i])
-            clean_lines[i] = re.sub(r'\[thinking\]', '', clean_lines[i])
-            clean_lines[i] = re.sub(r'\[/thinking\]', '', clean_lines[i])
             # Remove syllable count annotations
-            clean_lines[i] = re.sub(r'\s*\(\d+\s*syllables?\)', '', clean_lines[i])
-        # 9. Filter out any remaining empty lines after tag removal
-        clean_lines = [line for line in clean_lines if line.strip() and not line.isspace()]
-        # 10. NEW: Apply strict syllable enforcement - split or truncate lines that are too long
-        # This is a critical step to ensure no line exceeds our max syllable count
         if lyric_templates:
-            max_allowed_syllables = min(7, max([t.get('max_expected', 6) for t in lyric_templates]))
         else:
             max_allowed_syllables = 6
-        clean_lines = enforce_syllable_limits(clean_lines, max_allowed_syllables)
-        # 11. NEW: Check for template copying or clichéd phrases
-        cliched_patterns = [
-            r'moonlight (shimmers?|falls?|dances?)',
-            r'shadows? (dance|play|fall|stretch)',
-            r'time slips? away',
-            r'whispers? (fade|in the)',
-            r'silence speaks',
-            r'stars? shine',
-            r'hearts? beat',
-            r'footsteps (fade|echo)',
-            r'gentle wind',
-            r'(old|empty) (roads?|chair)',
-            r'night (holds?|falls?)',
-            r'memories fade',
-            r'dreams (linger|drift)'
-        ]
-        cliche_count = 0
         for line in clean_lines:
-            for pattern in cliched_patterns:
-                if re.search(pattern, line.lower()):
-                    cliche_count += 1
-                    break
-        # Calculate percentage of clichéd lines
-        if clean_lines:
-            cliche_percentage = (cliche_count / len(clean_lines)) * 100
-        else:
-            cliche_percentage = 0
-        # 12. If we have lyric templates, ensure we have the correct number of lines
         if lyric_templates:
             num_required = len(lyric_templates)
-            # If we have too many lines, keep just the best ones
-            if len(clean_lines) > num_required:
-                # Keep the first num_required lines
-                clean_lines = clean_lines[:num_required]
-            # If we don't have enough lines, generate placeholders that fit the syllable count
-            while len(clean_lines) < num_required:
-                i = len(clean_lines)
-                if i < len(lyric_templates):
-                    template = lyric_templates[i]
-                    target_syllables = min(max_allowed_syllables - 1, (template.get('min_expected', 2) + template.get('max_expected', 6)) // 2)
-                    # Generate more creative, contextual placeholders with specificity
-                    # Avoid clichés like "moonlight shimmers" or "time slips away"
-                    specific_placeholders = {
-                        # 2-3 syllables - specific, concrete phrases
-                        2: [
-                            "Phone rings twice",
-                            "Dogs bark loud",
-                            "Keys dropped here",
-                            "Train rolls by",
-                            "Birds take flight"
-                        ],
-                        # 3-4 syllables - specific contexts
-                        3: [
-                            "Coffee gets cold",
-                            "Fan blades spin",
-                            "Pages turn slow",
-                            "Neighbors talk",
-                            "Radio hums soft"
-                        ],
-                        # 4-5 syllables - specific details
-                        4: [
-                            "Fingers tap table",
-                            "Taxi waits in rain",
-                            "Laptop screen blinks",
-                            "Ring left on sink",
-                            "Church bells ring loud"
-                        ],
-                        # 5-6 syllables - context rich
-                        5: [
-                            "Letters with no stamps",
-                            "Watch shows wrong time",
-                            "Jeans with torn knees",
-                            "Dog barks next door",
-                            "Smoke alarm beeps"
-                        ]
-                    }
-                    # Make theme and emotion specific placeholders to add to the list
-                    theme_specific = []
-                    if primary_theme.lower() in ["love", "relationship", "romance"]:
-                        theme_specific = ["Lipstick on glass", "Text left on read", "Scent on your coat"]
-                    elif primary_theme.lower() in ["loss", "grief", "sadness"]:
-                        theme_specific = ["Chair sits empty", "Photos face down", "Clothes in closet"]
-                    elif primary_theme.lower() in ["hope", "inspiration", "triumph"]:
-                        theme_specific = ["Seeds start to grow", "Finish line waits", "New day breaks through"]
-                    # Get the closest matching syllable group
-                    closest_group = min(specific_placeholders.keys(), key=lambda k: abs(k - target_syllables))
-                    # Create pool of available placeholders from both specific and theme specific options
-                    all_placeholders = specific_placeholders[closest_group] + theme_specific
-                    # Choose a placeholder that hasn't been used yet
-                    available_placeholders = [p for p in all_placeholders if p not in clean_lines]
-                    if available_placeholders:
-                        # Use modulo for more variation
-                        idx = (i * 17 + len(clean_lines) * 13) % len(available_placeholders)
-                        placeholder = available_placeholders[idx]
                     else:
-                        # If we've used all placeholders, create something random and specific
-                        subjects = ["Car", "Dog", "Kid", "Clock", "Phone", "Tree", "Book", "Door", "Light"]
-                        verbs = ["waits", "moves", "stops", "falls", "breaks", "turns", "sleeps"]
-                        # Ensure randomness with seed that changes with each call
-                        import random
-                        random.seed(len(clean_lines) * 27 + i * 31)
-                        subj = random.choice(subjects)
-                        verb = random.choice(verbs)
-                        placeholder = f"{subj} {verb}"
                 else:
-                    placeholder = "Page turns slow"
-                clean_lines.append(placeholder)
         # Assemble final lyrics
         final_lyrics = '\n'.join(clean_lines)
-        # Add a warning if we detected too many clichés
-        if cliche_percentage >= 40:
-            final_lyrics = f"""WARNING: These lyrics contain several overused phrases and clichés.
-Try regenerating for more original content.
-{final_lyrics}"""
-        # 13. Final sanity check - if we have nothing or garbage, return an error
-        if not final_lyrics or len(final_lyrics) < 10:
-            return "The model generated only thinking content but no actual lyrics. Please try again."
         return final_lyrics
     except Exception as e:

         lyric_templates = music_analysis.get("lyric_templates", [])
         # Define num_phrases here to ensure it's available in all code paths
+        # Also define syllable limits for the prompt
         if not lyric_templates:
+            num_phrases_for_prompt = 4  # Default if no templates
+            min_syl_for_prompt = 2
+            max_syl_for_prompt = 7
+            prompt = (f'''You are a professional songwriter. Write song lyrics for a {genre} song.
+SONG DETAILS:
+- Key: {key} {mode}
+- Tempo: {tempo} BPM
+- Primary emotion: {primary_emotion}
+- Secondary emotion: {secondary_emotion}
+- Primary theme: {primary_theme}
+- Secondary theme: {secondary_theme}
+CRITICAL REQUIREMENTS (MOST IMPORTANT):
+- You MUST write EXACTLY {num_phrases_for_prompt} lines of lyrics.
+- Number each lyric line starting from 1 up to {num_phrases_for_prompt}. For example:
+  1. First lyric line.
+  2. Second lyric line.
+  ...
+  {num_phrases_for_prompt}. The final lyric line.
+- Each numbered line (after removing the number and period) MUST be {min_syl_for_prompt}-{max_syl_for_prompt} syllables MAXIMUM.
+- NO line's content (after removing the number) can exceed {max_syl_for_prompt} syllables. This is EXTREMELY IMPORTANT.
+- Count syllables carefully for the content of each numbered line.
+- Use SHORT WORDS and SHORT PHRASES for the content of each numbered line.
+- Break long thoughts into multiple numbered lines.
+CREATIVITY GUIDELINES:
+- Create original, vivid imagery that captures the emotions.
+- Use concrete, sensory details (what you see, hear, feel, touch).
+- Avoid clichés and common phrases.
+- Draw inspiration from the specific themes and emotions listed above.
+- Think about unique moments, specific objects, or personal details.
+- Use unexpected word combinations.
+- Focus on the particular mood created by {primary_emotion} and {secondary_emotion}.
+STYLE FOR SHORT LINES (for the content of each numbered line):
+- Use brief, impactful phrases.
+- Focus on single images or moments per line.
+- Choose simple, everyday words.
+- Let each line paint one clear picture.
+ABSOLUTELY NO placeholders like [line], [moment], [breath], [phrase], [word], etc.
+OUTPUT FORMAT:
+Under the "LYRICS:" heading, provide exactly {num_phrases_for_prompt} numbered lyric lines.
+LYRICS:
+(Your {num_phrases_for_prompt} numbered lyric lines go here, each starting with its number, a period, and a space)
+Remember: Output EXACTLY {num_phrases_for_prompt} numbered lyric lines. Each line's content (after removing the number) must be {min_syl_for_prompt}-{max_syl_for_prompt} syllables.''')
         else:
             # Calculate the typical syllable range for this genre
+            num_phrases_for_prompt = len(lyric_templates)
+            max_syl_for_prompt = max([t.get('max_expected', 7) for t in lyric_templates]) if lyric_templates and lyric_templates[0].get('max_expected') else 7
+            min_syl_for_prompt = min([t.get('min_expected', 2) for t in lyric_templates]) if lyric_templates and lyric_templates[0].get('min_expected') else 2
+            prompt = (f'''You are a professional songwriter. Write song lyrics for a {genre} song.
+SONG DETAILS:
+- Key: {key} {mode}
+- Tempo: {tempo} BPM
+- Primary emotion: {primary_emotion}
+- Secondary emotion: {secondary_emotion}
+- Primary theme: {primary_theme}
+- Secondary theme: {secondary_theme}
+CRITICAL REQUIREMENTS (MOST IMPORTANT):
+- You MUST write EXACTLY {num_phrases_for_prompt} lines of lyrics.
+- Number each lyric line starting from 1 up to {num_phrases_for_prompt}. For example:
+  1. First lyric line.
+  2. Second lyric line.
+  ...
+  {num_phrases_for_prompt}. The final lyric line.
+- Each numbered line (after removing the number and period) MUST be {min_syl_for_prompt}-{max_syl_for_prompt} syllables MAXIMUM.
+- NO line's content (after removing the number) can exceed {max_syl_for_prompt} syllables. This is EXTREMELY IMPORTANT.
+- Count syllables carefully for the content of each numbered line.
+- Use SHORT WORDS and SHORT PHRASES for the content of each numbered line.
+- Break long thoughts into multiple numbered lines.
+CREATIVITY GUIDELINES:
+- Create original, vivid imagery that captures the emotions.
+- Use concrete, sensory details (what you see, hear, feel, touch).
+- Avoid clichés and common phrases.
+- Draw inspiration from the specific themes and emotions listed above.
+- Think about unique moments, specific objects, or personal details.
+- Use unexpected word combinations.
+- Focus on the particular mood created by {primary_emotion} and {secondary_emotion}.
+STYLE FOR SHORT LINES (for the content of each numbered line):
+- Use brief, impactful phrases.
+- Focus on single images or moments per line.
+- Choose simple, everyday words.
+- Let each line paint one clear picture.
+ABSOLUTELY NO placeholders like [line], [moment], [breath], [phrase], [word], etc.
+OUTPUT FORMAT:
+Under the "LYRICS:" heading, provide exactly {num_phrases_for_prompt} numbered lyric lines.
+LYRICS:
+(Your {num_phrases_for_prompt} numbered lyric lines go here, each starting with its number, a period, and a space)
+Remember: Output EXACTLY {num_phrases_for_prompt} numbered lyric lines. Each line's content (after removing the number) must be {min_syl_for_prompt}-{max_syl_for_prompt} syllables.''')
+        # Wrap the prompt as a single user message for the QwQ model
         messages = [
             {"role": "user", "content": prompt}
         ]
         # Tokenize and move to model device
         model_inputs = llm_tokenizer([text], return_tensors="pt").to(llm_model.device)
+        # Generate with optimized parameters for QwQ model
         generated_ids = llm_model.generate(
             **model_inputs,
+            max_new_tokens=2048,  # Increased from 1024 to give QwQ more room
             do_sample=True,
+            temperature=0.6,  # QwQ recommended setting
+            top_p=0.95,       # QwQ recommended setting
+            top_k=30,         # QwQ recommended range 20-40
+            repetition_penalty=1.1,  # Reduced to allow some repetition if needed
             pad_token_id=llm_tokenizer.eos_token_id
         )
         output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
         lyrics = llm_tokenizer.decode(output_ids, skip_special_tokens=True).strip()
+        # ENHANCED CLEANING FOR QWQ MODEL - IMPROVED APPROACH
+        # ---------------------------------------------------
+        # QwQ often includes thinking process - we need to extract only the final lyrics
+        # 1. First, remove any thinking tags completely (QwQ specific)
+        lyrics = re.sub(r'<think>.*?</think>', '', lyrics, flags=re.DOTALL | re.IGNORECASE)
+        lyrics = re.sub(r'<think>', '', lyrics, flags=re.IGNORECASE)
+        lyrics = re.sub(r'</think>', '', lyrics, flags=re.IGNORECASE)
+        # 2. Look for the LYRICS: section specifically
+        lyrics_section_match = re.search(r'LYRICS:\s*\n(.*?)(?:\n\n|\Z)', lyrics, re.DOTALL | re.IGNORECASE)
+        if lyrics_section_match:
+            lyrics = lyrics_section_match.group(1).strip()
+        else:
+            # Fallback: look for other common transitions that indicate the start of actual lyrics
+            lyric_start_patterns = [
+                r'(?:here (?:are )?(?:the )?lyrics?:?|lyrics?:?|my lyrics?:?|song lyrics?:?)\s*',
+                r'(?:here (?:is )?(?:a )?song:?|here (?:is )?my song:?)\s*',
+                r'(?:\*{3,}|\={3,}|\-{3,})\s*',
+                r'(?:final lyrics?:?|the lyrics?:?)\s*',
+                r'```\s*'
+            ]
+            # Try to find where actual lyrics start
+            lyrics_start_pos = 0
+            for pattern in lyric_start_patterns:
+                match = re.search(pattern, lyrics, re.IGNORECASE)
+                if match:
+                    lyrics_start_pos = max(lyrics_start_pos, match.end())
+            # Keep content from the identified start position
+            if lyrics_start_pos > 0:
+                lyrics = lyrics[lyrics_start_pos:].strip()
+        # 3. Split into lines and apply basic filtering
         lines = lyrics.strip().split('\n')
         clean_lines = []
+        # 4. Simple filtering - keep only actual lyric lines
         for line in lines:
             line = line.strip()
             if not line or line.isspace():
                 continue
+            # Strip leading numbers like "1. ", "2. ", etc.
+            line = re.sub(r'^\d+\.\s*', '', line)
+            line_lower = line.lower()
+            # Remove placeholder lines - more comprehensive pattern
+            if re.match(r'^\[ *(line|moment|breath|phrase|word|sound) *\]$', line_lower):
                 continue
+            # Skip lines that are clearly not lyrics (simplified filtering)
+            if any(phrase in line_lower for phrase in [
+                'line 1', 'line 2', 'line 3',
+                'thinking', 'lyrics:', 'format:', 'etc...', 'commentary',
+                'syllables', 'requirements', 'output', 'provide'
+            ]):
                 continue
+            # Skip numbered annotations
+            if re.match(r'^\d+[\.\):]|^\[.*\]$', line):
                 continue
+            # Keep lines that look like actual lyrics (not too long, not too technical)
+            words = line.split()
+            if 1 <= len(words) <= 8 and not any(tech_word in line_lower for tech_word in [
+                'syllable', 'beat', 'tempo', 'analysis', 'format', 'section'
+            ]):
+                clean_lines.append(line)
+        # 5. Additional cleanup for QwQ-specific issues
+        # Remove any remaining thinking fragments
+        final_clean_lines = []
+        for line in clean_lines:
             # Remove trailing thoughts/annotations
+            line = re.sub(r'\s+//.*$', '', line)
+            line = re.sub(r'\s+\(.*?\)$', '', line)
             # Remove syllable count annotations
+            line = re.sub(r'\s*\(\d+\s*syllables?\)', '', line, flags=re.IGNORECASE)
+            # Skip if the line became empty after cleaning
+            if line.strip():
+                final_clean_lines.append(line.strip())
+        clean_lines = final_clean_lines
+        # AGGRESSIVE SYLLABLE ENFORCEMENT - This is critical for beat matching
         if lyric_templates:
+            max_allowed_syllables = max([t.get('max_expected', 6) for t in lyric_templates])
+            min_allowed_syllables = min([t.get('min_expected', 2) for t in lyric_templates])
         else:
             max_allowed_syllables = 6
+            min_allowed_syllables = 2
+        # Enforce syllable limits on every line
+        syllable_enforced_lines = []
         for line in clean_lines:
+            words = line.split()
+            current_syllables = sum(beat_analyzer.count_syllables(word) for word in words)
+            # If line is within limits, keep it
+            if min_allowed_syllables <= current_syllables <= max_allowed_syllables:
+                syllable_enforced_lines.append(line)
+            # If line is too long, we need to split it intelligently
+            elif current_syllables > max_allowed_syllables:
+                # Try to split into multiple shorter lines
+                current_line = []
+                current_count = 0
+                for word in words:
+                    word_syllables = beat_analyzer.count_syllables(word)
+                    # If adding this word would exceed limit, start new line
+                    if current_count + word_syllables > max_allowed_syllables and current_line:
+                        syllable_enforced_lines.append(" ".join(current_line))
+                        current_line = [word]
+                        current_count = word_syllables
+                    else:
+                        # Add the word to the current line
+                        current_line.append(word)
+                        current_count += word_syllables
+                # Add the remaining words as final line
+                if current_line and current_count >= min_allowed_syllables:
+                    syllable_enforced_lines.append(" ".join(current_line))
+            # Skip lines that are too short
+        clean_lines = syllable_enforced_lines
+        # Get required number of lines
         if lyric_templates:
             num_required = len(lyric_templates)
+        else:
+            num_required = 4
+        # IMPORTANT: Adjust line count to match requirement
+        if len(clean_lines) > num_required:
+            # Too many lines - try to merge adjacent short lines first
+            merged_lines = []
+            i = 0
+            while i < len(clean_lines) and len(merged_lines) < num_required:
+                if i + 1 < len(clean_lines) and len(merged_lines) < num_required - 1:
+                    # Check if we can merge current and next line
+                    line1 = clean_lines[i]
+                    line2 = clean_lines[i + 1]
+                    words1 = line1.split()
+                    words2 = line2.split()
+                    syllables1 = sum(beat_analyzer.count_syllables(word) for word in words1)
+                    syllables2 = sum(beat_analyzer.count_syllables(word) for word in words2)
+                    # If merging would stay within limits, merge them
+                    if syllables1 + syllables2 <= max_allowed_syllables:
+                        merged_lines.append(line1 + " " + line2)
+                        i += 2
                     else:
+                        merged_lines.append(line1)
+                        i += 1
                 else:
+                    merged_lines.append(clean_lines[i])
+                    i += 1
+            # If still too many, truncate to required number
+            clean_lines = merged_lines[:num_required]
+        elif len(clean_lines) < num_required:
+            # Too few lines - this is a generation failure
+            # Instead of error, try to pad with empty lines or regenerate
+            # For now, let's return an error message
+            return f"Error: The model generated {len(clean_lines)} lines but {num_required} were required. Please try again."
+        # Final check - ensure we have exactly the required number
+        if len(clean_lines) != num_required:
+            # If we still don't have the right number, truncate or pad
+            if len(clean_lines) > num_required:
+                clean_lines = clean_lines[:num_required]
+            else:
+                # This shouldn't happen with the above logic, but just in case
+                return f"Error: Could not generate exactly {num_required} lines. Please try again."
         # Assemble final lyrics
         final_lyrics = '\n'.join(clean_lines)
+        # Final sanity check - if we have nothing or very little, return an error
+        if not final_lyrics or len(final_lyrics.strip()) < 15:
+            return "The model output appears to be mostly thinking content. Please try regenerating for cleaner lyrics."
         return final_lyrics
     except Exception as e:

example.py DELETED Viewed

@@ -1,49 +0,0 @@
-import os
-import sys
-from app import process_audio, music_analyzer
-def main():
-    """
-    Example function to demonstrate the application with a sample audio file.
-
-    Reads the audio path from the single command-line argument, then prints
-    the genre classification results, a summary of the emotion analysis
-    (tempo, key and mode, primary emotion, primary theme) and the generated
-    lyrics. Prints a usage or error message and returns early when the
-    argument count is wrong or the file does not exist.
-
-    Usage:
-    python example.py <path_to_audio_file>
-    """
-    # Exactly one argument is expected after the script name.
-    if len(sys.argv) != 2:
-        print("Usage: python example.py <path_to_audio_file>")
-        return
-    audio_file = sys.argv[1]
-    # Check the path up front so a missing file is reported before any processing starts.
-    if not os.path.exists(audio_file):
-        print(f"Error: File {audio_file} does not exist.")
-        return
-    print(f"Processing audio file: {audio_file}")
-    # Call the main processing function; its result is unpacked into the
-    # genre results and the generated lyrics.
-    genre_results, lyrics = process_audio(audio_file)
-    # Get emotion analysis results with a separate call on the same file;
-    # only the 'summary' entry of the result is read below.
-    emotion_results = music_analyzer.analyze_music(audio_file)
-    # Print results as three sections, each under a 50-character '=' banner.
-    print("\n" + "="*50)
-    print("GENRE CLASSIFICATION RESULTS:")
-    print("="*50)
-    print(genre_results)
-    print("\n" + "="*50)
-    print("EMOTION ANALYSIS RESULTS:")
-    print("="*50)
-    # Tempo is formatted to one decimal place, so it is presumably numeric — confirm against analyze_music.
-    print(f"Tempo: {emotion_results['summary']['tempo']:.1f} BPM")
-    print(f"Key: {emotion_results['summary']['key']} {emotion_results['summary']['mode']}")
-    print(f"Primary Emotion: {emotion_results['summary']['primary_emotion']}")
-    print(f"Primary Theme: {emotion_results['summary']['primary_theme']}")
-    print("\n" + "="*50)
-    print("GENERATED LYRICS:")
-    print("="*50)
-    print(lyrics)
-# Run the example only when this file is executed as a script, not when it is imported.
-if __name__ == "__main__":
-    main()