Spaces:

patruff
/

parody-suggestions

Configuration error

App Files Community

patruff committed 17 days ago

Commit

8147e43

verified ·

1 Parent(s): aa0a83c

Upload tool

Browse files

Files changed (2) hide show

requirements.txt +1 -1
tool.py +111 -62

requirements.txt CHANGED Viewed

@@ -1,2 +1,2 @@
-pronouncing
 smolagents



1	smolagents
2	+ pronouncing

tool.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from smolagents.tools import Tool
-import pronouncing
 import string
 import json
 class ParodyWordSuggestionTool(Tool):
@@ -14,6 +14,7 @@ class ParodyWordSuggestionTool(Tool):
     def _get_vowel_groups(self):
         groups = []
         group_strs = self.VOWEL_REF.split("|")
         for group_str in group_strs:
             groups.append(group_str.split(","))
         return groups
@@ -39,25 +40,31 @@ class ParodyWordSuggestionTool(Tool):
         i = 0
         phone = ""
         base_phone = ""
-        v = ""
         group = []
         # First, find the primary stressed vowel if it exists
         for i, phone in enumerate(phones):
             # Check for primary stress (1)
-            if '1' in phone and any(v in phone for v in 'AEIOU'):
                 base_phone = phone.rstrip('012')
-                last_vowel_idx = i
-                last_vowel = base_phone
-                break
         # If no primary stress, just use the last vowel
         if last_vowel_idx == -1:
             for i, phone in enumerate(phones):
                 base_phone = phone.rstrip('012')
-                if any(v in base_phone for v in 'AEIOU'):
-                    last_vowel_idx = i
-                    last_vowel = base_phone
         if last_vowel_idx == -1:
             return None, []
@@ -121,8 +128,6 @@ class ParodyWordSuggestionTool(Tool):
     def _words_have_similar_structure(self, word1, word2, phones1, phones2):
         """Check if words have similar structure beyond just ending."""
-        p = ""
-        v = ""
         # Similar word length
         if abs(len(word1) - len(word2)) > 2:
             return False
@@ -140,6 +145,10 @@ class ParodyWordSuggestionTool(Tool):
             phone_list1 = phones1.split()
             phone_list2 = phones2.split()
             # Get consonants
             consonants1 = [p for p in self._strip_stress(phone_list1) if not any(v in p for v in 'AEIOU')]
             consonants2 = [p for p in self._strip_stress(phone_list2) if not any(v in p for v in 'AEIOU')]
@@ -175,8 +184,6 @@ class ParodyWordSuggestionTool(Tool):
         common_length = 0
         matched = 0
         i = 0
-        p = ""
-        v = ""
         # Variables for whole-word matching
         primary_stress_vowel1 = None
@@ -186,6 +193,23 @@ class ParodyWordSuggestionTool(Tool):
         front_consonants1 = []
         front_consonants2 = []
         # Find primary stressed vowels
         for i, phone in enumerate(phone_list1):
             if '1' in phone and any(v in phone for v in 'AEIOU'):
@@ -225,7 +249,7 @@ class ParodyWordSuggestionTool(Tool):
         word_vowel, word_end = result1
         target_vowel, target_end = result2
-        # Perfect rhyme check (45% of score)
         if word_vowel and target_vowel:
             if self._vowels_match(word_vowel, target_vowel):
                 word_end_clean = self._strip_stress(word_end)
@@ -245,7 +269,7 @@ class ParodyWordSuggestionTool(Tool):
                     else:
                         rhyme_score = 0.6  # Still somewhat rhymes even without ending consonants
-        # Primary stressed vowel match (20% of score)
         primary_vowel_score = 0.0
         if primary_stress_vowel1 and primary_stress_vowel2:
             if primary_stress_vowel1 == primary_stress_vowel2:
@@ -260,39 +284,68 @@ class ParodyWordSuggestionTool(Tool):
         # Near rhyme check - 15% of score
         near_rhyme_score = 0.0
-        # Enhanced check for -ing endings
         if len(phone_list1) >= 2 and len(phone_list2) >= 2:
             # Check for -ing endings
             if (self._strip_stress(phone_list1[-2:]) == ['IH', 'NG'] and
                 self._strip_stress(phone_list2[-2:]) == ['IH', 'NG']):
-                # Check if the words have similar structure (important for parody)
-                if self._words_have_similar_structure(word1, word2, phones1, phones2):
-                    near_rhyme_score = 0.8
-                else:
-                    # Words ending in -ing but with very different structure
-                    # like "running" vs "kinging" should score lower
-                    near_rhyme_score = 0.4
-                # Additional boost if the consonant before -ing is similar
                 if len(phone_list1) >= 3 and len(phone_list2) >= 3:
-                    consonant1 = self._strip_stress(phone_list1[-3:-2])
-                    consonant2 = self._strip_stress(phone_list2[-3:-2])
-                    if len(consonant1) > 0 and len(consonant2) > 0:
-                        # Same consonant gets highest score
-                        if consonant1[0] == consonant2[0]:
-                            near_rhyme_score = max(near_rhyme_score, 0.9)
-                        # Similar consonants (e.g., 'N' and 'M' are both nasals)
-                        elif self._consonants_are_similar(consonant1[0], consonant2[0]):
-                            near_rhyme_score = max(near_rhyme_score, 0.8)
             # Check for -y endings (like happy/sappy)
             elif (self._strip_stress(phone_list1[-1:]) == ['IY'] and
                   self._strip_stress(phone_list2[-1:]) == ['IY']):
                 near_rhyme_score = 0.7
-        # Length and stress similarity (10% of score)
         phone_diff = abs(len(phone_list1) - len(phone_list2))
         max_phones = max(len(phone_list1), len(phone_list2))
         length_score = 1.0 if phone_diff == 0 else 1.0 - (phone_diff / max_phones)
@@ -303,39 +356,33 @@ class ParodyWordSuggestionTool(Tool):
         stress2 = pronouncing.stresses(phones2)
         stress_score = 1.0 if stress1 == stress2 else 0.5
-        # Front consonant match (10% of score)
-        front_score = front_consonant_score * 0.1
         # Weighted combination
         similarity = (
-            (rhyme_score * 0.45) +            # End rhyme (45%)
-            (primary_vowel_score * 0.2) +     # Primary vowel (20%)
-            (near_rhyme_score * 0.15) +       # Near rhyme features (15%)
-            (length_score * 0.05) +           # Length similarity (5%)
-            (stress_score * 0.05) +           # Stress pattern (5%)
-            (front_score)                     # Front consonants (10%)
         )
-        # IMPORTANT: Special case for words like "running"/"cumming"
-        # These should match well for parody purposes
-        if (word1.endswith('ing') and word2.endswith('ing') and
-            front_consonant_score < 0.5 and  # Different initial consonants
-            near_rhyme_score >= 0.8):        # Good near-rhyme pattern
-            similarity = max(similarity, 0.8)  # Ensure high enough score
-        # IMPORTANT: Penalty for words that are too similar to be funny
-        # For parody, slightly different words are better than almost identical words
-        if word1 and word2:
-            if word1[0] == word2[0] and rhyme_score > 0.9 and primary_vowel_score > 0.9:
-                # Words starting with same letter and almost perfect rhyme
-                # are less funny for parody
-                similarity *= 0.9
-        # Special case: Words need to be somewhat different to be funny in parody
-        if len(word1) > 3 and len(word2) > 3:
-            # Give boost to words with same length but different consonants
-            if len(word1) == len(word2) and front_consonant_score < 0.5 and rhyme_score > 0.8:
-                similarity = max(similarity, 0.75)  # Good for parody
         # Cap at 1.0
         similarity = min(1.0, similarity)
@@ -345,6 +392,7 @@ class ParodyWordSuggestionTool(Tool):
             "rhyme_score": round(rhyme_score, 3),
             "primary_vowel_score": round(primary_vowel_score, 3),
             "near_rhyme_score": round(near_rhyme_score, 3),
             "length_score": round(length_score, 3),
             "stress_score": round(stress_score, 3),
             "front_consonant_score": round(front_consonant_score, 3),
@@ -427,6 +475,7 @@ class ParodyWordSuggestionTool(Tool):
                         "rhyme_score": similarity_result["rhyme_score"],
                         "primary_vowel_score": similarity_result["primary_vowel_score"],
                         "near_rhyme_score": similarity_result["near_rhyme_score"],
                         "length_score": similarity_result["length_score"],
                         "stress_score": similarity_result["stress_score"],
                         "front_consonant_score": similarity_result["front_consonant_score"],

 from smolagents.tools import Tool
 import string
+import pronouncing
 import json
 class ParodyWordSuggestionTool(Tool):
     def _get_vowel_groups(self):
         groups = []
         group_strs = self.VOWEL_REF.split("|")
+        group_str = ""
         for group_str in group_strs:
             groups.append(group_str.split(","))
         return groups
         i = 0
         phone = ""
         base_phone = ""
         group = []
+        vowel_char = ""
         # First, find the primary stressed vowel if it exists
         for i, phone in enumerate(phones):
             # Check for primary stress (1)
+            if '1' in phone:
+                # Check if it's a vowel
                 base_phone = phone.rstrip('012')
+                for vowel_char in 'AEIOU':
+                    if vowel_char in base_phone:
+                        last_vowel_idx = i
+                        last_vowel = base_phone
+                        break
+                if last_vowel is not None:
+                    break
         # If no primary stress, just use the last vowel
         if last_vowel_idx == -1:
             for i, phone in enumerate(phones):
                 base_phone = phone.rstrip('012')
+                for vowel_char in 'AEIOU':
+                    if vowel_char in base_phone:
+                        last_vowel_idx = i
+                        last_vowel = base_phone
         if last_vowel_idx == -1:
             return None, []
     def _words_have_similar_structure(self, word1, word2, phones1, phones2):
         """Check if words have similar structure beyond just ending."""
         # Similar word length
         if abs(len(word1) - len(word2)) > 2:
             return False
             phone_list1 = phones1.split()
             phone_list2 = phones2.split()
+            # Initialize variables for list comprehension
+            p = ""
+            v = ""
             # Get consonants
             consonants1 = [p for p in self._strip_stress(phone_list1) if not any(v in p for v in 'AEIOU')]
             consonants2 = [p for p in self._strip_stress(phone_list2) if not any(v in p for v in 'AEIOU')]
         common_length = 0
         matched = 0
         i = 0
         # Variables for whole-word matching
         primary_stress_vowel1 = None
         front_consonants1 = []
         front_consonants2 = []
+        # Variables for special pattern matching
+        special_pattern_score = 0.0
+        stem1 = ""
+        stem2 = ""
+        consonant1 = ""
+        consonant2 = ""
+        nasals = ['m', 'n']
+        stops = ['p', 'b', 't', 'd', 'k', 'g']
+        fricatives = ['f', 'v', 'th', 's', 'z', 'sh']
+        base1 = ""
+        base2 = ""
+        # Variables for list comprehensions
+        p = ""
+        v = ""
+        group = []
         # Find primary stressed vowels
         for i, phone in enumerate(phone_list1):
             if '1' in phone and any(v in phone for v in 'AEIOU'):
         word_vowel, word_end = result1
         target_vowel, target_end = result2
+        # Perfect rhyme check (40% of score)
         if word_vowel and target_vowel:
             if self._vowels_match(word_vowel, target_vowel):
                 word_end_clean = self._strip_stress(word_end)
                     else:
                         rhyme_score = 0.6  # Still somewhat rhymes even without ending consonants
+        # Primary stressed vowel match (15% of score)
         primary_vowel_score = 0.0
         if primary_stress_vowel1 and primary_stress_vowel2:
             if primary_stress_vowel1 == primary_stress_vowel2:
         # Near rhyme check - 15% of score
         near_rhyme_score = 0.0
+        # Check for specific endings
         if len(phone_list1) >= 2 and len(phone_list2) >= 2:
             # Check for -ing endings
             if (self._strip_stress(phone_list1[-2:]) == ['IH', 'NG'] and
                 self._strip_stress(phone_list2[-2:]) == ['IH', 'NG']):
+                # Base score for -ing endings
+                near_rhyme_score = 0.6
+                # Additional checks for consonant before -ing
                 if len(phone_list1) >= 3 and len(phone_list2) >= 3:
+                    consonant1_list = self._strip_stress(phone_list1[-3:-2])
+                    consonant2_list = self._strip_stress(phone_list2[-3:-2])
+                    if consonant1_list and consonant2_list:
+                        consonant1 = consonant1_list[0]
+                        consonant2 = consonant2_list[0]
+                        # Same consonant gets highest score (like running/gunning)
+                        if consonant1 == consonant2:
+                            near_rhyme_score = 0.9
+                        # Similar consonants (nasal: 'N'/'M') get high score (running/cumming)
+                        elif self._consonants_are_similar(consonant1, consonant2):
+                            near_rhyme_score = 0.8
             # Check for -y endings (like happy/sappy)
             elif (self._strip_stress(phone_list1[-1:]) == ['IY'] and
                   self._strip_stress(phone_list2[-1:]) == ['IY']):
                 near_rhyme_score = 0.7
+        # Special pattern matching for running/cumming type pairs (15% of score)
+        if word1.endswith('ing') and word2.endswith('ing'):
+            # Get the stem (without -ing)
+            stem1 = word1[:-3]
+            stem2 = word2[:-3]
+            # Same stem length is good for parody
+            if len(stem1) == len(stem2):
+                special_pattern_score += 0.4
+            # If both stems end with same consonant (like 'n' in run-ning, 'm' in cum-ming)
+            # this makes them rhyme much better
+            if stem1 and stem2 and stem1[-1] == stem2[-1]:
+                special_pattern_score += 0.3
+            elif stem1 and stem2:
+                # Check if the final consonants are in the same phonetic group
+                # This helps pair words like running/humming (nasal consonants)
+                consonant1 = stem1[-1]
+                consonant2 = stem2[-1]
+                # Check if they're in the same group
+                if (consonant1 in nasals and consonant2 in nasals) or \
+                   (consonant1 in stops and consonant2 in stops) or \
+                   (consonant1 in fricatives and consonant2 in fricatives):
+                    special_pattern_score += 0.2
+            # Check for double consonants (like nn in running, mm in cumming)
+            if len(stem1) >= 2 and stem1[-1] == stem1[-2] and \
+               len(stem2) >= 2 and stem2[-1] == stem2[-2]:
+                special_pattern_score += 0.3
+        # Length and stress similarity (5% each)
         phone_diff = abs(len(phone_list1) - len(phone_list2))
         max_phones = max(len(phone_list1), len(phone_list2))
         length_score = 1.0 if phone_diff == 0 else 1.0 - (phone_diff / max_phones)
         stress2 = pronouncing.stresses(phones2)
         stress_score = 1.0 if stress1 == stress2 else 0.5
+        # Front consonant match (5% of score)
+        front_score = front_consonant_score * 0.05
         # Weighted combination
         similarity = (
+            (rhyme_score * 0.40) +           # End rhyme (40%)
+            (primary_vowel_score * 0.15) +   # Primary vowel (15%)
+            (near_rhyme_score * 0.15) +      # Near rhyme features (15%)
+            (special_pattern_score * 0.15) + # Special pattern match (15%)
+            (length_score * 0.05) +          # Length similarity (5%)
+            (stress_score * 0.05) +          # Stress pattern (5%)
+            (front_score)                    # Front consonants (5%)
         )
+        # Additional boost for specific word patterns that make great parody matches
+        # This specifically addresses running/cumming type pairs
+        if word1.endswith('ing') and word2.endswith('ing'):
+            base1 = word1[:-3]
+            base2 = word2[:-3]
+            # Specific pattern for words like running/cunning/cumming
+            if (len(base1) == 3 and len(base2) == 3 and
+                base1[0] != base2[0] and     # Different first consonant (good for parody)
+                len(base1) >= 2 and len(base2) >= 2 and
+                base1[-1] == base1[-2] and   # Double consonant in first word (nn in running)
+                base2[-1] == base2[-2]):     # Double consonant in second word (mm in cumming)
+                similarity = max(similarity, 0.9)  # These are excellent parody matches
         # Cap at 1.0
         similarity = min(1.0, similarity)
             "rhyme_score": round(rhyme_score, 3),
             "primary_vowel_score": round(primary_vowel_score, 3),
             "near_rhyme_score": round(near_rhyme_score, 3),
+            "special_pattern_score": round(special_pattern_score, 3),
             "length_score": round(length_score, 3),
             "stress_score": round(stress_score, 3),
             "front_consonant_score": round(front_consonant_score, 3),
                         "rhyme_score": similarity_result["rhyme_score"],
                         "primary_vowel_score": similarity_result["primary_vowel_score"],
                         "near_rhyme_score": similarity_result["near_rhyme_score"],
+                        "special_pattern_score": similarity_result.get("special_pattern_score", 0),
                         "length_score": similarity_result["length_score"],
                         "stress_score": similarity_result["stress_score"],
                         "front_consonant_score": similarity_result["front_consonant_score"],