Spaces:

patruff
/

parody-suggestions

Configuration error

App Files Files Community

patruff committed on Mar 30

Commit

2aab8aa

verified ·

1 Parent(s): 6507960

Upload tool

Browse files

Files changed (1) hide show

tool.py +173 -190

tool.py CHANGED Viewed

@@ -1,17 +1,23 @@
 from smolagents.tools import Tool
 import string
-import pronouncing
 import json
 class ParodyWordSuggestionTool(Tool):
     name = "parody_word_suggester"
-    description = "Suggests rhyming funny words using CMU dictionary pronunciations."
-    inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True, 'default': None}}
     output_type = "string"
-    RHYME_WEIGHT = 0.5
-    PHONE_SEQUENCE_WEIGHT = 0.3
-    LENGTH_WEIGHT = 0.2
-    PHONE_GROUPS = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y|IY,IH|UW,UH|EH,AH|AO,AA|AE,AH|AY,EY|OW,UW"
     def _get_word_phones(self, word, custom_phones=None):
         """Get phones for a word, checking custom dictionary first."""
@@ -23,226 +29,186 @@ class ParodyWordSuggestionTool(Tool):
         return phones[0] if phones else None
-    def _get_primary_vowel(self, phones: list) -> str:
-        """Get the primary stressed vowel from phone list."""
-        phone_str = ""
-        vowel_char = ""
-        for phone_str in phones:
-            if '1' in phone_str and any(vowel_char in phone_str for vowel_char in 'AEIOU'):
-                return phone_str.rstrip('012')
-        return None
-    def _phones_are_similar(self, phone1: str, phone2: str) -> bool:
-        """Check if two phones are similar enough to be considered rhyming."""
-        # Strip stress markers
-        p1 = phone1.rstrip('012')
-        p2 = phone2.rstrip('012')
-        group_str = ""
         group = []
-        # Exact match
-        if p1 == p2:
-            return True
-        # Check similarity groups
-        for group_str in self.PHONE_GROUPS.split('|'):
-            group = group_str.split(',')
-            if p1 in group and p2 in group:
-                return True
-        return False
-    def _get_phone_type(self, phone: str) -> str:
-        """Get the broad category of a phone."""
-        # Strip stress markers
-        phone = phone.rstrip('012')
-        vowel_char = ""
-        # Vowels
-        if any(vowel_char in phone for vowel_char in 'AEIOU'):
-            return 'vowel'
-        # Initialize fixed sets for categories
-        nasals = {'M', 'N', 'NG'}
-        stops = {'P', 'B', 'T', 'D', 'K', 'G'}
-        fricatives = {'F', 'V', 'TH', 'DH', 'S', 'Z', 'SH', 'ZH'}
-        liquids = {'L', 'R'}
-        glides = {'W', 'Y'}
-        if phone in nasals:
-            return 'nasal'
-        if phone in stops:
-            return 'stop'
-        if phone in fricatives:
-            return 'fricative'
-        if phone in liquids:
-            return 'liquid'
-        if phone in glides:
-            return 'glide'
-        return 'other'
-    def _get_rhyme_score(self, phones1: list, phones2: list) -> float:
-        """Calculate rhyme score based on matching phones after primary stressed vowel."""
-        # Initialize variables
-        pos1 = -1
-        pos2 = -1
-        i = 0
         phone = ""
-        vowel_char = ""
-        rhyme_part1 = []
-        rhyme_part2 = []
-        similarity_count = 0
-        p1 = ""
-        p2 = ""
-        # Find primary stressed vowel position in both words
-        for i, phone in enumerate(phones1):
-            if '1' in phone and any(vowel_char in phone for vowel_char in 'AEIOU'):
-                pos1 = i
-                break
-        for i, phone in enumerate(phones2):
-            if '1' in phone and any(vowel_char in phone for vowel_char in 'AEIOU'):
-                pos2 = i
-                break
-        if pos1 == -1 or pos2 == -1:
-            return 0.0
-        # Get all phones after and including the stressed vowel
-        rhyme_part1 = phones1[pos1:]
-        rhyme_part2 = phones2[pos2:]
-        # Check if lengths match
-        if len(rhyme_part1) != len(rhyme_part2):
-            return 0.0
-        # Calculate similarity score for rhyming part
-        for p1, p2 in zip(rhyme_part1, rhyme_part2):
-            if self._phones_are_similar(p1, p2):
-                similarity_count += 1
-        # Return score based on how many phones were similar
-        return similarity_count / len(rhyme_part1) if rhyme_part1 else 0.0
-    def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
-        """Calculate similarity based on matching phones in sequence."""
-        if not phones1 or not phones2:
-            return 0.0
-        # Initialize variables
-        total_similarity = 0.0
-        i = 0
-        similarity = 0.0
-        comparisons = max(len(phones1), len(phones2))
-        # Compare each position
-        for i in range(min(len(phones1), len(phones2))):
-            similarity = self._get_phone_similarity(phones1[i], phones2[i])
-            total_similarity += similarity
-        return total_similarity / comparisons if comparisons > 0 else 0.0
-    def _get_phone_similarity(self, phone1: str, phone2: str) -> float:
-        """Calculate similarity between two phones."""
-        # Initialize variables
-        p1 = phone1.rstrip('012')
-        p2 = phone2.rstrip('012')
-        group_str = ""
         group = []
-        # Exact match
-        if p1 == p2:
-            return 1.0
-        # Check similarity groups
-        for group_str in self.PHONE_GROUPS.split('|'):
-            group = group_str.split(',')
-            if p1 in group and p2 in group:
-                return 0.7
-        # Check broader categories
-        if self._get_phone_type(p1) == self._get_phone_type(p2):
-            return 0.3
-        return 0.0
-    def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
-        """Calculate similarity based on phone length."""
-        max_length = max(len(phones1), len(phones2))
-        length_diff = abs(len(phones1) - len(phones2))
-        return 1.0 - (length_diff / max_length) if max_length > 0 else 0.0
     def _calculate_similarity(self, word1, phones1, word2, phones2):
-        """Calculate similarity based on multiple factors."""
-        # Initialize variables
         phone_list1 = phones1.split()
         phone_list2 = phones2.split()
         rhyme_score = 0.0
-        phone_sequence_score = 0.0
         length_score = 0.0
         similarity = 0.0
-        # Get rhyme score using new method
-        rhyme_score = self._get_rhyme_score(phone_list1, phone_list2)
-        # If rhyme score is too low (e.g. below 0.8), consider it a non-rhyme
-        if rhyme_score < 0.8:
-            return {
-                "similarity": 0.0,
-                "rhyme_score": 0.0,
-                "phone_sequence_score": 0.0,
-                "length_score": 0.0,
-                "details": {
-                    "primary_vowel1": self._get_primary_vowel(phone_list1),
-                    "primary_vowel2": self._get_primary_vowel(phone_list2),
-                    "phone_count1": len(phone_list1),
-                    "phone_count2": len(phone_list2),
-                    "matching_phones": 0
-                }
-            }
-        # Calculate other scores only if words rhyme closely enough
-        phone_sequence_score = self._calculate_phone_sequence_similarity(phone_list1, phone_list2)
-        length_score = self._calculate_length_similarity(phone_list1, phone_list2)
-        # Combined weighted score
         similarity = (
-            (rhyme_score * self.RHYME_WEIGHT) +
-            (phone_sequence_score * self.PHONE_SEQUENCE_WEIGHT) +
-            (length_score * self.LENGTH_WEIGHT)
         )
         return {
             "similarity": round(similarity, 3),
             "rhyme_score": round(rhyme_score, 3),
-            "phone_sequence_score": round(phone_sequence_score, 3),
             "length_score": round(length_score, 3),
-            "details": {
-                "primary_vowel1": self._get_primary_vowel(phone_list1),
-                "primary_vowel2": self._get_primary_vowel(phone_list2),
-                "phone_count1": len(phone_list1),
-                "phone_count2": len(phone_list2),
-                "matching_phones": round(phone_sequence_score * len(phone_list1))
-            }
         }
-    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5", custom_phones: dict = None) -> str:
         import pronouncing
         import string
         import json
-        # Initialize variables
         target = target.lower().strip(string.punctuation)
         min_similarity = float(min_similarity)
         suggestions = []
@@ -250,8 +216,14 @@ class ParodyWordSuggestionTool(Tool):
         invalid_words = []
         words = []
         target_phones = ""
-        word_phones = ""
         word = ""
         similarity_result = {}
         # Parse JSON string to list
@@ -271,6 +243,10 @@ class ParodyWordSuggestionTool(Tool):
                 "suggestions": []
             }, indent=2)
         # Filter word list
         for word in words:
             word = word.lower().strip(string.punctuation)
@@ -293,15 +269,20 @@ class ParodyWordSuggestionTool(Tool):
                 similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
                 if similarity_result["similarity"] >= min_similarity:
                     suggestions.append({
                         "word": word,
                         "similarity": similarity_result["similarity"],
                         "rhyme_score": similarity_result["rhyme_score"],
-                        "phone_sequence_score": similarity_result["phone_sequence_score"],
                         "length_score": similarity_result["length_score"],
                         "phones": word_phones,
-                        "is_custom": word in custom_phones if custom_phones else False,
-                        "details": similarity_result["details"]
                     })
         # Sort by similarity score descending
@@ -310,6 +291,8 @@ class ParodyWordSuggestionTool(Tool):
         result = {
             "target": target,
             "target_phones": target_phones,
             "invalid_words": invalid_words,
             "suggestions": suggestions
         }

 from smolagents.tools import Tool
 import string
 import json
+import pronouncing
 class ParodyWordSuggestionTool(Tool):
     name = "parody_word_suggester"
+    description = """Suggests rhyming funny words using CMU dictionary and custom pronunciations.
+    Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
+    inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.6'}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True, 'default': None}}
     output_type = "string"
+    VOWEL_REF = "AH,UH,AX|AE,EH|IY,IH|AO,AA|UW,UH|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
+    def _get_vowel_groups(self):
+        groups = []
+        group_strs = self.VOWEL_REF.split("|")
+        for group_str in group_strs:
+            groups.append(group_str.split(","))
+        return groups
     def _get_word_phones(self, word, custom_phones=None):
         """Get phones for a word, checking custom dictionary first."""
         return phones[0] if phones else None
+    def _get_last_syllable(self, phones: list) -> tuple:
+        """Extract the last syllable (vowel + remaining consonants)."""
+        last_vowel_idx = -1
+        last_vowel = None
+        vowel_groups = self._get_vowel_groups()
+        # Initialize loop variables
+        i = 0
+        phone = ""
+        base_phone = ""
         group = []
+        for i, phone in enumerate(phones):
+            base_phone = phone.rstrip('012')
+            for group in vowel_groups:
+                if base_phone in group:
+                    last_vowel_idx = i
+                    last_vowel = base_phone
+                    break
+        if last_vowel_idx == -1:
+            return None, []
+        remaining = phones[last_vowel_idx + 1:]
+        return last_vowel, remaining
+    def _strip_stress(self, phones: list) -> list:
+        """Remove stress markers from phones."""
+        result = []
+        # Initialize loop variable
         phone = ""
+        for phone in phones:
+            result.append(phone.rstrip('012'))
+        return result
+    def _vowels_match(self, v1: str, v2: str) -> bool:
+        """Check if vowels belong to the same sound group."""
+        v1 = v1.rstrip('012')
+        v2 = v2.rstrip('012')
+        if v1 == v2:
+            return True
+        # Initialize loop variables
+        vowel_groups = self._get_vowel_groups()
         group = []
+        for group in vowel_groups:
+            if v1 in group and v2 in group:
+                return True
+        return False
     def _calculate_similarity(self, word1, phones1, word2, phones2):
+        """Calculate similarity score using both perfect and near-rhyme detection."""
+        # Initialize all variables
         phone_list1 = phones1.split()
         phone_list2 = phones2.split()
+        # Variables for rhyme scoring
         rhyme_score = 0.0
+        word_vowel = None
+        word_end = []
+        target_vowel = None
+        target_end = []
+        word_end_clean = []
+        target_end_clean = []
+        common_length = 0
+        matched = 0
+        i = 0
+        # Variables for near-rhyme scoring
+        near_rhyme_score = 0.0
+        consonants1 = []
+        consonants2 = []
+        matches = 0
+        # Variables for length and stress scoring
+        phone_diff = 0
+        max_phones = 0
         length_score = 0.0
+        stress_score = 0.0
+        stress1 = ""
+        stress2 = ""
         similarity = 0.0
+        p = ""
+        v = ""
+        # Get last syllable components
+        result1 = self._get_last_syllable(phone_list1)
+        result2 = self._get_last_syllable(phone_list2)
+        word_vowel, word_end = result1
+        target_vowel, target_end = result2
+        # Perfect rhyme check (60% of score)
+        if word_vowel and target_vowel:
+            if self._vowels_match(word_vowel, target_vowel):
+                word_end_clean = self._strip_stress(word_end)
+                target_end_clean = self._strip_stress(target_end)
+                if word_end_clean == target_end_clean:
+                    rhyme_score = 1.0
+                else:
+                    # Partial rhyme based on ending similarity
+                    common_length = min(len(word_end_clean), len(target_end_clean))
+                    matched = 0
+                    for i in range(common_length):
+                        if word_end_clean[i] == target_end_clean[i]:
+                            matched += 1
+                    if max(len(word_end_clean), len(target_end_clean)) > 0:
+                        rhyme_score = 0.6 * (matched / max(1, max(len(word_end_clean), len(target_end_clean))))
+                    else:
+                        rhyme_score = 0.0
+        # Near rhyme check (for words like "running"/"cunning") - 20% of score
+        # Check if words have similar length and pattern
+        if abs(len(phone_list1) - len(phone_list2)) <= 1:
+            # Check consonant patterns are similar
+            consonants1 = [p for p in self._strip_stress(phone_list1) if not any(v in p for v in 'AEIOU')]
+            consonants2 = [p for p in self._strip_stress(phone_list2) if not any(v in p for v in 'AEIOU')]
+            if len(consonants1) == len(consonants2):
+                matches = 0
+                for a, b in zip(consonants1, consonants2):
+                    if a == b:
+                        matches += 1
+                if len(consonants1) > 0:
+                    near_rhyme_score = matches / max(1, len(consonants1))
+            # Additional check for -ing endings (special case for English)
+            if len(phone_list1) >= 3 and len(phone_list2) >= 3:
+                if (self._strip_stress(phone_list1[-2:]) == ['IH', 'NG'] and
+                    self._strip_stress(phone_list2[-2:]) == ['IH', 'NG']):
+                    near_rhyme_score = max(near_rhyme_score, 0.8)  # Boost for -ing endings
+        # Calculate length similarity score (10% of total)
+        phone_diff = abs(len(phone_list1) - len(phone_list2))
+        max_phones = max(len(phone_list1), len(phone_list2))
+        length_score = 1.0 if phone_diff == 0 else 1.0 - (phone_diff / max_phones)
+        # Calculate stress pattern similarity (10% of total)
+        import pronouncing
+        stress1 = pronouncing.stresses(phones1)
+        stress2 = pronouncing.stresses(phones2)
+        stress_score = 1.0 if stress1 == stress2 else 0.5
+        # Weighted combination
         similarity = (
+            (rhyme_score * 0.6) +       # Perfect rhyme (60%)
+            (near_rhyme_score * 0.2) +  # Near rhyme (20%)
+            (length_score * 0.1) +      # Length similarity (10%)
+            (stress_score * 0.1)        # Stress pattern (10%)
         )
+        # Special case: Boost very similar-sounding words
+        if near_rhyme_score > 0.7 and length_score > 0.8 and stress_score > 0.8:
+            similarity = max(similarity, 0.75)  # Ensure these get a high enough score
+        # Cap at 1.0
+        similarity = min(1.0, similarity)
         return {
             "similarity": round(similarity, 3),
             "rhyme_score": round(rhyme_score, 3),
+            "near_rhyme_score": round(near_rhyme_score, 3),
             "length_score": round(length_score, 3),
+            "stress_score": round(stress_score, 3),
+            "phone_length_difference": phone_diff
         }
+    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.6", custom_phones: dict = None) -> str:
         import pronouncing
         import string
         import json
+        # Initialize all variables
         target = target.lower().strip(string.punctuation)
         min_similarity = float(min_similarity)
         suggestions = []
         invalid_words = []
         words = []
         target_phones = ""
+        target_phone_list = []
+        target_vowel = None
+        target_end = []
         word = ""
+        word_phones = ""
+        word_phone_list = []
+        word_vowel = None
+        word_end = []
         similarity_result = {}
         # Parse JSON string to list
                 "suggestions": []
             }, indent=2)
+        # Parse target phones
+        target_phone_list = target_phones.split()
+        target_vowel, target_end = self._get_last_syllable(target_phone_list)
         # Filter word list
         for word in words:
             word = word.lower().strip(string.punctuation)
                 similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
                 if similarity_result["similarity"] >= min_similarity:
+                    word_phone_list = word_phones.split()
+                    word_vowel, word_end = self._get_last_syllable(word_phone_list)
                     suggestions.append({
                         "word": word,
                         "similarity": similarity_result["similarity"],
                         "rhyme_score": similarity_result["rhyme_score"],
+                        "near_rhyme_score": similarity_result["near_rhyme_score"],
                         "length_score": similarity_result["length_score"],
+                        "stress_score": similarity_result["stress_score"],
                         "phones": word_phones,
+                        "last_vowel": word_vowel,
+                        "ending": " ".join(word_end) if word_end else "",
+                        "is_custom": word in custom_phones if custom_phones else False
                     })
         # Sort by similarity score descending
         result = {
             "target": target,
             "target_phones": target_phones,
+            "target_last_vowel": target_vowel,
+            "target_ending": " ".join(target_end) if target_end else "",
             "invalid_words": invalid_words,
             "suggestions": suggestions
         }