Spaces:

patruff
/

parody-suggestions

Configuration error

App Files Files Community

patruff committed on Feb 10

Commit

9bc766a

verified ·

1 Parent(s): b77eb42

Upload tool

Browse files

Files changed (1) hide show

tool.py +121 -149

tool.py CHANGED Viewed

@@ -1,27 +1,21 @@
 from smolagents.tools import Tool
 import pronouncing
-import difflib
 import json
-import string
 class ParodyWordSuggestionTool(Tool):
     name = "parody_word_suggester"
-    description = """Suggests rhyming funny words using CMU dictionary pronunciations.
-    Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
     inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True, 'default': None}}
     output_type = "string"
-    VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
     CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"
-    def _get_vowel_groups(self):
-        groups = []
-        group_strs = self.VOWEL_REF.split("|")
-        for group_str in group_strs:
-            groups.append(group_str.split(","))
-        return groups
     def _get_consonant_groups(self):
         groups = []
         group_strs = self.CONSONANT_REF.split("|")
         for group_str in group_strs:
@@ -39,146 +33,141 @@ class ParodyWordSuggestionTool(Tool):
         return phones[0] if phones else None
-    def _get_last_syllable(self, phones: list) -> tuple:
-        """Extract the last syllable (vowel + remaining consonants)."""
-        last_vowel_idx = -1
-        last_vowel = None
-        vowel_groups = self._get_vowel_groups()
-        for i, phone in enumerate(phones):
-            base_phone = phone.rstrip('012')
-            for group in vowel_groups:
-                if base_phone in group:
-                    last_vowel_idx = i
-                    last_vowel = base_phone
-                    break
-        if last_vowel_idx == -1:
-            return None, []
-        remaining = phones[last_vowel_idx + 1:]
-        return last_vowel, remaining
-    def _strip_stress(self, phones: list) -> list:
-        result = []
-        for phone in phones:
-            result.append(phone.rstrip('012'))
-        return result
-    def _vowels_match(self, v1: str, v2: str) -> bool:
-        v1 = v1.rstrip('012')
-        v2 = v2.rstrip('012')
-        if v1 == v2:
-            return True
-        vowel_groups = self._get_vowel_groups()
-        for group in vowel_groups:
-            if v1 in group and v2 in group:
-                return True
-        return False
     def _calculate_similarity(self, word1, phones1, word2, phones2):
-        """Calculate similarity with heavy emphasis on rhyming."""
-        from difflib import SequenceMatcher
-        import pronouncing
-        # Initialize all variables
         rhyme_score = 0.0
-        string_score = 0.0
-        pattern_score = 0.0
-        phone_list1 = []
-        phone_list2 = []
-        vowel1 = None
-        vowel2 = None
-        end1 = []
-        end2 = []
-        end1_clean = []
-        end2_clean = []
-        matching_consonants = 0
         phone_list1 = phones1.split()
         phone_list2 = phones2.split()
-        # Get last syllables
-        vowel1, end1 = self._get_last_syllable(phone_list1)
-        vowel2, end2 = self._get_last_syllable(phone_list2)
-        # Calculate rhyme score (60%)
-        if vowel1 and vowel2:
-            # Perfect vowel match
-            if vowel1.rstrip('012') == vowel2.rstrip('012'):
-                rhyme_score = 1.0
-            # Similar vowel match
-            elif self._vowels_match(vowel1, vowel2):
-                rhyme_score = 0.8
-            # Check endings
-            if end1 and end2:
-                end1_clean = self._strip_stress(end1)
-                end2_clean = self._strip_stress(end2)
-                # Perfect ending match
-                if end1_clean == end2_clean:
-                    rhyme_score = min(1.0, rhyme_score + 0.2)
-                # Partial ending match
-                else:
-                    consonant_groups = self._get_consonant_groups()
-                    matching_consonants = 0
-                    for c1, c2 in zip(end1_clean, end2_clean):
-                        if c1 == c2:
-                            matching_consonants += 1
-                        else:
-                            # Check if consonants are in same group
-                            for group in consonant_groups:
-                                if c1 in group and c2 in group:
-                                    matching_consonants += 0.5
-                                    break
-                    if matching_consonants > 0:
-                        rhyme_score = min(1.0, rhyme_score + (0.1 * matching_consonants))
-        # String similarity (25%)
-        if len(word1) > 1 and len(word2) > 1:
-            end_similarity = SequenceMatcher(None, word1[1:], word2[1:]).ratio()
-            string_score = end_similarity
-        else:
-            string_score = SequenceMatcher(None, word1, word2).ratio()
-        # Pattern/Length score (15%)
         if len(phone_list1) == len(phone_list2):
-            pattern_score = 1.0
         else:
-            pattern_score = 1.0 - (abs(len(phone_list1) - len(phone_list2)) / max(len(phone_list1), len(phone_list2)))
-        # Final weighted score
-        similarity = (
-            (rhyme_score * 0.60) +
-            (string_score * 0.25) +
-            (pattern_score * 0.15)
-        )
-        # Extra boost for exact matches minus first letter
-        if len(word1) == len(word2) and word1[1:] == word2[1:]:
-            similarity = min(1.0, similarity * 1.2)
-        # Extra penalty for very different lengths
-        if abs(len(word1) - len(word2)) > 2:
-            similarity *= 0.7
         return {
             "similarity": round(similarity, 3),
             "rhyme_score": round(rhyme_score, 3),
-            "string_score": round(string_score, 3),
-            "pattern_score": round(pattern_score, 3),
             "details": {
-                "last_vowel_match": vowel1.rstrip('012') == vowel2.rstrip('012') if vowel1 and vowel2 else False,
-                "similar_vowels": self._vowels_match(vowel1, vowel2) if vowel1 and vowel2 else False,
-                "ending_match": " ".join(end1_clean) == " ".join(end2_clean) if end1 and end2 else False,
-                "string_length_diff": abs(len(word1) - len(word2))
             }
         }
@@ -188,22 +177,16 @@ class ParodyWordSuggestionTool(Tool):
         import string
         import json
-        # Initialize all variables
         target = target.lower().strip(string.punctuation)
         min_similarity = float(min_similarity)
         suggestions = []
-        word_vowel = None
-        word_end = []
-        target_vowel = None
-        target_end = []
         valid_words = []
         invalid_words = []
-        target_phone_list = []
         words = []
         target_phones = ""
         word_phones = ""
         word = ""
-        word_phone_list = []
         similarity_result = {}
         # Parse JSON string to list
@@ -215,7 +198,7 @@ class ParodyWordSuggestionTool(Tool):
                 "suggestions": []
             }, indent=2)
-        # Get target pronunciation using custom phones
         target_phones = self._get_word_phones(target, custom_phones)
         if not target_phones:
             return json.dumps({
@@ -223,9 +206,7 @@ class ParodyWordSuggestionTool(Tool):
                 "suggestions": []
             }, indent=2)
-        # Filter word list checking both CMU and custom phones
-        valid_words = []
-        invalid_words = []
         for word in words:
             word = word.lower().strip(string.punctuation)
             if self._get_word_phones(word, custom_phones):
@@ -240,9 +221,6 @@ class ParodyWordSuggestionTool(Tool):
                 "suggestions": []
             }, indent=2)
-        target_phone_list = target_phones.split()
-        target_vowel, target_end = self._get_last_syllable(target_phone_list)
         # Check each word
         for word in valid_words:
             word_phones = self._get_word_phones(word, custom_phones)
@@ -250,18 +228,14 @@ class ParodyWordSuggestionTool(Tool):
                 similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
                 if similarity_result["similarity"] >= min_similarity:
-                    word_phone_list = word_phones.split()
-                    word_vowel, word_end = self._get_last_syllable(word_phone_list)
                     suggestions.append({
                         "word": word,
                         "similarity": similarity_result["similarity"],
                         "rhyme_score": similarity_result["rhyme_score"],
-                        "string_score": similarity_result["string_score"],
-                        "pattern_score": similarity_result["pattern_score"],
                         "phones": word_phones,
-                        "last_vowel": word_vowel,
-                        "ending": " ".join(word_end) if word_end else "",
                         "is_custom": word in custom_phones if custom_phones else False,
                         "details": similarity_result["details"]
                     })
@@ -272,8 +246,6 @@ class ParodyWordSuggestionTool(Tool):
         result = {
             "target": target,
             "target_phones": target_phones,
-            "target_last_vowel": target_vowel,
-            "target_ending": " ".join(target_end) if target_end else "",
             "invalid_words": invalid_words,
             "suggestions": suggestions
         }

 from smolagents.tools import Tool
+import string
 import pronouncing
 import json
 class ParodyWordSuggestionTool(Tool):
     name = "parody_word_suggester"
+    description = "Suggests rhyming funny words using CMU dictionary pronunciations."
     inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True, 'default': None}}
     output_type = "string"
+    RHYME_WEIGHT = 0.6
+    PHONE_PATTERN_WEIGHT = 0.2
+    CHAR_DIFF_WEIGHT = 0.1
+    CONSONANT_WEIGHT = 0.1
     CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"
     def _get_consonant_groups(self):
+        """Get consonant similarity groups."""
         groups = []
         group_strs = self.CONSONANT_REF.split("|")
         for group_str in group_strs:
         return phones[0] if phones else None
+    def _get_primary_vowel(self, phones: list) -> str:
+        """Get the primary stressed vowel from phone list."""
+        vowel_chars = 'AEIOU'  # Initialize the vowel characters set
+        phone_str = ""  # Initialize phone string
+        vowel_char = ""
+        for phone_str in phones:
+            if '1' in phone_str and any(vowel_char in phone_str for vowel_char in vowel_chars):
+                return phone_str.rstrip('012')
+        return None
+    def _calculate_char_difference(self, word1: str, word2: str) -> float:
+        """Calculate character difference score."""
+        if not word1 or not word2:
+            return 0.0
+        # Initialize variables
+        changes = 0
+        char1 = ""
+        char2 = ""
+        # Count character differences
+        for char1, char2 in zip(word1, word2):
+            if char1 != char2:
+                changes += 1
+        # Add difference for length mismatch
+        changes += abs(len(word1) - len(word2))
+        # Score based on changes (0 changes = 1.0, more changes = lower score)
+        max_changes = max(len(word1), len(word2))
+        return 1.0 - (changes / max_changes) if max_changes > 0 else 0.0
+    def _calculate_consonant_similarity(self, phone_list1: list, phone_list2: list) -> float:
+        """Calculate consonant similarity score."""
+        # Initialize variables
+        consonant_score = 0.0
+        consonant_groups = self._get_consonant_groups()
+        vowel_chars = 'AEIOU'
+        phone_str = ""
+        vowel_char = ""
+        consonants1 = []
+        consonants2 = []
+        matches = 0
+        comparisons = 0
+        cons1 = ""
+        cons2 = ""
+        group = []
+        # Get consonants (non-vowel phones)
+        consonants1 = [phone_str for phone_str in phone_list1
+                      if not any(vowel_char in phone_str for vowel_char in vowel_chars)]
+        consonants2 = [phone_str for phone_str in phone_list2
+                      if not any(vowel_char in phone_str for vowel_char in vowel_chars)]
+        if not consonants1 or not consonants2:
+            return 0.0
+        # Compare each consonant
+        matches = 0
+        comparisons = min(len(consonants1), len(consonants2))
+        for cons1, cons2 in zip(consonants1, consonants2):
+            cons1 = cons1.rstrip('012')
+            cons2 = cons2.rstrip('012')
+            if cons1 == cons2:
+                matches += 1
+                continue
+            # Check if in same group
+            for group in consonant_groups:
+                if cons1 in group and cons2 in group:
+                    matches += 0.5
+                    break
+        return matches / comparisons if comparisons > 0 else 0.0
     def _calculate_similarity(self, word1, phones1, word2, phones2):
         """Blend four sub-scores into one weighted similarity for a word pair.

         The sub-scores and the class attributes that weight them are:

         * rhyme (``RHYME_WEIGHT``): 1.0 when both words have a primary vowel
           (per ``_get_primary_vowel``) and the vowels are equal, else 0.0;
         * phone pattern (``PHONE_PATTERN_WEIGHT``): 1.0 for equal phone
           counts, otherwise 1 minus the count gap over the larger count;
         * character difference (``CHAR_DIFF_WEIGHT``): result of
           ``_calculate_char_difference`` on the spellings;
         * consonants (``CONSONANT_WEIGHT``): result of
           ``_calculate_consonant_similarity`` on the phone lists.

         Args:
             word1: Spelling of the first word.
             phones1: Space-separated phones of the first word.
             word2: Spelling of the second word.
             phones2: Space-separated phones of the second word.

         Returns:
             A dict with the rounded ``similarity`` and sub-scores plus a
             ``details`` dict holding both primary vowels, both phone counts
             and ``char_differences``.
         """
         phone_list1 = phones1.split()
         phone_list2 = phones2.split()
         count1 = len(phone_list1)
         count2 = len(phone_list2)

         # 1. Rhyme: both primary vowels must exist and be identical.
         vowel1 = self._get_primary_vowel(phone_list1)
         vowel2 = self._get_primary_vowel(phone_list2)
         rhyme_score = 1.0 if (vowel1 and vowel2 and vowel1 == vowel2) else 0.0

         # 2. Phone pattern: the equal-count branch also covers two empty
         # phone lists, so the division below never sees a zero denominator.
         if count1 == count2:
             phone_score = 1.0
         else:
             phone_score = 1.0 - (abs(count1 - count2) / max(count1, count2))

         # 3. Spelling closeness and 4. consonant closeness.
         char_diff_score = self._calculate_char_difference(word1, word2)
         consonant_score = self._calculate_consonant_similarity(phone_list1, phone_list2)

         similarity = (
             (rhyme_score * self.RHYME_WEIGHT) +
             (phone_score * self.PHONE_PATTERN_WEIGHT) +
             (char_diff_score * self.CHAR_DIFF_WEIGHT) +
             (consonant_score * self.CONSONANT_WEIGHT)
         )

         # Count differing characters, not just the length gap: positions
         # whose characters differ plus the surplus characters of the longer
         # word. Reporting only the length gap showed 0 for "cat" vs "bat".
         char_differences = abs(len(word1) - len(word2))
         for left, right in zip(word1, word2):
             if left != right:
                 char_differences += 1

         return {
             "similarity": round(similarity, 3),
             "rhyme_score": round(rhyme_score, 3),
             "phone_score": round(phone_score, 3),
             "char_diff_score": round(char_diff_score, 3),
             "consonant_score": round(consonant_score, 3),
             "details": {
                 "primary_vowel1": vowel1,
                 "primary_vowel2": vowel2,
                 "phone_count1": count1,
                 "phone_count2": count2,
                 "char_differences": char_differences
             }
         }
         import string
         import json
+        # Initialize variables
         target = target.lower().strip(string.punctuation)
         min_similarity = float(min_similarity)
         suggestions = []
         valid_words = []
         invalid_words = []
         words = []
         target_phones = ""
         word_phones = ""
         word = ""
         similarity_result = {}
         # Parse JSON string to list
                 "suggestions": []
             }, indent=2)
+        # Get target pronunciation
         target_phones = self._get_word_phones(target, custom_phones)
         if not target_phones:
             return json.dumps({
                 "suggestions": []
             }, indent=2)
+        # Filter word list
         for word in words:
             word = word.lower().strip(string.punctuation)
             if self._get_word_phones(word, custom_phones):
                 "suggestions": []
             }, indent=2)
         # Check each word
         for word in valid_words:
             word_phones = self._get_word_phones(word, custom_phones)
                 similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
                 if similarity_result["similarity"] >= min_similarity:
                     suggestions.append({
                         "word": word,
                         "similarity": similarity_result["similarity"],
                         "rhyme_score": similarity_result["rhyme_score"],
+                        "phone_score": similarity_result["phone_score"],
+                        "char_diff_score": similarity_result["char_diff_score"],
+                        "consonant_score": similarity_result["consonant_score"],
                         "phones": word_phones,
                         "is_custom": word in custom_phones if custom_phones else False,
                         "details": similarity_result["details"]
                     })
         result = {
             "target": target,
             "target_phones": target_phones,
             "invalid_words": invalid_words,
             "suggestions": suggestions
         }