Spaces:

patruff
/

parody-suggestions

Configuration error

App Files Community

patruff committed on Feb 10

Commit

49ca653

verified ·

1 Parent(s): 488bbfc

Upload tool

Browse files

Files changed (1) hide show

tool.py +87 -29

tool.py CHANGED Viewed

@@ -1,13 +1,13 @@
 from smolagents.tools import Tool
-import json
 import string
 import pronouncing
 class ParodyWordSuggestionTool(Tool):
     name = "parody_word_suggester"
     description = """Suggests rhyming funny words using CMU dictionary and custom pronunciations.
     Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
-    inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True}}
     output_type = "string"
     VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
@@ -71,35 +71,78 @@ class ParodyWordSuggestionTool(Tool):
         return False
     def _calculate_similarity(self, word1, phones1, word2, phones2):
-        """Calculate similarity score using improved metrics."""
-        # Initialize all variables
         word_vowel = None
         word_end = []
         target_vowel = None
         target_end = []
-        phone_diff = 0
-        max_phones = 0
         length_score = 0.0
         rhyme_score = 0.0
         stress_score = 0.0
-        i = 0  # For loop counter
         word_end_clean = []
         target_end_clean = []
-        matched = 0
         common_length = 0
         phone_list1 = phones1.split()
         phone_list2 = phones2.split()
-        # Calculate length similarity score
-        phone_diff = abs(len(phone_list1) - len(phone_list2))
-        max_phones = max(len(phone_list1), len(phone_list2))
-        length_score = 1.0 if phone_diff == 0 else 1.0 - (phone_diff / max_phones)
-        # Get last syllable components
-        result1 = self._get_last_syllable(phone_list1)
-        result2 = self._get_last_syllable(phone_list2)
         word_vowel, word_end = result1
         target_vowel, target_end = result2
@@ -111,30 +154,35 @@ class ParodyWordSuggestionTool(Tool):
                 target_end_clean = self._strip_stress(target_end)
                 if word_end_clean == target_end_clean:
-                    rhyme_score = 1.0
                 else:
-                    # Partial rhyme based on ending similarity
                     common_length = min(len(word_end_clean), len(target_end_clean))
                     matched = 0
                     for i in range(common_length):
                         if word_end_clean[i] == target_end_clean[i]:
                             matched += 1
-                    rhyme_score = 0.6 * (matched / max(len(word_end_clean), len(target_end_clean)))
-        # Calculate stress pattern similarity
         import pronouncing
-        stress1 = pronouncing.stresses(phones1)
-        stress2 = pronouncing.stresses(phones2)
-        stress_score = 1.0 if stress1 == stress2 else 0.5
-        # Weighted combination (60% rhyme, 30% length, 10% stress)
         similarity = (
-            (rhyme_score * 0.6) +
-            (length_score * 0.3) +
-            (stress_score * 0.1)
         )
-        # Cap at 1.0
         similarity = min(1.0, similarity)
         return {
@@ -142,7 +190,9 @@ class ParodyWordSuggestionTool(Tool):
             "rhyme_score": round(rhyme_score, 3),
             "length_score": round(length_score, 3),
             "stress_score": round(stress_score, 3),
-            "phone_length_difference": phone_diff
         }
@@ -162,6 +212,12 @@ class ParodyWordSuggestionTool(Tool):
         valid_words = []
         invalid_words = []
         target_phone_list = []
         # Parse JSON string to list
         try:
@@ -216,7 +272,9 @@ class ParodyWordSuggestionTool(Tool):
                         "rhyme_score": similarity_result["rhyme_score"],
                         "length_score": similarity_result["length_score"],
                         "stress_score": similarity_result["stress_score"],
-                        "phone_length_difference": similarity_result["phone_length_difference"],
                         "phones": word_phones,
                         "last_vowel": word_vowel,
                         "ending": " ".join(word_end) if word_end else "",

 from smolagents.tools import Tool
 import string
 import pronouncing
+import json
 class ParodyWordSuggestionTool(Tool):
     name = "parody_word_suggester"
     description = """Suggests rhyming funny words using CMU dictionary and custom pronunciations.
     Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
+    inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'default': '0.5', 'nullable': True}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'default': None, 'nullable': True}}
     output_type = "string"
     VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
         return False
+    def _strip_common_suffix(self, phones: list) -> tuple:
+        """Strip one common suffix from phones and return (base phones, suffix phones)."""
+        # Placeholders; suffix_name and suffix_phones are rebound by the loop below
+        suffix_name = ""
+        suffix_phones = []
+        phone1 = ""  # never read: the generator expression below binds its own phone1/phone2
+        phone2 = ""
+        # Common suffix patterns in CMU phonetic representation, tried in the order listed
+        SUFFIXES = {
+            'ING': ['IH0', 'NG'],  # -ing
+            'ED': ['EH0', 'D'],    # -ed
+            'ER': ['ER0'],         # -er
+            'EST': ['EH0', 'S', 'T'],  # -est
+            'LY': ['L', 'IY0'],    # -ly
+            'NESS': ['N', 'EH0', 'S'],  # -ness
+        }
+        for suffix_name, suffix_phones in SUFFIXES.items():  # first matching entry wins
+            if len(phones) > len(suffix_phones):  # strict '>' leaves at least one base phone
+                if all(phone1.rstrip('012') == phone2.rstrip('012')  # compare with trailing 0/1/2 digits removed
+                      for phone1, phone2 in zip(phones[-len(suffix_phones):], suffix_phones)):
+                    return phones[:-len(suffix_phones)], suffix_phones  # suffix is the SUFFIXES entry, not the matched slice
+        return phones, []  # no suffix matched: phones returned unchanged with an empty suffix
     def _calculate_similarity(self, word1, phones1, word2, phones2):
+        """Calculate similarity score using improved metrics and suffix handling."""
+        # Initialize all variables first
+        phone_list1 = []
+        phone_list2 = []
+        base1 = []
+        base2 = []
+        suffix1 = []
+        suffix2 = []
         word_vowel = None
         word_end = []
         target_vowel = None
         target_end = []
+        base_length_diff = 0
+        max_base_length = 0
         length_score = 0.0
         rhyme_score = 0.0
         stress_score = 0.0
+        suffix_score = 0.0
         word_end_clean = []
         target_end_clean = []
         common_length = 0
+        matched = 0
+        stress1 = ""
+        stress2 = ""
+        similarity = 0.0
+        result1 = (None, [])
+        result2 = (None, [])
+        # Main logic
         phone_list1 = phones1.split()
         phone_list2 = phones2.split()
+        # Strip common suffixes first
+        base1, suffix1 = self._strip_common_suffix(phone_list1)
+        base2, suffix2 = self._strip_common_suffix(phone_list2)
+        # Calculate base word similarity
+        base_length_diff = abs(len(base1) - len(base2))
+        max_base_length = max(len(base1), len(base2))
+        length_score = 1.0 if base_length_diff == 0 else 1.0 - (base_length_diff / max_base_length)
+        # Get last syllable components of base words
+        result1 = self._get_last_syllable(base1)
+        result2 = self._get_last_syllable(base2)
         word_vowel, word_end = result1
         target_vowel, target_end = result2
                 target_end_clean = self._strip_stress(target_end)
                 if word_end_clean == target_end_clean:
+                    if word_vowel.rstrip('012') == target_vowel.rstrip('012'):
+                        rhyme_score = 1.0
+                    else:
+                        rhyme_score = 0.7  # Penalize different vowels in same group
                 else:
                     common_length = min(len(word_end_clean), len(target_end_clean))
                     matched = 0
                     for i in range(common_length):
                         if word_end_clean[i] == target_end_clean[i]:
                             matched += 1
+                    rhyme_score = 0.3 * (matched / max(len(word_end_clean), len(target_end_clean)))
+        # Calculate stress pattern similarity using base words
         import pronouncing
+        stress1 = pronouncing.stresses(' '.join(base1))
+        stress2 = pronouncing.stresses(' '.join(base2))
+        stress_score = 1.0 if stress1 == stress2 else 0.3
+        # Add suffix matching bonus
+        suffix_score = 1.0 if suffix1 == suffix2 else 0.0
+        # Weighted combination with emphasis on base word similarity
         similarity = (
+            (rhyme_score * 0.6) +       # Base word rhyme
+            (length_score * 0.1) +      # Base word length
+            (stress_score * 0.2) +      # Base word stress
+            (suffix_score * 0.1)        # Suffix match as small bonus
         )
         similarity = min(1.0, similarity)
         return {
             "rhyme_score": round(rhyme_score, 3),
             "length_score": round(length_score, 3),
             "stress_score": round(stress_score, 3),
+            "base_word_diff": base_length_diff,
+            "has_common_suffix": bool(suffix1 and suffix2),
+            "suffix_match": suffix_score == 1.0
         }
         valid_words = []
         invalid_words = []
         target_phone_list = []
+        words = []
+        target_phones = ""
+        word_phones = ""
+        word = ""
+        word_phone_list = []
+        similarity_result = {}
         # Parse JSON string to list
         try:
                         "rhyme_score": similarity_result["rhyme_score"],
                         "length_score": similarity_result["length_score"],
                         "stress_score": similarity_result["stress_score"],
+                        "base_word_diff": similarity_result["base_word_diff"],
+                        "has_common_suffix": similarity_result["has_common_suffix"],
+                        "suffix_match": similarity_result["suffix_match"],
                         "phones": word_phones,
                         "last_vowel": word_vowel,
                         "ending": " ".join(word_end) if word_end else "",