Spaces:

patruff
/

parody-suggestions

Configuration error

App Files Files Community

patruff committed on Feb 10

Commit

4aa24c2

verified ·

1 Parent(s): f72b145

Upload tool

Browse files

Files changed (2) hide show

requirements.txt +1 -1
tool.py +88 -87

requirements.txt CHANGED Viewed

@@ -1,2 +1,2 @@
-smolagents
 pronouncing



1	pronouncing
2	+ smolagents

tool.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from smolagents.tools import Tool
-import string
-import pronouncing
 import json
 class ParodyWordSuggestionTool(Tool):
     name = "parody_word_suggester"
@@ -10,6 +10,7 @@ class ParodyWordSuggestionTool(Tool):
     inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'default': '0.5', 'nullable': True}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'default': None, 'nullable': True}}
     output_type = "string"
     VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
     def _get_vowel_groups(self):
         groups = []
@@ -98,101 +99,101 @@ class ParodyWordSuggestionTool(Tool):
         return phones, []
     def _calculate_similarity(self, word1, phones1, word2, phones2):
-        """Calculate similarity score using improved metrics and suffix handling."""
-        # Initialize all variables first
-        phone_list1 = []
-        phone_list2 = []
-        base1 = []
-        base2 = []
-        suffix1 = []
-        suffix2 = []
-        word_vowel = None
-        word_end = []
-        target_vowel = None
-        target_end = []
-        base_length_diff = 0
-        max_base_length = 0
-        length_score = 0.0
-        rhyme_score = 0.0
-        stress_score = 0.0
-        suffix_score = 0.0
-        word_end_clean = []
-        target_end_clean = []
-        common_length = 0
-        matched = 0
-        stress1 = ""
-        stress2 = ""
-        similarity = 0.0
-        result1 = (None, [])
-        result2 = (None, [])
-        # Main logic
         phone_list1 = phones1.split()
         phone_list2 = phones2.split()
-        # Strip common suffixes first
-        base1, suffix1 = self._strip_common_suffix(phone_list1)
-        base2, suffix2 = self._strip_common_suffix(phone_list2)
-        # Calculate base word similarity
-        base_length_diff = abs(len(base1) - len(base2))
-        max_base_length = max(len(base1), len(base2))
-        length_score = 1.0 if base_length_diff == 0 else 1.0 - (base_length_diff / max_base_length)
-        # Get last syllable components of base words
-        result1 = self._get_last_syllable(base1)
-        result2 = self._get_last_syllable(base2)
-        word_vowel, word_end = result1
-        target_vowel, target_end = result2
-        # Calculate rhyme score
-        rhyme_score = 0.0
-        if word_vowel and target_vowel:
-            if self._vowels_match(word_vowel, target_vowel):
-                word_end_clean = self._strip_stress(word_end)
-                target_end_clean = self._strip_stress(target_end)
-                if word_end_clean == target_end_clean:
-                    if word_vowel.rstrip('012') == target_vowel.rstrip('012'):
-                        rhyme_score = 1.0
-                    else:
-                        rhyme_score = 0.7  # Penalize different vowels in same group
-                else:
-                    common_length = min(len(word_end_clean), len(target_end_clean))
-                    matched = 0
-                    for i in range(common_length):
-                        if word_end_clean[i] == target_end_clean[i]:
-                            matched += 1
-                    rhyme_score = 0.3 * (matched / max(len(word_end_clean), len(target_end_clean)))
-        # Calculate stress pattern similarity using base words
-        import pronouncing
-        stress1 = pronouncing.stresses(' '.join(base1))
-        stress2 = pronouncing.stresses(' '.join(base2))
-        stress_score = 1.0 if stress1 == stress2 else 0.3
-        # Add suffix matching bonus
-        suffix_score = 1.0 if suffix1 == suffix2 else 0.0
-        # Weighted combination with emphasis on base word similarity
         similarity = (
-            (rhyme_score * 0.6) +       # Base word rhyme
-            (length_score * 0.1) +      # Base word length
-            (stress_score * 0.2) +      # Base word stress
-            (suffix_score * 0.1)        # Suffix match as small bonus
         )
-        similarity = min(1.0, similarity)
         return {
             "similarity": round(similarity, 3),
-            "rhyme_score": round(rhyme_score, 3),
-            "length_score": round(length_score, 3),
-            "stress_score": round(stress_score, 3),
-            "base_word_diff": base_length_diff,
-            "has_common_suffix": bool(suffix1 and suffix2),
-            "suffix_match": suffix_score == 1.0
         }

 from smolagents.tools import Tool
 import json
+import pronouncing
+import string
 class ParodyWordSuggestionTool(Tool):
     name = "parody_word_suggester"
     inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'default': '0.5', 'nullable': True}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'default': None, 'nullable': True}}
     output_type = "string"
     VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
+    CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"
     def _get_vowel_groups(self):
         groups = []
         return phones, []
+    def _get_consonant_groups(self):
+        """Get consonant groups from reference string."""
+        groups = []
+        group_strs = self.CONSONANT_REF.split("|")
+        for group_str in group_strs:
+            groups.append(group_str.split(","))
+        return groups
+    def _consonants_similarity(self, c1: str, c2: str) -> float:
+        """Calculate similarity score between two consonants."""
+        if c1 == c2:
+            return 1.0
+        # Check if they're in the same group
+        consonant_groups = self._get_consonant_groups()
+        for group in consonant_groups:
+            if c1 in group and c2 in group:
+                # Nasals (first group) are more similar to each other
+                if group == consonant_groups[0]:  # M,N,NG group
+                    return 0.8
+                return 0.5
+        return 0.0
     def _calculate_similarity(self, word1, phones1, word2, phones2):
+        """Calculate similarity score with enhanced consonant matching."""
+        # Initialize variables as before
         phone_list1 = phones1.split()
         phone_list2 = phones2.split()
+        # Get stressed vowels and their positions
+        vowel_idx1 = -1
+        vowel_idx2 = -1
+        primary_vowel1 = None
+        primary_vowel2 = None
+        for i, phone in enumerate(phone_list1):
+            if '1' in phone:  # Primary stress
+                vowel_idx1 = i
+                primary_vowel1 = phone.rstrip('012')
+                break
+        for i, phone in enumerate(phone_list2):
+            if '1' in phone:
+                vowel_idx2 = i
+                primary_vowel2 = phone.rstrip('012')
+                break
+        # Calculate vowel similarity (50% of total score)
+        vowel_score = 0.0
+        if primary_vowel1 and primary_vowel2:
+            if primary_vowel1 == primary_vowel2:
+                vowel_score = 1.0
+            elif self._vowels_match(primary_vowel1, primary_vowel2):
+                vowel_score = 0.8
+        # Calculate consonant similarity (30% of total score)
+        consonant_score = 0.0
+        if vowel_idx1 >= 0 and vowel_idx2 >= 0:
+            # Compare consonants around the stressed vowel
+            pre_c1 = phone_list1[vowel_idx1-1] if vowel_idx1 > 0 else None
+            pre_c2 = phone_list2[vowel_idx2-1] if vowel_idx2 > 0 else None
+            post_c1 = phone_list1[vowel_idx1+1] if vowel_idx1 < len(phone_list1)-1 else None
+            post_c2 = phone_list2[vowel_idx2+1] if vowel_idx2 < len(phone_list2)-1 else None
+            if pre_c1 and pre_c2:
+                consonant_score += self._consonants_similarity(pre_c1, pre_c2)
+            if post_c1 and post_c2:
+                consonant_score += self._consonants_similarity(post_c1, post_c2)
+            consonant_score = consonant_score / 2  # Normalize to 0-1
+        # Pattern/length similarity (20% of total score)
+        pattern_score = 0.0
+        if len(phone_list1) == len(phone_list2):
+            pattern_score = 1.0
+        else:
+            pattern_score = 1.0 - (abs(len(phone_list1) - len(phone_list2)) / max(len(phone_list1), len(phone_list2)))
+        # Combined weighted score
         similarity = (
+            (vowel_score * 0.5) +      # Vowel similarity most important
+            (consonant_score * 0.3) +   # Consonant similarity next
+            (pattern_score * 0.2)       # Pattern/length least important
         )
         return {
             "similarity": round(similarity, 3),
+            "vowel_score": round(vowel_score, 3),
+            "consonant_score": round(consonant_score, 3),
+            "pattern_score": round(pattern_score, 3),
+            "primary_vowels": f"{primary_vowel1}-{primary_vowel2}",
+            "consonants": "similar" if consonant_score > 0.5 else "different"
         }