File size: 10,472 Bytes
d53b325
5da935d
 
6e8a124
 
d53b325
 
 
6e8a124
19ee3db
6e8a124
d53b325
eb18121
4aa24c2
d53b325
98f1f92
 
 
 
 
 
05e1f73
 
6e8a124
 
 
 
 
 
91c8f98
 
05e1f73
 
 
 
98f1f92
05e1f73
 
 
98f1f92
05e1f73
 
 
 
 
 
 
 
 
 
0111e58
 
71d9835
 
 
 
 
 
 
05e1f73
 
 
 
 
 
 
98f1f92
 
05e1f73
 
 
0111e58
 
6e8a124
 
 
 
49ca653
6e8a124
 
 
 
 
 
 
 
 
 
 
 
 
49ca653
91c8f98
 
 
6e8a124
 
 
4aa24c2
6e8a124
 
 
 
 
 
 
 
91c8f98
6e8a124
 
 
 
4aa24c2
6e8a124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4aa24c2
 
 
 
6e8a124
 
91c8f98
6e8a124
 
 
91c8f98
6e8a124
 
 
 
 
 
 
 
 
91c8f98
 
6e8a124
 
4aa24c2
6e8a124
 
 
 
 
 
91c8f98
 
 
6e8a124
d53b325
c0a5ec1
d53b325
 
91c8f98
c0a5ec1
d53b325
 
05e1f73
 
 
 
91c8f98
 
 
49ca653
 
 
 
 
 
d53b325
aae0d96
 
 
 
 
 
 
 
c0a5ec1
d53b325
6e8a124
d53b325
 
6e8a124
3910298
 
 
6e8a124
3910298
 
 
 
6e8a124
3910298
 
 
 
 
 
6e8a124
3910298
d53b325
 
e13fea5
6e8a124
33a37e4
05e1f73
0111e58
c0a5ec1
3910298
6e8a124
 
 
91c8f98
d53b325
91c8f98
 
 
 
d53b325
 
91c8f98
6e8a124
 
5da935d
e13fea5
05e1f73
71d9835
6e8a124
d53b325
 
 
 
 
 
 
33a37e4
05e1f73
 
91c8f98
d53b325
 
 
 
 
98f1f92
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
from smolagents.tools import Tool
import pronouncing
import json
import string
import difflib

class ParodyWordSuggestionTool(Tool):
    # Tool metadata: identifier, human-readable description, input schema and
    # output type.
    # NOTE(review): presumably consumed by the smolagents ``Tool`` base class
    # — confirm against the smolagents documentation.
    name = "parody_word_suggester"
    description = """Suggests rhyming funny words using CMU dictionary pronunciations.

    Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
    # All three inputs are declared as strings; ``min_similarity`` is nullable
    # and defaults to '0.5' (forward() converts it with float()).
    inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'}}
    output_type = "string"
    # Vowel groups encoded as "|"-separated groups of ","-separated phoneme
    # symbols. Parsed by _get_vowel_groups(); _vowels_match() treats two vowels
    # that share a group as similar. A symbol may appear in several groups.
    VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
    # Consonant groups in the same encoding. Parsed by _get_consonant_groups();
    # _calculate_similarity() gives half credit to differing consonants that
    # share a group.
    CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"

    def _get_vowel_groups(self):
        groups = []
        group_strs = self.VOWEL_REF.split("|")
        for group_str in group_strs:
            groups.append(group_str.split(","))
        return groups


    def _get_consonant_groups(self):
        groups = []
        group_strs = self.CONSONANT_REF.split("|")
        for group_str in group_strs:
            groups.append(group_str.split(","))
        return groups


    def _get_last_syllable(self, phones: list) -> tuple:
        """Extract the last syllable (vowel + remaining consonants)."""
        last_vowel_idx = -1
        last_vowel = None
        vowel_groups = self._get_vowel_groups()
    
        for i, phone in enumerate(phones):
            base_phone = phone.rstrip('012')
            for group in vowel_groups:
                if base_phone in group:
                    last_vowel_idx = i
                    last_vowel = base_phone
                    break
    
        if last_vowel_idx == -1:
            return None, []
        
        remaining = phones[last_vowel_idx + 1:]
        return last_vowel, remaining


    def _strip_stress(self, phones: list) -> list:
        result = []
        for phone in phones:
            result.append(phone.rstrip('012'))
        return result


    def _vowels_match(self, v1: str, v2: str) -> bool:
        v1 = v1.rstrip('012')
        v2 = v2.rstrip('012')
    
        if v1 == v2:
            return True
        
        vowel_groups = self._get_vowel_groups()
        for group in vowel_groups:
            if v1 in group and v2 in group:
                return True
        return False


    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Calculate similarity with heavy emphasis on rhyming."""
        from difflib import SequenceMatcher
        import pronouncing
    
        # Initialize all variables
        rhyme_score = 0.0
        string_score = 0.0
        pattern_score = 0.0
        phone_list1 = []
        phone_list2 = []
        vowel1 = None
        vowel2 = None
        end1 = []
        end2 = []
        end1_clean = []
        end2_clean = []
        matching_consonants = 0
    
        phone_list1 = phones1.split()
        phone_list2 = phones2.split()
    
        # Get last syllables
        vowel1, end1 = self._get_last_syllable(phone_list1)
        vowel2, end2 = self._get_last_syllable(phone_list2)
    
        # Calculate rhyme score (60%)
        if vowel1 and vowel2:
            # Perfect vowel match
            if vowel1.rstrip('012') == vowel2.rstrip('012'):
                rhyme_score = 1.0
            # Similar vowel match
            elif self._vowels_match(vowel1, vowel2):
                rhyme_score = 0.8
            
            # Check endings
            if end1 and end2:
                end1_clean = self._strip_stress(end1)
                end2_clean = self._strip_stress(end2)
            
                # Perfect ending match
                if end1_clean == end2_clean:
                    rhyme_score = min(1.0, rhyme_score + 0.2)
                # Partial ending match
                else:
                    consonant_groups = self._get_consonant_groups()
                    matching_consonants = 0
                    for c1, c2 in zip(end1_clean, end2_clean):
                        if c1 == c2:
                            matching_consonants += 1
                        else:
                            # Check if consonants are in same group
                            for group in consonant_groups:
                                if c1 in group and c2 in group:
                                    matching_consonants += 0.5
                                    break
                
                    if matching_consonants > 0:
                        rhyme_score = min(1.0, rhyme_score + (0.1 * matching_consonants))
    
        # String similarity (25%)
        if len(word1) > 1 and len(word2) > 1:
            end_similarity = SequenceMatcher(None, word1[1:], word2[1:]).ratio()
            string_score = end_similarity
        else:
            string_score = SequenceMatcher(None, word1, word2).ratio()
    
        # Pattern/Length score (15%)
        if len(phone_list1) == len(phone_list2):
            pattern_score = 1.0
        else:
            pattern_score = 1.0 - (abs(len(phone_list1) - len(phone_list2)) / max(len(phone_list1), len(phone_list2)))
    
        # Final weighted score
        similarity = (
            (rhyme_score * 0.60) +
            (string_score * 0.25) +
            (pattern_score * 0.15)
        )
    
        # Extra boost for exact matches minus first letter
        if len(word1) == len(word2) and word1[1:] == word2[1:]:
            similarity = min(1.0, similarity * 1.2)
    
        # Extra penalty for very different lengths
        if abs(len(word1) - len(word2)) > 2:
            similarity *= 0.7
    
        return {
            "similarity": round(similarity, 3),
            "rhyme_score": round(rhyme_score, 3),
            "string_score": round(string_score, 3),
            "pattern_score": round(pattern_score, 3),
            "details": {
                "last_vowel_match": vowel1.rstrip('012') == vowel2.rstrip('012') if vowel1 and vowel2 else False,
                "similar_vowels": self._vowels_match(vowel1, vowel2) if vowel1 and vowel2 else False,
                "ending_match": " ".join(end1_clean) == " ".join(end2_clean) if end1 and end2 else False,
                "string_length_diff": abs(len(word1) - len(word2))
            }
        }


    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5") -> str:
        import pronouncing
        import string
        import json
    
        # Initialize all variables
        target = target.lower().strip(string.punctuation)
        min_similarity = float(min_similarity)
        suggestions = []
        word_vowel = None
        word_end = []
        target_vowel = None
        target_end = []
        valid_words = []
        invalid_words = []
        target_phone_list = []
        words = []
        target_phones = ""
        word_phones = ""
        word = ""
        word_phone_list = []
        similarity_result = {}
    
        # Parse JSON string to list
        try:
            words = json.loads(word_list_str)
        except json.JSONDecodeError:
            return json.dumps({
                "error": "Invalid JSON string for word_list_str",
                "suggestions": []
            }, indent=2)
        
        # Get target pronunciation
        target_phones = pronouncing.phones_for_word(target)
        if not target_phones:
            return json.dumps({
                "error": f"Target word '{target}' not found in CMU dictionary",
                "suggestions": []
            }, indent=2)
    
        # Filter word list to only words in CMU dictionary
        valid_words = []
        invalid_words = []
        for word in words:
            word = word.lower().strip(string.punctuation)
            if pronouncing.phones_for_word(word):
                valid_words.append(word)
            else:
                invalid_words.append(word)
    
        if not valid_words:
            return json.dumps({
                "error": "No valid words found in CMU dictionary",
                "invalid_words": invalid_words,
                "suggestions": []
            }, indent=2)
    
        target_phones = target_phones[0]
        target_phone_list = target_phones.split()
        target_vowel, target_end = self._get_last_syllable(target_phone_list)
    
        # Check each word
        for word in valid_words:
            phones = pronouncing.phones_for_word(word)
            if phones:
                word_phones = phones[0]
                similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
            
                if similarity_result["similarity"] >= min_similarity:
                    word_phone_list = word_phones.split()
                    word_vowel, word_end = self._get_last_syllable(word_phone_list)
                
                    suggestions.append({
                        "word": word,
                        "similarity": similarity_result["similarity"],
                        "rhyme_score": similarity_result["rhyme_score"],
                        "string_score": similarity_result["string_score"],
                        "pattern_score": similarity_result["pattern_score"],
                        "phones": word_phones,
                        "last_vowel": word_vowel,
                        "ending": " ".join(word_end) if word_end else "",
                        "details": similarity_result["details"]
                    })
    
        # Sort by similarity score descending
        suggestions.sort(key=lambda x: x["similarity"], reverse=True)
    
        result = {
            "target": target,
            "target_phones": target_phones,
            "target_last_vowel": target_vowel,
            "target_ending": " ".join(target_end) if target_end else "",
            "invalid_words": invalid_words,
            "suggestions": suggestions
        }
    
        return json.dumps(result, indent=2)


    def __init__(self, *args, **kwargs):
        """Mark the tool as not yet initialized.

        Any positional or keyword arguments are accepted and ignored; the
        base-class ``__init__`` is not called here.
        """
        self.is_initialized = False