File size: 11,861 Bytes
d53b325
4aa24c2
5da935d
 
d53b325
 
 
91c8f98
19ee3db
49ca653
d53b325
eb18121
4aa24c2
d53b325
98f1f92
 
 
 
 
 
05e1f73
 
91c8f98
 
 
 
 
 
 
 
 
 
05e1f73
 
 
 
98f1f92
05e1f73
 
 
98f1f92
05e1f73
 
 
 
 
 
 
 
 
 
0111e58
 
71d9835
 
 
 
 
 
 
05e1f73
 
 
 
 
 
 
98f1f92
 
05e1f73
 
 
0111e58
 
49ca653
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4aa24c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91c8f98
4aa24c2
 
91c8f98
 
 
4aa24c2
 
 
 
 
 
 
 
 
 
 
91c8f98
4aa24c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91c8f98
4aa24c2
 
 
91c8f98
4aa24c2
91c8f98
 
4aa24c2
 
 
 
 
91c8f98
 
 
 
d53b325
c0a5ec1
d53b325
 
91c8f98
c0a5ec1
d53b325
 
05e1f73
 
 
 
91c8f98
 
 
49ca653
 
 
 
 
 
d53b325
aae0d96
 
 
 
 
 
 
 
c0a5ec1
d53b325
91c8f98
d53b325
 
91c8f98
3910298
 
 
91c8f98
3910298
 
 
 
91c8f98
3910298
 
 
 
 
 
91c8f98
3910298
d53b325
 
e13fea5
33a37e4
05e1f73
0111e58
5da935d
c0a5ec1
3910298
91c8f98
 
 
d53b325
91c8f98
 
 
 
d53b325
 
91c8f98
5da935d
 
 
 
 
e13fea5
05e1f73
71d9835
91c8f98
d53b325
 
 
 
 
 
 
33a37e4
05e1f73
 
91c8f98
d53b325
 
 
 
 
98f1f92
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
from smolagents.tools import Tool
import string
import pronouncing
import json

class ParodyWordSuggestionTool(Tool):
    """Tool that scores candidate words by phonetic similarity to a target word.

    Pronunciations are taken from caller-supplied custom phones first and
    otherwise from the ``pronouncing`` package; ``forward`` returns its result
    as a JSON string.
    """
    # Tool metadata attributes: identifier, human-readable description,
    # input schema and output type.
    name = "parody_word_suggester"
    description = """Suggests rhyming funny words using CMU dictionary and custom pronunciations.

    Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
    # Every input is declared as a string except custom_phones (object);
    # min_similarity and custom_phones are nullable and carry defaults.
    inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'default': '0.5', 'nullable': True}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'default': None, 'nullable': True}}
    output_type = "string"
    # Vowel groups: "|" separates groups, "," separates phones inside a group.
    # Two vowels found in the same group are treated as matching by
    # _vowels_match. Some phones (AO, OW) appear in more than one group.
    VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
    # Consonant groups in the same "|" / "," format. _consonants_similarity
    # scores a pair from the first group (M,N,NG) at 0.8 and a pair from any
    # other group at 0.5.
    CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"

    def _get_vowel_groups(self):
        groups = []
        group_strs = self.VOWEL_REF.split("|")
        for group_str in group_strs:
            groups.append(group_str.split(","))
        return groups


    def _get_word_phones(self, word, custom_phones=None):
        """Get phones for a word, checking custom dictionary first."""
        if custom_phones and word in custom_phones:
            return custom_phones[word]["primary_phones"]
        
        import pronouncing
        phones = pronouncing.phones_for_word(word)
        return phones[0] if phones else None


    def _get_last_syllable(self, phones: list) -> tuple:
        """Extract the last syllable (vowel + remaining consonants)."""
        last_vowel_idx = -1
        last_vowel = None
        vowel_groups = self._get_vowel_groups()
    
        for i, phone in enumerate(phones):
            base_phone = phone.rstrip('012')
            for group in vowel_groups:
                if base_phone in group:
                    last_vowel_idx = i
                    last_vowel = base_phone
                    break
    
        if last_vowel_idx == -1:
            return None, []
        
        remaining = phones[last_vowel_idx + 1:]
        return last_vowel, remaining


    def _strip_stress(self, phones: list) -> list:
        result = []
        for phone in phones:
            result.append(phone.rstrip('012'))
        return result


    def _vowels_match(self, v1: str, v2: str) -> bool:
        v1 = v1.rstrip('012')
        v2 = v2.rstrip('012')
    
        if v1 == v2:
            return True
        
        vowel_groups = self._get_vowel_groups()
        for group in vowel_groups:
            if v1 in group and v2 in group:
                return True
        return False


    def _strip_common_suffix(self, phones: list) -> tuple:
        """Strip common suffixes and return base and suffix phones."""
        # Initialize variables
        suffix_name = ""
        suffix_phones = []
        phone1 = ""
        phone2 = ""
    
        # Common suffix patterns in CMU phonetic representation
        SUFFIXES = {
            'ING': ['IH0', 'NG'],  # -ing
            'ED': ['EH0', 'D'],    # -ed
            'ER': ['ER0'],         # -er
            'EST': ['EH0', 'S', 'T'],  # -est
            'LY': ['L', 'IY0'],    # -ly
            'NESS': ['N', 'EH0', 'S'],  # -ness
        }
    
        for suffix_name, suffix_phones in SUFFIXES.items():
            if len(phones) > len(suffix_phones):
                if all(phone1.rstrip('012') == phone2.rstrip('012') 
                      for phone1, phone2 in zip(phones[-len(suffix_phones):], suffix_phones)):
                    return phones[:-len(suffix_phones)], suffix_phones
    
        return phones, []


    def _get_consonant_groups(self):
        """Get consonant groups from reference string."""
        groups = []
        group_strs = self.CONSONANT_REF.split("|")
        for group_str in group_strs:
            groups.append(group_str.split(","))
        return groups


    def _consonants_similarity(self, c1: str, c2: str) -> float:
        """Calculate similarity score between two consonants."""
        if c1 == c2:
            return 1.0
        
        # Check if they're in the same group
        consonant_groups = self._get_consonant_groups()
        for group in consonant_groups:
            if c1 in group and c2 in group:
                # Nasals (first group) are more similar to each other
                if group == consonant_groups[0]:  # M,N,NG group
                    return 0.8
                return 0.5
            
        return 0.0


    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Calculate similarity score with enhanced consonant matching."""
        # Initialize variables as before
        phone_list1 = phones1.split()
        phone_list2 = phones2.split()
    
        # Get stressed vowels and their positions
        vowel_idx1 = -1
        vowel_idx2 = -1
        primary_vowel1 = None
        primary_vowel2 = None
    
        for i, phone in enumerate(phone_list1):
            if '1' in phone:  # Primary stress
                vowel_idx1 = i
                primary_vowel1 = phone.rstrip('012')
                break
            
        for i, phone in enumerate(phone_list2):
            if '1' in phone:
                vowel_idx2 = i
                primary_vowel2 = phone.rstrip('012')
                break

        # Calculate vowel similarity (50% of total score)
        vowel_score = 0.0
        if primary_vowel1 and primary_vowel2:
            if primary_vowel1 == primary_vowel2:
                vowel_score = 1.0
            elif self._vowels_match(primary_vowel1, primary_vowel2):
                vowel_score = 0.8

        # Calculate consonant similarity (30% of total score)
        consonant_score = 0.0
        if vowel_idx1 >= 0 and vowel_idx2 >= 0:
            # Compare consonants around the stressed vowel
            pre_c1 = phone_list1[vowel_idx1-1] if vowel_idx1 > 0 else None
            pre_c2 = phone_list2[vowel_idx2-1] if vowel_idx2 > 0 else None
            post_c1 = phone_list1[vowel_idx1+1] if vowel_idx1 < len(phone_list1)-1 else None
            post_c2 = phone_list2[vowel_idx2+1] if vowel_idx2 < len(phone_list2)-1 else None
        
            if pre_c1 and pre_c2:
                consonant_score += self._consonants_similarity(pre_c1, pre_c2)
            if post_c1 and post_c2:
                consonant_score += self._consonants_similarity(post_c1, post_c2)
            
            consonant_score = consonant_score / 2  # Normalize to 0-1

        # Pattern/length similarity (20% of total score)
        pattern_score = 0.0
        if len(phone_list1) == len(phone_list2):
            pattern_score = 1.0
        else:
            pattern_score = 1.0 - (abs(len(phone_list1) - len(phone_list2)) / max(len(phone_list1), len(phone_list2)))

        # Combined weighted score
        similarity = (
            (vowel_score * 0.5) +      # Vowel similarity most important
            (consonant_score * 0.3) +   # Consonant similarity next
            (pattern_score * 0.2)       # Pattern/length least important
        )

        return {
            "similarity": round(similarity, 3),
            "vowel_score": round(vowel_score, 3),
            "consonant_score": round(consonant_score, 3),
            "pattern_score": round(pattern_score, 3),
            "primary_vowels": f"{primary_vowel1}-{primary_vowel2}",
            "consonants": "similar" if consonant_score > 0.5 else "different"
        }


    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5", custom_phones: dict = None) -> str:
        import pronouncing
        import string
        import json
    
        # Initialize all variables
        target = target.lower().strip(string.punctuation)
        min_similarity = float(min_similarity)
        suggestions = []
        word_vowel = None
        word_end = []
        target_vowel = None
        target_end = []
        valid_words = []
        invalid_words = []
        target_phone_list = []
        words = []
        target_phones = ""
        word_phones = ""
        word = ""
        word_phone_list = []
        similarity_result = {}
    
        # Parse JSON string to list
        try:
            words = json.loads(word_list_str)
        except json.JSONDecodeError:
            return json.dumps({
                "error": "Invalid JSON string for word_list_str",
                "suggestions": []
            }, indent=2)
        
        # Get target pronunciation
        target_phones = self._get_word_phones(target, custom_phones)
        if not target_phones:
            return json.dumps({
                "error": f"Target word '{target}' not found in dictionary or custom phones",
                "suggestions": []
            }, indent=2)
    
        # Filter word list
        valid_words = []
        invalid_words = []
        for word in words:
            word = word.lower().strip(string.punctuation)
            if self._get_word_phones(word, custom_phones):
                valid_words.append(word)
            else:
                invalid_words.append(word)
    
        if not valid_words:
            return json.dumps({
                "error": "No valid words found in dictionary or custom phones",
                "invalid_words": invalid_words,
                "suggestions": []
            }, indent=2)
    
        target_phone_list = target_phones.split()
        target_vowel, target_end = self._get_last_syllable(target_phone_list)
    
        # Check each word
        # Check each word
        for word in valid_words:
            word_phones = self._get_word_phones(word, custom_phones)
            if word_phones:
                similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
            
                if similarity_result["similarity"] >= min_similarity:
                    word_phone_list = word_phones.split()
                    word_vowel, word_end = self._get_last_syllable(word_phone_list)
                
                    suggestions.append({
                        "word": word,
                        "similarity": similarity_result["similarity"],
                        "vowel_score": similarity_result["vowel_score"],
                        "consonant_score": similarity_result["consonant_score"],
                        "pattern_score": similarity_result["pattern_score"],
                        "primary_vowels": similarity_result["primary_vowels"],
                        "consonants": similarity_result["consonants"],
                        "phones": word_phones,
                        "last_vowel": word_vowel,
                        "ending": " ".join(word_end) if word_end else "",
                        "is_custom": word in custom_phones if custom_phones else False
                    })
    
        # Sort by similarity score descending
        suggestions.sort(key=lambda x: x["similarity"], reverse=True)
    
        result = {
            "target": target,
            "target_phones": target_phones,
            "target_last_vowel": target_vowel,
            "target_ending": " ".join(target_end) if target_end else "",
            "invalid_words": invalid_words,
            "suggestions": suggestions
        }
    
        return json.dumps(result, indent=2)


    def __init__(self, *args, **kwargs):
        """Accept and ignore any arguments; flag the tool as not yet initialized."""
        self.is_initialized = False