File size: 12,017 Bytes
d53b325
294224b
5da935d
 
d53b325
 
 
9bc766a
fd49873
d53b325
2898d37
 
 
ee2b8c1
91c8f98
fd49873
 
 
 
 
 
 
 
 
 
9bc766a
 
ee2b8c1
 
 
 
 
 
9bc766a
0111e58
 
294224b
 
ee2b8c1
 
 
294224b
 
ee2b8c1
 
 
294224b
ee2b8c1
 
 
 
 
294224b
ee2b8c1
294224b
ee2b8c1
 
 
 
 
 
294224b
ee2b8c1
 
294224b
ee2b8c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294224b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2898d37
 
 
9bc766a
 
ee2b8c1
 
 
 
 
9bc766a
ee2b8c1
 
 
 
 
 
2898d37
 
294224b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2898d37
 
 
 
 
0111e58
 
6e8a124
9bc766a
294224b
91c8f98
 
ee2b8c1
 
 
294224b
91c8f98
294224b
 
6e8a124
294224b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e8a124
294224b
 
2898d37
6e8a124
9bc766a
 
 
2898d37
 
9bc766a
6e8a124
91c8f98
 
6e8a124
2898d37
 
6e8a124
294224b
 
9bc766a
 
2898d37
6e8a124
91c8f98
 
 
fd49873
d53b325
c0a5ec1
d53b325
 
9bc766a
c0a5ec1
d53b325
 
91c8f98
 
ee2b8c1
 
 
 
 
d53b325
aae0d96
 
 
 
 
 
 
 
c0a5ec1
9bc766a
fd49873
d53b325
 
fd49873
3910298
 
 
9bc766a
3910298
 
fd49873
3910298
 
 
 
 
 
fd49873
3910298
d53b325
 
e13fea5
c0a5ec1
3910298
fd49873
 
91c8f98
d53b325
91c8f98
d53b325
 
91c8f98
6e8a124
2898d37
 
e13fea5
fd49873
6e8a124
d53b325
 
 
 
 
 
 
33a37e4
91c8f98
d53b325
 
 
 
 
98f1f92
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
from smolagents.tools import Tool
import string
import pronouncing
import json

class ParodyWordSuggestionTool(Tool):
    name = "parody_word_suggester"
    description = "Suggests rhyming funny words using CMU dictionary pronunciations."
    inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True, 'default': None}}
    output_type = "string"
    RHYME_WEIGHT = 0.5
    PHONE_SEQUENCE_WEIGHT = 0.3
    LENGTH_WEIGHT = 0.2
    PHONE_GROUPS = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y|IY,IH|UW,UH|EH,AH|AO,AA|AE,AH|AY,EY|OW,UW"

    def _get_word_phones(self, word, custom_phones=None):
        """Get phones for a word, checking custom dictionary first."""
        if custom_phones and word in custom_phones:
            return custom_phones[word]["primary_phones"]
        
        import pronouncing
        phones = pronouncing.phones_for_word(word)
        return phones[0] if phones else None


    def _get_primary_vowel(self, phones: list) -> str:
        """Get the primary stressed vowel from phone list."""
        phone_str = ""
        vowel_char = ""
    
        for phone_str in phones:
            if '1' in phone_str and any(vowel_char in phone_str for vowel_char in 'AEIOU'):
                return phone_str.rstrip('012')
        return None


    def _phones_are_similar(self, phone1: str, phone2: str) -> bool:
        """Check if two phones are similar enough to be considered rhyming."""
        # Strip stress markers
        p1 = phone1.rstrip('012')
        p2 = phone2.rstrip('012')
        group_str = ""
        group = []
    
        # Exact match
        if p1 == p2:
            return True
        
        # Check similarity groups
        for group_str in self.PHONE_GROUPS.split('|'):
            group = group_str.split(',')
            if p1 in group and p2 in group:
                return True
            
        return False


    def _get_phone_type(self, phone: str) -> str:
        """Get the broad category of a phone."""
        # Strip stress markers
        phone = phone.rstrip('012')
        vowel_char = ""
    
        # Vowels
        if any(vowel_char in phone for vowel_char in 'AEIOU'):
            return 'vowel'
        
        # Initialize fixed sets for categories
        nasals = {'M', 'N', 'NG'}
        stops = {'P', 'B', 'T', 'D', 'K', 'G'}
        fricatives = {'F', 'V', 'TH', 'DH', 'S', 'Z', 'SH', 'ZH'}
        liquids = {'L', 'R'}
        glides = {'W', 'Y'}
    
        if phone in nasals:
            return 'nasal'
        if phone in stops:
            return 'stop'
        if phone in fricatives:
            return 'fricative'
        if phone in liquids:
            return 'liquid'
        if phone in glides:
            return 'glide'
        
        return 'other'


    def _get_rhyme_score(self, phones1: list, phones2: list) -> float:
        """Calculate rhyme score based on matching phones after primary stressed vowel."""
        # Initialize variables
        pos1 = -1
        pos2 = -1
        i = 0
        phone = ""
        vowel_char = ""
        rhyme_part1 = []
        rhyme_part2 = []
        similarity_count = 0
        p1 = ""
        p2 = ""
    
        # Find primary stressed vowel position in both words
        for i, phone in enumerate(phones1):
            if '1' in phone and any(vowel_char in phone for vowel_char in 'AEIOU'):
                pos1 = i
                break
            
        for i, phone in enumerate(phones2):
            if '1' in phone and any(vowel_char in phone for vowel_char in 'AEIOU'):
                pos2 = i
                break
        
        if pos1 == -1 or pos2 == -1:
            return 0.0
    
        # Get all phones after and including the stressed vowel
        rhyme_part1 = phones1[pos1:]
        rhyme_part2 = phones2[pos2:]
    
        # Check if lengths match
        if len(rhyme_part1) != len(rhyme_part2):
            return 0.0
    
        # Calculate similarity score for rhyming part
        for p1, p2 in zip(rhyme_part1, rhyme_part2):
            if self._phones_are_similar(p1, p2):
                similarity_count += 1
    
        # Return score based on how many phones were similar
        return similarity_count / len(rhyme_part1) if rhyme_part1 else 0.0


    def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
        """Calculate similarity based on matching phones in sequence."""
        if not phones1 or not phones2:
            return 0.0
    
        # Initialize variables
        total_similarity = 0.0
        i = 0
        similarity = 0.0
        comparisons = max(len(phones1), len(phones2))
    
        # Compare each position
        for i in range(min(len(phones1), len(phones2))):
            similarity = self._get_phone_similarity(phones1[i], phones2[i])
            total_similarity += similarity
        
        return total_similarity / comparisons if comparisons > 0 else 0.0


    def _get_phone_similarity(self, phone1: str, phone2: str) -> float:
        """Calculate similarity between two phones."""
        # Initialize variables
        p1 = phone1.rstrip('012')
        p2 = phone2.rstrip('012')
        group_str = ""
        group = []
    
        # Exact match
        if p1 == p2:
            return 1.0
        
        # Check similarity groups
        for group_str in self.PHONE_GROUPS.split('|'):
            group = group_str.split(',')
            if p1 in group and p2 in group:
                return 0.7
            
        # Check broader categories
        if self._get_phone_type(p1) == self._get_phone_type(p2):
            return 0.3
        
        return 0.0


    def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
        """Calculate similarity based on phone length."""
        max_length = max(len(phones1), len(phones2))
        length_diff = abs(len(phones1) - len(phones2))
        return 1.0 - (length_diff / max_length) if max_length > 0 else 0.0


    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Calculate similarity based on multiple factors."""
        # Initialize variables
        phone_list1 = phones1.split()
        phone_list2 = phones2.split()
        rhyme_score = 0.0
        phone_sequence_score = 0.0
        length_score = 0.0
        similarity = 0.0
    
        # Get rhyme score using new method
        rhyme_score = self._get_rhyme_score(phone_list1, phone_list2)
    
        # If rhyme score is too low (e.g. below 0.8), consider it a non-rhyme
        if rhyme_score < 0.8:
            return {
                "similarity": 0.0,
                "rhyme_score": 0.0,
                "phone_sequence_score": 0.0,
                "length_score": 0.0,
                "details": {
                    "primary_vowel1": self._get_primary_vowel(phone_list1),
                    "primary_vowel2": self._get_primary_vowel(phone_list2),
                    "phone_count1": len(phone_list1),
                    "phone_count2": len(phone_list2),
                    "matching_phones": 0
                }
            }
    
        # Calculate other scores only if words rhyme closely enough
        phone_sequence_score = self._calculate_phone_sequence_similarity(phone_list1, phone_list2)
        length_score = self._calculate_length_similarity(phone_list1, phone_list2)
    
        # Combined weighted score
        similarity = (
            (rhyme_score * self.RHYME_WEIGHT) +
            (phone_sequence_score * self.PHONE_SEQUENCE_WEIGHT) +
            (length_score * self.LENGTH_WEIGHT)
        )
    
        return {
            "similarity": round(similarity, 3),
            "rhyme_score": round(rhyme_score, 3),
            "phone_sequence_score": round(phone_sequence_score, 3),
            "length_score": round(length_score, 3),
            "details": {
                "primary_vowel1": self._get_primary_vowel(phone_list1),
                "primary_vowel2": self._get_primary_vowel(phone_list2),
                "phone_count1": len(phone_list1),
                "phone_count2": len(phone_list2),
                "matching_phones": round(phone_sequence_score * len(phone_list1))
            }
        }


    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5", custom_phones: dict = None) -> str:
        import pronouncing
        import string
        import json
    
        # Initialize variables
        target = target.lower().strip(string.punctuation)
        min_similarity = float(min_similarity)
        suggestions = []
        valid_words = []
        invalid_words = []
        words = []
        target_phones = ""
        word_phones = ""
        word = ""
        similarity_result = {}
    
        # Parse JSON string to list
        try:
            words = json.loads(word_list_str)
        except json.JSONDecodeError:
            return json.dumps({
                "error": "Invalid JSON string for word_list_str",
                "suggestions": []
            }, indent=2)
        
        # Get target pronunciation
        target_phones = self._get_word_phones(target, custom_phones)
        if not target_phones:
            return json.dumps({
                "error": f"Target word '{target}' not found in dictionary or custom phones",
                "suggestions": []
            }, indent=2)
    
        # Filter word list
        for word in words:
            word = word.lower().strip(string.punctuation)
            if self._get_word_phones(word, custom_phones):
                valid_words.append(word)
            else:
                invalid_words.append(word)
    
        if not valid_words:
            return json.dumps({
                "error": "No valid words found in dictionary or custom phones",
                "invalid_words": invalid_words,
                "suggestions": []
            }, indent=2)
    
        # Check each word
        for word in valid_words:
            word_phones = self._get_word_phones(word, custom_phones)
            if word_phones:
                similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
            
                if similarity_result["similarity"] >= min_similarity:
                    suggestions.append({
                        "word": word,
                        "similarity": similarity_result["similarity"],
                        "rhyme_score": similarity_result["rhyme_score"],
                        "phone_sequence_score": similarity_result["phone_sequence_score"],
                        "length_score": similarity_result["length_score"],
                        "phones": word_phones,
                        "is_custom": word in custom_phones if custom_phones else False,
                        "details": similarity_result["details"]
                    })
    
        # Sort by similarity score descending
        suggestions.sort(key=lambda x: x["similarity"], reverse=True)
    
        result = {
            "target": target,
            "target_phones": target_phones,
            "invalid_words": invalid_words,
            "suggestions": suggestions
        }
    
        return json.dumps(result, indent=2)


    def __init__(self, *args, **kwargs):
        self.is_initialized = False