File size: 9,363 Bytes
d53b325
5da935d
 
2898d37
d53b325
 
 
9bc766a
fd49873
d53b325
2898d37
 
 
ee2b8c1
91c8f98
fd49873
 
 
 
 
 
 
 
 
 
9bc766a
 
ee2b8c1
 
 
 
 
 
9bc766a
0111e58
 
ee2b8c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2898d37
 
 
9bc766a
 
ee2b8c1
 
 
 
9bc766a
ee2b8c1
9bc766a
ee2b8c1
 
 
 
 
 
2898d37
 
 
 
 
 
 
0111e58
 
6e8a124
9bc766a
ee2b8c1
91c8f98
 
ee2b8c1
 
 
 
 
91c8f98
2898d37
9bc766a
 
2898d37
6e8a124
2898d37
 
6e8a124
2898d37
 
6e8a124
9bc766a
 
 
2898d37
 
9bc766a
6e8a124
91c8f98
 
6e8a124
2898d37
 
6e8a124
9bc766a
 
 
 
2898d37
6e8a124
91c8f98
 
 
fd49873
d53b325
c0a5ec1
d53b325
 
9bc766a
c0a5ec1
d53b325
 
91c8f98
 
ee2b8c1
 
 
 
 
d53b325
aae0d96
 
 
 
 
 
 
 
c0a5ec1
9bc766a
fd49873
d53b325
 
fd49873
3910298
 
 
9bc766a
3910298
 
fd49873
3910298
 
 
 
 
 
fd49873
3910298
d53b325
 
e13fea5
c0a5ec1
3910298
fd49873
 
91c8f98
d53b325
91c8f98
d53b325
 
91c8f98
6e8a124
2898d37
 
e13fea5
fd49873
6e8a124
d53b325
 
 
 
 
 
 
33a37e4
91c8f98
d53b325
 
 
 
 
98f1f92
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
from smolagents.tools import Tool
import pronouncing
import json
import string

class ParodyWordSuggestionTool(Tool):
    """Rank candidate words by how closely they sound like a target word.

    Pronunciations are space-separated phone strings. They are read from the
    optional ``custom_phones`` mapping first and from the ``pronouncing``
    package otherwise. Every candidate gets a weighted score made of three
    parts:

    * rhyme: 1.0 when both words share the same primary-stressed vowel;
    * phone sequence: position-by-position phone similarity;
    * length: how close the two phone counts are.

    NOTE(review): imports are repeated inside the methods, presumably so that
    each method stays self-contained when the tool is exported -- confirm
    before hoisting them to module level.
    """

    name = "parody_word_suggester"
    description = "Suggests rhyming funny words using CMU dictionary pronunciations."
    inputs = {
        'target': {
            'type': 'string',
            'description': 'The word you want to find rhyming alternatives for',
        },
        'word_list_str': {
            'type': 'string',
            'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')',
        },
        'min_similarity': {
            'type': 'string',
            'description': 'Minimum similarity threshold (0.0-1.0)',
            'nullable': True,
            'default': '0.5',
        },
        'custom_phones': {
            'type': 'object',
            'description': 'Optional dictionary of custom word pronunciations',
            'nullable': True,
            'default': None,
        },
    }
    output_type = "string"

    # Weights of the three score components; they sum to 1.0.
    RHYME_WEIGHT = 0.5
    PHONE_SEQUENCE_WEIGHT = 0.3
    LENGTH_WEIGHT = 0.2

    # Groups of phones treated as "close" (similarity 0.7). Groups are
    # separated by "|" and the members of a group by ",".
    PHONE_GROUPS = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y|IY,IH|UW,UH|EH,AH|AO,AA|AE,AH|AY,EY|OW,UW"

    def _get_word_phones(self, word, custom_phones=None):
        """Return the phone string for ``word``, or ``None`` when unknown.

        Args:
            word: Word to look up.
            custom_phones: Optional mapping of word -> entry. When ``word`` is
                a key, the entry's ``"primary_phones"`` value is returned
                without consulting the dictionary. (Presumably a
                space-separated phone string, since callers ``split()`` it --
                verify against the producer of this mapping.)

        Returns:
            The custom pronunciation, else the first pronunciation reported by
            ``pronouncing.phones_for_word``, else ``None``.
        """
        if custom_phones and word in custom_phones:
            return custom_phones[word]["primary_phones"]

        import pronouncing

        pronunciations = pronouncing.phones_for_word(word)
        return pronunciations[0] if pronunciations else None

    def _get_primary_vowel(self, phones: list) -> "str | None":
        """Return the first primary-stressed vowel in ``phones``.

        A phone qualifies when it contains the stress digit ``1`` and at least
        one of the letters ``AEIOU``.

        Args:
            phones: List of phone strings, e.g. ``["K", "AE1", "T"]``.

        Returns:
            The phone with trailing stress digits removed (``"AE"``), or
            ``None`` when no phone qualifies.
        """
        vowel_letters = set("AEIOU")
        for phone in phones:
            if "1" in phone and not vowel_letters.isdisjoint(phone):
                return phone.rstrip("012")
        return None

    def _get_phone_similarity(self, phone1: str, phone2: str) -> float:
        """Score how alike two phones are, ignoring stress digits.

        Returns:
            1.0 for identical phones, 0.7 when both belong to one of the
            ``PHONE_GROUPS``, 0.3 when they only share a broad category (see
            ``_get_phone_type``; two uncategorised phones both map to
            ``'other'`` and therefore also score 0.3), otherwise 0.0.
        """
        first = phone1.rstrip("012")
        second = phone2.rstrip("012")

        if first == second:
            return 1.0

        for group_spec in self.PHONE_GROUPS.split("|"):
            members = group_spec.split(",")
            if first in members and second in members:
                return 0.7

        if self._get_phone_type(first) == self._get_phone_type(second):
            return 0.3

        return 0.0

    def _get_phone_type(self, phone: str) -> str:
        """Return the broad category of ``phone``.

        Returns:
            ``'vowel'`` when the phone contains any of ``AEIOU``; otherwise
            ``'nasal'``, ``'stop'``, ``'fricative'``, ``'liquid'`` or
            ``'glide'`` according to the tables below; ``'other'`` for
            anything not listed.
        """
        phone = phone.rstrip("012")

        if not set("AEIOU").isdisjoint(phone):
            return "vowel"

        categories = {
            "nasal": ("M", "N", "NG"),
            "stop": ("P", "B", "T", "D", "K", "G"),
            "fricative": ("F", "V", "TH", "DH", "S", "Z", "SH", "ZH"),
            "liquid": ("L", "R"),
            "glide": ("W", "Y"),
        }
        for label, members in categories.items():
            if phone in members:
                return label

        return "other"

    def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
        """Average position-by-position phone similarity of two phone lists.

        Phones are compared pairwise from the start. The sum is divided by the
        length of the longer list, so unmatched trailing phones count as 0.

        Returns:
            A value in ``[0.0, 1.0]``; 0.0 when either list is empty.
        """
        if not phones1 or not phones2:
            return 0.0

        total = 0.0
        for left, right in zip(phones1, phones2):
            total += self._get_phone_similarity(left, right)

        return total / max(len(phones1), len(phones2))

    def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
        """Return ``1 - |len1 - len2| / max(len1, len2)``, or 0.0 if both are empty."""
        longest = max(len(phones1), len(phones2))
        if longest == 0:
            return 0.0
        return 1.0 - abs(len(phones1) - len(phones2)) / longest

    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Combine rhyme, phone-sequence and length scores for two words.

        Args:
            word1: First word (not used in the computation).
            phones1: Space-separated phone string of the first word.
            word2: Second word (not used in the computation).
            phones2: Space-separated phone string of the second word.

        Returns:
            A dict with the weighted ``"similarity"``, the three component
            scores (all rounded to 3 decimals) and a ``"details"`` dict holding
            the primary vowels, the phone counts and ``"matching_phones"``.
        """
        phone_list1 = phones1.split()
        phone_list2 = phones2.split()

        # 1. Rhyme score: all-or-nothing match on the primary stressed vowel.
        vowel1 = self._get_primary_vowel(phone_list1)
        vowel2 = self._get_primary_vowel(phone_list2)
        rhyme_score = 1.0 if vowel1 and vowel2 and vowel1 == vowel2 else 0.0

        # 2. Phone sequence similarity.
        phone_sequence_score = self._calculate_phone_sequence_similarity(phone_list1, phone_list2)

        # 3. Length similarity.
        length_score = self._calculate_length_similarity(phone_list1, phone_list2)

        similarity = (
            (rhyme_score * self.RHYME_WEIGHT) +
            (phone_sequence_score * self.PHONE_SEQUENCE_WEIGHT) +
            (length_score * self.LENGTH_WEIGHT)
        )

        # The sequence score was normalised by the longer list, so multiplying
        # by that same length recovers the summed per-phone similarity
        # regardless of which word is the shorter one.
        compared_length = max(len(phone_list1), len(phone_list2))

        return {
            "similarity": round(similarity, 3),
            "rhyme_score": round(rhyme_score, 3),
            "phone_sequence_score": round(phone_sequence_score, 3),
            "length_score": round(length_score, 3),
            "details": {
                "primary_vowel1": vowel1,
                "primary_vowel2": vowel2,
                "phone_count1": len(phone_list1),
                "phone_count2": len(phone_list2),
                "matching_phones": round(phone_sequence_score * compared_length)
            }
        }

    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5", custom_phones: dict = None) -> str:
        """Score every candidate word against ``target`` and return JSON.

        Args:
            target: Word to find similar-sounding alternatives for.
            word_list_str: JSON list of candidate words.
            min_similarity: Minimum combined score for a candidate to be
                reported, given as a string; ``None`` falls back to ``"0.5"``.
            custom_phones: Optional mapping of word -> pronunciation entry
                that takes precedence over the dictionary.

        Returns:
            A JSON string. On success it holds ``target``, ``target_phones``,
            ``invalid_words`` and ``suggestions`` (sorted by descending
            similarity). On failure it holds an ``error`` message and an empty
            ``suggestions`` list.
        """
        import json
        import string

        # Surrounding whitespace is stripped as well as punctuation so that
        # " cat " and "cat!" both resolve to "cat".
        strip_chars = string.punctuation + string.whitespace
        target = target.lower().strip(strip_chars)

        # The input schema declares min_similarity nullable, so None is legal.
        if min_similarity is None:
            min_similarity = "0.5"
        try:
            threshold = float(min_similarity)
        except (TypeError, ValueError):
            return json.dumps({
                "error": f"Invalid min_similarity value: {min_similarity!r}",
                "suggestions": []
            }, indent=2)

        # Parse JSON string to list
        try:
            words = json.loads(word_list_str)
        except (json.JSONDecodeError, TypeError):
            return json.dumps({
                "error": "Invalid JSON string for word_list_str",
                "suggestions": []
            }, indent=2)

        # Valid JSON that is not a list (string, number, object) would be
        # iterated incorrectly or raise, so it is rejected explicitly.
        if not isinstance(words, list):
            return json.dumps({
                "error": "word_list_str must be a JSON list of words",
                "suggestions": []
            }, indent=2)

        # Get target pronunciation
        target_phones = self._get_word_phones(target, custom_phones)
        if not target_phones:
            return json.dumps({
                "error": f"Target word '{target}' not found in dictionary or custom phones",
                "suggestions": []
            }, indent=2)

        # Resolve each candidate's pronunciation once, keeping it for scoring.
        candidates = []
        invalid_words = []
        for raw_word in words:
            if not isinstance(raw_word, str):
                invalid_words.append(raw_word)
                continue
            word = raw_word.lower().strip(strip_chars)
            word_phones = self._get_word_phones(word, custom_phones)
            if word_phones:
                candidates.append((word, word_phones))
            else:
                invalid_words.append(word)

        if not candidates:
            return json.dumps({
                "error": "No valid words found in dictionary or custom phones",
                "invalid_words": invalid_words,
                "suggestions": []
            }, indent=2)

        # Score each candidate and keep those that reach the threshold.
        suggestions = []
        for word, word_phones in candidates:
            scores = self._calculate_similarity(word, word_phones, target, target_phones)
            if scores["similarity"] < threshold:
                continue
            suggestions.append({
                "word": word,
                "similarity": scores["similarity"],
                "rhyme_score": scores["rhyme_score"],
                "phone_sequence_score": scores["phone_sequence_score"],
                "length_score": scores["length_score"],
                "phones": word_phones,
                "is_custom": bool(custom_phones) and word in custom_phones,
                "details": scores["details"]
            })

        # Sort by similarity score descending
        suggestions.sort(key=lambda x: x["similarity"], reverse=True)

        result = {
            "target": target,
            "target_phones": target_phones,
            "invalid_words": invalid_words,
            "suggestions": suggestions
        }

        return json.dumps(result, indent=2)

    def __init__(self, *args, **kwargs):
        # Positional and keyword arguments are accepted but ignored; only the
        # initialisation flag is set.
        # NOTE(review): the base-class __init__ is not invoked here -- confirm
        # that Tool does not need it.
        self.is_initialized = False