File size: 9,884 Bytes
d53b325
9bc766a
5da935d
 
d53b325
 
 
9bc766a
fd49873
d53b325
9bc766a
 
 
 
4aa24c2
d53b325
6e8a124
9bc766a
6e8a124
 
 
 
 
91c8f98
 
fd49873
 
 
 
 
 
 
 
 
 
9bc766a
 
 
 
 
05e1f73
9bc766a
 
 
 
0111e58
 
9bc766a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71d9835
 
9bc766a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05e1f73
9bc766a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05e1f73
9bc766a
 
 
 
 
 
 
0111e58
 
6e8a124
9bc766a
 
6e8a124
9bc766a
 
 
49ca653
9bc766a
91c8f98
 
 
9bc766a
 
 
6e8a124
9bc766a
 
 
 
 
6e8a124
9bc766a
4aa24c2
9bc766a
4aa24c2
9bc766a
6e8a124
9bc766a
 
6e8a124
9bc766a
 
6e8a124
9bc766a
 
 
 
 
 
 
6e8a124
91c8f98
 
6e8a124
9bc766a
 
 
6e8a124
9bc766a
 
 
 
 
6e8a124
91c8f98
 
 
fd49873
d53b325
c0a5ec1
d53b325
 
9bc766a
c0a5ec1
d53b325
 
91c8f98
 
49ca653
 
 
 
 
d53b325
aae0d96
 
 
 
 
 
 
 
c0a5ec1
9bc766a
fd49873
d53b325
 
fd49873
3910298
 
 
9bc766a
3910298
 
fd49873
3910298
 
 
 
 
 
fd49873
3910298
d53b325
 
e13fea5
c0a5ec1
3910298
fd49873
 
91c8f98
d53b325
91c8f98
d53b325
 
91c8f98
6e8a124
9bc766a
 
 
e13fea5
fd49873
6e8a124
d53b325
 
 
 
 
 
 
33a37e4
91c8f98
d53b325
 
 
 
 
98f1f92
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
from smolagents.tools import Tool
import string
import pronouncing
import json

class ParodyWordSuggestionTool(Tool):
    """Rank candidate words by how closely they sound like a target word.

    Pronunciations come from the ``pronouncing`` package (CMU dictionary) or
    from a caller-supplied ``custom_phones`` mapping. Every candidate gets a
    weighted similarity score built from four components: primary stressed
    vowel match, phone-count similarity, spelling similarity and consonant
    similarity. The result is returned as a JSON string.
    """

    name = "parody_word_suggester"
    description = "Suggests rhyming funny words using CMU dictionary pronunciations."
    inputs = {
        'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'},
        'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'},
        'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'},
        'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True, 'default': None},
    }
    output_type = "string"

    # Weights of the four components of the combined score (they sum to 1.0).
    RHYME_WEIGHT = 0.6
    PHONE_PATTERN_WEIGHT = 0.2
    CHAR_DIFF_WEIGHT = 0.1
    CONSONANT_WEIGHT = 0.1

    # "|"-separated groups of ","-separated consonant phones. Two different
    # consonants from the same group earn half credit when compared.
    CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"

    def _get_consonant_groups(self):
        """Parse ``CONSONANT_REF`` into consonant similarity groups.

        Returns:
            A list of groups, each a list of phone symbols,
            e.g. ``[["M", "N", "NG"], ["P", "B"], ...]``.
        """
        return [group_str.split(",") for group_str in self.CONSONANT_REF.split("|")]

    def _get_word_phones(self, word, custom_phones=None):
        """Get phones for a word, checking the custom dictionary first.

        Args:
            word: The word to look up, used as-is as the lookup key.
            custom_phones: Optional mapping of word -> entry, where each entry
                is indexed with ``"primary_phones"``.

        Returns:
            The custom ``"primary_phones"`` value when the word is in
            ``custom_phones``; otherwise the first pronunciation returned by
            ``pronouncing.phones_for_word``, or ``None`` when there is none.

        Raises:
            KeyError: If a custom entry has no ``"primary_phones"`` key.
        """
        if custom_phones and word in custom_phones:
            return custom_phones[word]["primary_phones"]

        import pronouncing
        phones = pronouncing.phones_for_word(word)
        # Only the first listed pronunciation is used.
        return phones[0] if phones else None

    def _get_primary_vowel(self, phones: list) -> "str | None":
        """Get the primary stressed vowel from a phone list.

        Args:
            phones: Phone symbols such as ``["K", "AE1", "T"]``.

        Returns:
            The first phone that carries the primary-stress marker ``1`` and
            contains a vowel letter, with trailing stress digits removed
            (``"AE1"`` -> ``"AE"``), or ``None`` if no phone qualifies.
        """
        vowel_chars = 'AEIOU'
        for phone_str in phones:
            if '1' in phone_str and any(vowel_char in phone_str for vowel_char in vowel_chars):
                return phone_str.rstrip('012')
        return None

    def _count_char_changes(self, word1: str, word2: str) -> int:
        """Count position-wise character mismatches plus the length difference."""
        mismatches = sum(1 for char1, char2 in zip(word1, word2) if char1 != char2)
        return mismatches + abs(len(word1) - len(word2))

    def _calculate_char_difference(self, word1: str, word2: str) -> float:
        """Calculate a spelling similarity score.

        Args:
            word1: First word.
            word2: Second word.

        Returns:
            ``1.0`` for identical words, decreasing towards ``0.0`` as more
            characters differ; ``0.0`` if either word is empty.
        """
        if not word1 or not word2:
            return 0.0

        # Both words are non-empty here, so the divisor is at least 1.
        longest = max(len(word1), len(word2))
        return 1.0 - (self._count_char_changes(word1, word2) / longest)

    def _calculate_consonant_similarity(self, phone_list1: list, phone_list2: list) -> float:
        """Calculate a consonant similarity score.

        Consonants (phones containing none of the letters ``AEIOU``) are
        compared pairwise in order: an exact match scores 1, two different
        consonants from the same ``CONSONANT_REF`` group score 0.5.

        Args:
            phone_list1: Phones of the first word.
            phone_list2: Phones of the second word.

        Returns:
            The total score divided by the number of compared pairs (the
            shorter consonant count), or ``0.0`` if either word has no
            consonants.
        """
        vowel_chars = 'AEIOU'
        consonants1 = [phone_str for phone_str in phone_list1
                       if not any(vowel_char in phone_str for vowel_char in vowel_chars)]
        consonants2 = [phone_str for phone_str in phone_list2
                       if not any(vowel_char in phone_str for vowel_char in vowel_chars)]

        if not consonants1 or not consonants2:
            return 0.0

        consonant_groups = self._get_consonant_groups()
        matches = 0.0

        # zip() stops at the shorter list, which is why the divisor below is
        # the smaller of the two consonant counts.
        for cons1, cons2 in zip(consonants1, consonants2):
            cons1 = cons1.rstrip('012')
            cons2 = cons2.rstrip('012')

            if cons1 == cons2:
                matches += 1.0
            elif any(cons1 in group and cons2 in group for group in consonant_groups):
                matches += 0.5

        return matches / min(len(consonants1), len(consonants2))

    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Calculate similarity based on multiple weighted factors.

        Args:
            word1: Spelling of the first word.
            phones1: Space-separated phone string of the first word.
            word2: Spelling of the second word.
            phones2: Space-separated phone string of the second word.

        Returns:
            A dict with the combined ``"similarity"``, the four component
            scores (all rounded to 3 decimals) and a ``"details"`` dict
            holding the primary vowels, phone counts and the number of
            character changes between the two spellings.
        """
        phone_list1 = phones1.split()
        phone_list2 = phones2.split()

        # 1. Rhyme score - all or nothing on the primary stressed vowel.
        vowel1 = self._get_primary_vowel(phone_list1)
        vowel2 = self._get_primary_vowel(phone_list2)
        rhyme_score = 1.0 if vowel1 and vowel2 and vowel1 == vowel2 else 0.0

        # 2. Phone pattern score - based on the number of phones. The equal
        #    length branch also protects against dividing by zero when both
        #    lists are empty.
        if len(phone_list1) == len(phone_list2):
            phone_score = 1.0
        else:
            phone_score = 1.0 - (abs(len(phone_list1) - len(phone_list2)) / max(len(phone_list1), len(phone_list2)))

        # 3. Character difference score.
        char_diff_score = self._calculate_char_difference(word1, word2)

        # 4. Consonant similarity score.
        consonant_score = self._calculate_consonant_similarity(phone_list1, phone_list2)

        similarity = (
            (rhyme_score * self.RHYME_WEIGHT) +
            (phone_score * self.PHONE_PATTERN_WEIGHT) +
            (char_diff_score * self.CHAR_DIFF_WEIGHT) +
            (consonant_score * self.CONSONANT_WEIGHT)
        )

        return {
            "similarity": round(similarity, 3),
            "rhyme_score": round(rhyme_score, 3),
            "phone_score": round(phone_score, 3),
            "char_diff_score": round(char_diff_score, 3),
            "consonant_score": round(consonant_score, 3),
            "details": {
                "primary_vowel1": vowel1,
                "primary_vowel2": vowel2,
                "phone_count1": len(phone_list1),
                "phone_count2": len(phone_list2),
                # Same change count that feeds char_diff_score, so that
                # same-length substitutions are reported too.
                "char_differences": self._count_char_changes(word1, word2)
            }
        }

    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5", custom_phones: dict = None) -> str:
        """Score every candidate word against ``target`` and return JSON.

        Args:
            target: Word to find sound-alike alternatives for. It is
                lower-cased and stripped of surrounding punctuation.
            word_list_str: JSON array of candidate words.
            min_similarity: Minimum combined score (as a string or number)
                a candidate needs to be listed; ``None`` means ``0.5``.
            custom_phones: Optional mapping of word -> entry with a
                ``"primary_phones"`` key, consulted before the dictionary.

        Returns:
            A JSON string. On success it holds ``"target"``,
            ``"target_phones"``, ``"invalid_words"`` and ``"suggestions"``
            (sorted by descending similarity). On failure it holds an
            ``"error"`` message and an empty ``"suggestions"`` list.
        """
        # NOTE(review): `pronouncing` is not referenced directly in this
        # method; the import is kept from the original - presumably so the
        # dependency is declared at the tool's entry point. Confirm before
        # removing.
        import pronouncing
        import string
        import json

        target = target.lower().strip(string.punctuation)

        # `min_similarity` is declared nullable in `inputs`, so None must not
        # crash; it falls back to the documented default.
        if min_similarity is None:
            threshold = 0.5
        else:
            try:
                threshold = float(min_similarity)
            except (TypeError, ValueError):
                return json.dumps({
                    "error": f"Invalid min_similarity value: {min_similarity!r}",
                    "suggestions": []
                }, indent=2)

        # Parse JSON string to list
        try:
            words = json.loads(word_list_str)
        except (json.JSONDecodeError, TypeError):
            return json.dumps({
                "error": "Invalid JSON string for word_list_str",
                "suggestions": []
            }, indent=2)

        # A JSON string would otherwise be iterated character by character,
        # and other scalars would raise while iterating.
        if not isinstance(words, list):
            return json.dumps({
                "error": "word_list_str must be a JSON array of words",
                "suggestions": []
            }, indent=2)

        # Get target pronunciation
        target_phones = self._get_word_phones(target, custom_phones)
        if not target_phones:
            return json.dumps({
                "error": f"Target word '{target}' not found in dictionary or custom phones",
                "suggestions": []
            }, indent=2)

        # Split the candidates into pronounceable and unknown words, keeping
        # each pronunciation so it is looked up only once.
        candidates = []
        invalid_words = []
        for raw_word in words:
            if not isinstance(raw_word, str):
                # Values decoded from JSON are always JSON-serializable, so
                # they can be echoed back unchanged.
                invalid_words.append(raw_word)
                continue

            word = raw_word.lower().strip(string.punctuation)
            word_phones = self._get_word_phones(word, custom_phones)
            if word_phones:
                candidates.append((word, word_phones))
            else:
                invalid_words.append(word)

        if not candidates:
            return json.dumps({
                "error": "No valid words found in dictionary or custom phones",
                "invalid_words": invalid_words,
                "suggestions": []
            }, indent=2)

        # Score each candidate and keep those reaching the threshold
        suggestions = []
        for word, word_phones in candidates:
            similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
            if similarity_result["similarity"] < threshold:
                continue

            suggestions.append({
                "word": word,
                "similarity": similarity_result["similarity"],
                "rhyme_score": similarity_result["rhyme_score"],
                "phone_score": similarity_result["phone_score"],
                "char_diff_score": similarity_result["char_diff_score"],
                "consonant_score": similarity_result["consonant_score"],
                "phones": word_phones,
                "is_custom": bool(custom_phones) and word in custom_phones,
                "details": similarity_result["details"]
            })

        # Sort by similarity score descending (stable: ties keep input order)
        suggestions.sort(key=lambda x: x["similarity"], reverse=True)

        result = {
            "target": target,
            "target_phones": target_phones,
            "invalid_words": invalid_words,
            "suggestions": suggestions
        }

        return json.dumps(result, indent=2)

    def __init__(self, *args, **kwargs):
        """Mark the tool as not yet initialized; any arguments are ignored."""
        self.is_initialized = False