File size: 7,338 Bytes
d53b325
71d9835
98f1f92
05e1f73
f9e805b
d53b325
 
 
19ee3db
 
 
d53b325
98f1f92
d53b325
98f1f92
 
 
 
 
 
 
05e1f73
 
 
 
 
 
98f1f92
05e1f73
 
 
 
 
98f1f92
05e1f73
 
 
 
 
 
 
 
 
 
 
 
0111e58
 
71d9835
 
 
 
 
 
 
 
05e1f73
 
 
 
 
 
 
 
98f1f92
 
05e1f73
 
 
0111e58
 
19ee3db
 
d53b325
c0a5ec1
d53b325
f9e805b
d53b325
05e1f73
c0a5ec1
d53b325
 
05e1f73
 
 
 
d53b325
aae0d96
 
 
 
 
 
 
 
c0a5ec1
d53b325
 
 
 
 
 
 
e13fea5
33a37e4
 
05e1f73
0111e58
c0a5ec1
 
f9e805b
d53b325
9d3290e
33a37e4
 
05e1f73
0111e58
f9e805b
05e1f73
 
71d9835
05e1f73
71d9835
 
 
 
 
05e1f73
71d9835
 
 
05e1f73
 
6280809
f9e805b
6280809
 
f9e805b
6280809
0111e58
f9e805b
d53b325
71d9835
f9e805b
d53b325
 
 
 
 
0111e58
 
f9e805b
 
 
e13fea5
05e1f73
71d9835
 
 
 
 
 
d53b325
 
 
 
 
 
 
 
33a37e4
05e1f73
 
d53b325
 
 
 
 
98f1f92
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
from smolagents.tools import Tool
import json
import pronouncing
import difflib
import string

class ParodyWordSuggestionTool(Tool):
    """Tool that ranks candidate words by how well they rhyme with a target word.

    Pronunciations are looked up with the ``pronouncing`` package (CMU
    dictionary).  Each candidate receives a combined score built from three
    parts: a rhyme score (last vowel + trailing phones), a syllable-count
    match, and a plain string similarity ratio.
    """

    name = "parody_word_suggester"
    description = """Suggests rhyming funny words using CMU dictionary pronunciations.

    Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
    inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True}}
    output_type = "string"
    # Groups of vowel phones treated as interchangeable for rhyming purposes.
    # Groups are separated by "|", phones inside a group by ",".
    VOWEL_REF = "AH,UH,AX|AE,EH|IY,IH|AO,AA|UW,UH|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"

    def _get_vowel_groups(self):
        """Parse ``VOWEL_REF`` into a list of vowel groups (lists of phone strings)."""
        return [group_str.split(",") for group_str in self.VOWEL_REF.split("|")]

    def _get_last_syllable(self, phones: list) -> tuple:
        """Return the last vowel of a pronunciation and the phones that follow it.

        Args:
            phones: Phone strings, optionally carrying stress digits
                (e.g. ``["K", "AE1", "T"]``).

        Returns:
            ``(vowel, tail)`` where ``vowel`` is the last phone whose
            stress-stripped form appears in ``VOWEL_REF`` (returned without
            its stress digit) and ``tail`` is the list of phones after it.
            ``(None, [])`` when no phone is a known vowel.
        """
        known_vowels = set()
        for group in self._get_vowel_groups():
            known_vowels.update(group)

        # Scan from the end: the first vowel met is the last vowel of the word.
        for idx in range(len(phones) - 1, -1, -1):
            base_phone = phones[idx].rstrip('012')
            if base_phone in known_vowels:
                return base_phone, phones[idx + 1:]

        return None, []

    def _strip_stress(self, phones: list) -> list:
        """Return a copy of ``phones`` with trailing stress digits (0/1/2) removed."""
        return [phone.rstrip('012') for phone in phones]

    def _vowels_match(self, v1: str, v2: str) -> bool:
        """Return True if two vowels are identical or share a ``VOWEL_REF`` group.

        Stress digits on either argument are ignored.
        """
        v1 = v1.rstrip('012')
        v2 = v2.rstrip('012')

        if v1 == v2:
            return True

        for group in self._get_vowel_groups():
            if v1 in group and v2 in group:
                return True
        return False

    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5") -> str:
        """Score every candidate word against ``target`` and return the matches as JSON.

        Args:
            target: Word to find rhyming alternatives for.  It is lower-cased
                and stripped of surrounding punctuation before lookup.
            word_list_str: JSON array of candidate words.
            min_similarity: Minimum combined score a candidate needs to be
                reported, given as a string.  ``None`` falls back to 0.5.

        Returns:
            A JSON string.  On success it holds the target's pronunciation
            details and a ``suggestions`` list sorted by descending
            ``similarity``.  If the word list cannot be parsed, is not a JSON
            array, or the target has no known pronunciation, it holds an
            ``error`` message and an empty ``suggestions`` list.

        Raises:
            ValueError: If ``min_similarity`` is not parseable as a float.
        """
        # Imports are kept local to the method, as in the original implementation.
        import pronouncing
        import string
        import json
        from difflib import SequenceMatcher

        target = target.lower().strip(string.punctuation)
        # `min_similarity` is declared nullable in `inputs`, so None must be accepted.
        threshold = 0.5 if min_similarity is None else float(min_similarity)

        # Parse JSON string to list (TypeError covers non-string input such as None).
        try:
            words = json.loads(word_list_str)
        except (json.JSONDecodeError, TypeError):
            return json.dumps({
                "error": "Invalid JSON string for word_list_str",
                "suggestions": []
            }, indent=2)
        if not isinstance(words, list):
            return json.dumps({
                "error": "word_list_str must be a JSON array of words",
                "suggestions": []
            }, indent=2)

        # Get target pronunciation (first listed pronunciation only).
        target_pronunciations = pronouncing.phones_for_word(target)
        if not target_pronunciations:
            return json.dumps({
                "error": f"'{target}' not found in CMU dictionary",
                "suggestions": []
            }, indent=2)

        target_phones = target_pronunciations[0]
        target_vowel, target_end = self._get_last_syllable(target_phones.split())
        # Loop-invariant target data, computed once.
        target_end_clean = self._strip_stress(target_end)
        target_syl = pronouncing.syllable_count(target_phones)

        suggestions = []
        for candidate in words:
            if not isinstance(candidate, str):
                # Non-string entries cannot be looked up; skip them.
                continue
            word = candidate.lower().strip(string.punctuation)
            pronunciations = pronouncing.phones_for_word(word)
            if not pronunciations:
                continue

            word_phones = pronunciations[0]
            word_vowel, word_end = self._get_last_syllable(word_phones.split())
            # Always derived from the current word so that `debug_info` never
            # reports values left over from a previous iteration.
            word_end_clean = self._strip_stress(word_end)
            endings_equal = word_end_clean == target_end_clean

            # 1. Rhyme score (most important - 60%)
            rhyme_score = 0.0
            if word_vowel and target_vowel and self._vowels_match(word_vowel, target_vowel):
                if endings_equal:
                    # Full rhyme; boosted above 1.0 when the spellings have
                    # equal length, so the combined score can exceed 1.0.
                    rhyme_score = 1.2 if len(word) == len(target) else 1.0
                else:
                    # Vowel matches but the trailing phones differ.
                    rhyme_score = 0.6

            # 2. Syllable match (25%)
            word_syl = pronouncing.syllable_count(word_phones)
            syllable_score = 1.0 if target_syl == word_syl else 0.0

            # 3. Overall spelling similarity (15%)
            string_similarity = SequenceMatcher(None, target, word).ratio()

            similarity = (rhyme_score * 0.6) + (syllable_score * 0.25) + (string_similarity * 0.15)
            if similarity < threshold:
                continue

            suggestions.append({
                "word": word,
                "similarity": round(similarity, 3),
                "rhyme_match": rhyme_score > 0,
                "rhyme_score": round(rhyme_score, 3),
                "syllable_match": syllable_score == 1.0,
                "string_similarity": round(string_similarity, 3),
                "syllables": word_syl,
                "phones": word_phones,
                "last_vowel": word_vowel,
                "ending": " ".join(word_end) if word_end else "",
                "debug_info": {
                    "word_end_clean": word_end_clean,
                    "target_end_clean": target_end_clean,
                    "exact_match": endings_equal
                }
            })

        # Sort by similarity score descending
        suggestions.sort(key=lambda x: x["similarity"], reverse=True)

        result = {
            "target": target,
            "target_syllables": target_syl,
            "target_phones": target_phones,
            "target_last_vowel": target_vowel,
            "target_ending": " ".join(target_end) if target_end else "",
            "suggestions": suggestions
        }

        return json.dumps(result, indent=2)

    def __init__(self, *args, **kwargs):
        # NOTE(review): the base `Tool.__init__` is not called here; presumably
        # this mirrors how smolagents serialises tools — confirm before changing.
        self.is_initialized = False