File size: 6,538 Bytes
d53b325
e53cc7e
f9e805b
0111e58
f9e805b
d53b325
 
 
19ee3db
 
 
d53b325
 
0111e58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19ee3db
 
d53b325
c0a5ec1
d53b325
f9e805b
d53b325
0111e58
 
c0a5ec1
d53b325
 
 
aae0d96
 
 
 
 
 
 
 
c0a5ec1
d53b325
 
 
 
 
 
 
 
33a37e4
 
d53b325
0111e58
 
 
 
 
 
 
 
c0a5ec1
 
f9e805b
d53b325
9d3290e
33a37e4
 
 
0111e58
 
 
 
 
 
 
 
f9e805b
 
0111e58
 
 
 
 
 
 
 
 
 
 
 
6280809
f9e805b
6280809
 
f9e805b
6280809
0111e58
f9e805b
d53b325
0111e58
f9e805b
d53b325
 
 
 
 
0111e58
 
f9e805b
 
 
 
d53b325
 
 
 
 
 
 
 
33a37e4
d53b325
 
 
 
 
c0a5ec1
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
from smolagents.tools import Tool
import pronouncing
import difflib
import json
import string

class ParodyWordSuggestionTool(Tool):
    """Rank candidate words by how well they rhyme with a target word.

    Pronunciations are looked up through the ``pronouncing`` package (CMU
    dictionary). Each candidate gets a combined score made of a rhyme score
    (60%), a syllable-count match (25%) and a plain string similarity (15%).

    NOTE(review): ``forward`` re-imports its dependencies at function scope and
    the class attributes are plain literals; this looks like the smolagents
    convention for self-contained, serializable tools -- confirm before
    moving anything to module level.
    """

    name = "parody_word_suggester"
    description = """Suggests rhyming funny words using CMU dictionary pronunciations.

    Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
    inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True}}
    output_type = "string"

    def _are_vowels_similar(self, v1: str, v2: str) -> bool:
        """Check if two vowel phones are similar enough to rhyme.

        Trailing stress digits (``0``, ``1``, ``2``) are ignored.

        Args:
            v1: First vowel phone, e.g. ``"AH0"``.
            v2: Second vowel phone, e.g. ``"UH1"``.

        Returns:
            True when both phones are the same vowel or belong to the same
            group of near-equivalent vowels, otherwise False.
        """
        # Strip stress markers so only the vowel quality is compared
        v1 = v1.rstrip('012')
        v2 = v2.rstrip('012')

        # Direct match
        if v1 == v2:
            return True

        # Groups of vowel sounds treated as interchangeable for rhyming.
        # 'UH' is deliberately in two groups; similarity is not transitive.
        similar_vowels = (
            {'AH', 'UH'},  # Short u sounds
            {'AE', 'EH'},  # Short e/a sounds
            {'IY', 'IH'},  # Long/short i sounds
            {'AO', 'AA'},  # Open o/a sounds
            {'UW', 'UH'},  # Long/short oo sounds
        )
        return any(v1 in group and v2 in group for group in similar_vowels)

    def _contains_vowel(self, phone: str, vowels: list) -> bool:
        """Return True if ``phone`` contains any of the strings in ``vowels``."""
        return any(v in phone for v in vowels)

    def _last_vowel_index(self, phone_list: list, vowels: list) -> int:
        """Return the index of the last vowel phone in ``phone_list``, or -1 if none."""
        for idx in range(len(phone_list) - 1, -1, -1):
            if self._contains_vowel(phone_list[idx], vowels):
                return idx
        return -1

    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5") -> str:
        """Get rhyming word suggestions.

        Args:
            target: Word to find rhyming alternatives for. It is lower-cased
                and surrounding punctuation is stripped before lookup.
            word_list_str: JSON array of candidate words, e.g.
                ``'["word1", "word2"]'``. Entries that are not strings or
                have no known pronunciation are skipped.
            min_similarity: Minimum combined score (parsed with ``float``) a
                candidate needs to be included. ``None`` falls back to the
                default of ``"0.5"`` because the input is declared nullable.

        Returns:
            A JSON string. On success it holds ``target``,
            ``target_syllables``, ``target_phones`` and ``suggestions``
            (sorted by descending ``similarity``). On invalid input or an
            unknown target it holds ``error`` and an empty ``suggestions``
            list.
        """
        import pronouncing
        import string
        import json
        from difflib import SequenceMatcher

        VOWEL_LETTERS = ['A', 'E', 'I', 'O', 'U']

        target = target.lower().strip(string.punctuation)

        # The input schema marks min_similarity as nullable, so None must
        # behave like "argument omitted" instead of crashing in float().
        if min_similarity is None:
            min_similarity = "0.5"
        try:
            threshold = float(min_similarity)
        except (TypeError, ValueError):
            return json.dumps({
                "error": "Invalid min_similarity; expected a number between 0.0 and 1.0",
                "suggestions": []
            }, indent=2)

        # Parse JSON string to list
        try:
            words = json.loads(word_list_str)
        except (json.JSONDecodeError, TypeError):
            return json.dumps({
                "error": "Invalid JSON string for word_list_str",
                "suggestions": []
            }, indent=2)

        # Valid JSON that is not an array (a bare string, number, object...)
        # would otherwise be iterated character-by-character or key-by-key.
        if not isinstance(words, list):
            return json.dumps({
                "error": "word_list_str must be a JSON array of words",
                "suggestions": []
            }, indent=2)

        # Get target pronunciation
        target_pronunciations = pronouncing.phones_for_word(target)
        if not target_pronunciations:
            return json.dumps({
                "error": f"'{target}' not found in CMU dictionary",
                "suggestions": []
            }, indent=2)

        # Only the first listed pronunciation is considered
        target_phones = target_pronunciations[0]
        target_phone_list = target_phones.split()
        target_vowel_idx = self._last_vowel_index(target_phone_list, VOWEL_LETTERS)
        target_has_vowel = target_vowel_idx >= 0

        # Loop-invariant: computed once and reused for every candidate
        target_syl = pronouncing.syllable_count(target_phones)

        suggestions = []
        for word in words:
            # Non-string JSON values (numbers, null, nested lists) cannot be words
            if not isinstance(word, str):
                continue

            word = word.lower().strip(string.punctuation)
            pronunciations = pronouncing.phones_for_word(word)
            if not pronunciations:
                continue

            word_phones = pronunciations[0]
            word_phone_list = word_phones.split()
            word_vowel_idx = self._last_vowel_index(word_phone_list, VOWEL_LETTERS)

            # 1. Rhyme score (most important - 60%)
            # Kept as the integer 0 when there is no rhyme so the serialized
            # "rhyme_score" stays 0 rather than 0.0.
            rhyme_score = 0
            if (
                target_has_vowel
                and word_vowel_idx >= 0
                and self._are_vowels_similar(
                    word_phone_list[word_vowel_idx],
                    target_phone_list[target_vowel_idx],
                )
            ):
                word_tail = word_phone_list[word_vowel_idx:]
                target_tail = target_phone_list[target_vowel_idx:]
                if word_tail == target_tail:
                    # Everything from the last vowel onward is identical
                    rhyme_score = 1.0
                elif (
                    len(word_tail) > 1
                    and len(target_tail) > 1
                    and word_phone_list[-1] == target_phone_list[-1]
                ):
                    # Partial match: both have phones after the last vowel
                    # and share the same final phone
                    rhyme_score = 0.8

            # 2. Syllable match (25%)
            word_syl = pronouncing.syllable_count(word_phones)
            syllable_score = 1.0 if target_syl == word_syl else 0.0

            # 3. Overall spelling similarity (15%)
            string_similarity = SequenceMatcher(None, target, word).ratio()

            # Combined score
            similarity = (rhyme_score * 0.6) + (syllable_score * 0.25) + (string_similarity * 0.15)
            if similarity < threshold:
                continue

            suggestions.append({
                "word": word,
                "similarity": round(similarity, 3),
                "rhyme_match": rhyme_score > 0,
                "rhyme_score": round(rhyme_score, 3),
                "syllable_match": syllable_score == 1.0,
                "string_similarity": round(string_similarity, 3),
                "syllables": word_syl,
                "phones": word_phones
            })

        # Sort by similarity score descending (stable: ties keep input order)
        suggestions.sort(key=lambda x: x["similarity"], reverse=True)

        result = {
            "target": target,
            "target_syllables": target_syl,
            "target_phones": target_phones,
            "suggestions": suggestions
        }

        return json.dumps(result, indent=2)

    def __init__(self, *args, **kwargs):
        # Accepts and ignores any arguments; only records that setup has not run.
        self.is_initialized = False