File size: 7,310 Bytes
d53b325
5da935d
 
2898d37
d53b325
 
 
9bc766a
fd49873
d53b325
2898d37
 
 
91c8f98
fd49873
 
 
 
 
 
 
 
 
 
9bc766a
 
2898d37
 
 
 
9bc766a
0111e58
 
2898d37
 
 
 
9bc766a
 
2898d37
 
 
9bc766a
 
2898d37
9bc766a
2898d37
 
 
9bc766a
2898d37
 
 
 
 
 
 
 
 
0111e58
 
6e8a124
9bc766a
 
91c8f98
 
 
2898d37
9bc766a
 
2898d37
6e8a124
2898d37
 
6e8a124
2898d37
 
6e8a124
9bc766a
 
 
2898d37
 
9bc766a
6e8a124
91c8f98
 
6e8a124
2898d37
 
6e8a124
9bc766a
 
 
 
2898d37
6e8a124
91c8f98
 
 
fd49873
d53b325
c0a5ec1
d53b325
 
9bc766a
c0a5ec1
d53b325
 
91c8f98
 
d53b325
aae0d96
 
 
 
 
 
 
 
c0a5ec1
9bc766a
fd49873
d53b325
 
fd49873
3910298
 
 
9bc766a
3910298
 
fd49873
3910298
 
 
 
 
 
fd49873
3910298
d53b325
 
e13fea5
c0a5ec1
3910298
fd49873
 
91c8f98
d53b325
91c8f98
d53b325
 
91c8f98
6e8a124
2898d37
 
e13fea5
fd49873
6e8a124
d53b325
 
 
 
 
 
 
33a37e4
91c8f98
d53b325
 
 
 
 
98f1f92
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
from smolagents.tools import Tool
import pronouncing
import json
import string

class ParodyWordSuggestionTool(Tool):
    # Tool that scores candidate words by how closely their pronunciation
    # matches a target word and reports the ones above a threshold.

    # Identifier and short human-readable summary of the tool.
    name = "parody_word_suggester"
    description = "Suggests rhyming funny words using CMU dictionary pronunciations."
    # Schema of the arguments taken by ``forward``. ``min_similarity`` is
    # declared as a string here and converted with ``float()`` in ``forward``.
    inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True, 'default': None}}
    # ``forward`` returns its result as a JSON-encoded string.
    output_type = "string"
    # Weights of the three component scores combined in
    # ``_calculate_similarity``; together they sum to 1.0.
    RHYME_WEIGHT = 0.5
    PHONE_SEQUENCE_WEIGHT = 0.3
    LENGTH_WEIGHT = 0.2

    def _get_word_phones(self, word, custom_phones=None):
        """Get phones for a word, checking custom dictionary first."""
        if custom_phones and word in custom_phones:
            return custom_phones[word]["primary_phones"]
        
        import pronouncing
        phones = pronouncing.phones_for_word(word)
        return phones[0] if phones else None


    def _get_primary_vowel(self, phones: list) -> str:
        """Get the primary stressed vowel from phone list."""
        v = ""
        for phone in phones:
            if '1' in phone and any(v in phone for v in 'AEIOU'):
                return phone.rstrip('012')
        return None


    def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
        """Calculate similarity based on matching phones in sequence."""
        p = ""
        if not phones1 or not phones2:
            return 0.0
    
        # Strip stress markers for comparison
        clean_phones1 = [p.rstrip('012') for p in phones1]
        clean_phones2 = [p.rstrip('012') for p in phones2]
    
        matches = 0
        total_comparisons = max(len(clean_phones1), len(clean_phones2))
    
        # Compare phones in sequence
        for i in range(min(len(clean_phones1), len(clean_phones2))):
            if clean_phones1[i] == clean_phones2[i]:
                matches += 1
            
        return matches / total_comparisons if total_comparisons > 0 else 0.0


    def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
        """Calculate similarity based on phone length."""
        max_length = max(len(phones1), len(phones2))
        length_diff = abs(len(phones1) - len(phones2))
        return 1.0 - (length_diff / max_length) if max_length > 0 else 0.0


    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Calculate similarity based on multiple factors."""
        # Initialize phone lists
        phone_list1 = phones1.split()
        phone_list2 = phones2.split()
    
        # 1. Rhyme score (50%) - based on primary vowel
        vowel1 = self._get_primary_vowel(phone_list1)
        vowel2 = self._get_primary_vowel(phone_list2)
        rhyme_score = 1.0 if vowel1 and vowel2 and vowel1 == vowel2 else 0.0
    
        # 2. Phone sequence similarity (30%)
        phone_sequence_score = self._calculate_phone_sequence_similarity(phone_list1, phone_list2)
    
        # 3. Length similarity (20%)
        length_score = self._calculate_length_similarity(phone_list1, phone_list2)
    
        # Combined weighted score
        similarity = (
            (rhyme_score * self.RHYME_WEIGHT) +
            (phone_sequence_score * self.PHONE_SEQUENCE_WEIGHT) +
            (length_score * self.LENGTH_WEIGHT)
        )
    
        return {
            "similarity": round(similarity, 3),
            "rhyme_score": round(rhyme_score, 3),
            "phone_sequence_score": round(phone_sequence_score, 3),
            "length_score": round(length_score, 3),
            "details": {
                "primary_vowel1": vowel1,
                "primary_vowel2": vowel2,
                "phone_count1": len(phone_list1),
                "phone_count2": len(phone_list2),
                "matching_phones": round(phone_sequence_score * len(phone_list1))
            }
        }


    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5", custom_phones: dict = None) -> str:
        import pronouncing
        import string
        import json
    
        # Initialize variables
        target = target.lower().strip(string.punctuation)
        min_similarity = float(min_similarity)
        suggestions = []
        valid_words = []
        invalid_words = []
    
        # Parse JSON string to list
        try:
            words = json.loads(word_list_str)
        except json.JSONDecodeError:
            return json.dumps({
                "error": "Invalid JSON string for word_list_str",
                "suggestions": []
            }, indent=2)
        
        # Get target pronunciation
        target_phones = self._get_word_phones(target, custom_phones)
        if not target_phones:
            return json.dumps({
                "error": f"Target word '{target}' not found in dictionary or custom phones",
                "suggestions": []
            }, indent=2)
    
        # Filter word list
        for word in words:
            word = word.lower().strip(string.punctuation)
            if self._get_word_phones(word, custom_phones):
                valid_words.append(word)
            else:
                invalid_words.append(word)
    
        if not valid_words:
            return json.dumps({
                "error": "No valid words found in dictionary or custom phones",
                "invalid_words": invalid_words,
                "suggestions": []
            }, indent=2)
    
        # Check each word
        for word in valid_words:
            word_phones = self._get_word_phones(word, custom_phones)
            if word_phones:
                similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
            
                if similarity_result["similarity"] >= min_similarity:
                    suggestions.append({
                        "word": word,
                        "similarity": similarity_result["similarity"],
                        "rhyme_score": similarity_result["rhyme_score"],
                        "phone_sequence_score": similarity_result["phone_sequence_score"],
                        "length_score": similarity_result["length_score"],
                        "phones": word_phones,
                        "is_custom": word in custom_phones if custom_phones else False,
                        "details": similarity_result["details"]
                    })
    
        # Sort by similarity score descending
        suggestions.sort(key=lambda x: x["similarity"], reverse=True)
    
        result = {
            "target": target,
            "target_phones": target_phones,
            "invalid_words": invalid_words,
            "suggestions": suggestions
        }
    
        return json.dumps(result, indent=2)


    def __init__(self, *args, **kwargs):
        """Create the tool with its ``is_initialized`` flag set to ``False``.

        Positional and keyword arguments are accepted but not used here.
        """
        # NOTE(review): the base class initializer is not called; presumably
        # this mirrors the framework's own constructor - confirm against Tool.
        self.is_initialized = False