Spaces:
Configuration error
Configuration error
Upload tool
Browse files- requirements.txt +1 -1
- tool.py +105 -130
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
pronouncing
|
2 |
smolagents
|
|
|
|
|
|
1 |
smolagents
|
2 |
+
pronouncing
|
tool.py
CHANGED
@@ -1,13 +1,14 @@
|
|
1 |
from smolagents.tools import Tool
|
2 |
-
import string
|
3 |
import pronouncing
|
4 |
import json
|
|
|
|
|
5 |
|
6 |
class ParodyWordSuggestionTool(Tool):
|
7 |
name = "parody_word_suggester"
|
8 |
-
description = """Suggests rhyming funny words using CMU dictionary
|
9 |
Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
|
10 |
-
inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', '
|
11 |
output_type = "string"
|
12 |
VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
|
13 |
CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"
|
@@ -20,14 +21,12 @@ class ParodyWordSuggestionTool(Tool):
|
|
20 |
return groups
|
21 |
|
22 |
|
23 |
-
def
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
phones = pronouncing.phones_for_word(word)
|
30 |
-
return phones[0] if phones else None
|
31 |
|
32 |
|
33 |
def _get_last_syllable(self, phones: list) -> tuple:
|
@@ -72,132 +71,109 @@ class ParodyWordSuggestionTool(Tool):
|
|
72 |
return False
|
73 |
|
74 |
|
75 |
-
def
|
76 |
-
"""
|
77 |
-
|
78 |
-
|
79 |
-
suffix_phones = []
|
80 |
-
phone1 = ""
|
81 |
-
phone2 = ""
|
82 |
-
|
83 |
-
# Common suffix patterns in CMU phonetic representation
|
84 |
-
SUFFIXES = {
|
85 |
-
'ING': ['IH0', 'NG'], # -ing
|
86 |
-
'ED': ['EH0', 'D'], # -ed
|
87 |
-
'ER': ['ER0'], # -er
|
88 |
-
'EST': ['EH0', 'S', 'T'], # -est
|
89 |
-
'LY': ['L', 'IY0'], # -ly
|
90 |
-
'NESS': ['N', 'EH0', 'S'], # -ness
|
91 |
-
}
|
92 |
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
-
return phones, []
|
100 |
-
|
101 |
-
|
102 |
-
def _get_consonant_groups(self):
|
103 |
-
"""Get consonant groups from reference string."""
|
104 |
-
groups = []
|
105 |
-
group_strs = self.CONSONANT_REF.split("|")
|
106 |
-
for group_str in group_strs:
|
107 |
-
groups.append(group_str.split(","))
|
108 |
-
return groups
|
109 |
-
|
110 |
-
|
111 |
-
def _consonants_similarity(self, c1: str, c2: str) -> float:
|
112 |
-
"""Calculate similarity score between two consonants."""
|
113 |
-
if c1 == c2:
|
114 |
-
return 1.0
|
115 |
-
|
116 |
-
# Check if they're in the same group
|
117 |
-
consonant_groups = self._get_consonant_groups()
|
118 |
-
for group in consonant_groups:
|
119 |
-
if c1 in group and c2 in group:
|
120 |
-
# Nasals (first group) are more similar to each other
|
121 |
-
if group == consonant_groups[0]: # M,N,NG group
|
122 |
-
return 0.8
|
123 |
-
return 0.5
|
124 |
-
|
125 |
-
return 0.0
|
126 |
-
|
127 |
-
|
128 |
-
def _calculate_similarity(self, word1, phones1, word2, phones2):
|
129 |
-
"""Calculate similarity score with enhanced consonant matching."""
|
130 |
-
# Initialize variables as before
|
131 |
phone_list1 = phones1.split()
|
132 |
phone_list2 = phones2.split()
|
133 |
|
134 |
-
# Get
|
135 |
-
|
136 |
-
|
137 |
-
primary_vowel1 = None
|
138 |
-
primary_vowel2 = None
|
139 |
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
|
|
|
|
|
|
145 |
|
146 |
-
|
147 |
-
if
|
148 |
-
|
149 |
-
|
150 |
-
break
|
151 |
-
|
152 |
-
# Calculate vowel similarity (50% of total score)
|
153 |
-
vowel_score = 0.0
|
154 |
-
if primary_vowel1 and primary_vowel2:
|
155 |
-
if primary_vowel1 == primary_vowel2:
|
156 |
-
vowel_score = 1.0
|
157 |
-
elif self._vowels_match(primary_vowel1, primary_vowel2):
|
158 |
-
vowel_score = 0.8
|
159 |
-
|
160 |
-
# Calculate consonant similarity (30% of total score)
|
161 |
-
consonant_score = 0.0
|
162 |
-
if vowel_idx1 >= 0 and vowel_idx2 >= 0:
|
163 |
-
# Compare consonants around the stressed vowel
|
164 |
-
pre_c1 = phone_list1[vowel_idx1-1] if vowel_idx1 > 0 else None
|
165 |
-
pre_c2 = phone_list2[vowel_idx2-1] if vowel_idx2 > 0 else None
|
166 |
-
post_c1 = phone_list1[vowel_idx1+1] if vowel_idx1 < len(phone_list1)-1 else None
|
167 |
-
post_c2 = phone_list2[vowel_idx2+1] if vowel_idx2 < len(phone_list2)-1 else None
|
168 |
-
|
169 |
-
if pre_c1 and pre_c2:
|
170 |
-
consonant_score += self._consonants_similarity(pre_c1, pre_c2)
|
171 |
-
if post_c1 and post_c2:
|
172 |
-
consonant_score += self._consonants_similarity(post_c1, post_c2)
|
173 |
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
if len(phone_list1) == len(phone_list2):
|
179 |
pattern_score = 1.0
|
180 |
else:
|
181 |
pattern_score = 1.0 - (abs(len(phone_list1) - len(phone_list2)) / max(len(phone_list1), len(phone_list2)))
|
182 |
-
|
183 |
-
#
|
184 |
similarity = (
|
185 |
-
(
|
186 |
-
(
|
187 |
-
(pattern_score * 0.
|
188 |
)
|
189 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
return {
|
191 |
"similarity": round(similarity, 3),
|
192 |
-
"
|
193 |
-
"
|
194 |
"pattern_score": round(pattern_score, 3),
|
195 |
-
"
|
196 |
-
|
|
|
|
|
|
|
|
|
197 |
}
|
198 |
|
199 |
|
200 |
-
def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5"
|
201 |
import pronouncing
|
202 |
import string
|
203 |
import json
|
@@ -230,38 +206,39 @@ class ParodyWordSuggestionTool(Tool):
|
|
230 |
}, indent=2)
|
231 |
|
232 |
# Get target pronunciation
|
233 |
-
target_phones =
|
234 |
if not target_phones:
|
235 |
return json.dumps({
|
236 |
-
"error": f"Target word '{target}' not found in dictionary
|
237 |
"suggestions": []
|
238 |
}, indent=2)
|
239 |
|
240 |
-
# Filter word list
|
241 |
valid_words = []
|
242 |
invalid_words = []
|
243 |
for word in words:
|
244 |
word = word.lower().strip(string.punctuation)
|
245 |
-
if
|
246 |
valid_words.append(word)
|
247 |
else:
|
248 |
invalid_words.append(word)
|
249 |
|
250 |
if not valid_words:
|
251 |
return json.dumps({
|
252 |
-
"error": "No valid words found in dictionary
|
253 |
"invalid_words": invalid_words,
|
254 |
"suggestions": []
|
255 |
}, indent=2)
|
256 |
|
|
|
257 |
target_phone_list = target_phones.split()
|
258 |
target_vowel, target_end = self._get_last_syllable(target_phone_list)
|
259 |
|
260 |
-
# Check each word
|
261 |
# Check each word
|
262 |
for word in valid_words:
|
263 |
-
|
264 |
-
if
|
|
|
265 |
similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
|
266 |
|
267 |
if similarity_result["similarity"] >= min_similarity:
|
@@ -271,15 +248,13 @@ class ParodyWordSuggestionTool(Tool):
|
|
271 |
suggestions.append({
|
272 |
"word": word,
|
273 |
"similarity": similarity_result["similarity"],
|
274 |
-
"
|
275 |
-
"
|
276 |
"pattern_score": similarity_result["pattern_score"],
|
277 |
-
"primary_vowels": similarity_result["primary_vowels"],
|
278 |
-
"consonants": similarity_result["consonants"],
|
279 |
"phones": word_phones,
|
280 |
"last_vowel": word_vowel,
|
281 |
"ending": " ".join(word_end) if word_end else "",
|
282 |
-
"
|
283 |
})
|
284 |
|
285 |
# Sort by similarity score descending
|
|
|
1 |
from smolagents.tools import Tool
|
|
|
2 |
import pronouncing
|
3 |
import json
|
4 |
+
import string
|
5 |
+
import difflib
|
6 |
|
7 |
class ParodyWordSuggestionTool(Tool):
|
8 |
name = "parody_word_suggester"
|
9 |
+
description = """Suggests rhyming funny words using CMU dictionary pronunciations.
|
10 |
Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
|
11 |
+
inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'}}
|
12 |
output_type = "string"
|
13 |
VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
|
14 |
CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"
|
|
|
21 |
return groups
|
22 |
|
23 |
|
24 |
+
def _get_consonant_groups(self):
    """Return the consonant groups parsed from ``self.CONSONANT_REF``.

    The reference string is split on "|" into groups, and each group is
    split on "," into its member symbols.

    Returns:
        list[list[str]]: one list of consonant symbols per group, in the
        order the groups appear in ``CONSONANT_REF``.
    """
    return [group_str.split(",") for group_str in self.CONSONANT_REF.split("|")]
|
|
|
|
|
30 |
|
31 |
|
32 |
def _get_last_syllable(self, phones: list) -> tuple:
|
|
|
71 |
return False
|
72 |
|
73 |
|
74 |
+
def _calculate_similarity(self, word1, phones1, word2, phones2):
    """Calculate similarity with heavy emphasis on rhyming.

    The final score is a weighted blend of three components:
    rhyme (60%), spelling similarity (25%) and phone-count similarity (15%).

    Args:
        word1: First word (its spelling is used for the string score).
        phones1: Whitespace-separated phone string for ``word1``.
        word2: Second word.
        phones2: Whitespace-separated phone string for ``word2``.

    Returns:
        dict with keys ``similarity``, ``rhyme_score``, ``string_score`` and
        ``pattern_score`` (each rounded to 3 decimals) plus a ``details``
        dict holding ``last_vowel_match``, ``similar_vowels``,
        ``ending_match`` and ``string_length_diff``.
    """
    # Kept function-local, as in the original method.
    from difflib import SequenceMatcher

    phone_list1 = phones1.split()
    phone_list2 = phones2.split()

    # Last vowel and trailing phones of each word, as reported by the helper.
    vowel1, end1 = self._get_last_syllable(phone_list1)
    vowel2, end2 = self._get_last_syllable(phone_list2)

    # Vowel comparison is computed once and reused for score and details.
    have_vowels = bool(vowel1 and vowel2)
    # Trailing stress digits (0/1/2) are ignored for the exact-vowel check.
    last_vowel_match = (
        vowel1.rstrip('012') == vowel2.rstrip('012') if have_vowels else False
    )
    similar_vowels = self._vowels_match(vowel1, vowel2) if have_vowels else False

    # Rhyme score (60%): exact last vowel beats a merely similar one.
    rhyme_score = 0.0
    if last_vowel_match:
        rhyme_score = 1.0
    elif similar_vowels:
        rhyme_score = 0.8

    # NOTE(review): the scraped source lost its indentation, so whether this
    # ending check was nested under the vowel check cannot be recovered. It
    # is placed at function level, which matches how ``ending_match`` below
    # is guarded only by ``end1 and end2`` -- confirm against the real file.
    end1_clean = []
    end2_clean = []
    if end1 and end2:
        end1_clean = self._strip_stress(end1)
        end2_clean = self._strip_stress(end2)

        if end1_clean == end2_clean:
            # Perfect ending match.
            rhyme_score = min(1.0, rhyme_score + 0.2)
        else:
            # Partial ending match: 1 per identical phone, 0.5 per pair of
            # phones that share a consonant group (compared position-wise).
            consonant_groups = self._get_consonant_groups()
            matching_consonants = 0
            for c1, c2 in zip(end1_clean, end2_clean):
                if c1 == c2:
                    matching_consonants += 1
                elif any(c1 in group and c2 in group for group in consonant_groups):
                    matching_consonants += 0.5

            if matching_consonants > 0:
                rhyme_score = min(1.0, rhyme_score + (0.1 * matching_consonants))

    # String similarity (25%): compare spellings without the first letter
    # when both words have more than one character.
    if len(word1) > 1 and len(word2) > 1:
        string_score = SequenceMatcher(None, word1[1:], word2[1:]).ratio()
    else:
        string_score = SequenceMatcher(None, word1, word2).ratio()

    # Pattern/length score (15%): relative difference in phone counts.
    # The equality branch also avoids dividing by zero for two empty lists.
    if len(phone_list1) == len(phone_list2):
        pattern_score = 1.0
    else:
        pattern_score = 1.0 - (
            abs(len(phone_list1) - len(phone_list2))
            / max(len(phone_list1), len(phone_list2))
        )

    # Final weighted score.
    similarity = (
        (rhyme_score * 0.60) +
        (string_score * 0.25) +
        (pattern_score * 0.15)
    )

    # Extra boost for exact matches minus first letter.
    if len(word1) == len(word2) and word1[1:] == word2[1:]:
        similarity = min(1.0, similarity * 1.2)

    # Extra penalty for very different lengths (applied after the boost).
    string_length_diff = abs(len(word1) - len(word2))
    if string_length_diff > 2:
        similarity *= 0.7

    return {
        "similarity": round(similarity, 3),
        "rhyme_score": round(rhyme_score, 3),
        "string_score": round(string_score, 3),
        "pattern_score": round(pattern_score, 3),
        "details": {
            "last_vowel_match": last_vowel_match,
            "similar_vowels": similar_vowels,
            "ending_match": (
                " ".join(end1_clean) == " ".join(end2_clean)
                if end1 and end2 else False
            ),
            "string_length_diff": string_length_diff,
        },
    }
|
174 |
|
175 |
|
176 |
+
def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5") -> str:
|
177 |
import pronouncing
|
178 |
import string
|
179 |
import json
|
|
|
206 |
}, indent=2)
|
207 |
|
208 |
# Get target pronunciation
|
209 |
+
target_phones = pronouncing.phones_for_word(target)
|
210 |
if not target_phones:
|
211 |
return json.dumps({
|
212 |
+
"error": f"Target word '{target}' not found in CMU dictionary",
|
213 |
"suggestions": []
|
214 |
}, indent=2)
|
215 |
|
216 |
+
# Filter word list to only words in CMU dictionary
|
217 |
valid_words = []
|
218 |
invalid_words = []
|
219 |
for word in words:
|
220 |
word = word.lower().strip(string.punctuation)
|
221 |
+
if pronouncing.phones_for_word(word):
|
222 |
valid_words.append(word)
|
223 |
else:
|
224 |
invalid_words.append(word)
|
225 |
|
226 |
if not valid_words:
|
227 |
return json.dumps({
|
228 |
+
"error": "No valid words found in CMU dictionary",
|
229 |
"invalid_words": invalid_words,
|
230 |
"suggestions": []
|
231 |
}, indent=2)
|
232 |
|
233 |
+
target_phones = target_phones[0]
|
234 |
target_phone_list = target_phones.split()
|
235 |
target_vowel, target_end = self._get_last_syllable(target_phone_list)
|
236 |
|
|
|
237 |
# Check each word
|
238 |
for word in valid_words:
|
239 |
+
phones = pronouncing.phones_for_word(word)
|
240 |
+
if phones:
|
241 |
+
word_phones = phones[0]
|
242 |
similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
|
243 |
|
244 |
if similarity_result["similarity"] >= min_similarity:
|
|
|
248 |
suggestions.append({
|
249 |
"word": word,
|
250 |
"similarity": similarity_result["similarity"],
|
251 |
+
"rhyme_score": similarity_result["rhyme_score"],
|
252 |
+
"string_score": similarity_result["string_score"],
|
253 |
"pattern_score": similarity_result["pattern_score"],
|
|
|
|
|
254 |
"phones": word_phones,
|
255 |
"last_vowel": word_vowel,
|
256 |
"ending": " ".join(word_end) if word_end else "",
|
257 |
+
"details": similarity_result["details"]
|
258 |
})
|
259 |
|
260 |
# Sort by similarity score descending
|