Spaces:
Configuration error
Configuration error
Upload tool
Browse files
- requirements.txt +1 -1
- tool.py +124 -54
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
smolagents
|
2 |
pronouncing
|
|
|
|
|
|
1 |
pronouncing
|
2 |
+
smolagents
|
tool.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from smolagents.tools import Tool
|
2 |
import string
|
3 |
-
import json
|
4 |
import pronouncing
|
|
|
5 |
|
6 |
class ParodyWordSuggestionTool(Tool):
|
7 |
name = "parody_word_suggester"
|
@@ -34,6 +34,7 @@ class ParodyWordSuggestionTool(Tool):
|
|
34 |
last_vowel_idx = -1
|
35 |
last_vowel = None
|
36 |
vowel_groups = self._get_vowel_groups()
|
|
|
37 |
|
38 |
# Initialize loop variables
|
39 |
i = 0
|
@@ -41,13 +42,22 @@ class ParodyWordSuggestionTool(Tool):
|
|
41 |
base_phone = ""
|
42 |
group = []
|
43 |
|
|
|
44 |
for i, phone in enumerate(phones):
|
45 |
-
|
46 |
-
for
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
last_vowel_idx = i
|
49 |
last_vowel = base_phone
|
50 |
-
break
|
51 |
|
52 |
if last_vowel_idx == -1:
|
53 |
return None, []
|
@@ -86,7 +96,7 @@ class ParodyWordSuggestionTool(Tool):
|
|
86 |
|
87 |
|
88 |
def _calculate_similarity(self, word1, phones1, word2, phones2):
|
89 |
-
"""Calculate similarity score using
|
90 |
# Initialize all variables
|
91 |
phone_list1 = phones1.split()
|
92 |
phone_list2 = phones2.split()
|
@@ -102,31 +112,57 @@ class ParodyWordSuggestionTool(Tool):
|
|
102 |
common_length = 0
|
103 |
matched = 0
|
104 |
i = 0
|
105 |
-
|
106 |
-
# Variables for near-rhyme scoring
|
107 |
-
near_rhyme_score = 0.0
|
108 |
-
consonants1 = []
|
109 |
-
consonants2 = []
|
110 |
-
matches = 0
|
111 |
-
|
112 |
-
# Variables for length and stress scoring
|
113 |
-
phone_diff = 0
|
114 |
-
max_phones = 0
|
115 |
-
length_score = 0.0
|
116 |
-
stress_score = 0.0
|
117 |
-
stress1 = ""
|
118 |
-
stress2 = ""
|
119 |
-
similarity = 0.0
|
120 |
p = ""
|
121 |
v = ""
|
122 |
|
123 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
result1 = self._get_last_syllable(phone_list1)
|
125 |
result2 = self._get_last_syllable(phone_list2)
|
126 |
word_vowel, word_end = result1
|
127 |
target_vowel, target_end = result2
|
128 |
|
129 |
-
# Perfect rhyme check (
|
130 |
if word_vowel and target_vowel:
|
131 |
if self._vowels_match(word_vowel, target_vowel):
|
132 |
word_end_clean = self._strip_stress(word_end)
|
@@ -144,51 +180,81 @@ class ParodyWordSuggestionTool(Tool):
|
|
144 |
if max(len(word_end_clean), len(target_end_clean)) > 0:
|
145 |
rhyme_score = 0.6 * (matched / max(1, max(len(word_end_clean), len(target_end_clean))))
|
146 |
else:
|
147 |
-
rhyme_score = 0.
|
148 |
-
|
149 |
-
#
|
150 |
-
|
151 |
-
if
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
# Additional check for -ing endings (special case for English)
|
165 |
-
if len(phone_list1) >= 3 and len(phone_list2) >= 3:
|
166 |
-
if (self._strip_stress(phone_list1[-2:]) == ['IH', 'NG'] and
|
167 |
-
self._strip_stress(phone_list2[-2:]) == ['IH', 'NG']):
|
168 |
-
near_rhyme_score = max(near_rhyme_score, 0.8) # Boost for -ing endings
|
169 |
|
170 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
phone_diff = abs(len(phone_list1) - len(phone_list2))
|
172 |
max_phones = max(len(phone_list1), len(phone_list2))
|
173 |
length_score = 1.0 if phone_diff == 0 else 1.0 - (phone_diff / max_phones)
|
174 |
|
175 |
-
#
|
176 |
import pronouncing
|
177 |
stress1 = pronouncing.stresses(phones1)
|
178 |
stress2 = pronouncing.stresses(phones2)
|
179 |
stress_score = 1.0 if stress1 == stress2 else 0.5
|
180 |
|
|
|
|
|
|
|
181 |
# Weighted combination
|
182 |
similarity = (
|
183 |
-
(rhyme_score * 0.
|
184 |
-
(
|
185 |
-
(
|
186 |
-
(
|
|
|
|
|
187 |
)
|
188 |
|
189 |
-
#
|
190 |
-
|
191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
|
193 |
# Cap at 1.0
|
194 |
similarity = min(1.0, similarity)
|
@@ -196,9 +262,11 @@ class ParodyWordSuggestionTool(Tool):
|
|
196 |
return {
|
197 |
"similarity": round(similarity, 3),
|
198 |
"rhyme_score": round(rhyme_score, 3),
|
|
|
199 |
"near_rhyme_score": round(near_rhyme_score, 3),
|
200 |
"length_score": round(length_score, 3),
|
201 |
"stress_score": round(stress_score, 3),
|
|
|
202 |
"phone_length_difference": phone_diff
|
203 |
}
|
204 |
|
@@ -276,9 +344,11 @@ class ParodyWordSuggestionTool(Tool):
|
|
276 |
"word": word,
|
277 |
"similarity": similarity_result["similarity"],
|
278 |
"rhyme_score": similarity_result["rhyme_score"],
|
|
|
279 |
"near_rhyme_score": similarity_result["near_rhyme_score"],
|
280 |
"length_score": similarity_result["length_score"],
|
281 |
"stress_score": similarity_result["stress_score"],
|
|
|
282 |
"phones": word_phones,
|
283 |
"last_vowel": word_vowel,
|
284 |
"ending": " ".join(word_end) if word_end else "",
|
|
|
1 |
from smolagents.tools import Tool
|
2 |
import string
|
|
|
3 |
import pronouncing
|
4 |
+
import json
|
5 |
|
6 |
class ParodyWordSuggestionTool(Tool):
|
7 |
name = "parody_word_suggester"
|
|
|
34 |
last_vowel_idx = -1
|
35 |
last_vowel = None
|
36 |
vowel_groups = self._get_vowel_groups()
|
37 |
+
v = ""
|
38 |
|
39 |
# Initialize loop variables
|
40 |
i = 0
|
|
|
42 |
base_phone = ""
|
43 |
group = []
|
44 |
|
45 |
+
# First, find the primary stressed vowel if it exists
|
46 |
for i, phone in enumerate(phones):
|
47 |
+
# Check for primary stress (1)
|
48 |
+
if '1' in phone and any(v in phone for v in 'AEIOU'):
|
49 |
+
base_phone = phone.rstrip('012')
|
50 |
+
last_vowel_idx = i
|
51 |
+
last_vowel = base_phone
|
52 |
+
break
|
53 |
+
|
54 |
+
# If no primary stress, just use the last vowel
|
55 |
+
if last_vowel_idx == -1:
|
56 |
+
for i, phone in enumerate(phones):
|
57 |
+
base_phone = phone.rstrip('012')
|
58 |
+
if any(v in base_phone for v in 'AEIOU'):
|
59 |
last_vowel_idx = i
|
60 |
last_vowel = base_phone
|
|
|
61 |
|
62 |
if last_vowel_idx == -1:
|
63 |
return None, []
|
|
|
96 |
|
97 |
|
98 |
def _calculate_similarity(self, word1, phones1, word2, phones2):
|
99 |
+
"""Calculate similarity score using refined metrics for parody."""
|
100 |
# Initialize all variables
|
101 |
phone_list1 = phones1.split()
|
102 |
phone_list2 = phones2.split()
|
|
|
112 |
common_length = 0
|
113 |
matched = 0
|
114 |
i = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
p = ""
|
116 |
v = ""
|
117 |
|
118 |
+
# Variables for whole-word matching
|
119 |
+
primary_stress_vowel1 = None
|
120 |
+
primary_stress_vowel2 = None
|
121 |
+
primary_stress_idx1 = -1
|
122 |
+
primary_stress_idx2 = -1
|
123 |
+
front_consonants1 = []
|
124 |
+
front_consonants2 = []
|
125 |
+
|
126 |
+
# Find primary stressed vowels
|
127 |
+
for i, phone in enumerate(phone_list1):
|
128 |
+
if '1' in phone and any(v in phone for v in 'AEIOU'):
|
129 |
+
primary_stress_vowel1 = phone.rstrip('012')
|
130 |
+
primary_stress_idx1 = i
|
131 |
+
break
|
132 |
+
|
133 |
+
for i, phone in enumerate(phone_list2):
|
134 |
+
if '1' in phone and any(v in phone for v in 'AEIOU'):
|
135 |
+
primary_stress_vowel2 = phone.rstrip('012')
|
136 |
+
primary_stress_idx2 = i
|
137 |
+
break
|
138 |
+
|
139 |
+
# Get consonants before the primary stress
|
140 |
+
if primary_stress_idx1 > 0:
|
141 |
+
front_consonants1 = [p for p in self._strip_stress(phone_list1[:primary_stress_idx1])
|
142 |
+
if not any(v in p for v in 'AEIOU')]
|
143 |
+
|
144 |
+
if primary_stress_idx2 > 0:
|
145 |
+
front_consonants2 = [p for p in self._strip_stress(phone_list2[:primary_stress_idx2])
|
146 |
+
if not any(v in p for v in 'AEIOU')]
|
147 |
+
|
148 |
+
# Calculate front consonant similarity (important for parody)
|
149 |
+
front_consonant_score = 0.0
|
150 |
+
if front_consonants1 and front_consonants2:
|
151 |
+
min_length = min(len(front_consonants1), len(front_consonants2))
|
152 |
+
if min_length > 0:
|
153 |
+
matches = 0
|
154 |
+
for i in range(min_length):
|
155 |
+
if front_consonants1[i] == front_consonants2[i]:
|
156 |
+
matches += 1
|
157 |
+
front_consonant_score = matches / min_length
|
158 |
+
|
159 |
+
# Get last syllable components for rhyming
|
160 |
result1 = self._get_last_syllable(phone_list1)
|
161 |
result2 = self._get_last_syllable(phone_list2)
|
162 |
word_vowel, word_end = result1
|
163 |
target_vowel, target_end = result2
|
164 |
|
165 |
+
# Perfect rhyme check (45% of score)
|
166 |
if word_vowel and target_vowel:
|
167 |
if self._vowels_match(word_vowel, target_vowel):
|
168 |
word_end_clean = self._strip_stress(word_end)
|
|
|
180 |
if max(len(word_end_clean), len(target_end_clean)) > 0:
|
181 |
rhyme_score = 0.6 * (matched / max(1, max(len(word_end_clean), len(target_end_clean))))
|
182 |
else:
|
183 |
+
rhyme_score = 0.6 # Still somewhat rhymes even without ending consonants
|
184 |
+
|
185 |
+
# Primary stressed vowel match (20% of score)
|
186 |
+
primary_vowel_score = 0.0
|
187 |
+
if primary_stress_vowel1 and primary_stress_vowel2:
|
188 |
+
if primary_stress_vowel1 == primary_stress_vowel2:
|
189 |
+
primary_vowel_score = 1.0
|
190 |
+
else:
|
191 |
+
# Check if they're in the same vowel group
|
192 |
+
for group in self._get_vowel_groups():
|
193 |
+
if primary_stress_vowel1 in group and primary_stress_vowel2 in group:
|
194 |
+
primary_vowel_score = 0.7
|
195 |
+
break
|
196 |
+
|
197 |
+
# Near rhyme check - 15% of score
|
198 |
+
near_rhyme_score = 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
|
200 |
+
# Check for specific endings
|
201 |
+
if len(phone_list1) >= 2 and len(phone_list2) >= 2:
|
202 |
+
# Check for -ing endings
|
203 |
+
if (self._strip_stress(phone_list1[-2:]) == ['IH', 'NG'] and
|
204 |
+
self._strip_stress(phone_list2[-2:]) == ['IH', 'NG']):
|
205 |
+
|
206 |
+
# For -ing endings, also consider the consonant before -ing
|
207 |
+
if len(phone_list1) >= 3 and len(phone_list2) >= 3:
|
208 |
+
# If the consonants before -ing match, higher score
|
209 |
+
if self._strip_stress(phone_list1[-3:-2]) == self._strip_stress(phone_list2[-3:-2]):
|
210 |
+
near_rhyme_score = 0.9
|
211 |
+
else:
|
212 |
+
near_rhyme_score = 0.6
|
213 |
+
else:
|
214 |
+
near_rhyme_score = 0.6
|
215 |
+
|
216 |
+
# Check for -y endings (like happy/sappy)
|
217 |
+
elif (self._strip_stress(phone_list1[-1:]) == ['IY'] and
|
218 |
+
self._strip_stress(phone_list2[-1:]) == ['IY']):
|
219 |
+
near_rhyme_score = 0.7
|
220 |
+
|
221 |
+
# Length and stress similarity (10% of score)
|
222 |
phone_diff = abs(len(phone_list1) - len(phone_list2))
|
223 |
max_phones = max(len(phone_list1), len(phone_list2))
|
224 |
length_score = 1.0 if phone_diff == 0 else 1.0 - (phone_diff / max_phones)
|
225 |
|
226 |
+
# Check stress pattern similarity
|
227 |
import pronouncing
|
228 |
stress1 = pronouncing.stresses(phones1)
|
229 |
stress2 = pronouncing.stresses(phones2)
|
230 |
stress_score = 1.0 if stress1 == stress2 else 0.5
|
231 |
|
232 |
+
# Front consonant match (10% of score)
|
233 |
+
front_score = front_consonant_score * 0.1
|
234 |
+
|
235 |
# Weighted combination
|
236 |
similarity = (
|
237 |
+
(rhyme_score * 0.45) + # End rhyme (45%)
|
238 |
+
(primary_vowel_score * 0.2) + # Primary vowel (20%)
|
239 |
+
(near_rhyme_score * 0.15) + # Near rhyme features (15%)
|
240 |
+
(length_score * 0.05) + # Length similarity (5%)
|
241 |
+
(stress_score * 0.05) + # Stress pattern (5%)
|
242 |
+
(front_score) # Front consonants (10%)
|
243 |
)
|
244 |
|
245 |
+
# IMPORTANT: Penalty for words that are too similar to be funny
|
246 |
+
# For parody, slightly different words are better than almost identical words
|
247 |
+
if word1 and word2:
|
248 |
+
if word1[0] == word2[0] and rhyme_score > 0.9 and primary_vowel_score > 0.9:
|
249 |
+
# Words starting with same letter and almost perfect rhyme
|
250 |
+
# are less funny for parody
|
251 |
+
similarity *= 0.9
|
252 |
+
|
253 |
+
# Special case: Words need to be somewhat different to be funny in parody
|
254 |
+
if len(word1) > 3 and len(word2) > 3:
|
255 |
+
# Give boost to words with same length but different consonants
|
256 |
+
if len(word1) == len(word2) and front_consonant_score < 0.5 and rhyme_score > 0.8:
|
257 |
+
similarity = max(similarity, 0.75) # Good for parody
|
258 |
|
259 |
# Cap at 1.0
|
260 |
similarity = min(1.0, similarity)
|
|
|
262 |
return {
|
263 |
"similarity": round(similarity, 3),
|
264 |
"rhyme_score": round(rhyme_score, 3),
|
265 |
+
"primary_vowel_score": round(primary_vowel_score, 3),
|
266 |
"near_rhyme_score": round(near_rhyme_score, 3),
|
267 |
"length_score": round(length_score, 3),
|
268 |
"stress_score": round(stress_score, 3),
|
269 |
+
"front_consonant_score": round(front_consonant_score, 3),
|
270 |
"phone_length_difference": phone_diff
|
271 |
}
|
272 |
|
|
|
344 |
"word": word,
|
345 |
"similarity": similarity_result["similarity"],
|
346 |
"rhyme_score": similarity_result["rhyme_score"],
|
347 |
+
"primary_vowel_score": similarity_result["primary_vowel_score"],
|
348 |
"near_rhyme_score": similarity_result["near_rhyme_score"],
|
349 |
"length_score": similarity_result["length_score"],
|
350 |
"stress_score": similarity_result["stress_score"],
|
351 |
+
"front_consonant_score": similarity_result["front_consonant_score"],
|
352 |
"phones": word_phones,
|
353 |
"last_vowel": word_vowel,
|
354 |
"ending": " ".join(word_end) if word_end else "",
|