Spaces:
Configuration error
Configuration error
Upload tool
Browse files
tool.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from smolagents.tools import Tool
|
2 |
-
import string
|
3 |
import pronouncing
|
|
|
4 |
import json
|
5 |
|
6 |
class ParodyWordSuggestionTool(Tool):
|
@@ -34,12 +34,12 @@ class ParodyWordSuggestionTool(Tool):
|
|
34 |
last_vowel_idx = -1
|
35 |
last_vowel = None
|
36 |
vowel_groups = self._get_vowel_groups()
|
37 |
-
v = ""
|
38 |
|
39 |
# Initialize loop variables
|
40 |
i = 0
|
41 |
phone = ""
|
42 |
base_phone = ""
|
|
|
43 |
group = []
|
44 |
|
45 |
# First, find the primary stressed vowel if it exists
|
@@ -95,6 +95,69 @@ class ParodyWordSuggestionTool(Tool):
|
|
95 |
return False
|
96 |
|
97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
def _calculate_similarity(self, word1, phones1, word2, phones2):
|
99 |
"""Calculate similarity score using refined metrics for parody."""
|
100 |
# Initialize all variables
|
@@ -197,22 +260,33 @@ class ParodyWordSuggestionTool(Tool):
|
|
197 |
# Near rhyme check - 15% of score
|
198 |
near_rhyme_score = 0.0
|
199 |
|
200 |
-
#
|
201 |
if len(phone_list1) >= 2 and len(phone_list2) >= 2:
|
202 |
# Check for -ing endings
|
203 |
if (self._strip_stress(phone_list1[-2:]) == ['IH', 'NG'] and
|
204 |
self._strip_stress(phone_list2[-2:]) == ['IH', 'NG']):
|
205 |
|
206 |
-
#
|
207 |
-
if
|
208 |
-
|
209 |
-
if self._strip_stress(phone_list1[-3:-2]) == self._strip_stress(phone_list2[-3:-2]):
|
210 |
-
near_rhyme_score = 0.9
|
211 |
-
else:
|
212 |
-
near_rhyme_score = 0.6
|
213 |
else:
|
214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
# Check for -y endings (like happy/sappy)
|
217 |
elif (self._strip_stress(phone_list1[-1:]) == ['IY'] and
|
218 |
self._strip_stress(phone_list2[-1:]) == ['IY']):
|
@@ -242,6 +316,13 @@ class ParodyWordSuggestionTool(Tool):
|
|
242 |
(front_score) # Front consonants (10%)
|
243 |
)
|
244 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
# IMPORTANT: Penalty for words that are too similar to be funny
|
246 |
# For parody, slightly different words are better than almost identical words
|
247 |
if word1 and word2:
|
|
|
1 |
from smolagents.tools import Tool
|
|
|
2 |
import pronouncing
|
3 |
+
import string
|
4 |
import json
|
5 |
|
6 |
class ParodyWordSuggestionTool(Tool):
|
|
|
34 |
last_vowel_idx = -1
|
35 |
last_vowel = None
|
36 |
vowel_groups = self._get_vowel_groups()
|
|
|
37 |
|
38 |
# Initialize loop variables
|
39 |
i = 0
|
40 |
phone = ""
|
41 |
base_phone = ""
|
42 |
+
v = ""
|
43 |
group = []
|
44 |
|
45 |
# First, find the primary stressed vowel if it exists
|
|
|
95 |
return False
|
96 |
|
97 |
|
98 |
+
def _consonants_are_similar(self, c1, c2):
|
99 |
+
"""Check if two consonants belong to similar phonetic groups."""
|
100 |
+
# Group consonants by articulation manner
|
101 |
+
nasals = ['M', 'N', 'NG']
|
102 |
+
stops = ['P', 'B', 'T', 'D', 'K', 'G']
|
103 |
+
fricatives = ['F', 'V', 'TH', 'DH', 'S', 'Z', 'SH', 'ZH']
|
104 |
+
liquids = ['L', 'R']
|
105 |
+
glides = ['W', 'Y']
|
106 |
+
|
107 |
+
# Check if consonants are in the same group
|
108 |
+
if c1 in nasals and c2 in nasals:
|
109 |
+
return True
|
110 |
+
if c1 in stops and c2 in stops:
|
111 |
+
return True
|
112 |
+
if c1 in fricatives and c2 in fricatives:
|
113 |
+
return True
|
114 |
+
if c1 in liquids and c2 in liquids:
|
115 |
+
return True
|
116 |
+
if c1 in glides and c2 in glides:
|
117 |
+
return True
|
118 |
+
|
119 |
+
return False
|
120 |
+
|
121 |
+
|
122 |
+
def _words_have_similar_structure(self, word1, word2, phones1, phones2):
    """Decide whether two '-ing' words are shaped alike beyond their ending.

    Args:
        word1: Spelling of the first word.
        word2: Spelling of the second word.
        phones1: Space-separated phone string for ``word1``.
        phones2: Space-separated phone string for ``word2``.

    Returns:
        False when the spellings differ in length by more than two
        characters, when the syllable counts differ, or when either word
        does not end in 'ing'. Otherwise True if both words have the same
        number of consonant phones, or if their second-to-last consonants
        are equal or similar; False in every remaining case.
    """
    # NOTE(review): placeholder bindings for the comprehension variables
    # used further down. They look like a workaround for a static name
    # checker; confirm before removing.
    ph = ""
    ch = ""

    # Gate 1: spellings must be within two characters of each other.
    length_gap = abs(len(word1) - len(word2))
    if length_gap > 2:
        return False

    # Gate 2: same syllable count (one stress mark per syllable).
    import pronouncing
    syllable_total1 = len(pronouncing.stresses(phones1))
    syllable_total2 = len(pronouncing.stresses(phones2))
    if syllable_total1 != syllable_total2:
        return False

    # Gate 3: the remaining comparison only applies to '-ing' pairs.
    if not (word1.endswith('ing') and word2.endswith('ing')):
        return False

    split1 = phones1.split()
    split2 = phones2.split()

    # A phone counts as a consonant when its symbol has no vowel letter.
    # presumably _strip_stress removes the stress digits; verify in helper.
    consonant_seq1 = [
        ph for ph in self._strip_stress(split1)
        if not any(ch in ph for ch in 'AEIOU')
    ]
    consonant_seq2 = [
        ph for ph in self._strip_stress(split2)
        if not any(ch in ph for ch in 'AEIOU')
    ]

    # Equal consonant counts are accepted outright.
    if len(consonant_seq1) == len(consonant_seq2):
        return True

    # Otherwise both words need at least two consonants to compare.
    if len(consonant_seq1) < 2 or len(consonant_seq2) < 2:
        return False

    # Compare the consonant just before the last one in each word.
    before_last1 = consonant_seq1[-2]
    before_last2 = consonant_seq2[-2]
    if before_last1 == before_last2:
        return True
    if self._consonants_are_similar(before_last1, before_last2):
        return True

    return False
|
159 |
+
|
160 |
+
|
161 |
def _calculate_similarity(self, word1, phones1, word2, phones2):
|
162 |
"""Calculate similarity score using refined metrics for parody."""
|
163 |
# Initialize all variables
|
|
|
260 |
# Near rhyme check - 15% of score
|
261 |
near_rhyme_score = 0.0
|
262 |
|
263 |
+
# Enhanced check for -ing endings
|
264 |
if len(phone_list1) >= 2 and len(phone_list2) >= 2:
|
265 |
# Check for -ing endings
|
266 |
if (self._strip_stress(phone_list1[-2:]) == ['IH', 'NG'] and
|
267 |
self._strip_stress(phone_list2[-2:]) == ['IH', 'NG']):
|
268 |
|
269 |
+
# Check if the words have similar structure (important for parody)
|
270 |
+
if self._words_have_similar_structure(word1, word2, phones1, phones2):
|
271 |
+
near_rhyme_score = 0.8
|
|
|
|
|
|
|
|
|
272 |
else:
|
273 |
+
# Words ending in -ing but with very different structure
|
274 |
+
# like "running" vs "kinging" should score lower
|
275 |
+
near_rhyme_score = 0.4
|
276 |
+
|
277 |
+
# Additional boost if the consonant before -ing is similar
|
278 |
+
if len(phone_list1) >= 3 and len(phone_list2) >= 3:
|
279 |
+
consonant1 = self._strip_stress(phone_list1[-3:-2])
|
280 |
+
consonant2 = self._strip_stress(phone_list2[-3:-2])
|
281 |
|
282 |
+
if len(consonant1) > 0 and len(consonant2) > 0:
|
283 |
+
# Same consonant gets highest score
|
284 |
+
if consonant1[0] == consonant2[0]:
|
285 |
+
near_rhyme_score = max(near_rhyme_score, 0.9)
|
286 |
+
# Similar consonants (e.g., 'N' and 'M' are both nasals)
|
287 |
+
elif self._consonants_are_similar(consonant1[0], consonant2[0]):
|
288 |
+
near_rhyme_score = max(near_rhyme_score, 0.8)
|
289 |
+
|
290 |
# Check for -y endings (like happy/sappy)
|
291 |
elif (self._strip_stress(phone_list1[-1:]) == ['IY'] and
|
292 |
self._strip_stress(phone_list2[-1:]) == ['IY']):
|
|
|
316 |
(front_score) # Front consonants (10%)
|
317 |
)
|
318 |
|
319 |
+
# IMPORTANT: Special case for words like "running"/"cumming"
|
320 |
+
# These should match well for parody purposes
|
321 |
+
if (word1.endswith('ing') and word2.endswith('ing') and
|
322 |
+
front_consonant_score < 0.5 and # Different initial consonants
|
323 |
+
near_rhyme_score >= 0.8): # Good near-rhyme pattern
|
324 |
+
similarity = max(similarity, 0.8) # Ensure high enough score
|
325 |
+
|
326 |
# IMPORTANT: Penalty for words that are too similar to be funny
|
327 |
# For parody, slightly different words are better than almost identical words
|
328 |
if word1 and word2:
|