patruff committed on
Commit
aa8160e
·
verified ·
1 Parent(s): 198c7f4

Upload tool

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. tool.py +124 -54
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- smolagents
2
  pronouncing
 
 
 
1
  pronouncing
2
+ smolagents
tool.py CHANGED
@@ -1,7 +1,7 @@
1
  from smolagents.tools import Tool
2
  import string
3
- import json
4
  import pronouncing
 
5
 
6
  class ParodyWordSuggestionTool(Tool):
7
  name = "parody_word_suggester"
@@ -34,6 +34,7 @@ class ParodyWordSuggestionTool(Tool):
34
  last_vowel_idx = -1
35
  last_vowel = None
36
  vowel_groups = self._get_vowel_groups()
 
37
 
38
  # Initialize loop variables
39
  i = 0
@@ -41,13 +42,22 @@ class ParodyWordSuggestionTool(Tool):
41
  base_phone = ""
42
  group = []
43
 
 
44
  for i, phone in enumerate(phones):
45
- base_phone = phone.rstrip('012')
46
- for group in vowel_groups:
47
- if base_phone in group:
 
 
 
 
 
 
 
 
 
48
  last_vowel_idx = i
49
  last_vowel = base_phone
50
- break
51
 
52
  if last_vowel_idx == -1:
53
  return None, []
@@ -86,7 +96,7 @@ class ParodyWordSuggestionTool(Tool):
86
 
87
 
88
  def _calculate_similarity(self, word1, phones1, word2, phones2):
89
- """Calculate similarity score using both perfect and near-rhyme detection."""
90
  # Initialize all variables
91
  phone_list1 = phones1.split()
92
  phone_list2 = phones2.split()
@@ -102,31 +112,57 @@ class ParodyWordSuggestionTool(Tool):
102
  common_length = 0
103
  matched = 0
104
  i = 0
105
-
106
- # Variables for near-rhyme scoring
107
- near_rhyme_score = 0.0
108
- consonants1 = []
109
- consonants2 = []
110
- matches = 0
111
-
112
- # Variables for length and stress scoring
113
- phone_diff = 0
114
- max_phones = 0
115
- length_score = 0.0
116
- stress_score = 0.0
117
- stress1 = ""
118
- stress2 = ""
119
- similarity = 0.0
120
  p = ""
121
  v = ""
122
 
123
- # Get last syllable components
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  result1 = self._get_last_syllable(phone_list1)
125
  result2 = self._get_last_syllable(phone_list2)
126
  word_vowel, word_end = result1
127
  target_vowel, target_end = result2
128
 
129
- # Perfect rhyme check (60% of score)
130
  if word_vowel and target_vowel:
131
  if self._vowels_match(word_vowel, target_vowel):
132
  word_end_clean = self._strip_stress(word_end)
@@ -144,51 +180,81 @@ class ParodyWordSuggestionTool(Tool):
144
  if max(len(word_end_clean), len(target_end_clean)) > 0:
145
  rhyme_score = 0.6 * (matched / max(1, max(len(word_end_clean), len(target_end_clean))))
146
  else:
147
- rhyme_score = 0.0
148
-
149
- # Near rhyme check (for words like "running"/"cunning") - 20% of score
150
- # Check if words have similar length and pattern
151
- if abs(len(phone_list1) - len(phone_list2)) <= 1:
152
- # Check consonant patterns are similar
153
- consonants1 = [p for p in self._strip_stress(phone_list1) if not any(v in p for v in 'AEIOU')]
154
- consonants2 = [p for p in self._strip_stress(phone_list2) if not any(v in p for v in 'AEIOU')]
155
-
156
- if len(consonants1) == len(consonants2):
157
- matches = 0
158
- for a, b in zip(consonants1, consonants2):
159
- if a == b:
160
- matches += 1
161
- if len(consonants1) > 0:
162
- near_rhyme_score = matches / max(1, len(consonants1))
163
-
164
- # Additional check for -ing endings (special case for English)
165
- if len(phone_list1) >= 3 and len(phone_list2) >= 3:
166
- if (self._strip_stress(phone_list1[-2:]) == ['IH', 'NG'] and
167
- self._strip_stress(phone_list2[-2:]) == ['IH', 'NG']):
168
- near_rhyme_score = max(near_rhyme_score, 0.8) # Boost for -ing endings
169
 
170
- # Calculate length similarity score (10% of total)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  phone_diff = abs(len(phone_list1) - len(phone_list2))
172
  max_phones = max(len(phone_list1), len(phone_list2))
173
  length_score = 1.0 if phone_diff == 0 else 1.0 - (phone_diff / max_phones)
174
 
175
- # Calculate stress pattern similarity (10% of total)
176
  import pronouncing
177
  stress1 = pronouncing.stresses(phones1)
178
  stress2 = pronouncing.stresses(phones2)
179
  stress_score = 1.0 if stress1 == stress2 else 0.5
180
 
 
 
 
181
  # Weighted combination
182
  similarity = (
183
- (rhyme_score * 0.6) + # Perfect rhyme (60%)
184
- (near_rhyme_score * 0.2) + # Near rhyme (20%)
185
- (length_score * 0.1) + # Length similarity (10%)
186
- (stress_score * 0.1) # Stress pattern (10%)
 
 
187
  )
188
 
189
- # Special case: Boost very similar-sounding words
190
- if near_rhyme_score > 0.7 and length_score > 0.8 and stress_score > 0.8:
191
- similarity = max(similarity, 0.75) # Ensure these get a high enough score
 
 
 
 
 
 
 
 
 
 
192
 
193
  # Cap at 1.0
194
  similarity = min(1.0, similarity)
@@ -196,9 +262,11 @@ class ParodyWordSuggestionTool(Tool):
196
  return {
197
  "similarity": round(similarity, 3),
198
  "rhyme_score": round(rhyme_score, 3),
 
199
  "near_rhyme_score": round(near_rhyme_score, 3),
200
  "length_score": round(length_score, 3),
201
  "stress_score": round(stress_score, 3),
 
202
  "phone_length_difference": phone_diff
203
  }
204
 
@@ -276,9 +344,11 @@ class ParodyWordSuggestionTool(Tool):
276
  "word": word,
277
  "similarity": similarity_result["similarity"],
278
  "rhyme_score": similarity_result["rhyme_score"],
 
279
  "near_rhyme_score": similarity_result["near_rhyme_score"],
280
  "length_score": similarity_result["length_score"],
281
  "stress_score": similarity_result["stress_score"],
 
282
  "phones": word_phones,
283
  "last_vowel": word_vowel,
284
  "ending": " ".join(word_end) if word_end else "",
 
1
  from smolagents.tools import Tool
2
  import string
 
3
  import pronouncing
4
+ import json
5
 
6
  class ParodyWordSuggestionTool(Tool):
7
  name = "parody_word_suggester"
 
34
  last_vowel_idx = -1
35
  last_vowel = None
36
  vowel_groups = self._get_vowel_groups()
37
+ v = ""
38
 
39
  # Initialize loop variables
40
  i = 0
 
42
  base_phone = ""
43
  group = []
44
 
45
+ # First, find the primary stressed vowel if it exists
46
  for i, phone in enumerate(phones):
47
+ # Check for primary stress (1)
48
+ if '1' in phone and any(v in phone for v in 'AEIOU'):
49
+ base_phone = phone.rstrip('012')
50
+ last_vowel_idx = i
51
+ last_vowel = base_phone
52
+ break
53
+
54
+ # If no primary stress, just use the last vowel
55
+ if last_vowel_idx == -1:
56
+ for i, phone in enumerate(phones):
57
+ base_phone = phone.rstrip('012')
58
+ if any(v in base_phone for v in 'AEIOU'):
59
  last_vowel_idx = i
60
  last_vowel = base_phone
 
61
 
62
  if last_vowel_idx == -1:
63
  return None, []
 
96
 
97
 
98
  def _calculate_similarity(self, word1, phones1, word2, phones2):
99
+ """Calculate similarity score using refined metrics for parody."""
100
  # Initialize all variables
101
  phone_list1 = phones1.split()
102
  phone_list2 = phones2.split()
 
112
  common_length = 0
113
  matched = 0
114
  i = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  p = ""
116
  v = ""
117
 
118
+ # Variables for whole-word matching
119
+ primary_stress_vowel1 = None
120
+ primary_stress_vowel2 = None
121
+ primary_stress_idx1 = -1
122
+ primary_stress_idx2 = -1
123
+ front_consonants1 = []
124
+ front_consonants2 = []
125
+
126
+ # Find primary stressed vowels
127
+ for i, phone in enumerate(phone_list1):
128
+ if '1' in phone and any(v in phone for v in 'AEIOU'):
129
+ primary_stress_vowel1 = phone.rstrip('012')
130
+ primary_stress_idx1 = i
131
+ break
132
+
133
+ for i, phone in enumerate(phone_list2):
134
+ if '1' in phone and any(v in phone for v in 'AEIOU'):
135
+ primary_stress_vowel2 = phone.rstrip('012')
136
+ primary_stress_idx2 = i
137
+ break
138
+
139
+ # Get consonants before the primary stress
140
+ if primary_stress_idx1 > 0:
141
+ front_consonants1 = [p for p in self._strip_stress(phone_list1[:primary_stress_idx1])
142
+ if not any(v in p for v in 'AEIOU')]
143
+
144
+ if primary_stress_idx2 > 0:
145
+ front_consonants2 = [p for p in self._strip_stress(phone_list2[:primary_stress_idx2])
146
+ if not any(v in p for v in 'AEIOU')]
147
+
148
+ # Calculate front consonant similarity (important for parody)
149
+ front_consonant_score = 0.0
150
+ if front_consonants1 and front_consonants2:
151
+ min_length = min(len(front_consonants1), len(front_consonants2))
152
+ if min_length > 0:
153
+ matches = 0
154
+ for i in range(min_length):
155
+ if front_consonants1[i] == front_consonants2[i]:
156
+ matches += 1
157
+ front_consonant_score = matches / min_length
158
+
159
+ # Get last syllable components for rhyming
160
  result1 = self._get_last_syllable(phone_list1)
161
  result2 = self._get_last_syllable(phone_list2)
162
  word_vowel, word_end = result1
163
  target_vowel, target_end = result2
164
 
165
+ # Perfect rhyme check (45% of score)
166
  if word_vowel and target_vowel:
167
  if self._vowels_match(word_vowel, target_vowel):
168
  word_end_clean = self._strip_stress(word_end)
 
180
  if max(len(word_end_clean), len(target_end_clean)) > 0:
181
  rhyme_score = 0.6 * (matched / max(1, max(len(word_end_clean), len(target_end_clean))))
182
  else:
183
+ rhyme_score = 0.6 # Still somewhat rhymes even without ending consonants
184
+
185
+ # Primary stressed vowel match (20% of score)
186
+ primary_vowel_score = 0.0
187
+ if primary_stress_vowel1 and primary_stress_vowel2:
188
+ if primary_stress_vowel1 == primary_stress_vowel2:
189
+ primary_vowel_score = 1.0
190
+ else:
191
+ # Check if they're in the same vowel group
192
+ for group in self._get_vowel_groups():
193
+ if primary_stress_vowel1 in group and primary_stress_vowel2 in group:
194
+ primary_vowel_score = 0.7
195
+ break
196
+
197
+ # Near rhyme check - 15% of score
198
+ near_rhyme_score = 0.0
 
 
 
 
 
 
199
 
200
+ # Check for specific endings
201
+ if len(phone_list1) >= 2 and len(phone_list2) >= 2:
202
+ # Check for -ing endings
203
+ if (self._strip_stress(phone_list1[-2:]) == ['IH', 'NG'] and
204
+ self._strip_stress(phone_list2[-2:]) == ['IH', 'NG']):
205
+
206
+ # For -ing endings, also consider the consonant before -ing
207
+ if len(phone_list1) >= 3 and len(phone_list2) >= 3:
208
+ # If the consonants before -ing match, higher score
209
+ if self._strip_stress(phone_list1[-3:-2]) == self._strip_stress(phone_list2[-3:-2]):
210
+ near_rhyme_score = 0.9
211
+ else:
212
+ near_rhyme_score = 0.6
213
+ else:
214
+ near_rhyme_score = 0.6
215
+
216
+ # Check for -y endings (like happy/sappy)
217
+ elif (self._strip_stress(phone_list1[-1:]) == ['IY'] and
218
+ self._strip_stress(phone_list2[-1:]) == ['IY']):
219
+ near_rhyme_score = 0.7
220
+
221
+ # Length and stress similarity (10% of score)
222
  phone_diff = abs(len(phone_list1) - len(phone_list2))
223
  max_phones = max(len(phone_list1), len(phone_list2))
224
  length_score = 1.0 if phone_diff == 0 else 1.0 - (phone_diff / max_phones)
225
 
226
+ # Check stress pattern similarity
227
  import pronouncing
228
  stress1 = pronouncing.stresses(phones1)
229
  stress2 = pronouncing.stresses(phones2)
230
  stress_score = 1.0 if stress1 == stress2 else 0.5
231
 
232
+ # Front consonant match (10% of score)
233
+ front_score = front_consonant_score * 0.1
234
+
235
  # Weighted combination
236
  similarity = (
237
+ (rhyme_score * 0.45) + # End rhyme (45%)
238
+ (primary_vowel_score * 0.2) + # Primary vowel (20%)
239
+ (near_rhyme_score * 0.15) + # Near rhyme features (15%)
240
+ (length_score * 0.05) + # Length similarity (5%)
241
+ (stress_score * 0.05) + # Stress pattern (5%)
242
+ (front_score) # Front consonants (10%)
243
  )
244
 
245
+ # IMPORTANT: Penalty for words that are too similar to be funny
246
+ # For parody, slightly different words are better than almost identical words
247
+ if word1 and word2:
248
+ if word1[0] == word2[0] and rhyme_score > 0.9 and primary_vowel_score > 0.9:
249
+ # Words starting with same letter and almost perfect rhyme
250
+ # are less funny for parody
251
+ similarity *= 0.9
252
+
253
+ # Special case: Words need to be somewhat different to be funny in parody
254
+ if len(word1) > 3 and len(word2) > 3:
255
+ # Give boost to words with same length but different consonants
256
+ if len(word1) == len(word2) and front_consonant_score < 0.5 and rhyme_score > 0.8:
257
+ similarity = max(similarity, 0.75) # Good for parody
258
 
259
  # Cap at 1.0
260
  similarity = min(1.0, similarity)
 
262
  return {
263
  "similarity": round(similarity, 3),
264
  "rhyme_score": round(rhyme_score, 3),
265
+ "primary_vowel_score": round(primary_vowel_score, 3),
266
  "near_rhyme_score": round(near_rhyme_score, 3),
267
  "length_score": round(length_score, 3),
268
  "stress_score": round(stress_score, 3),
269
+ "front_consonant_score": round(front_consonant_score, 3),
270
  "phone_length_difference": phone_diff
271
  }
272
 
 
344
  "word": word,
345
  "similarity": similarity_result["similarity"],
346
  "rhyme_score": similarity_result["rhyme_score"],
347
+ "primary_vowel_score": similarity_result["primary_vowel_score"],
348
  "near_rhyme_score": similarity_result["near_rhyme_score"],
349
  "length_score": similarity_result["length_score"],
350
  "stress_score": similarity_result["stress_score"],
351
+ "front_consonant_score": similarity_result["front_consonant_score"],
352
  "phones": word_phones,
353
  "last_vowel": word_vowel,
354
  "ending": " ".join(word_end) if word_end else "",