patruff committed on
Commit
8147e43
·
verified ·
1 Parent(s): aa0a83c

Upload tool

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. tool.py +111 -62
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- pronouncing
2
  smolagents
 
 
 
1
  smolagents
2
+ pronouncing
tool.py CHANGED
@@ -1,6 +1,6 @@
1
  from smolagents.tools import Tool
2
- import pronouncing
3
  import string
 
4
  import json
5
 
6
  class ParodyWordSuggestionTool(Tool):
@@ -14,6 +14,7 @@ class ParodyWordSuggestionTool(Tool):
14
  def _get_vowel_groups(self):
15
  groups = []
16
  group_strs = self.VOWEL_REF.split("|")
 
17
  for group_str in group_strs:
18
  groups.append(group_str.split(","))
19
  return groups
@@ -39,25 +40,31 @@ class ParodyWordSuggestionTool(Tool):
39
  i = 0
40
  phone = ""
41
  base_phone = ""
42
- v = ""
43
  group = []
 
44
 
45
  # First, find the primary stressed vowel if it exists
46
  for i, phone in enumerate(phones):
47
  # Check for primary stress (1)
48
- if '1' in phone and any(v in phone for v in 'AEIOU'):
 
49
  base_phone = phone.rstrip('012')
50
- last_vowel_idx = i
51
- last_vowel = base_phone
52
- break
 
 
 
 
53
 
54
  # If no primary stress, just use the last vowel
55
  if last_vowel_idx == -1:
56
  for i, phone in enumerate(phones):
57
  base_phone = phone.rstrip('012')
58
- if any(v in base_phone for v in 'AEIOU'):
59
- last_vowel_idx = i
60
- last_vowel = base_phone
 
61
 
62
  if last_vowel_idx == -1:
63
  return None, []
@@ -121,8 +128,6 @@ class ParodyWordSuggestionTool(Tool):
121
 
122
  def _words_have_similar_structure(self, word1, word2, phones1, phones2):
123
  """Check if words have similar structure beyond just ending."""
124
- p = ""
125
- v = ""
126
  # Similar word length
127
  if abs(len(word1) - len(word2)) > 2:
128
  return False
@@ -140,6 +145,10 @@ class ParodyWordSuggestionTool(Tool):
140
  phone_list1 = phones1.split()
141
  phone_list2 = phones2.split()
142
 
 
 
 
 
143
  # Get consonants
144
  consonants1 = [p for p in self._strip_stress(phone_list1) if not any(v in p for v in 'AEIOU')]
145
  consonants2 = [p for p in self._strip_stress(phone_list2) if not any(v in p for v in 'AEIOU')]
@@ -175,8 +184,6 @@ class ParodyWordSuggestionTool(Tool):
175
  common_length = 0
176
  matched = 0
177
  i = 0
178
- p = ""
179
- v = ""
180
 
181
  # Variables for whole-word matching
182
  primary_stress_vowel1 = None
@@ -186,6 +193,23 @@ class ParodyWordSuggestionTool(Tool):
186
  front_consonants1 = []
187
  front_consonants2 = []
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  # Find primary stressed vowels
190
  for i, phone in enumerate(phone_list1):
191
  if '1' in phone and any(v in phone for v in 'AEIOU'):
@@ -225,7 +249,7 @@ class ParodyWordSuggestionTool(Tool):
225
  word_vowel, word_end = result1
226
  target_vowel, target_end = result2
227
 
228
- # Perfect rhyme check (45% of score)
229
  if word_vowel and target_vowel:
230
  if self._vowels_match(word_vowel, target_vowel):
231
  word_end_clean = self._strip_stress(word_end)
@@ -245,7 +269,7 @@ class ParodyWordSuggestionTool(Tool):
245
  else:
246
  rhyme_score = 0.6 # Still somewhat rhymes even without ending consonants
247
 
248
- # Primary stressed vowel match (20% of score)
249
  primary_vowel_score = 0.0
250
  if primary_stress_vowel1 and primary_stress_vowel2:
251
  if primary_stress_vowel1 == primary_stress_vowel2:
@@ -260,39 +284,68 @@ class ParodyWordSuggestionTool(Tool):
260
  # Near rhyme check - 15% of score
261
  near_rhyme_score = 0.0
262
 
263
- # Enhanced check for -ing endings
264
  if len(phone_list1) >= 2 and len(phone_list2) >= 2:
265
  # Check for -ing endings
266
  if (self._strip_stress(phone_list1[-2:]) == ['IH', 'NG'] and
267
  self._strip_stress(phone_list2[-2:]) == ['IH', 'NG']):
268
 
269
- # Check if the words have similar structure (important for parody)
270
- if self._words_have_similar_structure(word1, word2, phones1, phones2):
271
- near_rhyme_score = 0.8
272
- else:
273
- # Words ending in -ing but with very different structure
274
- # like "running" vs "kinging" should score lower
275
- near_rhyme_score = 0.4
276
 
277
- # Additional boost if the consonant before -ing is similar
278
  if len(phone_list1) >= 3 and len(phone_list2) >= 3:
279
- consonant1 = self._strip_stress(phone_list1[-3:-2])
280
- consonant2 = self._strip_stress(phone_list2[-3:-2])
281
 
282
- if len(consonant1) > 0 and len(consonant2) > 0:
283
- # Same consonant gets highest score
284
- if consonant1[0] == consonant2[0]:
285
- near_rhyme_score = max(near_rhyme_score, 0.9)
286
- # Similar consonants (e.g., 'N' and 'M' are both nasals)
287
- elif self._consonants_are_similar(consonant1[0], consonant2[0]):
288
- near_rhyme_score = max(near_rhyme_score, 0.8)
 
 
 
289
 
290
  # Check for -y endings (like happy/sappy)
291
  elif (self._strip_stress(phone_list1[-1:]) == ['IY'] and
292
  self._strip_stress(phone_list2[-1:]) == ['IY']):
293
  near_rhyme_score = 0.7
294
 
295
- # Length and stress similarity (10% of score)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  phone_diff = abs(len(phone_list1) - len(phone_list2))
297
  max_phones = max(len(phone_list1), len(phone_list2))
298
  length_score = 1.0 if phone_diff == 0 else 1.0 - (phone_diff / max_phones)
@@ -303,39 +356,33 @@ class ParodyWordSuggestionTool(Tool):
303
  stress2 = pronouncing.stresses(phones2)
304
  stress_score = 1.0 if stress1 == stress2 else 0.5
305
 
306
- # Front consonant match (10% of score)
307
- front_score = front_consonant_score * 0.1
308
 
309
  # Weighted combination
310
  similarity = (
311
- (rhyme_score * 0.45) + # End rhyme (45%)
312
- (primary_vowel_score * 0.2) + # Primary vowel (20%)
313
- (near_rhyme_score * 0.15) + # Near rhyme features (15%)
314
- (length_score * 0.05) + # Length similarity (5%)
315
- (stress_score * 0.05) + # Stress pattern (5%)
316
- (front_score) # Front consonants (10%)
 
317
  )
318
 
319
- # IMPORTANT: Special case for words like "running"/"cumming"
320
- # These should match well for parody purposes
321
- if (word1.endswith('ing') and word2.endswith('ing') and
322
- front_consonant_score < 0.5 and # Different initial consonants
323
- near_rhyme_score >= 0.8): # Good near-rhyme pattern
324
- similarity = max(similarity, 0.8) # Ensure high enough score
325
-
326
- # IMPORTANT: Penalty for words that are too similar to be funny
327
- # For parody, slightly different words are better than almost identical words
328
- if word1 and word2:
329
- if word1[0] == word2[0] and rhyme_score > 0.9 and primary_vowel_score > 0.9:
330
- # Words starting with same letter and almost perfect rhyme
331
- # are less funny for parody
332
- similarity *= 0.9
333
-
334
- # Special case: Words need to be somewhat different to be funny in parody
335
- if len(word1) > 3 and len(word2) > 3:
336
- # Give boost to words with same length but different consonants
337
- if len(word1) == len(word2) and front_consonant_score < 0.5 and rhyme_score > 0.8:
338
- similarity = max(similarity, 0.75) # Good for parody
339
 
340
  # Cap at 1.0
341
  similarity = min(1.0, similarity)
@@ -345,6 +392,7 @@ class ParodyWordSuggestionTool(Tool):
345
  "rhyme_score": round(rhyme_score, 3),
346
  "primary_vowel_score": round(primary_vowel_score, 3),
347
  "near_rhyme_score": round(near_rhyme_score, 3),
 
348
  "length_score": round(length_score, 3),
349
  "stress_score": round(stress_score, 3),
350
  "front_consonant_score": round(front_consonant_score, 3),
@@ -427,6 +475,7 @@ class ParodyWordSuggestionTool(Tool):
427
  "rhyme_score": similarity_result["rhyme_score"],
428
  "primary_vowel_score": similarity_result["primary_vowel_score"],
429
  "near_rhyme_score": similarity_result["near_rhyme_score"],
 
430
  "length_score": similarity_result["length_score"],
431
  "stress_score": similarity_result["stress_score"],
432
  "front_consonant_score": similarity_result["front_consonant_score"],
 
1
  from smolagents.tools import Tool
 
2
  import string
3
+ import pronouncing
4
  import json
5
 
6
  class ParodyWordSuggestionTool(Tool):
 
14
  def _get_vowel_groups(self):
15
  groups = []
16
  group_strs = self.VOWEL_REF.split("|")
17
+ group_str = ""
18
  for group_str in group_strs:
19
  groups.append(group_str.split(","))
20
  return groups
 
40
  i = 0
41
  phone = ""
42
  base_phone = ""
 
43
  group = []
44
+ vowel_char = ""
45
 
46
  # First, find the primary stressed vowel if it exists
47
  for i, phone in enumerate(phones):
48
  # Check for primary stress (1)
49
+ if '1' in phone:
50
+ # Check if it's a vowel
51
  base_phone = phone.rstrip('012')
52
+ for vowel_char in 'AEIOU':
53
+ if vowel_char in base_phone:
54
+ last_vowel_idx = i
55
+ last_vowel = base_phone
56
+ break
57
+ if last_vowel is not None:
58
+ break
59
 
60
  # If no primary stress, just use the last vowel
61
  if last_vowel_idx == -1:
62
  for i, phone in enumerate(phones):
63
  base_phone = phone.rstrip('012')
64
+ for vowel_char in 'AEIOU':
65
+ if vowel_char in base_phone:
66
+ last_vowel_idx = i
67
+ last_vowel = base_phone
68
 
69
  if last_vowel_idx == -1:
70
  return None, []
 
128
 
129
  def _words_have_similar_structure(self, word1, word2, phones1, phones2):
130
  """Check if words have similar structure beyond just ending."""
 
 
131
  # Similar word length
132
  if abs(len(word1) - len(word2)) > 2:
133
  return False
 
145
  phone_list1 = phones1.split()
146
  phone_list2 = phones2.split()
147
 
148
+ # Initialize variables for list comprehension
149
+ p = ""
150
+ v = ""
151
+
152
  # Get consonants
153
  consonants1 = [p for p in self._strip_stress(phone_list1) if not any(v in p for v in 'AEIOU')]
154
  consonants2 = [p for p in self._strip_stress(phone_list2) if not any(v in p for v in 'AEIOU')]
 
184
  common_length = 0
185
  matched = 0
186
  i = 0
 
 
187
 
188
  # Variables for whole-word matching
189
  primary_stress_vowel1 = None
 
193
  front_consonants1 = []
194
  front_consonants2 = []
195
 
196
+ # Variables for special pattern matching
197
+ special_pattern_score = 0.0
198
+ stem1 = ""
199
+ stem2 = ""
200
+ consonant1 = ""
201
+ consonant2 = ""
202
+ nasals = ['m', 'n']
203
+ stops = ['p', 'b', 't', 'd', 'k', 'g']
204
+ fricatives = ['f', 'v', 'th', 's', 'z', 'sh']
205
+ base1 = ""
206
+ base2 = ""
207
+
208
+ # Variables for list comprehensions
209
+ p = ""
210
+ v = ""
211
+ group = []
212
+
213
  # Find primary stressed vowels
214
  for i, phone in enumerate(phone_list1):
215
  if '1' in phone and any(v in phone for v in 'AEIOU'):
 
249
  word_vowel, word_end = result1
250
  target_vowel, target_end = result2
251
 
252
+ # Perfect rhyme check (40% of score)
253
  if word_vowel and target_vowel:
254
  if self._vowels_match(word_vowel, target_vowel):
255
  word_end_clean = self._strip_stress(word_end)
 
269
  else:
270
  rhyme_score = 0.6 # Still somewhat rhymes even without ending consonants
271
 
272
+ # Primary stressed vowel match (15% of score)
273
  primary_vowel_score = 0.0
274
  if primary_stress_vowel1 and primary_stress_vowel2:
275
  if primary_stress_vowel1 == primary_stress_vowel2:
 
284
  # Near rhyme check - 15% of score
285
  near_rhyme_score = 0.0
286
 
287
+ # Check for specific endings
288
  if len(phone_list1) >= 2 and len(phone_list2) >= 2:
289
  # Check for -ing endings
290
  if (self._strip_stress(phone_list1[-2:]) == ['IH', 'NG'] and
291
  self._strip_stress(phone_list2[-2:]) == ['IH', 'NG']):
292
 
293
+ # Base score for -ing endings
294
+ near_rhyme_score = 0.6
 
 
 
 
 
295
 
296
+ # Additional checks for consonant before -ing
297
  if len(phone_list1) >= 3 and len(phone_list2) >= 3:
298
+ consonant1_list = self._strip_stress(phone_list1[-3:-2])
299
+ consonant2_list = self._strip_stress(phone_list2[-3:-2])
300
 
301
+ if consonant1_list and consonant2_list:
302
+ consonant1 = consonant1_list[0]
303
+ consonant2 = consonant2_list[0]
304
+
305
+ # Same consonant gets highest score (like running/gunning)
306
+ if consonant1 == consonant2:
307
+ near_rhyme_score = 0.9
308
+ # Similar consonants (nasal: 'N'/'M') get high score (running/cumming)
309
+ elif self._consonants_are_similar(consonant1, consonant2):
310
+ near_rhyme_score = 0.8
311
 
312
  # Check for -y endings (like happy/sappy)
313
  elif (self._strip_stress(phone_list1[-1:]) == ['IY'] and
314
  self._strip_stress(phone_list2[-1:]) == ['IY']):
315
  near_rhyme_score = 0.7
316
 
317
+ # Special pattern matching for running/cumming type pairs (15% of score)
318
+ if word1.endswith('ing') and word2.endswith('ing'):
319
+ # Get the stem (without -ing)
320
+ stem1 = word1[:-3]
321
+ stem2 = word2[:-3]
322
+
323
+ # Same stem length is good for parody
324
+ if len(stem1) == len(stem2):
325
+ special_pattern_score += 0.4
326
+
327
+ # If both stems end with same consonant (like 'n' in run-ning, 'm' in cum-ming)
328
+ # this makes them rhyme much better
329
+ if stem1 and stem2 and stem1[-1] == stem2[-1]:
330
+ special_pattern_score += 0.3
331
+ elif stem1 and stem2:
332
+ # Check if the final consonants are in the same phonetic group
333
+ # This helps pair words like running/humming (nasal consonants)
334
+ consonant1 = stem1[-1]
335
+ consonant2 = stem2[-1]
336
+
337
+ # Check if they're in the same group
338
+ if (consonant1 in nasals and consonant2 in nasals) or \
339
+ (consonant1 in stops and consonant2 in stops) or \
340
+ (consonant1 in fricatives and consonant2 in fricatives):
341
+ special_pattern_score += 0.2
342
+
343
+ # Check for double consonants (like nn in running, mm in cumming)
344
+ if len(stem1) >= 2 and stem1[-1] == stem1[-2] and \
345
+ len(stem2) >= 2 and stem2[-1] == stem2[-2]:
346
+ special_pattern_score += 0.3
347
+
348
+ # Length and stress similarity (5% each)
349
  phone_diff = abs(len(phone_list1) - len(phone_list2))
350
  max_phones = max(len(phone_list1), len(phone_list2))
351
  length_score = 1.0 if phone_diff == 0 else 1.0 - (phone_diff / max_phones)
 
356
  stress2 = pronouncing.stresses(phones2)
357
  stress_score = 1.0 if stress1 == stress2 else 0.5
358
 
359
+ # Front consonant match (5% of score)
360
+ front_score = front_consonant_score * 0.05
361
 
362
  # Weighted combination
363
  similarity = (
364
+ (rhyme_score * 0.40) + # End rhyme (40%)
365
+ (primary_vowel_score * 0.15) + # Primary vowel (15%)
366
+ (near_rhyme_score * 0.15) + # Near rhyme features (15%)
367
+ (special_pattern_score * 0.15) + # Special pattern match (15%)
368
+ (length_score * 0.05) + # Length similarity (5%)
369
+ (stress_score * 0.05) + # Stress pattern (5%)
370
+ (front_score) # Front consonants (5%)
371
  )
372
 
373
+ # Additional boost for specific word patterns that make great parody matches
374
+ # This specifically addresses running/cumming type pairs
375
+ if word1.endswith('ing') and word2.endswith('ing'):
376
+ base1 = word1[:-3]
377
+ base2 = word2[:-3]
378
+
379
+ # Specific pattern for words like running/cunning/cumming
380
+ if (len(base1) == 3 and len(base2) == 3 and
381
+ base1[0] != base2[0] and # Different first consonant (good for parody)
382
+ len(base1) >= 2 and len(base2) >= 2 and
383
+ base1[-1] == base1[-2] and # Double consonant in first word (nn in running)
384
+ base2[-1] == base2[-2]): # Double consonant in second word (mm in cumming)
385
+ similarity = max(similarity, 0.9) # These are excellent parody matches
 
 
 
 
 
 
 
386
 
387
  # Cap at 1.0
388
  similarity = min(1.0, similarity)
 
392
  "rhyme_score": round(rhyme_score, 3),
393
  "primary_vowel_score": round(primary_vowel_score, 3),
394
  "near_rhyme_score": round(near_rhyme_score, 3),
395
+ "special_pattern_score": round(special_pattern_score, 3),
396
  "length_score": round(length_score, 3),
397
  "stress_score": round(stress_score, 3),
398
  "front_consonant_score": round(front_consonant_score, 3),
 
475
  "rhyme_score": similarity_result["rhyme_score"],
476
  "primary_vowel_score": similarity_result["primary_vowel_score"],
477
  "near_rhyme_score": similarity_result["near_rhyme_score"],
478
+ "special_pattern_score": similarity_result.get("special_pattern_score", 0),
479
  "length_score": similarity_result["length_score"],
480
  "stress_score": similarity_result["stress_score"],
481
  "front_consonant_score": similarity_result["front_consonant_score"],