patruff committed on
Commit
6e8a124
·
verified ·
1 Parent(s): fb0f810

Upload tool

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. tool.py +105 -130
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- pronouncing
2
  smolagents
 
 
 
1
  smolagents
2
+ pronouncing
tool.py CHANGED
@@ -1,13 +1,14 @@
1
  from smolagents.tools import Tool
2
- import string
3
  import pronouncing
4
  import json
 
 
5
 
6
  class ParodyWordSuggestionTool(Tool):
7
  name = "parody_word_suggester"
8
- description = """Suggests rhyming funny words using CMU dictionary and custom pronunciations.
9
  Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
10
- inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'default': '0.5', 'nullable': True}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'default': None, 'nullable': True}}
11
  output_type = "string"
12
  VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
13
  CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"
@@ -20,14 +21,12 @@ class ParodyWordSuggestionTool(Tool):
20
  return groups
21
 
22
 
23
- def _get_word_phones(self, word, custom_phones=None):
24
- """Get phones for a word, checking custom dictionary first."""
25
- if custom_phones and word in custom_phones:
26
- return custom_phones[word]["primary_phones"]
27
-
28
- import pronouncing
29
- phones = pronouncing.phones_for_word(word)
30
- return phones[0] if phones else None
31
 
32
 
33
  def _get_last_syllable(self, phones: list) -> tuple:
@@ -72,132 +71,109 @@ class ParodyWordSuggestionTool(Tool):
72
  return False
73
 
74
 
75
- def _strip_common_suffix(self, phones: list) -> tuple:
76
- """Strip common suffixes and return base and suffix phones."""
77
- # Initialize variables
78
- suffix_name = ""
79
- suffix_phones = []
80
- phone1 = ""
81
- phone2 = ""
82
-
83
- # Common suffix patterns in CMU phonetic representation
84
- SUFFIXES = {
85
- 'ING': ['IH0', 'NG'], # -ing
86
- 'ED': ['EH0', 'D'], # -ed
87
- 'ER': ['ER0'], # -er
88
- 'EST': ['EH0', 'S', 'T'], # -est
89
- 'LY': ['L', 'IY0'], # -ly
90
- 'NESS': ['N', 'EH0', 'S'], # -ness
91
- }
92
 
93
- for suffix_name, suffix_phones in SUFFIXES.items():
94
- if len(phones) > len(suffix_phones):
95
- if all(phone1.rstrip('012') == phone2.rstrip('012')
96
- for phone1, phone2 in zip(phones[-len(suffix_phones):], suffix_phones)):
97
- return phones[:-len(suffix_phones)], suffix_phones
 
 
 
 
 
 
 
 
98
 
99
- return phones, []
100
-
101
-
102
- def _get_consonant_groups(self):
103
- """Get consonant groups from reference string."""
104
- groups = []
105
- group_strs = self.CONSONANT_REF.split("|")
106
- for group_str in group_strs:
107
- groups.append(group_str.split(","))
108
- return groups
109
-
110
-
111
- def _consonants_similarity(self, c1: str, c2: str) -> float:
112
- """Calculate similarity score between two consonants."""
113
- if c1 == c2:
114
- return 1.0
115
-
116
- # Check if they're in the same group
117
- consonant_groups = self._get_consonant_groups()
118
- for group in consonant_groups:
119
- if c1 in group and c2 in group:
120
- # Nasals (first group) are more similar to each other
121
- if group == consonant_groups[0]: # M,N,NG group
122
- return 0.8
123
- return 0.5
124
-
125
- return 0.0
126
-
127
-
128
- def _calculate_similarity(self, word1, phones1, word2, phones2):
129
- """Calculate similarity score with enhanced consonant matching."""
130
- # Initialize variables as before
131
  phone_list1 = phones1.split()
132
  phone_list2 = phones2.split()
133
 
134
- # Get stressed vowels and their positions
135
- vowel_idx1 = -1
136
- vowel_idx2 = -1
137
- primary_vowel1 = None
138
- primary_vowel2 = None
139
 
140
- for i, phone in enumerate(phone_list1):
141
- if '1' in phone: # Primary stress
142
- vowel_idx1 = i
143
- primary_vowel1 = phone.rstrip('012')
144
- break
 
 
 
145
 
146
- for i, phone in enumerate(phone_list2):
147
- if '1' in phone:
148
- vowel_idx2 = i
149
- primary_vowel2 = phone.rstrip('012')
150
- break
151
-
152
- # Calculate vowel similarity (50% of total score)
153
- vowel_score = 0.0
154
- if primary_vowel1 and primary_vowel2:
155
- if primary_vowel1 == primary_vowel2:
156
- vowel_score = 1.0
157
- elif self._vowels_match(primary_vowel1, primary_vowel2):
158
- vowel_score = 0.8
159
-
160
- # Calculate consonant similarity (30% of total score)
161
- consonant_score = 0.0
162
- if vowel_idx1 >= 0 and vowel_idx2 >= 0:
163
- # Compare consonants around the stressed vowel
164
- pre_c1 = phone_list1[vowel_idx1-1] if vowel_idx1 > 0 else None
165
- pre_c2 = phone_list2[vowel_idx2-1] if vowel_idx2 > 0 else None
166
- post_c1 = phone_list1[vowel_idx1+1] if vowel_idx1 < len(phone_list1)-1 else None
167
- post_c2 = phone_list2[vowel_idx2+1] if vowel_idx2 < len(phone_list2)-1 else None
168
-
169
- if pre_c1 and pre_c2:
170
- consonant_score += self._consonants_similarity(pre_c1, pre_c2)
171
- if post_c1 and post_c2:
172
- consonant_score += self._consonants_similarity(post_c1, post_c2)
173
 
174
- consonant_score = consonant_score / 2 # Normalize to 0-1
175
-
176
- # Pattern/length similarity (20% of total score)
177
- pattern_score = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  if len(phone_list1) == len(phone_list2):
179
  pattern_score = 1.0
180
  else:
181
  pattern_score = 1.0 - (abs(len(phone_list1) - len(phone_list2)) / max(len(phone_list1), len(phone_list2)))
182
-
183
- # Combined weighted score
184
  similarity = (
185
- (vowel_score * 0.5) + # Vowel similarity most important
186
- (consonant_score * 0.3) + # Consonant similarity next
187
- (pattern_score * 0.2) # Pattern/length least important
188
  )
189
-
 
 
 
 
 
 
 
 
190
  return {
191
  "similarity": round(similarity, 3),
192
- "vowel_score": round(vowel_score, 3),
193
- "consonant_score": round(consonant_score, 3),
194
  "pattern_score": round(pattern_score, 3),
195
- "primary_vowels": f"{primary_vowel1}-{primary_vowel2}",
196
- "consonants": "similar" if consonant_score > 0.5 else "different"
 
 
 
 
197
  }
198
 
199
 
200
- def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5", custom_phones: dict = None) -> str:
201
  import pronouncing
202
  import string
203
  import json
@@ -230,38 +206,39 @@ class ParodyWordSuggestionTool(Tool):
230
  }, indent=2)
231
 
232
  # Get target pronunciation
233
- target_phones = self._get_word_phones(target, custom_phones)
234
  if not target_phones:
235
  return json.dumps({
236
- "error": f"Target word '{target}' not found in dictionary or custom phones",
237
  "suggestions": []
238
  }, indent=2)
239
 
240
- # Filter word list
241
  valid_words = []
242
  invalid_words = []
243
  for word in words:
244
  word = word.lower().strip(string.punctuation)
245
- if self._get_word_phones(word, custom_phones):
246
  valid_words.append(word)
247
  else:
248
  invalid_words.append(word)
249
 
250
  if not valid_words:
251
  return json.dumps({
252
- "error": "No valid words found in dictionary or custom phones",
253
  "invalid_words": invalid_words,
254
  "suggestions": []
255
  }, indent=2)
256
 
 
257
  target_phone_list = target_phones.split()
258
  target_vowel, target_end = self._get_last_syllable(target_phone_list)
259
 
260
- # Check each word
261
  # Check each word
262
  for word in valid_words:
263
- word_phones = self._get_word_phones(word, custom_phones)
264
- if word_phones:
 
265
  similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
266
 
267
  if similarity_result["similarity"] >= min_similarity:
@@ -271,15 +248,13 @@ class ParodyWordSuggestionTool(Tool):
271
  suggestions.append({
272
  "word": word,
273
  "similarity": similarity_result["similarity"],
274
- "vowel_score": similarity_result["vowel_score"],
275
- "consonant_score": similarity_result["consonant_score"],
276
  "pattern_score": similarity_result["pattern_score"],
277
- "primary_vowels": similarity_result["primary_vowels"],
278
- "consonants": similarity_result["consonants"],
279
  "phones": word_phones,
280
  "last_vowel": word_vowel,
281
  "ending": " ".join(word_end) if word_end else "",
282
- "is_custom": word in custom_phones if custom_phones else False
283
  })
284
 
285
  # Sort by similarity score descending
 
1
  from smolagents.tools import Tool
 
2
  import pronouncing
3
  import json
4
+ import string
5
+ import difflib
6
 
7
  class ParodyWordSuggestionTool(Tool):
8
  name = "parody_word_suggester"
9
+ description = """Suggests rhyming funny words using CMU dictionary pronunciations.
10
  Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
11
+ inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'}}
12
  output_type = "string"
13
  VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
14
  CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"
 
21
  return groups
22
 
23
 
24
+ def _get_consonant_groups(self):
25
+ groups = []
26
+ group_strs = self.CONSONANT_REF.split("|")
27
+ for group_str in group_strs:
28
+ groups.append(group_str.split(","))
29
+ return groups
 
 
30
 
31
 
32
  def _get_last_syllable(self, phones: list) -> tuple:
 
71
  return False
72
 
73
 
74
+ def _calculate_similarity(self, word1, phones1, word2, phones2):
75
+ """Calculate similarity with heavy emphasis on rhyming."""
76
+ from difflib import SequenceMatcher
77
+ import pronouncing
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
+ # Initialize all variables
80
+ rhyme_score = 0.0
81
+ string_score = 0.0
82
+ pattern_score = 0.0
83
+ phone_list1 = []
84
+ phone_list2 = []
85
+ vowel1 = None
86
+ vowel2 = None
87
+ end1 = []
88
+ end2 = []
89
+ end1_clean = []
90
+ end2_clean = []
91
+ matching_consonants = 0
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  phone_list1 = phones1.split()
94
  phone_list2 = phones2.split()
95
 
96
+ # Get last syllables
97
+ vowel1, end1 = self._get_last_syllable(phone_list1)
98
+ vowel2, end2 = self._get_last_syllable(phone_list2)
 
 
99
 
100
+ # Calculate rhyme score (60%)
101
+ if vowel1 and vowel2:
102
+ # Perfect vowel match
103
+ if vowel1.rstrip('012') == vowel2.rstrip('012'):
104
+ rhyme_score = 1.0
105
+ # Similar vowel match
106
+ elif self._vowels_match(vowel1, vowel2):
107
+ rhyme_score = 0.8
108
 
109
+ # Check endings
110
+ if end1 and end2:
111
+ end1_clean = self._strip_stress(end1)
112
+ end2_clean = self._strip_stress(end2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
+ # Perfect ending match
115
+ if end1_clean == end2_clean:
116
+ rhyme_score = min(1.0, rhyme_score + 0.2)
117
+ # Partial ending match
118
+ else:
119
+ consonant_groups = self._get_consonant_groups()
120
+ matching_consonants = 0
121
+ for c1, c2 in zip(end1_clean, end2_clean):
122
+ if c1 == c2:
123
+ matching_consonants += 1
124
+ else:
125
+ # Check if consonants are in same group
126
+ for group in consonant_groups:
127
+ if c1 in group and c2 in group:
128
+ matching_consonants += 0.5
129
+ break
130
+
131
+ if matching_consonants > 0:
132
+ rhyme_score = min(1.0, rhyme_score + (0.1 * matching_consonants))
133
+
134
+ # String similarity (25%)
135
+ if len(word1) > 1 and len(word2) > 1:
136
+ end_similarity = SequenceMatcher(None, word1[1:], word2[1:]).ratio()
137
+ string_score = end_similarity
138
+ else:
139
+ string_score = SequenceMatcher(None, word1, word2).ratio()
140
+
141
+ # Pattern/Length score (15%)
142
  if len(phone_list1) == len(phone_list2):
143
  pattern_score = 1.0
144
  else:
145
  pattern_score = 1.0 - (abs(len(phone_list1) - len(phone_list2)) / max(len(phone_list1), len(phone_list2)))
146
+
147
+ # Final weighted score
148
  similarity = (
149
+ (rhyme_score * 0.60) +
150
+ (string_score * 0.25) +
151
+ (pattern_score * 0.15)
152
  )
153
+
154
+ # Extra boost for exact matches minus first letter
155
+ if len(word1) == len(word2) and word1[1:] == word2[1:]:
156
+ similarity = min(1.0, similarity * 1.2)
157
+
158
+ # Extra penalty for very different lengths
159
+ if abs(len(word1) - len(word2)) > 2:
160
+ similarity *= 0.7
161
+
162
  return {
163
  "similarity": round(similarity, 3),
164
+ "rhyme_score": round(rhyme_score, 3),
165
+ "string_score": round(string_score, 3),
166
  "pattern_score": round(pattern_score, 3),
167
+ "details": {
168
+ "last_vowel_match": vowel1.rstrip('012') == vowel2.rstrip('012') if vowel1 and vowel2 else False,
169
+ "similar_vowels": self._vowels_match(vowel1, vowel2) if vowel1 and vowel2 else False,
170
+ "ending_match": " ".join(end1_clean) == " ".join(end2_clean) if end1 and end2 else False,
171
+ "string_length_diff": abs(len(word1) - len(word2))
172
+ }
173
  }
174
 
175
 
176
+ def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5") -> str:
177
  import pronouncing
178
  import string
179
  import json
 
206
  }, indent=2)
207
 
208
  # Get target pronunciation
209
+ target_phones = pronouncing.phones_for_word(target)
210
  if not target_phones:
211
  return json.dumps({
212
+ "error": f"Target word '{target}' not found in CMU dictionary",
213
  "suggestions": []
214
  }, indent=2)
215
 
216
+ # Filter word list to only words in CMU dictionary
217
  valid_words = []
218
  invalid_words = []
219
  for word in words:
220
  word = word.lower().strip(string.punctuation)
221
+ if pronouncing.phones_for_word(word):
222
  valid_words.append(word)
223
  else:
224
  invalid_words.append(word)
225
 
226
  if not valid_words:
227
  return json.dumps({
228
+ "error": "No valid words found in CMU dictionary",
229
  "invalid_words": invalid_words,
230
  "suggestions": []
231
  }, indent=2)
232
 
233
+ target_phones = target_phones[0]
234
  target_phone_list = target_phones.split()
235
  target_vowel, target_end = self._get_last_syllable(target_phone_list)
236
 
 
237
  # Check each word
238
  for word in valid_words:
239
+ phones = pronouncing.phones_for_word(word)
240
+ if phones:
241
+ word_phones = phones[0]
242
  similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
243
 
244
  if similarity_result["similarity"] >= min_similarity:
 
248
  suggestions.append({
249
  "word": word,
250
  "similarity": similarity_result["similarity"],
251
+ "rhyme_score": similarity_result["rhyme_score"],
252
+ "string_score": similarity_result["string_score"],
253
  "pattern_score": similarity_result["pattern_score"],
 
 
254
  "phones": word_phones,
255
  "last_vowel": word_vowel,
256
  "ending": " ".join(word_end) if word_end else "",
257
+ "details": similarity_result["details"]
258
  })
259
 
260
  # Sort by similarity score descending