patruff committed on
Commit
3910298
·
verified ·
1 Parent(s): ae1df3c

Upload tool

Browse files
Files changed (1) hide show
  1. tool.py +32 -10
tool.py CHANGED
@@ -1,8 +1,8 @@
1
  from smolagents.tools import Tool
 
2
  import json
3
- import pronouncing
4
  import difflib
5
- import string
6
 
7
  class ParodyWordSuggestionTool(Tool):
8
  name = "parody_word_suggester"
@@ -98,7 +98,24 @@ class ParodyWordSuggestionTool(Tool):
98
  target_phones = pronouncing.phones_for_word(target)
99
  if not target_phones:
100
  return json.dumps({
101
- "error": f"'{target}' not found in CMU dictionary",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  "suggestions": []
103
  }, indent=2)
104
 
@@ -107,8 +124,7 @@ class ParodyWordSuggestionTool(Tool):
107
  target_vowel, target_end = self._get_last_syllable(target_phone_list)
108
 
109
  # Check each word
110
- for word in words:
111
- word = word.lower().strip(string.punctuation)
112
  phones = pronouncing.phones_for_word(word)
113
  if phones:
114
  word_phones = phones[0]
@@ -126,8 +142,8 @@ class ParodyWordSuggestionTool(Tool):
126
 
127
  if word_end_clean == target_end_clean:
128
  rhyme_score = 1.0
129
- # Extra boost for exact match
130
- if len(word) == len(target):
131
  rhyme_score = 1.2
132
  else:
133
  rhyme_score = 0.6
@@ -137,10 +153,15 @@ class ParodyWordSuggestionTool(Tool):
137
  word_syl = pronouncing.syllable_count(word_phones)
138
  syllable_score = 1.0 if target_syl == word_syl else 0.0
139
 
140
- # 3. Overall similarity (15%)
141
- string_similarity = SequenceMatcher(None, target, word).ratio()
 
 
 
 
 
142
 
143
- # Combined score with phonetic similarity bonus
144
  similarity = (rhyme_score * 0.6) + (syllable_score * 0.25) + (string_similarity * 0.15)
145
 
146
  if similarity >= min_similarity:
@@ -171,6 +192,7 @@ class ParodyWordSuggestionTool(Tool):
171
  "target_phones": target_phones,
172
  "target_last_vowel": target_vowel,
173
  "target_ending": " ".join(target_end) if target_end else "",
 
174
  "suggestions": suggestions
175
  }
176
 
 
1
  from smolagents.tools import Tool
2
+ import string
3
  import json
 
4
  import difflib
5
+ import pronouncing
6
 
7
  class ParodyWordSuggestionTool(Tool):
8
  name = "parody_word_suggester"
 
98
  target_phones = pronouncing.phones_for_word(target)
99
  if not target_phones:
100
  return json.dumps({
101
+ "error": f"Target word '{target}' not found in CMU dictionary",
102
+ "suggestions": []
103
+ }, indent=2)
104
+
105
+ # Filter word list to only words in CMU dictionary
106
+ valid_words = []
107
+ invalid_words = []
108
+ for word in words:
109
+ word = word.lower().strip(string.punctuation)
110
+ if pronouncing.phones_for_word(word):
111
+ valid_words.append(word)
112
+ else:
113
+ invalid_words.append(word)
114
+
115
+ if not valid_words:
116
+ return json.dumps({
117
+ "error": "No valid words found in CMU dictionary",
118
+ "invalid_words": invalid_words,
119
  "suggestions": []
120
  }, indent=2)
121
 
 
124
  target_vowel, target_end = self._get_last_syllable(target_phone_list)
125
 
126
  # Check each word
127
+ for word in valid_words:
 
128
  phones = pronouncing.phones_for_word(word)
129
  if phones:
130
  word_phones = phones[0]
 
142
 
143
  if word_end_clean == target_end_clean:
144
  rhyme_score = 1.0
145
+ # Extra boost for exact match minus first letter
146
+ if len(word) == len(target) and word[1:] == target[1:]:
147
  rhyme_score = 1.2
148
  else:
149
  rhyme_score = 0.6
 
153
  word_syl = pronouncing.syllable_count(word_phones)
154
  syllable_score = 1.0 if target_syl == word_syl else 0.0
155
 
156
+ # 3. String similarity (15%)
157
+ # Higher weight for end of word similarity
158
+ if len(word) > 1 and len(target) > 1:
159
+ end_similarity = SequenceMatcher(None, word[1:], target[1:]).ratio()
160
+ string_similarity = end_similarity
161
+ else:
162
+ string_similarity = SequenceMatcher(None, target, word).ratio()
163
 
164
+ # Combined score
165
  similarity = (rhyme_score * 0.6) + (syllable_score * 0.25) + (string_similarity * 0.15)
166
 
167
  if similarity >= min_similarity:
 
192
  "target_phones": target_phones,
193
  "target_last_vowel": target_vowel,
194
  "target_ending": " ".join(target_end) if target_end else "",
195
+ "invalid_words": invalid_words, # List of words not in CMU
196
  "suggestions": suggestions
197
  }
198