patruff committed on
Commit
4aa24c2
·
verified ·
1 Parent(s): f72b145

Upload tool

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. tool.py +88 -87
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- smolagents
2
  pronouncing
 
 
 
1
  pronouncing
2
+ smolagents
tool.py CHANGED
@@ -1,7 +1,7 @@
1
  from smolagents.tools import Tool
2
- import string
3
- import pronouncing
4
  import json
 
 
5
 
6
  class ParodyWordSuggestionTool(Tool):
7
  name = "parody_word_suggester"
@@ -10,6 +10,7 @@ class ParodyWordSuggestionTool(Tool):
10
  inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'default': '0.5', 'nullable': True}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'default': None, 'nullable': True}}
11
  output_type = "string"
12
  VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
 
13
 
14
  def _get_vowel_groups(self):
15
  groups = []
@@ -98,101 +99,101 @@ class ParodyWordSuggestionTool(Tool):
98
  return phones, []
99
 
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  def _calculate_similarity(self, word1, phones1, word2, phones2):
102
- """Calculate similarity score using improved metrics and suffix handling."""
103
- # Initialize all variables first
104
- phone_list1 = []
105
- phone_list2 = []
106
- base1 = []
107
- base2 = []
108
- suffix1 = []
109
- suffix2 = []
110
- word_vowel = None
111
- word_end = []
112
- target_vowel = None
113
- target_end = []
114
- base_length_diff = 0
115
- max_base_length = 0
116
- length_score = 0.0
117
- rhyme_score = 0.0
118
- stress_score = 0.0
119
- suffix_score = 0.0
120
- word_end_clean = []
121
- target_end_clean = []
122
- common_length = 0
123
- matched = 0
124
- stress1 = ""
125
- stress2 = ""
126
- similarity = 0.0
127
- result1 = (None, [])
128
- result2 = (None, [])
129
-
130
- # Main logic
131
  phone_list1 = phones1.split()
132
  phone_list2 = phones2.split()
133
 
134
- # Strip common suffixes first
135
- base1, suffix1 = self._strip_common_suffix(phone_list1)
136
- base2, suffix2 = self._strip_common_suffix(phone_list2)
137
-
138
- # Calculate base word similarity
139
- base_length_diff = abs(len(base1) - len(base2))
140
- max_base_length = max(len(base1), len(base2))
141
- length_score = 1.0 if base_length_diff == 0 else 1.0 - (base_length_diff / max_base_length)
142
 
143
- # Get last syllable components of base words
144
- result1 = self._get_last_syllable(base1)
145
- result2 = self._get_last_syllable(base2)
146
- word_vowel, word_end = result1
147
- target_vowel, target_end = result2
148
-
149
- # Calculate rhyme score
150
- rhyme_score = 0.0
151
- if word_vowel and target_vowel:
152
- if self._vowels_match(word_vowel, target_vowel):
153
- word_end_clean = self._strip_stress(word_end)
154
- target_end_clean = self._strip_stress(target_end)
155
 
156
- if word_end_clean == target_end_clean:
157
- if word_vowel.rstrip('012') == target_vowel.rstrip('012'):
158
- rhyme_score = 1.0
159
- else:
160
- rhyme_score = 0.7 # Penalize different vowels in same group
161
- else:
162
- common_length = min(len(word_end_clean), len(target_end_clean))
163
- matched = 0
164
- for i in range(common_length):
165
- if word_end_clean[i] == target_end_clean[i]:
166
- matched += 1
167
- rhyme_score = 0.3 * (matched / max(len(word_end_clean), len(target_end_clean)))
168
-
169
- # Calculate stress pattern similarity using base words
170
- import pronouncing
171
- stress1 = pronouncing.stresses(' '.join(base1))
172
- stress2 = pronouncing.stresses(' '.join(base2))
173
- stress_score = 1.0 if stress1 == stress2 else 0.3
174
-
175
- # Add suffix matching bonus
176
- suffix_score = 1.0 if suffix1 == suffix2 else 0.0
177
-
178
- # Weighted combination with emphasis on base word similarity
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  similarity = (
180
- (rhyme_score * 0.6) + # Base word rhyme
181
- (length_score * 0.1) + # Base word length
182
- (stress_score * 0.2) + # Base word stress
183
- (suffix_score * 0.1) # Suffix match as small bonus
184
  )
185
-
186
- similarity = min(1.0, similarity)
187
-
188
  return {
189
  "similarity": round(similarity, 3),
190
- "rhyme_score": round(rhyme_score, 3),
191
- "length_score": round(length_score, 3),
192
- "stress_score": round(stress_score, 3),
193
- "base_word_diff": base_length_diff,
194
- "has_common_suffix": bool(suffix1 and suffix2),
195
- "suffix_match": suffix_score == 1.0
196
  }
197
 
198
 
 
1
  from smolagents.tools import Tool
 
 
2
  import json
3
+ import pronouncing
4
+ import string
5
 
6
  class ParodyWordSuggestionTool(Tool):
7
  name = "parody_word_suggester"
 
10
  inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'default': '0.5', 'nullable': True}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'default': None, 'nullable': True}}
11
  output_type = "string"
12
  VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
13
+ CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"
14
 
15
  def _get_vowel_groups(self):
16
  groups = []
 
99
  return phones, []
100
 
101
 
102
+ def _get_consonant_groups(self):
103
+ """Get consonant groups from reference string."""
104
+ groups = []
105
+ group_strs = self.CONSONANT_REF.split("|")
106
+ for group_str in group_strs:
107
+ groups.append(group_str.split(","))
108
+ return groups
109
+
110
+
111
+ def _consonants_similarity(self, c1: str, c2: str) -> float:
112
+ """Calculate similarity score between two consonants."""
113
+ if c1 == c2:
114
+ return 1.0
115
+
116
+ # Check if they're in the same group
117
+ consonant_groups = self._get_consonant_groups()
118
+ for group in consonant_groups:
119
+ if c1 in group and c2 in group:
120
+ # Nasals (first group) are more similar to each other
121
+ if group == consonant_groups[0]: # M,N,NG group
122
+ return 0.8
123
+ return 0.5
124
+
125
+ return 0.0
126
+
127
+
128
  def _calculate_similarity(self, word1, phones1, word2, phones2):
129
+ """Calculate similarity score with enhanced consonant matching."""
130
+ # Initialize variables as before
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  phone_list1 = phones1.split()
132
  phone_list2 = phones2.split()
133
 
134
+ # Get stressed vowels and their positions
135
+ vowel_idx1 = -1
136
+ vowel_idx2 = -1
137
+ primary_vowel1 = None
138
+ primary_vowel2 = None
 
 
 
139
 
140
+ for i, phone in enumerate(phone_list1):
141
+ if '1' in phone: # Primary stress
142
+ vowel_idx1 = i
143
+ primary_vowel1 = phone.rstrip('012')
144
+ break
 
 
 
 
 
 
 
145
 
146
+ for i, phone in enumerate(phone_list2):
147
+ if '1' in phone:
148
+ vowel_idx2 = i
149
+ primary_vowel2 = phone.rstrip('012')
150
+ break
151
+
152
+ # Calculate vowel similarity (50% of total score)
153
+ vowel_score = 0.0
154
+ if primary_vowel1 and primary_vowel2:
155
+ if primary_vowel1 == primary_vowel2:
156
+ vowel_score = 1.0
157
+ elif self._vowels_match(primary_vowel1, primary_vowel2):
158
+ vowel_score = 0.8
159
+
160
+ # Calculate consonant similarity (30% of total score)
161
+ consonant_score = 0.0
162
+ if vowel_idx1 >= 0 and vowel_idx2 >= 0:
163
+ # Compare consonants around the stressed vowel
164
+ pre_c1 = phone_list1[vowel_idx1-1] if vowel_idx1 > 0 else None
165
+ pre_c2 = phone_list2[vowel_idx2-1] if vowel_idx2 > 0 else None
166
+ post_c1 = phone_list1[vowel_idx1+1] if vowel_idx1 < len(phone_list1)-1 else None
167
+ post_c2 = phone_list2[vowel_idx2+1] if vowel_idx2 < len(phone_list2)-1 else None
168
+
169
+ if pre_c1 and pre_c2:
170
+ consonant_score += self._consonants_similarity(pre_c1, pre_c2)
171
+ if post_c1 and post_c2:
172
+ consonant_score += self._consonants_similarity(post_c1, post_c2)
173
+
174
+ consonant_score = consonant_score / 2 # Normalize to 0-1
175
+
176
+ # Pattern/length similarity (20% of total score)
177
+ pattern_score = 0.0
178
+ if len(phone_list1) == len(phone_list2):
179
+ pattern_score = 1.0
180
+ else:
181
+ pattern_score = 1.0 - (abs(len(phone_list1) - len(phone_list2)) / max(len(phone_list1), len(phone_list2)))
182
+
183
+ # Combined weighted score
184
  similarity = (
185
+ (vowel_score * 0.5) + # Vowel similarity most important
186
+ (consonant_score * 0.3) + # Consonant similarity next
187
+ (pattern_score * 0.2) # Pattern/length least important
 
188
  )
189
+
 
 
190
  return {
191
  "similarity": round(similarity, 3),
192
+ "vowel_score": round(vowel_score, 3),
193
+ "consonant_score": round(consonant_score, 3),
194
+ "pattern_score": round(pattern_score, 3),
195
+ "primary_vowels": f"{primary_vowel1}-{primary_vowel2}",
196
+ "consonants": "similar" if consonant_score > 0.5 else "different"
 
197
  }
198
 
199