patruff committed on
Commit
2898d37
·
verified ·
1 Parent(s): 7ebad8d

Upload tool

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. tool.py +41 -121
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- pronouncing
2
  smolagents
 
 
 
1
  smolagents
2
+ pronouncing
tool.py CHANGED
@@ -1,27 +1,16 @@
1
  from smolagents.tools import Tool
2
- import string
3
  import pronouncing
4
  import json
 
5
 
6
  class ParodyWordSuggestionTool(Tool):
7
  name = "parody_word_suggester"
8
  description = "Suggests rhyming funny words using CMU dictionary pronunciations."
9
  inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True, 'default': None}}
10
  output_type = "string"
11
- RHYME_WEIGHT = 0.6
12
- PHONE_PATTERN_WEIGHT = 0.2
13
- CHAR_DIFF_WEIGHT = 0.1
14
- CONSONANT_WEIGHT = 0.1
15
- CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"
16
-
17
- def _get_consonant_groups(self):
18
- """Get consonant similarity groups."""
19
- groups = []
20
- group_strs = self.CONSONANT_REF.split("|")
21
- for group_str in group_strs:
22
- groups.append(group_str.split(","))
23
- return groups
24
-
25
 
26
  def _get_word_phones(self, word, custom_phones=None):
27
  """Get phones for a word, checking custom dictionary first."""
@@ -35,139 +24,76 @@ class ParodyWordSuggestionTool(Tool):
35
 
36
  def _get_primary_vowel(self, phones: list) -> str:
37
  """Get the primary stressed vowel from phone list."""
38
- vowel_chars = 'AEIOU' # Initialize the vowel characters set
39
- phone_str = "" # Initialize phone string
40
- vowel_char = ""
41
-
42
- for phone_str in phones:
43
- if '1' in phone_str and any(vowel_char in phone_str for vowel_char in vowel_chars):
44
- return phone_str.rstrip('012')
45
  return None
46
 
47
 
48
- def _calculate_char_difference(self, word1: str, word2: str) -> float:
49
- """Calculate character difference score."""
50
- if not word1 or not word2:
 
51
  return 0.0
52
 
53
- # Initialize variables
54
- changes = 0
55
- char1 = ""
56
- char2 = ""
57
 
58
- # Count character differences
59
- for char1, char2 in zip(word1, word2):
60
- if char1 != char2:
61
- changes += 1
62
-
63
- # Add difference for length mismatch
64
- changes += abs(len(word1) - len(word2))
65
-
66
- # Score based on changes (0 changes = 1.0, more changes = lower score)
67
- max_changes = max(len(word1), len(word2))
68
- return 1.0 - (changes / max_changes) if max_changes > 0 else 0.0
69
-
70
-
71
- def _calculate_consonant_similarity(self, phone_list1: list, phone_list2: list) -> float:
72
- """Calculate consonant similarity score."""
73
- # Initialize variables
74
- consonant_score = 0.0
75
- consonant_groups = self._get_consonant_groups()
76
- vowel_chars = 'AEIOU'
77
- phone_str = ""
78
- vowel_char = ""
79
- consonants1 = []
80
- consonants2 = []
81
  matches = 0
82
- comparisons = 0
83
- cons1 = ""
84
- cons2 = ""
85
- group = []
86
-
87
- # Get consonants (non-vowel phones)
88
- consonants1 = [phone_str for phone_str in phone_list1
89
- if not any(vowel_char in phone_str for vowel_char in vowel_chars)]
90
- consonants2 = [phone_str for phone_str in phone_list2
91
- if not any(vowel_char in phone_str for vowel_char in vowel_chars)]
92
 
93
- if not consonants1 or not consonants2:
94
- return 0.0
95
-
96
- # Compare each consonant
97
- matches = 0
98
- comparisons = min(len(consonants1), len(consonants2))
99
-
100
- for cons1, cons2 in zip(consonants1, consonants2):
101
- cons1 = cons1.rstrip('012')
102
- cons2 = cons2.rstrip('012')
103
-
104
- if cons1 == cons2:
105
  matches += 1
106
- continue
107
-
108
- # Check if in same group
109
- for group in consonant_groups:
110
- if cons1 in group and cons2 in group:
111
- matches += 0.5
112
- break
113
-
114
- return matches / comparisons if comparisons > 0 else 0.0
115
 
116
 
117
  def _calculate_similarity(self, word1, phones1, word2, phones2):
118
  """Calculate similarity based on multiple factors."""
119
- # Initialize scores
120
- rhyme_score = 0.0
121
- phone_score = 0.0
122
- char_diff_score = 0.0
123
- consonant_score = 0.0
124
-
125
  # Initialize phone lists
126
  phone_list1 = phones1.split()
127
  phone_list2 = phones2.split()
128
 
129
- # Initialize variables for details
130
- vowel1 = None
131
- vowel2 = None
132
-
133
- # 1. Rhyme score (60%) - based on primary vowel
134
  vowel1 = self._get_primary_vowel(phone_list1)
135
  vowel2 = self._get_primary_vowel(phone_list2)
136
- if vowel1 and vowel2 and vowel1 == vowel2:
137
- rhyme_score = 1.0
138
-
139
- # 2. Phone pattern score (20%) - based on number of phones
140
- if len(phone_list1) == len(phone_list2):
141
- phone_score = 1.0
142
- else:
143
- phone_score = 1.0 - (abs(len(phone_list1) - len(phone_list2)) / max(len(phone_list1), len(phone_list2)))
144
 
145
- # 3. Character difference score (10%)
146
- char_diff_score = self._calculate_char_difference(word1, word2)
147
 
148
- # 4. Consonant similarity score (10%)
149
- consonant_score = self._calculate_consonant_similarity(phone_list1, phone_list2)
150
 
151
  # Combined weighted score
152
  similarity = (
153
  (rhyme_score * self.RHYME_WEIGHT) +
154
- (phone_score * self.PHONE_PATTERN_WEIGHT) +
155
- (char_diff_score * self.CHAR_DIFF_WEIGHT) +
156
- (consonant_score * self.CONSONANT_WEIGHT)
157
  )
158
 
159
  return {
160
  "similarity": round(similarity, 3),
161
  "rhyme_score": round(rhyme_score, 3),
162
- "phone_score": round(phone_score, 3),
163
- "char_diff_score": round(char_diff_score, 3),
164
- "consonant_score": round(consonant_score, 3),
165
  "details": {
166
  "primary_vowel1": vowel1,
167
  "primary_vowel2": vowel2,
168
  "phone_count1": len(phone_list1),
169
  "phone_count2": len(phone_list2),
170
- "char_differences": abs(len(word1) - len(word2))
171
  }
172
  }
173
 
@@ -183,11 +109,6 @@ class ParodyWordSuggestionTool(Tool):
183
  suggestions = []
184
  valid_words = []
185
  invalid_words = []
186
- words = []
187
- target_phones = ""
188
- word_phones = ""
189
- word = ""
190
- similarity_result = {}
191
 
192
  # Parse JSON string to list
193
  try:
@@ -232,9 +153,8 @@ class ParodyWordSuggestionTool(Tool):
232
  "word": word,
233
  "similarity": similarity_result["similarity"],
234
  "rhyme_score": similarity_result["rhyme_score"],
235
- "phone_score": similarity_result["phone_score"],
236
- "char_diff_score": similarity_result["char_diff_score"],
237
- "consonant_score": similarity_result["consonant_score"],
238
  "phones": word_phones,
239
  "is_custom": word in custom_phones if custom_phones else False,
240
  "details": similarity_result["details"]
 
1
  from smolagents.tools import Tool
 
2
  import pronouncing
3
  import json
4
+ import string
5
 
6
  class ParodyWordSuggestionTool(Tool):
7
  name = "parody_word_suggester"
8
  description = "Suggests rhyming funny words using CMU dictionary pronunciations."
9
  inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True, 'default': None}}
10
  output_type = "string"
11
+ RHYME_WEIGHT = 0.5
12
+ PHONE_SEQUENCE_WEIGHT = 0.3
13
+ LENGTH_WEIGHT = 0.2
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  def _get_word_phones(self, word, custom_phones=None):
16
  """Get phones for a word, checking custom dictionary first."""
 
24
 
25
  def _get_primary_vowel(self, phones: list) -> str:
26
  """Get the primary stressed vowel from phone list."""
27
+ v = ""
28
+ for phone in phones:
29
+ if '1' in phone and any(v in phone for v in 'AEIOU'):
30
+ return phone.rstrip('012')
 
 
 
31
  return None
32
 
33
 
34
+ def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
35
+ """Calculate similarity based on matching phones in sequence."""
36
+ p = ""
37
+ if not phones1 or not phones2:
38
  return 0.0
39
 
40
+ # Strip stress markers for comparison
41
+ clean_phones1 = [p.rstrip('012') for p in phones1]
42
+ clean_phones2 = [p.rstrip('012') for p in phones2]
 
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  matches = 0
45
+ total_comparisons = max(len(clean_phones1), len(clean_phones2))
 
 
 
 
 
 
 
 
 
46
 
47
+ # Compare phones in sequence
48
+ for i in range(min(len(clean_phones1), len(clean_phones2))):
49
+ if clean_phones1[i] == clean_phones2[i]:
 
 
 
 
 
 
 
 
 
50
  matches += 1
51
+
52
+ return matches / total_comparisons if total_comparisons > 0 else 0.0
53
+
54
+
55
+ def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
56
+ """Calculate similarity based on phone length."""
57
+ max_length = max(len(phones1), len(phones2))
58
+ length_diff = abs(len(phones1) - len(phones2))
59
+ return 1.0 - (length_diff / max_length) if max_length > 0 else 0.0
60
 
61
 
62
  def _calculate_similarity(self, word1, phones1, word2, phones2):
63
  """Calculate similarity based on multiple factors."""
 
 
 
 
 
 
64
  # Initialize phone lists
65
  phone_list1 = phones1.split()
66
  phone_list2 = phones2.split()
67
 
68
+ # 1. Rhyme score (50%) - based on primary vowel
 
 
 
 
69
  vowel1 = self._get_primary_vowel(phone_list1)
70
  vowel2 = self._get_primary_vowel(phone_list2)
71
+ rhyme_score = 1.0 if vowel1 and vowel2 and vowel1 == vowel2 else 0.0
 
 
 
 
 
 
 
72
 
73
+ # 2. Phone sequence similarity (30%)
74
+ phone_sequence_score = self._calculate_phone_sequence_similarity(phone_list1, phone_list2)
75
 
76
+ # 3. Length similarity (20%)
77
+ length_score = self._calculate_length_similarity(phone_list1, phone_list2)
78
 
79
  # Combined weighted score
80
  similarity = (
81
  (rhyme_score * self.RHYME_WEIGHT) +
82
+ (phone_sequence_score * self.PHONE_SEQUENCE_WEIGHT) +
83
+ (length_score * self.LENGTH_WEIGHT)
 
84
  )
85
 
86
  return {
87
  "similarity": round(similarity, 3),
88
  "rhyme_score": round(rhyme_score, 3),
89
+ "phone_sequence_score": round(phone_sequence_score, 3),
90
+ "length_score": round(length_score, 3),
 
91
  "details": {
92
  "primary_vowel1": vowel1,
93
  "primary_vowel2": vowel2,
94
  "phone_count1": len(phone_list1),
95
  "phone_count2": len(phone_list2),
96
+ "matching_phones": round(phone_sequence_score * len(phone_list1))
97
  }
98
  }
99
 
 
109
  suggestions = []
110
  valid_words = []
111
  invalid_words = []
 
 
 
 
 
112
 
113
  # Parse JSON string to list
114
  try:
 
153
  "word": word,
154
  "similarity": similarity_result["similarity"],
155
  "rhyme_score": similarity_result["rhyme_score"],
156
+ "phone_sequence_score": similarity_result["phone_sequence_score"],
157
+ "length_score": similarity_result["length_score"],
 
158
  "phones": word_phones,
159
  "is_custom": word in custom_phones if custom_phones else False,
160
  "details": similarity_result["details"]