patruff committed on
Commit
294224b
·
verified ·
1 Parent(s): 06ae4d0

Upload tool

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. tool.py +103 -31
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- pronouncing
2
  smolagents
 
 
 
1
  smolagents
2
+ pronouncing
tool.py CHANGED
@@ -1,7 +1,7 @@
1
  from smolagents.tools import Tool
 
2
  import pronouncing
3
  import json
4
- import string
5
 
6
  class ParodyWordSuggestionTool(Tool):
7
  name = "parody_word_suggester"
@@ -34,43 +34,35 @@ class ParodyWordSuggestionTool(Tool):
34
  return None
35
 
36
 
37
- def _get_phone_similarity(self, phone1: str, phone2: str) -> float:
38
- """Calculate similarity between two phones."""
39
- # Initialize variables
40
- p1 = ""
41
- p2 = ""
42
- group_str = ""
43
- group = []
44
-
45
  # Strip stress markers
46
  p1 = phone1.rstrip('012')
47
  p2 = phone2.rstrip('012')
 
 
48
 
49
  # Exact match
50
  if p1 == p2:
51
- return 1.0
52
 
53
  # Check similarity groups
54
  for group_str in self.PHONE_GROUPS.split('|'):
55
  group = group_str.split(',')
56
  if p1 in group and p2 in group:
57
- return 0.7
58
 
59
- # Check broader categories
60
- if self._get_phone_type(p1) == self._get_phone_type(p2):
61
- return 0.3
62
-
63
- return 0.0
64
 
65
 
66
  def _get_phone_type(self, phone: str) -> str:
67
  """Get the broad category of a phone."""
68
  # Strip stress markers
69
  phone = phone.rstrip('012')
70
- v = ""
71
 
72
  # Vowels
73
- if any(v in phone for v in 'AEIOU'):
74
  return 'vowel'
75
 
76
  # Initialize fixed sets for categories
@@ -94,6 +86,51 @@ class ParodyWordSuggestionTool(Tool):
94
  return 'other'
95
 
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
98
  """Calculate similarity based on matching phones in sequence."""
99
  if not phones1 or not phones2:
@@ -103,7 +140,6 @@ class ParodyWordSuggestionTool(Tool):
103
  total_similarity = 0.0
104
  i = 0
105
  similarity = 0.0
106
-
107
  comparisons = max(len(phones1), len(phones2))
108
 
109
  # Compare each position
@@ -114,6 +150,31 @@ class ParodyWordSuggestionTool(Tool):
114
  return total_similarity / comparisons if comparisons > 0 else 0.0
115
 
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
118
  """Calculate similarity based on phone length."""
119
  max_length = max(len(phones1), len(phones2))
@@ -123,24 +184,35 @@ class ParodyWordSuggestionTool(Tool):
123
 
124
  def _calculate_similarity(self, word1, phones1, word2, phones2):
125
  """Calculate similarity based on multiple factors."""
126
- # Initialize phone lists and scores
127
  phone_list1 = phones1.split()
128
  phone_list2 = phones2.split()
129
  rhyme_score = 0.0
130
  phone_sequence_score = 0.0
131
  length_score = 0.0
132
- vowel1 = None
133
- vowel2 = None
134
 
135
- # 1. Rhyme score (50%) - based on primary vowel
136
- vowel1 = self._get_primary_vowel(phone_list1)
137
- vowel2 = self._get_primary_vowel(phone_list2)
138
- rhyme_score = 1.0 if vowel1 and vowel2 and vowel1 == vowel2 else 0.0
139
 
140
- # 2. Phone sequence similarity (30%)
141
- phone_sequence_score = self._calculate_phone_sequence_similarity(phone_list1, phone_list2)
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
- # 3. Length similarity (20%)
 
144
  length_score = self._calculate_length_similarity(phone_list1, phone_list2)
145
 
146
  # Combined weighted score
@@ -156,8 +228,8 @@ class ParodyWordSuggestionTool(Tool):
156
  "phone_sequence_score": round(phone_sequence_score, 3),
157
  "length_score": round(length_score, 3),
158
  "details": {
159
- "primary_vowel1": vowel1,
160
- "primary_vowel2": vowel2,
161
  "phone_count1": len(phone_list1),
162
  "phone_count2": len(phone_list2),
163
  "matching_phones": round(phone_sequence_score * len(phone_list1))
 
1
  from smolagents.tools import Tool
2
+ import string
3
  import pronouncing
4
  import json
 
5
 
6
  class ParodyWordSuggestionTool(Tool):
7
  name = "parody_word_suggester"
 
34
  return None
35
 
36
 
37
+ def _phones_are_similar(self, phone1: str, phone2: str) -> bool:
38
+ """Check if two phones are similar enough to be considered rhyming."""
 
 
 
 
 
 
39
  # Strip stress markers
40
  p1 = phone1.rstrip('012')
41
  p2 = phone2.rstrip('012')
42
+ group_str = ""
43
+ group = []
44
 
45
  # Exact match
46
  if p1 == p2:
47
+ return True
48
 
49
  # Check similarity groups
50
  for group_str in self.PHONE_GROUPS.split('|'):
51
  group = group_str.split(',')
52
  if p1 in group and p2 in group:
53
+ return True
54
 
55
+ return False
 
 
 
 
56
 
57
 
58
  def _get_phone_type(self, phone: str) -> str:
59
  """Get the broad category of a phone."""
60
  # Strip stress markers
61
  phone = phone.rstrip('012')
62
+ vowel_char = ""
63
 
64
  # Vowels
65
+ if any(vowel_char in phone for vowel_char in 'AEIOU'):
66
  return 'vowel'
67
 
68
  # Initialize fixed sets for categories
 
86
  return 'other'
87
 
88
 
89
+ def _get_rhyme_score(self, phones1: list, phones2: list) -> float:
90
+ """Calculate rhyme score based on matching phones after primary stressed vowel."""
91
+ # Initialize variables
92
+ pos1 = -1
93
+ pos2 = -1
94
+ i = 0
95
+ phone = ""
96
+ vowel_char = ""
97
+ rhyme_part1 = []
98
+ rhyme_part2 = []
99
+ similarity_count = 0
100
+ p1 = ""
101
+ p2 = ""
102
+
103
+ # Find primary stressed vowel position in both words
104
+ for i, phone in enumerate(phones1):
105
+ if '1' in phone and any(vowel_char in phone for vowel_char in 'AEIOU'):
106
+ pos1 = i
107
+ break
108
+
109
+ for i, phone in enumerate(phones2):
110
+ if '1' in phone and any(vowel_char in phone for vowel_char in 'AEIOU'):
111
+ pos2 = i
112
+ break
113
+
114
+ if pos1 == -1 or pos2 == -1:
115
+ return 0.0
116
+
117
+ # Get all phones after and including the stressed vowel
118
+ rhyme_part1 = phones1[pos1:]
119
+ rhyme_part2 = phones2[pos2:]
120
+
121
+ # Check if lengths match
122
+ if len(rhyme_part1) != len(rhyme_part2):
123
+ return 0.0
124
+
125
+ # Calculate similarity score for rhyming part
126
+ for p1, p2 in zip(rhyme_part1, rhyme_part2):
127
+ if self._phones_are_similar(p1, p2):
128
+ similarity_count += 1
129
+
130
+ # Return score based on how many phones were similar
131
+ return similarity_count / len(rhyme_part1) if rhyme_part1 else 0.0
132
+
133
+
134
  def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
135
  """Calculate similarity based on matching phones in sequence."""
136
  if not phones1 or not phones2:
 
140
  total_similarity = 0.0
141
  i = 0
142
  similarity = 0.0
 
143
  comparisons = max(len(phones1), len(phones2))
144
 
145
  # Compare each position
 
150
  return total_similarity / comparisons if comparisons > 0 else 0.0
151
 
152
 
153
+ def _get_phone_similarity(self, phone1: str, phone2: str) -> float:
154
+ """Calculate similarity between two phones."""
155
+ # Initialize variables
156
+ p1 = phone1.rstrip('012')
157
+ p2 = phone2.rstrip('012')
158
+ group_str = ""
159
+ group = []
160
+
161
+ # Exact match
162
+ if p1 == p2:
163
+ return 1.0
164
+
165
+ # Check similarity groups
166
+ for group_str in self.PHONE_GROUPS.split('|'):
167
+ group = group_str.split(',')
168
+ if p1 in group and p2 in group:
169
+ return 0.7
170
+
171
+ # Check broader categories
172
+ if self._get_phone_type(p1) == self._get_phone_type(p2):
173
+ return 0.3
174
+
175
+ return 0.0
176
+
177
+
178
  def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
179
  """Calculate similarity based on phone length."""
180
  max_length = max(len(phones1), len(phones2))
 
184
 
185
  def _calculate_similarity(self, word1, phones1, word2, phones2):
186
  """Calculate similarity based on multiple factors."""
187
+ # Initialize variables
188
  phone_list1 = phones1.split()
189
  phone_list2 = phones2.split()
190
  rhyme_score = 0.0
191
  phone_sequence_score = 0.0
192
  length_score = 0.0
193
+ similarity = 0.0
 
194
 
195
+ # Get rhyme score using new method
196
+ rhyme_score = self._get_rhyme_score(phone_list1, phone_list2)
 
 
197
 
198
+ # If rhyme score is too low (e.g. below 0.8), consider it a non-rhyme
199
+ if rhyme_score < 0.8:
200
+ return {
201
+ "similarity": 0.0,
202
+ "rhyme_score": 0.0,
203
+ "phone_sequence_score": 0.0,
204
+ "length_score": 0.0,
205
+ "details": {
206
+ "primary_vowel1": self._get_primary_vowel(phone_list1),
207
+ "primary_vowel2": self._get_primary_vowel(phone_list2),
208
+ "phone_count1": len(phone_list1),
209
+ "phone_count2": len(phone_list2),
210
+ "matching_phones": 0
211
+ }
212
+ }
213
 
214
+ # Calculate other scores only if words rhyme closely enough
215
+ phone_sequence_score = self._calculate_phone_sequence_similarity(phone_list1, phone_list2)
216
  length_score = self._calculate_length_similarity(phone_list1, phone_list2)
217
 
218
  # Combined weighted score
 
228
  "phone_sequence_score": round(phone_sequence_score, 3),
229
  "length_score": round(length_score, 3),
230
  "details": {
231
+ "primary_vowel1": self._get_primary_vowel(phone_list1),
232
+ "primary_vowel2": self._get_primary_vowel(phone_list2),
233
  "phone_count1": len(phone_list1),
234
  "phone_count2": len(phone_list2),
235
  "matching_phones": round(phone_sequence_score * len(phone_list1))