Spaces:
Configuration error
Configuration error
Upload tool
Browse files- requirements.txt +1 -1
- tool.py +103 -31
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
pronouncing
|
2 |
smolagents
|
|
|
|
|
|
1 |
smolagents
|
2 |
+
pronouncing
|
tool.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from smolagents.tools import Tool
|
|
|
2 |
import pronouncing
|
3 |
import json
|
4 |
-
import string
|
5 |
|
6 |
class ParodyWordSuggestionTool(Tool):
|
7 |
name = "parody_word_suggester"
|
@@ -34,43 +34,35 @@ class ParodyWordSuggestionTool(Tool):
|
|
34 |
return None
|
35 |
|
36 |
|
37 |
-
def
|
38 |
-
"""
|
39 |
-
# Initialize variables
|
40 |
-
p1 = ""
|
41 |
-
p2 = ""
|
42 |
-
group_str = ""
|
43 |
-
group = []
|
44 |
-
|
45 |
# Strip stress markers
|
46 |
p1 = phone1.rstrip('012')
|
47 |
p2 = phone2.rstrip('012')
|
|
|
|
|
48 |
|
49 |
# Exact match
|
50 |
if p1 == p2:
|
51 |
-
return
|
52 |
|
53 |
# Check similarity groups
|
54 |
for group_str in self.PHONE_GROUPS.split('|'):
|
55 |
group = group_str.split(',')
|
56 |
if p1 in group and p2 in group:
|
57 |
-
return
|
58 |
|
59 |
-
|
60 |
-
if self._get_phone_type(p1) == self._get_phone_type(p2):
|
61 |
-
return 0.3
|
62 |
-
|
63 |
-
return 0.0
|
64 |
|
65 |
|
66 |
def _get_phone_type(self, phone: str) -> str:
|
67 |
"""Get the broad category of a phone."""
|
68 |
# Strip stress markers
|
69 |
phone = phone.rstrip('012')
|
70 |
-
|
71 |
|
72 |
# Vowels
|
73 |
-
if any(
|
74 |
return 'vowel'
|
75 |
|
76 |
# Initialize fixed sets for categories
|
@@ -94,6 +86,51 @@ class ParodyWordSuggestionTool(Tool):
|
|
94 |
return 'other'
|
95 |
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
|
98 |
"""Calculate similarity based on matching phones in sequence."""
|
99 |
if not phones1 or not phones2:
|
@@ -103,7 +140,6 @@ class ParodyWordSuggestionTool(Tool):
|
|
103 |
total_similarity = 0.0
|
104 |
i = 0
|
105 |
similarity = 0.0
|
106 |
-
|
107 |
comparisons = max(len(phones1), len(phones2))
|
108 |
|
109 |
# Compare each position
|
@@ -114,6 +150,31 @@ class ParodyWordSuggestionTool(Tool):
|
|
114 |
return total_similarity / comparisons if comparisons > 0 else 0.0
|
115 |
|
116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
|
118 |
"""Calculate similarity based on phone length."""
|
119 |
max_length = max(len(phones1), len(phones2))
|
@@ -123,24 +184,35 @@ class ParodyWordSuggestionTool(Tool):
|
|
123 |
|
124 |
def _calculate_similarity(self, word1, phones1, word2, phones2):
|
125 |
"""Calculate similarity based on multiple factors."""
|
126 |
-
# Initialize
|
127 |
phone_list1 = phones1.split()
|
128 |
phone_list2 = phones2.split()
|
129 |
rhyme_score = 0.0
|
130 |
phone_sequence_score = 0.0
|
131 |
length_score = 0.0
|
132 |
-
|
133 |
-
vowel2 = None
|
134 |
|
135 |
-
#
|
136 |
-
|
137 |
-
vowel2 = self._get_primary_vowel(phone_list2)
|
138 |
-
rhyme_score = 1.0 if vowel1 and vowel2 and vowel1 == vowel2 else 0.0
|
139 |
|
140 |
-
#
|
141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
-
#
|
|
|
144 |
length_score = self._calculate_length_similarity(phone_list1, phone_list2)
|
145 |
|
146 |
# Combined weighted score
|
@@ -156,8 +228,8 @@ class ParodyWordSuggestionTool(Tool):
|
|
156 |
"phone_sequence_score": round(phone_sequence_score, 3),
|
157 |
"length_score": round(length_score, 3),
|
158 |
"details": {
|
159 |
-
"primary_vowel1":
|
160 |
-
"primary_vowel2":
|
161 |
"phone_count1": len(phone_list1),
|
162 |
"phone_count2": len(phone_list2),
|
163 |
"matching_phones": round(phone_sequence_score * len(phone_list1))
|
|
|
1 |
from smolagents.tools import Tool
|
2 |
+
import string
|
3 |
import pronouncing
|
4 |
import json
|
|
|
5 |
|
6 |
class ParodyWordSuggestionTool(Tool):
|
7 |
name = "parody_word_suggester"
|
|
|
34 |
return None
|
35 |
|
36 |
|
37 |
+
def _phones_are_similar(self, phone1: str, phone2: str) -> bool:
|
38 |
+
"""Check if two phones are similar enough to be considered rhyming."""
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
# Strip stress markers
|
40 |
p1 = phone1.rstrip('012')
|
41 |
p2 = phone2.rstrip('012')
|
42 |
+
group_str = ""
|
43 |
+
group = []
|
44 |
|
45 |
# Exact match
|
46 |
if p1 == p2:
|
47 |
+
return True
|
48 |
|
49 |
# Check similarity groups
|
50 |
for group_str in self.PHONE_GROUPS.split('|'):
|
51 |
group = group_str.split(',')
|
52 |
if p1 in group and p2 in group:
|
53 |
+
return True
|
54 |
|
55 |
+
return False
|
|
|
|
|
|
|
|
|
56 |
|
57 |
|
58 |
def _get_phone_type(self, phone: str) -> str:
|
59 |
"""Get the broad category of a phone."""
|
60 |
# Strip stress markers
|
61 |
phone = phone.rstrip('012')
|
62 |
+
vowel_char = ""
|
63 |
|
64 |
# Vowels
|
65 |
+
if any(vowel_char in phone for vowel_char in 'AEIOU'):
|
66 |
return 'vowel'
|
67 |
|
68 |
# Initialize fixed sets for categories
|
|
|
86 |
return 'other'
|
87 |
|
88 |
|
89 |
+
def _get_rhyme_score(self, phones1: list, phones2: list) -> float:
|
90 |
+
"""Calculate rhyme score based on matching phones after primary stressed vowel."""
|
91 |
+
# Initialize variables
|
92 |
+
pos1 = -1
|
93 |
+
pos2 = -1
|
94 |
+
i = 0
|
95 |
+
phone = ""
|
96 |
+
vowel_char = ""
|
97 |
+
rhyme_part1 = []
|
98 |
+
rhyme_part2 = []
|
99 |
+
similarity_count = 0
|
100 |
+
p1 = ""
|
101 |
+
p2 = ""
|
102 |
+
|
103 |
+
# Find primary stressed vowel position in both words
|
104 |
+
for i, phone in enumerate(phones1):
|
105 |
+
if '1' in phone and any(vowel_char in phone for vowel_char in 'AEIOU'):
|
106 |
+
pos1 = i
|
107 |
+
break
|
108 |
+
|
109 |
+
for i, phone in enumerate(phones2):
|
110 |
+
if '1' in phone and any(vowel_char in phone for vowel_char in 'AEIOU'):
|
111 |
+
pos2 = i
|
112 |
+
break
|
113 |
+
|
114 |
+
if pos1 == -1 or pos2 == -1:
|
115 |
+
return 0.0
|
116 |
+
|
117 |
+
# Get all phones after and including the stressed vowel
|
118 |
+
rhyme_part1 = phones1[pos1:]
|
119 |
+
rhyme_part2 = phones2[pos2:]
|
120 |
+
|
121 |
+
# Check if lengths match
|
122 |
+
if len(rhyme_part1) != len(rhyme_part2):
|
123 |
+
return 0.0
|
124 |
+
|
125 |
+
# Calculate similarity score for rhyming part
|
126 |
+
for p1, p2 in zip(rhyme_part1, rhyme_part2):
|
127 |
+
if self._phones_are_similar(p1, p2):
|
128 |
+
similarity_count += 1
|
129 |
+
|
130 |
+
# Return score based on how many phones were similar
|
131 |
+
return similarity_count / len(rhyme_part1) if rhyme_part1 else 0.0
|
132 |
+
|
133 |
+
|
134 |
def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
|
135 |
"""Calculate similarity based on matching phones in sequence."""
|
136 |
if not phones1 or not phones2:
|
|
|
140 |
total_similarity = 0.0
|
141 |
i = 0
|
142 |
similarity = 0.0
|
|
|
143 |
comparisons = max(len(phones1), len(phones2))
|
144 |
|
145 |
# Compare each position
|
|
|
150 |
return total_similarity / comparisons if comparisons > 0 else 0.0
|
151 |
|
152 |
|
153 |
+
def _get_phone_similarity(self, phone1: str, phone2: str) -> float:
|
154 |
+
"""Calculate similarity between two phones."""
|
155 |
+
# Initialize variables
|
156 |
+
p1 = phone1.rstrip('012')
|
157 |
+
p2 = phone2.rstrip('012')
|
158 |
+
group_str = ""
|
159 |
+
group = []
|
160 |
+
|
161 |
+
# Exact match
|
162 |
+
if p1 == p2:
|
163 |
+
return 1.0
|
164 |
+
|
165 |
+
# Check similarity groups
|
166 |
+
for group_str in self.PHONE_GROUPS.split('|'):
|
167 |
+
group = group_str.split(',')
|
168 |
+
if p1 in group and p2 in group:
|
169 |
+
return 0.7
|
170 |
+
|
171 |
+
# Check broader categories
|
172 |
+
if self._get_phone_type(p1) == self._get_phone_type(p2):
|
173 |
+
return 0.3
|
174 |
+
|
175 |
+
return 0.0
|
176 |
+
|
177 |
+
|
178 |
def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
|
179 |
"""Calculate similarity based on phone length."""
|
180 |
max_length = max(len(phones1), len(phones2))
|
|
|
184 |
|
185 |
def _calculate_similarity(self, word1, phones1, word2, phones2):
|
186 |
"""Calculate similarity based on multiple factors."""
|
187 |
+
# Initialize variables
|
188 |
phone_list1 = phones1.split()
|
189 |
phone_list2 = phones2.split()
|
190 |
rhyme_score = 0.0
|
191 |
phone_sequence_score = 0.0
|
192 |
length_score = 0.0
|
193 |
+
similarity = 0.0
|
|
|
194 |
|
195 |
+
# Get rhyme score using new method
|
196 |
+
rhyme_score = self._get_rhyme_score(phone_list1, phone_list2)
|
|
|
|
|
197 |
|
198 |
+
# If rhyme score is too low (e.g. below 0.8), consider it a non-rhyme
|
199 |
+
if rhyme_score < 0.8:
|
200 |
+
return {
|
201 |
+
"similarity": 0.0,
|
202 |
+
"rhyme_score": 0.0,
|
203 |
+
"phone_sequence_score": 0.0,
|
204 |
+
"length_score": 0.0,
|
205 |
+
"details": {
|
206 |
+
"primary_vowel1": self._get_primary_vowel(phone_list1),
|
207 |
+
"primary_vowel2": self._get_primary_vowel(phone_list2),
|
208 |
+
"phone_count1": len(phone_list1),
|
209 |
+
"phone_count2": len(phone_list2),
|
210 |
+
"matching_phones": 0
|
211 |
+
}
|
212 |
+
}
|
213 |
|
214 |
+
# Calculate other scores only if words rhyme closely enough
|
215 |
+
phone_sequence_score = self._calculate_phone_sequence_similarity(phone_list1, phone_list2)
|
216 |
length_score = self._calculate_length_similarity(phone_list1, phone_list2)
|
217 |
|
218 |
# Combined weighted score
|
|
|
228 |
"phone_sequence_score": round(phone_sequence_score, 3),
|
229 |
"length_score": round(length_score, 3),
|
230 |
"details": {
|
231 |
+
"primary_vowel1": self._get_primary_vowel(phone_list1),
|
232 |
+
"primary_vowel2": self._get_primary_vowel(phone_list2),
|
233 |
"phone_count1": len(phone_list1),
|
234 |
"phone_count2": len(phone_list2),
|
235 |
"matching_phones": round(phone_sequence_score * len(phone_list1))
|