Spaces:
Configuration error
Configuration error
Upload tool
Browse files- requirements.txt +1 -1
- tool.py +88 -87
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
smolagents
|
2 |
pronouncing
|
|
|
|
|
|
1 |
pronouncing
|
2 |
+
smolagents
|
tool.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from smolagents.tools import Tool
|
2 |
-
import string
|
3 |
-
import pronouncing
|
4 |
import json
|
|
|
|
|
5 |
|
6 |
class ParodyWordSuggestionTool(Tool):
|
7 |
name = "parody_word_suggester"
|
@@ -10,6 +10,7 @@ class ParodyWordSuggestionTool(Tool):
|
|
10 |
inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'default': '0.5', 'nullable': True}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'default': None, 'nullable': True}}
|
11 |
output_type = "string"
|
12 |
VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
|
|
|
13 |
|
14 |
def _get_vowel_groups(self):
|
15 |
groups = []
|
@@ -98,101 +99,101 @@ class ParodyWordSuggestionTool(Tool):
|
|
98 |
return phones, []
|
99 |
|
100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
def _calculate_similarity(self, word1, phones1, word2, phones2):
|
102 |
-
"""Calculate similarity score
|
103 |
-
# Initialize
|
104 |
-
phone_list1 = []
|
105 |
-
phone_list2 = []
|
106 |
-
base1 = []
|
107 |
-
base2 = []
|
108 |
-
suffix1 = []
|
109 |
-
suffix2 = []
|
110 |
-
word_vowel = None
|
111 |
-
word_end = []
|
112 |
-
target_vowel = None
|
113 |
-
target_end = []
|
114 |
-
base_length_diff = 0
|
115 |
-
max_base_length = 0
|
116 |
-
length_score = 0.0
|
117 |
-
rhyme_score = 0.0
|
118 |
-
stress_score = 0.0
|
119 |
-
suffix_score = 0.0
|
120 |
-
word_end_clean = []
|
121 |
-
target_end_clean = []
|
122 |
-
common_length = 0
|
123 |
-
matched = 0
|
124 |
-
stress1 = ""
|
125 |
-
stress2 = ""
|
126 |
-
similarity = 0.0
|
127 |
-
result1 = (None, [])
|
128 |
-
result2 = (None, [])
|
129 |
-
|
130 |
-
# Main logic
|
131 |
phone_list1 = phones1.split()
|
132 |
phone_list2 = phones2.split()
|
133 |
|
134 |
-
#
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
base_length_diff = abs(len(base1) - len(base2))
|
140 |
-
max_base_length = max(len(base1), len(base2))
|
141 |
-
length_score = 1.0 if base_length_diff == 0 else 1.0 - (base_length_diff / max_base_length)
|
142 |
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
# Calculate rhyme score
|
150 |
-
rhyme_score = 0.0
|
151 |
-
if word_vowel and target_vowel:
|
152 |
-
if self._vowels_match(word_vowel, target_vowel):
|
153 |
-
word_end_clean = self._strip_stress(word_end)
|
154 |
-
target_end_clean = self._strip_stress(target_end)
|
155 |
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
similarity = (
|
180 |
-
(
|
181 |
-
(
|
182 |
-
(
|
183 |
-
(suffix_score * 0.1) # Suffix match as small bonus
|
184 |
)
|
185 |
-
|
186 |
-
similarity = min(1.0, similarity)
|
187 |
-
|
188 |
return {
|
189 |
"similarity": round(similarity, 3),
|
190 |
-
"
|
191 |
-
"
|
192 |
-
"
|
193 |
-
"
|
194 |
-
"
|
195 |
-
"suffix_match": suffix_score == 1.0
|
196 |
}
|
197 |
|
198 |
|
|
|
1 |
from smolagents.tools import Tool
|
|
|
|
|
2 |
import json
|
3 |
+
import pronouncing
|
4 |
+
import string
|
5 |
|
6 |
class ParodyWordSuggestionTool(Tool):
|
7 |
name = "parody_word_suggester"
|
|
|
10 |
inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'default': '0.5', 'nullable': True}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'default': None, 'nullable': True}}
|
11 |
output_type = "string"
|
12 |
VOWEL_REF = "AH,AX|UH|AE,EH|IY,IH|AO,AA|UW|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
|
13 |
+
CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"
|
14 |
|
15 |
def _get_vowel_groups(self):
|
16 |
groups = []
|
|
|
99 |
return phones, []
|
100 |
|
101 |
|
102 |
+
def _get_consonant_groups(self):
|
103 |
+
"""Get consonant groups from reference string."""
|
104 |
+
groups = []
|
105 |
+
group_strs = self.CONSONANT_REF.split("|")
|
106 |
+
for group_str in group_strs:
|
107 |
+
groups.append(group_str.split(","))
|
108 |
+
return groups
|
109 |
+
|
110 |
+
|
111 |
+
def _consonants_similarity(self, c1: str, c2: str) -> float:
|
112 |
+
"""Calculate similarity score between two consonants."""
|
113 |
+
if c1 == c2:
|
114 |
+
return 1.0
|
115 |
+
|
116 |
+
# Check if they're in the same group
|
117 |
+
consonant_groups = self._get_consonant_groups()
|
118 |
+
for group in consonant_groups:
|
119 |
+
if c1 in group and c2 in group:
|
120 |
+
# Nasals (first group) are more similar to each other
|
121 |
+
if group == consonant_groups[0]: # M,N,NG group
|
122 |
+
return 0.8
|
123 |
+
return 0.5
|
124 |
+
|
125 |
+
return 0.0
|
126 |
+
|
127 |
+
|
128 |
def _calculate_similarity(self, word1, phones1, word2, phones2):
|
129 |
+
"""Calculate similarity score with enhanced consonant matching."""
|
130 |
+
# Initialize variables as before
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
phone_list1 = phones1.split()
|
132 |
phone_list2 = phones2.split()
|
133 |
|
134 |
+
# Get stressed vowels and their positions
|
135 |
+
vowel_idx1 = -1
|
136 |
+
vowel_idx2 = -1
|
137 |
+
primary_vowel1 = None
|
138 |
+
primary_vowel2 = None
|
|
|
|
|
|
|
139 |
|
140 |
+
for i, phone in enumerate(phone_list1):
|
141 |
+
if '1' in phone: # Primary stress
|
142 |
+
vowel_idx1 = i
|
143 |
+
primary_vowel1 = phone.rstrip('012')
|
144 |
+
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
+
for i, phone in enumerate(phone_list2):
|
147 |
+
if '1' in phone:
|
148 |
+
vowel_idx2 = i
|
149 |
+
primary_vowel2 = phone.rstrip('012')
|
150 |
+
break
|
151 |
+
|
152 |
+
# Calculate vowel similarity (50% of total score)
|
153 |
+
vowel_score = 0.0
|
154 |
+
if primary_vowel1 and primary_vowel2:
|
155 |
+
if primary_vowel1 == primary_vowel2:
|
156 |
+
vowel_score = 1.0
|
157 |
+
elif self._vowels_match(primary_vowel1, primary_vowel2):
|
158 |
+
vowel_score = 0.8
|
159 |
+
|
160 |
+
# Calculate consonant similarity (30% of total score)
|
161 |
+
consonant_score = 0.0
|
162 |
+
if vowel_idx1 >= 0 and vowel_idx2 >= 0:
|
163 |
+
# Compare consonants around the stressed vowel
|
164 |
+
pre_c1 = phone_list1[vowel_idx1-1] if vowel_idx1 > 0 else None
|
165 |
+
pre_c2 = phone_list2[vowel_idx2-1] if vowel_idx2 > 0 else None
|
166 |
+
post_c1 = phone_list1[vowel_idx1+1] if vowel_idx1 < len(phone_list1)-1 else None
|
167 |
+
post_c2 = phone_list2[vowel_idx2+1] if vowel_idx2 < len(phone_list2)-1 else None
|
168 |
+
|
169 |
+
if pre_c1 and pre_c2:
|
170 |
+
consonant_score += self._consonants_similarity(pre_c1, pre_c2)
|
171 |
+
if post_c1 and post_c2:
|
172 |
+
consonant_score += self._consonants_similarity(post_c1, post_c2)
|
173 |
+
|
174 |
+
consonant_score = consonant_score / 2 # Normalize to 0-1
|
175 |
+
|
176 |
+
# Pattern/length similarity (20% of total score)
|
177 |
+
pattern_score = 0.0
|
178 |
+
if len(phone_list1) == len(phone_list2):
|
179 |
+
pattern_score = 1.0
|
180 |
+
else:
|
181 |
+
pattern_score = 1.0 - (abs(len(phone_list1) - len(phone_list2)) / max(len(phone_list1), len(phone_list2)))
|
182 |
+
|
183 |
+
# Combined weighted score
|
184 |
similarity = (
|
185 |
+
(vowel_score * 0.5) + # Vowel similarity most important
|
186 |
+
(consonant_score * 0.3) + # Consonant similarity next
|
187 |
+
(pattern_score * 0.2) # Pattern/length least important
|
|
|
188 |
)
|
189 |
+
|
|
|
|
|
190 |
return {
|
191 |
"similarity": round(similarity, 3),
|
192 |
+
"vowel_score": round(vowel_score, 3),
|
193 |
+
"consonant_score": round(consonant_score, 3),
|
194 |
+
"pattern_score": round(pattern_score, 3),
|
195 |
+
"primary_vowels": f"{primary_vowel1}-{primary_vowel2}",
|
196 |
+
"consonants": "similar" if consonant_score > 0.5 else "different"
|
|
|
197 |
}
|
198 |
|
199 |
|