Spaces:
Configuration error
Configuration error
Upload tool
Browse files- requirements.txt +1 -1
- tool.py +41 -121
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
pronouncing
|
2 |
smolagents
|
|
|
|
|
|
1 |
smolagents
|
2 |
+
pronouncing
|
tool.py
CHANGED
@@ -1,27 +1,16 @@
|
|
1 |
from smolagents.tools import Tool
|
2 |
-
import string
|
3 |
import pronouncing
|
4 |
import json
|
|
|
5 |
|
6 |
class ParodyWordSuggestionTool(Tool):
|
7 |
name = "parody_word_suggester"
|
8 |
description = "Suggests rhyming funny words using CMU dictionary pronunciations."
|
9 |
inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True, 'default': None}}
|
10 |
output_type = "string"
|
11 |
-
RHYME_WEIGHT = 0.
|
12 |
-
|
13 |
-
|
14 |
-
CONSONANT_WEIGHT = 0.1
|
15 |
-
CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"
|
16 |
-
|
17 |
-
def _get_consonant_groups(self):
|
18 |
-
"""Get consonant similarity groups."""
|
19 |
-
groups = []
|
20 |
-
group_strs = self.CONSONANT_REF.split("|")
|
21 |
-
for group_str in group_strs:
|
22 |
-
groups.append(group_str.split(","))
|
23 |
-
return groups
|
24 |
-
|
25 |
|
26 |
def _get_word_phones(self, word, custom_phones=None):
|
27 |
"""Get phones for a word, checking custom dictionary first."""
|
@@ -35,139 +24,76 @@ class ParodyWordSuggestionTool(Tool):
|
|
35 |
|
36 |
def _get_primary_vowel(self, phones: list) -> str:
|
37 |
"""Get the primary stressed vowel from phone list."""
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
for phone_str in phones:
|
43 |
-
if '1' in phone_str and any(vowel_char in phone_str for vowel_char in vowel_chars):
|
44 |
-
return phone_str.rstrip('012')
|
45 |
return None
|
46 |
|
47 |
|
48 |
-
def
|
49 |
-
"""Calculate
|
50 |
-
|
|
|
51 |
return 0.0
|
52 |
|
53 |
-
#
|
54 |
-
|
55 |
-
|
56 |
-
char2 = ""
|
57 |
|
58 |
-
# Count character differences
|
59 |
-
for char1, char2 in zip(word1, word2):
|
60 |
-
if char1 != char2:
|
61 |
-
changes += 1
|
62 |
-
|
63 |
-
# Add difference for length mismatch
|
64 |
-
changes += abs(len(word1) - len(word2))
|
65 |
-
|
66 |
-
# Score based on changes (0 changes = 1.0, more changes = lower score)
|
67 |
-
max_changes = max(len(word1), len(word2))
|
68 |
-
return 1.0 - (changes / max_changes) if max_changes > 0 else 0.0
|
69 |
-
|
70 |
-
|
71 |
-
def _calculate_consonant_similarity(self, phone_list1: list, phone_list2: list) -> float:
|
72 |
-
"""Calculate consonant similarity score."""
|
73 |
-
# Initialize variables
|
74 |
-
consonant_score = 0.0
|
75 |
-
consonant_groups = self._get_consonant_groups()
|
76 |
-
vowel_chars = 'AEIOU'
|
77 |
-
phone_str = ""
|
78 |
-
vowel_char = ""
|
79 |
-
consonants1 = []
|
80 |
-
consonants2 = []
|
81 |
matches = 0
|
82 |
-
|
83 |
-
cons1 = ""
|
84 |
-
cons2 = ""
|
85 |
-
group = []
|
86 |
-
|
87 |
-
# Get consonants (non-vowel phones)
|
88 |
-
consonants1 = [phone_str for phone_str in phone_list1
|
89 |
-
if not any(vowel_char in phone_str for vowel_char in vowel_chars)]
|
90 |
-
consonants2 = [phone_str for phone_str in phone_list2
|
91 |
-
if not any(vowel_char in phone_str for vowel_char in vowel_chars)]
|
92 |
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
# Compare each consonant
|
97 |
-
matches = 0
|
98 |
-
comparisons = min(len(consonants1), len(consonants2))
|
99 |
-
|
100 |
-
for cons1, cons2 in zip(consonants1, consonants2):
|
101 |
-
cons1 = cons1.rstrip('012')
|
102 |
-
cons2 = cons2.rstrip('012')
|
103 |
-
|
104 |
-
if cons1 == cons2:
|
105 |
matches += 1
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
return
|
115 |
|
116 |
|
117 |
def _calculate_similarity(self, word1, phones1, word2, phones2):
|
118 |
"""Calculate similarity based on multiple factors."""
|
119 |
-
# Initialize scores
|
120 |
-
rhyme_score = 0.0
|
121 |
-
phone_score = 0.0
|
122 |
-
char_diff_score = 0.0
|
123 |
-
consonant_score = 0.0
|
124 |
-
|
125 |
# Initialize phone lists
|
126 |
phone_list1 = phones1.split()
|
127 |
phone_list2 = phones2.split()
|
128 |
|
129 |
-
#
|
130 |
-
vowel1 = None
|
131 |
-
vowel2 = None
|
132 |
-
|
133 |
-
# 1. Rhyme score (60%) - based on primary vowel
|
134 |
vowel1 = self._get_primary_vowel(phone_list1)
|
135 |
vowel2 = self._get_primary_vowel(phone_list2)
|
136 |
-
if vowel1 and vowel2 and vowel1 == vowel2
|
137 |
-
rhyme_score = 1.0
|
138 |
-
|
139 |
-
# 2. Phone pattern score (20%) - based on number of phones
|
140 |
-
if len(phone_list1) == len(phone_list2):
|
141 |
-
phone_score = 1.0
|
142 |
-
else:
|
143 |
-
phone_score = 1.0 - (abs(len(phone_list1) - len(phone_list2)) / max(len(phone_list1), len(phone_list2)))
|
144 |
|
145 |
-
#
|
146 |
-
|
147 |
|
148 |
-
#
|
149 |
-
|
150 |
|
151 |
# Combined weighted score
|
152 |
similarity = (
|
153 |
(rhyme_score * self.RHYME_WEIGHT) +
|
154 |
-
(
|
155 |
-
(
|
156 |
-
(consonant_score * self.CONSONANT_WEIGHT)
|
157 |
)
|
158 |
|
159 |
return {
|
160 |
"similarity": round(similarity, 3),
|
161 |
"rhyme_score": round(rhyme_score, 3),
|
162 |
-
"
|
163 |
-
"
|
164 |
-
"consonant_score": round(consonant_score, 3),
|
165 |
"details": {
|
166 |
"primary_vowel1": vowel1,
|
167 |
"primary_vowel2": vowel2,
|
168 |
"phone_count1": len(phone_list1),
|
169 |
"phone_count2": len(phone_list2),
|
170 |
-
"
|
171 |
}
|
172 |
}
|
173 |
|
@@ -183,11 +109,6 @@ class ParodyWordSuggestionTool(Tool):
|
|
183 |
suggestions = []
|
184 |
valid_words = []
|
185 |
invalid_words = []
|
186 |
-
words = []
|
187 |
-
target_phones = ""
|
188 |
-
word_phones = ""
|
189 |
-
word = ""
|
190 |
-
similarity_result = {}
|
191 |
|
192 |
# Parse JSON string to list
|
193 |
try:
|
@@ -232,9 +153,8 @@ class ParodyWordSuggestionTool(Tool):
|
|
232 |
"word": word,
|
233 |
"similarity": similarity_result["similarity"],
|
234 |
"rhyme_score": similarity_result["rhyme_score"],
|
235 |
-
"
|
236 |
-
"
|
237 |
-
"consonant_score": similarity_result["consonant_score"],
|
238 |
"phones": word_phones,
|
239 |
"is_custom": word in custom_phones if custom_phones else False,
|
240 |
"details": similarity_result["details"]
|
|
|
1 |
from smolagents.tools import Tool
|
|
|
2 |
import pronouncing
|
3 |
import json
|
4 |
+
import string
|
5 |
|
6 |
class ParodyWordSuggestionTool(Tool):
|
7 |
name = "parody_word_suggester"
|
8 |
description = "Suggests rhyming funny words using CMU dictionary pronunciations."
|
9 |
inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True, 'default': None}}
|
10 |
output_type = "string"
|
11 |
+
RHYME_WEIGHT = 0.5
|
12 |
+
PHONE_SEQUENCE_WEIGHT = 0.3
|
13 |
+
LENGTH_WEIGHT = 0.2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
def _get_word_phones(self, word, custom_phones=None):
|
16 |
"""Get phones for a word, checking custom dictionary first."""
|
|
|
24 |
|
25 |
def _get_primary_vowel(self, phones: list) -> str:
|
26 |
"""Get the primary stressed vowel from phone list."""
|
27 |
+
v = ""
|
28 |
+
for phone in phones:
|
29 |
+
if '1' in phone and any(v in phone for v in 'AEIOU'):
|
30 |
+
return phone.rstrip('012')
|
|
|
|
|
|
|
31 |
return None
|
32 |
|
33 |
|
34 |
+
def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
|
35 |
+
"""Calculate similarity based on matching phones in sequence."""
|
36 |
+
p = ""
|
37 |
+
if not phones1 or not phones2:
|
38 |
return 0.0
|
39 |
|
40 |
+
# Strip stress markers for comparison
|
41 |
+
clean_phones1 = [p.rstrip('012') for p in phones1]
|
42 |
+
clean_phones2 = [p.rstrip('012') for p in phones2]
|
|
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
matches = 0
|
45 |
+
total_comparisons = max(len(clean_phones1), len(clean_phones2))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
+
# Compare phones in sequence
|
48 |
+
for i in range(min(len(clean_phones1), len(clean_phones2))):
|
49 |
+
if clean_phones1[i] == clean_phones2[i]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
matches += 1
|
51 |
+
|
52 |
+
return matches / total_comparisons if total_comparisons > 0 else 0.0
|
53 |
+
|
54 |
+
|
55 |
+
def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
|
56 |
+
"""Calculate similarity based on phone length."""
|
57 |
+
max_length = max(len(phones1), len(phones2))
|
58 |
+
length_diff = abs(len(phones1) - len(phones2))
|
59 |
+
return 1.0 - (length_diff / max_length) if max_length > 0 else 0.0
|
60 |
|
61 |
|
62 |
def _calculate_similarity(self, word1, phones1, word2, phones2):
|
63 |
"""Calculate similarity based on multiple factors."""
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
# Initialize phone lists
|
65 |
phone_list1 = phones1.split()
|
66 |
phone_list2 = phones2.split()
|
67 |
|
68 |
+
# 1. Rhyme score (50%) - based on primary vowel
|
|
|
|
|
|
|
|
|
69 |
vowel1 = self._get_primary_vowel(phone_list1)
|
70 |
vowel2 = self._get_primary_vowel(phone_list2)
|
71 |
+
rhyme_score = 1.0 if vowel1 and vowel2 and vowel1 == vowel2 else 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
+
# 2. Phone sequence similarity (30%)
|
74 |
+
phone_sequence_score = self._calculate_phone_sequence_similarity(phone_list1, phone_list2)
|
75 |
|
76 |
+
# 3. Length similarity (20%)
|
77 |
+
length_score = self._calculate_length_similarity(phone_list1, phone_list2)
|
78 |
|
79 |
# Combined weighted score
|
80 |
similarity = (
|
81 |
(rhyme_score * self.RHYME_WEIGHT) +
|
82 |
+
(phone_sequence_score * self.PHONE_SEQUENCE_WEIGHT) +
|
83 |
+
(length_score * self.LENGTH_WEIGHT)
|
|
|
84 |
)
|
85 |
|
86 |
return {
|
87 |
"similarity": round(similarity, 3),
|
88 |
"rhyme_score": round(rhyme_score, 3),
|
89 |
+
"phone_sequence_score": round(phone_sequence_score, 3),
|
90 |
+
"length_score": round(length_score, 3),
|
|
|
91 |
"details": {
|
92 |
"primary_vowel1": vowel1,
|
93 |
"primary_vowel2": vowel2,
|
94 |
"phone_count1": len(phone_list1),
|
95 |
"phone_count2": len(phone_list2),
|
96 |
+
"matching_phones": round(phone_sequence_score * len(phone_list1))
|
97 |
}
|
98 |
}
|
99 |
|
|
|
109 |
suggestions = []
|
110 |
valid_words = []
|
111 |
invalid_words = []
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
# Parse JSON string to list
|
114 |
try:
|
|
|
153 |
"word": word,
|
154 |
"similarity": similarity_result["similarity"],
|
155 |
"rhyme_score": similarity_result["rhyme_score"],
|
156 |
+
"phone_sequence_score": similarity_result["phone_sequence_score"],
|
157 |
+
"length_score": similarity_result["length_score"],
|
|
|
158 |
"phones": word_phones,
|
159 |
"is_custom": word in custom_phones if custom_phones else False,
|
160 |
"details": similarity_result["details"]
|