Spaces:
Configuration error
Configuration error
Upload tool
Browse files- requirements.txt +1 -1
- tool.py +89 -17
requirements.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
smolagents
|
2 |
pronouncing
|
|
|
|
|
|
1 |
pronouncing
|
2 |
+
smolagents
|
tool.py
CHANGED
@@ -11,6 +11,7 @@ class ParodyWordSuggestionTool(Tool):
|
|
11 |
RHYME_WEIGHT = 0.5
|
12 |
PHONE_SEQUENCE_WEIGHT = 0.3
|
13 |
LENGTH_WEIGHT = 0.2
|
|
|
14 |
|
15 |
def _get_word_phones(self, word, custom_phones=None):
|
16 |
"""Get phones for a word, checking custom dictionary first."""
|
@@ -24,32 +25,93 @@ class ParodyWordSuggestionTool(Tool):
|
|
24 |
|
25 |
def _get_primary_vowel(self, phones: list) -> str:
|
26 |
"""Get the primary stressed vowel from phone list."""
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
31 |
return None
|
32 |
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
|
35 |
"""Calculate similarity based on matching phones in sequence."""
|
36 |
-
p = ""
|
37 |
if not phones1 or not phones2:
|
38 |
return 0.0
|
39 |
|
40 |
-
#
|
41 |
-
|
42 |
-
|
|
|
43 |
|
44 |
-
|
45 |
-
total_comparisons = max(len(clean_phones1), len(clean_phones2))
|
46 |
|
47 |
-
# Compare
|
48 |
-
for i in range(min(len(
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
return
|
53 |
|
54 |
|
55 |
def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
|
@@ -61,9 +123,14 @@ class ParodyWordSuggestionTool(Tool):
|
|
61 |
|
62 |
def _calculate_similarity(self, word1, phones1, word2, phones2):
|
63 |
"""Calculate similarity based on multiple factors."""
|
64 |
-
# Initialize phone lists
|
65 |
phone_list1 = phones1.split()
|
66 |
phone_list2 = phones2.split()
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
# 1. Rhyme score (50%) - based on primary vowel
|
69 |
vowel1 = self._get_primary_vowel(phone_list1)
|
@@ -109,6 +176,11 @@ class ParodyWordSuggestionTool(Tool):
|
|
109 |
suggestions = []
|
110 |
valid_words = []
|
111 |
invalid_words = []
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
# Parse JSON string to list
|
114 |
try:
|
|
|
11 |
RHYME_WEIGHT = 0.5
|
12 |
PHONE_SEQUENCE_WEIGHT = 0.3
|
13 |
LENGTH_WEIGHT = 0.2
|
14 |
+
PHONE_GROUPS = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y|IY,IH|UW,UH|EH,AH|AO,AA|AE,AH|AY,EY|OW,UW"
|
15 |
|
16 |
def _get_word_phones(self, word, custom_phones=None):
|
17 |
"""Get phones for a word, checking custom dictionary first."""
|
|
|
25 |
|
26 |
def _get_primary_vowel(self, phones: list) -> str:
|
27 |
"""Get the primary stressed vowel from phone list."""
|
28 |
+
phone_str = ""
|
29 |
+
vowel_char = ""
|
30 |
+
|
31 |
+
for phone_str in phones:
|
32 |
+
if '1' in phone_str and any(vowel_char in phone_str for vowel_char in 'AEIOU'):
|
33 |
+
return phone_str.rstrip('012')
|
34 |
return None
|
35 |
|
36 |
|
37 |
+
def _get_phone_similarity(self, phone1: str, phone2: str) -> float:
|
38 |
+
"""Calculate similarity between two phones."""
|
39 |
+
# Initialize variables
|
40 |
+
p1 = ""
|
41 |
+
p2 = ""
|
42 |
+
group_str = ""
|
43 |
+
group = []
|
44 |
+
|
45 |
+
# Strip stress markers
|
46 |
+
p1 = phone1.rstrip('012')
|
47 |
+
p2 = phone2.rstrip('012')
|
48 |
+
|
49 |
+
# Exact match
|
50 |
+
if p1 == p2:
|
51 |
+
return 1.0
|
52 |
+
|
53 |
+
# Check similarity groups
|
54 |
+
for group_str in self.PHONE_GROUPS.split('|'):
|
55 |
+
group = group_str.split(',')
|
56 |
+
if p1 in group and p2 in group:
|
57 |
+
return 0.7
|
58 |
+
|
59 |
+
# Check broader categories
|
60 |
+
if self._get_phone_type(p1) == self._get_phone_type(p2):
|
61 |
+
return 0.3
|
62 |
+
|
63 |
+
return 0.0
|
64 |
+
|
65 |
+
|
66 |
+
def _get_phone_type(self, phone: str) -> str:
|
67 |
+
"""Get the broad category of a phone."""
|
68 |
+
# Strip stress markers
|
69 |
+
phone = phone.rstrip('012')
|
70 |
+
v = ""
|
71 |
+
|
72 |
+
# Vowels
|
73 |
+
if any(v in phone for v in 'AEIOU'):
|
74 |
+
return 'vowel'
|
75 |
+
|
76 |
+
# Initialize fixed sets for categories
|
77 |
+
nasals = {'M', 'N', 'NG'}
|
78 |
+
stops = {'P', 'B', 'T', 'D', 'K', 'G'}
|
79 |
+
fricatives = {'F', 'V', 'TH', 'DH', 'S', 'Z', 'SH', 'ZH'}
|
80 |
+
liquids = {'L', 'R'}
|
81 |
+
glides = {'W', 'Y'}
|
82 |
+
|
83 |
+
if phone in nasals:
|
84 |
+
return 'nasal'
|
85 |
+
if phone in stops:
|
86 |
+
return 'stop'
|
87 |
+
if phone in fricatives:
|
88 |
+
return 'fricative'
|
89 |
+
if phone in liquids:
|
90 |
+
return 'liquid'
|
91 |
+
if phone in glides:
|
92 |
+
return 'glide'
|
93 |
+
|
94 |
+
return 'other'
|
95 |
+
|
96 |
+
|
97 |
def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
|
98 |
"""Calculate similarity based on matching phones in sequence."""
|
|
|
99 |
if not phones1 or not phones2:
|
100 |
return 0.0
|
101 |
|
102 |
+
# Initialize variables
|
103 |
+
total_similarity = 0.0
|
104 |
+
i = 0
|
105 |
+
similarity = 0.0
|
106 |
|
107 |
+
comparisons = max(len(phones1), len(phones2))
|
|
|
108 |
|
109 |
+
# Compare each position
|
110 |
+
for i in range(min(len(phones1), len(phones2))):
|
111 |
+
similarity = self._get_phone_similarity(phones1[i], phones2[i])
|
112 |
+
total_similarity += similarity
|
113 |
+
|
114 |
+
return total_similarity / comparisons if comparisons > 0 else 0.0
|
115 |
|
116 |
|
117 |
def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
|
|
|
123 |
|
124 |
def _calculate_similarity(self, word1, phones1, word2, phones2):
|
125 |
"""Calculate similarity based on multiple factors."""
|
126 |
+
# Initialize phone lists and scores
|
127 |
phone_list1 = phones1.split()
|
128 |
phone_list2 = phones2.split()
|
129 |
+
rhyme_score = 0.0
|
130 |
+
phone_sequence_score = 0.0
|
131 |
+
length_score = 0.0
|
132 |
+
vowel1 = None
|
133 |
+
vowel2 = None
|
134 |
|
135 |
# 1. Rhyme score (50%) - based on primary vowel
|
136 |
vowel1 = self._get_primary_vowel(phone_list1)
|
|
|
176 |
suggestions = []
|
177 |
valid_words = []
|
178 |
invalid_words = []
|
179 |
+
words = []
|
180 |
+
target_phones = ""
|
181 |
+
word_phones = ""
|
182 |
+
word = ""
|
183 |
+
similarity_result = {}
|
184 |
|
185 |
# Parse JSON string to list
|
186 |
try:
|