patruff committed on
Commit
b1c9207
·
verified ·
1 Parent(s): 24809f4

Upload tool

Browse files
Files changed (1) hide show
  1. tool.py +170 -17
tool.py CHANGED
@@ -1,38 +1,191 @@
1
  from smolagents.tools import Tool
 
2
  import pronouncing
3
  import string
4
- import json
5
 
6
  class WordPhoneTool(Tool):
7
  name = "word_phonetic_analyzer"
8
- description = "Analyzes the pronunciation of a word using the CMU dictionary to get its phonemes, syllable count and stress pattern"
9
- inputs = {'word': {'type': 'string', 'description': 'The word to analyze for pronunciation patterns'}}
 
10
  output_type = "string"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- def forward(self, word: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  import pronouncing
14
- import string
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  import json
 
 
16
 
17
- word = word.lower().strip(string.punctuation)
18
- phones = pronouncing.phones_for_word(word)
 
19
 
20
  if not phones:
21
  result = {
22
- 'word': word,
23
  'found': False,
24
  'error': 'Word not found in dictionary'
25
  }
26
- else:
27
- primary_phones = phones[0]
28
- result = {
29
- 'word': word,
30
- 'found': True,
31
- 'syllable_count': pronouncing.syllable_count(primary_phones),
32
- 'phones': primary_phones.split(),
33
- 'stresses': pronouncing.stresses(primary_phones)
34
- }
 
 
 
 
 
 
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  return json.dumps(result, indent=2)
37
 
38
 
 
1
  from smolagents.tools import Tool
2
+ import json
3
  import pronouncing
4
  import string
5
+ import difflib
6
 
7
  class WordPhoneTool(Tool):
8
  name = "word_phonetic_analyzer"
9
+ description = """Analyzes word pronunciation using CMU dictionary to get phonemes, syllables, and stress patterns.
10
+ Can also compare two words for phonetic similarity."""
11
+ inputs = {'word': {'type': 'string', 'description': 'Primary word to analyze for pronunciation patterns'}, 'compare_to': {'type': 'string', 'description': 'Optional word to compare against for similarity scoring', 'nullable': True}}
12
  output_type = "string"
13
+ VOWEL_REF = "AH,UH,AX|AE,EH|IY,IH|AO,AA|UW,UH|AY,EY|OW,AO|AW,AO|OY,OW|ER,AXR"
14
+
15
+ def _get_vowel_groups(self):
16
+ groups = []
17
+ group_strs = self.VOWEL_REF.split("|")
18
+ for group_str in group_strs:
19
+ groups.append(group_str.split(","))
20
+ return groups
21
+
22
+
23
+ def _get_last_syllable(self, phones):
24
+ last_vowel_idx = -1
25
+ last_vowel = None
26
+ vowel_groups = self._get_vowel_groups()
27
+
28
+ for i in range(len(phones)):
29
+ phone = phones[i]
30
+ base_phone = ""
31
+ for j in range(len(phone)):
32
+ if phone[j] not in "012":
33
+ base_phone += phone[j]
34
+
35
+ for group in vowel_groups:
36
+ if base_phone in group:
37
+ last_vowel_idx = i
38
+ last_vowel = base_phone
39
+ break
40
+
41
+ if last_vowel_idx == -1:
42
+ return None, []
43
+
44
+ remaining = []
45
+ for i in range(last_vowel_idx + 1, len(phones)):
46
+ remaining.append(phones[i])
47
+
48
+ return last_vowel, remaining
49
 
50
+
51
+ def _strip_stress(self, phones):
52
+ result = []
53
+ for phone in phones:
54
+ stripped = ""
55
+ for char in phone:
56
+ if char not in "012":
57
+ stripped += char
58
+ result.append(stripped)
59
+ return result
60
+
61
+
62
+ def _vowels_match(self, v1, v2):
63
+ v1_stripped = ""
64
+ v2_stripped = ""
65
+
66
+ for char in v1:
67
+ if char not in "012":
68
+ v1_stripped += char
69
+
70
+ for char in v2:
71
+ if char not in "012":
72
+ v2_stripped += char
73
+
74
+ if v1_stripped == v2_stripped:
75
+ return True
76
+
77
+ vowel_groups = self._get_vowel_groups()
78
+ for group in vowel_groups:
79
+ if v1_stripped in group and v2_stripped in group:
80
+ return True
81
+ return False
82
+
83
+
84
def _calculate_similarity(self, word1, phones1, word2, phones2):
    """Score how closely two words match in rhyme, syllables and spelling.

    Args:
        word1: spelling of the first word.
        phones1: space-separated phoneme string for ``word1``.
        word2: spelling of the second word.
        phones2: space-separated phoneme string for ``word2``.

    Returns:
        dict: ``similarity`` (0.6 * rhyme score + 0.25 * syllable score +
        0.15 * string similarity), ``rhyme_score``, ``syllable_match``
        (bool) and ``string_similarity``; the three numbers are rounded
        to 3 decimal places.
    """
    import pronouncing
    from difflib import SequenceMatcher

    last_vowel1, word1_end = self._get_last_syllable(phones1.split())
    last_vowel2, word2_end = self._get_last_syllable(phones2.split())

    # NOTE(review): the branch nesting here was reconstructed from a
    # flattened diff view with no indentation. Confirm that 0.6 is meant
    # for "last vowels match but the phonemes after them differ".
    rhyme_score = 0.0
    if (
        last_vowel1
        and last_vowel2
        and self._vowels_match(last_vowel1, last_vowel2)
    ):
        if self._strip_stress(word1_end) == self._strip_stress(word2_end):
            # Same-length words spelled identically after the first
            # letter score 1.2 instead of 1.0.
            same_spelling_tail = (
                len(word1) == len(word2) and word1[1:] == word2[1:]
            )
            rhyme_score = 1.2 if same_spelling_tail else 1.0
        else:
            rhyme_score = 0.6

    syllable_match = (
        pronouncing.syllable_count(phones1)
        == pronouncing.syllable_count(phones2)
    )
    syllable_score = 1.0 if syllable_match else 0.0

    # Spellings are compared without their first letter; when either word
    # has at most one character the whole words are compared instead.
    if len(word1) > 1 and len(word2) > 1:
        string_similarity = SequenceMatcher(None, word1[1:], word2[1:]).ratio()
    else:
        string_similarity = SequenceMatcher(None, word1, word2).ratio()

    total_similarity = (
        (rhyme_score * 0.6)
        + (syllable_score * 0.25)
        + (string_similarity * 0.15)
    )

    return {
        "similarity": round(total_similarity, 3),
        "rhyme_score": round(rhyme_score, 3),
        "syllable_match": syllable_match,
        "string_similarity": round(string_similarity, 3),
    }
137
+
138
+
139
def forward(self, word, compare_to=None):
    """Analyze ``word`` and optionally compare it with ``compare_to``.

    Both words are lower-cased and stripped of surrounding punctuation
    and whitespace before lookup; the first pronunciation returned by
    ``pronouncing.phones_for_word`` is used.

    Args:
        word: the word to analyze.
        compare_to: optional second word. When truthy, a ``comparison``
            entry is added and, if that word is found, a ``similarity``
            entry from ``_calculate_similarity``.

    Returns:
        str: JSON text (``indent=2``). When ``word`` has no
        pronunciation the object holds ``word``, ``found: false`` and
        ``error``; otherwise ``word``, ``found: true``,
        ``syllable_count``, ``phones`` and ``stresses``.
    """
    import json
    import string
    import pronouncing

    # Strip whitespace together with punctuation so input such as
    # " cat. " still resolves to "cat" instead of failing the lookup.
    strip_chars = string.punctuation + string.whitespace

    word_clean = word.lower().strip(strip_chars)
    phones = pronouncing.phones_for_word(word_clean)

    if not phones:
        result = {
            'word': word_clean,
            'found': False,
            'error': 'Word not found in dictionary'
        }
        return json.dumps(result, indent=2)

    primary_phones = phones[0]
    result = {
        'word': word_clean,
        'found': True,
        'syllable_count': pronouncing.syllable_count(primary_phones),
        'phones': primary_phones.split(),
        'stresses': pronouncing.stresses(primary_phones)
    }

    if compare_to:
        compare_clean = compare_to.lower().strip(strip_chars)
        compare_phones = pronouncing.phones_for_word(compare_clean)

        if not compare_phones:
            result['comparison'] = {
                'error': f'Comparison word "{compare_clean}" not found in dictionary'
            }
        else:
            compare_primary = compare_phones[0]
            result['comparison'] = {
                'word': compare_clean,
                'syllable_count': pronouncing.syllable_count(compare_primary),
                'phones': compare_primary.split(),
                'stresses': pronouncing.stresses(compare_primary)
            }
            result['similarity'] = self._calculate_similarity(
                word_clean, primary_phones,
                compare_clean, compare_primary
            )

    return json.dumps(result, indent=2)
190
 
191