patruff committed on
Commit
ee2b8c1
·
verified ·
1 Parent(s): fc052dd

Upload tool

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. tool.py +89 -17
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- smolagents
2
  pronouncing
 
 
 
1
  pronouncing
2
+ smolagents
tool.py CHANGED
@@ -11,6 +11,7 @@ class ParodyWordSuggestionTool(Tool):
11
  RHYME_WEIGHT = 0.5
12
  PHONE_SEQUENCE_WEIGHT = 0.3
13
  LENGTH_WEIGHT = 0.2
 
14
 
15
  def _get_word_phones(self, word, custom_phones=None):
16
  """Get phones for a word, checking custom dictionary first."""
@@ -24,32 +25,93 @@ class ParodyWordSuggestionTool(Tool):
24
 
25
  def _get_primary_vowel(self, phones: list) -> str:
26
  """Get the primary stressed vowel from phone list."""
27
- v = ""
28
- for phone in phones:
29
- if '1' in phone and any(v in phone for v in 'AEIOU'):
30
- return phone.rstrip('012')
 
 
31
  return None
32
 
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
35
  """Calculate similarity based on matching phones in sequence."""
36
- p = ""
37
  if not phones1 or not phones2:
38
  return 0.0
39
 
40
- # Strip stress markers for comparison
41
- clean_phones1 = [p.rstrip('012') for p in phones1]
42
- clean_phones2 = [p.rstrip('012') for p in phones2]
 
43
 
44
- matches = 0
45
- total_comparisons = max(len(clean_phones1), len(clean_phones2))
46
 
47
- # Compare phones in sequence
48
- for i in range(min(len(clean_phones1), len(clean_phones2))):
49
- if clean_phones1[i] == clean_phones2[i]:
50
- matches += 1
51
-
52
- return matches / total_comparisons if total_comparisons > 0 else 0.0
53
 
54
 
55
  def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
@@ -61,9 +123,14 @@ class ParodyWordSuggestionTool(Tool):
61
 
62
  def _calculate_similarity(self, word1, phones1, word2, phones2):
63
  """Calculate similarity based on multiple factors."""
64
- # Initialize phone lists
65
  phone_list1 = phones1.split()
66
  phone_list2 = phones2.split()
 
 
 
 
 
67
 
68
  # 1. Rhyme score (50%) - based on primary vowel
69
  vowel1 = self._get_primary_vowel(phone_list1)
@@ -109,6 +176,11 @@ class ParodyWordSuggestionTool(Tool):
109
  suggestions = []
110
  valid_words = []
111
  invalid_words = []
 
 
 
 
 
112
 
113
  # Parse JSON string to list
114
  try:
 
11
  RHYME_WEIGHT = 0.5
12
  PHONE_SEQUENCE_WEIGHT = 0.3
13
  LENGTH_WEIGHT = 0.2
14
+ PHONE_GROUPS = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y|IY,IH|UW,UH|EH,AH|AO,AA|AE,AH|AY,EY|OW,UW"
15
 
16
  def _get_word_phones(self, word, custom_phones=None):
17
  """Get phones for a word, checking custom dictionary first."""
 
25
 
26
  def _get_primary_vowel(self, phones: list) -> str:
27
  """Get the primary stressed vowel from phone list."""
28
+ phone_str = ""
29
+ vowel_char = ""
30
+
31
+ for phone_str in phones:
32
+ if '1' in phone_str and any(vowel_char in phone_str for vowel_char in 'AEIOU'):
33
+ return phone_str.rstrip('012')
34
  return None
35
 
36
 
37
+ def _get_phone_similarity(self, phone1: str, phone2: str) -> float:
38
+ """Calculate similarity between two phones."""
39
+ # Initialize variables
40
+ p1 = ""
41
+ p2 = ""
42
+ group_str = ""
43
+ group = []
44
+
45
+ # Strip stress markers
46
+ p1 = phone1.rstrip('012')
47
+ p2 = phone2.rstrip('012')
48
+
49
+ # Exact match
50
+ if p1 == p2:
51
+ return 1.0
52
+
53
+ # Check similarity groups
54
+ for group_str in self.PHONE_GROUPS.split('|'):
55
+ group = group_str.split(',')
56
+ if p1 in group and p2 in group:
57
+ return 0.7
58
+
59
+ # Check broader categories
60
+ if self._get_phone_type(p1) == self._get_phone_type(p2):
61
+ return 0.3
62
+
63
+ return 0.0
64
+
65
+
66
+ def _get_phone_type(self, phone: str) -> str:
67
+ """Get the broad category of a phone."""
68
+ # Strip stress markers
69
+ phone = phone.rstrip('012')
70
+ v = ""
71
+
72
+ # Vowels
73
+ if any(v in phone for v in 'AEIOU'):
74
+ return 'vowel'
75
+
76
+ # Initialize fixed sets for categories
77
+ nasals = {'M', 'N', 'NG'}
78
+ stops = {'P', 'B', 'T', 'D', 'K', 'G'}
79
+ fricatives = {'F', 'V', 'TH', 'DH', 'S', 'Z', 'SH', 'ZH'}
80
+ liquids = {'L', 'R'}
81
+ glides = {'W', 'Y'}
82
+
83
+ if phone in nasals:
84
+ return 'nasal'
85
+ if phone in stops:
86
+ return 'stop'
87
+ if phone in fricatives:
88
+ return 'fricative'
89
+ if phone in liquids:
90
+ return 'liquid'
91
+ if phone in glides:
92
+ return 'glide'
93
+
94
+ return 'other'
95
+
96
+
97
  def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
98
  """Calculate similarity based on matching phones in sequence."""
 
99
  if not phones1 or not phones2:
100
  return 0.0
101
 
102
+ # Initialize variables
103
+ total_similarity = 0.0
104
+ i = 0
105
+ similarity = 0.0
106
 
107
+ comparisons = max(len(phones1), len(phones2))
 
108
 
109
+ # Compare each position
110
+ for i in range(min(len(phones1), len(phones2))):
111
+ similarity = self._get_phone_similarity(phones1[i], phones2[i])
112
+ total_similarity += similarity
113
+
114
+ return total_similarity / comparisons if comparisons > 0 else 0.0
115
 
116
 
117
  def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
 
123
 
124
  def _calculate_similarity(self, word1, phones1, word2, phones2):
125
  """Calculate similarity based on multiple factors."""
126
+ # Initialize phone lists and scores
127
  phone_list1 = phones1.split()
128
  phone_list2 = phones2.split()
129
+ rhyme_score = 0.0
130
+ phone_sequence_score = 0.0
131
+ length_score = 0.0
132
+ vowel1 = None
133
+ vowel2 = None
134
 
135
  # 1. Rhyme score (50%) - based on primary vowel
136
  vowel1 = self._get_primary_vowel(phone_list1)
 
176
  suggestions = []
177
  valid_words = []
178
  invalid_words = []
179
+ words = []
180
+ target_phones = ""
181
+ word_phones = ""
182
+ word = ""
183
+ similarity_result = {}
184
 
185
  # Parse JSON string to list
186
  try: