patruff committed on
Commit
9d3290e
·
verified ·
1 Parent(s): 5df9aec

Upload tool

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. tool.py +37 -64
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- pronouncing
2
  smolagents
 
 
 
1
  smolagents
2
+ pronouncing
tool.py CHANGED
@@ -1,6 +1,6 @@
1
  from smolagents.tools import Tool
2
- import json
3
  import pronouncing
 
4
  import string
5
 
6
  class ParodyWordSuggestionTool(Tool):
@@ -16,16 +16,6 @@ class ParodyWordSuggestionTool(Tool):
16
  import string
17
  import json
18
 
19
- # Define vowel sound groups (common sounds that rhyme)
20
- vowel_groups = {
21
- 'UW1': ['UW0', 'UW1', 'UW2'], # oo sounds
22
- 'UW0': ['UW0', 'UW1', 'UW2'],
23
- 'UW2': ['UW0', 'UW1', 'UW2'],
24
- 'AH1': ['AH0', 'AH1', 'AH2'], # uh sounds
25
- 'AH0': ['AH0', 'AH1', 'AH2'],
26
- 'AH2': ['AH0', 'AH1', 'AH2'],
27
- }
28
-
29
  target = target.lower().strip(string.punctuation)
30
  min_similarity = float(min_similarity)
31
  suggestions = []
@@ -50,79 +40,62 @@ class ParodyWordSuggestionTool(Tool):
50
  target_phones = target_phones[0]
51
  target_phone_list = target_phones.split()
52
 
53
- # Focus on the vowel sound and end consonant(s)
54
- target_end = []
55
- found_vowel = False
56
- for i in range(len(target_phone_list) - 1, -1, -1):
57
- current_phone = target_phone_list[i]
58
- target_end.insert(0, current_phone)
59
- # Check if current phone contains a vowel
60
- has_vowel = False
61
- for vowel in ['A', 'E', 'I', 'O', 'U']:
62
- if vowel in current_phone:
63
- has_vowel = True
64
- found_vowel = True
65
- break
66
- if found_vowel:
67
- break
68
-
69
  # Check each word
70
  for word in words:
71
  phones = pronouncing.phones_for_word(word)
72
- if phones: # Only process if word is in dictionary
73
  word_phones = phones[0]
74
  word_phone_list = word_phones.split()
75
 
76
- # Get ending pattern (vowel + following consonants)
77
- word_end = []
78
- found_vowel = False
79
- for i in range(len(word_phone_list) - 1, -1, -1):
80
- current_phone = word_phone_list[i]
81
- word_end.insert(0, current_phone)
82
- # Check if current phone contains a vowel
83
- has_vowel = False
84
- for vowel in ['A', 'E', 'I', 'O', 'U']:
85
- if vowel in current_phone:
86
- has_vowel = True
87
- found_vowel = True
88
- break
89
- if found_vowel:
90
- break
91
-
92
- # Calculate rhyme score
93
- matches = 0
94
- total_checks = max(len(word_end), len(target_end))
95
 
96
- for i in range(min(len(word_end), len(target_end))):
97
- w_phone = word_end[i]
98
- t_phone = target_end[i]
 
 
 
99
 
100
- # Check for exact match
101
  if w_phone == t_phone:
102
- matches += 1
 
 
 
 
103
  else:
104
- # Check for vowel sound
105
  has_vowel = False
106
- for vowel in ['A', 'E', 'I', 'O', 'U']:
 
107
  if vowel in w_phone:
108
  has_vowel = True
109
  break
110
-
111
- if has_vowel:
112
- # Check vowel groups
113
- is_match = False
114
- for base_vowel in vowel_groups:
115
- if w_phone in vowel_groups[base_vowel] and t_phone in vowel_groups[base_vowel]:
116
- is_match = True
117
- matches += 1
118
- break
 
 
 
119
 
120
- similarity = matches / total_checks if total_checks > 0 else 0.0
 
121
 
122
  if similarity >= min_similarity:
123
  suggestions.append({
124
  "word": word,
125
  "similarity": round(similarity, 3),
 
 
126
  "syllables": pronouncing.syllable_count(word_phones),
127
  "phones": word_phones,
128
  })
 
1
  from smolagents.tools import Tool
 
2
  import pronouncing
3
+ import json
4
  import string
5
 
6
  class ParodyWordSuggestionTool(Tool):
 
16
  import string
17
  import json
18
 
 
 
 
 
 
 
 
 
 
 
19
  target = target.lower().strip(string.punctuation)
20
  min_similarity = float(min_similarity)
21
  suggestions = []
 
40
  target_phones = target_phones[0]
41
  target_phone_list = target_phones.split()
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # Check each word
44
  for word in words:
45
  phones = pronouncing.phones_for_word(word)
46
+ if phones:
47
  word_phones = phones[0]
48
  word_phone_list = word_phones.split()
49
 
50
+ # Calculate full phonetic similarity
51
+ phonetic_matches = 0
52
+ max_length = max(len(word_phone_list), len(target_phone_list))
53
+ min_length = min(len(word_phone_list), len(target_phone_list))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ for i in range(max_length):
56
+ if i >= min_length:
57
+ break
58
+
59
+ w_phone = word_phone_list[i]
60
+ t_phone = target_phone_list[i]
61
 
62
+ # Exact phone match
63
  if w_phone == t_phone:
64
+ phonetic_matches += 1.0
65
+ # Match without stress numbers
66
+ elif w_phone.rstrip('012') == t_phone.rstrip('012'):
67
+ phonetic_matches += 0.8
68
+ # Consonant match at start
69
  else:
70
+ # Check if current phone is a consonant
71
  has_vowel = False
72
+ vowels = ['A', 'E', 'I', 'O', 'U']
73
+ for vowel in vowels:
74
  if vowel in w_phone:
75
  has_vowel = True
76
  break
77
+
78
+ if i == 0 and not has_vowel:
79
+ if w_phone.rstrip('012') == t_phone.rstrip('012'):
80
+ phonetic_matches += 0.5
81
+
82
+ phonetic_similarity = phonetic_matches / max_length
83
+
84
+ # Calculate rhyme similarity (focusing on end phones)
85
+ rhyme_score = 1.0 if word_phone_list[-1] == target_phone_list[-1] else 0.0
86
+ if len(word_phone_list) > 1 and len(target_phone_list) > 1:
87
+ if word_phone_list[-2] == target_phone_list[-2]:
88
+ rhyme_score += 1.0
89
 
90
+ # Combined score (weighing both phonetic similarity and rhyming)
91
+ similarity = (phonetic_similarity * 0.6) + (rhyme_score * 0.4)
92
 
93
  if similarity >= min_similarity:
94
  suggestions.append({
95
  "word": word,
96
  "similarity": round(similarity, 3),
97
+ "phonetic_similarity": round(phonetic_similarity, 3),
98
+ "rhyme_score": round(rhyme_score, 3),
99
  "syllables": pronouncing.syllable_count(word_phones),
100
  "phones": word_phones,
101
  })