patruff committed on
Commit
f9e805b
·
verified ·
1 Parent(s): 5c405fc

Upload tool

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. tool.py +31 -45
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
- smolagents
2
  pronouncing
 
 
 
1
  pronouncing
2
+ smolagents
tool.py CHANGED
@@ -1,7 +1,8 @@
1
  from smolagents.tools import Tool
2
  import pronouncing
3
- import string
4
  import json
 
 
5
 
6
  class ParodyWordSuggestionTool(Tool):
7
  name = "parody_word_suggester"
@@ -15,6 +16,7 @@ class ParodyWordSuggestionTool(Tool):
15
  import pronouncing
16
  import string
17
  import json
 
18
 
19
  target = target.lower().strip(string.punctuation)
20
  min_similarity = float(min_similarity)
@@ -42,65 +44,49 @@ class ParodyWordSuggestionTool(Tool):
42
 
43
  # Check each word
44
  for word in words:
 
45
  phones = pronouncing.phones_for_word(word)
46
  if phones:
47
  word_phones = phones[0]
48
  word_phone_list = word_phones.split()
49
 
50
- # Calculate full phonetic similarity
51
- phonetic_matches = 0
52
- max_length = max(len(word_phone_list), len(target_phone_list))
53
- min_length = min(len(word_phone_list), len(target_phone_list))
54
-
55
- # Initial consonant bonus
56
- initial_match_score = 0
57
- if word_phone_list[0].rstrip('012') == target_phone_list[0].rstrip('012'):
58
- initial_match_score = 1.0 # Reduced bonus for matching first consonant
 
 
 
 
 
 
 
59
 
60
- # Check if syllable counts match
61
  target_syl = pronouncing.syllable_count(target_phones)
62
  word_syl = pronouncing.syllable_count(word_phones)
63
- syllable_match = 1.0 if target_syl == word_syl else 0.0
64
 
65
- for i in range(max_length):
66
- if i >= min_length:
67
- break
68
-
69
- w_phone = word_phone_list[i]
70
- t_phone = target_phone_list[i]
71
-
72
- # Exact phone match
73
- if w_phone == t_phone:
74
- phonetic_matches += 1.0
75
- # Match without stress numbers
76
- elif w_phone.rstrip('012') == t_phone.rstrip('012'):
77
- phonetic_matches += 0.8
78
-
79
- phonetic_similarity = (phonetic_matches / max_length) + initial_match_score
80
-
81
- # Rhyme score (focusing on vowel and end consonant)
82
- rhyme_score = 0
83
- if len(word_phone_list) > 1 and len(target_phone_list) > 1:
84
- # Check final syllable (vowel + final consonant)
85
- if word_phone_list[-1] == target_phone_list[-1]: # End consonant match
86
- rhyme_score += 1.0
87
- if word_phone_list[-2] == target_phone_list[-2]: # Vowel match
88
- rhyme_score += 2.0 # Higher weight for vowel match
89
 
90
- # Combined score with new weights prioritizing rhyme
91
- # 50% rhyme, 30% syllable match, 20% phonetic similarity
92
- similarity = (rhyme_score * 0.5) + (syllable_match * 0.3) + (phonetic_similarity * 0.2)
93
 
94
  if similarity >= min_similarity:
95
  suggestions.append({
96
  "word": word,
97
  "similarity": round(similarity, 3),
98
- "phonetic_similarity": round(phonetic_similarity, 3),
99
- "rhyme_score": round(rhyme_score, 3),
100
- "syllable_match": syllable_match,
101
- "initial_match": initial_match_score > 0,
102
- "syllables": pronouncing.syllable_count(word_phones),
103
- "phones": word_phones,
104
  })
105
 
106
  # Sort by similarity score descending
 
1
  from smolagents.tools import Tool
2
  import pronouncing
 
3
  import json
4
+ import difflib
5
+ import string
6
 
7
  class ParodyWordSuggestionTool(Tool):
8
  name = "parody_word_suggester"
 
16
  import pronouncing
17
  import string
18
  import json
19
+ from difflib import SequenceMatcher
20
 
21
  target = target.lower().strip(string.punctuation)
22
  min_similarity = float(min_similarity)
 
44
 
45
  # Check each word
46
  for word in words:
47
+ word = word.lower().strip(string.punctuation)
48
  phones = pronouncing.phones_for_word(word)
49
  if phones:
50
  word_phones = phones[0]
51
  word_phone_list = word_phones.split()
52
 
53
+ # 1. Rhyme score (most important - 60%)
54
+ rhyme_score = 0
55
+ if len(word_phone_list) > 1 and len(target_phone_list) > 1:
56
+ # Check if words share the same ending (vowel + final consonants)
57
+ vowel_plus_end = -2 # Index of the vowel in final syllable
58
+ while vowel_plus_end < -1:
59
+ if 'A' in word_phone_list[vowel_plus_end] or 'E' in word_phone_list[vowel_plus_end] or 'I' in word_phone_list[vowel_plus_end] or 'O' in word_phone_list[vowel_plus_end] or 'U' in word_phone_list[vowel_plus_end]:
60
+ break
61
+ vowel_plus_end += 1
62
+
63
+ if vowel_plus_end == -1:
64
+ vowel_plus_end = -2 # Fall back if no vowel found
65
+
66
+ # Check if the ending (from vowel onwards) matches
67
+ if word_phone_list[vowel_plus_end:] == target_phone_list[vowel_plus_end:]:
68
+ rhyme_score = 1.0
69
 
70
+ # 2. Syllable match (25%)
71
  target_syl = pronouncing.syllable_count(target_phones)
72
  word_syl = pronouncing.syllable_count(word_phones)
73
+ syllable_score = 1.0 if target_syl == word_syl else 0.0
74
 
75
+ # 3. Overall similarity (15%) - using string similarity
76
+ string_similarity = SequenceMatcher(None, target, word).ratio()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
+ # Combined score (60% rhyme, 25% syllables, 15% similarity)
79
+ similarity = (rhyme_score * 0.6) + (syllable_score * 0.25) + (string_similarity * 0.15)
 
80
 
81
  if similarity >= min_similarity:
82
  suggestions.append({
83
  "word": word,
84
  "similarity": round(similarity, 3),
85
+ "rhyme_match": rhyme_score == 1.0,
86
+ "syllable_match": syllable_score == 1.0,
87
+ "string_similarity": round(string_similarity, 3),
88
+ "syllables": word_syl,
89
+ "phones": word_phones
 
90
  })
91
 
92
  # Sort by similarity score descending