File size: 5,457 Bytes
d53b325
e53cc7e
33a37e4
6280809
d53b325
 
 
19ee3db
 
 
d53b325
 
19ee3db
 
d53b325
c0a5ec1
d53b325
 
c0a5ec1
d53b325
 
 
aae0d96
 
 
 
 
 
 
 
c0a5ec1
d53b325
 
 
 
 
 
 
 
33a37e4
 
d53b325
c0a5ec1
 
d53b325
9d3290e
33a37e4
 
 
9d3290e
 
 
 
33a37e4
6280809
 
 
 
 
 
 
 
 
 
9d3290e
 
 
 
 
 
19ee3db
9d3290e
19ee3db
9d3290e
 
 
 
 
6280809
9d3290e
6280809
 
9d3290e
6280809
 
9d3290e
6280809
 
d53b325
6280809
 
 
d53b325
 
 
 
 
9d3290e
 
6280809
 
33a37e4
 
d53b325
 
 
 
 
 
 
 
33a37e4
d53b325
 
 
 
 
c0a5ec1
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from smolagents.tools import Tool
import pronouncing
import string
import json

class ParodyWordSuggestionTool(Tool):
    """Tool that ranks candidate words by how closely they sound like a target word.

    Pronunciations are looked up with the ``pronouncing`` package; only the
    first pronunciation returned for each word is used.
    """

    name = "parody_word_suggester"
    description = """Suggests rhyming funny words using CMU dictionary pronunciations.

    Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
    inputs = {
        'target': {
            'type': 'string',
            'description': 'The word you want to find rhyming alternatives for',
        },
        'word_list_str': {
            'type': 'string',
            'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')',
        },
        'min_similarity': {
            'type': 'string',
            'description': 'Minimum similarity threshold (0.0-1.0)',
            'nullable': True,
        },
    }
    output_type = "string"

    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5") -> str:
        """Score every candidate word against ``target`` and return the matches as JSON.

        Scoring, per candidate (using the first pronunciation of each word):

        * ``phonetic_similarity``: phones are compared position by position
          from the start of both words. An identical phone counts 1.0, a phone
          that differs only in its stress digit counts 0.8. The total is
          divided by the length of the longer word, and 1.0 is added when the
          first phones match (ignoring stress).
        * ``rhyme_score``: only when both words have more than one phone;
          +1.0 if the last phones are identical and +2.0 if the second-to-last
          phones are identical (stress digits included in the comparison).
        * ``syllable_match``: 1.0 when the syllable counts are equal, else 0.0.

        ``similarity = 0.5 * rhyme_score + 0.3 * syllable_match
        + 0.2 * phonetic_similarity``. The components are not normalised, so
        the combined score ranges from 0.0 up to 2.2 rather than 0.0-1.0.

        Args:
            target: Word to find sound-alikes for. Case, surrounding
                whitespace and surrounding punctuation are ignored.
            word_list_str: JSON array of candidate words. Entries that are
                not strings, or that have no known pronunciation, are skipped.
            min_similarity: Minimum combined score a candidate needs in order
                to be returned, given as a number or numeric string. ``None``
                selects the default of 0.5.

        Returns:
            A JSON string (indented by 2). On success it holds ``target``,
            ``target_syllables``, ``target_phones`` and ``suggestions``
            (sorted by descending ``similarity``). On invalid input or an
            unknown target it holds ``error`` and an empty ``suggestions``.
        """
        # These modules are also imported at file level; the local imports are
        # kept so the method stays self-contained.
        # NOTE(review): presumably required by smolagents tool export - confirm.
        import pronouncing
        import string
        import json

        def error_response(message):
            # Every failure path shares the same payload shape.
            return json.dumps({"error": message, "suggestions": []}, indent=2)

        # The input schema marks min_similarity as nullable, so None must be
        # accepted and mapped to the documented default.
        if min_similarity is None:
            threshold = 0.5
        else:
            try:
                threshold = float(min_similarity)
            except (TypeError, ValueError):
                return error_response(
                    f"Invalid min_similarity {min_similarity!r}: expected a number"
                )

        # Parse JSON string to list
        try:
            words = json.loads(word_list_str)
        except json.JSONDecodeError:
            return error_response("Invalid JSON string for word_list_str")

        # Valid JSON is not necessarily an array: a bare string would be
        # iterated character by character and a number would raise.
        if not isinstance(words, list):
            return error_response("word_list_str must be a JSON array of words")

        target = target.lower().strip(string.punctuation + string.whitespace)

        # Get target pronunciation
        target_pronunciations = pronouncing.phones_for_word(target)
        if not target_pronunciations:
            return error_response(f"'{target}' not found in CMU dictionary")

        target_phones = target_pronunciations[0]
        target_phone_list = target_phones.split()
        target_first_phone = target_phone_list[0].rstrip('012')
        # Target-only values are computed once instead of once per candidate.
        target_syllables = pronouncing.syllable_count(target_phones)

        suggestions = []
        for word in words:
            # Non-string entries (numbers, null, nested arrays) cannot be
            # looked up; skip them like words missing from the dictionary.
            if not isinstance(word, str):
                continue

            pronunciations = pronouncing.phones_for_word(word)
            if not pronunciations:
                continue

            word_phones = pronunciations[0]
            word_phone_list = word_phones.split()
            word_syllables = pronouncing.syllable_count(word_phones)

            # Bonus when the first phones agree, ignoring the stress digit.
            initial_match_score = 0
            if word_phone_list[0].rstrip('012') == target_first_phone:
                initial_match_score = 1.0

            syllable_match = 1.0 if target_syllables == word_syllables else 0.0

            # Position-wise comparison over the shared prefix length; zip
            # stops at the shorter of the two phone lists.
            phonetic_matches = 0
            for w_phone, t_phone in zip(word_phone_list, target_phone_list):
                if w_phone == t_phone:
                    # Exact phone match
                    phonetic_matches += 1.0
                elif w_phone.rstrip('012') == t_phone.rstrip('012'):
                    # Same phone, different stress digit
                    phonetic_matches += 0.8

            # Dividing by the longer length penalises unmatched trailing phones.
            max_length = max(len(word_phone_list), len(target_phone_list))
            phonetic_similarity = (phonetic_matches / max_length) + initial_match_score

            # Rhyme score from the last two phones. The comparison is purely
            # positional: it does not check which one is a vowel.
            rhyme_score = 0
            if len(word_phone_list) > 1 and len(target_phone_list) > 1:
                if word_phone_list[-1] == target_phone_list[-1]:
                    rhyme_score += 1.0
                if word_phone_list[-2] == target_phone_list[-2]:
                    rhyme_score += 2.0  # Weighted higher than the final phone

            # Weighted sum of the unnormalised components (maximum 2.2).
            similarity = (rhyme_score * 0.5) + (syllable_match * 0.3) + (phonetic_similarity * 0.2)

            if similarity >= threshold:
                suggestions.append({
                    "word": word,
                    "similarity": round(similarity, 3),
                    "phonetic_similarity": round(phonetic_similarity, 3),
                    "rhyme_score": round(rhyme_score, 3),
                    "syllable_match": syllable_match,
                    "initial_match": initial_match_score > 0,
                    "syllables": word_syllables,
                    "phones": word_phones,
                })

        # Sort by similarity score descending
        suggestions.sort(key=lambda x: x["similarity"], reverse=True)

        result = {
            "target": target,
            "target_syllables": target_syllables,
            "target_phones": target_phones,
            "suggestions": suggestions
        }

        return json.dumps(result, indent=2)

    def __init__(self, *args, **kwargs):
        # Accepts and ignores any arguments; only records that the tool has
        # not been set up yet.
        # NOTE(review): does not call super().__init__() - presumably mirrors
        # what the smolagents Tool base initialiser does; confirm.
        self.is_initialized = False