File size: 4,858 Bytes
d53b325
e53cc7e
6280809
f9e805b
 
d53b325
 
 
19ee3db
 
 
d53b325
 
19ee3db
 
d53b325
c0a5ec1
d53b325
f9e805b
d53b325
c0a5ec1
d53b325
 
 
aae0d96
 
 
 
 
 
 
 
c0a5ec1
d53b325
 
 
 
 
 
 
 
33a37e4
 
d53b325
c0a5ec1
 
f9e805b
d53b325
9d3290e
33a37e4
 
 
f9e805b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6280809
f9e805b
6280809
 
f9e805b
6280809
f9e805b
 
d53b325
f9e805b
 
d53b325
 
 
 
 
f9e805b
 
 
 
 
d53b325
 
 
 
 
 
 
 
33a37e4
d53b325
 
 
 
 
c0a5ec1
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from smolagents.tools import Tool
import pronouncing
import json
import difflib
import string

class ParodyWordSuggestionTool(Tool):
    """Tool that ranks candidate words by how well they rhyme with a target word.

    Pronunciations are looked up with the ``pronouncing`` package. Each
    candidate gets a combined score built from a rhyme check, a syllable-count
    check and a spelling-similarity ratio; see :meth:`forward` for the weights.
    """

    name = "parody_word_suggester"
    description = """Suggests rhyming funny words using CMU dictionary pronunciations.

    Returns similar-sounding words that rhyme, especially focusing on common vowel sounds."""
    inputs = {
        'target': {
            'type': 'string',
            'description': 'The word you want to find rhyming alternatives for',
        },
        'word_list_str': {
            'type': 'string',
            'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')',
        },
        'min_similarity': {
            'type': 'string',
            'description': 'Minimum similarity threshold (0.0-1.0)',
            'nullable': True,
        },
    }
    output_type = "string"

    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5") -> str:
        """Score every candidate word against ``target`` and return the matches as JSON.

        The combined score is ``0.6 * rhyme + 0.25 * syllables + 0.15 * spelling``:

        * rhyme is 1.0 when both words have identical phonemes from their last
          vowel phoneme to the end of the word (stress digits included), else 0.0;
        * syllables is 1.0 when both words have the same syllable count, else 0.0;
        * spelling is ``difflib.SequenceMatcher`` ratio of the two spellings.

        Only the first pronunciation returned for each word is used. Candidates
        without a pronunciation, and list entries that are not strings, are
        skipped.

        Args:
            target: Word to find rhymes for. It is lower-cased and surrounding
                punctuation is stripped before lookup.
            word_list_str: JSON-encoded list of candidate words.
            min_similarity: Minimum combined score for a candidate to be
                returned, as a number or numeric string. ``None`` (the input is
                declared nullable) falls back to 0.5.

        Returns:
            A JSON string. On success it holds ``target``, ``target_syllables``,
            ``target_phones`` and ``suggestions`` (sorted by descending score).
            When ``word_list_str`` is not a JSON list, or the target has no
            pronunciation, it holds ``error`` and an empty ``suggestions`` list.

        Raises:
            ValueError: If ``min_similarity`` is a string that is not a number.
        """
        # Imports are repeated inside the method, as in the original body, so
        # the method does not depend on module-level names.
        import pronouncing
        import string
        import json
        from difflib import SequenceMatcher

        def rhyme_tail(phone_list):
            """Return the phonemes from the last vowel phoneme onwards, or None."""
            for idx in range(len(phone_list) - 1, -1, -1):
                # Same vowel test the original code used: a phoneme symbol
                # containing any of the letters A, E, I, O, U.
                if any(letter in phone_list[idx] for letter in "AEIOU"):
                    return phone_list[idx:]
            return None

        target = target.lower().strip(string.punctuation)
        # The input schema marks min_similarity as nullable, so None can reach
        # this point; float(None) would raise TypeError.
        min_similarity = 0.5 if min_similarity is None else float(min_similarity)
        suggestions = []

        # Parse JSON string to list. Anything that is not a JSON list is
        # rejected here: iterating a dict or a string would silently score
        # keys or single characters instead of words.
        try:
            words = json.loads(word_list_str)
        except (json.JSONDecodeError, TypeError):
            words = None
        if not isinstance(words, list):
            return json.dumps({
                "error": "Invalid JSON string for word_list_str",
                "suggestions": []
            }, indent=2)

        # Get target pronunciation
        target_phones = pronouncing.phones_for_word(target)
        if not target_phones:
            return json.dumps({
                "error": f"'{target}' not found in CMU dictionary",
                "suggestions": []
            }, indent=2)

        target_phones = target_phones[0]
        # Target-side values do not change per candidate; compute them once.
        target_tail = rhyme_tail(target_phones.split())
        target_syl = pronouncing.syllable_count(target_phones)

        # Check each word
        for raw_word in words:
            if not isinstance(raw_word, str):
                continue  # e.g. numbers or nested lists in the JSON payload
            word = raw_word.lower().strip(string.punctuation)
            phones = pronouncing.phones_for_word(word)
            if not phones:
                continue
            word_phones = phones[0]

            # 1. Rhyme score (most important - 60%)
            # Compare from each word's OWN last vowel. The previous fixed
            # two-phoneme window matched "best"/"list" (both end in S T) and
            # missed "be"/"see" (the onset consonant fell inside the window).
            word_tail = rhyme_tail(word_phones.split())
            if target_tail is not None and word_tail == target_tail:
                rhyme_score = 1.0
            else:
                rhyme_score = 0.0

            # 2. Syllable match (25%)
            word_syl = pronouncing.syllable_count(word_phones)
            syllable_score = 1.0 if target_syl == word_syl else 0.0

            # 3. Overall similarity (15%) - using string similarity
            string_similarity = SequenceMatcher(None, target, word).ratio()

            # Combined score (60% rhyme, 25% syllables, 15% similarity)
            similarity = (rhyme_score * 0.6) + (syllable_score * 0.25) + (string_similarity * 0.15)

            if similarity >= min_similarity:
                suggestions.append({
                    "word": word,
                    "similarity": round(similarity, 3),
                    "rhyme_match": rhyme_score == 1.0,
                    "syllable_match": syllable_score == 1.0,
                    "string_similarity": round(string_similarity, 3),
                    "syllables": word_syl,
                    "phones": word_phones
                })

        # Sort by similarity score descending
        suggestions.sort(key=lambda x: x["similarity"], reverse=True)

        result = {
            "target": target,
            "target_syllables": target_syl,
            "target_phones": target_phones,
            "suggestions": suggestions
        }

        return json.dumps(result, indent=2)


    def __init__(self, *args, **kwargs):
        """Mark the tool as not yet initialized; positional and keyword arguments are ignored."""
        self.is_initialized = False