Spaces:

patruff
/

parody-suggestions

Configuration error

File size: 7,310 Bytes

d53b325
5da935d
 
2898d37
d53b325
 
 
9bc766a
fd49873
d53b325
2898d37
 
 
91c8f98
fd49873
 
 
 
 
 
 
 
 
 
9bc766a
 
2898d37
 
 
 
9bc766a
0111e58
 
2898d37
 
 
 
9bc766a
 
2898d37
 
 
9bc766a
 
2898d37
9bc766a
2898d37
 
 
9bc766a
2898d37
 
 
 
 
 
 
 
 
0111e58
 
6e8a124
9bc766a
 
91c8f98
 
 
2898d37
9bc766a
 
2898d37
6e8a124
2898d37
 
6e8a124
2898d37
 
6e8a124
9bc766a
 
 
2898d37
 
9bc766a
6e8a124
91c8f98
 
6e8a124
2898d37
 
6e8a124
9bc766a
 
 
 
2898d37
6e8a124
91c8f98
 
 
fd49873
d53b325
c0a5ec1
d53b325
 
9bc766a
c0a5ec1
d53b325
 
91c8f98
 
d53b325
aae0d96
 
 
 
 
 
 
 
c0a5ec1
9bc766a
fd49873
d53b325
 
fd49873
3910298
 
 
9bc766a
3910298
 
fd49873
3910298
 
 
 
 
 
fd49873
3910298
d53b325
 
e13fea5
c0a5ec1
3910298
fd49873
 
91c8f98
d53b325
91c8f98
d53b325
 
91c8f98
6e8a124
2898d37
 
e13fea5
fd49873
6e8a124
d53b325
 
 
 
 
 
 
33a37e4
91c8f98
d53b325
 
 
 
 
98f1f92

from smolagents.tools import Tool
import pronouncing
import json
import string

class ParodyWordSuggestionTool(Tool):
    name = "parody_word_suggester"
    description = "Suggests rhyming funny words using CMU dictionary pronunciations."
    inputs = {'target': {'type': 'string', 'description': 'The word you want to find rhyming alternatives for'}, 'word_list_str': {'type': 'string', 'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')'}, 'min_similarity': {'type': 'string', 'description': 'Minimum similarity threshold (0.0-1.0)', 'nullable': True, 'default': '0.5'}, 'custom_phones': {'type': 'object', 'description': 'Optional dictionary of custom word pronunciations', 'nullable': True, 'default': None}}
    output_type = "string"
    RHYME_WEIGHT = 0.5
    PHONE_SEQUENCE_WEIGHT = 0.3
    LENGTH_WEIGHT = 0.2

    def _get_word_phones(self, word, custom_phones=None):
        """Get phones for a word, checking custom dictionary first."""
        if custom_phones and word in custom_phones:
            return custom_phones[word]["primary_phones"]
        
        import pronouncing
        phones = pronouncing.phones_for_word(word)
        return phones[0] if phones else None


    def _get_primary_vowel(self, phones: list) -> str:
        """Get the primary stressed vowel from phone list."""
        v = ""
        for phone in phones:
            if '1' in phone and any(v in phone for v in 'AEIOU'):
                return phone.rstrip('012')
        return None


    def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
        """Calculate similarity based on matching phones in sequence."""
        p = ""
        if not phones1 or not phones2:
            return 0.0
    
        # Strip stress markers for comparison
        clean_phones1 = [p.rstrip('012') for p in phones1]
        clean_phones2 = [p.rstrip('012') for p in phones2]
    
        matches = 0
        total_comparisons = max(len(clean_phones1), len(clean_phones2))
    
        # Compare phones in sequence
        for i in range(min(len(clean_phones1), len(clean_phones2))):
            if clean_phones1[i] == clean_phones2[i]:
                matches += 1
            
        return matches / total_comparisons if total_comparisons > 0 else 0.0


    def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
        """Calculate similarity based on phone length."""
        max_length = max(len(phones1), len(phones2))
        length_diff = abs(len(phones1) - len(phones2))
        return 1.0 - (length_diff / max_length) if max_length > 0 else 0.0


    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Calculate similarity based on multiple factors."""
        # Initialize phone lists
        phone_list1 = phones1.split()
        phone_list2 = phones2.split()
    
        # 1. Rhyme score (50%) - based on primary vowel
        vowel1 = self._get_primary_vowel(phone_list1)
        vowel2 = self._get_primary_vowel(phone_list2)
        rhyme_score = 1.0 if vowel1 and vowel2 and vowel1 == vowel2 else 0.0
    
        # 2. Phone sequence similarity (30%)
        phone_sequence_score = self._calculate_phone_sequence_similarity(phone_list1, phone_list2)
    
        # 3. Length similarity (20%)
        length_score = self._calculate_length_similarity(phone_list1, phone_list2)
    
        # Combined weighted score
        similarity = (
            (rhyme_score * self.RHYME_WEIGHT) +
            (phone_sequence_score * self.PHONE_SEQUENCE_WEIGHT) +
            (length_score * self.LENGTH_WEIGHT)
        )
    
        return {
            "similarity": round(similarity, 3),
            "rhyme_score": round(rhyme_score, 3),
            "phone_sequence_score": round(phone_sequence_score, 3),
            "length_score": round(length_score, 3),
            "details": {
                "primary_vowel1": vowel1,
                "primary_vowel2": vowel2,
                "phone_count1": len(phone_list1),
                "phone_count2": len(phone_list2),
                "matching_phones": round(phone_sequence_score * len(phone_list1))
            }
        }


    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5", custom_phones: dict = None) -> str:
        import pronouncing
        import string
        import json
    
        # Initialize variables
        target = target.lower().strip(string.punctuation)
        min_similarity = float(min_similarity)
        suggestions = []
        valid_words = []
        invalid_words = []
    
        # Parse JSON string to list
        try:
            words = json.loads(word_list_str)
        except json.JSONDecodeError:
            return json.dumps({
                "error": "Invalid JSON string for word_list_str",
                "suggestions": []
            }, indent=2)
        
        # Get target pronunciation
        target_phones = self._get_word_phones(target, custom_phones)
        if not target_phones:
            return json.dumps({
                "error": f"Target word '{target}' not found in dictionary or custom phones",
                "suggestions": []
            }, indent=2)
    
        # Filter word list
        for word in words:
            word = word.lower().strip(string.punctuation)
            if self._get_word_phones(word, custom_phones):
                valid_words.append(word)
            else:
                invalid_words.append(word)
    
        if not valid_words:
            return json.dumps({
                "error": "No valid words found in dictionary or custom phones",
                "invalid_words": invalid_words,
                "suggestions": []
            }, indent=2)
    
        # Check each word
        for word in valid_words:
            word_phones = self._get_word_phones(word, custom_phones)
            if word_phones:
                similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
            
                if similarity_result["similarity"] >= min_similarity:
                    suggestions.append({
                        "word": word,
                        "similarity": similarity_result["similarity"],
                        "rhyme_score": similarity_result["rhyme_score"],
                        "phone_sequence_score": similarity_result["phone_sequence_score"],
                        "length_score": similarity_result["length_score"],
                        "phones": word_phones,
                        "is_custom": word in custom_phones if custom_phones else False,
                        "details": similarity_result["details"]
                    })
    
        # Sort by similarity score descending
        suggestions.sort(key=lambda x: x["similarity"], reverse=True)
    
        result = {
            "target": target,
            "target_phones": target_phones,
            "invalid_words": invalid_words,
            "suggestions": suggestions
        }
    
        return json.dumps(result, indent=2)


    def __init__(self, *args, **kwargs):
        """Create the tool in an uninitialized state.

        Positional and keyword arguments are accepted but ignored, and the
        base-class initializer is not invoked here.
        """
        # Only this flag is set; nothing in the visible code reads it
        # (presumably the base class does -- confirm).
        self.is_initialized = False