from smolagents.tools import Tool
import pronouncing
import json
import string


class ParodyWordSuggestionTool(Tool):
    # Tool that scores candidate words against a target word by comparing
    # their phone strings (CMU-dictionary pronunciations via `pronouncing`,
    # optionally overridden by caller-supplied custom pronunciations) and
    # returns the candidates above a similarity threshold as a JSON string.
    name = "parody_word_suggester"
    description = "Suggests rhyming funny words using CMU dictionary pronunciations."
    inputs = {
        'target': {
            'type': 'string',
            'description': 'The word you want to find rhyming alternatives for',
        },
        'word_list_str': {
            'type': 'string',
            'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')',
        },
        'min_similarity': {
            'type': 'string',
            'description': 'Minimum similarity threshold (0.0-1.0)',
            'nullable': True,
            'default': '0.5',
        },
        'custom_phones': {
            'type': 'object',
            'description': 'Optional dictionary of custom word pronunciations',
            'nullable': True,
            'default': None,
        },
    }
    output_type = "string"

    # Weights of the three components combined in _calculate_similarity.
    RHYME_WEIGHT = 0.5
    PHONE_SEQUENCE_WEIGHT = 0.3
    LENGTH_WEIGHT = 0.2

    # Groups of phones treated as "close" (similarity 0.7).  Encoded as a flat
    # string: groups are separated by '|', phones inside a group by ','.
    PHONE_GROUPS = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y|IY,IH|UW,UH|EH,AH|AO,AA|AE,AH|AY,EY|OW,UW"

    def _normalize_word(self, word):
        """Lower-case a word and strip surrounding whitespace and punctuation."""
        import string

        return word.lower().strip(string.punctuation + string.whitespace)

    def _get_word_phones(self, word, custom_phones=None):
        """Get phones for a word, checking custom dictionary first.

        Args:
            word: The (already normalized) word to look up.
            custom_phones: Optional mapping ``word -> {"primary_phones": str}``.
                An entry for ``word`` takes precedence over the CMU dictionary.

        Returns:
            The phone string for the word, or None when no pronunciation is
            available (including a custom entry that is malformed or has no
            "primary_phones" value).
        """
        if custom_phones and word in custom_phones:
            entry = custom_phones[word]
            # A malformed custom entry should not abort the whole request;
            # report the word as having no pronunciation instead.
            if isinstance(entry, dict):
                return entry.get("primary_phones")
            return None

        import pronouncing

        phones = pronouncing.phones_for_word(word)
        return phones[0] if phones else None

    def _get_primary_vowel(self, phones: list) -> str:
        """Get the primary stressed vowel from phone list.

        Returns the first phone carrying the stress marker '1' that contains
        a vowel letter, with stress digits removed, or None if there is none.
        """
        for phone in phones:
            if '1' in phone and any(letter in phone for letter in 'AEIOU'):
                return phone.rstrip('012')
        return None

    def _get_phone_similarity(self, phone1: str, phone2: str) -> float:
        """Calculate similarity between two phones.

        Returns 1.0 for identical phones (ignoring stress digits), 0.7 when
        both belong to the same PHONE_GROUPS group, 0.3 when they share a
        broad phone type, and 0.0 otherwise.
        """
        # Strip stress markers
        p1 = phone1.rstrip('012')
        p2 = phone2.rstrip('012')

        # Exact match
        if p1 == p2:
            return 1.0

        # Check similarity groups
        for group_str in self.PHONE_GROUPS.split('|'):
            group = group_str.split(',')
            if p1 in group and p2 in group:
                return 0.7

        # Check broader categories
        if self._get_phone_type(p1) == self._get_phone_type(p2):
            return 0.3

        return 0.0

    def _get_phone_type(self, phone: str) -> str:
        """Get the broad category of a phone.

        Returns one of 'vowel', 'nasal', 'stop', 'fricative', 'liquid',
        'glide' or 'other'.
        """
        # Strip stress markers
        phone = phone.rstrip('012')

        # Any phone containing a vowel letter is classified as a vowel.
        if any(letter in phone for letter in 'AEIOU'):
            return 'vowel'

        # Consonant categories, checked in this order.
        categories = (
            ('nasal', {'M', 'N', 'NG'}),
            ('stop', {'P', 'B', 'T', 'D', 'K', 'G'}),
            ('fricative', {'F', 'V', 'TH', 'DH', 'S', 'Z', 'SH', 'ZH'}),
            ('liquid', {'L', 'R'}),
            ('glide', {'W', 'Y'}),
        )
        for label, members in categories:
            if phone in members:
                return label
        return 'other'

    def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
        """Calculate similarity based on matching phones in sequence.

        Phones are compared position by position over the shorter list; the
        summed similarity is divided by the length of the longer list, so
        unmatched trailing phones count as zero.
        """
        if not phones1 or not phones2:
            return 0.0

        longest = max(len(phones1), len(phones2))
        total_similarity = sum(
            self._get_phone_similarity(a, b) for a, b in zip(phones1, phones2)
        )
        return total_similarity / longest

    def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
        """Calculate similarity based on phone length.

        Returns 1.0 for equal lengths, decreasing linearly with the length
        difference relative to the longer list; 0.0 when both are empty.
        """
        max_length = max(len(phones1), len(phones2))
        if max_length == 0:
            return 0.0
        length_diff = abs(len(phones1) - len(phones2))
        return 1.0 - (length_diff / max_length)

    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Calculate similarity based on multiple factors.

        Args:
            word1: First word (not used in the computation).
            phones1: Space-separated phone string of the first word.
            word2: Second word (not used in the computation).
            phones2: Space-separated phone string of the second word.

        Returns:
            A dict with the rounded combined "similarity", the individual
            "rhyme_score", "phone_sequence_score" and "length_score", and a
            "details" dict with the primary vowels, phone counts and an
            approximate count of matching phones.
        """
        phone_list1 = phones1.split()
        phone_list2 = phones2.split()

        # 1. Rhyme score (50%) - based on primary vowel
        vowel1 = self._get_primary_vowel(phone_list1)
        vowel2 = self._get_primary_vowel(phone_list2)
        rhyme_score = 1.0 if vowel1 and vowel2 and vowel1 == vowel2 else 0.0

        # 2. Phone sequence similarity (30%)
        phone_sequence_score = self._calculate_phone_sequence_similarity(phone_list1, phone_list2)

        # 3. Length similarity (20%)
        length_score = self._calculate_length_similarity(phone_list1, phone_list2)

        # Combined weighted score
        similarity = (
            (rhyme_score * self.RHYME_WEIGHT)
            + (phone_sequence_score * self.PHONE_SEQUENCE_WEIGHT)
            + (length_score * self.LENGTH_WEIGHT)
        )

        # The sequence score is normalised by the longer phone list, so the
        # same length must be used to turn it back into a phone count.
        longest = max(len(phone_list1), len(phone_list2))

        return {
            "similarity": round(similarity, 3),
            "rhyme_score": round(rhyme_score, 3),
            "phone_sequence_score": round(phone_sequence_score, 3),
            "length_score": round(length_score, 3),
            "details": {
                "primary_vowel1": vowel1,
                "primary_vowel2": vowel2,
                "phone_count1": len(phone_list1),
                "phone_count2": len(phone_list2),
                "matching_phones": round(phone_sequence_score * longest),
            },
        }

    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5", custom_phones: dict = None) -> str:
        """Rank the words in ``word_list_str`` by phonetic similarity to ``target``.

        Args:
            target: Word to find alternatives for.
            word_list_str: JSON array of candidate words, as a string.
            min_similarity: Minimum combined similarity for a candidate to be
                included; parsed with ``float``. None falls back to 0.5.
            custom_phones: Optional mapping ``word -> {"primary_phones": str}``
                consulted before the CMU dictionary.

        Returns:
            A JSON string. On success it contains "target", "target_phones",
            "invalid_words" and "suggestions" (sorted by similarity,
            descending). On failure it contains an "error" message and an
            empty "suggestions" list.
        """
        # NOTE(review): function-level imports kept from the original;
        # presumably required so the tool is self-contained - confirm.
        import pronouncing
        import string
        import json

        target = self._normalize_word(target)

        # `min_similarity` is declared nullable, so None must not crash.
        if min_similarity is None:
            threshold = 0.5
        else:
            try:
                threshold = float(min_similarity)
            except (TypeError, ValueError):
                return json.dumps({
                    "error": f"Invalid min_similarity '{min_similarity}': expected a number",
                    "suggestions": []
                }, indent=2)

        # Parse JSON string to list
        try:
            words = json.loads(word_list_str)
        except (json.JSONDecodeError, TypeError):
            return json.dumps({
                "error": "Invalid JSON string for word_list_str",
                "suggestions": []
            }, indent=2)

        if not isinstance(words, list):
            return json.dumps({
                "error": "word_list_str must be a JSON array of words",
                "suggestions": []
            }, indent=2)

        # Get target pronunciation
        target_phones = self._get_word_phones(target, custom_phones)
        if not target_phones:
            return json.dumps({
                "error": f"Target word '{target}' not found in dictionary or custom phones",
                "suggestions": []
            }, indent=2)

        # Split the word list into pronounceable and unknown words, looking
        # each pronunciation up only once.
        valid_entries = []
        invalid_words = []
        for raw_word in words:
            if not isinstance(raw_word, str):
                invalid_words.append(raw_word)
                continue
            word = self._normalize_word(raw_word)
            word_phones = self._get_word_phones(word, custom_phones)
            if word_phones:
                valid_entries.append((word, word_phones))
            else:
                invalid_words.append(word)

        if not valid_entries:
            return json.dumps({
                "error": "No valid words found in dictionary or custom phones",
                "invalid_words": invalid_words,
                "suggestions": []
            }, indent=2)

        # Score each valid word against the target
        suggestions = []
        for word, word_phones in valid_entries:
            similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
            if similarity_result["similarity"] >= threshold:
                suggestions.append({
                    "word": word,
                    "similarity": similarity_result["similarity"],
                    "rhyme_score": similarity_result["rhyme_score"],
                    "phone_sequence_score": similarity_result["phone_sequence_score"],
                    "length_score": similarity_result["length_score"],
                    "phones": word_phones,
                    "is_custom": bool(custom_phones) and word in custom_phones,
                    "details": similarity_result["details"]
                })

        # Sort by similarity score descending
        suggestions.sort(key=lambda item: item["similarity"], reverse=True)

        result = {
            "target": target,
            "target_phones": target_phones,
            "invalid_words": invalid_words,
            "suggestions": suggestions
        }
        return json.dumps(result, indent=2)

    def __init__(self, *args, **kwargs):
        self.is_initialized = False