import json
import string

import pronouncing
from smolagents.tools import Tool


class ParodyWordSuggestionTool(Tool):
    """Rank candidate words by how closely they sound like a target word.

    Pronunciations come from an optional caller-supplied ``custom_phones``
    mapping first, then from ``pronouncing.phones_for_word``. Each candidate
    gets a weighted score built from three parts:

    * rhyme score: 1.0 when both words share the same primary-stressed vowel,
    * phone sequence score: fraction of positions holding the same phone,
    * length score: how close the two phone counts are.
    """

    name = "parody_word_suggester"
    description = "Suggests rhyming funny words using CMU dictionary pronunciations."
    inputs = {
        'target': {
            'type': 'string',
            'description': 'The word you want to find rhyming alternatives for',
        },
        'word_list_str': {
            'type': 'string',
            'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')',
        },
        'min_similarity': {
            'type': 'string',
            'description': 'Minimum similarity threshold (0.0-1.0)',
            'nullable': True,
            'default': '0.5',
        },
        'custom_phones': {
            'type': 'object',
            'description': 'Optional dictionary of custom word pronunciations',
            'nullable': True,
            'default': None,
        },
    }
    output_type = "string"

    # Weights of the three score components; they sum to 1.0.
    RHYME_WEIGHT = 0.5
    PHONE_SEQUENCE_WEIGHT = 0.3
    LENGTH_WEIGHT = 0.2

    def _get_word_phones(self, word, custom_phones=None):
        """Return the pronunciation string for ``word``, or None if unknown.

        ``custom_phones`` is consulted first. An entry is expected to be a
        dict carrying a ``"primary_phones"`` value; a malformed entry is
        treated as "no pronunciation" instead of raising.
        Otherwise the first pronunciation reported by
        ``pronouncing.phones_for_word`` is returned.
        """
        if custom_phones and word in custom_phones:
            entry = custom_phones[word]
            if isinstance(entry, dict):
                return entry.get("primary_phones")
            return None

        # NOTE(review): function-local import kept from the original;
        # presumably needed so the method is self-contained when the tool
        # is serialized - confirm before removing.
        import pronouncing

        phones = pronouncing.phones_for_word(word)
        return phones[0] if phones else None

    def _get_primary_vowel(self, phones: list):
        """Return the first primary-stressed vowel phone without its stress digit.

        A phone qualifies when it contains the marker ``1`` and at least one
        of the letters A, E, I, O, U. Returns None when no phone qualifies.
        """
        vowel_letters = set('AEIOU')
        for phone in phones:
            if '1' in phone and not vowel_letters.isdisjoint(phone):
                return phone.rstrip('012')
        return None

    def _count_matching_phones(self, phones1: list, phones2: list) -> int:
        """Count positions where both lists hold the same phone, ignoring stress digits."""
        matches = 0
        for first, second in zip(phones1, phones2):
            if first.rstrip('012') == second.rstrip('012'):
                matches += 1
        return matches

    def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
        """Return matching positions divided by the longer list's length.

        Returns 0.0 when either list is empty.
        """
        if not phones1 or not phones2:
            return 0.0
        longest = max(len(phones1), len(phones2))
        return self._count_matching_phones(phones1, phones2) / longest

    def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
        """Return 1.0 for equal phone counts, decreasing as the counts diverge.

        Returns 0.0 when both lists are empty.
        """
        max_length = max(len(phones1), len(phones2))
        if max_length == 0:
            return 0.0
        length_diff = abs(len(phones1) - len(phones2))
        return 1.0 - (length_diff / max_length)

    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Score two pronunciations against each other.

        Args:
            word1: First word (not used in the computation).
            phones1: Space-separated phone string for the first word.
            word2: Second word (not used in the computation).
            phones2: Space-separated phone string for the second word.

        Returns:
            A dict with the rounded combined ``similarity``, the three rounded
            component scores, and a ``details`` dict holding the primary
            vowels, the phone counts and the number of matching positions.
        """
        phone_list1 = phones1.split()
        phone_list2 = phones2.split()

        # 1. Rhyme score - both words must share a primary-stressed vowel.
        vowel1 = self._get_primary_vowel(phone_list1)
        vowel2 = self._get_primary_vowel(phone_list2)
        rhyme_score = 1.0 if vowel1 and vowel2 and vowel1 == vowel2 else 0.0

        # 2. Phone sequence similarity.
        phone_sequence_score = self._calculate_phone_sequence_similarity(phone_list1, phone_list2)

        # 3. Length similarity.
        length_score = self._calculate_length_similarity(phone_list1, phone_list2)

        similarity = (
            (rhyme_score * self.RHYME_WEIGHT)
            + (phone_sequence_score * self.PHONE_SEQUENCE_WEIGHT)
            + (length_score * self.LENGTH_WEIGHT)
        )

        return {
            "similarity": round(similarity, 3),
            "rhyme_score": round(rhyme_score, 3),
            "phone_sequence_score": round(phone_sequence_score, 3),
            "length_score": round(length_score, 3),
            "details": {
                "primary_vowel1": vowel1,
                "primary_vowel2": vowel2,
                "phone_count1": len(phone_list1),
                "phone_count2": len(phone_list2),
                # Use the real match count: the sequence score is normalised
                # by the longer list, so scaling it by len(phone_list1)
                # under-reports whenever the first word is the shorter one.
                "matching_phones": self._count_matching_phones(phone_list1, phone_list2),
            },
        }

    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5", custom_phones: dict = None) -> str:
        """Return a JSON report of candidates that sound similar to ``target``.

        Args:
            target: Word to find alternatives for; lower-cased and stripped
                of surrounding punctuation before lookup.
            word_list_str: JSON array of candidate words.
            min_similarity: Minimum combined score, as a string parseable by
                ``float``. ``None`` falls back to 0.5.
            custom_phones: Optional mapping of word to a dict carrying a
                ``"primary_phones"`` pronunciation string.

        Returns:
            A JSON string. On success it holds ``target``, ``target_phones``,
            ``invalid_words`` and ``suggestions`` sorted by descending
            similarity. On failure it holds ``error`` and an empty
            ``suggestions`` list.
        """
        # NOTE(review): function-local imports kept from the original;
        # presumably needed so the method is self-contained when the tool
        # is serialized - confirm before removing.
        import json
        import string

        target = target.lower().strip(string.punctuation)

        # The input is declared nullable, so None must not reach float().
        if min_similarity is None:
            threshold = 0.5
        else:
            try:
                threshold = float(min_similarity)
            except (TypeError, ValueError):
                return json.dumps({
                    "error": f"Invalid min_similarity value: {min_similarity!r}",
                    "suggestions": []
                }, indent=2)

        # Parse JSON string to list. TypeError covers a non-string payload.
        try:
            words = json.loads(word_list_str)
        except (json.JSONDecodeError, TypeError):
            return json.dumps({
                "error": "Invalid JSON string for word_list_str",
                "suggestions": []
            }, indent=2)
        if not isinstance(words, list):
            return json.dumps({
                "error": "word_list_str must be a JSON array of words",
                "suggestions": []
            }, indent=2)

        # Get target pronunciation
        target_phones = self._get_word_phones(target, custom_phones)
        if not target_phones:
            return json.dumps({
                "error": f"Target word '{target}' not found in dictionary or custom phones",
                "suggestions": []
            }, indent=2)

        # Split candidates into pronounceable and unknown words, keeping each
        # pronunciation so it is looked up only once.
        valid_words = []
        invalid_words = []
        for raw_word in words:
            if not isinstance(raw_word, str):
                invalid_words.append(raw_word)
                continue
            word = raw_word.lower().strip(string.punctuation)
            word_phones = self._get_word_phones(word, custom_phones)
            if word_phones:
                valid_words.append((word, word_phones))
            else:
                invalid_words.append(word)

        if not valid_words:
            return json.dumps({
                "error": "No valid words found in dictionary or custom phones",
                "invalid_words": invalid_words,
                "suggestions": []
            }, indent=2)

        # Score each candidate against the target and keep those at or above
        # the threshold.
        suggestions = []
        for word, word_phones in valid_words:
            similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
            if similarity_result["similarity"] < threshold:
                continue
            suggestions.append({
                "word": word,
                "similarity": similarity_result["similarity"],
                "rhyme_score": similarity_result["rhyme_score"],
                "phone_sequence_score": similarity_result["phone_sequence_score"],
                "length_score": similarity_result["length_score"],
                "phones": word_phones,
                "is_custom": word in custom_phones if custom_phones else False,
                "details": similarity_result["details"]
            })

        # Sort by similarity score descending
        suggestions.sort(key=lambda x: x["similarity"], reverse=True)

        result = {
            "target": target,
            "target_phones": target_phones,
            "invalid_words": invalid_words,
            "suggestions": suggestions
        }
        return json.dumps(result, indent=2)

    def __init__(self, *args, **kwargs):
        self.is_initialized = False