# patruff's picture
# Upload tool
# 2898d37 verified
# raw
# history blame
# 7.31 kB
from smolagents.tools import Tool
import pronouncing
import json
import string
class ParodyWordSuggestionTool(Tool):
    """Rank candidate words by how closely they sound like a target word.

    Pronunciations are taken from the ``pronouncing`` package or from a
    caller-supplied ``custom_phones`` mapping. Every candidate receives a
    weighted score built from three parts: whether the primary stressed
    vowels match, how many phones match position by position, and how close
    the two phone counts are.

    NOTE(review): imports are kept inside the methods, presumably so each
    method stays self-contained when the tool is serialized by smolagents --
    confirm before hoisting them to module level.
    """

    name = "parody_word_suggester"
    description = "Suggests rhyming funny words using CMU dictionary pronunciations."
    inputs = {
        'target': {
            'type': 'string',
            'description': 'The word you want to find rhyming alternatives for',
        },
        'word_list_str': {
            'type': 'string',
            'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')',
        },
        'min_similarity': {
            'type': 'string',
            'description': 'Minimum similarity threshold (0.0-1.0)',
            'nullable': True,
            'default': '0.5',
        },
        'custom_phones': {
            'type': 'object',
            'description': 'Optional dictionary of custom word pronunciations',
            'nullable': True,
            'default': None,
        },
    }
    output_type = "string"

    # Weights of the three partial scores combined in _calculate_similarity;
    # together they add up to 1.0.
    RHYME_WEIGHT = 0.5
    PHONE_SEQUENCE_WEIGHT = 0.3
    LENGTH_WEIGHT = 0.2

    def __init__(self, *args, **kwargs):
        """Mark the tool as not yet initialized; all arguments are ignored.

        NOTE(review): the base-class initializer is not called here, exactly
        as in the original code -- confirm that this is intended.
        """
        self.is_initialized = False

    def _get_custom_phones(self, word, custom_phones):
        """Return the usable custom pronunciation of ``word``, or None.

        An entry is usable only when ``custom_phones[word]`` is a dict whose
        ``"primary_phones"`` value is a non-blank string. Anything else is
        treated as "no custom pronunciation" instead of raising.
        """
        if not isinstance(custom_phones, dict) or word not in custom_phones:
            return None
        entry = custom_phones[word]
        if not isinstance(entry, dict):
            return None
        phones = entry.get("primary_phones")
        if isinstance(phones, str) and phones.strip():
            return phones
        return None

    def _get_word_phones(self, word, custom_phones=None):
        """Get phones for a word, checking the custom dictionary first.

        Args:
            word: The (already normalized) word to look up.
            custom_phones: Optional mapping of word -> {"primary_phones": str}.

        Returns:
            A space-separated phone string, or None when the word has neither
            a usable custom entry nor a dictionary pronunciation.
        """
        custom = self._get_custom_phones(word, custom_phones)
        if custom is not None:
            return custom
        import pronouncing
        phones = pronouncing.phones_for_word(word)
        # Only the first listed pronunciation is used.
        return phones[0] if phones else None

    def _get_primary_vowel(self, phones: list) -> str:
        """Get the primary stressed vowel from a phone list.

        Returns the first phone that carries the stress marker ``1`` and
        contains a vowel letter, with its stress digits removed, or None when
        no such phone exists.
        """
        vowel_letters = set("AEIOU")
        for phone in phones:
            if "1" in phone and not vowel_letters.isdisjoint(phone):
                return phone.rstrip("012")
        return None

    def _calculate_phone_sequence_similarity(self, phones1: list, phones2: list) -> float:
        """Calculate similarity based on matching phones in sequence.

        Phones are compared position by position with stress digits stripped.
        The number of matches is divided by the length of the longer list, so
        the result lies in [0.0, 1.0]; it is 0.0 when either list is empty.
        """
        if not phones1 or not phones2:
            return 0.0
        matches = 0
        # zip stops at the shorter list; unmatched trailing phones only
        # affect the denominator below.
        for first, second in zip(phones1, phones2):
            if first.rstrip("012") == second.rstrip("012"):
                matches += 1
        return matches / max(len(phones1), len(phones2))

    def _calculate_length_similarity(self, phones1: list, phones2: list) -> float:
        """Calculate similarity based on phone count.

        Returns 1.0 for equal lengths, decreasing towards 0.0 as the lengths
        diverge; 0.0 when both lists are empty.
        """
        max_length = max(len(phones1), len(phones2))
        if max_length == 0:
            return 0.0
        length_diff = abs(len(phones1) - len(phones2))
        return 1.0 - (length_diff / max_length)

    def _calculate_similarity(self, word1, phones1, word2, phones2):
        """Calculate similarity based on multiple factors.

        Args:
            word1: First word (unused in the computation; kept for the
                existing call signature).
            phones1: Space-separated phone string of the first word.
            word2: Second word (unused, see ``word1``).
            phones2: Space-separated phone string of the second word.

        Returns:
            A dict with the combined ``similarity``, the three partial scores
            (all rounded to 3 decimals) and a ``details`` dict holding the
            primary vowels, phone counts and number of matching phones.
        """
        phone_list1 = phones1.split()
        phone_list2 = phones2.split()

        # 1. Rhyme score: all-or-nothing match of the primary stressed vowel.
        vowel1 = self._get_primary_vowel(phone_list1)
        vowel2 = self._get_primary_vowel(phone_list2)
        rhyme_score = 1.0 if vowel1 and vowel2 and vowel1 == vowel2 else 0.0

        # 2. Position-by-position phone agreement.
        phone_sequence_score = self._calculate_phone_sequence_similarity(phone_list1, phone_list2)

        # 3. Closeness of the phone counts.
        length_score = self._calculate_length_similarity(phone_list1, phone_list2)

        similarity = (
            (rhyme_score * self.RHYME_WEIGHT)
            + (phone_sequence_score * self.PHONE_SEQUENCE_WEIGHT)
            + (length_score * self.LENGTH_WEIGHT)
        )

        # The sequence score is matches / longer length, so the match count
        # is recovered by multiplying with the longer length (not with the
        # length of the first list, which under-counts when it is shorter).
        longest = max(len(phone_list1), len(phone_list2))

        return {
            "similarity": round(similarity, 3),
            "rhyme_score": round(rhyme_score, 3),
            "phone_sequence_score": round(phone_sequence_score, 3),
            "length_score": round(length_score, 3),
            "details": {
                "primary_vowel1": vowel1,
                "primary_vowel2": vowel2,
                "phone_count1": len(phone_list1),
                "phone_count2": len(phone_list2),
                "matching_phones": round(phone_sequence_score * longest)
            }
        }

    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5", custom_phones: dict = None) -> str:
        """Score every word of a JSON word list against ``target``.

        Args:
            target: Word to find similar-sounding alternatives for.
            word_list_str: JSON array of candidate words, as a string.
            min_similarity: Minimum combined score (as text) a candidate must
                reach; None falls back to "0.5".
            custom_phones: Optional mapping of word -> {"primary_phones": str}
                consulted before the pronunciation dictionary.

        Returns:
            A JSON string. On success it holds ``target``, ``target_phones``,
            ``invalid_words`` and ``suggestions`` (sorted by similarity,
            best first). On bad input it holds an ``error`` message and an
            empty ``suggestions`` list instead of raising.
        """
        import json
        import string

        # Strip whitespace together with punctuation; otherwise a trailing
        # space would shield the punctuation next to it from being removed.
        strip_chars = string.punctuation + string.whitespace
        target = target.lower().strip(strip_chars)

        # The input is declared nullable, so None means "use the default".
        if min_similarity is None:
            min_similarity = "0.5"
        try:
            threshold = float(min_similarity)
        except (TypeError, ValueError):
            return json.dumps({
                "error": f"Invalid min_similarity '{min_similarity}': expected a number between 0.0 and 1.0",
                "suggestions": []
            }, indent=2)

        # Parse JSON string to list
        try:
            words = json.loads(word_list_str)
        except (TypeError, json.JSONDecodeError):
            return json.dumps({
                "error": "Invalid JSON string for word_list_str",
                "suggestions": []
            }, indent=2)
        if not isinstance(words, list):
            return json.dumps({
                "error": "word_list_str must be a JSON array of words",
                "suggestions": []
            }, indent=2)

        # Get target pronunciation
        target_phones = self._get_word_phones(target, custom_phones)
        if not target_phones:
            return json.dumps({
                "error": f"Target word '{target}' not found in dictionary or custom phones",
                "suggestions": []
            }, indent=2)

        # Split the list into pronounceable candidates and rejects, keeping
        # each candidate's phones so they are looked up only once.
        candidates = []
        invalid_words = []
        for raw_word in words:
            if not isinstance(raw_word, str):
                invalid_words.append(raw_word)
                continue
            word = raw_word.lower().strip(strip_chars)
            word_phones = self._get_word_phones(word, custom_phones)
            if word_phones:
                candidates.append((word, word_phones))
            else:
                invalid_words.append(word)

        if not candidates:
            return json.dumps({
                "error": "No valid words found in dictionary or custom phones",
                "invalid_words": invalid_words,
                "suggestions": []
            }, indent=2)

        # Score each candidate and keep those reaching the threshold.
        suggestions = []
        for word, word_phones in candidates:
            similarity_result = self._calculate_similarity(word, word_phones, target, target_phones)
            if similarity_result["similarity"] < threshold:
                continue
            suggestions.append({
                "word": word,
                "similarity": similarity_result["similarity"],
                "rhyme_score": similarity_result["rhyme_score"],
                "phone_sequence_score": similarity_result["phone_sequence_score"],
                "length_score": similarity_result["length_score"],
                "phones": word_phones,
                # True only when the phones really came from custom_phones.
                "is_custom": self._get_custom_phones(word, custom_phones) is not None,
                "details": similarity_result["details"]
            })

        # Sort by similarity score descending
        suggestions.sort(key=lambda item: item["similarity"], reverse=True)

        result = {
            "target": target,
            "target_phones": target_phones,
            "invalid_words": invalid_words,
            "suggestions": suggestions
        }
        return json.dumps(result, indent=2)