Spaces:

patruff
/

parody-suggestions

Configuration error

App Files Files Community

parody-suggestions / tool.py

patruff

Upload tool

9bc766a verified 5 months ago

raw

history blame

9.88 kB

	from smolagents.tools import Tool
	import string
	import pronouncing
	import json

	class ParodyWordSuggestionTool(Tool):
	    """Rank candidate words by how closely they sound like a target word.

	    Pronunciations are space-separated phone strings taken either from the
	    caller-supplied ``custom_phones`` mapping or from the ``pronouncing``
	    package. Each candidate gets a weighted score built from four parts:
	    matching primary stressed vowel, similar phone count, similar spelling,
	    and similar consonants.

	    NOTE(review): the methods import their dependencies locally and avoid
	    comprehensions on purpose. The original code pre-declared every
	    comprehension variable, presumably to satisfy a static tool validator;
	    confirm before "modernising" these loops.
	    """

	    name = "parody_word_suggester"
	    description = "Suggests rhyming funny words using CMU dictionary pronunciations."
	    inputs = {
	        'target': {
	            'type': 'string',
	            'description': 'The word you want to find rhyming alternatives for',
	        },
	        'word_list_str': {
	            'type': 'string',
	            'description': 'JSON string of word list (e.g. \'["word1", "word2"]\')',
	        },
	        'min_similarity': {
	            'type': 'string',
	            'description': 'Minimum similarity threshold (0.0-1.0)',
	            'nullable': True,
	            'default': '0.5',
	        },
	        'custom_phones': {
	            'type': 'object',
	            'description': 'Optional dictionary of custom word pronunciations',
	            'nullable': True,
	            'default': None,
	        },
	    }
	    output_type = "string"

	    # Weights of the four partial scores; together they add up to 1.0.
	    RHYME_WEIGHT = 0.6
	    PHONE_PATTERN_WEIGHT = 0.2
	    CHAR_DIFF_WEIGHT = 0.1
	    CONSONANT_WEIGHT = 0.1

	    # Groups of consonant phones treated as "similar": groups are separated
	    # by "|" and the phones inside a group by ",".
	    CONSONANT_REF = "M,N,NG|P,B|T,D|K,G|F,V|TH,DH|S,Z|SH,ZH|L,R|W,Y"

	    # A phone containing any of these letters is treated as a vowel.
	    VOWEL_LETTERS = "AEIOU"
	    # Trailing digits stripped from a phone before it is compared.
	    STRESS_DIGITS = "012"

	    def __init__(self, *args, **kwargs):
	        # Accept and ignore arbitrary arguments so the tool can be built
	        # with or without them.
	        self.is_initialized = False

	    def _get_consonant_groups(self):
	        """Return ``CONSONANT_REF`` parsed into a list of phone lists."""
	        groups = []
	        for group_str in self.CONSONANT_REF.split("|"):
	            groups.append(group_str.split(","))
	        return groups

	    def _get_word_phones(self, word, custom_phones=None):
	        """Return the phone string for ``word``, or ``None`` if unknown.

	        ``custom_phones`` is consulted first; an entry is expected to be a
	        mapping with a ``"primary_phones"`` key. Otherwise the first
	        pronunciation reported by ``pronouncing`` is used.
	        """
	        if custom_phones and word in custom_phones:
	            return custom_phones[word]["primary_phones"]

	        import pronouncing

	        phones = pronouncing.phones_for_word(word)
	        return phones[0] if phones else None

	    def _is_vowel_phone(self, phone):
	        """Return True when ``phone`` contains any letter of ``VOWEL_LETTERS``."""
	        for letter in self.VOWEL_LETTERS:
	            if letter in phone:
	                return True
	        return False

	    def _get_primary_vowel(self, phones: list):
	        """Return the first vowel phone marked with stress ``1``.

	        The stress digit is removed from the returned phone. Returns
	        ``None`` when no phone in ``phones`` is a vowel carrying a ``1``.
	        """
	        for phone in phones:
	            if '1' in phone and self._is_vowel_phone(phone):
	                return phone.rstrip(self.STRESS_DIGITS)
	        return None

	    def _calculate_char_difference(self, word1: str, word2: str) -> float:
	        """Return a spelling similarity in ``[0.0, 1.0]``.

	        Characters are compared position by position and every extra
	        character of the longer word counts as one more change. Identical
	        words score 1.0; an empty word scores 0.0.
	        """
	        if not word1 or not word2:
	            return 0.0

	        changes = abs(len(word1) - len(word2))
	        for left, right in zip(word1, word2):
	            if left != right:
	                changes += 1

	        # Both words are non-empty here, so the divisor is at least 1.
	        return 1.0 - (changes / max(len(word1), len(word2)))

	    def _consonants_of(self, phone_list):
	        """Return the phones of ``phone_list`` that are not vowels, in order."""
	        consonants = []
	        for phone in phone_list:
	            if not self._is_vowel_phone(phone):
	                consonants.append(phone)
	        return consonants

	    def _calculate_consonant_similarity(self, phone_list1: list, phone_list2: list) -> float:
	        """Return a consonant similarity in ``[0.0, 1.0]``.

	        Consonants are paired up in order. An identical pair counts 1, a
	        pair from the same ``CONSONANT_REF`` group counts 0.5, and the
	        total is divided by the number of pairs. Returns 0.0 when either
	        word has no consonants.
	        """
	        consonants1 = self._consonants_of(phone_list1)
	        consonants2 = self._consonants_of(phone_list2)
	        if not consonants1 or not consonants2:
	            return 0.0

	        consonant_groups = self._get_consonant_groups()
	        matches = 0.0
	        for cons1, cons2 in zip(consonants1, consonants2):
	            cons1 = cons1.rstrip(self.STRESS_DIGITS)
	            cons2 = cons2.rstrip(self.STRESS_DIGITS)

	            if cons1 == cons2:
	                matches += 1
	                continue

	            # Different phones still earn half credit when they share a group.
	            for group in consonant_groups:
	                if cons1 in group and cons2 in group:
	                    matches += 0.5
	                    break

	        # zip() stops at the shorter list, so that is the number of pairs.
	        return matches / min(len(consonants1), len(consonants2))

	    def _calculate_similarity(self, word1, phones1, word2, phones2):
	        """Score how similar two words are.

	        Args:
	            word1, word2: The spellings being compared.
	            phones1, phones2: Their pronunciations as space-separated
	                phone strings.

	        Returns:
	            A dict with the weighted ``"similarity"``, the four partial
	            scores (all rounded to three decimals) and a ``"details"``
	            dict describing the inputs.
	        """
	        phone_list1 = phones1.split()
	        phone_list2 = phones2.split()

	        # 1. Rhyme score: all or nothing on the primary stressed vowel.
	        vowel1 = self._get_primary_vowel(phone_list1)
	        vowel2 = self._get_primary_vowel(phone_list2)
	        rhyme_score = 1.0 if (vowel1 and vowel2 and vowel1 == vowel2) else 0.0

	        # 2. Phone pattern score: relative difference in phone count.
	        count1 = len(phone_list1)
	        count2 = len(phone_list2)
	        if count1 == count2:
	            phone_score = 1.0
	        else:
	            phone_score = 1.0 - (abs(count1 - count2) / max(count1, count2))

	        # 3. Character difference score.
	        char_diff_score = self._calculate_char_difference(word1, word2)

	        # 4. Consonant similarity score.
	        consonant_score = self._calculate_consonant_similarity(phone_list1, phone_list2)

	        similarity = (
	            (rhyme_score * self.RHYME_WEIGHT)
	            + (phone_score * self.PHONE_PATTERN_WEIGHT)
	            + (char_diff_score * self.CHAR_DIFF_WEIGHT)
	            + (consonant_score * self.CONSONANT_WEIGHT)
	        )

	        return {
	            "similarity": round(similarity, 3),
	            "rhyme_score": round(rhyme_score, 3),
	            "phone_score": round(phone_score, 3),
	            "char_diff_score": round(char_diff_score, 3),
	            "consonant_score": round(consonant_score, 3),
	            "details": {
	                "primary_vowel1": vowel1,
	                "primary_vowel2": vowel2,
	                "phone_count1": count1,
	                "phone_count2": count2,
	                # NOTE(review): despite its name this is only the
	                # difference in word length, not a count of differing
	                # characters. Kept as is for output compatibility.
	                "char_differences": abs(len(word1) - len(word2)),
	            },
	        }

	    def _error_response(self, message, invalid_words=None):
	        """Return the JSON error payload used by ``forward``."""
	        import json

	        payload = {"error": message}
	        if invalid_words is not None:
	            payload["invalid_words"] = invalid_words
	        payload["suggestions"] = []
	        return json.dumps(payload, indent=2)

	    def forward(self, target: str, word_list_str: str, min_similarity: str = "0.5", custom_phones: dict = None) -> str:
	        """Suggest words from a list that sound similar to ``target``.

	        Args:
	            target: Word to find alternatives for.
	            word_list_str: JSON array of candidate words.
	            min_similarity: Minimum combined score a candidate needs, as
	                text or a number. ``None`` means the default of 0.5.
	            custom_phones: Optional mapping of word to a dict holding a
	                ``"primary_phones"`` string; it takes precedence over the
	                dictionary lookup.

	        Returns:
	            A JSON string. On success it holds the normalized target, its
	            phones, the words that had no pronunciation and the
	            suggestions sorted by descending similarity. On failure it
	            holds an ``"error"`` message and an empty suggestion list.
	        """
	        import json
	        import string

	        strip_chars = string.punctuation + string.whitespace
	        target = target.lower().strip(strip_chars)

	        # The schema declares min_similarity nullable, so treat None as the default.
	        if min_similarity is None:
	            min_similarity = "0.5"
	        try:
	            threshold = float(min_similarity)
	        except (TypeError, ValueError):
	            return self._error_response(
	                f"Invalid min_similarity '{min_similarity}': expected a number between 0.0 and 1.0"
	            )

	        # Parse JSON string to list. TypeError covers a null word_list_str.
	        try:
	            words = json.loads(word_list_str)
	        except (json.JSONDecodeError, TypeError):
	            return self._error_response("Invalid JSON string for word_list_str")
	        if not isinstance(words, list):
	            # A JSON string or object would otherwise be iterated by
	            # character or by key and produce meaningless candidates.
	            return self._error_response("word_list_str must be a JSON array of words")

	        target_phones = self._get_word_phones(target, custom_phones)
	        if not target_phones:
	            return self._error_response(
	                f"Target word '{target}' not found in dictionary or custom phones"
	            )

	        # Look every candidate up once and keep its phones for scoring.
	        candidates = []
	        invalid_words = []
	        for entry in words:
	            if not isinstance(entry, str):
	                invalid_words.append(entry)
	                continue
	            word = entry.lower().strip(strip_chars)
	            word_phones = self._get_word_phones(word, custom_phones)
	            if word_phones:
	                candidates.append((word, word_phones))
	            else:
	                invalid_words.append(word)

	        if not candidates:
	            return self._error_response(
	                "No valid words found in dictionary or custom phones",
	                invalid_words=invalid_words,
	            )

	        suggestions = []
	        for word, word_phones in candidates:
	            scores = self._calculate_similarity(word, word_phones, target, target_phones)
	            if scores["similarity"] < threshold:
	                continue
	            suggestions.append({
	                "word": word,
	                "similarity": scores["similarity"],
	                "rhyme_score": scores["rhyme_score"],
	                "phone_score": scores["phone_score"],
	                "char_diff_score": scores["char_diff_score"],
	                "consonant_score": scores["consonant_score"],
	                "phones": word_phones,
	                "is_custom": word in custom_phones if custom_phones else False,
	                "details": scores["details"],
	            })

	        # Sort by similarity score descending.
	        suggestions.sort(key=lambda x: x["similarity"], reverse=True)

	        result = {
	            "target": target,
	            "target_phones": target_phones,
	            "invalid_words": invalid_words,
	            "suggestions": suggestions,
	        }
	        return json.dumps(result, indent=2)