Spaces:

patruff
/

parody-suggestions

Configuration error

App Files Community

patruff committed on Mar 30

Commit

aa8160e

verified ·

1 Parent(s): 198c7f4

Upload tool

Browse files

Files changed (2) hide show

requirements.txt +1 -1
tool.py +124 -54

requirements.txt CHANGED Viewed

@@ -1,2 +1,2 @@
-smolagents
 pronouncing



1	pronouncing
2	+ smolagents

tool.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from smolagents.tools import Tool
 import string
-import json
 import pronouncing
 class ParodyWordSuggestionTool(Tool):
     name = "parody_word_suggester"
@@ -34,6 +34,7 @@ class ParodyWordSuggestionTool(Tool):
         last_vowel_idx = -1
         last_vowel = None
         vowel_groups = self._get_vowel_groups()
         # Initialize loop variables
         i = 0
@@ -41,13 +42,22 @@ class ParodyWordSuggestionTool(Tool):
         base_phone = ""
         group = []
         for i, phone in enumerate(phones):
-            base_phone = phone.rstrip('012')
-            for group in vowel_groups:
-                if base_phone in group:
                     last_vowel_idx = i
                     last_vowel = base_phone
-                    break
         if last_vowel_idx == -1:
             return None, []
@@ -86,7 +96,7 @@ class ParodyWordSuggestionTool(Tool):
     def _calculate_similarity(self, word1, phones1, word2, phones2):
-        """Calculate similarity score using both perfect and near-rhyme detection."""
         # Initialize all variables
         phone_list1 = phones1.split()
         phone_list2 = phones2.split()
@@ -102,31 +112,57 @@ class ParodyWordSuggestionTool(Tool):
         common_length = 0
         matched = 0
         i = 0
-        # Variables for near-rhyme scoring
-        near_rhyme_score = 0.0
-        consonants1 = []
-        consonants2 = []
-        matches = 0
-        # Variables for length and stress scoring
-        phone_diff = 0
-        max_phones = 0
-        length_score = 0.0
-        stress_score = 0.0
-        stress1 = ""
-        stress2 = ""
-        similarity = 0.0
         p = ""
         v = ""
-        # Get last syllable components
         result1 = self._get_last_syllable(phone_list1)
         result2 = self._get_last_syllable(phone_list2)
         word_vowel, word_end = result1
         target_vowel, target_end = result2
-        # Perfect rhyme check (60% of score)
         if word_vowel and target_vowel:
             if self._vowels_match(word_vowel, target_vowel):
                 word_end_clean = self._strip_stress(word_end)
@@ -144,51 +180,81 @@ class ParodyWordSuggestionTool(Tool):
                     if max(len(word_end_clean), len(target_end_clean)) > 0:
                         rhyme_score = 0.6 * (matched / max(1, max(len(word_end_clean), len(target_end_clean))))
                     else:
-                        rhyme_score = 0.0
-        # Near rhyme check (for words like "running"/"cunning") - 20% of score
-        # Check if words have similar length and pattern
-        if abs(len(phone_list1) - len(phone_list2)) <= 1:
-            # Check consonant patterns are similar
-            consonants1 = [p for p in self._strip_stress(phone_list1) if not any(v in p for v in 'AEIOU')]
-            consonants2 = [p for p in self._strip_stress(phone_list2) if not any(v in p for v in 'AEIOU')]
-            if len(consonants1) == len(consonants2):
-                matches = 0
-                for a, b in zip(consonants1, consonants2):
-                    if a == b:
-                        matches += 1
-                if len(consonants1) > 0:
-                    near_rhyme_score = matches / max(1, len(consonants1))
-            # Additional check for -ing endings (special case for English)
-            if len(phone_list1) >= 3 and len(phone_list2) >= 3:
-                if (self._strip_stress(phone_list1[-2:]) == ['IH', 'NG'] and
-                    self._strip_stress(phone_list2[-2:]) == ['IH', 'NG']):
-                    near_rhyme_score = max(near_rhyme_score, 0.8)  # Boost for -ing endings
-        # Calculate length similarity score (10% of total)
         phone_diff = abs(len(phone_list1) - len(phone_list2))
         max_phones = max(len(phone_list1), len(phone_list2))
         length_score = 1.0 if phone_diff == 0 else 1.0 - (phone_diff / max_phones)
-        # Calculate stress pattern similarity (10% of total)
         import pronouncing
         stress1 = pronouncing.stresses(phones1)
         stress2 = pronouncing.stresses(phones2)
         stress_score = 1.0 if stress1 == stress2 else 0.5
         # Weighted combination
         similarity = (
-            (rhyme_score * 0.6) +       # Perfect rhyme (60%)
-            (near_rhyme_score * 0.2) +  # Near rhyme (20%)
-            (length_score * 0.1) +      # Length similarity (10%)
-            (stress_score * 0.1)        # Stress pattern (10%)
         )
-        # Special case: Boost very similar-sounding words
-        if near_rhyme_score > 0.7 and length_score > 0.8 and stress_score > 0.8:
-            similarity = max(similarity, 0.75)  # Ensure these get a high enough score
         # Cap at 1.0
         similarity = min(1.0, similarity)
@@ -196,9 +262,11 @@ class ParodyWordSuggestionTool(Tool):
         return {
             "similarity": round(similarity, 3),
             "rhyme_score": round(rhyme_score, 3),
             "near_rhyme_score": round(near_rhyme_score, 3),
             "length_score": round(length_score, 3),
             "stress_score": round(stress_score, 3),
             "phone_length_difference": phone_diff
         }
@@ -276,9 +344,11 @@ class ParodyWordSuggestionTool(Tool):
                         "word": word,
                         "similarity": similarity_result["similarity"],
                         "rhyme_score": similarity_result["rhyme_score"],
                         "near_rhyme_score": similarity_result["near_rhyme_score"],
                         "length_score": similarity_result["length_score"],
                         "stress_score": similarity_result["stress_score"],
                         "phones": word_phones,
                         "last_vowel": word_vowel,
                         "ending": " ".join(word_end) if word_end else "",

 from smolagents.tools import Tool
 import string
 import pronouncing
+import json
 class ParodyWordSuggestionTool(Tool):
     name = "parody_word_suggester"
         last_vowel_idx = -1
         last_vowel = None
         vowel_groups = self._get_vowel_groups()
+        v = ""
         # Initialize loop variables
         i = 0
         base_phone = ""
         group = []
+        # First, find the primary stressed vowel if it exists
         for i, phone in enumerate(phones):
+            # Check for primary stress (1)
+            if '1' in phone and any(v in phone for v in 'AEIOU'):
+                base_phone = phone.rstrip('012')
+                last_vowel_idx = i
+                last_vowel = base_phone
+                break
+        # If no primary stress, just use the last vowel
+        if last_vowel_idx == -1:
+            for i, phone in enumerate(phones):
+                base_phone = phone.rstrip('012')
+                if any(v in base_phone for v in 'AEIOU'):
                     last_vowel_idx = i
                     last_vowel = base_phone
         if last_vowel_idx == -1:
             return None, []
     def _calculate_similarity(self, word1, phones1, word2, phones2):
+        """Calculate similarity score using refined metrics for parody."""
         # Initialize all variables
         phone_list1 = phones1.split()
         phone_list2 = phones2.split()
         common_length = 0
         matched = 0
         i = 0
         p = ""
         v = ""
+        # Variables for whole-word matching
+        primary_stress_vowel1 = None
+        primary_stress_vowel2 = None
+        primary_stress_idx1 = -1
+        primary_stress_idx2 = -1
+        front_consonants1 = []
+        front_consonants2 = []
+        # Find primary stressed vowels
+        for i, phone in enumerate(phone_list1):
+            if '1' in phone and any(v in phone for v in 'AEIOU'):
+                primary_stress_vowel1 = phone.rstrip('012')
+                primary_stress_idx1 = i
+                break
+        for i, phone in enumerate(phone_list2):
+            if '1' in phone and any(v in phone for v in 'AEIOU'):
+                primary_stress_vowel2 = phone.rstrip('012')
+                primary_stress_idx2 = i
+                break
+        # Get consonants before the primary stress
+        if primary_stress_idx1 > 0:
+            front_consonants1 = [p for p in self._strip_stress(phone_list1[:primary_stress_idx1])
+                                if not any(v in p for v in 'AEIOU')]
+        if primary_stress_idx2 > 0:
+            front_consonants2 = [p for p in self._strip_stress(phone_list2[:primary_stress_idx2])
+                                if not any(v in p for v in 'AEIOU')]
+        # Calculate front consonant similarity (important for parody)
+        front_consonant_score = 0.0
+        if front_consonants1 and front_consonants2:
+            min_length = min(len(front_consonants1), len(front_consonants2))
+            if min_length > 0:
+                matches = 0
+                for i in range(min_length):
+                    if front_consonants1[i] == front_consonants2[i]:
+                        matches += 1
+                front_consonant_score = matches / min_length
+        # Get last syllable components for rhyming
         result1 = self._get_last_syllable(phone_list1)
         result2 = self._get_last_syllable(phone_list2)
         word_vowel, word_end = result1
         target_vowel, target_end = result2
+        # Perfect rhyme check (45% of score)
         if word_vowel and target_vowel:
             if self._vowels_match(word_vowel, target_vowel):
                 word_end_clean = self._strip_stress(word_end)
                     if max(len(word_end_clean), len(target_end_clean)) > 0:
                         rhyme_score = 0.6 * (matched / max(1, max(len(word_end_clean), len(target_end_clean))))
                     else:
+                        rhyme_score = 0.6  # Still somewhat rhymes even without ending consonants
+        # Primary stressed vowel match (20% of score)
+        primary_vowel_score = 0.0
+        if primary_stress_vowel1 and primary_stress_vowel2:
+            if primary_stress_vowel1 == primary_stress_vowel2:
+                primary_vowel_score = 1.0
+            else:
+                # Check if they're in the same vowel group
+                for group in self._get_vowel_groups():
+                    if primary_stress_vowel1 in group and primary_stress_vowel2 in group:
+                        primary_vowel_score = 0.7
+                        break
+        # Near rhyme check - 15% of score
+        near_rhyme_score = 0.0
+        # Check for specific endings
+        if len(phone_list1) >= 2 and len(phone_list2) >= 2:
+            # Check for -ing endings
+            if (self._strip_stress(phone_list1[-2:]) == ['IH', 'NG'] and
+                self._strip_stress(phone_list2[-2:]) == ['IH', 'NG']):
+                # For -ing endings, also consider the consonant before -ing
+                if len(phone_list1) >= 3 and len(phone_list2) >= 3:
+                    # If the consonants before -ing match, higher score
+                    if self._strip_stress(phone_list1[-3:-2]) == self._strip_stress(phone_list2[-3:-2]):
+                        near_rhyme_score = 0.9
+                    else:
+                        near_rhyme_score = 0.6
+                else:
+                    near_rhyme_score = 0.6
+            # Check for -y endings (like happy/sappy)
+            elif (self._strip_stress(phone_list1[-1:]) == ['IY'] and
+                  self._strip_stress(phone_list2[-1:]) == ['IY']):
+                near_rhyme_score = 0.7
+        # Length and stress similarity (10% of score)
         phone_diff = abs(len(phone_list1) - len(phone_list2))
         max_phones = max(len(phone_list1), len(phone_list2))
         length_score = 1.0 if phone_diff == 0 else 1.0 - (phone_diff / max_phones)
+        # Check stress pattern similarity
         import pronouncing
         stress1 = pronouncing.stresses(phones1)
         stress2 = pronouncing.stresses(phones2)
         stress_score = 1.0 if stress1 == stress2 else 0.5
+        # Front consonant match (10% of score)
+        front_score = front_consonant_score * 0.1
         # Weighted combination
         similarity = (
+            (rhyme_score * 0.45) +            # End rhyme (45%)
+            (primary_vowel_score * 0.2) +     # Primary vowel (20%)
+            (near_rhyme_score * 0.15) +       # Near rhyme features (15%)
+            (length_score * 0.05) +           # Length similarity (5%)
+            (stress_score * 0.05) +           # Stress pattern (5%)
+            (front_score)                     # Front consonants (10%)
         )
+        # IMPORTANT: Penalty for words that are too similar to be funny
+        # For parody, slightly different words are better than almost identical words
+        if word1 and word2:
+            if word1[0] == word2[0] and rhyme_score > 0.9 and primary_vowel_score > 0.9:
+                # Words starting with same letter and almost perfect rhyme
+                # are less funny for parody
+                similarity *= 0.9
+        # Special case: Words need to be somewhat different to be funny in parody
+        if len(word1) > 3 and len(word2) > 3:
+            # Give boost to words with same length but different consonants
+            if len(word1) == len(word2) and front_consonant_score < 0.5 and rhyme_score > 0.8:
+                similarity = max(similarity, 0.75)  # Good for parody
         # Cap at 1.0
         similarity = min(1.0, similarity)
         return {
             "similarity": round(similarity, 3),
             "rhyme_score": round(rhyme_score, 3),
+            "primary_vowel_score": round(primary_vowel_score, 3),
             "near_rhyme_score": round(near_rhyme_score, 3),
             "length_score": round(length_score, 3),
             "stress_score": round(stress_score, 3),
+            "front_consonant_score": round(front_consonant_score, 3),
             "phone_length_difference": phone_diff
         }
                         "word": word,
                         "similarity": similarity_result["similarity"],
                         "rhyme_score": similarity_result["rhyme_score"],
+                        "primary_vowel_score": similarity_result["primary_vowel_score"],
                         "near_rhyme_score": similarity_result["near_rhyme_score"],
                         "length_score": similarity_result["length_score"],
                         "stress_score": similarity_result["stress_score"],
+                        "front_consonant_score": similarity_result["front_consonant_score"],
                         "phones": word_phones,
                         "last_vowel": word_vowel,
                         "ending": " ".join(word_end) if word_end else "",