ikarasz committed on
Commit
4260808
·
1 Parent(s): 81d1687

put back plural exclusions

Browse files
Files changed (1) hide show
  1. utils.py +7 -5
utils.py CHANGED
@@ -847,7 +847,7 @@ def is_plural_wordnet(word):
847
  plural_synsets = wordnet.synsets(word.rstrip('s'), pos=wordnet.NOUN)
848
  return len(plural_synsets) > len(singular_synsets)
849
 
850
- def is_plural(word):
851
  """Determine if a word is plural using NLTK's part-of-speech tagging."""
852
  # Tokenize the input word (necessary for NLTK tagging)
853
  tokens = word_tokenize(word)
@@ -856,6 +856,12 @@ def is_plural(word):
856
  # Check if the word is tagged as plural (NNS or NNPS in Penn Treebank tags)
857
  return pos in ["NNS", "NNPS"]
858
 
 
 
 
 
 
 
859
  def singular_to_plural(word):
860
  """Convert singular words to plural using inflect."""
861
  plural = p.plural(word)
@@ -865,10 +871,6 @@ def plural_to_singular(word):
865
  """Convert plural word to singular using inflect."""
866
  if is_plural(word):
867
  return p.singular_noun(word) or word
868
- if is_plural_regex(word):
869
- return p.singular_noun(word) or word
870
- if is_plural_wordnet(word):
871
- return p.singular_noun(word) or word
872
  return word
873
 
874
  plural_MATH_WORDS = [singular_to_plural(word) for word in MATH_WORDS]
 
847
  plural_synsets = wordnet.synsets(word.rstrip('s'), pos=wordnet.NOUN)
848
  return len(plural_synsets) > len(singular_synsets)
849
 
850
+ def is_plural_pos(word):
851
  """Determine if a word is plural using NLTK's part-of-speech tagging."""
852
  # Tokenize the input word (necessary for NLTK tagging)
853
  tokens = word_tokenize(word)
 
856
  # Check if the word is tagged as plural (NNS or NNPS in Penn Treebank tags)
857
  return pos in ["NNS", "NNPS"]
858
 
859
+ def is_plural(word):
860
+ """Check if a word is plural."""
861
+ if word in PLURAL_TO_SINGULAR_EXCLUSIONS:
862
+ return False
863
+ return is_plural_regex(word) or is_plural_pos(word) or is_plural_wordnet(word)
864
+
865
  def singular_to_plural(word):
866
  """Convert singular words to plural using inflect."""
867
  plural = p.plural(word)
 
871
  """Convert plural word to singular using inflect."""
872
  if is_plural(word):
873
  return p.singular_noun(word) or word
 
 
 
 
874
  return word
875
 
876
  plural_MATH_WORDS = [singular_to_plural(word) for word in MATH_WORDS]