check if word is plural
Browse files
utils.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1 |
import torch
|
|
|
|
|
|
|
2 |
from transformers.models.bert.modeling_bert import BertModel, BertPreTrainedModel
|
3 |
from torch import nn
|
4 |
from itertools import chain
|
@@ -9,6 +12,9 @@ import re
|
|
9 |
import string
|
10 |
import inflect
|
11 |
|
|
|
|
|
|
|
12 |
punct_chars = list((set(string.punctuation) | {'β', 'β', 'β', 'β', '~', '|', 'β', 'β', 'β¦', "'", "`", '_'}))
|
13 |
punct_chars.sort()
|
14 |
punctuation = ''.join(punct_chars)
|
@@ -825,6 +831,15 @@ PLURAL_TO_SINGULAR_EXCLUSIONS = [
|
|
825 |
|
826 |
p = inflect.engine()
|
827 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
828 |
def singular_to_plural(word):
|
829 |
"""Convert singular words to plural using inflect."""
|
830 |
plural = p.plural(word)
|
@@ -832,9 +847,9 @@ def singular_to_plural(word):
|
|
832 |
|
833 |
def plural_to_singular(word):
|
834 |
"""Convert plural word to singular using inflect."""
|
835 |
-
if word
|
836 |
-
return word
|
837 |
-
return
|
838 |
|
839 |
plural_MATH_WORDS = [singular_to_plural(word) for word in MATH_WORDS]
|
840 |
|
|
|
1 |
import torch
|
2 |
+
import nltk
|
3 |
+
from nltk import pos_tag
|
4 |
+
from nltk.tokenize import word_tokenize
|
5 |
from transformers.models.bert.modeling_bert import BertModel, BertPreTrainedModel
|
6 |
from torch import nn
|
7 |
from itertools import chain
|
|
|
12 |
import string
|
13 |
import inflect
|
14 |
|
15 |
+
nltk.download("averaged_perceptron_tagger")
|
16 |
+
nltk.download("punkt")
|
17 |
+
|
18 |
punct_chars = list((set(string.punctuation) | {'β', 'β', 'β', 'β', '~', '|', 'β', 'β', 'β¦', "'", "`", '_'}))
|
19 |
punct_chars.sort()
|
20 |
punctuation = ''.join(punct_chars)
|
|
|
831 |
|
832 |
p = inflect.engine()
|
833 |
|
834 |
+
def is_plural(word):
    """Determine if a word is plural using NLTK's part-of-speech tagging.

    Args:
        word: The text to inspect. It is tokenized with ``word_tokenize``
            and only the first resulting token is tagged.

    Returns:
        True if the first token is tagged ``NNS`` or ``NNPS`` (the plural
        noun tags in the Penn Treebank tag set), otherwise False. Empty,
        whitespace-only or ``None`` input returns False.
    """
    # Nothing to tag: return early instead of indexing into an empty
    # tag list below (which would raise IndexError).
    if not word or not word.strip():
        return False

    # Tokenize the input word (necessary for NLTK tagging).
    tokens = word_tokenize(word)
    if not tokens:
        return False

    # Part-of-speech tag of the first token only.
    pos = pos_tag(tokens)[0][1]
    return pos in ("NNS", "NNPS")
|
842 |
+
|
843 |
def singular_to_plural(word):
|
844 |
"""Convert singular words to plural using inflect."""
|
845 |
plural = p.plural(word)
|
|
|
847 |
|
848 |
def plural_to_singular(word):
    """Convert plural word to singular using inflect.

    The word is returned unchanged when ``is_plural`` reports it is not
    plural, or when inflect's ``singular_noun`` yields a falsy result.
    """
    if not is_plural(word):
        return word

    singular = p.singular_noun(word)
    return singular if singular else word
|
853 |
|
854 |
plural_MATH_WORDS = [singular_to_plural(word) for word in MATH_WORDS]
|
855 |
|