nei-demo-backup

Sleeping

App Files Files Community

SmitaGautam committed on Nov 4, 2024

Commit

18eb789

verified ·

1 Parent(s): 587b779

Update train.py

Browse files

Files changed (1) hide show

train.py +32 -4

train.py CHANGED Viewed

@@ -37,7 +37,7 @@ pos_tags = [ 'CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD',
                 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB'
             ]
-def feature_vector(word, scaled_position, current_word_pos_tag):
     features = []
     features.append(int(word.lower() in stopwords))
     features.append(int(word.isupper()))
@@ -49,17 +49,45 @@ def feature_vector(word, scaled_position, current_word_pos_tag):
     features.append(int(word in people))
     features.append(int(word in countries))
     features.append(int(word in nationalities))
     if (current_word_pos_tag==12) or (current_word_pos_tag==13): ##NNP, NNPS
         features.append(1)
     else:
         features.append(0)
-    features.append(scaled_position)
     if 27 <= current_word_pos_tag <= 32: ##isVERB
         features.append(1)
     else:
         features.append(0)
-    return np.asarray(features, dtype = np.float32)
 def feature_vector2(word, prev_word_pos_tag, next_word_pos_tag, current_word_pos_tag):

                 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB'
             ]
+def feature_vector(word, scaled_pos, current_word_pos_tag, prev_word, next_word):
     features = []
     features.append(int(word.lower() in stopwords))
     features.append(int(word.isupper()))
     features.append(int(word in people))
     features.append(int(word in countries))
     features.append(int(word in nationalities))
     if (current_word_pos_tag==12) or (current_word_pos_tag==13): ##NNP, NNPS
         features.append(1)
     else:
         features.append(0)
+    features.append(scaled_pos)
     if 27 <= current_word_pos_tag <= 32: ##isVERB
         features.append(1)
     else:
         features.append(0)
+    if prev_word!="":
+        features.append(int(prev_word.lower() in stopwords))
+        features.append(int(prev_word.isupper()))
+        features.append(int(prev_word in PUNCT))
+        features.append(int(prev_word.isdigit()))
+        features.append(len(prev_word))
+        features.append((prev_word in places))
+        features.append((prev_word in people))
+        features.append((prev_word in countries or prev_word in nationalities))
+    else:
+        for _ in range(8):
+            features.append(0)
+    if next_word!="":
+        features.append(int(next_word.lower() in stopwords))
+        features.append(int(next_word.isupper()))
+        features.append(int(next_word in PUNCT))
+        features.append(int(next_word.isdigit()))
+        features.append(len(next_word))
+        features.append((next_word in places))
+        features.append((next_word in people))
+        features.append((next_word in countries or prev_word in nationalities))
+    else:
+        for _ in range(8):
+            features.append(0)
+    return np.asarray(features, dtype=np.float32)
 def feature_vector2(word, prev_word_pos_tag, next_word_pos_tag, current_word_pos_tag):