Spaces:
Sleeping
Sleeping
SmitaGautam
committed on
Commit
•
18eb789
1
Parent(s):
587b779
Update train.py
Browse files
train.py
CHANGED
@@ -37,7 +37,7 @@ pos_tags = [ 'CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD',
|
|
37 |
'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB'
|
38 |
]
|
39 |
|
40 |
-
def feature_vector(word,
|
41 |
features = []
|
42 |
features.append(int(word.lower() in stopwords))
|
43 |
features.append(int(word.isupper()))
|
@@ -49,17 +49,45 @@ def feature_vector(word, scaled_position, current_word_pos_tag):
|
|
49 |
features.append(int(word in people))
|
50 |
features.append(int(word in countries))
|
51 |
features.append(int(word in nationalities))
|
52 |
-
|
53 |
if (current_word_pos_tag==12) or (current_word_pos_tag==13): ##NNP, NNPS
|
54 |
features.append(1)
|
55 |
else:
|
56 |
features.append(0)
|
57 |
-
features.append(
|
58 |
if 27 <= current_word_pos_tag <= 32: ##isVERB
|
59 |
features.append(1)
|
60 |
else:
|
61 |
features.append(0)
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
|
64 |
|
65 |
def feature_vector2(word, prev_word_pos_tag, next_word_pos_tag, current_word_pos_tag):
|
|
|
37 |
'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB'
|
38 |
]
|
39 |
|
40 |
+
def feature_vector(word, scaled_pos, current_word_pos_tag, prev_word, next_word):
|
41 |
features = []
|
42 |
features.append(int(word.lower() in stopwords))
|
43 |
features.append(int(word.isupper()))
|
|
|
49 |
features.append(int(word in people))
|
50 |
features.append(int(word in countries))
|
51 |
features.append(int(word in nationalities))
|
52 |
+
|
53 |
if (current_word_pos_tag==12) or (current_word_pos_tag==13): ##NNP, NNPS
|
54 |
features.append(1)
|
55 |
else:
|
56 |
features.append(0)
|
57 |
+
features.append(scaled_pos)
|
58 |
if 27 <= current_word_pos_tag <= 32: ##isVERB
|
59 |
features.append(1)
|
60 |
else:
|
61 |
features.append(0)
|
62 |
+
|
63 |
+
if prev_word!="":
|
64 |
+
features.append(int(prev_word.lower() in stopwords))
|
65 |
+
features.append(int(prev_word.isupper()))
|
66 |
+
features.append(int(prev_word in PUNCT))
|
67 |
+
features.append(int(prev_word.isdigit()))
|
68 |
+
features.append(len(prev_word))
|
69 |
+
features.append((prev_word in places))
|
70 |
+
features.append((prev_word in people))
|
71 |
+
features.append((prev_word in countries or prev_word in nationalities))
|
72 |
+
else:
|
73 |
+
for _ in range(8):
|
74 |
+
features.append(0)
|
75 |
+
|
76 |
+
if next_word!="":
|
77 |
+
features.append(int(next_word.lower() in stopwords))
|
78 |
+
features.append(int(next_word.isupper()))
|
79 |
+
features.append(int(next_word in PUNCT))
|
80 |
+
features.append(int(next_word.isdigit()))
|
81 |
+
features.append(len(next_word))
|
82 |
+
features.append((next_word in places))
|
83 |
+
features.append((next_word in people))
|
84 |
+
features.append((next_word in countries or next_word in nationalities))
|
85 |
+
else:
|
86 |
+
for _ in range(8):
|
87 |
+
features.append(0)
|
88 |
+
|
89 |
+
return np.asarray(features, dtype=np.float32)
|
90 |
+
|
91 |
|
92 |
|
93 |
def feature_vector2(word, prev_word_pos_tag, next_word_pos_tag, current_word_pos_tag):
|