SmitaGautam committed on
Commit
18eb789
1 Parent(s): 587b779

Update train.py

Browse files
Files changed (1) hide show
  1. train.py +32 -4
train.py CHANGED
@@ -37,7 +37,7 @@ pos_tags = [ 'CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD',
37
  'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB'
38
  ]
39
 
40
- def feature_vector(word, scaled_position, current_word_pos_tag):
41
  features = []
42
  features.append(int(word.lower() in stopwords))
43
  features.append(int(word.isupper()))
@@ -49,17 +49,45 @@ def feature_vector(word, scaled_position, current_word_pos_tag):
49
  features.append(int(word in people))
50
  features.append(int(word in countries))
51
  features.append(int(word in nationalities))
52
-
53
  if (current_word_pos_tag==12) or (current_word_pos_tag==13): ##NNP, NNPS
54
  features.append(1)
55
  else:
56
  features.append(0)
57
- features.append(scaled_position)
58
  if 27 <= current_word_pos_tag <= 32: ##isVERB
59
  features.append(1)
60
  else:
61
  features.append(0)
62
- return np.asarray(features, dtype = np.float32)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
 
65
  def feature_vector2(word, prev_word_pos_tag, next_word_pos_tag, current_word_pos_tag):
 
37
  'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB'
38
  ]
39
 
40
+ def feature_vector(word, scaled_pos, current_word_pos_tag, prev_word, next_word):
41
  features = []
42
  features.append(int(word.lower() in stopwords))
43
  features.append(int(word.isupper()))
 
49
  features.append(int(word in people))
50
  features.append(int(word in countries))
51
  features.append(int(word in nationalities))
52
+
53
  if (current_word_pos_tag==12) or (current_word_pos_tag==13): ##NNP, NNPS
54
  features.append(1)
55
  else:
56
  features.append(0)
57
+ features.append(scaled_pos)
58
  if 27 <= current_word_pos_tag <= 32: ##isVERB
59
  features.append(1)
60
  else:
61
  features.append(0)
62
+
63
+ if prev_word!="":
64
+ features.append(int(prev_word.lower() in stopwords))
65
+ features.append(int(prev_word.isupper()))
66
+ features.append(int(prev_word in PUNCT))
67
+ features.append(int(prev_word.isdigit()))
68
+ features.append(len(prev_word))
69
+ features.append((prev_word in places))
70
+ features.append((prev_word in people))
71
+ features.append((prev_word in countries or prev_word in nationalities))
72
+ else:
73
+ for _ in range(8):
74
+ features.append(0)
75
+
76
+ if next_word!="":
77
+ features.append(int(next_word.lower() in stopwords))
78
+ features.append(int(next_word.isupper()))
79
+ features.append(int(next_word in PUNCT))
80
+ features.append(int(next_word.isdigit()))
81
+ features.append(len(next_word))
82
+ features.append((next_word in places))
83
+ features.append((next_word in people))
84
+ features.append((next_word in countries or prev_word in nationalities))
85
+ else:
86
+ for _ in range(8):
87
+ features.append(0)
88
+
89
+ return np.asarray(features, dtype=np.float32)
90
+
91
 
92
 
93
  def feature_vector2(word, prev_word_pos_tag, next_word_pos_tag, current_word_pos_tag):