non2013 committed on
Commit
7c57e03
·
1 Parent(s): c200368

edit preprocess

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -29,6 +29,8 @@ nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
29
  nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
30
 
31
  OOV_INDEX = 0
 
 
32
 
33
  def preprocess_text(text):
34
  """Preprocess the input text using SpaCy and return word indices."""
@@ -37,9 +39,8 @@ def preprocess_text(text):
37
  for doc in docs:
38
  for token in doc:
39
  if token.pos_ != "PUNCT":
40
- if token.text not in word_dict:
41
- index = word_dict.get(token.text, OOV_INDEX)
42
- word_seq.append(index)
43
  return word_seq
44
 
45
  def classify_question(text):
 
29
  nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
30
 
31
  OOV_INDEX = 0
32
+ word_dict = {"<OOV>": OOV_INDEX} # OOV token at index 0.
33
+ word_index = 1
34
 
35
  def preprocess_text(text):
36
  """Preprocess the input text using SpaCy and return word indices."""
 
39
  for doc in docs:
40
  for token in doc:
41
  if token.pos_ != "PUNCT":
42
+ index = word_dict.get(token.text, OOV_INDEX)
43
+ word_seq.append(index)
 
44
  return word_seq
45
 
46
  def classify_question(text):