non2013 committed on
Commit
530dbfa
·
1 Parent(s): dbda7cf

update interface

Browse files
Files changed (1) hide show
  1. app.py +4 -2
app.py CHANGED
@@ -28,6 +28,8 @@ with open('lemma_dict.pkl', 'rb') as f:
28
  nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
29
  nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
30
 
 
 
31
  def preprocess_text(text):
32
  """Preprocess the input text using SpaCy and return word indices."""
33
  docs = nlp.pipe([text], n_process=1)
@@ -36,8 +38,8 @@ def preprocess_text(text):
36
  for token in doc:
37
  if token.pos_ != "PUNCT":
38
  if token.text not in word_dict:
39
- word_dict[token.text] = len(word_dict) + 1 # Increment index.
40
- word_seq.append(word_dict[token.text])
41
  return word_seq
42
 
43
  def classify_question(text):
 
28
  nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
29
  nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
30
 
31
+ OOV_INDEX = 0
32
+
33
  def preprocess_text(text):
34
  """Preprocess the input text using SpaCy and return word indices."""
35
  docs = nlp.pipe([text], n_process=1)
 
38
  for token in doc:
39
  if token.pos_ != "PUNCT":
40
  if token.text not in word_dict:
41
+ index = word_dict.get(token.text, OOV_INDEX)
42
+ word_seq.append(index)
43
  return word_seq
44
 
45
  def classify_question(text):