Spaces:

non2013
/

SincereQuestions

Sleeping

non2013 committed on Oct 20, 2024

Commit

7c57e03

1 Parent(s): c200368

edit preprocess

Files changed (1) hide show

app.py CHANGED Viewed

@@ -29,6 +29,8 @@ nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
 nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
 OOV_INDEX = 0
 def preprocess_text(text):
     """Preprocess the input text using SpaCy and return word indices."""
@@ -37,9 +39,8 @@ def preprocess_text(text):
     for doc in docs:
         for token in doc:
             if token.pos_ != "PUNCT":
-                if token.text not in word_dict:
-                    index = word_dict.get(token.text, OOV_INDEX)
-                    word_seq.append(index)
     return word_seq
 def classify_question(text):

 nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
 OOV_INDEX = 0
+word_dict = {"<OOV>": OOV_INDEX}  # OOV token at index 0.
+word_index = 1
 def preprocess_text(text):
     """Preprocess the input text using SpaCy and return word indices."""
     for doc in docs:
         for token in doc:
             if token.pos_ != "PUNCT":
+                index = word_dict.get(token.text, OOV_INDEX)
+                word_seq.append(index)
     return word_seq
 def classify_question(text):