Spaces:

non2013
/

SincereQuestions

Sleeping

non2013 committed on Oct 20, 2024

Commit

530dbfa

1 Parent(s): dbda7cf

update interface

Files changed (1) hide show

app.py CHANGED Viewed

@@ -28,6 +28,8 @@ with open('lemma_dict.pkl', 'rb') as f:
 nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
 nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
 def preprocess_text(text):
     """Preprocess the input text using SpaCy and return word indices."""
     docs = nlp.pipe([text], n_process=1)
@@ -36,8 +38,8 @@ def preprocess_text(text):
         for token in doc:
             if token.pos_ != "PUNCT":
                 if token.text not in word_dict:
-                    word_dict[token.text] = len(word_dict) + 1  # Increment index.
-                word_seq.append(word_dict[token.text])
     return word_seq
 def classify_question(text):

 nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
 nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
+OOV_INDEX = 0
 def preprocess_text(text):
     """Preprocess the input text using SpaCy and return word indices."""
     docs = nlp.pipe([text], n_process=1)
         for token in doc:
             if token.pos_ != "PUNCT":
                 if token.text not in word_dict:
+                    index = word_dict.get(token.text, OOV_INDEX)
+                    word_seq.append(index)
     return word_seq
 def classify_question(text):