# Spaces: Sleeping
import os

# TensorFlow consults TF_ENABLE_ONEDNN_OPTS while it is being loaded, so the
# variable must be in the environment *before* `import tensorflow` executes;
# assigning it after the import is too late to turn the oneDNN ops off.
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

import pickle  # noqa: E402

import numpy as np  # noqa: E402
import spacy  # noqa: E402
import tensorflow as tf  # noqa: E402

# Batch size passed to the Keras `predict` calls in BiLSTM_predict.
BATCH_SIZE = 512
def preprocess_text(text):
    """Tokenize *text* with spaCy and map each kept token to a word index.

    Tokens whose ``pos_`` equals ``"PUNCT"`` are skipped. Every other token
    is looked up by its exact text in the module-level ``word_dict``; tokens
    that are not present map to index 0 (the out-of-vocabulary index).

    Args:
        text: The raw input string.

    Returns:
        A list with one index per kept token, in document order.
    """
    oov_index = 0
    word_seq = []
    # NOTE(review): the pipeline is loaded with 'tagger' disabled, so pos_ may
    # never be "PUNCT" here -- confirm this matches training-time preprocessing
    # before changing the filter.
    for doc in nlp.pipe([text], n_process=1):
        # Read-only lookup: word_dict is shared module state, and inserting
        # every unseen token into it would make it grow with each call.
        word_seq.extend(
            word_dict.get(token.text, oov_index)
            for token in doc
            if token.pos_ != "PUNCT"
        )
    return word_seq
def BiLSTM_predict(text):
    """Score *text* with the weighted four-model ensemble.

    The text is turned into word indices by ``preprocess_text``, padded to
    length 55, and run through ``model_1`` .. ``model_4``. Each squeezed
    output is scaled by its weight (0.15, 0.35, 0.15, 0.35) and the four
    terms are added together.

    Args:
        text: The raw input string.

    Returns:
        The weighted sum of the four squeezed model outputs.
    """
    token_ids = preprocess_text(text)
    batch = tf.keras.preprocessing.sequence.pad_sequences([token_ids], maxlen=55)

    # (weight, model) pairs, listed in the order their terms are summed.
    ensemble = (
        (0.15, model_1),
        (0.35, model_2),
        (0.15, model_3),
        (0.35, model_4),
    )

    blended = None
    for weight, member in ensemble:
        raw = member.predict(batch, batch_size=BATCH_SIZE, verbose=2)
        term = weight * np.squeeze(raw)
        blended = term if blended is None else blended + term
    return blended
# Load the four Keras models that BiLSTM_predict blends.
model_1 = tf.keras.models.load_model("./lstm/model_1.h5")
model_2 = tf.keras.models.load_model("./lstm/model_2.h5")
model_3 = tf.keras.models.load_model("./lstm/model_3.h5")
model_4 = tf.keras.models.load_model("./lstm/model_4.h5")

# Token-text -> index mapping used by preprocess_text.
# NOTE: unpickling can execute arbitrary code; only ever point this at a
# trusted word_dict.pkl shipped with the application.
with open('./lstm/word_dict.pkl', 'rb') as f:
    word_dict = pickle.load(f)

# Load the spaCy model, downloading it only when it is not installed yet.
# This avoids re-running the download on every start, uses spaCy's own
# downloader (same interpreter, no shell string), and no longer ignores a
# failed download silently.
# NOTE(review): 'tagger' is disabled here while preprocess_text filters on
# token.pos_ -- confirm this matches how the models were trained.
try:
    nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
except OSError:
    import importlib

    from spacy.cli import download as _download_spacy_model

    _download_spacy_model('en_core_web_lg')
    # Make sure the import system notices the freshly installed package.
    importlib.invalidate_caches()
    nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])

# Register a lexeme flag under IS_STOP that is true when the lowercased
# string appears in spaCy's English stop-word list.
nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)