Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import joblib | |
import tensorflow as tf | |
import nltk | |
from nltk.corpus import stopwords | |
from nltk.tokenize import word_tokenize | |
from nltk.stem import WordNetLemmatizer | |
import re | |
import ast | |
import string | |
from tensorflow.keras.models import load_model | |
nltk.download('wordnet') | |
nltk.download('stopwords') | |
nltk.download('omw-1.4') | |
nltk.download('punkt') | |
# open chatwords.txt | |
with open('chatwords.txt') as f: | |
data = f.read() | |
chatwords = ast.literal_eval(data) | |
# open abbreviation.txt | |
with open('abbreviation.txt') as abb: | |
ab2 = abb.read() | |
abbreviation = ast.literal_eval(ab2) | |
# define stopwords | |
stop_words = stopwords.words('english') | |
# define lemmatizer | |
lem = WordNetLemmatizer() | |
# load model | |
final_gru = tf.keras.models.load_model('model_gru') | |
# import functions | |
def check_chatwords(text): | |
temp=[] | |
for chat in text.split(): | |
if chat.upper() in chatwords: | |
temp.append(chatwords[chat.upper()]) | |
else: | |
temp.append(chat) | |
return " ".join(temp) | |
def lower(text): | |
data = text.lower() | |
return data | |
def check_abbr(text): | |
temp2=[] | |
for abbr in text.split(): | |
if abbr in abbreviation: | |
temp2.append(abbreviation[abbr]) | |
else: | |
temp2.append(abbr) | |
return " ".join(temp2) | |
def check_punctuation(text): | |
data = re.sub("[^a-zA-Z]",' ', text) | |
data = re.sub("[[^]]*]", ' ', data) | |
data = re.sub(r"\n", " ", data) | |
data = data.strip() | |
data = ' '.join(data.split()) | |
return data | |
def token_stopwords_lemma(text): | |
tokens = word_tokenize(text) | |
stop_words2 = ' '.join([word for word in tokens if word not in stop_words]) | |
data = [lem.lemmatize(word) for word in stop_words2.split()] | |
data = ' '.join(data) | |
return data | |
st.title("SPAM Message Detection") | |
message = st.text_input('Please input your message here (in English):') | |
st.write('Message:', message) | |
df_inf = [message] | |
df_inf1 = pd.DataFrame() | |
df_inf1['message'] = df_inf | |
df_inf1['message'] = df_inf1['message'].apply(lambda j: check_chatwords(j)) | |
df_inf1['message'] = df_inf1['message'].apply(lambda k: lower(k)) | |
df_inf1['message'] = df_inf1['message'].apply(lambda v: check_abbr(v)) | |
df_inf1['message'] = df_inf1['message'].apply(lambda r: check_punctuation(r)) | |
df_inf1['message'] = df_inf1['message'].apply(lambda m: token_stopwords_lemma(m)) | |
y_pred_inf = final_gru.predict(df_inf1['message']) | |
y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0) | |
# Membuat dataframe dari array | |
pred_df = pd.DataFrame(y_pred_inf, columns=['label']) | |
# Melakukan prediksi pada new dataframe | |
df_inf2 = pd.DataFrame(df_inf, columns=['message']) | |
df_combined = pd.concat([df_inf2, pred_df], axis=1) | |
# Predict | |
if st.button('Predict'): | |
y_pred_inf = final_gru.predict(df_inf1['message']) | |
y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0) | |
spam_status = str(y_pred_inf[0][0]) | |
if spam_status == "0": | |
st.success("Your message is not spam.") | |
else: | |
st.error("Your message is spam") |