Create app.py
app.py
ADDED
@@ -0,0 +1,114 @@
import streamlit as st
import pandas as pd
import numpy as np
import joblib
import tensorflow as tf
import nltk

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

import re
import ast
import string
from tensorflow.keras.models import load_model

nltk.download('wordnet')
nltk.download('stopwords')
nltk.download('omw-1.4')
nltk.download('punkt')
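# NLTK data needed at runtime: 'punkt' for word_tokenize, 'stopwords' for the
# stop-word list, and 'wordnet' / 'omw-1.4' for WordNetLemmatizer.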
# open chatwords.txt
with open('chatwords.txt') as f:
    data = f.read()
chatwords = ast.literal_eval(data)

# open abbreviation.txt
with open('abbreviation.txt') as abb:
    ab2 = abb.read()
abbreviation = ast.literal_eval(ab2)
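# Assumed file format (illustrative, not taken from the repo): both files are
# expected to hold Python dict literals that ast.literal_eval can parse, e.g.
#   chatwords.txt    -> {"BRB": "Be Right Back", "GTG": "Got To Go"}
#   abbreviation.txt -> {"u": "you", "idk": "i do not know"}
# chatwords keys are looked up in upper case, abbreviation keys after
# lower-casing (see check_chatwords and check_abbr below).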
# define stopwords
stop_words = stopwords.words('english')

# define lemmatizer
lem = WordNetLemmatizer()

# load model
final_gru = tf.keras.models.load_model('model_gru')
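# Note (assumption): 'model_gru' is a saved Keras GRU model. predict() below is
# called on raw message strings, so the saved model is assumed to include its
# own text vectorization / tokenization layer.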
# preprocessing functions
def check_chatwords(text):
    # replace chat slang using the chatwords dictionary (keys are upper case)
    temp = []
    for chat in text.split():
        if chat.upper() in chatwords:
            temp.append(chatwords[chat.upper()])
        else:
            temp.append(chat)
    return " ".join(temp)

def lower(text):
    data = text.lower()
    return data

def check_abbr(text):
    # expand abbreviations using the abbreviation dictionary
    temp2 = []
    for abbr in text.split():
        if abbr in abbreviation:
            temp2.append(abbreviation[abbr])
        else:
            temp2.append(abbr)

    return " ".join(temp2)

def check_punctuation(text):
    # keep letters only, drop bracketed fragments, newlines and extra whitespace
    data = re.sub("[^a-zA-Z]", ' ', text)
    data = re.sub(r"\[[^]]*\]", ' ', data)
    data = re.sub(r"\n", " ", data)
    data = data.strip()
    data = ' '.join(data.split())
    return data

def token_stopwords_lemma(text):
    # tokenize, remove stopwords, then lemmatize each remaining token
    tokens = word_tokenize(text)
    stop_words2 = ' '.join([word for word in tokens if word not in stop_words])
    data = [lem.lemmatize(word) for word in stop_words2.split()]
    data = ' '.join(data)
    return data
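# Rough usage sketch of the full preprocessing chain (illustrative only; the
# exact output depends on the dictionaries above and the NLTK data):
#   text = "FREE entry!! Text WIN to 80086 now"
#   text = check_chatwords(text)
#   text = lower(text)
#   text = check_abbr(text)
#   text = check_punctuation(text)
#   text = token_stopwords_lemma(text)   # e.g. -> "free entry text win"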
st.title("SPAM Message Detection")

message = st.text_input('Please input your message here:')
st.write('Message:', message)
df_inf = [message]
df_inf1 = pd.DataFrame()
df_inf1['message'] = df_inf

df_inf1['message'] = df_inf1['message'].apply(lambda j: check_chatwords(j))
df_inf1['message'] = df_inf1['message'].apply(lambda k: lower(k))
df_inf1['message'] = df_inf1['message'].apply(lambda v: check_abbr(v))
df_inf1['message'] = df_inf1['message'].apply(lambda r: check_punctuation(r))
df_inf1['message'] = df_inf1['message'].apply(lambda m: token_stopwords_lemma(m))

y_pred_inf = final_gru.predict(df_inf1['message'])
y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
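# Note: the model is assumed to output one sigmoid probability per message;
# np.where maps scores >= 0.5 to label 1 (spam) and scores < 0.5 to 0 (not spam),
# matching the spam_status check in the Predict block below.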
# build a dataframe from the prediction array
pred_df = pd.DataFrame(y_pred_inf, columns=['label'])

# combine the original message with its predicted label
df_inf2 = pd.DataFrame(df_inf, columns=['message'])
df_combined = pd.concat([df_inf2, pred_df], axis=1)
# Predict
if st.button('Predict'):
    y_pred_inf = final_gru.predict(df_inf1['message'])
    y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
    spam_status = str(y_pred_inf[0][0])

    if spam_status == "0":
        st.success("Your message is not spam.")
    else:
        st.error("Your message is spam.")
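# To run locally (assuming Streamlit and the model/dictionary files are present):
#   streamlit run app.py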