Nzlul committed on
Commit
b6da8f8
·
1 Parent(s): 156f9e3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -0
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import joblib
5
+ import tensorflow as tf
6
+ import nltk
7
+
8
+ from nltk.corpus import stopwords
9
+ from nltk.tokenize import word_tokenize
10
+ from nltk.stem import WordNetLemmatizer
11
+
12
+ import re
13
+ import ast
14
+ import string
15
+ from tensorflow.keras.models import load_model
# --- One-time setup: NLTK data, lookup tables, NLP helpers, model ---------

# Make sure the NLTK corpora/models used below are present.
for resource in ('wordnet', 'stopwords', 'omw-1.4', 'punkt'):
    nltk.download(resource)

# chatwords.txt contains a Python dict literal: CHAT SLANG -> expansion.
with open('chatwords.txt') as f:
    chatwords = ast.literal_eval(f.read())

# abbreviation.txt contains a Python dict literal: abbreviation -> full form.
with open('abbreviation.txt') as abb:
    abbreviation = ast.literal_eval(abb.read())

# English stopword list shared by the cleaning helpers.
stop_words = stopwords.words('english')

# WordNet lemmatizer shared by the cleaning helpers.
lem = WordNetLemmatizer()

# Pre-trained GRU spam classifier, loaded from the 'model_gru' SavedModel.
final_gru = tf.keras.models.load_model('model_gru')
# import functions
def check_chatwords(text):
    """Expand chat slang in *text* using the module-level ``chatwords`` dict.

    Each whitespace-separated token is looked up (upper-cased) in
    ``chatwords``; known slang is replaced by its expansion, anything else
    is kept unchanged.

    BUG FIX: the original ``for chat in text.split()`` was missing its
    trailing colon, which is a SyntaxError.
    """
    expanded = []
    for chat in text.split():  # fixed: colon was missing here
        key = chat.upper()
        if key in chatwords:
            expanded.append(chatwords[key])
        else:
            expanded.append(chat)
    return " ".join(expanded)
def lower(text):
    """Return *text* converted to lowercase."""
    return text.lower()
54
+
55
+ def check_abbr(text):
56
+ temp2=[]
57
+ for abbr in text.split():
58
+ if abbr in abbreviation:
59
+ temp2.append(abbreviation[abbr])
60
+ else:
61
+ temp2.append(abbr)
62
+
63
+ return " ".join(temp2)
64
+
65
+ def check_punctuation(text):
66
+ data = re.sub("[^a-zA-Z]",' ', text)
67
+ data = re.sub("[[^]]*]", ' ', data)
68
+ data = re.sub(r"\n", " ", data)
69
+ data = data.strip()
70
+ data = ' '.join(data.split())
71
+ return data
72
+
73
+ def token_stopwords_lemma(text):
74
+ tokens = word_tokenize(text)
75
+ stop_words2 = ' '.join([word for word in tokens if word not in stop_words])
76
+ data = [lem.lemmatize(word) for word in stop_words2.split()]
77
+ data = ' '.join(data)
78
+ return data
79
+
80
+ st.title("SPAM Message Detection")
81
+
82
+ message = st.text_input('Please input your message here:')
83
+ st.write('Message:', message)
84
+
85
+ df_inf = [message]
86
+ df_inf1 = pd.DataFrame()
87
+ df_inf1['message'] = df_inf
88
+
89
+ df_inf1['message'] = df_inf1['message'].apply(lambda j: check_chatwords(j))
90
+ df_inf1['message'] = df_inf1['message'].apply(lambda k: lower(k))
91
+ df_inf1['message'] = df_inf1['message'].apply(lambda v: check_abbr(v))
92
+ df_inf1['message'] = df_inf1['message'].apply(lambda r: check_punctuation(r))
93
+ df_inf1['message'] = df_inf1['message'].apply(lambda r: token_stopwords_lemma(m))
94
+
95
+ y_pred_inf = final_gru.predict(df_inf1['message'])
96
+ y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
97
+
98
+ # Membuat dataframe dari array
99
+ pred_df = pd.DataFrame(y_pred_inf, columns=['label'])
100
+
101
+ # Melakukan prediksi pada new dataframe
102
+ df_inf2 = pd.DataFrame(df_inf, columns=['message'])
103
+ df_combined = pd.concat([df_inf2, pred_df], axis=1)
104
+
105
+ # Predict
106
+ if st.button('Predict'):
107
+ y_pred_inf = final_gru.predict(df_inf1['message'])
108
+ y_pred_inf = np.where(y_pred_inf >= 0.5, 1, 0)
109
+ spam_status = str(y_pred_inf[0][0])
110
+
111
+ if spam_status == "0":
112
+ st.success("Your message is not spam.")
113
+ else:
114
+ st.error("Your message is spam")