Create app.py
app.py
ADDED
@@ -0,0 +1,88 @@
## Imports
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import gradio as gr

## Load Data
dataset = pd.read_csv('/content/drive/MyDrive/SPAMtextmessage.csv')

## Data Preprocessing
# Convert ham to 0 and spam to 1
dataset['Category'] = dataset['Category'].str.replace('ham', '0')
dataset['Category'] = dataset['Category'].str.replace('spam', '1')
dataset['Category'] = dataset['Category'].astype(int)
sentences = dataset['Message'].tolist()
labels = dataset['Category'].tolist()
# Separate out the sentences and labels into training and test sets
training_size = int(len(sentences) * 0.8)
# Sentence variables
training_sentences = sentences[0:training_size]
testing_sentences = sentences[training_size:]
# Label variables
training_labels = labels[0:training_size]
testing_labels = labels[training_size:]
# Make labels into numpy arrays for use with the network later
training_labels_final = np.array(training_labels)
testing_labels_final = np.array(testing_labels)

## Text Preprocessing
vocab_size = 1000
embedding_dim = 16
max_length = 100
trunc_type = 'post'
padding_type = 'post'
oov_tok = "<OOV>"
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(training_sentences)
padded = pad_sequences(sequences, maxlen=max_length, padding=padding_type,
                       truncating=trunc_type)
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, maxlen=max_length,
                               padding=padding_type, truncating=trunc_type)

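# Optional sanity check (not part of the original script): both padded arrays
# should have shape (num_examples, max_length).
print(padded.shape, testing_padded.shape)
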
## Modeling
# Set lr = 0.01
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', metrics=['accuracy'],
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.01))
model.fit(padded, training_labels_final, batch_size=128, epochs=50,
          validation_data=(testing_padded, testing_labels_final))

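# Optional evaluation (not part of the original script): report accuracy on the
# held-out split, roughly the figure the interface description below cites.
# Assumes model, testing_padded, and testing_labels_final defined above.
loss, accuracy = model.evaluate(testing_padded, testing_labels_final, verbose=0)
print(f"Test accuracy: {accuracy:.4f}")
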
## Gradio App
def spam_detection(message):
    # Preprocess the input message
    sequence = tokenizer.texts_to_sequences([message])
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding=padding_type, truncating=trunc_type)

    # Make prediction
    prediction = model.predict(padded_sequence)[0, 0]

    # Return the result
    return "Spam" if prediction >= 0.5 else "Not Spam"

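# Quick local check of the helper (illustrative only; the example message is
# made up and not taken from the dataset).
print(spam_detection("Congratulations! You have won a free prize, call now!"))
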
# Gradio Interface
iface = gr.Interface(
    fn=spam_detection,
    inputs=gr.Textbox(label="Enter a message:"),
    outputs="text",
    live=True,
    theme="huggingface",
    title="Spam Message Detection",
    description="A demo app for learning purposes. Detects spam messages with 98% accuracy based on the dataset."
)

# Launch the app
iface.launch()