## Imports
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import gradio as gr  # the interface code below refers to the package as `gr`

## Load Data
dataset = pd.read_csv('./SPAMtextmessage.csv')

## Data Preprocessing
# Convert ham to 0 and spam to 1
dataset['Category'] = (
    dataset['Category']
    .str.replace('ham', '0')
    .str.replace('spam', '1')
    .astype(int)
)

sentences = dataset['Message'].tolist()
labels = dataset['Category'].tolist()

# Separate out the sentences and labels into training and test sets.
# The split is positional: the first 80% of rows train, the rest test.
training_size = int(len(sentences) * 0.8)

# Sentence variables
training_sentences = sentences[:training_size]
testing_sentences = sentences[training_size:]

# Labels variables
training_labels = labels[:training_size]
testing_labels = labels[training_size:]

# Make labels into numpy arrays for use with the network later
training_labels_final = np.array(training_labels)
testing_labels_final = np.array(testing_labels)

## Text Preprocessing
vocab_size = 1000
embedding_dim = 16
max_length = 100
trunc_type = 'post'
padding_type = 'post'
# NOTE(review): an empty out-of-vocabulary token looks like a placeholder
# (e.g. "<OOV>") that was lost somewhere -- confirm the intended value.
oov_tok = ""

# The tokenizer is fitted on the training sentences only, then reused for
# the test sentences and for messages typed into the app.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

sequences = tokenizer.texts_to_sequences(training_sentences)
padded = pad_sequences(
    sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(
    testing_sequences,
    maxlen=max_length,
    padding=padding_type,
    truncating=trunc_type,
)

## Modeling
# Set lr = 0.01
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dense(10, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
)

model.fit(
    padded,
    training_labels_final,
    batch_size=128,
    epochs=50,
    validation_data=(testing_padded, testing_labels_final),
)


## Gradio App
def spam_detection(message):
    """Classify one text message with the trained model.

    The message is tokenized and padded with the same module-level
    tokenizer and padding settings used for the training data, then
    scored by the module-level model.

    Args:
        message: The text to classify. ``None`` is treated as empty text.

    Returns:
        "Spam" if the model's output is at least 0.5, otherwise "Not Spam".
    """
    # Guard against a missing value so tokenization does not fail on None.
    text = "" if message is None else str(message)

    # Preprocess the input message
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(
        sequence,
        maxlen=max_length,
        padding=padding_type,
        truncating=trunc_type,
    )

    # Make prediction; verbose=0 keeps the progress bar out of the console,
    # since the live interface calls this on every input change.
    prediction = model.predict(padded_sequence, verbose=0)[0, 0]

    # Return the result
    return "Spam" if prediction >= 0.5 else "Not Spam"


# Gradio Interface
iface = gr.Interface(
    fn=spam_detection,
    # Textbox takes its caption through `label`; it has no `prompt` argument.
    inputs=gr.Textbox(label="Enter a message:"),
    outputs="text",
    live=True,
    theme="huggingface",
    title="Spam Message Detection",
    description="A demo app for learning purposes. Detects spam messages with 98% accuracy based on the dataset.",
)

# Launch the app
iface.launch()