TensorFlowClass / pages/21_NLP.py
import streamlit as st
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import imdb
# Hyperparameters
max_words = 5000      # vocabulary size: keep only the most frequent words
max_len = 500         # every review is padded/truncated to this many tokens
embedding_dim = 128

# Load the IMDB dataset (reviews arrive already encoded as integer word indices)
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_words)

# Data preprocessing: pad/truncate reviews to a fixed length
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)
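# X_train and X_test are now integer arrays of shape (num_reviews, max_len)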
# Build the Model
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
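# Show the model architecture in the app (Keras' print_fn hook routes each summary line to Streamlit)
model.summary(print_fn=lambda line: st.text(line))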
# Train the Model
X_train_partial, X_val, y_train_partial, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = model.fit(X_train_partial, y_train_partial, epochs=10, batch_size=64, validation_data=(X_val, y_val), callbacks=[early_stopping])
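# Note: Streamlit re-runs this entire script on every widget interaction, so the model
# retrains each time. Wrapping the data loading and training in a function decorated with
# @st.cache_resource is one way to avoid the repeated work.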
# Evaluate the Model
loss, accuracy = model.evaluate(X_test, y_test)
st.write(f'Test Accuracy: {accuracy:.4f}')
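# Also report the test loss computed above
st.write(f'Test Loss: {loss:.4f}')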
# Plotting functions
def plot_accuracy(history):
    # Use a fresh figure so the accuracy and loss plots don't draw on top of each other
    fig, ax = plt.subplots()
    ax.plot(history.history['accuracy'])
    ax.plot(history.history['val_accuracy'])
    ax.set_title('Model accuracy')
    ax.set_ylabel('Accuracy')
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    st.pyplot(fig)

def plot_loss(history):
    fig, ax = plt.subplots()
    ax.plot(history.history['loss'])
    ax.plot(history.history['val_loss'])
    ax.set_title('Model loss')
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    st.pyplot(fig)
# Display plots
plot_accuracy(history)
plot_loss(history)
# Text Input and Prediction
st.header("Movie Review Sentiment Analysis")
review_input = st.text_area("Enter your movie review:", "This movie was fantastic! I loved it.")
# Encode the review with the IMDB word index so it matches the training vocabulary
# (imdb.load_data offsets word indices by 3 and reserves 1 = start, 2 = unknown)
word_index = imdb.get_word_index()
tokens = review_input.lower().replace('.', ' ').replace(',', ' ').replace('!', ' ').split()
review_seq = [[1] + [word_index[w] + 3 if w in word_index and word_index[w] + 3 < max_words else 2 for w in tokens]]
review_pad = pad_sequences(review_seq, maxlen=max_len)
# Prediction
if st.button("Classify Review"):
    prediction = (model.predict(review_pad) > 0.5).astype("int32")
    sentiment = "Positive" if prediction[0][0] == 1 else "Negative"
    st.write(f'Sentiment: **{sentiment}**')
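    # Also show the raw sigmoid output as a rough confidence score
    prob = float(model.predict(review_pad)[0][0])
    st.write(f'Positive probability: {prob:.2f}')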