TensorFlowClass / pages/21_NLP.py
import streamlit as st
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import imdb
# Hyperparameters
max_words = 5000      # vocabulary size: keep only the most frequent words
max_len = 500         # every review is padded/truncated to this many tokens
embedding_dim = 128

# Load the IMDB dataset (reviews arrive already encoded as integer word indices)
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_words)

# Data preprocessing: pad/truncate reviews to a fixed length
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)
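# X_train and X_test are now integer arrays of shape (num_reviews, max_len)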
# Build the Model
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
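# Show the model architecture in the app (Keras' print_fn hook routes each summary line to Streamlit)
model.summary(print_fn=lambda line: st.text(line))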
# Train the Model
X_train_partial, X_val, y_train_partial, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = model.fit(X_train_partial, y_train_partial, epochs=10, batch_size=64, validation_data=(X_val, y_val), callbacks=[early_stopping])
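# Note: Streamlit re-runs this entire script on every widget interaction, so the model
# retrains each time. Wrapping the data loading and training in a function decorated with
# @st.cache_resource is one way to avoid the repeated work.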
# Evaluate the Model
loss, accuracy = model.evaluate(X_test, y_test)
st.write(f'Test Accuracy: {accuracy:.4f}')
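# Also report the test loss computed above
st.write(f'Test Loss: {loss:.4f}')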
# Plotting functions
def plot_accuracy(history):
    # Use a fresh figure so the accuracy and loss plots don't draw on top of each other
    fig, ax = plt.subplots()
    ax.plot(history.history['accuracy'])
    ax.plot(history.history['val_accuracy'])
    ax.set_title('Model accuracy')
    ax.set_ylabel('Accuracy')
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    st.pyplot(fig)

def plot_loss(history):
    fig, ax = plt.subplots()
    ax.plot(history.history['loss'])
    ax.plot(history.history['val_loss'])
    ax.set_title('Model loss')
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    st.pyplot(fig)
# Display plots
plot_accuracy(history)
plot_loss(history)
# Text Input and Prediction
st.header("Movie Review Sentiment Analysis")
review_input = st.text_area("Enter your movie review:", "This movie was fantastic! I loved it.")
# Encode the review with the IMDB word index so it matches the training vocabulary
# (imdb.load_data offsets word indices by 3 and reserves 1 = start, 2 = unknown)
word_index = imdb.get_word_index()
tokens = review_input.lower().replace('.', ' ').replace(',', ' ').replace('!', ' ').split()
review_seq = [[1] + [word_index[w] + 3 if w in word_index and word_index[w] + 3 < max_words else 2 for w in tokens]]
review_pad = pad_sequences(review_seq, maxlen=max_len)
# Prediction
if st.button("Classify Review"):
    prediction = (model.predict(review_pad) > 0.5).astype("int32")
    sentiment = "Positive" if prediction[0][0] == 1 else "Negative"
    st.write(f'Sentiment: **{sentiment}**')
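    # Also show the raw sigmoid output as a rough confidence score
    prob = float(model.predict(review_pad)[0][0])
    st.write(f'Positive probability: {prob:.2f}')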