# TensorFlowClass / pages/21_NLP.py
# (Hugging Face file-viewer header captured with the source: eaglelandsonce's picture,
# "Update pages/21_NLP.py", commit 08cf096 verified, raw / history / blame, 2.65 kB)
import streamlit as st
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# Load the IMDb dataset
from datasets import load_dataset
# Load the IMDb dataset through `datasets.load_dataset`.
dataset = load_dataset("imdb")
# Hold out 20% of the 'train' split for evaluation. The seed is fixed so the
# same rows land in train/test on every run of this script; without it the
# split (and therefore the reported test accuracy) changes from run to run.
train_data, test_data = train_test_split(
    dataset['train'].to_pandas(),
    test_size=0.2,
    random_state=42,
)
# Hyperparameters shared by the text pipeline and the model
vocab_size = 10000    # passed to the tokenizer as num_words
max_length = 128      # every review is padded / truncated to this many tokens
embedding_dim = 128   # width of the embedding vectors


# Fit the vocabulary on the training reviews only
tokenizer = tf.keras.preprocessing.text.Tokenizer(
    num_words=vocab_size,
    oov_token="<OOV>",
)
tokenizer.fit_on_texts(train_data['text'].values)
word_index = tokenizer.word_index


def _texts_to_padded(texts):
    """Encode review strings as token-id sequences of length `max_length`.

    Sequences are padded and truncated at the end ('post').
    """
    sequences = tokenizer.texts_to_sequences(texts)
    return pad_sequences(
        sequences,
        maxlen=max_length,
        padding='post',
        truncating='post',
    )


# Model inputs
X_train = _texts_to_padded(train_data['text'].values)
X_test = _texts_to_padded(test_data['text'].values)

# Targets, taken from the 'label' column
y_train = train_data['label'].values
y_test = test_data['label'].values
# Assemble the classifier one layer at a time: an embedding, two stacked
# LSTMs, then a small dense head ending in a single sigmoid unit.
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length)
)
# return_sequences=True so the second LSTM receives the full sequence
model.add(tf.keras.layers.LSTM(64, return_sequences=True))
model.add(tf.keras.layers.LSTM(32))
model.add(tf.keras.layers.Dense(24, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.summary()
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Fit for 3 epochs in batches of 32, with validation_split=0.1 so that a
# tenth of the training data is used for validation instead of training.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=3,
    validation_split=0.1,
)
# Score the held-out test set and show the accuracy on the page
loss, accuracy = model.evaluate(x=X_test, y=y_test)
st.write(f'Test Accuracy: {accuracy}')
def _plot_history(fit_history, title, train_key, val_key, y_label):
    """Draw one training-vs-validation curve on the Streamlit page.

    Args:
        fit_history: Object returned by `model.fit`; its `.history` mapping
            is indexed with `train_key` and `val_key`.
        title: Subheader text shown above the chart.
        train_key: Key of the training series in `fit_history.history`.
        val_key: Key of the validation series in `fit_history.history`.
        y_label: Y-axis label; also used to build the legend entries
            ("Training <y_label>" / "Validation <y_label>").
    """
    st.subheader(title)
    fig, ax = plt.subplots()
    ax.plot(fit_history.history[train_key], label=f'Training {y_label}')
    ax.plot(fit_history.history[val_key], label=f'Validation {y_label}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()
    st.pyplot(fig)
    # Figures made with plt.subplots() stay registered with pyplot until they
    # are closed; close explicitly so they do not pile up across executions.
    plt.close(fig)


# Plot training & validation accuracy values
_plot_history(
    history, "Training and Validation Accuracy", 'accuracy', 'val_accuracy', 'Accuracy'
)
# Plot training & validation loss values
_plot_history(
    history, "Training and Validation Loss", 'loss', 'val_loss', 'Loss'
)
# Convert the model to TensorFlow.js format
import tensorflowjs as tfjs
# Output locations: a Keras file and a directory for the TensorFlow.js artifacts
keras_h5_path = 'model.h5'
tfjs_target_dir = 'tfjs_model'
# Write the Keras file first, then run the TensorFlow.js conversion
model.save(keras_h5_path)
tfjs.converters.save_keras_model(model, tfjs_target_dir)
# Confirm on the page that both artifacts were produced
st.write("Model saved and converted to TensorFlow.js format.")