Spaces:
Sleeping
Sleeping
import streamlit as st | |
import tensorflow as tf | |
from tensorflow.keras.preprocessing.sequence import pad_sequences | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn.model_selection import train_test_split | |
# Load the IMDb dataset | |
from datasets import load_dataset | |
# Load the IMDb reviews; only the labelled 'train' split is used below.
dataset = load_dataset("imdb")

# Hold out 20% of the 'train' split for evaluation. The split is seeded and
# stratified on the label so that every run of this script sees the same
# train/test partition with the same class balance; an unseeded split makes
# the reported test accuracy change from run to run.
reviews = dataset['train'].to_pandas()
train_data, test_data = train_test_split(
    reviews,
    test_size=0.2,
    random_state=42,
    stratify=reviews['label'],
)

# Tokenizer parameters
vocab_size = 10000   # passed as num_words to the tokenizer and as the embedding input size
max_length = 128     # every review is padded/truncated to this many token ids
embedding_dim = 128  # width of the embedding vectors

# Fit the tokenizer on the training texts only, so the held-out reviews do
# not influence the vocabulary; words it does not know map to "<OOV>".
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(train_data['text'].values)
word_index = tokenizer.word_index

# Convert text to sequences of integer token ids
X_train = tokenizer.texts_to_sequences(train_data['text'].values)
X_test = tokenizer.texts_to_sequences(test_data['text'].values)

# Pad short reviews and cut long ones at the end ('post') so that every row
# has exactly max_length entries.
X_train = pad_sequences(X_train, maxlen=max_length, padding='post', truncating='post')
X_test = pad_sequences(X_test, maxlen=max_length, padding='post', truncating='post')

# Labels
y_train = train_data['label'].values
y_test = test_data['label'].values
# Build the LSTM model: embedding -> two stacked LSTMs -> small dense head
# ending in a single sigmoid unit.
model = tf.keras.Sequential([
    # Declare the input shape with an explicit Input layer rather than the
    # Embedding `input_length` argument, which current Keras releases
    # deprecate. The model is still built up front, so summary() below can
    # report output shapes and parameter counts.
    tf.keras.Input(shape=(max_length,)),
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    # return_sequences=True hands the whole sequence to the second LSTM.
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.summary()

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model; 10% of the training rows are set aside for validation and
# the per-epoch metrics are kept in `history` for the plots further down.
history = model.fit(X_train, y_train, epochs=3, validation_split=0.1, batch_size=32)
# Evaluate the model on the held-out reviews and show the accuracy in the app.
loss, accuracy = model.evaluate(X_test, y_test)
st.write(f'Test Accuracy: {accuracy}')

# Plot training & validation curves, one chart per metric recorded by fit().
for heading, metric, axis_label in (
    ("Training and Validation Accuracy", 'accuracy', 'Accuracy'),
    ("Training and Validation Loss", 'loss', 'Loss'),
):
    st.subheader(heading)
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], label=f'Training {axis_label}')
    ax.plot(history.history[f'val_{metric}'], label=f'Validation {axis_label}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(axis_label)
    ax.legend()
    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is closed;
    # release each one once rendered so figures do not pile up in a
    # long-running app process.
    plt.close(fig)
# Export the trained model twice: once as a Keras 'model.h5' file and once
# as a TensorFlow.js model written to the `tfjs_target_dir` directory.
import tensorflowjs as tfjs

keras_model_path = 'model.h5'
model.save(keras_model_path)

tfjs_target_dir = 'tfjs_model'
tfjs.converters.save_keras_model(model, tfjs_target_dir)

# Confirm in the app that both exports have completed.
st.write("Model saved and converted to TensorFlow.js format.")