"""Train an LSTM binary text classifier on the IMDb dataset inside Streamlit.

The script tokenizes the review text, trains a two-layer LSTM classifier,
shows the held-out accuracy and the training curves on the Streamlit page,
and finally saves the model both as HDF5 and in TensorFlow.js format.
"""

# All imports live at the top so a missing optional package (notably
# tensorflowjs) is reported before the training run instead of after it.
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
import tensorflow as tf
import tensorflowjs as tfjs
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Seed for the train/test split so the held-out rows (and therefore the
# reported test accuracy) refer to the same examples on every rerun.
# Model weight initialisation and batch shuffling are NOT seeded by this.
RANDOM_SEED = 42


def _render_history_plot(history, metric, label):
    """Draw the training and validation curves for one metric in Streamlit.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain both ``metric`` and ``"val_" + metric``.
        metric: Key of the training series, e.g. ``"accuracy"`` or ``"loss"``.
        label: Display name used in the heading, the legend and the y-axis.
    """
    st.subheader(f"Training and Validation {label}")
    fig, ax = plt.subplots()
    ax.plot(history.history[metric], label=f"Training {label}")
    ax.plot(history.history[f"val_{metric}"], label=f"Validation {label}")
    ax.set_xlabel("Epoch")
    ax.set_ylabel(label)
    ax.legend()
    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is closed;
    # without this each rerun of the script would leak another figure.
    plt.close(fig)


# Load dataset
dataset = load_dataset("imdb")

# Split the dataset's "train" split into training and testing frames (80/20).
train_data, test_data = train_test_split(
    dataset['train'].to_pandas(),
    test_size=0.2,
    random_state=RANDOM_SEED,
)

# Tokenizer parameters
vocab_size = 10000
max_length = 128
embedding_dim = 128

# Tokenize the data; the vocabulary is fitted on the training texts only.
# NOTE(review): the OOV token is an empty string -- possibly a placeholder
# such as "<OOV>" that was lost along the way. Confirm before changing it,
# since the token becomes part of the fitted vocabulary.
tokenizer = tf.keras.preprocessing.text.Tokenizer(
    num_words=vocab_size,
    oov_token="",
)
tokenizer.fit_on_texts(train_data['text'].values)
word_index = tokenizer.word_index

# Convert text to integer sequences
X_train = tokenizer.texts_to_sequences(train_data['text'].values)
X_test = tokenizer.texts_to_sequences(test_data['text'].values)

# Pad or truncate every sequence at the end to exactly max_length tokens
X_train = pad_sequences(
    X_train, maxlen=max_length, padding='post', truncating='post'
)
X_test = pad_sequences(
    X_test, maxlen=max_length, padding='post', truncating='post'
)

# Labels
y_train = train_data['label'].values
y_test = test_data['label'].values

# Build the LSTM model: embedding -> two stacked LSTMs -> dense head with a
# single sigmoid output for the binary label.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(
        vocab_size, embedding_dim, input_length=max_length
    ),
    # return_sequences=True so the second LSTM receives the full sequence.
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Printed to stdout (the terminal running Streamlit), not to the page.
model.summary()

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train the model; 10% of the training rows are held back for validation.
history = model.fit(
    X_train,
    y_train,
    epochs=3,
    validation_split=0.1,
    batch_size=32,
)

# Evaluate the model on the held-out test frame
loss, accuracy = model.evaluate(X_test, y_test)
st.write(f'Test Accuracy: {accuracy}')

# Plot training & validation accuracy and loss values
_render_history_plot(history, 'accuracy', 'Accuracy')
_render_history_plot(history, 'loss', 'Loss')

# Save the model as HDF5 and convert it to TensorFlow.js format
tfjs_target_dir = 'tfjs_model'
model.save('model.h5')
tfjs.converters.save_keras_model(model, tfjs_target_dir)
st.write("Model saved and converted to TensorFlow.js format.")