Spaces:

eaglelandsonce
/

TensorFlowClass

Sleeping

App Files Files Community

TensorFlowClass / pages /21_NLP.py

eaglelandsonce

Update pages/21_NLP.py

08cf096 verified 11 months ago

raw

history blame

2.65 kB

	import streamlit as st
	import tensorflow as tf
	from tensorflow.keras.preprocessing.sequence import pad_sequences
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn.model_selection import train_test_split

	# Load the IMDb dataset
	from datasets import load_dataset

	# Download (or load from the local cache) the "imdb" dataset.
	dataset = load_dataset("imdb")

	# Carve a 20% hold-out set from the dataset's 'train' split.
	# The seed is fixed so that every rerun of this page trains and evaluates on
	# the same partition; without it the reported test accuracy changes from run
	# to run for reasons unrelated to the model.
	train_data, test_data = train_test_split(
	    dataset['train'].to_pandas(),
	    test_size=0.2,
	    random_state=42,
	)

	# Hyperparameters shared by the text encoding step and the model
	vocab_size = 10000
	max_length = 128
	embedding_dim = 128

	# Raw review texts for each partition
	train_texts = train_data['text'].values
	test_texts = test_data['text'].values

	# The vocabulary is fitted on the training texts only; words outside it map to "<OOV>"
	tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=vocab_size, oov_token="<OOV>")
	tokenizer.fit_on_texts(train_texts)
	word_index = tokenizer.word_index


	def _encode(texts):
	    """Turn texts into integer sequences, padded/truncated at the end to max_length."""
	    sequences = tokenizer.texts_to_sequences(texts)
	    return pad_sequences(sequences, maxlen=max_length, padding='post', truncating='post')


	# Model inputs
	X_train = _encode(train_texts)
	X_test = _encode(test_texts)

	# Targets taken from the 'label' column
	y_train = train_data['label'].values
	y_test = test_data['label'].values

	# Build the LSTM model: embedding -> two stacked LSTMs -> dense head with a
	# single sigmoid output.
	# The input shape is declared with an explicit Input layer instead of the
	# Embedding layer's `input_length` argument, which is deprecated in Keras 3.
	# Declaring the shape up front also means the model is already built when
	# summary() is called below.
	model = tf.keras.Sequential([
	    tf.keras.Input(shape=(max_length,)),
	    tf.keras.layers.Embedding(vocab_size, embedding_dim),
	    # return_sequences=True so the second LSTM receives the full sequence
	    tf.keras.layers.LSTM(64, return_sequences=True),
	    tf.keras.layers.LSTM(32),
	    tf.keras.layers.Dense(24, activation='relu'),
	    tf.keras.layers.Dense(1, activation='sigmoid'),
	])

	# NOTE: summary() is not routed through any st.* call, so the layer table
	# goes to the process console rather than the Streamlit page.
	model.summary()

	# Compile the model
	model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

	# Train the model; 10% of the training data is set aside for validation and
	# the per-epoch metrics are kept in `history` for the plots further down.
	history = model.fit(X_train, y_train, epochs=3, validation_split=0.1, batch_size=32)

	# Score the trained model on the held-out data and show the accuracy on the page
	test_metrics = model.evaluate(X_test, y_test)
	loss, accuracy = test_metrics
	st.write(f'Test Accuracy: {accuracy}')

	def _plot_metric(curves, metric, label):
	    """Render the training and validation curves of one metric on the page.

	    Args:
	        curves: Mapping of metric name to per-epoch values (``history.history``).
	        metric: Key of the training curve; the validation curve is read from
	            ``'val_' + metric``.
	        label: Human-readable metric name used in the heading, legend and y-axis.
	    """
	    st.subheader(f"Training and Validation {label}")
	    fig, ax = plt.subplots()
	    try:
	        ax.plot(curves[metric], label=f'Training {label}')
	        ax.plot(curves[f'val_{metric}'], label=f'Validation {label}')
	        ax.set_xlabel('Epoch')
	        ax.set_ylabel(label)
	        ax.legend()
	        st.pyplot(fig)
	    finally:
	        # pyplot keeps every figure it creates alive until it is closed;
	        # release it so figures do not pile up across reruns of the page.
	        plt.close(fig)


	# Plot training & validation accuracy values, then the loss values
	_plot_metric(history.history, 'accuracy', 'Accuracy')
	_plot_metric(history.history, 'loss', 'Loss')

	# Convert the model to TensorFlow.js format
	import tensorflowjs as tfjs

	# Output locations: a Keras file for Python use and a directory for the
	# TensorFlow.js artifacts
	tfjs_target_dir = 'tfjs_model'
	keras_h5_path = 'model.h5'

	# Save the trained model, then convert the same in-memory model for the browser
	model.save(keras_h5_path)
	tfjs.converters.save_keras_model(model, tfjs_target_dir)
	st.write("Model saved and converted to TensorFlow.js format.")