import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.datasets import imdb

# Hyperparameters
vocab_size = 5000    # vocabulary cap shared by the dataset loader and the Embedding layer
max_words = 500      # not used by the pipeline; retained so existing references keep working
max_len = 500        # every review is padded/truncated to this many tokens
embedding_dim = 128


@st.cache_resource
def _train_and_evaluate():
    """Load IMDB, train the LSTM classifier, evaluate it, and return the artifacts.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the model would be retrained each time the user edits the
    text box or presses a button. Caching this function makes the expensive
    work run once per server process; later reruns reuse the same objects.

    Returns:
        dict with the padded datasets, the train/validation split, the
        EarlyStopping callback, the fitted ``model``, the Keras ``history``
        object, and the test ``loss`` / ``accuracy``.
    """
    # Load the dataset
    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

    # Data Preprocessing
    x_train = pad_sequences(x_train, maxlen=max_len)
    x_test = pad_sequences(x_test, maxlen=max_len)

    # Build the Model
    net = Sequential()
    net.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len))
    net.add(SpatialDropout1D(0.2))
    net.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))
    net.add(Dense(1, activation='sigmoid'))
    net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train the Model: hold out 20% of the training data for validation
    x_fit, x_val, y_fit, y_val = train_test_split(
        x_train, y_train, test_size=0.2, random_state=42
    )
    stopper = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
    fit_history = net.fit(
        x_fit,
        y_fit,
        epochs=10,
        batch_size=64,
        validation_data=(x_val, y_val),
        callbacks=[stopper],
    )

    # Evaluate the Model
    test_loss, test_accuracy = net.evaluate(x_test, y_test)

    return {
        "X_train": x_train,
        "y_train": y_train,
        "X_test": x_test,
        "y_test": y_test,
        "X_train_partial": x_fit,
        "X_val": x_val,
        "y_train_partial": y_fit,
        "y_val": y_val,
        "early_stopping": stopper,
        "model": net,
        "history": fit_history,
        "loss": test_loss,
        "accuracy": test_accuracy,
    }


# Re-expose the same module-level names the rest of the script relies on.
_artifacts = _train_and_evaluate()
X_train, y_train = _artifacts["X_train"], _artifacts["y_train"]
X_test, y_test = _artifacts["X_test"], _artifacts["y_test"]
X_train_partial, X_val = _artifacts["X_train_partial"], _artifacts["X_val"]
y_train_partial, y_val = _artifacts["y_train_partial"], _artifacts["y_val"]
early_stopping = _artifacts["early_stopping"]
model, history = _artifacts["model"], _artifacts["history"]
loss, accuracy = _artifacts["loss"], _artifacts["accuracy"]

# Shown on every rerun; kept outside the cached function so it is always rendered.
st.write(f'Test Accuracy: {accuracy:.4f}')

# Plotting functions
def plot_accuracy(history):
    """Render the training vs. validation accuracy curves in the Streamlit app.

    Args:
        history: object whose ``history`` attribute is a mapping containing
            the per-epoch ``'accuracy'`` and ``'val_accuracy'`` series (as
            returned by ``model.fit``).
    """
    # Draw on a dedicated figure instead of pyplot's implicit global one, so
    # curves from other charts or from earlier Streamlit reruns never bleed
    # into this plot.
    fig, ax = plt.subplots()
    ax.plot(history.history['accuracy'])
    ax.plot(history.history['val_accuracy'])
    ax.set_title('Model accuracy')
    ax.set_ylabel('Accuracy')
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    st.pyplot(fig)
    # Release the figure; the script reruns often and open figures accumulate.
    plt.close(fig)

def plot_loss(history):
    """Render the training vs. validation loss curves in the Streamlit app.

    Args:
        history: object whose ``history`` attribute is a mapping containing
            the per-epoch ``'loss'`` and ``'val_loss'`` series (as returned
            by ``model.fit``).
    """
    # Draw on a dedicated figure instead of pyplot's implicit global one, so
    # curves from other charts or from earlier Streamlit reruns never bleed
    # into this plot.
    fig, ax = plt.subplots()
    ax.plot(history.history['loss'])
    ax.plot(history.history['val_loss'])
    ax.set_title('Model loss')
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    st.pyplot(fig)
    # Release the figure; the script reruns often and open figures accumulate.
    plt.close(fig)

# Render the training-curve charts: accuracy first, then loss
for render_chart in (plot_accuracy, plot_loss):
    render_chart(history)

# Text Input and Prediction
def _encode_review(text, word_index, num_words=5000, index_from=3,
                   start_char=1, oov_char=2):
    """Encode raw review text with the same id scheme as ``imdb.load_data``.

    The model was trained on IMDB sequences, so user text must be mapped
    through the IMDB word index; fitting a new tokenizer on the input would
    produce ids unrelated to the ones the model has learned.

    Args:
        text: the raw review string.
        word_index: mapping of word -> frequency rank, as returned by
            ``imdb.get_word_index()``.
        num_words: vocabulary cap; ids at or above it become ``oov_char``.
        index_from: offset added to each rank (``imdb.load_data`` default).
        start_char: id placed at the start of the sequence.
        oov_char: id used for unknown or out-of-vocabulary words.

    Returns:
        list of int token ids, beginning with ``start_char``.
    """
    # Same normalisation as the Keras text tokenizer defaults: lowercase,
    # replace punctuation (apostrophes excluded) with spaces, split on whitespace.
    keras_filters = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
    cleaned = text.lower().translate(str.maketrans(dict.fromkeys(keras_filters, ' ')))

    encoded = [start_char]
    for word in cleaned.split():
        rank = word_index.get(word)
        token_id = oov_char if rank is None else rank + index_from
        encoded.append(token_id if token_id < num_words else oov_char)
    return encoded


st.header("Movie Review Sentiment Analysis")
review_input = st.text_area("Enter your movie review:", "This movie was fantastic! I loved it.")

# Prediction
if st.button("Classify Review"):
    if not review_input.strip():
        # Nothing to classify; an all-padding sequence would give a meaningless score.
        st.warning("Please enter a review to classify.")
    else:
        # Tokenization and padding (done only on click to keep reruns cheap)
        review_seq = [_encode_review(review_input, imdb.get_word_index(), num_words=5000)]
        review_pad = pad_sequences(review_seq, maxlen=max_len)

        prediction = (model.predict(review_pad) > 0.5).astype("int32")
        sentiment = "Positive" if prediction[0][0] == 1 else "Negative"
        st.write(f'Sentiment: **{sentiment}**')