# Twitter sentiment analysis: trains an LSTM classifier on Twitter_Data.csv
# and serves it through a Gradio web interface.
# Standard library
import os

# Third-party: scientific stack
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Third-party: NLP / ML
import gradio as gr
import nltk
import tensorflow as tf
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import utils
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from wordcloud import WordCloud, STOPWORDS

# Fetch the NLTK tokenizer data required by word_tokenize below.
nltk.download('punkt')
# ---------------------------------------------------------------------------
# Load and clean the dataset.
# ---------------------------------------------------------------------------
df = pd.read_csv("Twitter_Data.csv")

# Fill missing tweet text with an empty string; drop rows with no label.
# NOTE: plain assignment instead of `df['clean_text'].fillna(..., inplace=True)`
# — chained-assignment fillna on a column is deprecated in recent pandas.
df['clean_text'] = df['clean_text'].fillna('')
df.dropna(subset=['category'], inplace=True)
df.drop_duplicates(inplace=True)

# Tokenize each tweet into lower-cased word tokens (input for Word2Vec).
tokenized_text = [word_tokenize(text.lower()) for text in df['clean_text']]

# Train a Word2Vec embedding model on the tokenized corpus.
# BUG FIX: the original bound this to `model`, which the Keras Sequential
# model below immediately overwrote, silently discarding the embeddings.
# NOTE(review): w2v_model is not used further in this file — confirm whether
# it is intentionally kept for later experimentation.
from gensim.models import Word2Vec
w2v_model = Word2Vec(tokenized_text, vector_size=100, window=5, min_count=1, workers=4)

# Input (raw text) and target (sentiment category) variables.
X = df['clean_text']
y = df['category']

# Encode the categorical target as integers, then as one-hot vectors.
encoder = LabelEncoder()
y = encoder.fit_transform(y)
y = utils.to_categorical(y)

# Fit a Keras tokenizer on the corpus and map texts to integer sequences.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
sequences = tokenizer.texts_to_sequences(X)

# Vocabulary size (+1 because Keras word indices start at 1; 0 is padding).
vocab_size = len(tokenizer.word_index) + 1

# Pad every sequence to the length of the longest tweet.
max_seq_length = max(len(seq) for seq in sequences)
X_pad = pad_sequences(sequences, maxlen=max_seq_length)

# Hold out 20% of the data for testing.
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)
# ---------------------------------------------------------------------------
# Build, train and save the LSTM sentiment classifier.
# ---------------------------------------------------------------------------
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=100, input_length=max_seq_length))
model.add(LSTM(units=128, dropout=0.2, recurrent_dropout=0.2))
# Three output classes: Negative / Neutral / Positive.
model.add(Dense(units=3, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Callbacks: shrink the learning rate on a validation-loss plateau, and stop
# early (restoring the best weights) when validation loss stops improving.
# BUG FIX: min_lr was 0.001, which equals Adam's default starting learning
# rate, so the callback could never actually reduce it. Use a lower floor.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-5)
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train; 10% of the training split is used for validation.
history = model.fit(X_train, y_train, batch_size=128, epochs=10,
                    validation_split=0.1, callbacks=[reduce_lr, early_stop])

# Persist the trained model (legacy HDF5 format, as in the original).
model_path = 'sentiment_analysis_model.h5'
model.save(model_path)
def classify_sentiment(text):
    """Classify the sentiment of a single piece of text.

    Parameters
    ----------
    text : str
        Raw input text.

    Returns
    -------
    str
        One of "Negative", "Neutral" or "Positive".
    """
    # Preprocess exactly as at training time: tokenize with the fitted
    # tokenizer, then pad to the same fixed length the model was trained on.
    text_sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(text_sequence, maxlen=max_seq_length)
    # Predict class probabilities and take the most likely class index.
    prediction = model.predict(padded_sequence)
    predicted_label = int(np.argmax(prediction))
    # Map the encoded label back to a human-readable sentiment.
    # NOTE(review): this assumes LabelEncoder sorted the raw categories into
    # Negative < Neutral < Positive order — confirm against the dataset's
    # 'category' values.
    sentiment_mapping = {0: "Negative", 1: "Neutral", 2: "Positive"}
    return sentiment_mapping[predicted_label]
def gradio_sentiment_analysis(text):
    """Gradio entry point: return the sentiment label for *text*.

    Thin adapter around classify_sentiment so the UI callback has its
    own named function.
    """
    return classify_sentiment(text)
# ---------------------------------------------------------------------------
# Gradio web UI.
# ---------------------------------------------------------------------------
# BUG FIX: `gr.inputs.Textbox` was deprecated in Gradio 3.x and removed in
# 4.x (raises AttributeError at runtime); use the top-level `gr.Textbox`
# component instead.
iface = gr.Interface(
    fn=gradio_sentiment_analysis,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
    outputs="text",
    title="Sentiment Analysis",
    description="Enter a sentence to classify its sentiment as Positive, Neutral, or Negative.",
)

# Launch the app (blocking call; serves the interface locally).
iface.launch()