# Ahtisham1583's picture
# Update app.py
# 95b60ba verified
# raw
# history blame
# 3.63 kB
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import utils
import os
import matplotlib.pyplot as plt
from nltk.tokenize import word_tokenize
import nltk
import gradio as gr
# Fetch the Punkt tokenizer data used by word_tokenize. NLTK 3.8.2+ loads the
# 'punkt_tab' resource instead of the pickled 'punkt' models, so request both;
# a failed download is reported by nltk.download rather than raised.
nltk.download('punkt')
nltk.download('punkt_tab')
from wordcloud import WordCloud, STOPWORDS
# Load the dataset; the code below reads its 'clean_text' and 'category' columns
df = pd.read_csv("Twitter_Data.csv")
# Replace missing tweet text with an empty string. Assign the result back
# instead of calling fillna(inplace=True) on the column selection: under
# pandas Copy-on-Write that form updates a temporary and leaves df untouched,
# so NaN values would survive and break the later text.lower() call.
df['clean_text'] = df['clean_text'].fillna('')
# Rows without a category cannot be used as labelled training samples
df = df.dropna(subset=['category'])
# Remove exact duplicate rows
df = df.drop_duplicates()
# Tokenize each tweet into lowercase word tokens (needs the NLTK Punkt data)
tokenized_text = [word_tokenize(text.lower()) for text in df['clean_text']]
# Train Word2Vec embeddings on the tokenized tweets.
# The result gets its own name: it used to be stored in `model`, which the
# Keras classifier defined further down rebinds, discarding these vectors.
# NOTE(review): the vectors are not wired into the Embedding layer below --
# confirm whether they are meant to initialise it or can be dropped.
from gensim.models import Word2Vec
word2vec_model = Word2Vec(tokenized_text, vector_size=100, window=5, min_count=1, workers=4)
# Features are the cleaned tweet strings; targets are the sentiment categories
X = df['clean_text']
y = df['category']

# Integer-encode the categories, then expand them into one-hot rows
encoder = LabelEncoder()
y = utils.to_categorical(encoder.fit_transform(y))

# Build the word index from the corpus and map every tweet to integer ids
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
sequences = tokenizer.texts_to_sequences(X)

# One slot more than the number of indexed words
vocab_size = 1 + len(tokenizer.word_index)

# Pad every sequence to the length of the longest one
max_seq_length = max(map(len, sequences))
X_pad = pad_sequences(sequences, maxlen=max_seq_length)

# Hold out 20% of the samples; fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X_pad, y, test_size=0.2, random_state=42
)
# Define the LSTM classifier: embedding -> LSTM -> 3-way softmax
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=100, input_length=max_seq_length))
model.add(LSTM(units=128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(units=3, activation='softmax'))
# Targets are one-hot encoded, hence categorical cross-entropy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Define callbacks.
# min_lr must be below the optimizer's starting rate or ReduceLROnPlateau can
# never act: 'adam' defaults to 0.001, which is what the floor was set to
# before, so the learning rate was never actually reduced.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-5)
# Stop after 3 epochs without val_loss improvement and keep the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
# File the trained network is written to
model_path = 'sentiment_analysis_model.h5'
# Fit on the training split, holding back 10% of it for validation
history = model.fit(
    X_train,
    y_train,
    batch_size=128,
    epochs=10,
    validation_split=0.1,
    callbacks=[reduce_lr, early_stop],
)
# Persist the trained model
model.save(model_path)
# Classify the sentiment of a single piece of text
def classify_sentiment(text):
    """Return the predicted sentiment label for ``text``.

    The text is converted to integer ids with the fitted ``tokenizer``,
    padded to ``max_seq_length`` and scored by the trained ``model``. The
    index of the highest score is mapped to "Negative", "Neutral" or
    "Positive".
    """
    label_by_index = {0: "Negative", 1: "Neutral", 2: "Positive"}
    # Same preprocessing as the training data: word ids, then padding
    encoded = tokenizer.texts_to_sequences([text])
    model_input = pad_sequences(encoded, maxlen=max_seq_length)
    scores = model.predict(model_input)
    return label_by_index[np.argmax(scores)]
# Callback wired into the Gradio interface
def gradio_sentiment_analysis(text):
    """Return the sentiment label that ``classify_sentiment`` gives for ``text``."""
    return classify_sentiment(text)
# Create the Gradio interface.
# `gr.inputs.Textbox` was deprecated in Gradio 3 and removed in Gradio 4, where
# it raises AttributeError; use the top-level component when it exists and fall
# back to the legacy namespace on older releases.
_textbox = gr.Textbox if hasattr(gr, "Textbox") else gr.inputs.Textbox
iface = gr.Interface(
    fn=gradio_sentiment_analysis,
    inputs=_textbox(lines=2, placeholder="Enter text here..."),
    outputs="text",
    title="Sentiment Analysis",
    description="Enter a sentence to classify its sentiment as Positive, Neutral, or Negative.",
)
# Launch the Gradio app
iface.launch()