Spaces:

Ahtisham1583
/

sentiment_analysis_by_Ahtisham

Runtime error

App Files Files Community

sentiment_analysis_by_Ahtisham / app.py

Ahtisham1583

Update app.py

95b60ba verified about 1 year ago

raw

history blame

3.63 kB

	import numpy as np
	import pandas as pd
	import tensorflow as tf
	from tensorflow.keras.preprocessing.text import Tokenizer
	from tensorflow.keras.preprocessing.sequence import pad_sequences
	from tensorflow.keras.models import Sequential
	from tensorflow.keras.layers import Embedding, LSTM, Dense
	from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import LabelEncoder
	from tensorflow.keras import utils
	import os
	import matplotlib.pyplot as plt
	from nltk.tokenize import word_tokenize
	import nltk
	import gradio as gr
	# word_tokenize needs the Punkt tokenizer data. Newer NLTK releases look up
	# the 'punkt_tab' resource instead of 'punkt', so fetch both; a resource
	# name unknown to the installed NLTK is reported by the downloader and skipped.
	for _nltk_resource in ('punkt', 'punkt_tab'):
	    nltk.download(_nltk_resource)
	from wordcloud import WordCloud, STOPWORDS

	# Load the dataset
	df = pd.read_csv("Twitter_Data.csv")

	# Clean the data: blank out missing text, drop rows without a label, and
	# remove exact duplicates. The filled column is assigned back rather than
	# filled through `df[col].fillna(..., inplace=True)`, because that chained
	# in-place form operates on a temporary under pandas copy-on-write and
	# would leave `df` unchanged.
	df['clean_text'] = df['clean_text'].fillna('')
	df.dropna(subset=['category'], inplace=True)
	df.drop_duplicates(inplace=True)

	# Tokenize words
	tokenized_text = [word_tokenize(text.lower()) for text in df['clean_text']]

	# Word2Vec model. Bound to its own name so it is not silently overwritten
	# by the Keras model defined further down.
	# NOTE(review): these vectors are not passed to the Embedding layer below;
	# if they are not needed elsewhere, this step can be dropped to shorten
	# start-up.
	from gensim.models import Word2Vec
	word2vec_model = Word2Vec(tokenized_text, vector_size=100, window=5, min_count=1, workers=4)

	# Define input and target variables
	X = df['clean_text']
	y = df['category']

	# Encode the target as integer class ids, then as one-hot vectors to match
	# the categorical cross-entropy loss used below.
	encoder = LabelEncoder()
	y = encoder.fit_transform(y)
	y = utils.to_categorical(y)

	# Fit the Keras tokenizer on the raw text and convert each text to a
	# sequence of word indices.
	tokenizer = Tokenizer()
	tokenizer.fit_on_texts(X)
	sequences = tokenizer.texts_to_sequences(X)

	# Vocabulary size (+1 because Keras word indices start at 1; 0 is padding)
	vocab_size = len(tokenizer.word_index) + 1

	# Longest sequence in the corpus; every input is padded to this length.
	max_seq_length = max(len(seq) for seq in sequences)

	# Pad sequences
	X_pad = pad_sequences(sequences, maxlen=max_seq_length)

	# Train-test split
	X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)

	# Define LSTM model: embedding -> LSTM -> 3-way softmax
	model = Sequential()
	model.add(Embedding(input_dim=vocab_size, output_dim=100, input_length=max_seq_length))
	model.add(LSTM(units=128, dropout=0.2, recurrent_dropout=0.2))
	model.add(Dense(units=3, activation='softmax'))

	# Compile model
	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

	# Define callbacks. min_lr must sit below the optimizer's starting learning
	# rate (Adam defaults to 0.001); with min_lr=0.001 the scheduler could
	# never lower the rate and was effectively a no-op.
	reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-5)
	early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

	# Train model, holding out 10% of the training split for validation
	history = model.fit(X_train, y_train, batch_size=128, epochs=10, validation_split=0.1, callbacks=[reduce_lr, early_stop])

	# Save the model
	model_path = 'sentiment_analysis_model.h5'
	model.save(model_path)

	# Define a function to classify sentiment
	def classify_sentiment(text):
	    """Classify the sentiment of a single piece of text.

	    Args:
	        text: Raw input string to classify.

	    Returns:
	        "Negative", "Neutral" or "Positive", chosen from the class with the
	        highest score in the model's prediction.
	    """
	    # Apply the same tokenization and padding used for the training data.
	    text_sequence = tokenizer.texts_to_sequences([text])
	    padded_sequence = pad_sequences(text_sequence, maxlen=max_seq_length)

	    # verbose=0 keeps Keras from printing a progress bar on every request.
	    prediction = model.predict(padded_sequence, verbose=0)

	    # The batch holds exactly one text, so read the scores from row 0 and
	    # convert the NumPy integer to a plain int for the lookup.
	    predicted_label = int(np.argmax(prediction[0]))

	    # Map class index to sentiment.
	    # NOTE(review): assumes the label encoder ordered the classes as
	    # negative < neutral < positive — confirm against the dataset's
	    # category values.
	    sentiment_mapping = {0: "Negative", 1: "Neutral", 2: "Positive"}
	    return sentiment_mapping[predicted_label]

	# Define the Gradio interface
	def gradio_sentiment_analysis(text):
	    """Gradio callback: return the sentiment label for ``text``."""
	    return classify_sentiment(text)

	# Create the Gradio interface
	iface = gr.Interface(
	    fn=gradio_sentiment_analysis,
	    # Use the top-level gr.Textbox component: the legacy gr.inputs
	    # namespace was removed in Gradio 4, and accessing it there raises
	    # AttributeError when the app starts.
	    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
	    outputs="text",
	    title="Sentiment Analysis",
	    description="Enter a sentence to classify its sentiment as Positive, Neutral, or Negative.",
	)

	# Launch the Gradio app
	iface.launch()