Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
import tensorflow as tf
|
4 |
+
from tensorflow.keras.preprocessing.text import Tokenizer
|
5 |
+
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
6 |
+
from tensorflow.keras.models import Sequential
|
7 |
+
from tensorflow.keras.layers import Embedding, LSTM, Dense
|
8 |
+
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
|
9 |
+
from sklearn.model_selection import train_test_split
|
10 |
+
from sklearn.preprocessing import LabelEncoder
|
11 |
+
from tensorflow.keras import utils
|
12 |
+
import os
|
13 |
+
import matplotlib.pyplot as plt
|
14 |
+
from nltk.tokenize import word_tokenize
|
15 |
+
import nltk
|
16 |
+
import gradio as gr
|
17 |
+
nltk.download('punkt')
|
18 |
+
from wordcloud import WordCloud, STOPWORDS
|
19 |
+
|
20 |
+
# Load the dataset
df = pd.read_csv("Twitter_Data.csv")

# Clean the data: replace missing text with an empty string, drop rows that
# have no label, then drop exact duplicate rows.
# The filled column is assigned back rather than calling
# fillna(..., inplace=True) on the column selection: that form is chained
# assignment, which pandas warns about and which leaves df unchanged when
# copy-on-write is enabled (the .lower() call below would then hit NaN).
df['clean_text'] = df['clean_text'].fillna('')
df.dropna(subset=['category'], inplace=True)
df.drop_duplicates(inplace=True)

# Tokenize words (lower-cased) for Word2Vec
tokenized_text = [word_tokenize(text.lower()) for text in df['clean_text']]

# Word2Vec model
from gensim.models import Word2Vec
# Bound to its own name so it is no longer silently overwritten by the Keras
# model assigned to `model` further down.
# NOTE(review): these vectors are not passed to the Embedding layer below,
# which is created without pretrained weights -- confirm whether this training
# step is still needed.
w2v_model = Word2Vec(tokenized_text, vector_size=100, window=5, min_count=1, workers=4)

# Define input and target variables
X = df['clean_text']
y = df['category']

# Encode target variable: labels -> integer ids -> one-hot rows
encoder = LabelEncoder()
y = encoder.fit_transform(y)
y = utils.to_categorical(y)

# Tokenize text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
sequences = tokenizer.texts_to_sequences(X)

# Vocabulary size (+1 leaves room for index 0, the value pad_sequences pads with)
vocab_size = len(tokenizer.word_index) + 1

# Max sequence length: every sample is padded to the longest sequence
max_seq_length = max(len(seq) for seq in sequences)

# Pad sequences
X_pad = pad_sequences(sequences, maxlen=max_seq_length)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)

# Define LSTM model: embedding -> single LSTM layer -> 3-way softmax
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=100, input_length=max_seq_length))
model.add(LSTM(units=128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(units=3, activation='softmax'))

# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Define callbacks
# min_lr must sit below the optimizer's starting learning rate. 'adam' starts
# at 0.001 by default, so the previous floor of 0.001 meant the callback could
# never actually lower the rate.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-5)
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train model (10% of the training split is held out for validation)
history = model.fit(X_train, y_train, batch_size=128, epochs=10, validation_split=0.1, callbacks=[reduce_lr, early_stop])

# Save the model
model_path = 'sentiment_analysis_model.h5'
model.save(model_path)
|
80 |
+
|
81 |
+
# Define a function to classify sentiment
|
82 |
+
def classify_sentiment(text):
    """Return the predicted sentiment label for a single piece of text.

    The text is encoded with the module-level ``tokenizer``, padded to
    ``max_seq_length`` and scored by the module-level ``model``; the index of
    the highest score is looked up in a fixed index-to-label table.

    Args:
        text: The raw input string to classify.

    Returns:
        One of "Negative", "Neutral" or "Positive".
    """
    # Index -> label table.
    # NOTE(review): assumes the LabelEncoder used for training assigned
    # 0/1/2 in negative/neutral/positive order -- confirm against the dataset.
    labels = {0: "Negative", 1: "Neutral", 2: "Positive"}

    # Encode the text as a one-item batch and pad it to the training length.
    encoded = tokenizer.texts_to_sequences([text])
    batch = pad_sequences(encoded, maxlen=max_seq_length)

    # Score the batch and pick the highest-scoring class.
    scores = model.predict(batch)
    best_index = np.argmax(scores)

    return labels[best_index]
|
98 |
+
|
99 |
+
# Define the Gradio interface
|
100 |
+
def gradio_sentiment_analysis(text):
    """Gradio callback: return the sentiment label for ``text``.

    Delegates directly to ``classify_sentiment`` and returns its result
    unchanged.
    """
    return classify_sentiment(text)
|
103 |
+
|
104 |
+
# Create the Gradio interface
|
105 |
+
# Build the UI: a two-line text box in, the predicted label out as plain text.
# The input uses the top-level gr.Textbox component; the legacy
# gr.inputs.Textbox namespace was deprecated in Gradio 3 and removed in
# Gradio 4, where it raises AttributeError at startup.
iface = gr.Interface(
    fn=gradio_sentiment_analysis,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
    outputs="text",
    title="Sentiment Analysis",
    description="Enter a sentence to classify its sentiment as Positive, Neutral, or Negative.",
)

# Launch the Gradio app
iface.launch()
|