Ahtisham1583 committed on
Commit
95b60ba
·
verified ·
1 Parent(s): f5a6753

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -0
app.py CHANGED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ import tensorflow as tf
4
+ from tensorflow.keras.preprocessing.text import Tokenizer
5
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
6
+ from tensorflow.keras.models import Sequential
7
+ from tensorflow.keras.layers import Embedding, LSTM, Dense
8
+ from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
9
+ from sklearn.model_selection import train_test_split
10
+ from sklearn.preprocessing import LabelEncoder
11
+ from tensorflow.keras import utils
12
+ import os
13
+ import matplotlib.pyplot as plt
14
+ from nltk.tokenize import word_tokenize
15
+ import nltk
16
+ import gradio as gr
17
+ nltk.download('punkt')
18
+ from wordcloud import WordCloud, STOPWORDS
19
+
# Load the dataset
df = pd.read_csv("Twitter_Data.csv")

# Clean the data: blank out missing tweets, drop unlabeled rows and duplicates.
# The fillna result is assigned back rather than using inplace=True on the
# column selection: the chained in-place form emits a FutureWarning in
# pandas 2.x and does not update `df` when Copy-on-Write is enabled.
df['clean_text'] = df['clean_text'].fillna('')
df.dropna(subset=['category'], inplace=True)
df.drop_duplicates(inplace=True)

# Tokenize words
tokenized_text = [word_tokenize(text.lower()) for text in df['clean_text']]

# Word2Vec model
# NOTE(review): these embeddings are trained but not consumed by the LSTM
# below. The result is kept under its own name so it is no longer overwritten
# by the Keras model.
from gensim.models import Word2Vec
word2vec_model = Word2Vec(tokenized_text, vector_size=100, window=5, min_count=1, workers=4)

# Define input and target variables
X = df['clean_text']
y = df['category']

# Encode target variable as one-hot vectors
encoder = LabelEncoder()
y = encoder.fit_transform(y)
y = utils.to_categorical(y)
# Size the output layer from the labels instead of hard-coding it.
num_classes = y.shape[1]

# Tokenize text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
sequences = tokenizer.texts_to_sequences(X)

# Vocabulary size (+1 because Tokenizer indices start at 1; 0 is the padding value)
vocab_size = len(tokenizer.word_index) + 1

# Max sequence length
max_seq_length = max(len(seq) for seq in sequences)

# Pad sequences
X_pad = pad_sequences(sequences, maxlen=max_seq_length)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)

# Define LSTM model
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=100, input_length=max_seq_length))
model.add(LSTM(units=128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(units=num_classes, activation='softmax'))

# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Define callbacks
# min_lr must be below the optimizer's starting learning rate (Adam defaults
# to 0.001); with min_lr=0.001 the callback could never reduce anything.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-5)
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train model
history = model.fit(X_train, y_train, batch_size=128, epochs=10, validation_split=0.1, callbacks=[reduce_lr, early_stop])

# Save the model
model_path = 'sentiment_analysis_model.h5'
model.save(model_path)
80
+
# Define a function to classify sentiment
def classify_sentiment(text):
    """Return the sentiment label the trained model predicts for ``text``.

    The text is encoded with the module-level ``tokenizer``, padded to
    ``max_seq_length`` and passed to ``model``; the index of the highest
    score is mapped to "Negative", "Neutral" or "Positive".
    """
    labels_by_index = {0: "Negative", 1: "Neutral", 2: "Positive"}

    # Encode and pad the single input the same way the training data was.
    encoded = tokenizer.texts_to_sequences([text])
    model_input = pad_sequences(encoded, maxlen=max_seq_length)

    # Pick the class with the highest predicted score.
    scores = model.predict(model_input)
    best_index = np.argmax(scores)

    return labels_by_index[best_index]
98
+
# Define the Gradio interface
def gradio_sentiment_analysis(text):
    """Gradio callback: return the sentiment label predicted for ``text``."""
    return classify_sentiment(text)
103
+
# Create the Gradio interface
# Use the top-level gr.Textbox component: the legacy gr.inputs namespace was
# deprecated in Gradio 3 and removed in Gradio 4, where gr.inputs.Textbox
# fails with AttributeError at startup.
iface = gr.Interface(
    fn=gradio_sentiment_analysis,
    inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
    outputs="text",
    title="Sentiment Analysis",
    description="Enter a sentence to classify its sentiment as Positive, Neutral, or Negative.",
)

# Launch the Gradio app
iface.launch()