Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,118 +1,34 @@
|
|
|
|
1 |
import pandas as pd
|
2 |
-
import numpy as np
|
3 |
-
import matplotlib.pyplot as plt
|
4 |
import seaborn as sns
|
5 |
-
import
|
6 |
-
from sklearn.feature_extraction.text import CountVectorizer
|
7 |
-
import re
|
8 |
-
from nltk.corpus import stopwords
|
9 |
-
from nltk.stem import SnowballStemmer
|
10 |
-
import gradio as gr
|
11 |
-
|
12 |
-
# Download NLTK punkt tokenizer and stopwords
|
13 |
-
nltk.download('punkt')
|
14 |
-
nltk.download('stopwords')
|
15 |
-
|
16 |
-
# Function to preprocess text
|
17 |
-
def preprocess_text(text):
|
18 |
-
# Convert text to lowercase
|
19 |
-
text = text.lower()
|
20 |
-
# Remove URLs
|
21 |
-
text = re.sub(r'http\S+', '', text)
|
22 |
-
# Remove @ mentions
|
23 |
-
text = re.sub(r'@\S+', '', text)
|
24 |
-
# Remove hashtags
|
25 |
-
text = re.sub(r'#\S+', '', text)
|
26 |
-
# Remove non-alphabetic characters
|
27 |
-
text = re.sub(r'[^a-zA-Z]', ' ', text)
|
28 |
-
# Tokenize text
|
29 |
-
tokens = nltk.word_tokenize(text)
|
30 |
-
# Remove stopwords
|
31 |
-
stop_words = set(stopwords.words('english'))
|
32 |
-
filtered_tokens = [word for word in tokens if word not in stop_words]
|
33 |
-
# Stemming
|
34 |
-
stemmer = SnowballStemmer('english')
|
35 |
-
stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]
|
36 |
-
# Join tokens into a single string
|
37 |
-
processed_text = ' '.join(stemmed_tokens)
|
38 |
-
return processed_text
|
39 |
-
|
40 |
-
# Load the DataFrame
|
41 |
-
def load_dataframe(file_path):
|
42 |
-
return pd.read_csv(file_path)
|
43 |
-
|
44 |
-
# Preprocess the DataFrame
|
45 |
-
def preprocess_dataframe(dataframe):
|
46 |
-
dataframe['processed_text'] = dataframe['text'].apply(preprocess_text)
|
47 |
-
return dataframe
|
48 |
-
|
49 |
-
# Create CountVectorizer and fit_transform
|
50 |
-
def create_dtm(dataframe):
|
51 |
-
vectorizer = CountVectorizer()
|
52 |
-
dtm = vectorizer.fit_transform(dataframe['processed_text'])
|
53 |
-
return dtm, vectorizer
|
54 |
|
55 |
-
#
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
plt.title('Top 20 Words Frequency')
|
68 |
-
plt.xlabel('Words')
|
69 |
plt.ylabel('Frequency')
|
70 |
-
plt.
|
71 |
plt.tight_layout()
|
72 |
-
|
|
|
|
|
|
|
|
|
73 |
|
74 |
-
# Define Gradio Interface
|
75 |
iface = gr.Interface(
|
76 |
-
fn=
|
77 |
-
inputs=gr.inputs.
|
78 |
-
outputs=
|
79 |
-
title="DataFrame Preprocessing",
|
80 |
-
description="Upload a CSV file containing tweet data and preprocess it.",
|
81 |
-
allow_flagging=False
|
82 |
-
)
|
83 |
-
|
84 |
-
# Preprocess DataFrame
|
85 |
-
iface2 = gr.Interface(
|
86 |
-
fn=preprocess_dataframe,
|
87 |
-
inputs=gr.inputs.Dataframe(label="Enter your DataFrame here"),
|
88 |
-
outputs="dataframe",
|
89 |
-
title="DataFrame Preprocessing",
|
90 |
-
description="Preprocess the DataFrame by removing stopwords, URLs, and non-alphabetic characters, and perform stemming.",
|
91 |
-
allow_flagging=False
|
92 |
-
)
|
93 |
-
|
94 |
-
# Create DTM
|
95 |
-
iface3 = gr.Interface(
|
96 |
-
fn=create_dtm,
|
97 |
-
inputs=gr.inputs.Dataframe(label="Enter your DataFrame here"),
|
98 |
-
outputs=["dataframe", "text"],
|
99 |
-
title="Create Document-Term Matrix",
|
100 |
-
description="Create a Document-Term Matrix (DTM) from the preprocessed DataFrame.",
|
101 |
-
allow_flagging=False
|
102 |
-
)
|
103 |
-
|
104 |
-
# Plot Word Frequency
|
105 |
-
iface4 = gr.Interface(
|
106 |
-
fn=plot_word_frequency,
|
107 |
-
inputs=["dataframe", "text"],
|
108 |
-
outputs="plot",
|
109 |
-
title="Plot Word Frequency",
|
110 |
-
description="Plot the frequency of the top 20 words in the Document-Term Matrix.",
|
111 |
-
allow_flagging=False
|
112 |
)
|
113 |
|
114 |
-
# Launch the interfaces
|
115 |
iface.launch()
|
116 |
-
|
117 |
-
iface3.launch()
|
118 |
-
iface4.launch()
|
|
|
1 |
+
import gradio as gr
|
2 |
import pandas as pd
|
|
|
|
|
3 |
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
# Load your data here if not already loaded
|
7 |
+
# data = pd.read_csv('path_to_your_data.csv')
|
8 |
+
|
def analyze_sentiment(data):
    """Plot the frequency of each airline sentiment label and save it as a PNG.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an 'airline_sentiment' column of categorical labels.

    Returns
    -------
    str
        Path of the saved chart image ('sentiment_analysis.png').
    """
    # Tally the sentiment labels into a two-column frequency table.
    overall_sentiment = data['airline_sentiment'].value_counts().reset_index()
    overall_sentiment.columns = ['Sentiment', 'Freq']

    # Draw on an explicit figure so it can be closed afterwards: a bare
    # plt.figure() with no matching close leaks one figure per call in a
    # long-running Gradio app.
    fig, ax = plt.subplots(figsize=(8, 6))
    # NOTE(review): only two palette colours are given; seaborn cycles them if
    # the column has more classes (e.g. negative/neutral/positive) — confirm.
    sns.barplot(data=overall_sentiment, x='Sentiment', y='Freq',
                palette=['indianred', 'deepskyblue'], ax=ax)
    ax.set_xlabel('Sentiment')
    ax.set_ylabel('Frequency')
    ax.set_title('Airline Sentiment Analysis')
    fig.tight_layout()

    # Persist the chart; Gradio serves the file named by the returned path.
    fig.savefig('sentiment_analysis.png')
    plt.close(fig)  # release the figure to avoid unbounded memory growth

    return 'sentiment_analysis.png'
# Gradio UI: the user supplies/edits a dataframe of tweets, the app returns
# the sentiment-frequency bar chart produced by analyze_sentiment.
#
# The original code used gr.inputs.Dataframe(type='csv'); the gr.inputs
# namespace was deprecated in Gradio 3 and removed in Gradio 4, which is the
# likely cause of this Space's "Runtime error". The top-level component with
# type='pandas' hands analyze_sentiment the DataFrame it indexes into.
iface = gr.Interface(
    fn=analyze_sentiment,
    inputs=gr.Dataframe(type='pandas'),
    outputs='image'
)

# Start the web server (blocks until the app is stopped).
iface.launch()