Ahtisham1583 committed on
Commit
919b20b
·
verified ·
1 Parent(s): de9149f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -108
app.py CHANGED
@@ -1,118 +1,34 @@
 
1
  import pandas as pd
2
- import numpy as np
3
- import matplotlib.pyplot as plt
4
  import seaborn as sns
5
- import nltk
6
- from sklearn.feature_extraction.text import CountVectorizer
7
- import re
8
- from nltk.corpus import stopwords
9
- from nltk.stem import SnowballStemmer
10
- import gradio as gr
11
-
12
- # Download NLTK punkt tokenizer and stopwords
13
- nltk.download('punkt')
14
- nltk.download('stopwords')
15
-
16
- # Function to preprocess text
17
def preprocess_text(text):
    """Normalize a tweet for bag-of-words modelling.

    Lowercases the text, strips URLs, @mentions, #hashtags and
    non-alphabetic characters, removes English stopwords, stems the
    surviving tokens, and returns them joined by single spaces.
    """
    # Lowercase first so the regexes and the stopword lookup both match.
    text = text.lower()
    # Strip URLs, @mentions and #hashtags in that order.
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'@\S+', '', text)
    text = re.sub(r'#\S+', '', text)
    # Keep letters only; everything else becomes whitespace.
    text = re.sub(r'[^a-zA-Z]', ' ', text)

    tokens = nltk.word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    stemmer = SnowballStemmer('english')
    # Drop stopwords, then stem whatever survives.
    stemmed = [stemmer.stem(tok) for tok in tokens if tok not in stop_words]
    return ' '.join(stemmed)
39
-
40
- # Load the DataFrame
41
def load_dataframe(file_path):
    """Read the CSV file at *file_path* into a pandas DataFrame."""
    frame = pd.read_csv(file_path)
    return frame
43
-
44
- # Preprocess the DataFrame
45
def preprocess_dataframe(dataframe):
    """Add a 'processed_text' column derived from 'text' via preprocess_text.

    Mutates *dataframe* in place and returns it for chaining.
    """
    cleaned = dataframe['text'].apply(preprocess_text)
    dataframe['processed_text'] = cleaned
    return dataframe
48
-
49
- # Create CountVectorizer and fit_transform
50
def create_dtm(dataframe):
    """Build a document-term matrix from the 'processed_text' column.

    Returns the (sparse) matrix together with the fitted vectorizer so
    callers can map matrix columns back to vocabulary terms.
    """
    vectorizer = CountVectorizer()
    counts = vectorizer.fit_transform(dataframe['processed_text'])
    return counts, vectorizer
54
 
55
- # Plot word frequency
56
def plot_word_frequency(dtm, vectorizer):
    """Bar-plot the 20 most frequent terms of a document-term matrix."""
    # Column sums of the DTM give one total count per vocabulary term.
    totals = np.ravel(dtm.sum(axis=0))
    vocab = vectorizer.get_feature_names_out()
    freq_df = (
        pd.DataFrame({'word': vocab, 'frequency': totals})
        .sort_values(by='frequency', ascending=False)
    )
    plt.figure(figsize=(10, 6))
    sns.barplot(x='word', y='frequency', data=freq_df.head(20))
    plt.title('Top 20 Words Frequency')
    plt.xlabel('Words')
    plt.ylabel('Frequency')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()
 
 
 
 
73
 
74
- # Define Gradio Interface
75
def _make_interface(fn, inputs, outputs, title, description):
    """Build one non-flagging Gradio interface with the shared defaults."""
    return gr.Interface(
        fn=fn,
        inputs=inputs,
        outputs=outputs,
        title=title,
        description=description,
        allow_flagging=False,
    )


# Upload + load a CSV file.
iface = _make_interface(
    load_dataframe,
    gr.inputs.File(label="Upload CSV file"),
    ["dataframe", "text"],
    "DataFrame Preprocessing",
    "Upload a CSV file containing tweet data and preprocess it.",
)

# Clean the tweet text column.
iface2 = _make_interface(
    preprocess_dataframe,
    gr.inputs.Dataframe(label="Enter your DataFrame here"),
    "dataframe",
    "DataFrame Preprocessing",
    "Preprocess the DataFrame by removing stopwords, URLs, and non-alphabetic characters, and perform stemming.",
)

# Vectorize into a document-term matrix.
iface3 = _make_interface(
    create_dtm,
    gr.inputs.Dataframe(label="Enter your DataFrame here"),
    ["dataframe", "text"],
    "Create Document-Term Matrix",
    "Create a Document-Term Matrix (DTM) from the preprocessed DataFrame.",
)

# Visualize term frequencies.
iface4 = _make_interface(
    plot_word_frequency,
    ["dataframe", "text"],
    "plot",
    "Plot Word Frequency",
    "Plot the frequency of the top 20 words in the Document-Term Matrix.",
)

# Launch the interfaces in order.
iface.launch()
iface2.launch()
iface3.launch()
iface4.launch()
 
1
+ import gradio as gr
2
  import pandas as pd
 
 
3
  import seaborn as sns
4
+ import matplotlib.pyplot as plt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
+ # Load your data here if not already loaded
7
+ # data = pd.read_csv('path_to_your_data.csv')
8
+
9
def analyze_sentiment(data):
    """Plot the label distribution of the 'airline_sentiment' column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an 'airline_sentiment' column.

    Returns
    -------
    str
        Path of the saved PNG ('sentiment_analysis.png'), suitable for a
        Gradio image output.
    """
    # Frequency of each sentiment label as a two-column frame.
    overall_sentiment = data['airline_sentiment'].value_counts().reset_index()
    overall_sentiment.columns = ['Sentiment', 'Freq']

    # Plot histogram.
    fig = plt.figure(figsize=(8, 6))
    try:
        # NOTE(review): the palette lists 2 colors; if the data has 3
        # sentiment classes seaborn recycles them — confirm intended.
        sns.barplot(data=overall_sentiment, x='Sentiment', y='Freq', palette=['indianred', 'deepskyblue'])
        plt.xlabel('Sentiment')
        plt.ylabel('Frequency')
        plt.title('Airline Sentiment Analysis')
        plt.tight_layout()
        # Save the plot for the Gradio image output.
        plt.savefig('sentiment_analysis.png')
    finally:
        # Close the figure so repeated Gradio calls don't leak memory
        # (the original left every figure open).
        plt.close(fig)

    return 'sentiment_analysis.png'
26
 
 
27
# Build and launch the Gradio app.
# Fix: the `gr.inputs` namespace was removed in Gradio 3.x, and 'csv' is
# not a valid Dataframe `type` (valid: 'pandas', 'numpy', 'array').
# `analyze_sentiment` indexes its argument like a pandas DataFrame, so
# request type='pandas'; the function returns a file path, so use an
# Image output in filepath mode.
iface = gr.Interface(
    fn=analyze_sentiment,
    inputs=gr.Dataframe(type='pandas', label="Tweet data (needs an 'airline_sentiment' column)"),
    outputs=gr.Image(type='filepath'),
)

iface.launch()
34
+