Spaces:

Ahtisham1583
/

Social_Media

Runtime error

App Files Files Community

Ahtisham1583 committed on May 10, 2024

Commit

256d027

verified ·

1 Parent(s): 70142f1

Create app.py

Browse files

Files changed (1) hide show

app.py +151 -0

app.py ADDED Viewed

	@@ -0,0 +1,151 @@

+import gradio as gr
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import re
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from nltk.stem import SnowballStemmer
+from sklearn.feature_extraction.text import CountVectorizer
+from scipy.cluster.hierarchy import dendrogram, ward
+from scipy.sparse import csr_matrix
+# Download NLTK resources
+nltk.download('punkt')
+nltk.download('stopwords')
+def preprocess_text(text):
+    # Convert text to lowercase
+    text = text.lower()
+    # Remove URLs
+    text = re.sub(r'http\S+', '', text)
+    # Remove @ mentions
+    text = re.sub(r'@\S+', '', text)
+    # Remove hashtags
+    text = re.sub(r'#\S+', '', text)
+    # Tokenize
+    tokens = word_tokenize(text)
+    # Remove stopwords
+    stop_words = set(stopwords.words('english'))
+    filtered_tokens = [word for word in tokens if word not in stop_words]
+    # Stemming
+    stemmer = SnowballStemmer('english')
+    stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]
+    # Join tokens into a single string
+    processed_text = ' '.join(stemmed_tokens)
+    return processed_text
+def preprocess_and_plot(data):
+    # Preprocess the text
+    data['clean_text'] = data['text'].apply(preprocess_text)
+    # CountVectorizer
+    vectorizer = CountVectorizer()
+    dtm = vectorizer.fit_transform(data['clean_text'])
+    # Convert DTM to DataFrame
+    tweets = pd.DataFrame(dtm.toarray(), columns=vectorizer.get_feature_names_out())
+    tweets['airline_sentiment'] = data['airline_sentiment']
+    # Plot histogram of global sentiment
+    overall_sentiment = tweets['airline_sentiment'].value_counts().reset_index()
+    overall_sentiment.columns = ['Sentiment', 'Freq']
+    sns.barplot(data=overall_sentiment, x='Sentiment', y='Freq', palette=['indianred', 'deepskyblue', 'chartreuse'])
+    plt.title('Summary Global Sentiment')
+    plt.xlabel('Sentiment')
+    plt.ylabel('Frequency')
+    plt.show()
+    # Plot sentiment distribution for each airline
+    airline_sentiment = tweets.groupby(['airline', 'airline_sentiment']).size().reset_index(name='Freq')
+    plt.figure(figsize=(10, 6))
+    sns.barplot(data=airline_sentiment, x='airline', y='Freq', hue='airline_sentiment', palette=['indianred', 'deepskyblue', 'chartreuse'])
+    plt.title('Number of Tweets and Sentiment for Each Airline')
+    plt.xlabel('Airline')
+    plt.ylabel('Frequency')
+    plt.legend(title='Sentiment')
+    plt.xticks(rotation=45)
+    plt.show()
+def plot_pie_chart(data):
+    def plot_airline_sentiment_pie(data, airline_name):
+        subset = data[data['airline'] == airline_name]
+        sentiment_counts = subset['airline_sentiment'].value_counts()
+        colors = ['indianred', 'deepskyblue', 'chartreuse']
+        plt.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=140, colors=colors)
+        plt.title(f'{airline_name} Sentiment Distribution')
+        plt.show()
+    airlines = data['airline'].unique()
+    for airline in airlines:
+        plot_airline_sentiment_pie(data, airline)
+def plot_reasons(data):
+    reason_table = pd.crosstab(data['negativereason'], data['airline'])
+    globalSentReasons = reason_table.reset_index().melt(id_vars='negativereason', var_name='Airline', value_name='Freq')
+    total_by_airline = globalSentReasons.groupby('Airline')['Freq'].sum().reset_index()
+    total_by_airline.columns = ['Airline', 'Total']
+    globalSentReasons = globalSentReasons.merge(total_by_airline, on='Airline', how='left')
+    globalSentReasons['PercentOfTotal'] = (globalSentReasons['Freq'] / globalSentReasons['Total']) * 100
+    plt.figure(figsize=(12, 8))
+    sns.barplot(data=globalSentReasons, x='negativereason', y='PercentOfTotal', hue='Airline')
+    plt.xticks(rotation=45)
+    plt.title('Percentage of Total Complaints by Reason and Airline')
+    plt.ylabel('Percentage of Total Complaints')
+    plt.xlabel('Reason for Complaint')
+    plt.legend(title='Airline', bbox_to_anchor=(1.05, 1), loc='upper left')
+    plt.tight_layout()
+    plt.show()
+def plot_word_frequency(data):
+    def count_word_freq(text):
+        vectorizer = CountVectorizer()
+        dtm = vectorizer.fit_transform(text)
+        words = vectorizer.get_feature_names_out()
+        freq = dtm.sum(axis=0)
+        return pd.DataFrame({'word': words, 'freq': freq.tolist()[0]})
+    words_freq = count_word_freq(data['clean_text'])
+    wf_filtered = words_freq[words_freq['freq'] > 50].sort_values(by='freq', ascending=False)
+    plt.figure(figsize=(12, 6))
+    plt.bar(wf_filtered['word'], wf_filtered['freq'], color='skyblue')
+    plt.xlabel('Word', fontsize=12)
+    plt.ylabel('Frequency', fontsize=12)
+    plt.title('Word Frequency', fontsize=14)
+    plt.xticks(rotation=45, ha='right', fontsize=10)
+    plt.yticks(fontsize=10)
+    plt.show()
+def plot_dendrogram(data):
+    vectorizer = CountVectorizer()
+    dtm = vectorizer.fit_transform(data['clean_text'])
+    # Convert dtm to a dense matrix
+    dense_dtm = dtm.toarray()
+    # Remove sparse terms
+    min_occurrences = 3
+    non_sparse_cols = (dense_dtm.sum(axis=0) >= min_occurrences).ravel()
+    sparse = dense_dtm[:, non_sparse_cols]
+    # Calculate the distance matrix
+    dist = ward(sparse.T)
+    # Plot the dendrogram
+    plt.figure(figsize=(15, 6))
+    dendrogram(dist, leaf_rotation=90, leaf_font_size=10, labels=vectorizer.get_feature_names_out()[non_sparse_cols])
+    plt.xlabel('Terms')
+    plt.ylabel('Distance')
+    plt.title('Dendrogram')
+    plt.show()
+# Create the interface
+iface = gr.Interface(preprocess_and_plot,
+                     inputs=gr.inputs.Dataframe(label="Enter your DataFrame here"),
+                     outputs=None,
+                     title="Social Media Trend Analysis",
+                     description="Analyze sentiment and trends in your social media data.")
+iface.launch()