Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,118 +1,34 @@
|
|
|
|
1 |
import pandas as pd
|
2 |
-
import numpy as np
|
3 |
-
import matplotlib.pyplot as plt
|
4 |
import seaborn as sns
|
5 |
-
import
|
6 |
-
from sklearn.feature_extraction.text import CountVectorizer
|
7 |
-
import re
|
8 |
-
from nltk.corpus import stopwords
|
9 |
-
from nltk.stem import SnowballStemmer
|
10 |
-
import gradio as gr
|
11 |
-
|
12 |
-
# Download NLTK punkt tokenizer and stopwords
|
13 |
-
nltk.download('punkt')
|
14 |
-
nltk.download('stopwords')
|
15 |
-
|
16 |
-
# Function to preprocess text
|
17 |
-
def preprocess_text(text):
|
18 |
-
# Convert text to lowercase
|
19 |
-
text = text.lower()
|
20 |
-
# Remove URLs
|
21 |
-
text = re.sub(r'http\S+', '', text)
|
22 |
-
# Remove @ mentions
|
23 |
-
text = re.sub(r'@\S+', '', text)
|
24 |
-
# Remove hashtags
|
25 |
-
text = re.sub(r'#\S+', '', text)
|
26 |
-
# Remove non-alphabetic characters
|
27 |
-
text = re.sub(r'[^a-zA-Z]', ' ', text)
|
28 |
-
# Tokenize text
|
29 |
-
tokens = nltk.word_tokenize(text)
|
30 |
-
# Remove stopwords
|
31 |
-
stop_words = set(stopwords.words('english'))
|
32 |
-
filtered_tokens = [word for word in tokens if word not in stop_words]
|
33 |
-
# Stemming
|
34 |
-
stemmer = SnowballStemmer('english')
|
35 |
-
stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]
|
36 |
-
# Join tokens into a single string
|
37 |
-
processed_text = ' '.join(stemmed_tokens)
|
38 |
-
return processed_text
|
39 |
-
|
40 |
-
# Load the DataFrame
|
41 |
-
def load_dataframe(file_path):
|
42 |
-
return pd.read_csv(file_path)
|
43 |
-
|
44 |
-
# Preprocess the DataFrame
|
45 |
-
def preprocess_dataframe(dataframe):
|
46 |
-
dataframe['processed_text'] = dataframe['text'].apply(preprocess_text)
|
47 |
-
return dataframe
|
48 |
-
|
49 |
-
# Create CountVectorizer and fit_transform
|
50 |
-
def create_dtm(dataframe):
|
51 |
-
vectorizer = CountVectorizer()
|
52 |
-
dtm = vectorizer.fit_transform(dataframe['processed_text'])
|
53 |
-
return dtm, vectorizer
|
54 |
|
55 |
-
#
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
plt.title('Top 20 Words Frequency')
|
68 |
-
plt.xlabel('Words')
|
69 |
plt.ylabel('Frequency')
|
70 |
-
plt.
|
71 |
plt.tight_layout()
|
72 |
-
|
|
|
|
|
|
|
|
|
73 |
|
74 |
-
# Define Gradio Interface
|
75 |
iface = gr.Interface(
|
76 |
-
fn=
|
77 |
-
inputs=gr.inputs.
|
78 |
-
outputs=
|
79 |
-
title="DataFrame Preprocessing",
|
80 |
-
description="Upload a CSV file containing tweet data and preprocess it.",
|
81 |
-
allow_flagging=False
|
82 |
-
)
|
83 |
-
|
84 |
-
# Preprocess DataFrame
|
85 |
-
iface2 = gr.Interface(
|
86 |
-
fn=preprocess_dataframe,
|
87 |
-
inputs=gr.inputs.Dataframe(label="Enter your DataFrame here"),
|
88 |
-
outputs="dataframe",
|
89 |
-
title="DataFrame Preprocessing",
|
90 |
-
description="Preprocess the DataFrame by removing stopwords, URLs, and non-alphabetic characters, and perform stemming.",
|
91 |
-
allow_flagging=False
|
92 |
-
)
|
93 |
-
|
94 |
-
# Create DTM
|
95 |
-
iface3 = gr.Interface(
|
96 |
-
fn=create_dtm,
|
97 |
-
inputs=gr.inputs.Dataframe(label="Enter your DataFrame here"),
|
98 |
-
outputs=["dataframe", "text"],
|
99 |
-
title="Create Document-Term Matrix",
|
100 |
-
description="Create a Document-Term Matrix (DTM) from the preprocessed DataFrame.",
|
101 |
-
allow_flagging=False
|
102 |
-
)
|
103 |
-
|
104 |
-
# Plot Word Frequency
|
105 |
-
iface4 = gr.Interface(
|
106 |
-
fn=plot_word_frequency,
|
107 |
-
inputs=["dataframe", "text"],
|
108 |
-
outputs="plot",
|
109 |
-
title="Plot Word Frequency",
|
110 |
-
description="Plot the frequency of the top 20 words in the Document-Term Matrix.",
|
111 |
-
allow_flagging=False
|
112 |
)
|
113 |
|
114 |
-
# Launch the interfaces
|
115 |
iface.launch()
|
116 |
-
|
117 |
-
iface3.launch()
|
118 |
-
iface4.launch()
|
|
|
1 |
+
import gradio as gr
|
2 |
import pandas as pd
|
|
|
|
|
3 |
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
# Load your data here if not already loaded
|
7 |
+
# data = pd.read_csv('path_to_your_data.csv')
|
8 |
+
|
def analyze_sentiment(data):
    """Plot the frequency of each airline sentiment label and save it as a PNG.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an 'airline_sentiment' column of categorical labels.

    Returns
    -------
    str
        Path of the saved chart image ('sentiment_analysis.png').
    """
    # Tally the sentiment labels into a two-column frequency table.
    overall_sentiment = data['airline_sentiment'].value_counts().reset_index()
    overall_sentiment.columns = ['Sentiment', 'Freq']

    # Draw on an explicit figure so it can be closed afterwards: a bare
    # plt.figure() with no matching close leaks one figure per call in a
    # long-running Gradio app.
    fig, ax = plt.subplots(figsize=(8, 6))
    # NOTE(review): only two palette colours are given; seaborn cycles them if
    # the column has more classes (e.g. negative/neutral/positive) — confirm.
    sns.barplot(data=overall_sentiment, x='Sentiment', y='Freq',
                palette=['indianred', 'deepskyblue'], ax=ax)
    ax.set_xlabel('Sentiment')
    ax.set_ylabel('Frequency')
    ax.set_title('Airline Sentiment Analysis')
    fig.tight_layout()

    # Persist the chart; Gradio serves the file named by the returned path.
    fig.savefig('sentiment_analysis.png')
    plt.close(fig)  # release the figure to avoid unbounded memory growth

    return 'sentiment_analysis.png'
# Gradio UI: the user supplies/edits a dataframe of tweets, the app returns
# the sentiment-frequency bar chart produced by analyze_sentiment.
#
# The original code used gr.inputs.Dataframe(type='csv'); the gr.inputs
# namespace was deprecated in Gradio 3 and removed in Gradio 4, which is the
# likely cause of this Space's "Runtime error". The top-level component with
# type='pandas' hands analyze_sentiment the DataFrame it indexes into.
iface = gr.Interface(
    fn=analyze_sentiment,
    inputs=gr.Dataframe(type='pandas'),
    outputs='image'
)

# Start the web server (blocks until the app is stopped).
iface.launch()