# _211 / _211.py
# antitheft159's picture
# Upload _211.py
# 2ce3fcd verified
# -*- coding: utf-8 -*-
""".211
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1uZZV_SkJj2tua-CdVEbGu85Tl8vrTbWD
"""
import numpy as np
import pandas as pd
import os
# Print the full path of every file found under the Kaggle input directory.
# os.walk ignores listing errors by default, so this prints nothing when the
# directory does not exist.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the dataset that every later section of this script works on.
data = pd.read_csv('/content/synthetic_ecommerce_data.csv')

# Each report pairs a heading with a zero-argument builder, so a summary is
# only computed after its heading has been printed.
reports = (
    ("Dataset Preview:", lambda: data.head()),
    ("\nDescriptive Statistics:", lambda: data.describe(include='all')),
    ("\nMissing Values:", lambda: data.isnull().sum()),
)
for heading, build_summary in reports:
    print(heading)
    print(build_summary())
# Parse the transaction dates so they can be grouped and drawn on a time axis.
data['Transaction_Date'] = pd.to_datetime(data['Transaction_Date'])

# Total revenue for each distinct transaction date.
daily_revenue = data.groupby('Transaction_Date')['Revenue'].sum()

# Line chart of revenue over time, drawn through an explicit Axes object.
revenue_ax = plt.figure(figsize=(10, 5)).add_subplot()
revenue_ax.plot(daily_revenue, label='Daily Revenue')
revenue_ax.set_title('Revenue Over Time')
revenue_ax.set_xlabel('Date')
revenue_ax.set_ylabel('Revenue')
revenue_ax.legend()
plt.show()
# The ten products with the highest total revenue, largest first.
top_products = data.groupby('Product_ID')['Revenue'].sum().sort_values(ascending=False).head(10)

# Bar chart of those ten products.
plt.figure(figsize=(10, 5))
top_products.plot(kind='bar')
plt.title('Top 10 Products by Revenue')
plt.xlabel('Product Id')
# Label the y-axis explicitly, as every other revenue chart in this script does.
plt.ylabel('Revenue')
plt.show()
# Total revenue per category. NOTE(review): this is computed but not plotted
# anywhere in the visible script; it is kept in case later code relies on it.
category_revenue = data.groupby('Category')['Revenue'].sum()

# Scatter plot of ad spend against revenue, one point per row.
plt.figure(figsize=(10, 5))
# The original read `x=data=['Ad_Spend']`, which is a syntax error; the column
# lookup `data['Ad_Spend']` is what was intended.
sns.scatterplot(x=data['Ad_Spend'], y=data['Revenue'])
plt.title('Ad Spend vs Revenue')
plt.xlabel('Ad Spend')
plt.ylabel('Revenue')
plt.show()
# Histogram (20 bins, KDE curve overlaid) of the ad click-through rate.
ctr_ax = plt.figure(figsize=(10, 5)).add_subplot()
sns.histplot(data['Ad_CTR'], bins=20, kde=True, ax=ctr_ax)
ctr_ax.set(
    title='Distribution of Ad Click-Through Rate (CTR)',
    xlabel='CTR',
    ylabel='Frequency',
)
plt.show()
# Total revenue per region.
region_revenue = data.groupby('Region')['Revenue'].sum()

# Bar chart of revenue by region, drawn on an explicitly created Axes.
region_ax = plt.figure(figsize=(10, 5)).add_subplot()
region_revenue.plot.bar(ax=region_ax)
region_ax.set_title('Revenue by Region')
region_ax.set_xlabel('Region')
region_ax.set_ylabel('Revenue')
plt.show()
# Month number (1-12) of each transaction. Because only the month number is
# kept, the same calendar month from different years is summed together.
data['Month'] = data['Transaction_Date'].dt.month
monthly_revenue = data.groupby('Month')['Revenue'].sum()

# Bar chart of total revenue per month.
plt.figure(figsize=(10, 5))
monthly_revenue.plot(kind='bar')
# Title typo fixed ("Reveneu" -> "Revenue").
plt.title('Monthly Revenue Trend')
plt.xlabel('Month')
plt.ylabel('Revenue')
plt.show()
# Scatter plot of the applied discount against revenue, one point per row.
discount_ax = plt.figure(figsize=(10, 5)).add_subplot()
sns.scatterplot(data=data, x='Discount_Applied', y='Revenue', ax=discount_ax)
discount_ax.set(
    title='Discount Applied vs Revenue',
    xlabel='Discount (%)',
    ylabel='Revenue',
)
plt.show()
# Scatter plot of clicks against revenue, one point per row.
plt.figure(figsize=(10, 5))
sns.scatterplot(x=data['Clicks'], y=data['Revenue'])
plt.title('Clicks vs Revenue')
# Set the x label explicitly, as every other scatter plot in this script
# does, instead of leaving it to whatever the plotting library infers.
plt.xlabel('Clicks')
plt.ylabel('Revenue')
plt.show()
# Histogram (20 bins, KDE curve overlaid) of the conversion rate.
conversion_hist_ax = plt.figure(figsize=(10, 5)).add_subplot()
sns.histplot(data['Conversion_Rate'], bins=20, kde=True, ax=conversion_hist_ax)
conversion_hist_ax.set_title('Distribution of Conversion Rate')
conversion_hist_ax.set_xlabel('Conversion Rate')
conversion_hist_ax.set_ylabel('Frequency')
plt.show()
# Scatter plot of conversion rate against revenue, one point per row.
conversion_ax = plt.figure(figsize=(10, 5)).add_subplot()
sns.scatterplot(data=data, x='Conversion_Rate', y='Revenue', ax=conversion_ax)
conversion_ax.set(
    title='Conversion Rate vs Revenue',
    xlabel='Conversion Rate',
    ylabel='Revenue',
)
plt.show()
# Each region's share of total revenue, expressed as a percentage.
region_revenue = data.groupby('Region')['Revenue'].sum()
total_revenue = region_revenue.sum()
region_contribution = (region_revenue / total_revenue) * 100

# Bar chart of the percentage contribution per region.
plt.figure(figsize=(10, 5))
region_contribution.plot(kind='bar')
# Title typo fixed ("Reigion" -> "Region").
plt.title('Revenue Contribution by Region (%)')
plt.xlabel('Region')
plt.ylabel('Revenue Contribution (%)')
plt.show()
# Revenue earned per unit of ad spend. Rows with zero ad spend are masked to
# NaN before dividing: a plain division would produce inf (or NaN for 0/0)
# there, and infinite values would distort the box plot's scale.
data['Ad_Efficiency'] = data['Revenue'] / data['Ad_Spend'].mask(data['Ad_Spend'] == 0)

# Box plot of ad efficiency for each category.
plt.figure(figsize=(10, 5))
sns.boxplot(data=data, x='Category', y='Ad_Efficiency')
plt.title('Ad Spend Efficiency by Category')
plt.xlabel('Category')
plt.ylabel('Revenue per Unit of Ad Spend')
plt.show()
# Histogram (20 bins, KDE curve overlaid) of units sold.
units_hist_ax = plt.figure(figsize=(10, 5)).add_subplot()
sns.histplot(data['Units_Sold'], bins=20, kde=True, ax=units_hist_ax)
units_hist_ax.set(
    title='Distribution of Units Sold',
    xlabel='Units Sold',
    ylabel='Frequency',
)
plt.show()
# Scatter plot of units sold against revenue, one point per row.
units_ax = plt.figure(figsize=(10, 5)).add_subplot()
sns.scatterplot(data=data, x='Units_Sold', y='Revenue', ax=units_ax)
units_ax.set_title('Units Sold vs Revenue')
units_ax.set_xlabel('Units Sold')
units_ax.set_ylabel('Revenue')
plt.show()
# Total units sold per category.
units_by_category = data.groupby('Category')['Units_Sold'].sum()

# Bar chart of units sold by category, drawn on an explicitly created Axes.
category_units_ax = plt.figure(figsize=(10, 5)).add_subplot()
units_by_category.plot.bar(ax=category_units_ax)
category_units_ax.set(
    title='Units Sold by Category',
    xlabel='Category',
    ylabel='Units Sold',
)
plt.show()
# Revenue earned per ad impression. Rows with zero impressions are masked to
# NaN before dividing: a plain division would produce inf (or NaN for 0/0)
# there, and infinite values would distort the box plot's scale.
data['Revenue_per_Impression'] = (
    data['Revenue'] / data['Impressions'].astype(float).mask(data['Impressions'] == 0)
)

# Box plot of revenue per impression for each category.
plt.figure(figsize=(10, 5))
sns.boxplot(data=data, x='Category', y='Revenue_per_Impression')
plt.title('Revenue per Impression by Category')
plt.xlabel('Category')
plt.ylabel('Revenue per Impression')
plt.show()