|
|
|
""".211 |
|
|
|
Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1uZZV_SkJj2tua-CdVEbGu85Tl8vrTbWD
|
""" |
|
|
|
import numpy as np |
|
import pandas as pd |
|
|
|
import os |
|
# Print the full path of every file found anywhere below /kaggle/input.
# os.walk ignores listing errors by default, so nothing is printed when the
# directory is absent.
for input_path in (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
):
    print(input_path)
|
|
|
import pandas as pd |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
|
|
# Load the e-commerce dataset from its hard-coded location under /content.
data = pd.read_csv('/content/synthetic_ecommerce_data.csv')

# First look at the data: the leading rows, summary statistics for every
# column (include='all' covers non-numeric columns too), and the number of
# missing values per column. Each heading is printed on its own line,
# followed by the corresponding table.
print("Dataset Preview:", data.head(), sep="\n")
print("\nDescriptive Statistics:", data.describe(include='all'), sep="\n")
print("\nMissing Values:", data.isnull().sum(), sep="\n")
|
|
|
# Convert the transaction date column to datetime values in place.
data['Transaction_Date'] = pd.to_datetime(data['Transaction_Date'])

# Sum of revenue for each distinct transaction date.
daily_revenue = data.groupby('Transaction_Date')['Revenue'].sum()

# Line chart of the summed revenue against the transaction date.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(daily_revenue, label='Daily Revenue')
ax.set_title('Revenue Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Revenue')
ax.legend()
plt.show()
|
|
|
# Total revenue per product, keeping the ten highest-earning products in
# descending order of revenue.
top_products = (
    data.groupby('Product_ID')['Revenue']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

# Bar chart of the ten best-selling products by revenue.
plt.figure(figsize=(10, 5))
top_products.plot(kind='bar')
plt.title('Top 10 Products by Revenue')
plt.xlabel('Product Id')
# The y axis was previously unlabeled; label it like every other revenue chart.
plt.ylabel('Revenue')
plt.show()
|
|
|
# Total revenue per product category.
# NOTE(review): this aggregate is not plotted or otherwise used in the visible
# part of the script -- confirm whether a per-category revenue chart was
# intended here.
category_revenue = data.groupby('Category')['Revenue'].sum()

# Scatter plot of revenue against advertising spend, one point per row.
plt.figure(figsize=(10, 5))
# Fixed: the x argument used to read `x=data=['Ad_Spend']`, which is a
# SyntaxError and prevented the whole script from running.
sns.scatterplot(x=data['Ad_Spend'], y=data['Revenue'])
plt.title('Ad Spend vs Revenue')
plt.xlabel('Ad Spend')
plt.ylabel('Revenue')
plt.show()
|
|
|
# Histogram of the ad click-through rate (20 bins) with a KDE curve overlaid.
ax = plt.figure(figsize=(10, 5)).gca()
sns.histplot(data['Ad_CTR'], bins=20, kde=True, ax=ax)
ax.set(
    title='Distribution of Ad Click-Through Rate (CTR)',
    xlabel='CTR',
    ylabel='Frequency',
)
plt.show()
|
|
|
# Total revenue for each region.
region_revenue = data.groupby('Region')['Revenue'].sum()

# Bar chart with one bar per region.
fig, ax = plt.subplots(figsize=(10, 5))
region_revenue.plot.bar(ax=ax)
ax.set(title='Revenue by Region', xlabel='Region', ylabel='Revenue')
plt.show()
|
|
|
# Calendar month number (1-12) of each transaction.
# NOTE(review): .dt.month discards the year, so if the data spans several
# years the same month of different years is summed together -- confirm that
# this is intended.
data['Month'] = data['Transaction_Date'].dt.month

# Total revenue for each month number.
monthly_revenue = data.groupby('Month')['Revenue'].sum()

# Bar chart of revenue per month.
plt.figure(figsize=(10, 5))
monthly_revenue.plot(kind='bar')
# Fixed: the title was misspelled "Reveneu".
plt.title('Monthly Revenue Trend')
plt.xlabel('Month')
plt.ylabel('Revenue')
plt.show()
|
|
|
# Scatter plot of revenue against the applied discount, one point per row.
fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(x=data['Discount_Applied'], y=data['Revenue'], ax=ax)
ax.set_title('Discount Applied vs Revenue')
ax.set_xlabel('Discount (%)')
ax.set_ylabel('Revenue')
plt.show()
|
|
|
# Scatter plot of revenue against the number of clicks, one point per row.
plt.figure(figsize=(10, 5))
sns.scatterplot(x=data['Clicks'], y=data['Revenue'])
plt.title('Clicks vs Revenue')
# This was the only chart without an explicit x-axis label; set it so the
# axis text no longer depends on the column name.
plt.xlabel('Clicks')
plt.ylabel('Revenue')
plt.show()
|
|
|
# Histogram of the conversion rate (20 bins) with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data['Conversion_Rate'], bins=20, kde=True, ax=ax)
ax.set_title('Distribution of Conversion Rate')
ax.set_xlabel('Conversion Rate')
ax.set_ylabel('Frequency')
plt.show()
|
|
|
# Scatter plot of revenue against the conversion rate, one point per row.
ax = plt.figure(figsize=(10, 5)).gca()
sns.scatterplot(x=data['Conversion_Rate'], y=data['Revenue'], ax=ax)
ax.set(
    title='Conversion Rate vs Revenue',
    xlabel='Conversion Rate',
    ylabel='Revenue',
)
plt.show()
|
|
|
# Total revenue per region, and each region's share of the overall revenue
# expressed as a percentage.
region_revenue = data.groupby('Region')['Revenue'].sum()
total_revenue = region_revenue.sum()
region_contribution = (region_revenue / total_revenue) * 100

# Bar chart of the percentage contribution of each region.
plt.figure(figsize=(10, 5))
region_contribution.plot(kind='bar')
# Fixed: the title was misspelled "Reigion".
plt.title('Revenue Contribution by Region (%)')
plt.xlabel('Region')
plt.ylabel('Revenue Contribution (%)')
plt.show()
|
|
|
# Revenue earned per unit of ad spend, computed row by row.
# A row with zero ad spend would divide by zero and produce an infinite
# ratio, which distorts the box-plot statistics and axis scaling. Masking the
# zero denominators turns those rows into NaN (missing) instead; rows with a
# non-zero spend are unaffected.
ad_spend = data['Ad_Spend']
data['Ad_Efficiency'] = data['Revenue'] / ad_spend.where(ad_spend != 0)

# Box plot of the efficiency ratio for each product category.
plt.figure(figsize=(10, 5))
sns.boxplot(data=data, x='Category', y='Ad_Efficiency')
plt.title('Ad Spend Efficiency by Category')
plt.xlabel('Category')
plt.ylabel('Revenue per Unit of Ad Spend')
plt.show()
|
|
|
# Histogram of units sold (20 bins) with a KDE curve overlaid.
ax = plt.figure(figsize=(10, 5)).gca()
sns.histplot(data['Units_Sold'], bins=20, kde=True, ax=ax)
ax.set(
    title='Distribution of Units Sold',
    xlabel='Units Sold',
    ylabel='Frequency',
)
plt.show()
|
|
|
# Scatter plot of revenue against units sold, one point per row.
fig, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(x=data['Units_Sold'], y=data['Revenue'], ax=ax)
ax.set_title('Units Sold vs Revenue')
ax.set_xlabel('Units Sold')
ax.set_ylabel('Revenue')
plt.show()
|
|
|
# Total number of units sold in each product category.
units_by_category = data.groupby('Category')['Units_Sold'].sum()

# Bar chart with one bar per category.
fig, ax = plt.subplots(figsize=(10, 5))
units_by_category.plot.bar(ax=ax)
ax.set(title='Units Sold by Category', xlabel='Category', ylabel='Units Sold')
plt.show()
|
|
|
# Revenue earned per ad impression, computed row by row.
# The impression counts are cast to float as before. A row with zero
# impressions would divide by zero and produce an infinite ratio, so zero
# denominators are masked to NaN (missing); all other rows are unaffected.
impressions = data['Impressions'].astype(float)
data['Revenue_per_Impression'] = data['Revenue'] / impressions.where(impressions != 0)

# Box plot of revenue per impression for each product category.
plt.figure(figsize=(10, 5))
sns.boxplot(data=data, x='Category', y='Revenue_per_Impression')
plt.title('Revenue per Impression by Category')
plt.xlabel('Category')
plt.ylabel('Revenue per Impression')
plt.show()