""".211 |
Automatically generated by Colab. |
Original file is located at |
https://colab.research.google.com/drive/1uZZV_SkJj2tua-CdVEbGu85Tl8vrTbWD |
""" |
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Print the path of every file found under the Kaggle input directory.
# os.walk ignores directory-listing errors by default, so a missing
# directory simply produces no output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# Load the dataset, then print a preview, summary statistics for every
# column, and the per-column count of missing values.
data = pd.read_csv('/content/synthetic_ecommerce_data.csv')

for heading, summary in (
    ("Dataset Preview:", data.head()),
    ("\nDescriptive Statistics:", data.describe(include='all')),
    ("\nMissing Values:", data.isnull().sum()),
):
    print(heading)
    print(summary)
# Revenue over time: convert the transaction dates to datetimes, total the
# revenue for each date, and draw the totals as a line chart.
data['Transaction_Date'] = pd.to_datetime(data['Transaction_Date'])
daily_revenue = data.groupby('Transaction_Date')['Revenue'].sum()

_, ax = plt.subplots(figsize=(10, 5))
ax.plot(daily_revenue, label='Daily Revenue')
ax.set_title('Revenue Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Revenue')
ax.legend()
plt.show()
# Top 10 products by total revenue, shown as a bar chart.
top_products = (
    data.groupby('Product_ID')['Revenue']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
plt.figure(figsize=(10, 5))
top_products.plot(kind='bar')
plt.title('Top 10 Products by Revenue')
plt.xlabel('Product Id')
# Label the y-axis, as the other revenue bar charts in this script do.
plt.ylabel('Revenue')
plt.show()
# Total revenue per category.
# NOTE(review): this series is not plotted or otherwise used in the visible
# part of the script - confirm whether a "Revenue by Category" chart is
# missing here.
category_revenue = data.groupby('Category')['Revenue'].sum()

# Ad spend against revenue, one point per row of the dataset.
plt.figure(figsize=(10, 5))
# x must be the Ad_Spend column; the previous `x=data=[...]` did not parse.
sns.scatterplot(x=data['Ad_Spend'], y=data['Revenue'])
plt.title('Ad Spend vs Revenue')
plt.xlabel('Ad Spend')
plt.ylabel('Revenue')
plt.show()
# Histogram (20 bins, with a KDE curve) of the ad click-through rate.
_, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data=data, x='Ad_CTR', bins=20, kde=True, ax=ax)
ax.set_title('Distribution of Ad Click-Through Rate (CTR)')
ax.set_xlabel('CTR')
ax.set_ylabel('Frequency')
plt.show()
# Total revenue for each region, shown as a bar chart.
region_revenue = data.groupby('Region')['Revenue'].sum()

_, ax = plt.subplots(figsize=(10, 5))
region_revenue.plot.bar(ax=ax)
ax.set_title('Revenue by Region')
ax.set_xlabel('Region')
ax.set_ylabel('Revenue')
plt.show()
# Revenue per calendar month, shown as a bar chart.
# .dt.month keeps only the month number, so if the data spans several years
# the same month from different years is summed into one bar.
data['Month'] = data['Transaction_Date'].dt.month
monthly_revenue = data.groupby('Month')['Revenue'].sum()
plt.figure(figsize=(10, 5))
monthly_revenue.plot(kind='bar')
# Title spelling corrected (was "Reveneu").
plt.title('Monthly Revenue Trend')
plt.xlabel('Month')
plt.ylabel('Revenue')
plt.show()
# Scatter plot of the applied discount against revenue, one point per row.
_, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(data=data, x='Discount_Applied', y='Revenue', ax=ax)
ax.set_title('Discount Applied vs Revenue')
ax.set_xlabel('Discount (%)')
ax.set_ylabel('Revenue')
plt.show()
# Scatter plot of clicks against revenue, one point per row.
plt.figure(figsize=(10, 5))
sns.scatterplot(x=data['Clicks'], y=data['Revenue'])
plt.title('Clicks vs Revenue')
# Set the x-axis label explicitly, as the other scatter plots in this
# script do, instead of relying on the label derived from the column name.
plt.xlabel('Clicks')
plt.ylabel('Revenue')
plt.show()
# Histogram (20 bins, with a KDE curve) of the conversion rate.
_, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data=data, x='Conversion_Rate', bins=20, kde=True, ax=ax)
ax.set_title('Distribution of Conversion Rate')
ax.set_xlabel('Conversion Rate')
ax.set_ylabel('Frequency')
plt.show()
# Scatter plot of conversion rate against revenue, one point per row.
_, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(data=data, x='Conversion_Rate', y='Revenue', ax=ax)
ax.set_title('Conversion Rate vs Revenue')
ax.set_xlabel('Conversion Rate')
ax.set_ylabel('Revenue')
plt.show()
# Share of total revenue contributed by each region, as a percentage,
# shown as a bar chart.
region_revenue = data.groupby('Region')['Revenue'].sum()
total_revenue = region_revenue.sum()
region_contribution = (region_revenue / total_revenue) * 100
plt.figure(figsize=(10, 5))
region_contribution.plot(kind='bar')
# Title spelling corrected (was "Reigion").
plt.title('Revenue Contribution by Region (%)')
plt.xlabel('Region')
plt.ylabel('Revenue Contribution (%)')
plt.show()
# Revenue earned per unit of ad spend, compared across categories with a
# box plot.
# A zero ad spend would make the division produce inf (or NaN for 0/0),
# which distorts the box plot; treat zero spend as missing so the ratio is
# NaN for those rows instead.
ad_spend = data['Ad_Spend'].replace(0, np.nan)
data['Ad_Efficiency'] = data['Revenue'] / ad_spend
plt.figure(figsize=(10, 5))
sns.boxplot(data=data, x='Category', y='Ad_Efficiency')
plt.title('Ad Spend Efficiency by Category')
plt.xlabel('Category')
plt.ylabel('Revenue per Unit of Ad Spend')
plt.show()
# Histogram (20 bins, with a KDE curve) of the units sold.
_, ax = plt.subplots(figsize=(10, 5))
sns.histplot(data=data, x='Units_Sold', bins=20, kde=True, ax=ax)
ax.set_title('Distribution of Units Sold')
ax.set_xlabel('Units Sold')
ax.set_ylabel('Frequency')
plt.show()
# Scatter plot of units sold against revenue, one point per row.
_, ax = plt.subplots(figsize=(10, 5))
sns.scatterplot(data=data, x='Units_Sold', y='Revenue', ax=ax)
ax.set_title('Units Sold vs Revenue')
ax.set_xlabel('Units Sold')
ax.set_ylabel('Revenue')
plt.show()
# Total units sold for each category, shown as a bar chart.
units_by_category = data.groupby('Category')['Units_Sold'].sum()

_, ax = plt.subplots(figsize=(10, 5))
units_by_category.plot.bar(ax=ax)
ax.set_title('Units Sold by Category')
ax.set_xlabel('Category')
ax.set_ylabel('Units Sold')
plt.show()
# Revenue earned per ad impression, compared across categories with a
# box plot.
# A zero impression count would make the division produce inf (or NaN for
# 0/0), which distorts the box plot; treat zero impressions as missing so
# the ratio is NaN for those rows instead.
impressions = data['Impressions'].astype(float).replace(0, np.nan)
data['Revenue_per_Impression'] = data['Revenue'] / impressions
plt.figure(figsize=(10, 5))
sns.boxplot(data=data, x='Category', y='Revenue_per_Impression')
plt.title('Revenue per Impression by Category')
plt.xlabel('Category')
plt.ylabel('Revenue per Impression')
plt.show()