taupirho committed on
Commit
325df58
·
verified ·
1 Parent(s): bbeec06

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. .github/workflows/update_space.yml +28 -0
  2. README.md +3 -9
  3. app.py +295 -0
  4. requirements.txt +3 -0
  5. sales_data.csv +0 -0
.github/workflows/update_space.yml ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Deploys the Gradio app to Hugging Face Spaces on every push to main.
name: Run Python script

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        # v2 runs on a retired Node runtime; v4 is the maintained major.
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # README pins sdk_version 5.9.1; Gradio 5.x needs Python 3.10+.
          python-version: '3.10'

      - name: Install Gradio
        run: python -m pip install gradio

      - name: Log in to Hugging Face
        # Pass the token through the environment instead of splicing it into
        # the command text, so it never becomes part of the script source.
        env:
          HF_TOKEN: ${{ secrets.hf_token }}
        run: python -c 'import os, huggingface_hub; huggingface_hub.login(token=os.environ["HF_TOKEN"])'

      - name: Deploy to Spaces
        run: gradio deploy
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Data Dashboard
3
- emoji: 🐨
4
- colorFrom: yellow
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.19.0
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: data-dashboard
 
 
 
 
 
3
  app_file: app.py
4
+ sdk: gradio
5
+ sdk_version: 5.9.1
6
  ---
 
 
app.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import datetime
5
+ import warnings
6
+ import os
7
+ import tempfile
8
+ from cachetools import cached, TTLCache
9
+
10
warnings.filterwarnings("ignore", category=FutureWarning, module="seaborn")

# ------------------------------------------------------------------
# 1) Load CSV data once
# ------------------------------------------------------------------
# Module-level holder for the sales table; filled in by load_csv_data().
csv_data = None


def load_csv_data():
    """Read ``sales_data.csv`` into the module-level ``csv_data`` DataFrame.

    ``order_date`` is parsed as a date (day-first), and the remaining known
    columns are given explicit dtypes. The function returns nothing; callers
    read the result from the ``csv_data`` global.
    """
    global csv_data

    # Explicit dtypes, grouped by kind; adjust if the CSV schema changes.
    column_dtypes = {
        **dict.fromkeys(("order_id", "customer_id", "product_id", "quantity"), "Int64"),
        **dict.fromkeys(("price", "total"), "float"),
        **dict.fromkeys(("customer_name", "product_names", "categories"), "string"),
    }

    read_options = {
        "parse_dates": ["order_date"],
        "dayfirst": True,  # dates are expected in DD/MM/YYYY order
        "low_memory": False,
        "dtype": column_dtypes,
    }
    csv_data = pd.read_csv("sales_data.csv", **read_options)


# Populate the table at import time so every accessor below can use it.
load_csv_data()
42
+
43
cache = TTLCache(maxsize=128, ttl=300)


@cached(cache)
def get_unique_categories():
    """Return the sorted, de-duplicated category labels for the dropdown.

    Labels are capitalised first and only then de-duplicated and sorted, so
    values that differ only by case collapse into a single entry and the
    final list is ordered by the text that is actually displayed.

    Returns:
        A list of capitalised category names, or an empty list when no data
        has been loaded. The result is memoised in ``cache``.
    """
    if csv_data is None:
        return []
    raw_labels = csv_data['categories'].dropna().unique().tolist()
    return sorted({label.capitalize() for label in raw_labels})
53
+
54
def get_date_range():
    """Return the earliest and latest ``order_date`` in the loaded data.

    Returns:
        ``(min_date, max_date)``, or ``(None, None)`` when no data is loaded,
        the table is empty, or every ``order_date`` is missing. The last case
        matters because ``min()``/``max()`` would otherwise yield ``NaT``,
        which callers that only check for ``None`` cannot format.
    """
    if csv_data is None or csv_data.empty:
        return None, None

    order_dates = csv_data['order_date']
    earliest, latest = order_dates.min(), order_dates.max()
    if pd.isna(earliest) or pd.isna(latest):
        return None, None
    return earliest, latest
59
+
60
def filter_data(start_date, end_date, category):
    """Return a copy of the sales rows inside a date window and category.

    Args:
        start_date: First day to include; a ``'%Y-%m-%d'`` string or a
            date/datetime-like value.
        end_date: Last day to include (the whole day counts, regardless of
            any time-of-day on the rows); same accepted types.
        category: A capitalised category label, or ``"All Categories"`` to
            skip category filtering.

    Returns:
        A new DataFrame with the matching rows. It is empty when no data is
        loaded or when either date is missing.

    Raises:
        ValueError: If a string date does not match ``'%Y-%m-%d'``.
    """
    # Same guard as the other accessors: nothing loaded means nothing to show.
    if csv_data is None:
        return pd.DataFrame()

    if isinstance(start_date, str):
        start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d').date()
    if isinstance(end_date, str):
        end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d').date()

    window_start = pd.to_datetime(start_date)
    window_end = pd.to_datetime(end_date)
    # A cleared date picker arrives as None; return no rows instead of
    # failing inside the comparison below.
    if pd.isna(window_start) or pd.isna(window_end):
        return csv_data.iloc[0:0].copy()

    # Work on whole days: floor the start and use an exclusive bound one day
    # after the end, so rows timestamped later on the end date are kept.
    window_start = window_start.normalize()
    end_exclusive = window_end.normalize() + pd.Timedelta(days=1)

    order_dates = csv_data['order_date']
    keep = (order_dates >= window_start) & (order_dates < end_exclusive)

    if category != "All Categories":
        keep = keep & (csv_data['categories'].str.capitalize() == category)

    # Single copy at the end so callers can add columns without touching
    # the shared table.
    return csv_data.loc[keep].copy()
77
+
78
def get_dashboard_stats(start_date, end_date, category):
    """Compute the headline metrics for the filtered sales rows.

    Args:
        start_date, end_date, category: Passed straight to ``filter_data``.

    Returns:
        ``(total_revenue, total_orders, avg_order_value, top_category)``.
        Revenue is ``price * quantity`` summed over the rows, orders are the
        distinct ``order_id`` values, and the top category is the one with
        the highest revenue, capitalised. When there are no rows the result
        is ``(0, 0, 0, "N/A")``; when rows exist but none has a category the
        last element is ``"N/A"``.
    """
    df = filter_data(start_date, end_date, category)
    if df.empty:
        return (0, 0, 0, "N/A")

    df['revenue'] = df['price'] * df['quantity']
    total_revenue = df['revenue'].sum()
    total_orders = df['order_id'].nunique()
    avg_order_value = total_revenue / total_orders if total_orders else 0

    cat_revenues = df.groupby('categories')['revenue'].sum().sort_values(ascending=False)
    if cat_revenues.empty:
        # Keep the placeholder verbatim; capitalising it would give "N/a".
        top_category = "N/A"
    else:
        top_category = cat_revenues.index[0].capitalize()

    return (total_revenue, total_orders, avg_order_value, top_category)
92
+
93
def get_data_for_table(start_date, end_date, category):
    """Return the filtered rows arranged for the raw-data table.

    Rows are sorted by ``order_id`` ascending and ``order_date`` descending,
    limited to the known display columns that are present, and given a
    ``revenue`` column equal to ``price * quantity``. An empty DataFrame is
    returned when the filter matches nothing.
    """
    filtered = filter_data(start_date, end_date, category)
    if filtered.empty:
        return pd.DataFrame()

    ordered = filtered.sort_values(
        by=["order_id", "order_date"],
        ascending=[True, False],
    )

    preferred_columns = (
        "order_id", "order_date", "customer_id", "customer_name",
        "product_id", "product_names", "categories", "quantity",
        "price", "total",
    )
    present = [name for name in preferred_columns if name in ordered.columns]

    table = ordered[present].copy()
    table['revenue'] = table['price'] * table['quantity']
    return table
110
+
111
def get_plot_data(start_date, end_date, category):
    """Return revenue per calendar day for the filtered rows.

    The result has a ``date`` column (the day of ``order_date``) and a
    ``revenue`` column (sum of ``price * quantity``). An empty DataFrame is
    returned when the filter matches nothing.
    """
    filtered = filter_data(start_date, end_date, category)
    if filtered.empty:
        return pd.DataFrame()

    filtered['revenue'] = filtered['price'] * filtered['quantity']
    order_day = filtered['order_date'].dt.date
    daily_revenue = filtered.groupby(order_day)['revenue'].sum().reset_index()
    return daily_revenue.rename(columns={'order_date': 'date'})
119
+
120
def get_revenue_by_category(start_date, end_date, category):
    """Return total revenue per category, highest first.

    The result has ``categories`` and ``revenue`` columns, where revenue is
    the sum of ``price * quantity``. An empty DataFrame is returned when the
    filter matches nothing.
    """
    filtered = filter_data(start_date, end_date, category)
    if filtered.empty:
        return pd.DataFrame()

    filtered['revenue'] = filtered['price'] * filtered['quantity']
    per_category = filtered.groupby('categories')['revenue'].sum().reset_index()
    return per_category.sort_values(by='revenue', ascending=False)
128
+
129
def get_top_products(start_date, end_date, category):
    """Return the ten products with the highest revenue, highest first.

    The result has ``product_names`` and ``revenue`` columns, where revenue
    is the sum of ``price * quantity``. An empty DataFrame is returned when
    the filter matches nothing.
    """
    filtered = filter_data(start_date, end_date, category)
    if filtered.empty:
        return pd.DataFrame()

    filtered['revenue'] = filtered['price'] * filtered['quantity']
    per_product = filtered.groupby('product_names')['revenue'].sum().reset_index()
    ranked = per_product.sort_values(by='revenue', ascending=False)
    return ranked.head(10)
137
+
138
def create_matplotlib_figure(data, x_col, y_col, title, xlabel, ylabel, orientation='v'):
    """Render a bar chart to a temporary PNG file and return the file path.

    Args:
        data: DataFrame holding the values to plot; when it is empty a
            "No data available" placeholder is drawn instead.
        x_col: Column used for the bar positions/labels.
        y_col: Column used for the bar lengths.
        title: Chart title.
        xlabel: Label for the horizontal axis.
        ylabel: Label for the vertical axis.
        orientation: ``'v'`` for vertical bars (x tick labels rotated 45
            degrees); any other value draws horizontal bars with the first
            row at the top.

    Returns:
        Path of the PNG that was written. The file is created with
        ``tempfile.mkstemp`` and is not removed here; the caller owns it.
    """
    # Build the figure directly instead of through pyplot: pyplot tracks a
    # process-wide "current figure", which concurrent callbacks would share,
    # and its figures stay registered until explicitly closed. A standalone
    # Figure is simply garbage-collected, even if saving fails.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(10, 6))
    ax = fig.subplots()

    if data.empty:
        ax.text(0.5, 0.5, 'No data available', ha='center', va='center')
    elif orientation == 'v':
        ax.bar(data[x_col], data[y_col])
        for tick_label in ax.get_xticklabels():
            tick_label.set_rotation(45)
            tick_label.set_horizontalalignment('right')
    else:
        ax.barh(data[x_col], data[y_col])
        ax.invert_yaxis()

    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    fig.tight_layout()

    # Reserve a unique path, then release the handle before writing so the
    # save does not target a file that is still open.
    handle, png_path = tempfile.mkstemp(suffix=".png")
    os.close(handle)
    fig.savefig(png_path)
    return png_path
159
+
160
def update_dashboard(start_date, end_date, category):
    """Recompute every dashboard output for the given filters.

    Returns an 8-tuple in the order the UI outputs are wired: the three
    chart image paths (revenue over time, revenue by category, top
    products), the table DataFrame, then total revenue, total orders,
    average order value and top category.
    """
    filters = (start_date, end_date, category)

    total_revenue, total_orders, avg_order_value, top_category = get_dashboard_stats(*filters)

    # Charts: each one is rendered from its own aggregated view of the data.
    revenue_over_time_path = create_matplotlib_figure(
        get_plot_data(*filters), 'date', 'revenue',
        "Revenue Over Time", "Date", "Revenue",
    )
    revenue_by_category_path = create_matplotlib_figure(
        get_revenue_by_category(*filters), 'categories', 'revenue',
        "Revenue by Category", "Category", "Revenue",
    )
    top_products_path = create_matplotlib_figure(
        get_top_products(*filters), 'product_names', 'revenue',
        "Top Products", "Revenue", "Product Name", orientation='h',
    )

    # Raw rows for the table under the charts.
    table_data = get_data_for_table(*filters)

    return (
        revenue_over_time_path,
        revenue_by_category_path,
        top_products_path,
        table_data,
        total_revenue,
        total_orders,
        avg_order_value,
        top_category,
    )
194
+
195
def create_dashboard():
    """Build the Gradio Blocks UI for the sales dashboard and return it.

    The page has a filter row (start date, end date, category), a metrics
    row, three chart tabs and a raw-data table. Changing any filter re-runs
    ``update_dashboard``; the same function fills the page on first load
    using the full date range and "All Categories".
    """
    first_day, last_day = get_date_range()
    if first_day is None or last_day is None:
        # No usable dates in the data: fall back to the current moment.
        first_day = datetime.datetime.now()
        last_day = datetime.datetime.now()

    with gr.Blocks(css="""
        footer {display: none !important;}
        .tabs {border: none !important;}
        .gr-plot {border: none !important; box-shadow: none !important;}
    """) as dashboard:

        gr.Markdown("# Sales Performance Dashboard")

        # Filters row
        with gr.Row():
            start_date = gr.DateTime(
                label="Start Date",
                value=first_day.strftime('%Y-%m-%d'),
                include_time=False,
                type="datetime",
            )
            end_date = gr.DateTime(
                label="End Date",
                value=last_day.strftime('%Y-%m-%d'),
                include_time=False,
                type="datetime",
            )
            category_filter = gr.Dropdown(
                choices=["All Categories"] + get_unique_categories(),
                label="Category",
                value="All Categories",
            )

        gr.Markdown("# Key Metrics")

        # Stats row
        with gr.Row():
            total_revenue = gr.Number(label="Total Revenue", value=0)
            total_orders = gr.Number(label="Total Orders", value=0)
            avg_order_value = gr.Number(label="Average Order Value", value=0)
            top_category = gr.Textbox(label="Top Category", value="N/A")

        gr.Markdown("# Visualisations")
        # Tabs for plots
        with gr.Tabs():
            with gr.Tab("Revenue Over Time"):
                revenue_over_time_image = gr.Image(label="Revenue Over Time", container=False)
            with gr.Tab("Revenue by Category"):
                revenue_by_category_image = gr.Image(label="Revenue by Category", container=False)
            with gr.Tab("Top Products"):
                top_products_image = gr.Image(label="Top Products", container=False)

        gr.Markdown("# Raw Data")
        # Data table (below the plots)
        data_table = gr.DataFrame(
            label="Sales Data",
            type="pandas",
            interactive=False,
        )

        # Components in the order update_dashboard returns their values.
        filter_inputs = [start_date, end_date, category_filter]
        dashboard_outputs = [
            revenue_over_time_image,
            revenue_by_category_image,
            top_products_image,
            data_table,
            total_revenue,
            total_orders,
            avg_order_value,
            top_category,
        ]

        # When any filter changes, refresh everything.
        for filter_component in filter_inputs:
            filter_component.change(
                fn=lambda s, e, c: update_dashboard(s, e, c),
                inputs=filter_inputs,
                outputs=dashboard_outputs,
            )

        # Initial load: show the full date range across all categories.
        dashboard.load(
            fn=lambda: update_dashboard(first_day, last_day, "All Categories"),
            outputs=dashboard_outputs,
        )

    return dashboard
292
+
293
if __name__ == "__main__":
    # Script entry point: build the UI and serve it without a share link.
    dashboard = create_dashboard()
    dashboard.launch(share=False)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ pandas
3
+ matplotlib
sales_data.csv ADDED
The diff for this file is too large to render. See raw diff