adamnarozniak
committed on
Commit
•
3a3be3b
1
Parent(s):
a44b795
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,361 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from flwr_datasets import FederatedDataset
|
3 |
+
from flwr_datasets.partitioner import (
|
4 |
+
DirichletPartitioner,
|
5 |
+
IidPartitioner,
|
6 |
+
PathologicalPartitioner,
|
7 |
+
ShardPartitioner,
|
8 |
+
LinearPartitioner,
|
9 |
+
SquarePartitioner,
|
10 |
+
ExponentialPartitioner,
|
11 |
+
NaturalIdPartitioner
|
12 |
+
)
|
13 |
+
from flwr_datasets.visualization import plot_label_distributions
|
14 |
+
import matplotlib.pyplot as plt
|
15 |
+
|
16 |
+
# Registry of selectable partitioners: the key is the name offered in the
# "Partitioner" dropdown, the value is the class instantiated for it.
partitioner_types = dict(
    DirichletPartitioner=DirichletPartitioner,
    IidPartitioner=IidPartitioner,
    PathologicalPartitioner=PathologicalPartitioner,
    ShardPartitioner=ShardPartitioner,
    LinearPartitioner=LinearPartitioner,
    SquarePartitioner=SquarePartitioner,
    ExponentialPartitioner=ExponentialPartitioner,
    NaturalIdPartitioner=NaturalIdPartitioner,
)

# For each partitioner name, the constructor arguments whose input widgets
# should be visible while that partitioner is selected.
partitioner_parameters = dict(
    DirichletPartitioner=[
        "num_partitions",
        "alpha",
        "partition_by",
        "min_partition_size",
        "self_balancing",
    ],
    IidPartitioner=["num_partitions"],
    PathologicalPartitioner=[
        "num_partitions",
        "partition_by",
        "num_classes_per_partition",
        "class_assignment_mode",
    ],
    ShardPartitioner=[
        "num_partitions",
        "partition_by",
        "num_shards_per_partition",
        "shard_size",
        "keep_incomplete_shard",
    ],
    NaturalIdPartitioner=["partition_by"],
    # The size-skew partitioners only take the number of partitions; each
    # one gets its own list object.
    **{
        size_skew_name: ["num_partitions"]
        for size_skew_name in (
            "LinearPartitioner",
            "SquarePartitioner",
            "ExponentialPartitioner",
        )
    },
)
|
37 |
+
|
38 |
+
def update_parameter_visibility(partitioner_type):
    """Build the visibility updates for the partitioner parameter widgets.

    The parameters registered for ``partitioner_type`` in
    ``partitioner_parameters`` are looked up (an unregistered name yields
    no parameters, so every widget is hidden).

    Returns:
        A list of ten ``gr.update(visible=...)`` objects, one per parameter
        widget, in the fixed order listed in ``widget_order`` below.
    """
    print("calling update_parameter_visibility")
    print(partitioner_type)
    shown = partitioner_parameters.get(partitioner_type, [])

    # The position of each name must line up with the `outputs` list of the
    # change handler that this function is wired to.
    widget_order = (
        "num_partitions",
        "alpha",
        "partition_by",
        "min_partition_size",
        "self_balancing",
        "num_classes_per_partition",
        "class_assignment_mode",
        "num_shards_per_partition",
        "shard_size",
        "keep_incomplete_shard",
    )
    return [gr.update(visible=(param in shown)) for param in widget_order]
|
94 |
+
|
95 |
+
def partition_and_plot(
    dataset,
    partitioner_type,
    num_partitions,
    alpha,
    partition_by,
    min_partition_size,
    self_balancing,
    num_classes_per_partition,
    class_assignment_mode,
    num_shards_per_partition,
    shard_size,
    keep_incomplete_shard,
    label_name,
    title,
    legend,
    verbose_labels,
    size_unit,
    partition_id_axis,
):
    """Partition a dataset, plot its label distribution and emit a code snippet.

    Only the arguments relevant to ``partitioner_type`` are converted and
    passed to the partitioner constructor; the others are ignored.

    Args:
        dataset: Dataset identifier handed to ``FederatedDataset``.
        partitioner_type: Key into ``partitioner_types`` selecting the class.
        num_partitions, alpha, partition_by, min_partition_size,
        self_balancing, num_classes_per_partition, class_assignment_mode,
        num_shards_per_partition, shard_size: Raw widget values for the
            partitioner constructor; numeric ones are cast with
            ``int``/``float`` here.
        keep_incomplete_shard: The string ``"True"`` or ``"False"``; it is
            converted to a bool by comparison with ``"True"``.
        label_name, title, legend, verbose_labels, size_unit,
        partition_id_axis: Forwarded unchanged to
            ``plot_label_distributions``.

    Returns:
        A 2-tuple ``(plot_filename, code)`` on success. On any failure the
        tuple is ``(None, "Error: <message>")`` so that the number of
        returned values always matches the two outputs wired to the submit
        button.
    """
    partitioner_params = {}
    try:
        if partitioner_type == "DirichletPartitioner":
            partitioner_params = {
                "num_partitions": int(num_partitions),
                "partition_by": partition_by,
                "alpha": float(alpha),
                "min_partition_size": int(min_partition_size),
                "self_balancing": self_balancing,
            }
        elif partitioner_type == "IidPartitioner":
            partitioner_params = {
                "num_partitions": int(num_partitions),
            }
        elif partitioner_type == "PathologicalPartitioner":
            partitioner_params = {
                "num_partitions": int(num_partitions),
                "partition_by": partition_by,
                "num_classes_per_partition": int(num_classes_per_partition),
                "class_assignment_mode": class_assignment_mode,
            }
        elif partitioner_type == "ShardPartitioner":
            partitioner_params = {
                "num_partitions": int(num_partitions),
                "partition_by": partition_by,
                "num_shards_per_partition": int(num_shards_per_partition),
                "shard_size": int(shard_size),
                "keep_incomplete_shard": keep_incomplete_shard == "True",
            }
        elif partitioner_type == "NaturalIdPartitioner":
            partitioner_params = {
                "partition_by": partition_by,
            }
        elif partitioner_type in ["LinearPartitioner", "SquarePartitioner", "ExponentialPartitioner"]:
            partitioner_params = {
                "num_partitions": int(num_partitions),
            }

        # An unknown partitioner name raises KeyError here and is reported
        # through the error path below.
        partitioner_class = partitioner_types[partitioner_type]
        partitioner = partitioner_class(**partitioner_params)
        fds = FederatedDataset(
            dataset=dataset,
            partitioners={
                "train": partitioner,
            },
            trust_remote_code=True,
        )
        partitioner = fds.partitioners["train"]
        figure, axis, dataframe = plot_label_distributions(
            partitioner=partitioner,
            label_name=label_name,
            title=title,
            legend=legend,
            verbose_labels=verbose_labels,
            size_unit=size_unit,
            partition_id_axis=partition_id_axis,
        )

        # Save plot to a file
        plot_filename = "label_distribution.png"
        figure.savefig(plot_filename, bbox_inches="tight")
        # The image on disk is all that is returned; release the figure so
        # repeated requests do not pile up open figures in the server.
        # NOTE(review): assumes the figure is managed by pyplot - confirm.
        plt.close(figure)

        # Generate the code
        partitioner_params_str = _format_partitioner_params(partitioner_params)

        # The snippet mirrors the call made above, including the label
        # column the user actually selected.
        code = f"""
from flwr_datasets import FederatedDataset
from flwr_datasets.partitioner import {partitioner_type}
from flwr_datasets.visualization import plot_label_distributions

partitioner = {partitioner_type}({partitioner_params_str})
fds = FederatedDataset(
    dataset="{dataset}",
    partitioners={{
        "train": partitioner,
    }},
    trust_remote_code=True,
)
partitioner = fds.partitioners["train"]
figure, axis, dataframe = plot_label_distributions(
    partitioner=partitioner,
    label_name="{label_name}",
    title="{title}",
    legend={legend},
    verbose_labels={verbose_labels},
    size_unit="{size_unit}",
    partition_id_axis="{partition_id_axis}",
)
"""
        return plot_filename, code
    except Exception as e:
        # UI boundary: surface every failure as text in the code panel.
        # Exactly two values are returned, one per wired output component.
        return None, f"Error: {e}"


def _format_partitioner_params(partitioner_params):
    """Render keyword arguments as tab-indented ``name = value`` lines.

    String values are wrapped in double quotes; lines are comma-separated
    and the result starts and ends with a newline. An empty mapping yields
    a single newline.
    """
    if not partitioner_params:
        return "\n"
    rendered = (
        f'\t{name} = "{value}"' if isinstance(value, str) else f"\t{name} = {value}"
        for name, value in partitioner_params.items()
    )
    return "\n" + ",\n".join(rendered) + "\n"
|
219 |
+
|
220 |
+
# Gradio UI definition.
# NOTE(review): the container nesting below was reconstructed from a listing
# that had lost its indentation - confirm the layout against the rendered app.
with gr.Blocks() as demo:
    gr.Markdown("# Federated Dataset: Partitioning Visualization")
    gr.Markdown("See partitioned datasets for Federated Learning experiments. The partitioning and visualization was created using `flwr-datasets`.")

    bool_choices = [True, False]

    with gr.Row():
        # Left column: dataset, partitioner and plot settings.
        with gr.Column(scale=1):
            with gr.Accordion("Federated Dataset Parameters", open=True):
                dataset_input = gr.Textbox(label="Dataset", value="cifar10")
                partitioner_type_input = gr.Dropdown(
                    label="Partitioner",
                    choices=list(partitioner_types.keys()),
                    value="DirichletPartitioner",
                )
                # Widgets for the default (Dirichlet) partitioner start visible.
                num_partitions_input = gr.Number(label="num_partitions", value=10, visible=True)
                alpha_input = gr.Number(label="alpha", value=0.3, visible=True)
                partition_by_input = gr.Textbox(label="partition_by", value="label", visible=True)
                min_partition_size_input = gr.Number(label="min_partition_size", value=0, visible=True)
                self_balancing_input = gr.Radio(
                    label="self_balancing",
                    choices=bool_choices,
                    value=False,
                    visible=True,
                )

                # Widgets for the other partitioners start hidden.
                num_classes_per_partition_input = gr.Number(
                    label="num_classes_per_partition", value=2, visible=False
                )
                class_assignment_mode_input = gr.Dropdown(
                    label="class_assignment_mode",
                    choices=["random", "first-deterministic", "deterministic"],
                    value="first-deterministic",
                    visible=False,
                )
                num_shards_per_partition_input = gr.Number(
                    label="num_shards_per_partition", value=2, visible=False
                )
                shard_size_input = gr.Number(label="shard_size", value=0, visible=False)
                keep_incomplete_shard_input = gr.Radio(
                    label="keep_incomplete_shard",
                    choices=["True", "False"],
                    value="True",
                    visible=False,
                )
            with gr.Accordion("Plot Parameters", open=False):
                label_name = gr.Textbox(label="label_name", value="label")
                title = gr.Textbox(label="title", value="Per Partition Label Distribution")
                legend = gr.Radio(label="legend", choices=bool_choices, value=True)
                verbose_labels = gr.Radio(label="verbose_labels", choices=bool_choices, value=True)
                size_unit = gr.Radio(label="size_unit", choices=["absolute", "percent"], value="absolute")
                partition_id_axis = gr.Radio(label="partition_id_axis", choices=["x", "y"], value="x")

            # Parameter widgets in the order both event handlers expect them.
            partitioner_param_inputs = [
                num_partitions_input,
                alpha_input,
                partition_by_input,
                min_partition_size_input,
                self_balancing_input,
                num_classes_per_partition_input,
                class_assignment_mode_input,
                num_shards_per_partition_input,
                shard_size_input,
                keep_incomplete_shard_input,
            ]
            plot_param_inputs = [
                label_name,
                title,
                legend,
                verbose_labels,
                size_unit,
                partition_id_axis,
            ]

            # Show only the widgets relevant to the selected partitioner.
            partitioner_type_input.change(
                fn=update_parameter_visibility,
                inputs=[partitioner_type_input],
                outputs=partitioner_param_inputs,
            )

        # Right column: results and ready-made examples.
        with gr.Column(scale=3):
            gr.Markdown("## Label Distribution Plot")
            plot_output = gr.Image(label="Label Distribution Plot")
            submit_button = gr.Button("Partition and Plot", variant="primary")
            gr.Markdown("## Code")
            code_output = gr.Code(label="Code", language="python")
            # NOTE: a download button and a partitioning dataframe output are
            # currently disabled; the dataframe view only works with a header
            # that is of type "string".
            size_skew_examples = gr.Examples(
                examples=[
                    ["cifar10", "IidPartitioner", 10],
                    ["cifar10", "LinearPartitioner", 10],
                    ["cifar10", "SquarePartitioner", 10],
                    ["cifar10", "ExponentialPartitioner", 10],
                ],
                inputs=[
                    dataset_input,
                    partitioner_type_input,
                    num_partitions_input,
                ],
                label="Size Skew Examples",
            )

            dirichlet_examples = gr.Examples(
                examples=[
                    ["cifar10", "DirichletPartitioner", 10, 0.1, "label", 0, False, "absolute"],
                    ["cifar10", "DirichletPartitioner", 10, 0.1, "label", 0, False, "percent"],
                ],
                inputs=[
                    dataset_input,
                    partitioner_type_input,
                    num_partitions_input,
                    alpha_input,
                    partition_by_input,
                    min_partition_size_input,
                    self_balancing_input,
                    size_unit,
                ],
                label="Dirichlet Examples",
            )

            pathological_examples = gr.Examples(
                examples=[
                    ["cifar10", "PathologicalPartitioner", 10, 2, "first-deterministic", "label"],
                    ["cifar10", "PathologicalPartitioner", 10, 3, "deterministic", "label"],
                ],
                inputs=[
                    dataset_input,
                    partitioner_type_input,
                    num_partitions_input,
                    num_classes_per_partition_input,
                    class_assignment_mode_input,
                    partition_by_input,
                ],
                label="Pathological Examples",
            )
            markdown = gr.Markdown("See more tutorial, examples and documentation on [https://flower.ai/docs/datasets/index.html](https://flower.ai/docs/datasets/index.html).")

    # Run the partitioning when the button is pressed; the two outputs
    # receive the plot image path and the generated code.
    submit_button.click(
        fn=partition_and_plot,
        inputs=[
            dataset_input,
            partitioner_type_input,
            *partitioner_param_inputs,
            *plot_param_inputs,
        ],
        outputs=[
            plot_output,
            code_output,
        ],
    )

if __name__ == "__main__":
    demo.launch()
|