Spaces:
Runtime error
Runtime error
Commit
·
8eae5c4
1
Parent(s):
0401543
Update app.py
Browse files
app.py
CHANGED
@@ -22,53 +22,53 @@ def generate_data(file, num_samples):
|
|
22 |
|
23 |
return samples
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
#
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
#
|
39 |
-
#
|
40 |
-
#
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
#
|
59 |
-
|
60 |
-
|
61 |
-
#
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
#
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
|
73 |
|
74 |
|
@@ -162,22 +162,18 @@ with gr.Blocks(css = css) as demo:
|
|
162 |
''')
|
163 |
|
164 |
with gr.Column():
|
165 |
-
#gr.Markdown(""" ### Record audio """)
|
166 |
-
# with gr.Tab("Record Audio"):
|
167 |
-
# audio_input_r = gr.Audio(label = 'Record Audio Input',source="microphone",type="filepath")
|
168 |
-
# transcribe_audio_r = gr.Button('Transcribe')
|
169 |
|
170 |
with gr.Tab("Upload Data as File: Tabular Data"):
|
171 |
data_input_u = gr.File(label = 'Upload Data File (Currently supports CSV and ARFF)', file_types=[".csv", ".arff"])
|
172 |
num_samples = gr.Slider(label="Number of Samples", minimum=5, maximum=100, value=5, step=10)
|
173 |
generate_data_btn = gr.Button('Generate Synthetic Data')
|
174 |
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
|
180 |
-
|
181 |
|
182 |
with gr.Row():
|
183 |
#data_sample = gr.Dataframe(label = "Original Data")
|
@@ -187,7 +183,7 @@ with gr.Blocks(css = css) as demo:
|
|
187 |
|
188 |
|
189 |
generate_data_btn.click(generate_data, inputs = [data_input_u,num_samples], outputs = [data_output])
|
190 |
-
|
191 |
examples = gr.Examples(examples=[['diabetes.arff',5], ["titanic.csv", 15]],inputs = [data_input_u,num_samples], outputs = [data_output], cache_examples = True, fn = generate_data)
|
192 |
|
193 |
|
|
|
22 |
|
23 |
return samples
|
24 |
|
25 |
+
def generate_relational_data(parent_file, child_file, join_on, num_samples=5):
    """Fit a parent and a relational child model on two CSVs and sample from both.

    Args:
        parent_file: Uploaded parent dataset; only its ``.name`` attribute
            (a path readable by ``pd.read_csv``) is used.
        child_file: Uploaded child dataset; only its ``.name`` attribute is used.
        join_on: Name of the column linking the two tables. It must be
            present in both CSV files.
        num_samples: Number of parent rows to sample. Defaults to 5, the
            value that was previously hard-coded.

    Returns:
        A 3-tuple ``(parent_samples, child_samples, gr.update(visible=True))``.

    Raises:
        ValueError: If either file is missing or ``join_on`` is not a column
            of both datasets.
        FileNotFoundError: If no ``id*`` model directory exists under
            ``rtf_parent/`` after saving the parent model.
    """
    # NOTE(review): three values are returned, while the click handler visible
    # in this file lists two outputs -- confirm the wiring matches.
    if parent_file is None or child_file is None:
        raise ValueError("Both a parent and a child CSV file must be uploaded.")

    parent_df = pd.read_csv(parent_file.name)
    child_df = pd.read_csv(child_file.name)

    # Make sure the join_on column exists in both tables. An explicit raise
    # is used instead of `assert`, which is stripped under `python -O`.
    missing_in = [
        label
        for label, frame in (("parent", parent_df), ("child", child_df))
        if join_on not in frame.columns
    ]
    if missing_in:
        raise ValueError(
            f"Join column {join_on!r} not found in the "
            f"{' and '.join(missing_in)} dataset."
        )

    # NOTE(review): this fits the module-level `rtf_model` in place, so the
    # shared model object is changed by this call.
    rtf_model.fit(parent_df.drop(join_on, axis=1), num_bootstrap=100)

    pdir = Path("rtf_parent/")
    rtf_model.save(pdir)

    # Get the most recently saved parent model,
    # or specify some other saved model.
    # parent_model_path = pdir / "idXXX"
    saved_models = [p for p in pdir.glob("id*") if p.is_dir()]
    if not saved_models:
        raise FileNotFoundError(f"No saved parent model found under {pdir}")
    parent_model_path = max(saved_models, key=os.path.getmtime)

    child_model = REaLTabFormer(
        model_type="relational",
        parent_realtabformer_path=parent_model_path,
        epochs=25,
        output_max_length=None,
        train_size=0.8,
    )

    child_model.fit(
        df=child_df,
        in_df=parent_df,
        join_on=join_on,
        num_bootstrap=10,
    )

    # Generate parent samples.
    parent_samples = rtf_model.sample(num_samples)

    # Create the unique ids based on the index.
    parent_samples.index.name = join_on
    parent_samples = parent_samples.reset_index()

    # Generate the relational observations.
    child_samples = child_model.sample(
        input_unique_ids=parent_samples[join_on],
        input_df=parent_samples.drop(join_on, axis=1),
        gen_batch=5,
    )

    return parent_samples, child_samples, gr.update(visible=True)
|
72 |
|
73 |
|
74 |
|
|
|
162 |
''')
|
163 |
|
164 |
with gr.Column():
|
|
|
|
|
|
|
|
|
165 |
|
166 |
with gr.Tab("Upload Data as File: Tabular Data"):
|
167 |
data_input_u = gr.File(label = 'Upload Data File (Currently supports CSV and ARFF)', file_types=[".csv", ".arff"])
|
168 |
num_samples = gr.Slider(label="Number of Samples", minimum=5, maximum=100, value=5, step=10)
|
169 |
generate_data_btn = gr.Button('Generate Synthetic Data')
|
170 |
|
171 |
+
with gr.Tab("Upload Data as File: Relational Data"):
|
172 |
+
data_input_parent = gr.File(label = 'Upload Data File for Parent Dataset', file_types=[ ".csv"])
|
173 |
+
data_input_child = gr.File(label = 'Upload Data File for Child Dataset', file_types=[ ".csv"])
|
174 |
+
join_on = gr.Textbox(label = 'Column name to join on')
|
175 |
|
176 |
+
generate_data_btn_relational = gr.Button('Generate Synthetic Data')
|
177 |
|
178 |
with gr.Row():
|
179 |
#data_sample = gr.Dataframe(label = "Original Data")
|
|
|
183 |
|
184 |
|
185 |
generate_data_btn.click(generate_data, inputs = [data_input_u,num_samples], outputs = [data_output])
|
186 |
+
generate_data_btn_relational.click(generate_relational_data, inputs = [data_input_parent,data_input_child,join_on], outputs = [data_output, data_output_child])
|
187 |
examples = gr.Examples(examples=[['diabetes.arff',5], ["titanic.csv", 15]],inputs = [data_input_u,num_samples], outputs = [data_output], cache_examples = True, fn = generate_data)
|
188 |
|
189 |
|