Delete app.py
app.py
DELETED
@@ -1,500 +0,0 @@
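# app.py: a Streamlit UI that configures a Labelbox <-> Databricks ingestion
# pipeline. It collects credentials, picks (or creates) a Labelbox dataset and a
# Databricks cluster, schedules the job, maps table columns to the Labelbox
# schema, and POSTs the resulting job spec to a local Flask endpoint.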
import streamlit as st
import requests
import re
import json
import time
import pandas as pd
import labelbox

def validate_dataset_name(name):
    """Validate the dataset name."""
    # Check length
    if len(name) > 256:
        return "Dataset name should be limited to 256 characters."
    # Check allowed characters
    allowed_characters_pattern = re.compile(r'^[A-Za-z0-9 _\-.,()\/]+$')
    if not allowed_characters_pattern.match(name):
        return ("Dataset name can only contain letters, numbers, spaces, and the following "
                "punctuation symbols: _-.,()/. Other characters are not supported.")
    return None

def create_new_dataset_labelbox(new_dataset_name):
    client = labelbox.Client(api_key=labelbox_api_key)
    dataset = client.create_dataset(name=new_dataset_name)
    dataset_id = dataset.uid
    return dataset_id

def get_dataset_from_labelbox(labelbox_api_key):
    client = labelbox.Client(api_key=labelbox_api_key)
    datasets = client.get_datasets()
    return datasets

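# The two helpers below run SQL against Databricks through the legacy API 1.2
# command-execution endpoints: create an execution context on a cluster, submit
# a command, poll /commands/status until it finishes, then destroy the context.
# (Note: create_new_dataset_labelbox above relies on the module-level
# labelbox_api_key that the UI further down collects.)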
def destroy_databricks_context(cluster_id, context_id, domain, databricks_api_key):
    DOMAIN = f"https://{domain}"
    TOKEN = f"Bearer {databricks_api_key}"

    headers = {
        "Authorization": TOKEN,
        "Content-Type": "application/json",
    }

    # Destroy context
    destroy_payload = {
        "clusterId": cluster_id,
        "contextId": context_id
    }
    destroy_response = requests.post(
        f"{DOMAIN}/api/1.2/contexts/destroy",
        headers=headers,
        data=json.dumps(destroy_payload)
    )

    if destroy_response.status_code != 200:
        raise ValueError("Failed to destroy context.")

def execute_databricks_query(query, cluster_id, domain, databricks_api_key):
    DOMAIN = f"https://{domain}"
    TOKEN = f"Bearer {databricks_api_key}"

    headers = {
        "Authorization": TOKEN,
        "Content-Type": "application/json",
    }

    # Create context
    context_payload = {
        "clusterId": cluster_id,
        "language": "sql"
    }
    context_response = requests.post(
        f"{DOMAIN}/api/1.2/contexts/create",
        headers=headers,
        data=json.dumps(context_payload)
    )
    context_response_data = context_response.json()

    if 'id' not in context_response_data:
        raise ValueError("Failed to create context.")
    context_id = context_response_data['id']

    # Execute query
    command_payload = {
        "clusterId": cluster_id,
        "contextId": context_id,
        "language": "sql",
        "command": query
    }
    command_response = requests.post(
        f"{DOMAIN}/api/1.2/commands/execute",
        headers=headers,
        data=json.dumps(command_payload)
    ).json()

    if 'id' not in command_response:
        raise ValueError("Failed to execute command.")
    command_id = command_response['id']

    # Wait for the command to complete
    while True:
        status_response = requests.get(
            f"{DOMAIN}/api/1.2/commands/status",
            headers=headers,
            params={
                "clusterId": cluster_id,
                "contextId": context_id,
                "commandId": command_id
            }
        ).json()

        command_status = status_response.get("status")

        if command_status == "Finished":
            break
        elif command_status in ["Error", "Cancelled"]:
            raise ValueError(f"Command {command_status}. Reason: {status_response.get('results', {}).get('summary')}")
        else:
            time.sleep(1)  # Wait 1 second before checking again

    # Convert the results into a pandas DataFrame
    data = status_response.get('results', {}).get('data', [])
    columns = [col['name'] for col in status_response.get('results', {}).get('schema', [])]
    df = pd.DataFrame(data, columns=columns)

    destroy_databricks_context(cluster_id, context_id, domain, databricks_api_key)

    return df

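# Example usage (hypothetical workspace domain and cluster ID):
#   df = execute_databricks_query("SHOW DATABASES;", "0123-456789-abcde",
#                                 "myworkspace.cloud.databricks.com", databricks_api_key)
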
st.title("Labelbox 🤝 Databricks")
st.header("Pipeline Creator", divider='rainbow')

def is_valid_url_or_uri(value):
    """Check if the provided value is a valid URL or URI."""
    # Check general URLs
    url_pattern = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )

    # Check general URIs including cloud storage URIs (like gs://, s3://, etc.)
    uri_pattern = re.compile(
        r'^(?:[a-z][a-z0-9+.-]*:|/)(?:/?[^\s]*)?$|^(gs|s3|azure|blob)://[^\s]+'
    )

    return url_pattern.match(value) or uri_pattern.match(value)

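# e.g. both "https://storage.example.com/image.png" and "gs://my-bucket/image.png"
# (hypothetical asset locations) pass this check.
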
is_preview = st.toggle('Run in Preview Mode', value=False)
if is_preview:
    st.success('Running in Preview mode!', icon="✅")
else:
    st.success('Running in Production mode!', icon="✅")

st.subheader("Tell us about your Databricks and Labelbox environments", divider='grey')
title = st.text_input('Enter Databricks Domain (e.g., 3980281744248452.2.gcp.databricks.com)', '')
databricks_api_key = st.text_input('Databricks API Key', type='password')
labelbox_api_key = st.text_input('Labelbox API Key', type='password')

# Defaults so the sections below can run before a cluster has been chosen
selected_cluster_name = None
cluster_id = None
formatted_title = ""

# After Labelbox API key is entered
if labelbox_api_key:
    # Fetch the available datasets
    datasets = get_dataset_from_labelbox(labelbox_api_key)
    create_new_dataset = st.toggle("Make me a new dataset", value=False)

    if not create_new_dataset:
        # Let the user pick from the existing datasets
        dataset_name_to_id = {dataset.name: dataset.uid for dataset in datasets}
        selected_dataset_name = st.selectbox("Select an existing dataset:", list(dataset_name_to_id.keys()))
        dataset_id = dataset_name_to_id[selected_dataset_name]

    else:
        # If the user toggles "Make me a new dataset"
        new_dataset_name = st.text_input("Enter the new dataset name:")

        # Check if the name is valid
        if new_dataset_name:
            validation_message = validate_dataset_name(new_dataset_name)
            if validation_message:
                st.error(validation_message, icon="🚫")
            else:
                st.success("Valid dataset name!", icon="✅")
                dataset_name = new_dataset_name

# Define the variables beforehand with default values (if not defined)
new_dataset_name = new_dataset_name if 'new_dataset_name' in locals() else None
selected_dataset_name = selected_dataset_name if 'selected_dataset_name' in locals() else None

if new_dataset_name or selected_dataset_name:
    # Handle various formats of input
    formatted_title = re.sub(r'^https?://', '', title)  # Remove http:// or https://
    formatted_title = re.sub(r'/$', '', formatted_title)  # Remove trailing slash if present

if formatted_title:
    st.subheader("Select an existing cluster or make a new one", divider='grey', help="Jobs in preview mode will use all-purpose compute clusters to help you iterate faster. Jobs in production mode will use job clusters to reduce DBUs consumed.")
    DOMAIN = f"https://{formatted_title}"
    TOKEN = f"Bearer {databricks_api_key}"

    HEADERS = {
        "Authorization": TOKEN,
        "Content-Type": "application/json",
    }

    # Endpoint to list clusters
    ENDPOINT = "/api/2.0/clusters/list"

    try:
        response = requests.get(DOMAIN + ENDPOINT, headers=HEADERS)
        response.raise_for_status()

        # Include clusters with cluster_source "UI" or "API"
        clusters = response.json().get("clusters", [])
        cluster_dict = {
            cluster["cluster_name"]: cluster["cluster_id"]
            for cluster in clusters if cluster.get("cluster_source") in ["UI", "API"]
        }

        # Display dropdown with cluster names
        make_cluster = st.toggle('Make me a new cluster', value=False)
        if make_cluster:
            # Make a cluster
            st.write("Making a new cluster")
        else:
            if cluster_dict:
                selected_cluster_name = st.selectbox(
                    'Select a cluster to run on',
                    list(cluster_dict.keys()),
                    key='unique_key_for_cluster_selectbox',
                    index=None,
                    placeholder="Select a cluster..",
                )
                if selected_cluster_name:
                    cluster_id = cluster_dict[selected_cluster_name]
            else:
                st.write("No UI or API-based compute clusters found.")

    except requests.RequestException as e:
        st.write(f"Error communicating with Databricks API: {str(e)}")
    except ValueError:
        st.write("Received unexpected response from Databricks API.")

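# If the chosen cluster exists but is stopped, start it via the Clusters 2.0 API
# and poll /api/2.0/clusters/get until it reports RUNNING (or errors/times out).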
if selected_cluster_name and cluster_id:
    # Check if the selected cluster is running
    cluster_state = [cluster["state"] for cluster in clusters if cluster["cluster_id"] == cluster_id][0]

    # If the cluster is not running, start it
    if cluster_state != "RUNNING":
        with st.spinner("Starting the selected cluster. This typically takes 10 minutes. Please wait..."):
            start_response = requests.post(f"{DOMAIN}/api/2.0/clusters/start", headers=HEADERS, json={"cluster_id": cluster_id})
            start_response.raise_for_status()

            # Poll until the cluster is up or until timeout
            start_time = time.time()
            timeout = 1200  # 20 minutes in seconds
            while True:
                cluster_response = requests.get(f"{DOMAIN}/api/2.0/clusters/get", headers=HEADERS, params={"cluster_id": cluster_id}).json()
                if "state" in cluster_response:
                    if cluster_response["state"] == "RUNNING":
                        break
                    elif cluster_response["state"] in ["TERMINATED", "ERROR"]:
                        st.write(f"Error starting cluster. Current state: {cluster_response['state']}")
                        break

                if (time.time() - start_time) > timeout:
                    st.write("Timeout reached while starting the cluster.")
                    break

                time.sleep(10)  # Check every 10 seconds

        if cluster_response.get("state") == "RUNNING":
            st.success(f"{selected_cluster_name} is now running!", icon="🏃‍♂️")
    else:
        st.success(f"{selected_cluster_name} is already running!", icon="🏃‍♂️")

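# Databricks job schedules use Quartz cron syntax: the seconds field comes first,
# and one of day-of-month / day-of-week must be '?'. For example:
#   generate_cron_expression("1 week", hour=9, minute=30, day_of_week="MON")
#   returns "0 30 9 ? * MON"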
def generate_cron_expression(freq, hour=0, minute=0, day_of_week=None, day_of_month=None):
    """
    Generate a cron expression based on the provided frequency and time.
    """
    if freq == "1 minute":
        return "0 * * * * ?"
    elif freq == "1 hour":
        return f"0 {minute} * * * ?"
    elif freq == "1 day":
        return f"0 {minute} {hour} * * ?"
    elif freq == "1 week":
        if not day_of_week:
            raise ValueError("Day of week not provided for weekly frequency.")
        return f"0 {minute} {hour} ? * {day_of_week}"
    elif freq == "1 month":
        if not day_of_month:
            raise ValueError("Day of month not provided for monthly frequency.")
        return f"0 {minute} {hour} {day_of_month} * ?"
    else:
        raise ValueError("Invalid frequency provided")

# Streamlit UI
st.subheader("Run Frequency", divider='grey')

# Dropdown to select frequency
freq_options = ["1 minute", "1 hour", "1 day", "1 week", "1 month"]
selected_freq = st.selectbox("Select frequency:", freq_options, placeholder="Select frequency..")

day_of_week = None
day_of_month = None

# If the frequency is hourly, daily, weekly, or monthly, ask for a specific time
if selected_freq != "1 minute":
    col1, col2 = st.columns(2)
    with col1:
        hour = st.selectbox("Hour:", list(range(0, 24)))
    with col2:
        minute = st.selectbox("Minute:", list(range(0, 60)))

    if selected_freq == "1 week":
        days_options = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"]
        day_of_week = st.selectbox("Select day of the week:", days_options)

    elif selected_freq == "1 month":
        day_of_month = st.selectbox("Select day of the month:", list(range(1, 32)))

else:
    hour, minute = 0, 0

# Generate the cron expression
frequency = generate_cron_expression(selected_freq, hour, minute, day_of_week, day_of_month)

def generate_human_readable_message(freq, hour=0, minute=0, day_of_week=None, day_of_month=None):
    """
    Generate a human-readable message for the scheduling.
    """
    if freq == "1 minute":
        return "Job will run every minute."
    elif freq == "1 hour":
        return f"Job will run once an hour at minute {minute}."
    elif freq == "1 day":
        return f"Job will run daily at {hour:02}:{minute:02}."
    elif freq == "1 week":
        if not day_of_week:
            raise ValueError("Day of week not provided for weekly frequency.")
        return f"Job will run every {day_of_week} at {hour:02}:{minute:02}."
    elif freq == "1 month":
        if not day_of_month:
            raise ValueError("Day of month not provided for monthly frequency.")
        return f"Job will run once a month on day {day_of_month} at {hour:02}:{minute:02}."
    else:
        raise ValueError("Invalid frequency provided")


# Generate the human-readable message
readable_msg = generate_human_readable_message(selected_freq, hour, minute, day_of_week, day_of_month)

if frequency:
    st.success(readable_msg, icon="📅")

st.subheader("Select a table", divider="grey")

if cluster_id:  # Only query once a running cluster has been selected
    with st.spinner('Querying Databricks...'):
        query = "SHOW DATABASES;"
        result_data = execute_databricks_query(query, cluster_id, formatted_title, databricks_api_key)

    # Extract the databaseName values from the DataFrame
    database_names = result_data['databaseName'].tolist()

    # Create a dropdown with the database names
    selected_database = st.selectbox("Select a Database:", database_names, index=None, placeholder="Select a database..")

    if selected_database:
        with st.spinner('Querying Databricks...'):
            query = f"SHOW TABLES IN {selected_database};"
            result_data = execute_databricks_query(query, cluster_id, formatted_title, databricks_api_key)

        # Extract the tableName values from the DataFrame
        table_names = result_data['tableName'].tolist()

        # Create a dropdown with the table names
        selected_table = st.selectbox("Select a Table:", table_names, index=None, placeholder="Select a table..")

        if selected_table:
            with st.spinner('Querying Databricks...'):
                query = f"SHOW COLUMNS IN {selected_database}.{selected_table};"
                result_data = execute_databricks_query(query, cluster_id, formatted_title, databricks_api_key)
            column_names = result_data['col_name'].tolist()

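            # A Labelbox data-row import needs row_data (the URL/URI of the asset)
            # and can take an optional global_key (a unique identifier per data row).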
            st.subheader("Map table schema to Labelbox schema", divider="grey")

            # Fetch the first 5 rows of the selected table
            with st.spinner('Fetching first 5 rows of the selected table...'):
                query = f"SELECT * FROM {selected_database}.{selected_table} LIMIT 5;"
                table_sample_data = execute_databricks_query(query, cluster_id, formatted_title, databricks_api_key)

            # Display the sample data in the Streamlit UI
            st.write(table_sample_data)

            # Define two columns for side-by-side selectboxes
            col1, col2 = st.columns(2)

            with col1:
                selected_row_data = st.selectbox(
                    "row_data (required):",
                    column_names,
                    index=None,
                    placeholder="Select a column..",
                    help="Select the column that contains the URL/URI bucket location of the data rows you wish to import into Labelbox."
                )

            with col2:
                selected_global_key = st.selectbox(
                    "global_key (optional):",
                    column_names,
                    index=None,
                    placeholder="Select a column..",
                    help="Select the column that contains the global key. If not provided, a new key will be generated for you."
                )

            # Fetch a single row from the selected table
            query_sample_row = f"SELECT * FROM {selected_database}.{selected_table} LIMIT 1;"
            result_sample = execute_databricks_query(query_sample_row, cluster_id, formatted_title, databricks_api_key)

            if selected_row_data:
                # Extract the value from the selected row_data column
                sample_row_data_value = result_sample[selected_row_data].iloc[0]

                # Validate the extracted value
                if is_valid_url_or_uri(sample_row_data_value):
                    st.success(f"Sample URI/URL from selected row data column: {sample_row_data_value}", icon="✅")
                    dataset_id = create_new_dataset_labelbox(new_dataset_name) if create_new_dataset else dataset_id

                    # Mode
                    mode = "preview" if is_preview else "production"

                    # Databricks instance
                    databricks_instance = formatted_title

                    # Dataset ID and New Dataset
                    new_dataset = 1 if create_new_dataset else 0

                    # Table Path
                    table_path = f"{selected_database}.{selected_table}"

                    # Cluster ID and New Cluster
                    new_cluster = 1 if make_cluster else 0
                    cluster_id = cluster_id if not make_cluster else ""

                    # Schema Map
                    row_data_input = selected_row_data
                    global_key_input = selected_global_key
                    schema_map_dict = {'row_data': row_data_input}
                    if global_key_input:
                        schema_map_dict['global_key'] = global_key_input

                    # Convert the dict to a stringified JSON
                    schema_map_str = json.dumps(schema_map_dict)
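                    # e.g. '{"row_data": "image_url", "global_key": "id"}'
                    # (hypothetical column names)
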
                    data = {
                        "mode": mode,
                        "databricks_instance": databricks_instance,
                        "databricks_api_key": databricks_api_key,
                        "new_dataset": new_dataset,
                        "dataset_id": dataset_id,
                        "table_path": table_path,
                        "labelbox_api_key": labelbox_api_key,
                        "frequency": frequency,
                        "new_cluster": new_cluster,
                        "cluster_id": cluster_id,
                        "schema_map": schema_map_str
                    }

                    # Display the constructed data using Streamlit
                    st.json(data)

                    if st.button("Deploy Pipeline!", type="primary"):
                        # Ensure all fields are filled out
                        required_fields = [
                            mode, databricks_instance, databricks_api_key, new_dataset, dataset_id,
                            table_path, labelbox_api_key, frequency, new_cluster, cluster_id, schema_map_str
                        ]
                        if any(field is None for field in required_fields):
                            st.error("Please fill out all fields before deploying.", icon="🚫")
                        else:
                            # Send a POST request to the Flask app endpoint
                            with st.spinner("Deploying pipeline..."):
                                response = requests.post("http://127.0.0.1:5000/create-databricks-job", json=data)

                            # Check if the request was successful
                            if response.status_code == 200:
                                # Display the response using Streamlit
                                st.balloons()
                                st.success("Pipeline deployed successfully!", icon="🚀")
                                st.json(response.json())
                            else:
                                st.error(f"Failed to deploy pipeline. Response: {response.text}", icon="🚫")

                else:
                    st.error(f"row_data '{sample_row_data_value}' is not a valid URI or URL. Please select a different column.", icon="🚫")