import os import re import subprocess import yaml import gradio as gr import requests from huggingface_hub import HfApi, get_token CMD = ["python" ,"run_job.py"] with open("README.md") as f: METADATA = yaml.safe_load(f.read().split("---\n")[1]) TITLE = METADATA["title"] EMOJI = METADATA["emoji"] spaceId = os.environ.get("SPACE_ID") or "lhoestq/run-duckdb" try: process = subprocess.run(CMD + ["--help"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) HELP = not process.returncode and (process.stdout or process.stderr).decode() except Exception: HELP = False DRY_RUN = bool(HELP) and bool(m :=re.search("--dry(-|_)run", HELP)) and m.group(0) def parse_log(line: str, pbars: dict[str, float]): if (percent_match := re.search("\\d+(?:\\.\\d+)?%", line)) and any(c in line.split("%")[1][:10] for c in "|█▌"): [pbars.pop(desc) for desc, percent in pbars.items() if percent == 1.] percent = float(percent_match.group(0)[:-1]) / 100 desc = line[:percent_match.start()].strip() or "Progress" pbars[desc] = percent yield "" else: yield line def dry_run(src, config, split, dst, query): if not all([src, config, split, dst, query]): raise gr.Error("Please fill source, destination and query.") args = ["--src", src, "--config", config, "--split", split, "--dst", dst, "--query", query, DRY_RUN] cmd = CMD + args logs = "Job:\n\n```bash\n" + " ".join('"' + arg.replace('"', '\"""') + '"' if " " in arg else arg for arg in cmd) + "\n```\nOutput:\n\n" yield {output_markdown: logs, progress_labels: gr.Label(visible=False)} process = subprocess.Popen(cmd, stdout=subprocess.PIPE) for line in iter(process.stdout.readline, b""): logs += line.decode() yield {output_markdown: logs} def run(src, config, split, dst, query, oauth_token: gr.OAuthToken | None, profile: gr.OAuthProfile | None): if not all([src, config, split, dst, query]): raise gr.Error("Please fill source, destination and query.") if oauth_token and profile: token = oauth_token.token username = profile.username elif (token := get_token()): username = HfApi().whoami(token=token)["name"] else: raise gr.Error("Please log in to run the job.") args = ["--src", src, "--config", config, "--split", split, "--dst", dst, "--query", query] cmd = CMD + args logs = "Job:\n\n```bash\n" + " ".join('"' + arg.replace('"', '\"""') + '"' if " " in arg else arg for arg in cmd) + "\n```\nOutput:\n\n" pbars = {} yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))} resp = requests.post( f"https://huggingface.co/api/jobs/{username}", json={ "spaceId": spaceId, "arguments": args, "command": CMD, "environment": {}, "flavor": "cpu-basic" }, headers={"Authorization": f"Bearer {token}"} ) if resp.status_code != 200: logs += resp.text pbars = {"Finished with an error ❌": 1.0} else: job_id = resp.json()["metadata"]["job_id"] resp = requests.get( f"https://huggingface.co/api/jobs/{username}/{job_id}/logs-stream", headers={"Authorization": f"Bearer {token}"} ) for line in iter(resp.raw.readline, b""): logs += parse_log(line.decode(), pbars=pbars) yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))} pbars = {"Finished" + (" ✅" if process.returncode == 0 else " with an error ❌"): 1.0} yield {output_markdown: logs, progress_labels: gr.Label(pbars, visible=bool(pbars))} READ_FUNCTIONS = ("pl.read_parquet", "pl.read_csv", "pl.read_json") NUM_TRENDING_DATASETS = 10 with gr.Blocks() as demo: with gr.Row(): with gr.Column(scale=10): gr.Markdown(f"# {TITLE} {EMOJI}") with gr.Column(): gr.LoginButton() with gr.Row(): with gr.Column(scale=10): with gr.Row(): loading_codes_json = gr.JSON([], visible=False) dataset_dropdown = gr.Dropdown(label="Source Dataset", allow_custom_value=True, scale=10) subset_dropdown = gr.Dropdown(info="Subset", allow_custom_value=True, show_label=False, visible=False) split_dropdown = gr.Dropdown(info="Split", allow_custom_value=True, show_label=False, visible=False) with gr.Column(min_width=60): gr.HTML("