Spaces:
Running
Running
File size: 3,329 Bytes
8a9db0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import gradio as gr
import requests
from huggingface_hub import HfApi
# Single HTTP session shared by the callbacks that query datasets-server.
session = requests.Session()
# Custom CSS handed to gr.Blocks below: elements carrying the
# "transparent-dropdown" class take the --bg-dark or --bg variable as their
# background, depending on the browser's preferred color scheme.
css = """
@media (prefers-color-scheme: dark) {
.transparent-dropdown, .transparent-dropdown .container .wrap {
background: var(--bg-dark);
}
}
@media (prefers-color-scheme: light) {
.transparent-dropdown, .transparent-dropdown .container .wrap {
background: var(--bg);
}
}
"""
# UI layout for the app; `demo` is the Blocks object launched at the bottom of the file.
# NOTE(review): the indentation of the nested `with` blocks is not visible in
# this copy of the file — confirm which components belong to which
# Row/Column/Group before re-indenting.
with gr.Blocks(css=css) as demo:
with gr.Row():
with gr.Column(scale=4):
with gr.Group():
# Dataset picker; allow_custom_value lets the user type a repo id that is
# not among the suggested choices. Choices are filled in by fetch_datasets.
dataset_dropdown = gr.Dropdown(label="Dataset", allow_custom_value=True, interactive=True)
with gr.Row():
# Subset and split pickers start hidden; show_subset_dropdown and
# show_split_dropdown make them visible when there is more than one option.
subset_dropdown = gr.Dropdown(info="Subset", show_label=False, visible=False, elem_classes="transparent-dropdown")
split_dropdown = gr.Dropdown(info="Split", show_label=False, visible=False, elem_classes="transparent-dropdown")
# Login button; fetch_datasets receives the resulting OAuth token (or None).
gr.LoginButton()
# Hidden component holding the loading codes returned by fetch_read_parquet_loading.
loading_codes_json = gr.JSON(visible=False)
# Hidden textbox; no callback visible in this file reads or writes it.
dataset_subset_split_textbox = gr.Textbox(visible=False)
# Table component; no callback visible in this file populates it yet.
dataframe = gr.DataFrame()
@demo.load(outputs=dataset_dropdown)
def fetch_datasets(request: gr.Request, oauth_token: gr.OAuthToken | None):
    """Fill the dataset dropdown when the page loads.

    Lists up to three Parquet-format datasets sorted by trending score and,
    when the visitor is logged in, up to three more authored by that user.

    Args:
        request: Incoming request; its ``dataset`` query parameter, when
            present, becomes the selected value.
        oauth_token: Token of the logged-in user, or ``None`` when anonymous.

    Returns:
        A mapping from ``dataset_dropdown`` to its updated choices and value.
        The value is the ``dataset`` query parameter if given, otherwise the
        first listed dataset, otherwise ``None`` when nothing was listed.
    """
    api = HfApi(token=oauth_token.token if oauth_token else None)
    datasets = list(api.list_datasets(limit=3, sort="trendingScore", direction=-1, filter=["format:parquet"]))
    if oauth_token:
        account = api.whoami()
        # The original lookup used "user" only; "name" is believed to be the
        # key that carries the username in the whoami payload (TODO confirm),
        # so fall back to it rather than silently skipping the user's datasets.
        user = account.get("user") or account.get("name")
        if user:
            datasets += list(api.list_datasets(limit=3, sort="trendingScore", direction=-1, filter=["format:parquet"], author=user))
    # A dataset can show up in both listings; keep the first occurrence only.
    choices = list(dict.fromkeys(info.id for info in datasets))
    # Guard against an empty listing instead of indexing blindly.
    selected = request.query_params.get("dataset") or (choices[0] if choices else None)
    return {dataset_dropdown: gr.Dropdown(choices=choices, value=selected)}
@dataset_dropdown.change(inputs=dataset_dropdown, outputs=loading_codes_json)
def fetch_read_parquet_loading(dataset: str):
    """Fetch the ``dd.read_parquet`` loading codes for a dataset.

    Queries the datasets-server ``compatible-libraries`` endpoint and returns
    the ``loading_codes`` of the first library entry whose ``function`` is
    ``"dd.read_parquet"``.

    Args:
        dataset: Repository id typed or picked in the dropdown. Values without
            a ``/`` between two parts (ignoring surrounding whitespace and
            slashes) are not sent to the server.

    Returns:
        The list of loading codes, or ``[]`` when the id is not of the form
        ``namespace/name`` or no matching library entry is reported.

    Raises:
        requests.RequestException: On network failure or timeout (3 seconds).
        ValueError: If the response body is not valid JSON.
    """
    # The dropdown value may be empty/None; normalise before inspecting it.
    dataset = (dataset or "").strip()
    if "/" not in dataset.strip("/"):
        return []
    # Pass the id through `params` so requests URL-encodes it; the value comes
    # from free-form user input and may contain "&", "#" or spaces.
    resp = session.get(
        "https://datasets-server.huggingface.co/compatible-libraries",
        params={"dataset": dataset},
        timeout=3,
    ).json()
    for lib in resp.get("libraries", []):
        if lib.get("function") == "dd.read_parquet":
            return lib.get("loading_codes") or []
    return []
@loading_codes_json.change(inputs=loading_codes_json, outputs=[subset_dropdown, split_dropdown])
def show_subset_dropdown(loading_codes: list[dict]):
    """Refresh the subset and split dropdowns after new loading codes arrive.

    The first subset (``config_name``) is selected, along with the first split
    of that subset. Each dropdown is shown only when it offers more than one
    option; empty strings are used as values when nothing is available.

    Args:
        loading_codes: Entries carrying a ``config_name`` and an
            ``arguments["splits"]`` collection.

    Returns:
        A pair ``(subset dropdown update, split dropdown update)``.
    """
    subsets = [code["config_name"] for code in loading_codes]
    subset = subsets[0] if subsets else ""

    # Split names of every entry belonging to the selected subset; only the
    # first matching entry is used.
    splits_per_match = [
        list(code["arguments"]["splits"])
        for code in loading_codes
        if code["config_name"] == subset
    ]
    splits = splits_per_match[0] if splits_per_match else []
    split = splits[0] if splits else ""

    subset_update = gr.Dropdown(subsets, value=subset, visible=len(subsets) > 1)
    split_update = gr.Dropdown(splits, value=split, visible=len(splits) > 1)
    return subset_update, split_update
@subset_dropdown.change(inputs=[loading_codes_json, subset_dropdown], outputs=split_dropdown)
def show_split_dropdown(loading_codes: list[dict], subset: str):
    """Refresh the split dropdown after the selected subset changes.

    Args:
        loading_codes: Entries carrying a ``config_name`` and an
            ``arguments["splits"]`` collection.
        subset: The ``config_name`` currently selected.

    Returns:
        A split dropdown update listing the splits of the first entry that
        matches ``subset``, with the first split selected (or ``""`` when there
        is none). The dropdown is visible only with more than one split.
    """
    splits_per_match = [
        list(code["arguments"]["splits"])
        for code in loading_codes
        if code["config_name"] == subset
    ]
    splits = splits_per_match[0] if splits_per_match else []
    split = splits[0] if splits else ""
    return gr.Dropdown(splits, value=split, visible=len(splits) > 1)
if __name__ == "__main__":
    # Start the Gradio app only when this file is executed as a script.
    demo.launch()
|