File size: 3,352 Bytes
d6b3b9f
01942d8
d6b3b9f
 
 
 
 
 
01942d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6b3b9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01942d8
d6b3b9f
01942d8
 
 
 
 
d6b3b9f
 
01942d8
d6b3b9f
01942d8
 
 
 
 
d6b3b9f
 
01942d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6b3b9f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import gradio as gr
import datasets


# Soft Gradio theme with a green primary hue; passed to gr.Blocks below.
theme = gr.themes.Soft(primary_hue="green")


def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
    """Validate a dataset id, config (subset) and split against the Hub.

    The result is always a 3-tuple ``(dataset_id, dataset_config,
    dataset_split)``; a slot is replaced to signal what is wrong:

    * ``(None, dataset_config, dataset_split)`` -- the config names could not
      be listed (any exception from ``get_dataset_config_names``).
    * ``(dataset_id, [configs...], dataset_split)`` -- ``dataset_config`` is
      not one of the available configs; the list holds the valid ones.
    * ``(dataset_id, dataset_config, [splits...])`` -- the config is valid but
      ``dataset_split`` is not one of the loaded splits; the list holds the
      valid ones.
    * ``(dataset_id, None, None)`` -- the loaded object is neither a
      ``DatasetDict`` nor a ``Dataset``.
    * ``(dataset_id, dataset_config, dataset_split)`` -- everything checks out.

    Exceptions raised by ``datasets.load_dataset`` are not caught here.
    """
    try:
        configs = datasets.get_dataset_config_names(dataset_id)
    except Exception:
        # Dataset may not exist (or is private / unreachable).
        return None, dataset_config, dataset_split

    if dataset_config not in configs:
        # Caller needs to choose one of the available configs.
        return dataset_id, configs, dataset_split

    ds = datasets.load_dataset(dataset_id, dataset_config)

    if isinstance(ds, datasets.DatasetDict):
        if dataset_split not in ds:
            # Caller needs to choose a split. The config was validated above,
            # so hand it back unchanged instead of None; returning None here
            # would wipe the user's (valid) config selection in the UI.
            return dataset_id, dataset_config, list(ds)
    elif not isinstance(ds, datasets.Dataset):
        # Unknown type
        return dataset_id, None, None
    return dataset_id, dataset_config, dataset_split


def try_submit(dataset_id, dataset_config, dataset_split):
    """Validate the dataset selection and refresh the config/split dropdowns.

    Runs ``check_dataset`` on the given values and shows a Gradio warning for
    each problem it reports. When ``check_dataset`` returns a list of valid
    configs or splits, the corresponding dropdown is updated with those
    choices and its first entry is pre-selected.

    Returns:
        A ``(config, split)`` pair for the two dropdown outputs: either the
        plain value to show or a dropdown update carrying new choices.
    """
    # Validate dataset
    d_id, config, split = check_dataset(
        dataset_id=dataset_id,
        dataset_config=dataset_config,
        dataset_split=dataset_split,
    )

    if d_id is None:
        gr.Warning(f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.')
    if isinstance(config, list):
        # A list means the requested config does not exist; offer the valid ones.
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.')
        config = gr.Dropdown.update(choices=config, value=config[0] if config else None)
    if isinstance(split, list):
        # A list means the requested split does not exist; offer the valid ones.
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.')
        split = gr.Dropdown.update(choices=split, value=split[0] if split else None)

    return config, split

# Build the UI: one row with two columns (model settings on the left, dataset
# settings on the right) followed by a row holding the submit button.
with gr.Blocks(theme=theme) as iface:
    with gr.Row():
        with gr.Column():
            # Model inputs. NOTE: neither of these two components is passed to
            # the click handler registered below.
            model_id_input = gr.Textbox(
                label="Hugging Face model id",
                placeholder="cardiffnlp/twitter-roberta-base-sentiment-latest",
            )

            model_type = gr.Dropdown(
                label="Hugging Face model type",
                # (label, value) pairs; the default value 0 selects "Auto-detect".
                choices=[
                    ("Auto-detect", 0),
                    ("Text Classification", 1),
                ],
                value=0,
            )

        with gr.Column():
            # Dataset inputs; these three are the inputs of try_submit.
            dataset_id_input = gr.Textbox(
                label="Hugging Face dataset id",
                placeholder="tweet_eval",
            )

            # Starts with a single "default" choice; try_submit replaces the
            # choices when the entered config is not valid for the dataset.
            dataset_config_input = gr.Dropdown(
                label="Hugging Face dataset subset",
                choices=[
                    "default",
                ],
                allow_custom_value=True,
                value="default",
            )

            # Starts with a single "test" choice; try_submit replaces the
            # choices when the entered split is not valid for the dataset.
            dataset_split_input = gr.Dropdown(
                label="Hugging Face dataset split",
                choices=[
                    "test",
                ],
                allow_custom_value=True,
                value="test",
            )

    with gr.Row():
        run_btn = gr.Button("Validate and submit", variant="primary")
        # On click, validate the dataset selection; the two return values of
        # try_submit are written back to the config and split dropdowns.
        run_btn.click(
            try_submit,
            inputs=[
                dataset_id_input,
                dataset_config_input,
                dataset_split_input
            ],
            outputs=[
                dataset_config_input,
                dataset_split_input
            ],
        )

# Enable the request queue (max_size=20) and start the app at import time.
iface.queue(max_size=20)
iface.launch()