Commit
•
14f85b1
1
Parent(s):
ab58a29
feat: slight formatting updated
Browse files
src/distilabel_dataset_generator/apps/eval.py
CHANGED
@@ -73,7 +73,7 @@ def define_evaluation_aspects(task_type: str):
|
|
73 |
interactive=True,
|
74 |
)
|
75 |
else:
|
76 |
-
return gr.Dropdown(interactive=False)
|
77 |
|
78 |
|
79 |
def evaluate_instruction(df: pd.DataFrame, aspects: list[str], instruction_column: str):
|
@@ -176,12 +176,10 @@ def push_to_hub(
|
|
176 |
structured_output,
|
177 |
)
|
178 |
new_repo_id = f"{org_name}/{repo_name}"
|
179 |
-
print(df)
|
180 |
|
181 |
|
182 |
with gr.Blocks() as app:
|
183 |
-
gr.Markdown("## Select your input dataset")
|
184 |
-
gr.HTML("<hr>")
|
185 |
with gr.Row():
|
186 |
with gr.Column(scale=1):
|
187 |
search_in = HuggingfaceHubSearch(
|
@@ -190,12 +188,12 @@ with gr.Blocks() as app:
|
|
190 |
search_type="dataset",
|
191 |
sumbit_on_select=True,
|
192 |
)
|
193 |
-
load_btn = gr.Button("Load Dataset")
|
194 |
with gr.Column(scale=3):
|
195 |
search_out = gr.HTML(label="Dataset Preview")
|
196 |
|
197 |
-
gr.Markdown("## Configure your task")
|
198 |
gr.HTML("<hr>")
|
|
|
199 |
with gr.Row():
|
200 |
with gr.Column(scale=1):
|
201 |
eval_type = gr.Dropdown(
|
@@ -251,8 +249,8 @@ with gr.Blocks() as app:
|
|
251 |
with gr.Column(scale=3):
|
252 |
dataframe = gr.Dataframe()
|
253 |
|
254 |
-
gr.Markdown("## Generate your dataset")
|
255 |
gr.HTML("<hr>")
|
|
|
256 |
with gr.Row():
|
257 |
with gr.Column(scale=1):
|
258 |
org_name = get_org_dropdown()
|
|
|
73 |
interactive=True,
|
74 |
)
|
75 |
else:
|
76 |
+
return gr.Dropdown(interactive=False, visible=False)
|
77 |
|
78 |
|
79 |
def evaluate_instruction(df: pd.DataFrame, aspects: list[str], instruction_column: str):
|
|
|
176 |
structured_output,
|
177 |
)
|
178 |
new_repo_id = f"{org_name}/{repo_name}"
|
|
|
179 |
|
180 |
|
181 |
with gr.Blocks() as app:
|
182 |
+
gr.Markdown("## 1. Select your input dataset")
|
|
|
183 |
with gr.Row():
|
184 |
with gr.Column(scale=1):
|
185 |
search_in = HuggingfaceHubSearch(
|
|
|
188 |
search_type="dataset",
|
189 |
sumbit_on_select=True,
|
190 |
)
|
191 |
+
load_btn = gr.Button("Load dataset")
|
192 |
with gr.Column(scale=3):
|
193 |
search_out = gr.HTML(label="Dataset Preview")
|
194 |
|
|
|
195 |
gr.HTML("<hr>")
|
196 |
+
gr.Markdown("## 2. Configure your task")
|
197 |
with gr.Row():
|
198 |
with gr.Column(scale=1):
|
199 |
eval_type = gr.Dropdown(
|
|
|
249 |
with gr.Column(scale=3):
|
250 |
dataframe = gr.Dataframe()
|
251 |
|
|
|
252 |
gr.HTML("<hr>")
|
253 |
+
gr.Markdown("## 3. Generate your dataset")
|
254 |
with gr.Row():
|
255 |
with gr.Column(scale=1):
|
256 |
org_name = get_org_dropdown()
|
src/distilabel_dataset_generator/apps/sft.py
CHANGED
@@ -346,8 +346,7 @@ def push_dataset_to_argilla(
|
|
346 |
|
347 |
with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
348 |
with gr.Column() as main_ui:
|
349 |
-
gr.Markdown("## Describe the dataset you want")
|
350 |
-
gr.HTML("<hr>")
|
351 |
with gr.Row():
|
352 |
with gr.Column(scale=1):
|
353 |
dataset_description = gr.Textbox(
|
@@ -360,19 +359,19 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
|
360 |
cache_examples=False,
|
361 |
label="Example descriptions",
|
362 |
)
|
363 |
-
|
364 |
-
|
365 |
-
placeholder="You are a helpful assistant.",
|
366 |
-
visible=False,
|
367 |
-
)
|
368 |
-
load_btn = gr.Button("Load Dataset")
|
369 |
with gr.Column(scale=3):
|
370 |
pass
|
371 |
|
372 |
-
gr.Markdown("## Configure your task")
|
373 |
-
gr.HTML("<hr>")
|
374 |
with gr.Row():
|
375 |
with gr.Column(scale=1):
|
|
|
|
|
|
|
|
|
376 |
num_turns = gr.Number(
|
377 |
value=1,
|
378 |
label="Number of turns in the conversation",
|
@@ -386,8 +385,8 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
|
386 |
with gr.Column(scale=3):
|
387 |
dataframe = gr.Dataframe()
|
388 |
|
389 |
-
gr.Markdown("## Generate your dataset")
|
390 |
-
gr.HTML("<hr>")
|
391 |
with gr.Row():
|
392 |
with gr.Column(scale=1):
|
393 |
org_name = get_org_dropdown()
|
|
|
346 |
|
347 |
with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
348 |
with gr.Column() as main_ui:
|
349 |
+
gr.Markdown(value="## 1. Describe the dataset you want")
|
|
|
350 |
with gr.Row():
|
351 |
with gr.Column(scale=1):
|
352 |
dataset_description = gr.Textbox(
|
|
|
359 |
cache_examples=False,
|
360 |
label="Example descriptions",
|
361 |
)
|
362 |
+
|
363 |
+
load_btn = gr.Button("Load dataset")
|
|
|
|
|
|
|
|
|
364 |
with gr.Column(scale=3):
|
365 |
pass
|
366 |
|
367 |
+
gr.HTML(value="<hr>")
|
368 |
+
gr.Markdown(value="## 2. Configure your task")
|
369 |
with gr.Row():
|
370 |
with gr.Column(scale=1):
|
371 |
+
system_prompt = gr.Textbox(
|
372 |
+
label="System prompt",
|
373 |
+
placeholder="You are a helpful assistant.",
|
374 |
+
)
|
375 |
num_turns = gr.Number(
|
376 |
value=1,
|
377 |
label="Number of turns in the conversation",
|
|
|
385 |
with gr.Column(scale=3):
|
386 |
dataframe = gr.Dataframe()
|
387 |
|
388 |
+
gr.HTML(value="<hr>")
|
389 |
+
gr.Markdown(value="## 3. Generate your dataset")
|
390 |
with gr.Row():
|
391 |
with gr.Column(scale=1):
|
392 |
org_name = get_org_dropdown()
|
src/distilabel_dataset_generator/apps/textcat.py
CHANGED
@@ -355,8 +355,7 @@ def update_max_num_labels(labels):
|
|
355 |
|
356 |
with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
357 |
with gr.Column() as main_ui:
|
358 |
-
gr.Markdown("## Describe the dataset you want")
|
359 |
-
gr.HTML("<hr>")
|
360 |
with gr.Row():
|
361 |
with gr.Column(scale=1):
|
362 |
dataset_description = gr.Textbox(
|
@@ -369,19 +368,19 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
|
369 |
cache_examples=False,
|
370 |
label="Example descriptions",
|
371 |
)
|
372 |
-
|
373 |
-
label="System prompt",
|
374 |
-
placeholder="You are a helpful assistant.",
|
375 |
-
visible=False,
|
376 |
-
)
|
377 |
-
load_btn = gr.Button("Load Dataset")
|
378 |
with gr.Column(scale=3):
|
379 |
pass
|
380 |
|
381 |
-
gr.Markdown("## Configure your task")
|
382 |
gr.HTML("<hr>")
|
|
|
383 |
with gr.Row():
|
384 |
with gr.Column(scale=1):
|
|
|
|
|
|
|
|
|
|
|
385 |
difficulty = gr.Dropdown(
|
386 |
choices=[
|
387 |
("High School", "high school"),
|
@@ -429,8 +428,8 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
|
429 |
with gr.Column(scale=3):
|
430 |
dataframe = gr.Dataframe()
|
431 |
|
432 |
-
gr.Markdown("## Generate your dataset")
|
433 |
gr.HTML("<hr>")
|
|
|
434 |
with gr.Row():
|
435 |
with gr.Column(scale=1):
|
436 |
org_name = get_org_dropdown()
|
|
|
355 |
|
356 |
with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
357 |
with gr.Column() as main_ui:
|
358 |
+
gr.Markdown("## 1. Describe the dataset you want")
|
|
|
359 |
with gr.Row():
|
360 |
with gr.Column(scale=1):
|
361 |
dataset_description = gr.Textbox(
|
|
|
368 |
cache_examples=False,
|
369 |
label="Example descriptions",
|
370 |
)
|
371 |
+
load_btn = gr.Button("Load dataset")
|
|
|
|
|
|
|
|
|
|
|
372 |
with gr.Column(scale=3):
|
373 |
pass
|
374 |
|
|
|
375 |
gr.HTML("<hr>")
|
376 |
+
gr.Markdown("## 2. Configure your task")
|
377 |
with gr.Row():
|
378 |
with gr.Column(scale=1):
|
379 |
+
system_prompt = gr.Textbox(
|
380 |
+
label="System prompt",
|
381 |
+
placeholder="You are a helpful assistant.",
|
382 |
+
visible=True,
|
383 |
+
)
|
384 |
difficulty = gr.Dropdown(
|
385 |
choices=[
|
386 |
("High School", "high school"),
|
|
|
428 |
with gr.Column(scale=3):
|
429 |
dataframe = gr.Dataframe()
|
430 |
|
|
|
431 |
gr.HTML("<hr>")
|
432 |
+
gr.Markdown("## 3. Generate your dataset")
|
433 |
with gr.Row():
|
434 |
with gr.Column(scale=1):
|
435 |
org_name = get_org_dropdown()
|
src/distilabel_dataset_generator/utils.py
CHANGED
@@ -45,26 +45,31 @@ def get_duplicate_button():
|
|
45 |
|
46 |
|
47 |
def list_orgs(oauth_token: OAuthToken = None):
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
63 |
return organisations
|
64 |
|
65 |
|
66 |
def get_org_dropdown(oauth_token: OAuthToken = None):
|
67 |
-
if oauth_token:
|
68 |
orgs = list_orgs(oauth_token)
|
69 |
else:
|
70 |
orgs = []
|
|
|
45 |
|
46 |
|
47 |
def list_orgs(oauth_token: OAuthToken = None):
|
48 |
+
try:
|
49 |
+
if oauth_token is None:
|
50 |
+
return []
|
51 |
+
data = whoami(oauth_token.token)
|
52 |
+
if data["auth"]["type"] == "oauth":
|
53 |
+
organisations = [data["name"]] + [org["name"] for org in data["orgs"]]
|
54 |
+
elif data["auth"]["type"] == "access_token":
|
55 |
+
organisations = [org["name"] for org in data["orgs"]]
|
56 |
+
else:
|
57 |
+
organisations = [
|
58 |
+
entry["entity"]["name"]
|
59 |
+
for entry in data["auth"]["accessToken"]["fineGrained"]["scoped"]
|
60 |
+
if "repo.write" in entry["permissions"]
|
61 |
+
]
|
62 |
+
organisations = [org for org in organisations if org != data["name"]]
|
63 |
+
organisations = [data["name"]] + organisations
|
64 |
+
except Exception as e:
|
65 |
+
raise gr.Error(
|
66 |
+
f"Failed to get organizations: {e}. See if you are logged and connected: https://huggingface.co/settings/connected-applications."
|
67 |
+
)
|
68 |
return organisations
|
69 |
|
70 |
|
71 |
def get_org_dropdown(oauth_token: OAuthToken = None):
|
72 |
+
if oauth_token is not None:
|
73 |
orgs = list_orgs(oauth_token)
|
74 |
else:
|
75 |
orgs = []
|