Commit
•
7314f90
1
Parent(s):
7835b60
feat: add login css
Browse files
src/distilabel_dataset_generator/apps/sft.py
CHANGED
@@ -33,7 +33,10 @@ from src.distilabel_dataset_generator.pipelines.sft import (
|
|
33 |
get_response_generator,
|
34 |
)
|
35 |
from src.distilabel_dataset_generator.utils import (
|
|
|
|
|
36 |
get_org_dropdown,
|
|
|
37 |
)
|
38 |
|
39 |
|
@@ -341,77 +344,78 @@ def push_dataset_to_argilla(
|
|
341 |
return ""
|
342 |
|
343 |
|
344 |
-
with gr.Blocks() as app:
|
345 |
-
gr.
|
346 |
-
|
347 |
-
|
348 |
-
with gr.
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
examples=
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
|
364 |
-
|
365 |
-
|
366 |
-
|
|
|
367 |
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
|
416 |
gr.on(
|
417 |
triggers=[load_btn.click, btn_apply_to_sample_dataset.click],
|
@@ -457,4 +461,5 @@ with gr.Blocks() as app:
|
|
457 |
inputs=[org_name, repo_name],
|
458 |
outputs=[success_message],
|
459 |
)
|
|
|
460 |
app.load(fn=get_org_dropdown, outputs=[org_name])
|
|
|
33 |
get_response_generator,
|
34 |
)
|
35 |
from src.distilabel_dataset_generator.utils import (
|
36 |
+
_LOGGED_OUT_CSS,
|
37 |
+
get_argilla_client,
|
38 |
get_org_dropdown,
|
39 |
+
swap_visibilty,
|
40 |
)
|
41 |
|
42 |
|
|
|
344 |
return ""
|
345 |
|
346 |
|
347 |
+
with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
348 |
+
with gr.Column() as main_ui:
|
349 |
+
gr.Markdown("## Describe the dataset you want")
|
350 |
+
gr.HTML("<hr>")
|
351 |
+
with gr.Row():
|
352 |
+
with gr.Column(scale=1):
|
353 |
+
dataset_description = gr.Textbox(
|
354 |
+
label="Dataset description",
|
355 |
+
placeholder="Give a precise description of your desired dataset.",
|
356 |
+
)
|
357 |
+
examples = gr.Examples(
|
358 |
+
examples=DEFAULT_DATASET_DESCRIPTIONS,
|
359 |
+
inputs=[dataset_description],
|
360 |
+
cache_examples=False,
|
361 |
+
label="Example descriptions",
|
362 |
+
)
|
363 |
+
system_prompt = gr.Textbox(
|
364 |
+
label="System prompt",
|
365 |
+
placeholder="You are a helpful assistant.",
|
366 |
+
visible=False,
|
367 |
+
)
|
368 |
+
load_btn = gr.Button("Load Dataset")
|
369 |
+
with gr.Column(scale=3):
|
370 |
+
pass
|
371 |
|
372 |
+
gr.Markdown("## Configure your task")
|
373 |
+
gr.HTML("<hr>")
|
374 |
+
with gr.Row():
|
375 |
+
with gr.Column(scale=1):
|
376 |
+
num_turns = gr.Number(
|
377 |
+
value=1,
|
378 |
+
label="Number of turns in the conversation",
|
379 |
+
minimum=1,
|
380 |
+
maximum=4,
|
381 |
+
step=1,
|
382 |
+
interactive=True,
|
383 |
+
info="Choose between 1 (single turn with 'instruction-response' columns) and 2-4 (multi-turn conversation with a 'messages' column).",
|
384 |
+
)
|
385 |
+
btn_apply_to_sample_dataset = gr.Button("Refresh dataset")
|
386 |
+
with gr.Column(scale=3):
|
387 |
+
dataframe = gr.Dataframe()
|
388 |
|
389 |
+
gr.Markdown("## Generate your dataset")
|
390 |
+
gr.HTML("<hr>")
|
391 |
+
with gr.Row():
|
392 |
+
with gr.Column(scale=1):
|
393 |
+
org_name = get_org_dropdown()
|
394 |
+
repo_name = gr.Textbox(
|
395 |
+
label="Repo name",
|
396 |
+
placeholder="dataset_name",
|
397 |
+
value=f"my-distiset-{str(uuid.uuid4())[:8]}",
|
398 |
+
interactive=True,
|
399 |
+
)
|
400 |
+
n_rows = gr.Number(
|
401 |
+
label="Number of rows",
|
402 |
+
value=10,
|
403 |
+
interactive=True,
|
404 |
+
scale=1,
|
405 |
+
)
|
406 |
+
private = gr.Checkbox(
|
407 |
+
label="Private dataset",
|
408 |
+
value=False,
|
409 |
+
interactive=True,
|
410 |
+
scale=1,
|
411 |
+
)
|
412 |
+
btn_push_to_hub = gr.Button("Push to Hub", variant="primary", scale=2)
|
413 |
+
with gr.Column(scale=3):
|
414 |
+
success_message = gr.Markdown()
|
415 |
|
416 |
+
pipeline_code = get_pipeline_code_ui(
|
417 |
+
generate_pipeline_code(system_prompt.value, num_turns.value, n_rows.value)
|
418 |
+
)
|
419 |
|
420 |
gr.on(
|
421 |
triggers=[load_btn.click, btn_apply_to_sample_dataset.click],
|
|
|
461 |
inputs=[org_name, repo_name],
|
462 |
outputs=[success_message],
|
463 |
)
|
464 |
+
app.load(fn=swap_visibilty, outputs=main_ui)
|
465 |
app.load(fn=get_org_dropdown, outputs=[org_name])
|
src/distilabel_dataset_generator/apps/textcat.py
CHANGED
@@ -33,8 +33,11 @@ from src.distilabel_dataset_generator.pipelines.textcat import (
|
|
33 |
get_textcat_generator,
|
34 |
)
|
35 |
from src.distilabel_dataset_generator.utils import (
|
|
|
|
|
36 |
get_org_dropdown,
|
37 |
get_preprocess_labels,
|
|
|
38 |
)
|
39 |
|
40 |
|
@@ -350,118 +353,119 @@ def update_max_num_labels(labels):
|
|
350 |
return gr.update(maximum=len(labels) if labels else 1)
|
351 |
|
352 |
|
353 |
-
with gr.Blocks() as app:
|
354 |
-
gr.
|
355 |
-
|
356 |
-
|
357 |
-
with gr.
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
examples=
|
364 |
-
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
with gr.
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
with gr.
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
450 |
)
|
451 |
-
btn_push_to_hub = gr.Button("Push to Hub", variant="primary", scale=2)
|
452 |
-
with gr.Column(scale=3):
|
453 |
-
success_message = gr.Markdown(visible=True)
|
454 |
-
|
455 |
-
pipeline_code = get_pipeline_code_ui(
|
456 |
-
generate_pipeline_code(
|
457 |
-
system_prompt.value,
|
458 |
-
difficulty=difficulty.value,
|
459 |
-
clarity=clarity.value,
|
460 |
-
labels=labels.value,
|
461 |
-
num_labels=num_labels.value,
|
462 |
-
num_rows=n_rows.value,
|
463 |
)
|
464 |
-
)
|
465 |
|
466 |
gr.on(
|
467 |
triggers=[load_btn.click, btn_apply_to_sample_dataset.click],
|
@@ -518,5 +522,5 @@ with gr.Blocks() as app:
|
|
518 |
inputs=[org_name, repo_name],
|
519 |
outputs=[success_message],
|
520 |
)
|
521 |
-
|
522 |
app.load(fn=get_org_dropdown, outputs=[org_name])
|
|
|
33 |
get_textcat_generator,
|
34 |
)
|
35 |
from src.distilabel_dataset_generator.utils import (
|
36 |
+
_LOGGED_OUT_CSS,
|
37 |
+
get_argilla_client,
|
38 |
get_org_dropdown,
|
39 |
get_preprocess_labels,
|
40 |
+
swap_visibilty,
|
41 |
)
|
42 |
|
43 |
|
|
|
353 |
return gr.update(maximum=len(labels) if labels else 1)
|
354 |
|
355 |
|
356 |
+
with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
|
357 |
+
with gr.Column() as main_ui:
|
358 |
+
gr.Markdown("## Describe the dataset you want")
|
359 |
+
gr.HTML("<hr>")
|
360 |
+
with gr.Row():
|
361 |
+
with gr.Column(scale=1):
|
362 |
+
dataset_description = gr.Textbox(
|
363 |
+
label="Dataset description",
|
364 |
+
placeholder="Give a precise description of your desired dataset.",
|
365 |
+
)
|
366 |
+
examples = gr.Examples(
|
367 |
+
examples=DEFAULT_DATASET_DESCRIPTIONS,
|
368 |
+
inputs=[dataset_description],
|
369 |
+
cache_examples=False,
|
370 |
+
label="Example descriptions",
|
371 |
+
)
|
372 |
+
system_prompt = gr.Textbox(
|
373 |
+
label="System prompt",
|
374 |
+
placeholder="You are a helpful assistant.",
|
375 |
+
visible=False,
|
376 |
+
)
|
377 |
+
load_btn = gr.Button("Load Dataset")
|
378 |
+
with gr.Column(scale=3):
|
379 |
+
pass
|
380 |
+
|
381 |
+
gr.Markdown("## Configure your task")
|
382 |
+
gr.HTML("<hr>")
|
383 |
+
with gr.Row():
|
384 |
+
with gr.Column(scale=1):
|
385 |
+
difficulty = gr.Dropdown(
|
386 |
+
choices=[
|
387 |
+
("High School", "high school"),
|
388 |
+
("College", "college"),
|
389 |
+
("PhD", "PhD"),
|
390 |
+
("Mixed", "mixed"),
|
391 |
+
],
|
392 |
+
value="mixed",
|
393 |
+
label="Difficulty",
|
394 |
+
info="Select the comprehension level for the text. Ensure it matches the task context.",
|
395 |
+
interactive=True,
|
396 |
+
)
|
397 |
+
clarity = gr.Dropdown(
|
398 |
+
choices=[
|
399 |
+
("Clear", "clear"),
|
400 |
+
(
|
401 |
+
"Understandable",
|
402 |
+
"understandable with some effort",
|
403 |
+
),
|
404 |
+
("Ambiguous", "ambiguous"),
|
405 |
+
("Mixed", "mixed"),
|
406 |
+
],
|
407 |
+
value="mixed",
|
408 |
+
label="Clarity",
|
409 |
+
info="Set how easily the correct label or labels can be identified.",
|
410 |
+
interactive=True,
|
411 |
+
)
|
412 |
+
labels = gr.Dropdown(
|
413 |
+
choices=[],
|
414 |
+
allow_custom_value=True,
|
415 |
+
interactive=True,
|
416 |
+
label="Labels",
|
417 |
+
multiselect=True,
|
418 |
+
info="Add the labels to classify the text.",
|
419 |
+
)
|
420 |
+
num_labels = gr.Number(
|
421 |
+
label="Number of labels per text",
|
422 |
+
value=1,
|
423 |
+
minimum=1,
|
424 |
+
maximum=10,
|
425 |
+
info="Select 1 for single-label and >1 for multi-label.",
|
426 |
+
interactive=True,
|
427 |
+
)
|
428 |
+
btn_apply_to_sample_dataset = gr.Button("Refresh dataset")
|
429 |
+
with gr.Column(scale=3):
|
430 |
+
dataframe = gr.Dataframe()
|
431 |
+
|
432 |
+
gr.Markdown("## Generate your dataset")
|
433 |
+
gr.HTML("<hr>")
|
434 |
+
with gr.Row():
|
435 |
+
with gr.Column(scale=1):
|
436 |
+
org_name = get_org_dropdown()
|
437 |
+
repo_name = gr.Textbox(
|
438 |
+
label="Repo name",
|
439 |
+
placeholder="dataset_name",
|
440 |
+
value=f"my-distiset-{str(uuid.uuid4())[:8]}",
|
441 |
+
interactive=True,
|
442 |
+
)
|
443 |
+
n_rows = gr.Number(
|
444 |
+
label="Number of rows",
|
445 |
+
value=10,
|
446 |
+
interactive=True,
|
447 |
+
scale=1,
|
448 |
+
)
|
449 |
+
private = gr.Checkbox(
|
450 |
+
label="Private dataset",
|
451 |
+
value=False,
|
452 |
+
interactive=True,
|
453 |
+
scale=1,
|
454 |
+
)
|
455 |
+
btn_push_to_hub = gr.Button("Push to Hub", variant="primary", scale=2)
|
456 |
+
with gr.Column(scale=3):
|
457 |
+
success_message = gr.Markdown(visible=True)
|
458 |
+
|
459 |
+
pipeline_code = get_pipeline_code_ui(
|
460 |
+
generate_pipeline_code(
|
461 |
+
system_prompt.value,
|
462 |
+
difficulty=difficulty.value,
|
463 |
+
clarity=clarity.value,
|
464 |
+
labels=labels.value,
|
465 |
+
num_labels=num_labels.value,
|
466 |
+
num_rows=n_rows.value,
|
467 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
468 |
)
|
|
|
469 |
|
470 |
gr.on(
|
471 |
triggers=[load_btn.click, btn_apply_to_sample_dataset.click],
|
|
|
522 |
inputs=[org_name, repo_name],
|
523 |
outputs=[success_message],
|
524 |
)
|
525 |
+
app.load(fn=swap_visibilty, outputs=main_ui)
|
526 |
app.load(fn=get_org_dropdown, outputs=[org_name])
|