davidberenstein1957 HF staff committed on
Commit
14f85b1
1 Parent(s): ab58a29

feat: slight formatting updated

Browse files
src/distilabel_dataset_generator/apps/eval.py CHANGED
@@ -73,7 +73,7 @@ def define_evaluation_aspects(task_type: str):
73
  interactive=True,
74
  )
75
  else:
76
- return gr.Dropdown(interactive=False)
77
 
78
 
79
  def evaluate_instruction(df: pd.DataFrame, aspects: list[str], instruction_column: str):
@@ -176,12 +176,10 @@ def push_to_hub(
176
  structured_output,
177
  )
178
  new_repo_id = f"{org_name}/{repo_name}"
179
- print(df)
180
 
181
 
182
  with gr.Blocks() as app:
183
- gr.Markdown("## Select your input dataset")
184
- gr.HTML("<hr>")
185
  with gr.Row():
186
  with gr.Column(scale=1):
187
  search_in = HuggingfaceHubSearch(
@@ -190,12 +188,12 @@ with gr.Blocks() as app:
190
  search_type="dataset",
191
  sumbit_on_select=True,
192
  )
193
- load_btn = gr.Button("Load Dataset")
194
  with gr.Column(scale=3):
195
  search_out = gr.HTML(label="Dataset Preview")
196
 
197
- gr.Markdown("## Configure your task")
198
  gr.HTML("<hr>")
 
199
  with gr.Row():
200
  with gr.Column(scale=1):
201
  eval_type = gr.Dropdown(
@@ -251,8 +249,8 @@ with gr.Blocks() as app:
251
  with gr.Column(scale=3):
252
  dataframe = gr.Dataframe()
253
 
254
- gr.Markdown("## Generate your dataset")
255
  gr.HTML("<hr>")
 
256
  with gr.Row():
257
  with gr.Column(scale=1):
258
  org_name = get_org_dropdown()
 
73
  interactive=True,
74
  )
75
  else:
76
+ return gr.Dropdown(interactive=False, visible=False)
77
 
78
 
79
  def evaluate_instruction(df: pd.DataFrame, aspects: list[str], instruction_column: str):
 
176
  structured_output,
177
  )
178
  new_repo_id = f"{org_name}/{repo_name}"
 
179
 
180
 
181
  with gr.Blocks() as app:
182
+ gr.Markdown("## 1. Select your input dataset")
 
183
  with gr.Row():
184
  with gr.Column(scale=1):
185
  search_in = HuggingfaceHubSearch(
 
188
  search_type="dataset",
189
  sumbit_on_select=True,
190
  )
191
+ load_btn = gr.Button("Load dataset")
192
  with gr.Column(scale=3):
193
  search_out = gr.HTML(label="Dataset Preview")
194
 
 
195
  gr.HTML("<hr>")
196
+ gr.Markdown("## 2. Configure your task")
197
  with gr.Row():
198
  with gr.Column(scale=1):
199
  eval_type = gr.Dropdown(
 
249
  with gr.Column(scale=3):
250
  dataframe = gr.Dataframe()
251
 
 
252
  gr.HTML("<hr>")
253
+ gr.Markdown("## 3. Generate your dataset")
254
  with gr.Row():
255
  with gr.Column(scale=1):
256
  org_name = get_org_dropdown()
src/distilabel_dataset_generator/apps/sft.py CHANGED
@@ -346,8 +346,7 @@ def push_dataset_to_argilla(
346
 
347
  with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
348
  with gr.Column() as main_ui:
349
- gr.Markdown("## Describe the dataset you want")
350
- gr.HTML("<hr>")
351
  with gr.Row():
352
  with gr.Column(scale=1):
353
  dataset_description = gr.Textbox(
@@ -360,19 +359,19 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
360
  cache_examples=False,
361
  label="Example descriptions",
362
  )
363
- system_prompt = gr.Textbox(
364
- label="System prompt",
365
- placeholder="You are a helpful assistant.",
366
- visible=False,
367
- )
368
- load_btn = gr.Button("Load Dataset")
369
  with gr.Column(scale=3):
370
  pass
371
 
372
- gr.Markdown("## Configure your task")
373
- gr.HTML("<hr>")
374
  with gr.Row():
375
  with gr.Column(scale=1):
 
 
 
 
376
  num_turns = gr.Number(
377
  value=1,
378
  label="Number of turns in the conversation",
@@ -386,8 +385,8 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
386
  with gr.Column(scale=3):
387
  dataframe = gr.Dataframe()
388
 
389
- gr.Markdown("## Generate your dataset")
390
- gr.HTML("<hr>")
391
  with gr.Row():
392
  with gr.Column(scale=1):
393
  org_name = get_org_dropdown()
 
346
 
347
  with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
348
  with gr.Column() as main_ui:
349
+ gr.Markdown(value="## 1. Describe the dataset you want")
 
350
  with gr.Row():
351
  with gr.Column(scale=1):
352
  dataset_description = gr.Textbox(
 
359
  cache_examples=False,
360
  label="Example descriptions",
361
  )
362
+
363
+ load_btn = gr.Button("Load dataset")
 
 
 
 
364
  with gr.Column(scale=3):
365
  pass
366
 
367
+ gr.HTML(value="<hr>")
368
+ gr.Markdown(value="## 2. Configure your task")
369
  with gr.Row():
370
  with gr.Column(scale=1):
371
+ system_prompt = gr.Textbox(
372
+ label="System prompt",
373
+ placeholder="You are a helpful assistant.",
374
+ )
375
  num_turns = gr.Number(
376
  value=1,
377
  label="Number of turns in the conversation",
 
385
  with gr.Column(scale=3):
386
  dataframe = gr.Dataframe()
387
 
388
+ gr.HTML(value="<hr>")
389
+ gr.Markdown(value="## 3. Generate your dataset")
390
  with gr.Row():
391
  with gr.Column(scale=1):
392
  org_name = get_org_dropdown()
src/distilabel_dataset_generator/apps/textcat.py CHANGED
@@ -355,8 +355,7 @@ def update_max_num_labels(labels):
355
 
356
  with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
357
  with gr.Column() as main_ui:
358
- gr.Markdown("## Describe the dataset you want")
359
- gr.HTML("<hr>")
360
  with gr.Row():
361
  with gr.Column(scale=1):
362
  dataset_description = gr.Textbox(
@@ -369,19 +368,19 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
369
  cache_examples=False,
370
  label="Example descriptions",
371
  )
372
- system_prompt = gr.Textbox(
373
- label="System prompt",
374
- placeholder="You are a helpful assistant.",
375
- visible=False,
376
- )
377
- load_btn = gr.Button("Load Dataset")
378
  with gr.Column(scale=3):
379
  pass
380
 
381
- gr.Markdown("## Configure your task")
382
  gr.HTML("<hr>")
 
383
  with gr.Row():
384
  with gr.Column(scale=1):
 
 
 
 
 
385
  difficulty = gr.Dropdown(
386
  choices=[
387
  ("High School", "high school"),
@@ -429,8 +428,8 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
429
  with gr.Column(scale=3):
430
  dataframe = gr.Dataframe()
431
 
432
- gr.Markdown("## Generate your dataset")
433
  gr.HTML("<hr>")
 
434
  with gr.Row():
435
  with gr.Column(scale=1):
436
  org_name = get_org_dropdown()
 
355
 
356
  with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
357
  with gr.Column() as main_ui:
358
+ gr.Markdown("## 1. Describe the dataset you want")
 
359
  with gr.Row():
360
  with gr.Column(scale=1):
361
  dataset_description = gr.Textbox(
 
368
  cache_examples=False,
369
  label="Example descriptions",
370
  )
371
+ load_btn = gr.Button("Load dataset")
 
 
 
 
 
372
  with gr.Column(scale=3):
373
  pass
374
 
 
375
  gr.HTML("<hr>")
376
+ gr.Markdown("## 2. Configure your task")
377
  with gr.Row():
378
  with gr.Column(scale=1):
379
+ system_prompt = gr.Textbox(
380
+ label="System prompt",
381
+ placeholder="You are a helpful assistant.",
382
+ visible=True,
383
+ )
384
  difficulty = gr.Dropdown(
385
  choices=[
386
  ("High School", "high school"),
 
428
  with gr.Column(scale=3):
429
  dataframe = gr.Dataframe()
430
 
 
431
  gr.HTML("<hr>")
432
+ gr.Markdown("## 3. Generate your dataset")
433
  with gr.Row():
434
  with gr.Column(scale=1):
435
  org_name = get_org_dropdown()
src/distilabel_dataset_generator/utils.py CHANGED
@@ -45,26 +45,31 @@ def get_duplicate_button():
45
 
46
 
47
  def list_orgs(oauth_token: OAuthToken = None):
48
- if oauth_token is None:
49
- return []
50
- data = whoami(oauth_token.token)
51
- if data["auth"]["type"] == "oauth":
52
- organisations = [data["name"]] + [org["name"] for org in data["orgs"]]
53
- elif data["auth"]["type"] == "access_token":
54
- organisations = [org["name"] for org in data["orgs"]]
55
- else:
56
- organisations = [
57
- entry["entity"]["name"]
58
- for entry in data["auth"]["accessToken"]["fineGrained"]["scoped"]
59
- if "repo.write" in entry["permissions"]
60
- ]
61
- organisations = [org for org in organisations if org != data["name"]]
62
- organisations = [data["name"]] + organisations
 
 
 
 
 
63
  return organisations
64
 
65
 
66
  def get_org_dropdown(oauth_token: OAuthToken = None):
67
- if oauth_token:
68
  orgs = list_orgs(oauth_token)
69
  else:
70
  orgs = []
 
45
 
46
 
47
  def list_orgs(oauth_token: OAuthToken = None):
48
+ try:
49
+ if oauth_token is None:
50
+ return []
51
+ data = whoami(oauth_token.token)
52
+ if data["auth"]["type"] == "oauth":
53
+ organisations = [data["name"]] + [org["name"] for org in data["orgs"]]
54
+ elif data["auth"]["type"] == "access_token":
55
+ organisations = [org["name"] for org in data["orgs"]]
56
+ else:
57
+ organisations = [
58
+ entry["entity"]["name"]
59
+ for entry in data["auth"]["accessToken"]["fineGrained"]["scoped"]
60
+ if "repo.write" in entry["permissions"]
61
+ ]
62
+ organisations = [org for org in organisations if org != data["name"]]
63
+ organisations = [data["name"]] + organisations
64
+ except Exception as e:
65
+ raise gr.Error(
66
+ f"Failed to get organizations: {e}. See if you are logged and connected: https://huggingface.co/settings/connected-applications."
67
+ )
68
  return organisations
69
 
70
 
71
  def get_org_dropdown(oauth_token: OAuthToken = None):
72
+ if oauth_token is not None:
73
  orgs = list_orgs(oauth_token)
74
  else:
75
  orgs = []