Spaces:

argilla
/

synthetic-data-generator

Running

App Files Files Community

davidberenstein1957 committed on Nov 22, 2024

Commit

14f85b1

1 Parent(s): ab58a29

feat: slight formatting updated

Browse files

Files changed (4) hide show

src/distilabel_dataset_generator/apps/eval.py +5 -7
src/distilabel_dataset_generator/apps/sft.py +11 -12
src/distilabel_dataset_generator/apps/textcat.py +9 -10
src/distilabel_dataset_generator/utils.py +21 -16

src/distilabel_dataset_generator/apps/eval.py CHANGED Viewed

@@ -73,7 +73,7 @@ def define_evaluation_aspects(task_type: str):
             interactive=True,
         )
     else:
-        return gr.Dropdown(interactive=False)
 def evaluate_instruction(df: pd.DataFrame, aspects: list[str], instruction_column: str):
@@ -176,12 +176,10 @@ def push_to_hub(
         structured_output,
     )
     new_repo_id = f"{org_name}/{repo_name}"
-    print(df)
 with gr.Blocks() as app:
-    gr.Markdown("## Select your input dataset")
-    gr.HTML("<hr>")
     with gr.Row():
         with gr.Column(scale=1):
             search_in = HuggingfaceHubSearch(
@@ -190,12 +188,12 @@ with gr.Blocks() as app:
                 search_type="dataset",
                 sumbit_on_select=True,
             )
-            load_btn = gr.Button("Load Dataset")
         with gr.Column(scale=3):
             search_out = gr.HTML(label="Dataset Preview")
-    gr.Markdown("## Configure your task")
     gr.HTML("<hr>")
     with gr.Row():
         with gr.Column(scale=1):
             eval_type = gr.Dropdown(
@@ -251,8 +249,8 @@ with gr.Blocks() as app:
         with gr.Column(scale=3):
             dataframe = gr.Dataframe()
-    gr.Markdown("## Generate your dataset")
     gr.HTML("<hr>")
     with gr.Row():
         with gr.Column(scale=1):
             org_name = get_org_dropdown()

             interactive=True,
         )
     else:
+        return gr.Dropdown(interactive=False, visible=False)
 def evaluate_instruction(df: pd.DataFrame, aspects: list[str], instruction_column: str):
         structured_output,
     )
     new_repo_id = f"{org_name}/{repo_name}"
 with gr.Blocks() as app:
+    gr.Markdown("## 1. Select your input dataset")
     with gr.Row():
         with gr.Column(scale=1):
             search_in = HuggingfaceHubSearch(
                 search_type="dataset",
                 sumbit_on_select=True,
             )
+            load_btn = gr.Button("Load dataset")
         with gr.Column(scale=3):
             search_out = gr.HTML(label="Dataset Preview")
     gr.HTML("<hr>")
+    gr.Markdown("## 2. Configure your task")
     with gr.Row():
         with gr.Column(scale=1):
             eval_type = gr.Dropdown(
         with gr.Column(scale=3):
             dataframe = gr.Dataframe()
     gr.HTML("<hr>")
+    gr.Markdown("## 3. Generate your dataset")
     with gr.Row():
         with gr.Column(scale=1):
             org_name = get_org_dropdown()

src/distilabel_dataset_generator/apps/sft.py CHANGED Viewed

@@ -346,8 +346,7 @@ def push_dataset_to_argilla(
 with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
     with gr.Column() as main_ui:
-        gr.Markdown("## Describe the dataset you want")
-        gr.HTML("<hr>")
         with gr.Row():
             with gr.Column(scale=1):
                 dataset_description = gr.Textbox(
@@ -360,19 +359,19 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
                     cache_examples=False,
                     label="Example descriptions",
                 )
-                system_prompt = gr.Textbox(
-                    label="System prompt",
-                    placeholder="You are a helpful assistant.",
-                    visible=False,
-                )
-                load_btn = gr.Button("Load Dataset")
             with gr.Column(scale=3):
                 pass
-        gr.Markdown("## Configure your task")
-        gr.HTML("<hr>")
         with gr.Row():
             with gr.Column(scale=1):
                 num_turns = gr.Number(
                     value=1,
                     label="Number of turns in the conversation",
@@ -386,8 +385,8 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
             with gr.Column(scale=3):
                 dataframe = gr.Dataframe()
-        gr.Markdown("## Generate your dataset")
-        gr.HTML("<hr>")
         with gr.Row():
             with gr.Column(scale=1):
                 org_name = get_org_dropdown()

 with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
     with gr.Column() as main_ui:
+        gr.Markdown(value="## 1. Describe the dataset you want")
         with gr.Row():
             with gr.Column(scale=1):
                 dataset_description = gr.Textbox(
                     cache_examples=False,
                     label="Example descriptions",
                 )
+                load_btn = gr.Button("Load dataset")
             with gr.Column(scale=3):
                 pass
+        gr.HTML(value="<hr>")
+        gr.Markdown(value="## 2. Configure your task")
         with gr.Row():
             with gr.Column(scale=1):
+                system_prompt = gr.Textbox(
+                    label="System prompt",
+                    placeholder="You are a helpful assistant.",
+                )
                 num_turns = gr.Number(
                     value=1,
                     label="Number of turns in the conversation",
             with gr.Column(scale=3):
                 dataframe = gr.Dataframe()
+        gr.HTML(value="<hr>")
+        gr.Markdown(value="## 3. Generate your dataset")
         with gr.Row():
             with gr.Column(scale=1):
                 org_name = get_org_dropdown()

src/distilabel_dataset_generator/apps/textcat.py CHANGED Viewed

@@ -355,8 +355,7 @@ def update_max_num_labels(labels):
 with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
     with gr.Column() as main_ui:
-        gr.Markdown("## Describe the dataset you want")
-        gr.HTML("<hr>")
         with gr.Row():
             with gr.Column(scale=1):
                 dataset_description = gr.Textbox(
@@ -369,19 +368,19 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
                     cache_examples=False,
                     label="Example descriptions",
                 )
-                system_prompt = gr.Textbox(
-                    label="System prompt",
-                    placeholder="You are a helpful assistant.",
-                    visible=False,
-                )
-                load_btn = gr.Button("Load Dataset")
             with gr.Column(scale=3):
                 pass
-        gr.Markdown("## Configure your task")
         gr.HTML("<hr>")
         with gr.Row():
             with gr.Column(scale=1):
                 difficulty = gr.Dropdown(
                     choices=[
                         ("High School", "high school"),
@@ -429,8 +428,8 @@ with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
             with gr.Column(scale=3):
                 dataframe = gr.Dataframe()
-        gr.Markdown("## Generate your dataset")
         gr.HTML("<hr>")
         with gr.Row():
             with gr.Column(scale=1):
                 org_name = get_org_dropdown()

 with gr.Blocks(css=_LOGGED_OUT_CSS) as app:
     with gr.Column() as main_ui:
+        gr.Markdown("## 1. Describe the dataset you want")
         with gr.Row():
             with gr.Column(scale=1):
                 dataset_description = gr.Textbox(
                     cache_examples=False,
                     label="Example descriptions",
                 )
+                load_btn = gr.Button("Load dataset")
             with gr.Column(scale=3):
                 pass
         gr.HTML("<hr>")
+        gr.Markdown("## 2. Configure your task")
         with gr.Row():
             with gr.Column(scale=1):
+                system_prompt = gr.Textbox(
+                    label="System prompt",
+                    placeholder="You are a helpful assistant.",
+                    visible=True,
+                )
                 difficulty = gr.Dropdown(
                     choices=[
                         ("High School", "high school"),
             with gr.Column(scale=3):
                 dataframe = gr.Dataframe()
         gr.HTML("<hr>")
+        gr.Markdown("## 3. Generate your dataset")
         with gr.Row():
             with gr.Column(scale=1):
                 org_name = get_org_dropdown()

src/distilabel_dataset_generator/utils.py CHANGED Viewed

@@ -45,26 +45,31 @@ def get_duplicate_button():
 def list_orgs(oauth_token: OAuthToken = None):
-    if oauth_token is None:
-        return []
-    data = whoami(oauth_token.token)
-    if data["auth"]["type"] == "oauth":
-        organisations = [data["name"]] + [org["name"] for org in data["orgs"]]
-    elif data["auth"]["type"] == "access_token":
-        organisations = [org["name"] for org in data["orgs"]]
-    else:
-        organisations = [
-            entry["entity"]["name"]
-            for entry in data["auth"]["accessToken"]["fineGrained"]["scoped"]
-            if "repo.write" in entry["permissions"]
-        ]
-        organisations = [org for org in organisations if org != data["name"]]
-        organisations = [data["name"]] + organisations
     return organisations
 def get_org_dropdown(oauth_token: OAuthToken = None):
-    if oauth_token:
         orgs = list_orgs(oauth_token)
     else:
         orgs = []

 def list_orgs(oauth_token: OAuthToken = None):
+    try:
+        if oauth_token is None:
+            return []
+        data = whoami(oauth_token.token)
+        if data["auth"]["type"] == "oauth":
+            organisations = [data["name"]] + [org["name"] for org in data["orgs"]]
+        elif data["auth"]["type"] == "access_token":
+            organisations = [org["name"] for org in data["orgs"]]
+        else:
+            organisations = [
+                entry["entity"]["name"]
+                for entry in data["auth"]["accessToken"]["fineGrained"]["scoped"]
+                if "repo.write" in entry["permissions"]
+            ]
+            organisations = [org for org in organisations if org != data["name"]]
+            organisations = [data["name"]] + organisations
+    except Exception as e:
+        raise gr.Error(
+            f"Failed to get organizations: {e}. See if you are logged and connected: https://huggingface.co/settings/connected-applications."
+        )
     return organisations
 def get_org_dropdown(oauth_token: OAuthToken = None):
+    if oauth_token is not None:
         orgs = list_orgs(oauth_token)
     else:
         orgs = []