Spaces:

argilla
/

synthetic-data-generator

Running

App Files Community

davidberenstein1957 committed on Sep 11, 2024

Commit

9ac3da0

1 Parent(s): fd936a6

feat: add examples

Browse files

Files changed (4) hide show

app.py +4 -0
src/distilabel_dataset_generator/apps/sft.py +9 -11
src/distilabel_dataset_generator/pipelines/sft.py +5 -2
src/distilabel_dataset_generator/utils.py +12 -5

app.py CHANGED Viewed

@@ -16,8 +16,12 @@ h3{margin-top: 0}
 .tabitem{border: 0px}
 .group_padding{padding: .55em}
 #space_model .wrap > label:last-child{opacity: 0.3; pointer-events:none}
 """
 demo = gr.TabbedInterface(
     [sft_app, faq_app],
     ["Supervised Fine-Tuning", "FAQ"],

 .tabitem{border: 0px}
 .group_padding{padding: .55em}
 #space_model .wrap > label:last-child{opacity: 0.3; pointer-events:none}
+#system_prompt_examples {
+    color: black;
+}
 """
 demo = gr.TabbedInterface(
     [sft_app, faq_app],
     ["Supervised Fine-Tuning", "FAQ"],

src/distilabel_dataset_generator/apps/sft.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import multiprocessing
 import time
-from typing import Union
 import gradio as gr
 import pandas as pd
@@ -8,7 +7,7 @@ from distilabel.distiset import Distiset
 from src.distilabel_dataset_generator.pipelines.sft import (
     DEFAULT_DATASET,
-    DEFAULT_DATASET_DESCRIPTION,
     DEFAULT_SYSTEM_PROMPT,
     PROMPT_CREATION_PROMPT,
     generate_pipeline_code,
@@ -19,6 +18,7 @@ from src.distilabel_dataset_generator.utils import (
     get_login_button,
     get_org_dropdown,
     get_token,
 )
@@ -141,13 +141,6 @@ def generate_dataset(
     return pd.DataFrame(outputs)
-def swap_visibilty(profile: Union[gr.OAuthProfile, None]):
-    if profile is None:
-        return gr.update(elem_classes=["main_ui_logged_out"]), gr.Mark
-    else:
-        return gr.update(elem_classes=["main_ui_logged_in"])
 css = """
 .main_ui_logged_out{opacity: 0.3; pointer-events: none}
 """
@@ -162,14 +155,19 @@ with gr.Blocks(
             get_login_button()
         with gr.Column(scale=2):
             gr.Markdown(
-                "This token will only be used to push the dataset to the Hugging Face Hub. It won't be incurring any costs because we are using Free Serverless Inference Endpoints."
             )
     gr.Markdown("## Iterate on a sample dataset")
     with gr.Column() as main_ui:
         dataset_description = gr.TextArea(
             label="Provide a description of the dataset",
-            value=DEFAULT_DATASET_DESCRIPTION,
         )
         with gr.Row():
             gr.Column(scale=1)

 import multiprocessing
 import time
 import gradio as gr
 import pandas as pd
 from src.distilabel_dataset_generator.pipelines.sft import (
     DEFAULT_DATASET,
+    DEFAULT_DATASET_DESCRIPTIONS,
     DEFAULT_SYSTEM_PROMPT,
     PROMPT_CREATION_PROMPT,
     generate_pipeline_code,
     get_login_button,
     get_org_dropdown,
     get_token,
+    swap_visibilty,
 )
     return pd.DataFrame(outputs)
 css = """
 .main_ui_logged_out{opacity: 0.3; pointer-events: none}
 """
             get_login_button()
         with gr.Column(scale=2):
             gr.Markdown(
+                "This token will only be used to push the dataset to the Hugging Face Hub. There are no generation costs because we are using Free Serverless Inference Endpoints."
             )
     gr.Markdown("## Iterate on a sample dataset")
     with gr.Column() as main_ui:
         dataset_description = gr.TextArea(
             label="Provide a description of the dataset",
+            value=DEFAULT_DATASET_DESCRIPTIONS[0],
+        )
+        examples = gr.Examples(
+            elem_id="system_prompt_examples",
+            examples=[[example] for example in DEFAULT_DATASET_DESCRIPTIONS[1:]],
+            inputs=[dataset_description],
         )
         with gr.Row():
             gr.Column(scale=1)

src/distilabel_dataset_generator/pipelines/sft.py CHANGED Viewed

@@ -115,8 +115,11 @@ User dataset description:
 """
 MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
-DEFAULT_DATASET_DESCRIPTION = (
-    "A chemistry dataset for an assistant that explains chemical reactions and formulas"
 )
 DEFAULT_SYSTEM_PROMPT = "You are an AI assistant specializing in chemistry and chemical reactions. Your purpose is to help users understand and work with chemical formulas, equations, and reactions. Provide clear explanations of reaction mechanisms, assist in balancing chemical equations, and offer guidance on the interpretation of chemical structures. Explain the roles of reactants, products, catalysts, and solvents, and define key chemistry terms when necessary."
 DEFAULT_DATASET = pd.DataFrame(

 """
 MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
+DEFAULT_DATASET_DESCRIPTIONS = (
+    "A chemistry dataset for an assistant that explains chemical reactions and formulas.",
+    "A dataset for an assistant that work in the customer support domain.",
+    "A dataset for an assistant that writes code.",
+    "A dataset for an assistant that work in the legal domain.",
 )
 DEFAULT_SYSTEM_PROMPT = "You are an AI assistant specializing in chemistry and chemical reactions. Your purpose is to help users understand and work with chemical formulas, equations, and reactions. Provide clear explanations of reaction mechanisms, assist in balancing chemical equations, and offer guidance on the interpretation of chemical structures. Explain the roles of reactants, products, catalysts, and solvents, and define key chemistry terms when necessary."
 DEFAULT_DATASET = pd.DataFrame(

src/distilabel_dataset_generator/utils.py CHANGED Viewed

@@ -52,8 +52,8 @@ def list_orgs(oauth_token: OAuthToken = None):
     return list(set(organisations))
-def get_org_dropdown(token: OAuthToken = None):
-    orgs = list_orgs(token)
     return gr.Dropdown(
         label="Organization",
         choices=orgs,
@@ -62,8 +62,15 @@ def get_org_dropdown(token: OAuthToken = None):
     )
-def get_token(token: OAuthToken = None):
-    if token:
-        return token.token
     else:
         return ""

     return list(set(organisations))
+def get_org_dropdown(oauth_token: OAuthToken = None):
+    orgs = list_orgs(oauth_token)
     return gr.Dropdown(
         label="Organization",
         choices=orgs,
     )
+def get_token(oauth_token: OAuthToken = None):
+    if oauth_token:
+        return oauth_token.token
     else:
         return ""
+def swap_visibilty(oauth_token: OAuthToken = None):
+    if oauth_token is None:
+        return gr.update(elem_classes=["main_ui_logged_out"])
+    else:
+        return gr.update(elem_classes=["main_ui_logged_in"])