Commit
•
9ac3da0
1
Parent(s):
fd936a6
feat: add examples
Browse files
app.py
CHANGED
@@ -16,8 +16,12 @@ h3{margin-top: 0}
|
|
16 |
.tabitem{border: 0px}
|
17 |
.group_padding{padding: .55em}
|
18 |
#space_model .wrap > label:last-child{opacity: 0.3; pointer-events:none}
|
|
|
|
|
|
|
19 |
"""
|
20 |
|
|
|
21 |
demo = gr.TabbedInterface(
|
22 |
[sft_app, faq_app],
|
23 |
["Supervised Fine-Tuning", "FAQ"],
|
|
|
16 |
.tabitem{border: 0px}
|
17 |
.group_padding{padding: .55em}
|
18 |
#space_model .wrap > label:last-child{opacity: 0.3; pointer-events:none}
|
19 |
+
#system_prompt_examples {
|
20 |
+
color: black;
|
21 |
+
}
|
22 |
"""
|
23 |
|
24 |
+
|
25 |
demo = gr.TabbedInterface(
|
26 |
[sft_app, faq_app],
|
27 |
["Supervised Fine-Tuning", "FAQ"],
|
src/distilabel_dataset_generator/apps/sft.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import multiprocessing
|
2 |
import time
|
3 |
-
from typing import Union
|
4 |
|
5 |
import gradio as gr
|
6 |
import pandas as pd
|
@@ -8,7 +7,7 @@ from distilabel.distiset import Distiset
|
|
8 |
|
9 |
from src.distilabel_dataset_generator.pipelines.sft import (
|
10 |
DEFAULT_DATASET,
|
11 |
-
|
12 |
DEFAULT_SYSTEM_PROMPT,
|
13 |
PROMPT_CREATION_PROMPT,
|
14 |
generate_pipeline_code,
|
@@ -19,6 +18,7 @@ from src.distilabel_dataset_generator.utils import (
|
|
19 |
get_login_button,
|
20 |
get_org_dropdown,
|
21 |
get_token,
|
|
|
22 |
)
|
23 |
|
24 |
|
@@ -141,13 +141,6 @@ def generate_dataset(
|
|
141 |
return pd.DataFrame(outputs)
|
142 |
|
143 |
|
144 |
-
def swap_visibilty(profile: Union[gr.OAuthProfile, None]):
|
145 |
-
if profile is None:
|
146 |
-
return gr.update(elem_classes=["main_ui_logged_out"]), gr.Mark
|
147 |
-
else:
|
148 |
-
return gr.update(elem_classes=["main_ui_logged_in"])
|
149 |
-
|
150 |
-
|
151 |
css = """
|
152 |
.main_ui_logged_out{opacity: 0.3; pointer-events: none}
|
153 |
"""
|
@@ -162,14 +155,19 @@ with gr.Blocks(
|
|
162 |
get_login_button()
|
163 |
with gr.Column(scale=2):
|
164 |
gr.Markdown(
|
165 |
-
"This token will only be used to push the dataset to the Hugging Face Hub.
|
166 |
)
|
167 |
|
168 |
gr.Markdown("## Iterate on a sample dataset")
|
169 |
with gr.Column() as main_ui:
|
170 |
dataset_description = gr.TextArea(
|
171 |
label="Provide a description of the dataset",
|
172 |
-
value=
|
|
|
|
|
|
|
|
|
|
|
173 |
)
|
174 |
with gr.Row():
|
175 |
gr.Column(scale=1)
|
|
|
1 |
import multiprocessing
|
2 |
import time
|
|
|
3 |
|
4 |
import gradio as gr
|
5 |
import pandas as pd
|
|
|
7 |
|
8 |
from src.distilabel_dataset_generator.pipelines.sft import (
|
9 |
DEFAULT_DATASET,
|
10 |
+
DEFAULT_DATASET_DESCRIPTIONS,
|
11 |
DEFAULT_SYSTEM_PROMPT,
|
12 |
PROMPT_CREATION_PROMPT,
|
13 |
generate_pipeline_code,
|
|
|
18 |
get_login_button,
|
19 |
get_org_dropdown,
|
20 |
get_token,
|
21 |
+
swap_visibilty,
|
22 |
)
|
23 |
|
24 |
|
|
|
141 |
return pd.DataFrame(outputs)
|
142 |
|
143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
144 |
css = """
|
145 |
.main_ui_logged_out{opacity: 0.3; pointer-events: none}
|
146 |
"""
|
|
|
155 |
get_login_button()
|
156 |
with gr.Column(scale=2):
|
157 |
gr.Markdown(
|
158 |
+
"This token will only be used to push the dataset to the Hugging Face Hub. There are no generation costs because we are using Free Serverless Inference Endpoints."
|
159 |
)
|
160 |
|
161 |
gr.Markdown("## Iterate on a sample dataset")
|
162 |
with gr.Column() as main_ui:
|
163 |
dataset_description = gr.TextArea(
|
164 |
label="Provide a description of the dataset",
|
165 |
+
value=DEFAULT_DATASET_DESCRIPTIONS[0],
|
166 |
+
)
|
167 |
+
examples = gr.Examples(
|
168 |
+
elem_id="system_prompt_examples",
|
169 |
+
examples=[[example] for example in DEFAULT_DATASET_DESCRIPTIONS[1:]],
|
170 |
+
inputs=[dataset_description],
|
171 |
)
|
172 |
with gr.Row():
|
173 |
gr.Column(scale=1)
|
src/distilabel_dataset_generator/pipelines/sft.py
CHANGED
@@ -115,8 +115,11 @@ User dataset description:
|
|
115 |
"""
|
116 |
|
117 |
MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
118 |
-
|
119 |
-
"A chemistry dataset for an assistant that explains chemical reactions and formulas"
|
|
|
|
|
|
|
120 |
)
|
121 |
DEFAULT_SYSTEM_PROMPT = "You are an AI assistant specializing in chemistry and chemical reactions. Your purpose is to help users understand and work with chemical formulas, equations, and reactions. Provide clear explanations of reaction mechanisms, assist in balancing chemical equations, and offer guidance on the interpretation of chemical structures. Explain the roles of reactants, products, catalysts, and solvents, and define key chemistry terms when necessary."
|
122 |
DEFAULT_DATASET = pd.DataFrame(
|
|
|
115 |
"""
|
116 |
|
117 |
MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
|
118 |
+
DEFAULT_DATASET_DESCRIPTIONS = (
|
119 |
+
"A chemistry dataset for an assistant that explains chemical reactions and formulas.",
|
120 |
+
"A dataset for an assistant that work in the customer support domain.",
|
121 |
+
"A dataset for an assistant that writes code.",
|
122 |
+
"A dataset for an assistant that work in the legal domain.",
|
123 |
)
|
124 |
DEFAULT_SYSTEM_PROMPT = "You are an AI assistant specializing in chemistry and chemical reactions. Your purpose is to help users understand and work with chemical formulas, equations, and reactions. Provide clear explanations of reaction mechanisms, assist in balancing chemical equations, and offer guidance on the interpretation of chemical structures. Explain the roles of reactants, products, catalysts, and solvents, and define key chemistry terms when necessary."
|
125 |
DEFAULT_DATASET = pd.DataFrame(
|
src/distilabel_dataset_generator/utils.py
CHANGED
@@ -52,8 +52,8 @@ def list_orgs(oauth_token: OAuthToken = None):
|
|
52 |
return list(set(organisations))
|
53 |
|
54 |
|
55 |
-
def get_org_dropdown(token: OAuthToken = None):
|
56 |
-
orgs = list_orgs(token)
|
57 |
return gr.Dropdown(
|
58 |
label="Organization",
|
59 |
choices=orgs,
|
@@ -62,8 +62,15 @@ def get_org_dropdown(token: OAuthToken = None):
|
|
62 |
)
|
63 |
|
64 |
|
65 |
-
def get_token(
|
66 |
-
if
|
67 |
-
return
|
68 |
else:
|
69 |
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
return list(set(organisations))
|
53 |
|
54 |
|
55 |
+
def get_org_dropdown(oauth_token: OAuthToken = None):
|
56 |
+
orgs = list_orgs(oauth_token)
|
57 |
return gr.Dropdown(
|
58 |
label="Organization",
|
59 |
choices=orgs,
|
|
|
62 |
)
|
63 |
|
64 |
|
65 |
+
def get_token(oauth_token: OAuthToken = None):
|
66 |
+
if oauth_token:
|
67 |
+
return oauth_token.token
|
68 |
else:
|
69 |
return ""
|
70 |
+
|
71 |
+
|
72 |
+
def swap_visibilty(oauth_token: OAuthToken = None):
|
73 |
+
if oauth_token is None:
|
74 |
+
return gr.update(elem_classes=["main_ui_logged_out"])
|
75 |
+
else:
|
76 |
+
return gr.update(elem_classes=["main_ui_logged_in"])
|