Spaces:
Sleeping
Sleeping
Commit
·
5dc0abf
1
Parent(s):
762b8c6
Implement dynamic fields management
Browse files
- services/huggingface.py +48 -9
- services/json_generator.py +25 -20
- ui/form_components.py +135 -16
services/huggingface.py
CHANGED
@@ -37,6 +37,45 @@ def update_dataset(json_data):
|
|
37 |
|
38 |
def create_flattened_data(data):
|
39 |
"""Create a flattened data structure for the dataset."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
return {
|
41 |
# Header
|
42 |
"licensing": [data["header"]["licensing"]],
|
@@ -60,8 +99,8 @@ def create_flattened_data(data):
|
|
60 |
"frameworkVersion": [data["task"]["algorithms"][0]["frameworkVersion"]],
|
61 |
"classPath": [data["task"]["algorithms"][0]["classPath"]],
|
62 |
"tuning_method": [data["task"]["algorithms"][0]["hyperparameters"]["tuning_method"]],
|
63 |
-
"hyperparameterName": [
|
64 |
-
"hyperparameterValue": [
|
65 |
"quantization": [data["task"]["algorithms"][0]["quantization"]],
|
66 |
"dataType": [data["task"]["dataset"][0]["dataType"]],
|
67 |
"fileType": [data["task"]["dataset"][0]["fileType"]],
|
@@ -69,13 +108,13 @@ def create_flattened_data(data):
|
|
69 |
"volumeUnit": [data["task"]["dataset"][0]["volumeUnit"]],
|
70 |
"items": [data["task"]["dataset"][0]["items"]],
|
71 |
"shape_item": [data["task"]["dataset"][0]["shape"][0]["item"]],
|
72 |
-
"nbRequest": [
|
73 |
-
"nbTokensInput": [
|
74 |
-
"nbWordsInput": [
|
75 |
-
"nbTokensOutput": [
|
76 |
-
"nbWordsOutput": [
|
77 |
-
"contextWindowSize": [
|
78 |
-
"cache": [
|
79 |
"source": [data["task"]["dataset"][0]["source"]],
|
80 |
"sourceUri": [data["task"]["dataset"][0]["sourceUri"]],
|
81 |
"owner": [data["task"]["dataset"][0]["owner"]],
|
|
|
37 |
|
38 |
def create_flattened_data(data):
|
39 |
"""Create a flattened data structure for the dataset."""
|
40 |
+
# Handle hyperparameters
|
41 |
+
hyperparameters = data.get("task", {}).get("algorithms", [{}])[0].get("hyperparameters", {}).get("values", [])
|
42 |
+
|
43 |
+
# Process hyperparameters
|
44 |
+
hyperparameter_names = []
|
45 |
+
hyperparameter_values = []
|
46 |
+
for hp in hyperparameters:
|
47 |
+
if "name" in hp and "value" in hp: # Match the keys used in JSON
|
48 |
+
hyperparameter_names.append(hp["name"])
|
49 |
+
hyperparameter_values.append(str(hp["value"]))
|
50 |
+
|
51 |
+
hyperparameter_name_str = ", ".join(hyperparameter_names) if hyperparameter_names else None
|
52 |
+
hyperparameter_value_str = ", ".join(hyperparameter_values) if hyperparameter_values else None
|
53 |
+
|
54 |
+
# Handle inference properties
|
55 |
+
inference_props = data.get("task", {}).get("dataset", [{}])[0].get("inferenceProperties", [])
|
56 |
+
print("Extracted inference properties:", inference_props)
|
57 |
+
|
58 |
+
# Process inference properties
|
59 |
+
inference_data = []
|
60 |
+
for props in inference_props:
|
61 |
+
if props:
|
62 |
+
inference_data.append({
|
63 |
+
"nbRequest": props.get("nbRequest"),
|
64 |
+
"nbTokensInput": props.get("nbTokensInput"),
|
65 |
+
"nbWordsInput": props.get("nbWordsInput"),
|
66 |
+
"nbTokensOutput": props.get("nbTokensOutput"),
|
67 |
+
"nbWordsOutput": props.get("nbWordsOutput"),
|
68 |
+
"contextWindowSize": props.get("contextWindowSize"),
|
69 |
+
"cache": props.get("cache")
|
70 |
+
})
|
71 |
+
|
72 |
+
nbRequest_str = ", ".join([str(p["nbRequest"]) for p in inference_data if p.get("nbRequest")]) if inference_data else None
|
73 |
+
nbTokensInput_str = ", ".join([str(p["nbTokensInput"]) for p in inference_data if p.get("nbTokensInput")]) if inference_data else None
|
74 |
+
nbWordsInput_str = ", ".join([str(p["nbWordsInput"]) for p in inference_data if p.get("nbWordsInput")]) if inference_data else None
|
75 |
+
nbTokensOutput_str = ", ".join([str(p["nbTokensOutput"]) for p in inference_data if p.get("nbTokensOutput")]) if inference_data else None
|
76 |
+
nbWordsOutput_str = ", ".join([str(p["nbWordsOutput"]) for p in inference_data if p.get("nbWordsOutput")]) if inference_data else None
|
77 |
+
contextWindowSize_str = ", ".join([str(p["contextWindowSize"]) for p in inference_data if p.get("contextWindowSize")]) if inference_data else None
|
78 |
+
cache_str = ", ".join([str(p["cache"]) for p in inference_data if p.get("cache")]) if inference_data else None
|
79 |
return {
|
80 |
# Header
|
81 |
"licensing": [data["header"]["licensing"]],
|
|
|
99 |
"frameworkVersion": [data["task"]["algorithms"][0]["frameworkVersion"]],
|
100 |
"classPath": [data["task"]["algorithms"][0]["classPath"]],
|
101 |
"tuning_method": [data["task"]["algorithms"][0]["hyperparameters"]["tuning_method"]],
|
102 |
+
"hyperparameterName": [hyperparameter_name_str],
|
103 |
+
"hyperparameterValue": [hyperparameter_value_str],
|
104 |
"quantization": [data["task"]["algorithms"][0]["quantization"]],
|
105 |
"dataType": [data["task"]["dataset"][0]["dataType"]],
|
106 |
"fileType": [data["task"]["dataset"][0]["fileType"]],
|
|
|
108 |
"volumeUnit": [data["task"]["dataset"][0]["volumeUnit"]],
|
109 |
"items": [data["task"]["dataset"][0]["items"]],
|
110 |
"shape_item": [data["task"]["dataset"][0]["shape"][0]["item"]],
|
111 |
+
"nbRequest": [nbRequest_str],
|
112 |
+
"nbTokensInput": [nbTokensInput_str],
|
113 |
+
"nbWordsInput": [nbWordsInput_str],
|
114 |
+
"nbTokensOutput": [nbTokensOutput_str],
|
115 |
+
"nbWordsOutput": [nbWordsOutput_str],
|
116 |
+
"contextWindowSize": [contextWindowSize_str],
|
117 |
+
"cache": [cache_str],
|
118 |
"source": [data["task"]["dataset"][0]["source"]],
|
119 |
"sourceUri": [data["task"]["dataset"][0]["sourceUri"]],
|
120 |
"owner": [data["task"]["dataset"][0]["owner"]],
|
services/json_generator.py
CHANGED
@@ -9,7 +9,7 @@ def generate_json(
|
|
9 |
publisher_name, publisher_division, publisher_projectName, publisher_confidentialityLevel, publisher_publicKey,
|
10 |
# Task
|
11 |
taskType, taskFamily, taskStage, algorithmName, framework, frameworkVersion, classPath, tuning_method,
|
12 |
-
|
13 |
shape_item, nbRequest, nbTokensInput, nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize, cache,
|
14 |
source, sourceUri, owner, measuredAccuracy, estimatedAccuracy,
|
15 |
# Measures
|
@@ -30,6 +30,28 @@ def generate_json(
|
|
30 |
hashAlgorithm, cryptographicAlgorithm, value_hash
|
31 |
):
|
32 |
"""Generate JSON data from form inputs."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
data = {
|
34 |
"header": {
|
35 |
"licensing": licensing,
|
@@ -58,12 +80,7 @@ def generate_json(
|
|
58 |
"classPath": classPath,
|
59 |
"hyperparameters": {
|
60 |
"tuning_method": tuning_method,
|
61 |
-
"values":
|
62 |
-
{
|
63 |
-
"hyperparameterName": hyperparameterName,
|
64 |
-
"hyperparameterValue": hyperparameterValue
|
65 |
-
}
|
66 |
-
]
|
67 |
},
|
68 |
"quantization": quantization
|
69 |
}
|
@@ -80,19 +97,7 @@ def generate_json(
|
|
80 |
"item": shape_item
|
81 |
}
|
82 |
],
|
83 |
-
"inferenceProperties":
|
84 |
-
{
|
85 |
-
"nbRequest": nbRequest,
|
86 |
-
"parametersNLP": {
|
87 |
-
"nbTokensInput": nbTokensInput,
|
88 |
-
"nbWordsInput": nbWordsInput,
|
89 |
-
"nbTokensOutput": nbTokensOutput,
|
90 |
-
"nbWordsOutput": nbWordsOutput,
|
91 |
-
"contextWindowSize": contextWindowSize,
|
92 |
-
"cache": cache
|
93 |
-
}
|
94 |
-
}
|
95 |
-
],
|
96 |
"source": source,
|
97 |
"sourceUri": sourceUri,
|
98 |
"owner": owner
|
|
|
9 |
publisher_name, publisher_division, publisher_projectName, publisher_confidentialityLevel, publisher_publicKey,
|
10 |
# Task
|
11 |
taskType, taskFamily, taskStage, algorithmName, framework, frameworkVersion, classPath, tuning_method,
|
12 |
+
hyperparameter_names, hyperparameter_values, quantization, dataType, fileType, volume, volumeUnit, items,
|
13 |
shape_item, nbRequest, nbTokensInput, nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize, cache,
|
14 |
source, sourceUri, owner, measuredAccuracy, estimatedAccuracy,
|
15 |
# Measures
|
|
|
30 |
hashAlgorithm, cryptographicAlgorithm, value_hash
|
31 |
):
|
32 |
"""Generate JSON data from form inputs."""
|
33 |
+
# Process hyperparameters
|
34 |
+
hyperparameters = []
|
35 |
+
for name, value in zip(hyperparameter_names, hyperparameter_values):
|
36 |
+
if name and value:
|
37 |
+
hyperparameters.append({
|
38 |
+
"name": name,
|
39 |
+
"value": value
|
40 |
+
})
|
41 |
+
|
42 |
+
# Process inference properties
|
43 |
+
inference_props_list = []
|
44 |
+
for i in range(len(nbRequest)):
|
45 |
+
inference_props_list.append({
|
46 |
+
"nbRequest": nbRequest[i],
|
47 |
+
"nbTokensInput": nbTokensInput[i],
|
48 |
+
"nbWordsInput": nbWordsInput[i],
|
49 |
+
"nbTokensOutput": nbTokensOutput[i],
|
50 |
+
"nbWordsOutput": nbWordsOutput[i],
|
51 |
+
"contextWindowSize": contextWindowSize[i],
|
52 |
+
"cache": cache[i]
|
53 |
+
})
|
54 |
+
|
55 |
data = {
|
56 |
"header": {
|
57 |
"licensing": licensing,
|
|
|
80 |
"classPath": classPath,
|
81 |
"hyperparameters": {
|
82 |
"tuning_method": tuning_method,
|
83 |
+
"values": hyperparameters,
|
|
|
|
|
|
|
|
|
|
|
84 |
},
|
85 |
"quantization": quantization
|
86 |
}
|
|
|
97 |
"item": shape_item
|
98 |
}
|
99 |
],
|
100 |
+
"inferenceProperties": inference_props_list,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
"source": source,
|
102 |
"sourceUri": sourceUri,
|
103 |
"owner": owner
|
ui/form_components.py
CHANGED
@@ -6,6 +6,78 @@ from config import (
|
|
6 |
HASH_ALGORITHMS, CRYPTO_ALGORITHMS, CACHE_OPTIONS
|
7 |
)
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
def create_header_tab():
|
10 |
"""Create the header tab components."""
|
11 |
with gr.Tab("Header"):
|
@@ -52,10 +124,25 @@ def create_task_tab():
|
|
52 |
tuning_method = gr.Textbox(label="Tuning Method", info="(the method of hyperparameters tuning used (if any), example: gridSearch, randomizedSearch...)")
|
53 |
|
54 |
with gr.Accordion("Hyperparameters"):
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
quantization = gr.Textbox(label="Quantization", info="(the data weights (in bits) obtained thanks to the quantization, example: 2, 8, 16...)")
|
60 |
|
61 |
with gr.Accordion("Dataset"):
|
@@ -71,16 +158,48 @@ def create_task_tab():
|
|
71 |
shape_item = gr.Textbox(label="Shape Item", info="(the shape of each dataset item)")
|
72 |
|
73 |
with gr.Accordion("Inference Properties"):
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
)
|
85 |
|
86 |
source = gr.Textbox(label="Source", info="(the kind of source of the dataset)")
|
@@ -97,8 +216,8 @@ def create_task_tab():
|
|
97 |
|
98 |
return [
|
99 |
taskType, taskFamily, taskStage, algorithmName, framework,
|
100 |
-
frameworkVersion, classPath, tuning_method,
|
101 |
-
|
102 |
volumeUnit, items, shape_item, nbRequest, nbTokensInput,
|
103 |
nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize,
|
104 |
cache, source, sourceUri, owner, measuredAccuracy, estimatedAccuracy
|
|
|
6 |
HASH_ALGORITHMS, CRYPTO_ALGORITHMS, CACHE_OPTIONS
|
7 |
)
|
8 |
|
9 |
+
def create_dynamic_section(section_name, fields_config, initial_count = 1, layout="row"):
|
10 |
+
# State management
|
11 |
+
count_state = gr.State(value=initial_count+1)
|
12 |
+
field_states = [gr.State([]) for _ in fields_config]
|
13 |
+
all_components = []
|
14 |
+
|
15 |
+
def update_fields(*states_and_values):
|
16 |
+
"""Generic update function for multiple fields"""
|
17 |
+
# Split states and current values
|
18 |
+
states = list(states_and_values[:len(fields_config)])
|
19 |
+
current_values = states_and_values[len(fields_config):-1]
|
20 |
+
index = states_and_values[-1]
|
21 |
+
|
22 |
+
# Update each field's state
|
23 |
+
for field_idx, (state, value) in enumerate(zip(states, current_values)):
|
24 |
+
# Ensure state list is long enough
|
25 |
+
while len(state) <= index:
|
26 |
+
state.append("")
|
27 |
+
# Update the value at the correct index
|
28 |
+
state[index] = value if value is not None else ""
|
29 |
+
|
30 |
+
return tuple(states)
|
31 |
+
|
32 |
+
@gr.render(inputs=count_state)
|
33 |
+
def render_dynamic_section(count):
|
34 |
+
nonlocal all_components
|
35 |
+
all_components = []
|
36 |
+
|
37 |
+
for i in range(count):
|
38 |
+
with (gr.Row() if layout == "row" else gr.Column()):
|
39 |
+
row_components = []
|
40 |
+
field_refs = [] # To store references to current row's components
|
41 |
+
|
42 |
+
for field_idx, config in enumerate(fields_config):
|
43 |
+
component = config["type"](
|
44 |
+
label=f"{config['label']} {i + 1}",
|
45 |
+
info=config.get("info", ""),
|
46 |
+
**config.get("kwargs", {})
|
47 |
+
)
|
48 |
+
row_components.append(component)
|
49 |
+
field_refs.append(component)
|
50 |
+
|
51 |
+
# Create change event with ALL current field values
|
52 |
+
component.change(
|
53 |
+
fn=update_fields,
|
54 |
+
inputs=[*field_states, *field_refs, gr.State(i)],
|
55 |
+
outputs=field_states
|
56 |
+
)
|
57 |
+
|
58 |
+
# Remove button
|
59 |
+
remove_btn = gr.Button("❌", variant="secondary")
|
60 |
+
remove_btn.click(
|
61 |
+
lambda x, idx=i, fs=field_states: (
|
62 |
+
max(0, x-1),
|
63 |
+
*[fs[i].value[:idx] + fs[i].value[idx+1:] for i in range(len(fs))]
|
64 |
+
),
|
65 |
+
inputs=count_state,
|
66 |
+
outputs=[count_state, *field_states]
|
67 |
+
)
|
68 |
+
row_components.append(remove_btn)
|
69 |
+
|
70 |
+
all_components.extend(row_components)
|
71 |
+
return all_components
|
72 |
+
|
73 |
+
# Initialize with initial count
|
74 |
+
render_dynamic_section(count=initial_count)
|
75 |
+
|
76 |
+
add_btn = gr.Button(f"Add {section_name}")
|
77 |
+
add_btn.click(lambda x: x + 1, count_state, count_state)
|
78 |
+
|
79 |
+
return (count_state, *field_states, add_btn)
|
80 |
+
|
81 |
def create_header_tab():
|
82 |
"""Create the header tab components."""
|
83 |
with gr.Tab("Header"):
|
|
|
124 |
tuning_method = gr.Textbox(label="Tuning Method", info="(the method of hyperparameters tuning used (if any), example: gridSearch, randomizedSearch...)")
|
125 |
|
126 |
with gr.Accordion("Hyperparameters"):
|
127 |
+
_, hyperparameter_names, hyperparameter_values, add_btn = create_dynamic_section(
|
128 |
+
section_name="Hyperparameter",
|
129 |
+
fields_config=[
|
130 |
+
{
|
131 |
+
"type": gr.Textbox,
|
132 |
+
"label": "Hyperparameter Name",
|
133 |
+
"info": "(name of the hyperparameter)",
|
134 |
+
"kwargs": {"interactive": True}
|
135 |
+
},
|
136 |
+
{
|
137 |
+
"type": gr.Textbox,
|
138 |
+
"label": "Hyperparameter Value",
|
139 |
+
"info": "(value of the hyperparameter)",
|
140 |
+
"kwargs": {"placeholder": "Enter value..."}
|
141 |
+
}
|
142 |
+
],
|
143 |
+
initial_count=0,
|
144 |
+
)
|
145 |
+
|
146 |
quantization = gr.Textbox(label="Quantization", info="(the data weights (in bits) obtained thanks to the quantization, example: 2, 8, 16...)")
|
147 |
|
148 |
with gr.Accordion("Dataset"):
|
|
|
158 |
shape_item = gr.Textbox(label="Shape Item", info="(the shape of each dataset item)")
|
159 |
|
160 |
with gr.Accordion("Inference Properties"):
|
161 |
+
_, nbRequest, nbTokensInput, nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize, cache, add_inference_btn = create_dynamic_section(
|
162 |
+
section_name="Inference Property",
|
163 |
+
fields_config=[
|
164 |
+
{
|
165 |
+
"type": gr.Textbox,
|
166 |
+
"label": "Number of Requests",
|
167 |
+
"info": "Required field<br>(the number of requests the measure corresponds to)",
|
168 |
+
},
|
169 |
+
{
|
170 |
+
"type": gr.Textbox,
|
171 |
+
"label": "Number of Tokens Input",
|
172 |
+
"info": "(the number of tokens in the input)",
|
173 |
+
},
|
174 |
+
{
|
175 |
+
"type": gr.Textbox,
|
176 |
+
"label": "Number of Words Input",
|
177 |
+
"info": "(the number of words in the input)",
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"type": gr.Textbox,
|
181 |
+
"label": "Number of Tokens Output",
|
182 |
+
"info": "(the number of tokens in the output)",
|
183 |
+
},
|
184 |
+
{
|
185 |
+
"type": gr.Textbox,
|
186 |
+
"label": "Number of Words Output",
|
187 |
+
"info": "(the number of words in the output)",
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"type": gr.Textbox,
|
191 |
+
"label": "Context Window Size",
|
192 |
+
"info": "(the number of tokens kept in memory)",
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"type": gr.Dropdown,
|
196 |
+
"label": "Cache",
|
197 |
+
"info": "(the presence of a cache function)",
|
198 |
+
"kwargs": {"choices": CACHE_OPTIONS, "value": None}
|
199 |
+
}
|
200 |
+
],
|
201 |
+
initial_count=0,
|
202 |
+
layout="column"
|
203 |
)
|
204 |
|
205 |
source = gr.Textbox(label="Source", info="(the kind of source of the dataset)")
|
|
|
216 |
|
217 |
return [
|
218 |
taskType, taskFamily, taskStage, algorithmName, framework,
|
219 |
+
frameworkVersion, classPath, tuning_method, hyperparameter_names, hyperparameter_values,
|
220 |
+
quantization, dataType, fileType, volume,
|
221 |
volumeUnit, items, shape_item, nbRequest, nbTokensInput,
|
222 |
nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize,
|
223 |
cache, source, sourceUri, owner, measuredAccuracy, estimatedAccuracy
|