FILALIHicham committed on
Commit
5dc0abf
·
1 Parent(s): 762b8c6

Implement dynamic fields management

Browse files
services/huggingface.py CHANGED
@@ -37,6 +37,45 @@ def update_dataset(json_data):
37
 
38
  def create_flattened_data(data):
39
  """Create a flattened data structure for the dataset."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  return {
41
  # Header
42
  "licensing": [data["header"]["licensing"]],
@@ -60,8 +99,8 @@ def create_flattened_data(data):
60
  "frameworkVersion": [data["task"]["algorithms"][0]["frameworkVersion"]],
61
  "classPath": [data["task"]["algorithms"][0]["classPath"]],
62
  "tuning_method": [data["task"]["algorithms"][0]["hyperparameters"]["tuning_method"]],
63
- "hyperparameterName": [data["task"]["algorithms"][0]["hyperparameters"]["values"][0]["hyperparameterName"]],
64
- "hyperparameterValue": [data["task"]["algorithms"][0]["hyperparameters"]["values"][0]["hyperparameterValue"]],
65
  "quantization": [data["task"]["algorithms"][0]["quantization"]],
66
  "dataType": [data["task"]["dataset"][0]["dataType"]],
67
  "fileType": [data["task"]["dataset"][0]["fileType"]],
@@ -69,13 +108,13 @@ def create_flattened_data(data):
69
  "volumeUnit": [data["task"]["dataset"][0]["volumeUnit"]],
70
  "items": [data["task"]["dataset"][0]["items"]],
71
  "shape_item": [data["task"]["dataset"][0]["shape"][0]["item"]],
72
- "nbRequest": [data["task"]["dataset"][0]["inferenceProperties"][0]["nbRequest"]],
73
- "nbTokensInput": [data["task"]["dataset"][0]["inferenceProperties"][0]["parametersNLP"]["nbTokensInput"]],
74
- "nbWordsInput": [data["task"]["dataset"][0]["inferenceProperties"][0]["parametersNLP"]["nbWordsInput"]],
75
- "nbTokensOutput": [data["task"]["dataset"][0]["inferenceProperties"][0]["parametersNLP"]["nbTokensOutput"]],
76
- "nbWordsOutput": [data["task"]["dataset"][0]["inferenceProperties"][0]["parametersNLP"]["nbWordsOutput"]],
77
- "contextWindowSize": [data["task"]["dataset"][0]["inferenceProperties"][0]["parametersNLP"]["contextWindowSize"]],
78
- "cache": [data["task"]["dataset"][0]["inferenceProperties"][0]["parametersNLP"]["cache"]],
79
  "source": [data["task"]["dataset"][0]["source"]],
80
  "sourceUri": [data["task"]["dataset"][0]["sourceUri"]],
81
  "owner": [data["task"]["dataset"][0]["owner"]],
 
37
 
38
  def create_flattened_data(data):
39
  """Create a flattened data structure for the dataset."""
40
+ # Handle hyperparameters
41
+ hyperparameters = data.get("task", {}).get("algorithms", [{}])[0].get("hyperparameters", {}).get("values", [])
42
+
43
+ # Process hyperparameters
44
+ hyperparameter_names = []
45
+ hyperparameter_values = []
46
+ for hp in hyperparameters:
47
+ if "name" in hp and "value" in hp: # Match the keys used in JSON
48
+ hyperparameter_names.append(hp["name"])
49
+ hyperparameter_values.append(str(hp["value"]))
50
+
51
+ hyperparameter_name_str = ", ".join(hyperparameter_names) if hyperparameter_names else None
52
+ hyperparameter_value_str = ", ".join(hyperparameter_values) if hyperparameter_values else None
53
+
54
+ # Handle inference properties
55
+ inference_props = data.get("task", {}).get("dataset", [{}])[0].get("inferenceProperties", [])
56
+ print("Extracted inference properties:", inference_props)
57
+
58
+ # Process inference properties
59
+ inference_data = []
60
+ for props in inference_props:
61
+ if props:
62
+ inference_data.append({
63
+ "nbRequest": props.get("nbRequest"),
64
+ "nbTokensInput": props.get("nbTokensInput"),
65
+ "nbWordsInput": props.get("nbWordsInput"),
66
+ "nbTokensOutput": props.get("nbTokensOutput"),
67
+ "nbWordsOutput": props.get("nbWordsOutput"),
68
+ "contextWindowSize": props.get("contextWindowSize"),
69
+ "cache": props.get("cache")
70
+ })
71
+
72
+ nbRequest_str = ", ".join([str(p["nbRequest"]) for p in inference_data if p.get("nbRequest")]) if inference_data else None
73
+ nbTokensInput_str = ", ".join([str(p["nbTokensInput"]) for p in inference_data if p.get("nbTokensInput")]) if inference_data else None
74
+ nbWordsInput_str = ", ".join([str(p["nbWordsInput"]) for p in inference_data if p.get("nbWordsInput")]) if inference_data else None
75
+ nbTokensOutput_str = ", ".join([str(p["nbTokensOutput"]) for p in inference_data if p.get("nbTokensOutput")]) if inference_data else None
76
+ nbWordsOutput_str = ", ".join([str(p["nbWordsOutput"]) for p in inference_data if p.get("nbWordsOutput")]) if inference_data else None
77
+ contextWindowSize_str = ", ".join([str(p["contextWindowSize"]) for p in inference_data if p.get("contextWindowSize")]) if inference_data else None
78
+ cache_str = ", ".join([str(p["cache"]) for p in inference_data if p.get("cache")]) if inference_data else None
79
  return {
80
  # Header
81
  "licensing": [data["header"]["licensing"]],
 
99
  "frameworkVersion": [data["task"]["algorithms"][0]["frameworkVersion"]],
100
  "classPath": [data["task"]["algorithms"][0]["classPath"]],
101
  "tuning_method": [data["task"]["algorithms"][0]["hyperparameters"]["tuning_method"]],
102
+ "hyperparameterName": [hyperparameter_name_str],
103
+ "hyperparameterValue": [hyperparameter_value_str],
104
  "quantization": [data["task"]["algorithms"][0]["quantization"]],
105
  "dataType": [data["task"]["dataset"][0]["dataType"]],
106
  "fileType": [data["task"]["dataset"][0]["fileType"]],
 
108
  "volumeUnit": [data["task"]["dataset"][0]["volumeUnit"]],
109
  "items": [data["task"]["dataset"][0]["items"]],
110
  "shape_item": [data["task"]["dataset"][0]["shape"][0]["item"]],
111
+ "nbRequest": [nbRequest_str],
112
+ "nbTokensInput": [nbTokensInput_str],
113
+ "nbWordsInput": [nbWordsInput_str],
114
+ "nbTokensOutput": [nbTokensOutput_str],
115
+ "nbWordsOutput": [nbWordsOutput_str],
116
+ "contextWindowSize": [contextWindowSize_str],
117
+ "cache": [cache_str],
118
  "source": [data["task"]["dataset"][0]["source"]],
119
  "sourceUri": [data["task"]["dataset"][0]["sourceUri"]],
120
  "owner": [data["task"]["dataset"][0]["owner"]],
services/json_generator.py CHANGED
@@ -9,7 +9,7 @@ def generate_json(
9
  publisher_name, publisher_division, publisher_projectName, publisher_confidentialityLevel, publisher_publicKey,
10
  # Task
11
  taskType, taskFamily, taskStage, algorithmName, framework, frameworkVersion, classPath, tuning_method,
12
- hyperparameterName, hyperparameterValue, quantization, dataType, fileType, volume, volumeUnit, items,
13
  shape_item, nbRequest, nbTokensInput, nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize, cache,
14
  source, sourceUri, owner, measuredAccuracy, estimatedAccuracy,
15
  # Measures
@@ -30,6 +30,28 @@ def generate_json(
30
  hashAlgorithm, cryptographicAlgorithm, value_hash
31
  ):
32
  """Generate JSON data from form inputs."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  data = {
34
  "header": {
35
  "licensing": licensing,
@@ -58,12 +80,7 @@ def generate_json(
58
  "classPath": classPath,
59
  "hyperparameters": {
60
  "tuning_method": tuning_method,
61
- "values": [
62
- {
63
- "hyperparameterName": hyperparameterName,
64
- "hyperparameterValue": hyperparameterValue
65
- }
66
- ]
67
  },
68
  "quantization": quantization
69
  }
@@ -80,19 +97,7 @@ def generate_json(
80
  "item": shape_item
81
  }
82
  ],
83
- "inferenceProperties": [
84
- {
85
- "nbRequest": nbRequest,
86
- "parametersNLP": {
87
- "nbTokensInput": nbTokensInput,
88
- "nbWordsInput": nbWordsInput,
89
- "nbTokensOutput": nbTokensOutput,
90
- "nbWordsOutput": nbWordsOutput,
91
- "contextWindowSize": contextWindowSize,
92
- "cache": cache
93
- }
94
- }
95
- ],
96
  "source": source,
97
  "sourceUri": sourceUri,
98
  "owner": owner
 
9
  publisher_name, publisher_division, publisher_projectName, publisher_confidentialityLevel, publisher_publicKey,
10
  # Task
11
  taskType, taskFamily, taskStage, algorithmName, framework, frameworkVersion, classPath, tuning_method,
12
+ hyperparameter_names, hyperparameter_values, quantization, dataType, fileType, volume, volumeUnit, items,
13
  shape_item, nbRequest, nbTokensInput, nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize, cache,
14
  source, sourceUri, owner, measuredAccuracy, estimatedAccuracy,
15
  # Measures
 
30
  hashAlgorithm, cryptographicAlgorithm, value_hash
31
  ):
32
  """Generate JSON data from form inputs."""
33
+ # Process hyperparameters
34
+ hyperparameters = []
35
+ for name, value in zip(hyperparameter_names, hyperparameter_values):
36
+ if name and value:
37
+ hyperparameters.append({
38
+ "name": name,
39
+ "value": value
40
+ })
41
+
42
+ # Process inference properties
43
+ inference_props_list = []
44
+ for i in range(len(nbRequest)):
45
+ inference_props_list.append({
46
+ "nbRequest": nbRequest[i],
47
+ "nbTokensInput": nbTokensInput[i],
48
+ "nbWordsInput": nbWordsInput[i],
49
+ "nbTokensOutput": nbTokensOutput[i],
50
+ "nbWordsOutput": nbWordsOutput[i],
51
+ "contextWindowSize": contextWindowSize[i],
52
+ "cache": cache[i]
53
+ })
54
+
55
  data = {
56
  "header": {
57
  "licensing": licensing,
 
80
  "classPath": classPath,
81
  "hyperparameters": {
82
  "tuning_method": tuning_method,
83
+ "values": hyperparameters,
 
 
 
 
 
84
  },
85
  "quantization": quantization
86
  }
 
97
  "item": shape_item
98
  }
99
  ],
100
+ "inferenceProperties": inference_props_list,
 
 
 
 
 
 
 
 
 
 
 
 
101
  "source": source,
102
  "sourceUri": sourceUri,
103
  "owner": owner
ui/form_components.py CHANGED
@@ -6,6 +6,78 @@ from config import (
6
  HASH_ALGORITHMS, CRYPTO_ALGORITHMS, CACHE_OPTIONS
7
  )
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def create_header_tab():
10
  """Create the header tab components."""
11
  with gr.Tab("Header"):
@@ -52,10 +124,25 @@ def create_task_tab():
52
  tuning_method = gr.Textbox(label="Tuning Method", info="(the method of hyperparameters tuning used (if any), example: gridSearch, randomizedSearch...)")
53
 
54
  with gr.Accordion("Hyperparameters"):
55
- with gr.Row():
56
- hyperparameterName = gr.Textbox(label="Hyperparameter Name", info="(the name of the hyperparameter, example: c, kernel, gamma, class_weight...)")
57
- hyperparameterValue = gr.Textbox(label="Hyperparameter Value", info="(the value of the hyperparameter, example: rbf, 1e-4, 10, linear...)")
58
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  quantization = gr.Textbox(label="Quantization", info="(the data weights (in bits) obtained thanks to the quantization, example: 2, 8, 16...)")
60
 
61
  with gr.Accordion("Dataset"):
@@ -71,16 +158,48 @@ def create_task_tab():
71
  shape_item = gr.Textbox(label="Shape Item", info="(the shape of each dataset item)")
72
 
73
  with gr.Accordion("Inference Properties"):
74
- nbRequest = gr.Textbox(label="Number of Requests", info="Required field<br>(the number of requests the measure corresponds to)")
75
- nbTokensInput = gr.Textbox(label="Number of Tokens Input", info="(the number of tokens in the input)")
76
- nbWordsInput = gr.Textbox(label="Number of Words Input", info="(the number of words in the input)")
77
- nbTokensOutput = gr.Textbox(label="Number of Tokens Output", info="(the number of tokens in the output)")
78
- nbWordsOutput = gr.Textbox(label="Number of Words Output", info="(the number of words in the output)")
79
- contextWindowSize = gr.Textbox(label="Context Window Size", info="(the number of tokens kept in memory)")
80
- cache = gr.Dropdown(value=None,
81
- label="Cache",
82
- choices=CACHE_OPTIONS,
83
- info="(the presence of a cache function)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  )
85
 
86
  source = gr.Textbox(label="Source", info="(the kind of source of the dataset)")
@@ -97,8 +216,8 @@ def create_task_tab():
97
 
98
  return [
99
  taskType, taskFamily, taskStage, algorithmName, framework,
100
- frameworkVersion, classPath, tuning_method, hyperparameterName,
101
- hyperparameterValue, quantization, dataType, fileType, volume,
102
  volumeUnit, items, shape_item, nbRequest, nbTokensInput,
103
  nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize,
104
  cache, source, sourceUri, owner, measuredAccuracy, estimatedAccuracy
 
6
  HASH_ALGORITHMS, CRYPTO_ALGORITHMS, CACHE_OPTIONS
7
  )
8
 
9
+ def create_dynamic_section(section_name, fields_config, initial_count = 1, layout="row"):
10
+ # State management
11
+ count_state = gr.State(value=initial_count+1)
12
+ field_states = [gr.State([]) for _ in fields_config]
13
+ all_components = []
14
+
15
+ def update_fields(*states_and_values):
16
+ """Generic update function for multiple fields"""
17
+ # Split states and current values
18
+ states = list(states_and_values[:len(fields_config)])
19
+ current_values = states_and_values[len(fields_config):-1]
20
+ index = states_and_values[-1]
21
+
22
+ # Update each field's state
23
+ for field_idx, (state, value) in enumerate(zip(states, current_values)):
24
+ # Ensure state list is long enough
25
+ while len(state) <= index:
26
+ state.append("")
27
+ # Update the value at the correct index
28
+ state[index] = value if value is not None else ""
29
+
30
+ return tuple(states)
31
+
32
+ @gr.render(inputs=count_state)
33
+ def render_dynamic_section(count):
34
+ nonlocal all_components
35
+ all_components = []
36
+
37
+ for i in range(count):
38
+ with (gr.Row() if layout == "row" else gr.Column()):
39
+ row_components = []
40
+ field_refs = [] # To store references to current row's components
41
+
42
+ for field_idx, config in enumerate(fields_config):
43
+ component = config["type"](
44
+ label=f"{config['label']} {i + 1}",
45
+ info=config.get("info", ""),
46
+ **config.get("kwargs", {})
47
+ )
48
+ row_components.append(component)
49
+ field_refs.append(component)
50
+
51
+ # Create change event with ALL current field values
52
+ component.change(
53
+ fn=update_fields,
54
+ inputs=[*field_states, *field_refs, gr.State(i)],
55
+ outputs=field_states
56
+ )
57
+
58
+ # Remove button
59
+ remove_btn = gr.Button("❌", variant="secondary")
60
+ remove_btn.click(
61
+ lambda x, idx=i, fs=field_states: (
62
+ max(0, x-1),
63
+ *[fs[i].value[:idx] + fs[i].value[idx+1:] for i in range(len(fs))]
64
+ ),
65
+ inputs=count_state,
66
+ outputs=[count_state, *field_states]
67
+ )
68
+ row_components.append(remove_btn)
69
+
70
+ all_components.extend(row_components)
71
+ return all_components
72
+
73
+ # Initialize with initial count
74
+ render_dynamic_section(count=initial_count)
75
+
76
+ add_btn = gr.Button(f"Add {section_name}")
77
+ add_btn.click(lambda x: x + 1, count_state, count_state)
78
+
79
+ return (count_state, *field_states, add_btn)
80
+
81
  def create_header_tab():
82
  """Create the header tab components."""
83
  with gr.Tab("Header"):
 
124
  tuning_method = gr.Textbox(label="Tuning Method", info="(the method of hyperparameters tuning used (if any), example: gridSearch, randomizedSearch...)")
125
 
126
  with gr.Accordion("Hyperparameters"):
127
+ _, hyperparameter_names, hyperparameter_values, add_btn = create_dynamic_section(
128
+ section_name="Hyperparameter",
129
+ fields_config=[
130
+ {
131
+ "type": gr.Textbox,
132
+ "label": "Hyperparameter Name",
133
+ "info": "(name of the hyperparameter)",
134
+ "kwargs": {"interactive": True}
135
+ },
136
+ {
137
+ "type": gr.Textbox,
138
+ "label": "Hyperparameter Value",
139
+ "info": "(value of the hyperparameter)",
140
+ "kwargs": {"placeholder": "Enter value..."}
141
+ }
142
+ ],
143
+ initial_count=0,
144
+ )
145
+
146
  quantization = gr.Textbox(label="Quantization", info="(the data weights (in bits) obtained thanks to the quantization, example: 2, 8, 16...)")
147
 
148
  with gr.Accordion("Dataset"):
 
158
  shape_item = gr.Textbox(label="Shape Item", info="(the shape of each dataset item)")
159
 
160
  with gr.Accordion("Inference Properties"):
161
+ _, nbRequest, nbTokensInput, nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize, cache, add_inference_btn = create_dynamic_section(
162
+ section_name="Inference Property",
163
+ fields_config=[
164
+ {
165
+ "type": gr.Textbox,
166
+ "label": "Number of Requests",
167
+ "info": "Required field<br>(the number of requests the measure corresponds to)",
168
+ },
169
+ {
170
+ "type": gr.Textbox,
171
+ "label": "Number of Tokens Input",
172
+ "info": "(the number of tokens in the input)",
173
+ },
174
+ {
175
+ "type": gr.Textbox,
176
+ "label": "Number of Words Input",
177
+ "info": "(the number of words in the input)",
178
+ },
179
+ {
180
+ "type": gr.Textbox,
181
+ "label": "Number of Tokens Output",
182
+ "info": "(the number of tokens in the output)",
183
+ },
184
+ {
185
+ "type": gr.Textbox,
186
+ "label": "Number of Words Output",
187
+ "info": "(the number of words in the output)",
188
+ },
189
+ {
190
+ "type": gr.Textbox,
191
+ "label": "Context Window Size",
192
+ "info": "(the number of tokens kept in memory)",
193
+ },
194
+ {
195
+ "type": gr.Dropdown,
196
+ "label": "Cache",
197
+ "info": "(the presence of a cache function)",
198
+ "kwargs": {"choices": CACHE_OPTIONS, "value": None}
199
+ }
200
+ ],
201
+ initial_count=0,
202
+ layout="column"
203
  )
204
 
205
  source = gr.Textbox(label="Source", info="(the kind of source of the dataset)")
 
216
 
217
  return [
218
  taskType, taskFamily, taskStage, algorithmName, framework,
219
+ frameworkVersion, classPath, tuning_method, hyperparameter_names, hyperparameter_values,
220
+ quantization, dataType, fileType, volume,
221
  volumeUnit, items, shape_item, nbRequest, nbTokensInput,
222
  nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize,
223
  cache, source, sourceUri, owner, measuredAccuracy, estimatedAccuracy