Spaces:
Sleeping
Sleeping
Commit
·
5dc0abf
1
Parent(s):
762b8c6
Implement dynamic fields management
Browse files
- services/huggingface.py +48 -9
- services/json_generator.py +25 -20
- ui/form_components.py +135 -16
services/huggingface.py
CHANGED
@@ -37,6 +37,45 @@ def update_dataset(json_data):
|
|
37 |
|
38 |
def create_flattened_data(data):
|
39 |
"""Create a flattened data structure for the dataset."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
return {
|
41 |
# Header
|
42 |
"licensing": [data["header"]["licensing"]],
|
@@ -60,8 +99,8 @@ def create_flattened_data(data):
|
|
60 |
"frameworkVersion": [data["task"]["algorithms"][0]["frameworkVersion"]],
|
61 |
"classPath": [data["task"]["algorithms"][0]["classPath"]],
|
62 |
"tuning_method": [data["task"]["algorithms"][0]["hyperparameters"]["tuning_method"]],
|
63 |
-
"hyperparameterName": [
|
64 |
-
"hyperparameterValue": [
|
65 |
"quantization": [data["task"]["algorithms"][0]["quantization"]],
|
66 |
"dataType": [data["task"]["dataset"][0]["dataType"]],
|
67 |
"fileType": [data["task"]["dataset"][0]["fileType"]],
|
@@ -69,13 +108,13 @@ def create_flattened_data(data):
|
|
69 |
"volumeUnit": [data["task"]["dataset"][0]["volumeUnit"]],
|
70 |
"items": [data["task"]["dataset"][0]["items"]],
|
71 |
"shape_item": [data["task"]["dataset"][0]["shape"][0]["item"]],
|
72 |
-
"nbRequest": [
|
73 |
-
"nbTokensInput": [
|
74 |
-
"nbWordsInput": [
|
75 |
-
"nbTokensOutput": [
|
76 |
-
"nbWordsOutput": [
|
77 |
-
"contextWindowSize": [
|
78 |
-
"cache": [
|
79 |
"source": [data["task"]["dataset"][0]["source"]],
|
80 |
"sourceUri": [data["task"]["dataset"][0]["sourceUri"]],
|
81 |
"owner": [data["task"]["dataset"][0]["owner"]],
|
|
|
37 |
|
38 |
def create_flattened_data(data):
|
39 |
"""Create a flattened data structure for the dataset."""
|
40 |
+
# Handle hyperparameters
|
41 |
+
hyperparameters = data.get("task", {}).get("algorithms", [{}])[0].get("hyperparameters", {}).get("values", [])
|
42 |
+
|
43 |
+
# Process hyperparameters
|
44 |
+
hyperparameter_names = []
|
45 |
+
hyperparameter_values = []
|
46 |
+
for hp in hyperparameters:
|
47 |
+
if "name" in hp and "value" in hp: # Match the keys used in JSON
|
48 |
+
hyperparameter_names.append(hp["name"])
|
49 |
+
hyperparameter_values.append(str(hp["value"]))
|
50 |
+
|
51 |
+
hyperparameter_name_str = ", ".join(hyperparameter_names) if hyperparameter_names else None
|
52 |
+
hyperparameter_value_str = ", ".join(hyperparameter_values) if hyperparameter_values else None
|
53 |
+
|
54 |
+
# Handle inference properties
|
55 |
+
inference_props = data.get("task", {}).get("dataset", [{}])[0].get("inferenceProperties", [])
|
56 |
+
print("Extracted inference properties:", inference_props)
|
57 |
+
|
58 |
+
# Process inference properties
|
59 |
+
inference_data = []
|
60 |
+
for props in inference_props:
|
61 |
+
if props:
|
62 |
+
inference_data.append({
|
63 |
+
"nbRequest": props.get("nbRequest"),
|
64 |
+
"nbTokensInput": props.get("nbTokensInput"),
|
65 |
+
"nbWordsInput": props.get("nbWordsInput"),
|
66 |
+
"nbTokensOutput": props.get("nbTokensOutput"),
|
67 |
+
"nbWordsOutput": props.get("nbWordsOutput"),
|
68 |
+
"contextWindowSize": props.get("contextWindowSize"),
|
69 |
+
"cache": props.get("cache")
|
70 |
+
})
|
71 |
+
|
72 |
+
nbRequest_str = ", ".join([str(p["nbRequest"]) for p in inference_data if p.get("nbRequest")]) if inference_data else None
|
73 |
+
nbTokensInput_str = ", ".join([str(p["nbTokensInput"]) for p in inference_data if p.get("nbTokensInput")]) if inference_data else None
|
74 |
+
nbWordsInput_str = ", ".join([str(p["nbWordsInput"]) for p in inference_data if p.get("nbWordsInput")]) if inference_data else None
|
75 |
+
nbTokensOutput_str = ", ".join([str(p["nbTokensOutput"]) for p in inference_data if p.get("nbTokensOutput")]) if inference_data else None
|
76 |
+
nbWordsOutput_str = ", ".join([str(p["nbWordsOutput"]) for p in inference_data if p.get("nbWordsOutput")]) if inference_data else None
|
77 |
+
contextWindowSize_str = ", ".join([str(p["contextWindowSize"]) for p in inference_data if p.get("contextWindowSize")]) if inference_data else None
|
78 |
+
cache_str = ", ".join([str(p["cache"]) for p in inference_data if p.get("cache")]) if inference_data else None
|
79 |
return {
|
80 |
# Header
|
81 |
"licensing": [data["header"]["licensing"]],
|
|
|
99 |
"frameworkVersion": [data["task"]["algorithms"][0]["frameworkVersion"]],
|
100 |
"classPath": [data["task"]["algorithms"][0]["classPath"]],
|
101 |
"tuning_method": [data["task"]["algorithms"][0]["hyperparameters"]["tuning_method"]],
|
102 |
+
"hyperparameterName": [hyperparameter_name_str],
|
103 |
+
"hyperparameterValue": [hyperparameter_value_str],
|
104 |
"quantization": [data["task"]["algorithms"][0]["quantization"]],
|
105 |
"dataType": [data["task"]["dataset"][0]["dataType"]],
|
106 |
"fileType": [data["task"]["dataset"][0]["fileType"]],
|
|
|
108 |
"volumeUnit": [data["task"]["dataset"][0]["volumeUnit"]],
|
109 |
"items": [data["task"]["dataset"][0]["items"]],
|
110 |
"shape_item": [data["task"]["dataset"][0]["shape"][0]["item"]],
|
111 |
+
"nbRequest": [nbRequest_str],
|
112 |
+
"nbTokensInput": [nbTokensInput_str],
|
113 |
+
"nbWordsInput": [nbWordsInput_str],
|
114 |
+
"nbTokensOutput": [nbTokensOutput_str],
|
115 |
+
"nbWordsOutput": [nbWordsOutput_str],
|
116 |
+
"contextWindowSize": [contextWindowSize_str],
|
117 |
+
"cache": [cache_str],
|
118 |
"source": [data["task"]["dataset"][0]["source"]],
|
119 |
"sourceUri": [data["task"]["dataset"][0]["sourceUri"]],
|
120 |
"owner": [data["task"]["dataset"][0]["owner"]],
|
services/json_generator.py
CHANGED
@@ -9,7 +9,7 @@ def generate_json(
|
|
9 |
publisher_name, publisher_division, publisher_projectName, publisher_confidentialityLevel, publisher_publicKey,
|
10 |
# Task
|
11 |
taskType, taskFamily, taskStage, algorithmName, framework, frameworkVersion, classPath, tuning_method,
|
12 |
-
|
13 |
shape_item, nbRequest, nbTokensInput, nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize, cache,
|
14 |
source, sourceUri, owner, measuredAccuracy, estimatedAccuracy,
|
15 |
# Measures
|
@@ -30,6 +30,28 @@ def generate_json(
|
|
30 |
hashAlgorithm, cryptographicAlgorithm, value_hash
|
31 |
):
|
32 |
"""Generate JSON data from form inputs."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
data = {
|
34 |
"header": {
|
35 |
"licensing": licensing,
|
@@ -58,12 +80,7 @@ def generate_json(
|
|
58 |
"classPath": classPath,
|
59 |
"hyperparameters": {
|
60 |
"tuning_method": tuning_method,
|
61 |
-
"values":
|
62 |
-
{
|
63 |
-
"hyperparameterName": hyperparameterName,
|
64 |
-
"hyperparameterValue": hyperparameterValue
|
65 |
-
}
|
66 |
-
]
|
67 |
},
|
68 |
"quantization": quantization
|
69 |
}
|
@@ -80,19 +97,7 @@ def generate_json(
|
|
80 |
"item": shape_item
|
81 |
}
|
82 |
],
|
83 |
-
"inferenceProperties":
|
84 |
-
{
|
85 |
-
"nbRequest": nbRequest,
|
86 |
-
"parametersNLP": {
|
87 |
-
"nbTokensInput": nbTokensInput,
|
88 |
-
"nbWordsInput": nbWordsInput,
|
89 |
-
"nbTokensOutput": nbTokensOutput,
|
90 |
-
"nbWordsOutput": nbWordsOutput,
|
91 |
-
"contextWindowSize": contextWindowSize,
|
92 |
-
"cache": cache
|
93 |
-
}
|
94 |
-
}
|
95 |
-
],
|
96 |
"source": source,
|
97 |
"sourceUri": sourceUri,
|
98 |
"owner": owner
|
|
|
9 |
publisher_name, publisher_division, publisher_projectName, publisher_confidentialityLevel, publisher_publicKey,
|
10 |
# Task
|
11 |
taskType, taskFamily, taskStage, algorithmName, framework, frameworkVersion, classPath, tuning_method,
|
12 |
+
hyperparameter_names, hyperparameter_values, quantization, dataType, fileType, volume, volumeUnit, items,
|
13 |
shape_item, nbRequest, nbTokensInput, nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize, cache,
|
14 |
source, sourceUri, owner, measuredAccuracy, estimatedAccuracy,
|
15 |
# Measures
|
|
|
30 |
hashAlgorithm, cryptographicAlgorithm, value_hash
|
31 |
):
|
32 |
"""Generate JSON data from form inputs."""
|
33 |
+
# Process hyperparameters
|
34 |
+
hyperparameters = []
|
35 |
+
for name, value in zip(hyperparameter_names, hyperparameter_values):
|
36 |
+
if name and value:
|
37 |
+
hyperparameters.append({
|
38 |
+
"name": name,
|
39 |
+
"value": value
|
40 |
+
})
|
41 |
+
|
42 |
+
# Process inference properties
|
43 |
+
inference_props_list = []
|
44 |
+
for i in range(len(nbRequest)):
|
45 |
+
inference_props_list.append({
|
46 |
+
"nbRequest": nbRequest[i],
|
47 |
+
"nbTokensInput": nbTokensInput[i],
|
48 |
+
"nbWordsInput": nbWordsInput[i],
|
49 |
+
"nbTokensOutput": nbTokensOutput[i],
|
50 |
+
"nbWordsOutput": nbWordsOutput[i],
|
51 |
+
"contextWindowSize": contextWindowSize[i],
|
52 |
+
"cache": cache[i]
|
53 |
+
})
|
54 |
+
|
55 |
data = {
|
56 |
"header": {
|
57 |
"licensing": licensing,
|
|
|
80 |
"classPath": classPath,
|
81 |
"hyperparameters": {
|
82 |
"tuning_method": tuning_method,
|
83 |
+
"values": hyperparameters,
|
|
|
|
|
|
|
|
|
|
|
84 |
},
|
85 |
"quantization": quantization
|
86 |
}
|
|
|
97 |
"item": shape_item
|
98 |
}
|
99 |
],
|
100 |
+
"inferenceProperties": inference_props_list,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
"source": source,
|
102 |
"sourceUri": sourceUri,
|
103 |
"owner": owner
|
ui/form_components.py
CHANGED
@@ -6,6 +6,78 @@ from config import (
|
|
6 |
HASH_ALGORITHMS, CRYPTO_ALGORITHMS, CACHE_OPTIONS
|
7 |
)
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
def create_header_tab():
|
10 |
"""Create the header tab components."""
|
11 |
with gr.Tab("Header"):
|
@@ -52,10 +124,25 @@ def create_task_tab():
|
|
52 |
tuning_method = gr.Textbox(label="Tuning Method", info="(the method of hyperparameters tuning used (if any), example: gridSearch, randomizedSearch...)")
|
53 |
|
54 |
with gr.Accordion("Hyperparameters"):
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
quantization = gr.Textbox(label="Quantization", info="(the data weights (in bits) obtained thanks to the quantization, example: 2, 8, 16...)")
|
60 |
|
61 |
with gr.Accordion("Dataset"):
|
@@ -71,16 +158,48 @@ def create_task_tab():
|
|
71 |
shape_item = gr.Textbox(label="Shape Item", info="(the shape of each dataset item)")
|
72 |
|
73 |
with gr.Accordion("Inference Properties"):
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
)
|
85 |
|
86 |
source = gr.Textbox(label="Source", info="(the kind of source of the dataset)")
|
@@ -97,8 +216,8 @@ def create_task_tab():
|
|
97 |
|
98 |
return [
|
99 |
taskType, taskFamily, taskStage, algorithmName, framework,
|
100 |
-
frameworkVersion, classPath, tuning_method,
|
101 |
-
|
102 |
volumeUnit, items, shape_item, nbRequest, nbTokensInput,
|
103 |
nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize,
|
104 |
cache, source, sourceUri, owner, measuredAccuracy, estimatedAccuracy
|
|
|
6 |
HASH_ALGORITHMS, CRYPTO_ALGORITHMS, CACHE_OPTIONS
|
7 |
)
|
8 |
|
9 |
+
def create_dynamic_section(section_name, fields_config, initial_count = 1, layout="row"):
|
10 |
+
# State management
|
11 |
+
count_state = gr.State(value=initial_count+1)
|
12 |
+
field_states = [gr.State([]) for _ in fields_config]
|
13 |
+
all_components = []
|
14 |
+
|
15 |
+
def update_fields(*states_and_values):
|
16 |
+
"""Generic update function for multiple fields"""
|
17 |
+
# Split states and current values
|
18 |
+
states = list(states_and_values[:len(fields_config)])
|
19 |
+
current_values = states_and_values[len(fields_config):-1]
|
20 |
+
index = states_and_values[-1]
|
21 |
+
|
22 |
+
# Update each field's state
|
23 |
+
for field_idx, (state, value) in enumerate(zip(states, current_values)):
|
24 |
+
# Ensure state list is long enough
|
25 |
+
while len(state) <= index:
|
26 |
+
state.append("")
|
27 |
+
# Update the value at the correct index
|
28 |
+
state[index] = value if value is not None else ""
|
29 |
+
|
30 |
+
return tuple(states)
|
31 |
+
|
32 |
+
@gr.render(inputs=count_state)
|
33 |
+
def render_dynamic_section(count):
|
34 |
+
nonlocal all_components
|
35 |
+
all_components = []
|
36 |
+
|
37 |
+
for i in range(count):
|
38 |
+
with (gr.Row() if layout == "row" else gr.Column()):
|
39 |
+
row_components = []
|
40 |
+
field_refs = [] # To store references to current row's components
|
41 |
+
|
42 |
+
for field_idx, config in enumerate(fields_config):
|
43 |
+
component = config["type"](
|
44 |
+
label=f"{config['label']} {i + 1}",
|
45 |
+
info=config.get("info", ""),
|
46 |
+
**config.get("kwargs", {})
|
47 |
+
)
|
48 |
+
row_components.append(component)
|
49 |
+
field_refs.append(component)
|
50 |
+
|
51 |
+
# Create change event with ALL current field values
|
52 |
+
component.change(
|
53 |
+
fn=update_fields,
|
54 |
+
inputs=[*field_states, *field_refs, gr.State(i)],
|
55 |
+
outputs=field_states
|
56 |
+
)
|
57 |
+
|
58 |
+
# Remove button
|
59 |
+
remove_btn = gr.Button("❌", variant="secondary")
|
60 |
+
remove_btn.click(
|
61 |
+
lambda x, idx=i, fs=field_states: (
|
62 |
+
max(0, x-1),
|
63 |
+
*[fs[i].value[:idx] + fs[i].value[idx+1:] for i in range(len(fs))]
|
64 |
+
),
|
65 |
+
inputs=count_state,
|
66 |
+
outputs=[count_state, *field_states]
|
67 |
+
)
|
68 |
+
row_components.append(remove_btn)
|
69 |
+
|
70 |
+
all_components.extend(row_components)
|
71 |
+
return all_components
|
72 |
+
|
73 |
+
# Initialize with initial count
|
74 |
+
render_dynamic_section(count=initial_count)
|
75 |
+
|
76 |
+
add_btn = gr.Button(f"Add {section_name}")
|
77 |
+
add_btn.click(lambda x: x + 1, count_state, count_state)
|
78 |
+
|
79 |
+
return (count_state, *field_states, add_btn)
|
80 |
+
|
81 |
def create_header_tab():
|
82 |
"""Create the header tab components."""
|
83 |
with gr.Tab("Header"):
|
|
|
124 |
tuning_method = gr.Textbox(label="Tuning Method", info="(the method of hyperparameters tuning used (if any), example: gridSearch, randomizedSearch...)")
|
125 |
|
126 |
with gr.Accordion("Hyperparameters"):
|
127 |
+
_, hyperparameter_names, hyperparameter_values, add_btn = create_dynamic_section(
|
128 |
+
section_name="Hyperparameter",
|
129 |
+
fields_config=[
|
130 |
+
{
|
131 |
+
"type": gr.Textbox,
|
132 |
+
"label": "Hyperparameter Name",
|
133 |
+
"info": "(name of the hyperparameter)",
|
134 |
+
"kwargs": {"interactive": True}
|
135 |
+
},
|
136 |
+
{
|
137 |
+
"type": gr.Textbox,
|
138 |
+
"label": "Hyperparameter Value",
|
139 |
+
"info": "(value of the hyperparameter)",
|
140 |
+
"kwargs": {"placeholder": "Enter value..."}
|
141 |
+
}
|
142 |
+
],
|
143 |
+
initial_count=0,
|
144 |
+
)
|
145 |
+
|
146 |
quantization = gr.Textbox(label="Quantization", info="(the data weights (in bits) obtained thanks to the quantization, example: 2, 8, 16...)")
|
147 |
|
148 |
with gr.Accordion("Dataset"):
|
|
|
158 |
shape_item = gr.Textbox(label="Shape Item", info="(the shape of each dataset item)")
|
159 |
|
160 |
with gr.Accordion("Inference Properties"):
|
161 |
+
_, nbRequest, nbTokensInput, nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize, cache, add_inference_btn = create_dynamic_section(
|
162 |
+
section_name="Inference Property",
|
163 |
+
fields_config=[
|
164 |
+
{
|
165 |
+
"type": gr.Textbox,
|
166 |
+
"label": "Number of Requests",
|
167 |
+
"info": "Required field<br>(the number of requests the measure corresponds to)",
|
168 |
+
},
|
169 |
+
{
|
170 |
+
"type": gr.Textbox,
|
171 |
+
"label": "Number of Tokens Input",
|
172 |
+
"info": "(the number of tokens in the input)",
|
173 |
+
},
|
174 |
+
{
|
175 |
+
"type": gr.Textbox,
|
176 |
+
"label": "Number of Words Input",
|
177 |
+
"info": "(the number of words in the input)",
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"type": gr.Textbox,
|
181 |
+
"label": "Number of Tokens Output",
|
182 |
+
"info": "(the number of tokens in the output)",
|
183 |
+
},
|
184 |
+
{
|
185 |
+
"type": gr.Textbox,
|
186 |
+
"label": "Number of Words Output",
|
187 |
+
"info": "(the number of words in the output)",
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"type": gr.Textbox,
|
191 |
+
"label": "Context Window Size",
|
192 |
+
"info": "(the number of tokens kept in memory)",
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"type": gr.Dropdown,
|
196 |
+
"label": "Cache",
|
197 |
+
"info": "(the presence of a cache function)",
|
198 |
+
"kwargs": {"choices": CACHE_OPTIONS, "value": None}
|
199 |
+
}
|
200 |
+
],
|
201 |
+
initial_count=0,
|
202 |
+
layout="column"
|
203 |
)
|
204 |
|
205 |
source = gr.Textbox(label="Source", info="(the kind of source of the dataset)")
|
|
|
216 |
|
217 |
return [
|
218 |
taskType, taskFamily, taskStage, algorithmName, framework,
|
219 |
+
frameworkVersion, classPath, tuning_method, hyperparameter_names, hyperparameter_values,
|
220 |
+
quantization, dataType, fileType, volume,
|
221 |
volumeUnit, items, shape_item, nbRequest, nbTokensInput,
|
222 |
nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize,
|
223 |
cache, source, sourceUri, owner, measuredAccuracy, estimatedAccuracy
|