FILALIHicham committed
Commit 762b8c6 · Parent(s): 42cf5d2

Add initial implementation of data collection form

app.py ADDED
@@ -0,0 +1,63 @@
import gradio as gr
from services.huggingface import init_huggingface, update_dataset
from services.json_generator import generate_json
from ui.form_components import (
    create_header_tab,
    create_task_tab,
    create_measures_tab,
    create_system_tab,
    create_software_tab,
    create_infrastructure_tab,
    create_environment_tab,
    create_quality_tab,
    create_hash_tab
)

# Initialize Hugging Face
init_huggingface()

# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Data Collection Form")
    gr.Markdown("Welcome to this Huggingface space that helps you fill in a form for monitoring the energy consumption of an AI model.")

    # Create form tabs
    header_components = create_header_tab()
    task_components = create_task_tab()
    measures_components = create_measures_tab()
    system_components = create_system_tab()
    software_components = create_software_tab()
    infrastructure_components = create_infrastructure_tab()
    environment_components = create_environment_tab()
    quality_components = create_quality_tab()
    hash_components = create_hash_tab()

    # Submit and Download Buttons
    submit_button = gr.Button("Submit")
    output = gr.Textbox(label="Output", lines=1)
    json_output = gr.Textbox(visible=False)
    file_output = gr.File(label="Downloadable JSON")

    # Event Handlers
    submit_button.click(
        generate_json,
        inputs=[
            *header_components,
            *task_components,
            *measures_components,
            *system_components,
            *software_components,
            *infrastructure_components,
            *environment_components,
            *quality_components,
            *hash_components
        ],
        outputs=[output, file_output, json_output]
    ).then(
        update_dataset,
        inputs=json_output,
        outputs=output
    )

if __name__ == "__main__":
    demo.launch()
config.py ADDED
@@ -0,0 +1,31 @@
import os

# Hugging Face Configuration
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_NAME = "FILALIHicham/EcoMindAI-Data"

# Form Field Configurations
OBLIGATORY_FIELDS = [
    "formatVersion", "reportId", "reportStatus", "confidentialityLevel",
    "taskType", "taskFamily", "taskStage", "algorithmName", "dataType",
    "volume", "volumeUnit", "nbRequest", "measurementMethod", "unit",
    "powerConsumption", "os", "language", "infraType", "componentName",
    "nbComponent", "country", "hashAlgorithm", "cryptographicAlgorithm", "value"
]

# Dropdown Options
REPORT_STATUS_OPTIONS = ["draft", "final", "corrective", "$other"]
CONFIDENTIALITY_LEVELS = ["public", "internal", "confidential", "secret"]
DATA_TYPES = ["tabular", "audio", "boolean", "image", "video", "object", "text", "$other"]
ACCURACY_LEVELS = ["veryPoor", "poor", "average", "good", "veryGood"]
MEASUREMENT_UNITS = ["Wh", "kWh", "MWh", "GWh", "kJoule", "MJoule", "GJoule", "TJoule", "PJoule",
                     "BTU", "kiloFLOPS", "megaFLOPS", "gigaFLOPS", "teraFLOPS", "petaFLOPS",
                     "exaFLOPS", "zettaFLOPS", "yottaFLOPS"]
INFRA_TYPES = ["publicCloud", "privateCloud", "onPremise", "$other"]
POWER_SUPPLIER_TYPES = ["public", "private", "internal", "$other"]
POWER_SOURCES = ["solar", "wind", "nuclear", "hydroelectric", "gas", "coal", "$other"]
QUALITY_LEVELS = ["high", "medium", "low"]
HASH_ALGORITHMS = ["MD5", "RIPEMD-128", "RIPEMD-160", "RIPEMD-256", "RIPEMD-320",
                   "SHA-1", "SHA-224", "SHA256", "SHA-384", "SHA-512"]
CRYPTO_ALGORITHMS = ["RSA", "DSA", "ECDSA", "EDDSA"]
CACHE_OPTIONS = ["true", "false"]
services/huggingface.py ADDED
@@ -0,0 +1,138 @@
from huggingface_hub import login
from datasets import load_dataset, Dataset, concatenate_datasets
import json
from config import HF_TOKEN, DATASET_NAME

def init_huggingface():
    """Initialize Hugging Face authentication."""
    if HF_TOKEN is None:
        raise ValueError("Hugging Face token not found in environment variables.")
    login(token=HF_TOKEN)

def update_dataset(json_data):
    """Update the Hugging Face dataset with new data."""
    if json_data is None or json_data.startswith("The following fields are required"):
        return json_data or "No data to submit. Please fill in all required fields."

    try:
        data = json.loads(json_data)
    except json.JSONDecodeError:
        return "Invalid JSON data. Please ensure all required fields are filled correctly."

    try:
        dataset = load_dataset(DATASET_NAME, split="train")
    except Exception:
        # Fall back to an empty dataset if the remote dataset does not exist yet
        dataset = Dataset.from_dict({})

    new_data = create_flattened_data(data)
    new_dataset = Dataset.from_dict(new_data)

    if len(dataset) > 0:
        updated_dataset = concatenate_datasets([dataset, new_dataset])
    else:
        updated_dataset = new_dataset

    updated_dataset.push_to_hub(DATASET_NAME)
    return "Data submitted successfully and dataset updated!"

def create_flattened_data(data):
    """Create a flattened data structure for the dataset."""
    return {
        # Header
        "licensing": [data["header"]["licensing"]],
        "formatVersion": [data["header"]["formatVersion"]],
        "formatVersionSpecificationUri": [data["header"]["formatVersionSpecificationUri"]],
        "reportId": [data["header"]["reportId"]],
        "reportDatetime": [data["header"]["reportDatetime"]],
        "reportStatus": [data["header"]["reportStatus"]],
        "publisher_name": [data["header"]["publisher"]["name"]],
        "publisher_division": [data["header"]["publisher"]["division"]],
        "publisher_projectName": [data["header"]["publisher"]["projectName"]],
        "publisher_confidentialityLevel": [data["header"]["publisher"]["confidentialityLevel"]],
        "publisher_publicKey": [data["header"]["publisher"]["publicKey"]],

        # Task
        "taskType": [data["task"]["taskType"]],
        "taskFamily": [data["task"]["taskFamily"]],
        "taskStage": [data["task"]["taskStage"]],
        "algorithmName": [data["task"]["algorithms"][0]["algorithmName"]],
        "framework": [data["task"]["algorithms"][0]["framework"]],
        "frameworkVersion": [data["task"]["algorithms"][0]["frameworkVersion"]],
        "classPath": [data["task"]["algorithms"][0]["classPath"]],
        "tuning_method": [data["task"]["algorithms"][0]["hyperparameters"]["tuning_method"]],
        "hyperparameterName": [data["task"]["algorithms"][0]["hyperparameters"]["values"][0]["hyperparameterName"]],
        "hyperparameterValue": [data["task"]["algorithms"][0]["hyperparameters"]["values"][0]["hyperparameterValue"]],
        "quantization": [data["task"]["algorithms"][0]["quantization"]],
        "dataType": [data["task"]["dataset"][0]["dataType"]],
        "fileType": [data["task"]["dataset"][0]["fileType"]],
        "volume": [data["task"]["dataset"][0]["volume"]],
        "volumeUnit": [data["task"]["dataset"][0]["volumeUnit"]],
        "items": [data["task"]["dataset"][0]["items"]],
        "shape_item": [data["task"]["dataset"][0]["shape"][0]["item"]],
        "nbRequest": [data["task"]["dataset"][0]["inferenceProperties"][0]["nbRequest"]],
        "nbTokensInput": [data["task"]["dataset"][0]["inferenceProperties"][0]["parametersNLP"]["nbTokensInput"]],
        "nbWordsInput": [data["task"]["dataset"][0]["inferenceProperties"][0]["parametersNLP"]["nbWordsInput"]],
        "nbTokensOutput": [data["task"]["dataset"][0]["inferenceProperties"][0]["parametersNLP"]["nbTokensOutput"]],
        "nbWordsOutput": [data["task"]["dataset"][0]["inferenceProperties"][0]["parametersNLP"]["nbWordsOutput"]],
        "contextWindowSize": [data["task"]["dataset"][0]["inferenceProperties"][0]["parametersNLP"]["contextWindowSize"]],
        "cache": [data["task"]["dataset"][0]["inferenceProperties"][0]["parametersNLP"]["cache"]],
        "source": [data["task"]["dataset"][0]["source"]],
        "sourceUri": [data["task"]["dataset"][0]["sourceUri"]],
        "owner": [data["task"]["dataset"][0]["owner"]],
        "measuredAccuracy": [data["task"]["measuredAccuracy"]],
        "estimatedAccuracy": [data["task"]["estimatedAccuracy"]],

        # Measures
        "measurementMethod": [data["measures"][0]["measurementMethod"]],
        "manufacturer": [data["measures"][0]["manufacturer"]],
        "version": [data["measures"][0]["version"]],
        "cpuTrackingMode": [data["measures"][0]["cpuTrackingMode"]],
        "gpuTrackingMode": [data["measures"][0]["gpuTrackingMode"]],
        "averageUtilizationCpu": [data["measures"][0]["averageUtilizationCpu"]],
        "averageUtilizationGpu": [data["measures"][0]["averageUtilizationGpu"]],
        "serverSideInference": [data["measures"][0]["serverSideInference"]],
        "unit": [data["measures"][0]["unit"]],
        "powerCalibrationMeasurement": [data["measures"][0]["powerCalibrationMeasurement"]],
        "durationCalibrationMeasurement": [data["measures"][0]["durationCalibrationMeasurement"]],
        "powerConsumption": [data["measures"][0]["powerConsumption"]],
        "measurementDuration": [data["measures"][0]["measurementDuration"]],
        "measurementDateTime": [data["measures"][0]["measurementDateTime"]],

        # System
        "os": [data["system"]["os"]],
        "distribution": [data["system"]["distribution"]],
        "distributionVersion": [data["system"]["distributionVersion"]],

        # Software
        "language": [data["software"]["language"]],
        "version_software": [data["software"]["version"]],

        # Infrastructure
        "infraType": [data["infrastructure"]["infraType"]],
        "cloudProvider": [data["infrastructure"]["cloudProvider"]],
        "cloudInstance": [data["infrastructure"]["cloudInstance"]],
        "componentName": [data["infrastructure"]["components"][0]["componentName"]],
        "nbComponent": [data["infrastructure"]["components"][0]["nbComponent"]],
        "memorySize": [data["infrastructure"]["components"][0]["memorySize"]],
        "manufacturer_infra": [data["infrastructure"]["components"][0]["manufacturer"]],
        "family": [data["infrastructure"]["components"][0]["family"]],
        "series": [data["infrastructure"]["components"][0]["series"]],
        "share": [data["infrastructure"]["components"][0]["share"]],

        # Environment
        "country": [data["environment"]["country"]],
        "latitude": [data["environment"]["latitude"]],
        "longitude": [data["environment"]["longitude"]],
        "location": [data["environment"]["location"]],
        "powerSupplierType": [data["environment"]["powerSupplierType"]],
        "powerSource": [data["environment"]["powerSource"]],
        "powerSourceCarbonIntensity": [data["environment"]["powerSourceCarbonIntensity"]],

        # Quality
        "quality": [data["quality"]],

        # Hash
        "hashAlgorithm": [data["$hash"]["hashAlgorithm"]],
        "cryptographicAlgorithm": [data["$hash"]["cryptographicAlgorithm"]],
        "value": [data["$hash"]["value"]]
    }
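Note: update_dataset expects the JSON string produced by generate_json, not a Python dict, and it returns early on bad input before any Hub call is made. A minimal local sketch of those guard clauses (no HF_TOKEN or network access is needed for these two inputs, since both fail before load_dataset/push_to_hub):

from services.huggingface import update_dataset

print(update_dataset(None))
# -> "No data to submit. Please fill in all required fields."
print(update_dataset("not valid json"))
# -> "Invalid JSON data. Please ensure all required fields are filled correctly."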
services/json_generator.py ADDED
@@ -0,0 +1,175 @@
import json
import tempfile
from datetime import datetime
from utils.validation import validate_obligatory_fields

def generate_json(
    # Header
    licensing, formatVersion, formatVersionSpecificationUri, reportId, reportDatetime, reportStatus,
    publisher_name, publisher_division, publisher_projectName, publisher_confidentialityLevel, publisher_publicKey,
    # Task
    taskType, taskFamily, taskStage, algorithmName, framework, frameworkVersion, classPath, tuning_method,
    hyperparameterName, hyperparameterValue, quantization, dataType, fileType, volume, volumeUnit, items,
    shape_item, nbRequest, nbTokensInput, nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize, cache,
    source, sourceUri, owner, measuredAccuracy, estimatedAccuracy,
    # Measures
    measurementMethod, manufacturer, version, cpuTrackingMode, gpuTrackingMode, averageUtilizationCpu,
    averageUtilizationGpu, serverSideInference, unit, powerCalibrationMeasurement, durationCalibrationMeasurement,
    powerConsumption, measurementDuration, measurementDateTime,
    # System
    os, distribution, distributionVersion,
    # Software
    language, version_software,
    # Infrastructure
    infraType, cloudProvider, cloudInstance, componentName, nbComponent, memorySize, manufacturer_infra, family, series, share,
    # Environment
    country, latitude, longitude, location, powerSupplierType, powerSource, powerSourceCarbonIntensity,
    # Quality
    quality,
    # Hash
    hashAlgorithm, cryptographicAlgorithm, value_hash
):
    """Generate JSON data from form inputs."""
    data = {
        "header": {
            "licensing": licensing,
            "formatVersion": formatVersion,
            "formatVersionSpecificationUri": formatVersionSpecificationUri,
            "reportId": reportId,
            "reportDatetime": reportDatetime or datetime.now().isoformat(),
            "reportStatus": reportStatus,
            "publisher": {
                "name": publisher_name,
                "division": publisher_division,
                "projectName": publisher_projectName,
                "confidentialityLevel": publisher_confidentialityLevel,
                "publicKey": publisher_publicKey
            }
        },
        "task": {
            "taskType": taskType,
            "taskFamily": taskFamily,
            "taskStage": taskStage,
            "algorithms": [
                {
                    "algorithmName": algorithmName,
                    "framework": framework,
                    "frameworkVersion": frameworkVersion,
                    "classPath": classPath,
                    "hyperparameters": {
                        "tuning_method": tuning_method,
                        "values": [
                            {
                                "hyperparameterName": hyperparameterName,
                                "hyperparameterValue": hyperparameterValue
                            }
                        ]
                    },
                    "quantization": quantization
                }
            ],
            "dataset": [
                {
                    "dataType": dataType,
                    "fileType": fileType,
                    "volume": volume,
                    "volumeUnit": volumeUnit,
                    "items": items,
                    "shape": [
                        {
                            "item": shape_item
                        }
                    ],
                    "inferenceProperties": [
                        {
                            "nbRequest": nbRequest,
                            "parametersNLP": {
                                "nbTokensInput": nbTokensInput,
                                "nbWordsInput": nbWordsInput,
                                "nbTokensOutput": nbTokensOutput,
                                "nbWordsOutput": nbWordsOutput,
                                "contextWindowSize": contextWindowSize,
                                "cache": cache
                            }
                        }
                    ],
                    "source": source,
                    "sourceUri": sourceUri,
                    "owner": owner
                }
            ],
            "measuredAccuracy": measuredAccuracy,
            "estimatedAccuracy": estimatedAccuracy
        },
        "measures": [
            {
                "measurementMethod": measurementMethod,
                "manufacturer": manufacturer,
                "version": version,
                "cpuTrackingMode": cpuTrackingMode,
                "gpuTrackingMode": gpuTrackingMode,
                "averageUtilizationCpu": averageUtilizationCpu,
                "averageUtilizationGpu": averageUtilizationGpu,
                "serverSideInference": serverSideInference,
                "unit": unit,
                "powerCalibrationMeasurement": powerCalibrationMeasurement,
                "durationCalibrationMeasurement": durationCalibrationMeasurement,
                "powerConsumption": powerConsumption,
                "measurementDuration": measurementDuration,
                "measurementDateTime": measurementDateTime
            }
        ],
        "system": {
            "os": os,
            "distribution": distribution,
            "distributionVersion": distributionVersion
        },
        "software": {
            "language": language,
            "version": version_software
        },
        "infrastructure": {
            "infraType": infraType,
            "cloudProvider": cloudProvider,
            "cloudInstance": cloudInstance,
            "components": [
                {
                    "componentName": componentName,
                    "nbComponent": nbComponent,
                    "memorySize": memorySize,
                    "manufacturer": manufacturer_infra,
                    "family": family,
                    "series": series,
                    "share": share
                }
            ]
        },
        "environment": {
            "country": country,
            "latitude": latitude,
            "longitude": longitude,
            "location": location,
            "powerSupplierType": powerSupplierType,
            "powerSource": powerSource,
            "powerSourceCarbonIntensity": powerSourceCarbonIntensity
        },
        "quality": quality,
        "$hash": {
            "hashAlgorithm": hashAlgorithm,
            "cryptographicAlgorithm": cryptographicAlgorithm,
            "value": value_hash
        }
    }

    # Validate obligatory fields
    is_valid, message = validate_obligatory_fields(data)
    if not is_valid:
        return message, None, ""

    # Create the JSON string
    json_str = json.dumps(data, indent=4)

    # Create and save the JSON file
    with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f:
        json.dump(data, f, indent=4)

    return message, f.name, json_str
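For reference, generate_json's three return values map one-to-one onto outputs=[output, file_output, json_output] in app.py; when validation fails it returns the error message, no file, and an empty JSON string, so the chained update_dataset step echoes the error instead of pushing anything. A rough sketch of that failure path with every field left blank (inspect is used only to avoid hard-coding the parameter count):

import inspect
from services.json_generator import generate_json

# One empty string per form field in the signature above
n_fields = len(inspect.signature(generate_json).parameters)
message, file_path, json_str = generate_json(*[""] * n_fields)

print(message)    # "The following fields are required: formatVersion, reportId, ..."
print(file_path)  # None
print(json_str)   # "" (not valid JSON, so update_dataset would report invalid data rather than push)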
ui/form_components.py ADDED
@@ -0,0 +1,228 @@
import gradio as gr
from config import (
    REPORT_STATUS_OPTIONS, CONFIDENTIALITY_LEVELS, DATA_TYPES,
    ACCURACY_LEVELS, MEASUREMENT_UNITS, INFRA_TYPES,
    POWER_SUPPLIER_TYPES, POWER_SOURCES, QUALITY_LEVELS,
    HASH_ALGORITHMS, CRYPTO_ALGORITHMS, CACHE_OPTIONS
)

def create_header_tab():
    """Create the header tab components."""
    with gr.Tab("Header"):
        licensing = gr.Textbox(label="Licensing", info="(the type of licensing applicable for the sharing of the report)")
        formatVersion = gr.Textbox(label="Format Version", info="Required field<br>(the version of the specification of this set of schemas defining the report's fields)")
        formatVersionSpecificationUri = gr.Textbox(label="Format Version Specification URI", info="(the URI of the present specification of this set of schemas)")
        reportId = gr.Textbox(label="Report ID", info="Required field<br>(the unique identifier of this report, preferably as a uuid4 string)")
        reportDatetime = gr.Textbox(label="Report Datetime", info="(the publishing date of this report in format YYYY-MM-DD HH:MM:SS)")
        reportStatus = gr.Dropdown(
            value=None,
            label="Report Status",
            choices=REPORT_STATUS_OPTIONS,
            info="Required field<br>(the status of this report)"
        )

        with gr.Accordion("Publisher"):
            publisher_name = gr.Textbox(label="Name", info="(name of the organization)")
            publisher_division = gr.Textbox(label="Division", info="(name of the publishing department within the organization)")
            publisher_projectName = gr.Textbox(label="Project Name", info="(name of the publishing project within the organization)")
            publisher_confidentialityLevel = gr.Dropdown(
                value=None,
                label="Confidentiality Level",
                choices=CONFIDENTIALITY_LEVELS,
                info="Required field<br>(the confidentiality of the report)"
            )
            publisher_publicKey = gr.Textbox(label="Public Key", info="(the cryptographic public key to check the identity of the publishing organization)")

    return [
        licensing, formatVersion, formatVersionSpecificationUri, reportId,
        reportDatetime, reportStatus, publisher_name, publisher_division,
        publisher_projectName, publisher_confidentialityLevel, publisher_publicKey
    ]

def create_task_tab():
    """Create the task tab components."""
    with gr.Tab("Task"):
        taskType = gr.Textbox(label="Task Type", info="Required field<br>(type of the computing task of machine learning, example : datacreation, preprocessing, supervisedLearning, unsupervisedLearning, semiSupervisedLearning ...)")
        taskFamily = gr.Textbox(label="Task Family", info="Required field<br>(the family of task performed, example : classification, regression, chatbot, summarization, keyword extraction, image recognition...)")
        taskStage = gr.Textbox(label="Task Stage", info="Required field<br>(stage of the task, example: training, finetuning, reinforcement, inference, rag...)")

        with gr.Accordion("Algorithms"):
            algorithmName = gr.Textbox(label="Algorithm Name", info="Required field<br>(the case-sensitive common name of the algorithm, example: randomForest, svm, xgboost...)")
            framework = gr.Textbox(label="Framework", info="(the common name of the software framework implementing the algorithm)")
            frameworkVersion = gr.Textbox(label="Framework Version", info="(the version of the software framework)")
            classPath = gr.Textbox(label="Class Path", info="(the full class path of the algorithm within the framework)")
            tuning_method = gr.Textbox(label="Tuning Method", info="(the method of hyperparameters tuning used (if any), example: gridSearch, randomizedSearch...)")

            with gr.Accordion("Hyperparameters"):
                with gr.Row():
                    hyperparameterName = gr.Textbox(label="Hyperparameter Name", info="(the name of the hyperparameter, example: c, kernel, gamma, class_weight...)")
                    hyperparameterValue = gr.Textbox(label="Hyperparameter Value", info="(the value of the hyperparameter, example: rbf, 1e-4, 10, linear...)")

            quantization = gr.Textbox(label="Quantization", info="(the data weights (in bits) obtained thanks to the quantization, example: 2, 8, 16...)")

        with gr.Accordion("Dataset"):
            dataType = gr.Dropdown(
                value=None,
                label="Data Type",
                choices=DATA_TYPES,
                info="Required field<br>(the nature of the data)"
            )
            fileType = gr.Textbox(label="File Type", info="(the file type of the dataset)")
            volume = gr.Textbox(label="Volume", info="Required field<br>(the size of the dataset)")
            volumeUnit = gr.Textbox(label="Volume Unit", info="Required field<br>(the unit of the size)")
            items = gr.Textbox(label="Items", info="(the number of items in the dataset)")
            shape_item = gr.Textbox(label="Shape Item", info="(the shape of each dataset item)")

            with gr.Accordion("Inference Properties"):
                nbRequest = gr.Textbox(label="Number of Requests", info="Required field<br>(the number of requests the measure corresponds to)")
                nbTokensInput = gr.Textbox(label="Number of Tokens Input", info="(the number of tokens in the input)")
                nbWordsInput = gr.Textbox(label="Number of Words Input", info="(the number of words in the input)")
                nbTokensOutput = gr.Textbox(label="Number of Tokens Output", info="(the number of tokens in the output)")
                nbWordsOutput = gr.Textbox(label="Number of Words Output", info="(the number of words in the output)")
                contextWindowSize = gr.Textbox(label="Context Window Size", info="(the number of tokens kept in memory)")
                cache = gr.Dropdown(
                    value=None,
                    label="Cache",
                    choices=CACHE_OPTIONS,
                    info="(the presence of a cache function)"
                )

            source = gr.Textbox(label="Source", info="(the kind of source of the dataset)")
            sourceUri = gr.Textbox(label="Source URI", info="(the URI of the dataset)")
            owner = gr.Textbox(label="Owner", info="(the owner of the dataset)")

        with gr.Row():
            measuredAccuracy = gr.Textbox(label="Measured Accuracy", info="(the measured accuracy of your model (between 0 and 1))")
            estimatedAccuracy = gr.Dropdown(
                value=None,
                label="Estimated Accuracy",
                choices=ACCURACY_LEVELS,
                info="(estimated accuracy assessment)"
            )

    return [
        taskType, taskFamily, taskStage, algorithmName, framework,
        frameworkVersion, classPath, tuning_method, hyperparameterName,
        hyperparameterValue, quantization, dataType, fileType, volume,
        volumeUnit, items, shape_item, nbRequest, nbTokensInput,
        nbWordsInput, nbTokensOutput, nbWordsOutput, contextWindowSize,
        cache, source, sourceUri, owner, measuredAccuracy, estimatedAccuracy
    ]

def create_measures_tab():
    """Create the measures tab components."""
    with gr.Tab("Measures"):
        measurementMethod = gr.Textbox(label="Measurement Method", info="Required field<br>(the method used to perform the energy or FLOPS measure)")
        manufacturer = gr.Textbox(label="Manufacturer", info="(the builder of the measuring tool)")
        version = gr.Textbox(label="Version", info="(the version of the measuring tool)")
        cpuTrackingMode = gr.Textbox(label="CPU Tracking Mode", info="(the method used to track CPU consumption)")
        gpuTrackingMode = gr.Textbox(label="GPU Tracking Mode", info="(the method used to track GPU consumption)")
        averageUtilizationCpu = gr.Textbox(label="Average Utilization CPU", info="(the average percentage of CPU use)")
        averageUtilizationGpu = gr.Textbox(label="Average Utilization GPU", info="(the average percentage of GPU use)")
        serverSideInference = gr.Textbox(label="Server Side Inference", info="(inference server consumption estimation)")
        unit = gr.Dropdown(
            value=None,
            label="Unit",
            choices=MEASUREMENT_UNITS,
            info="Required field<br>(the unit of power consumption measure)"
        )
        powerCalibrationMeasurement = gr.Textbox(label="Power Calibration Measurement", info="(power consumed during calibration)")
        durationCalibrationMeasurement = gr.Textbox(label="Duration Calibration Measurement", info="(duration of calibration in seconds)")
        powerConsumption = gr.Textbox(label="Power Consumption", info="Required field<br>(the power consumption measure)")
        measurementDuration = gr.Textbox(label="Measurement Duration", info="(the duration of measurement in seconds)")
        measurementDateTime = gr.Textbox(label="Measurement DateTime", info="(when measurement began)")

    return [
        measurementMethod, manufacturer, version, cpuTrackingMode,
        gpuTrackingMode, averageUtilizationCpu, averageUtilizationGpu,
        serverSideInference, unit, powerCalibrationMeasurement,
        durationCalibrationMeasurement, powerConsumption,
        measurementDuration, measurementDateTime
    ]

def create_system_tab():
    """Create the system tab components."""
    with gr.Tab("System"):
        os = gr.Textbox(label="OS", info="Required field<br>(name of the operating system)")
        distribution = gr.Textbox(label="Distribution", info="(distribution of the operating system)")
        distributionVersion = gr.Textbox(label="Distribution Version", info="(distribution version)")

    return [os, distribution, distributionVersion]

def create_software_tab():
    """Create the software tab components."""
    with gr.Tab("Software"):
        language = gr.Textbox(label="Language", info="Required field<br>(programming language information)")
        version_software = gr.Textbox(label="Version", info="(version of the programming language)")

    return [language, version_software]

def create_infrastructure_tab():
    """Create the infrastructure tab components."""
    with gr.Tab("Infrastructure"):
        infraType = gr.Dropdown(
            value=None,
            label="Infrastructure Type",
            choices=INFRA_TYPES,
            info="Required field<br>(the type of infrastructure used)"
        )
        cloudProvider = gr.Textbox(label="Cloud Provider", info="(name of your cloud provider)")
        cloudInstance = gr.Textbox(label="Cloud Instance", info="(name of your cloud instance)")
        componentName = gr.Textbox(label="Component Name", info="Required field<br>(type of subsystem part)")
        nbComponent = gr.Textbox(label="Number of Components", info="Required field<br>(number of items of this component)")
        memorySize = gr.Textbox(label="Memory Size", info="(size of memory in Gbytes)")
        manufacturer_infra = gr.Textbox(label="Manufacturer", info="(name of the manufacturer)")
        family = gr.Textbox(label="Family", info="(family of this component)")
        series = gr.Textbox(label="Series", info="(series of this component)")
        share = gr.Textbox(label="Share", info="(percentage of equipment used)")

    return [
        infraType, cloudProvider, cloudInstance, componentName,
        nbComponent, memorySize, manufacturer_infra, family,
        series, share
    ]

def create_environment_tab():
    """Create the environment tab components."""
    with gr.Tab("Environment"):
        country = gr.Textbox(label="Country", info="Required field")
        latitude = gr.Textbox(label="Latitude")
        longitude = gr.Textbox(label="Longitude")
        location = gr.Textbox(label="Location")
        powerSupplierType = gr.Dropdown(
            value=None,
            label="Power Supplier Type",
            choices=POWER_SUPPLIER_TYPES,
            info="(the type of power supplier)"
        )
        powerSource = gr.Dropdown(
            value=None,
            label="Power Source",
            choices=POWER_SOURCES,
            info="(the source of power)"
        )
        powerSourceCarbonIntensity = gr.Textbox(label="Power Source Carbon Intensity")

    return [
        country, latitude, longitude, location,
        powerSupplierType, powerSource, powerSourceCarbonIntensity
    ]

def create_quality_tab():
    """Create the quality tab components."""
    with gr.Tab("Quality"):
        quality = gr.Dropdown(
            value=None,
            label="Quality",
            choices=QUALITY_LEVELS,
            info="(the quality of the information provided)"
        )

    return [quality]

def create_hash_tab():
    """Create the hash tab components."""
    with gr.Tab("Hash"):
        hashAlgorithm = gr.Dropdown(
            value=None,
            label="Hash Algorithm",
            choices=HASH_ALGORITHMS,
            info="Required field<br>(the hash function to apply)"
        )
        cryptographicAlgorithm = gr.Dropdown(
            value=None,
            label="Cryptographic Algorithm",
            choices=CRYPTO_ALGORITHMS,
            info="Required field<br>(the public key function to apply)"
        )
        value_hash = gr.Textbox(label="Value", info="Required field<br>(encrypted value of the hash)")

    return [hashAlgorithm, cryptographicAlgorithm, value_hash]
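One coupling worth noting in this layout: each create_*_tab function returns a flat list of components, and app.py splats those lists into generate_json's positional arguments, so every return list must match generate_json's parameter order exactly. A small, hypothetical consistency check along those lines (not part of the commit; it instantiates the tabs inside a throwaway Blocks context the same way app.py does, and only catches count mismatches, not swapped fields):

import inspect
import gradio as gr
from services.json_generator import generate_json
import ui.form_components as fc

with gr.Blocks():
    # Build every tab and count the components they hand back
    n_components = sum(len(tab()) for tab in (
        fc.create_header_tab, fc.create_task_tab, fc.create_measures_tab,
        fc.create_system_tab, fc.create_software_tab, fc.create_infrastructure_tab,
        fc.create_environment_tab, fc.create_quality_tab, fc.create_hash_tab,
    ))

n_params = len(inspect.signature(generate_json).parameters)
assert n_components == n_params, f"{n_components} components vs {n_params} parameters"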
utils/validation.py ADDED
@@ -0,0 +1,29 @@
from config import OBLIGATORY_FIELDS

def validate_obligatory_fields(data):
    """Validate that all required fields are present in the data."""
    def find_field(d, field):
        if field in d:
            return d[field]
        for k, v in d.items():
            if isinstance(v, dict):
                result = find_field(v, field)
                if result is not None:
                    return result
            elif isinstance(v, list):
                for item in v:
                    if isinstance(item, dict):
                        result = find_field(item, field)
                        if result is not None:
                            return result
        return None

    missing_fields = []
    for field in OBLIGATORY_FIELDS:
        value = find_field(data, field)
        if not value and value != 0:  # Allow 0 as a valid value
            missing_fields.append(field)

    if missing_fields:
        return False, f"The following fields are required: {', '.join(missing_fields)}"
    return True, "All required fields are filled."
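validate_obligatory_fields walks nested dicts and lists of dicts, so a required field counts as present wherever it appears in the report structure, and 0 is treated as a valid value. A small illustration with a hypothetical, partially filled report:

from utils.validation import validate_obligatory_fields

sample = {
    "header": {
        "formatVersion": "1.0",
        "reportId": "1234",
        "reportStatus": "draft",
        "publisher": {"confidentialityLevel": "public"},
    },
    "task": {"taskType": "inference"},
}

ok, msg = validate_obligatory_fields(sample)
print(ok)   # False: most obligatory fields are still absent
print(msg)  # "The following fields are required: taskFamily, taskStage, algorithmName, ..."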