qqubb committed
Commit e733251 · 1 Parent(s): 7b1b2a5

input and analysis via UI working
__pycache__/compliance_analysis.cpython-310.pyc CHANGED
Binary files a/__pycache__/compliance_analysis.cpython-310.pyc and b/__pycache__/compliance_analysis.cpython-310.pyc differ
 
__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/__pycache__/utils.cpython-310.pyc and b/__pycache__/utils.cpython-310.pyc differ
 
app.py CHANGED
@@ -2,169 +2,177 @@ import os
 import yaml
 import json
 from pathlib import Path
-from compliance_analysis import check_overall_compliance
-
-# if __name__ == "__main__":
-#     demo.launch()
-
 import streamlit as st
-import yaml
-from pathlib import Path
-import pandas as pd
-
-
-def load_data(files):
-    cards = []
-    for file in files:
-        content = file.read().decode("utf-8")
-        if Path(file.name).name == "project_cc.yaml":
-            project_cc_yaml = yaml.safe_load(content)
-            data = project_cc_yaml
-            card_type = "project"
-            cards.append((card_type, data))
-        if Path(file.name).name == "data_cc.yaml":
-            data_cc_yaml = yaml.safe_load(content)
-            data = data_cc_yaml
-            card_type = "data"
-            cards.append((card_type, data))
-        if Path(file.name).name == "model_cc.yaml":
-            model_cc_yaml = yaml.safe_load(content)
-            data = model_cc_yaml
-            card_type = "model"
-            cards.append((card_type, data))
-    return cards
-
-# def process_files(files):
-#     results = []
-#     for file in files:
-#         content = file.read().decode("utf-8")
-#         if Path(file.name).name == "project_cc.yaml":
-#             project_cc_yaml = yaml.safe_load(content)
-#             if project_cc_yaml:
-#                 msg = run_compliance_analysis_on_project(project_cc_yaml)
-#                 results.append(msg)
-#     return results
-
-# def process_files(data):
-#     results = []
-#     msg = run_compliance_analysis_on_project(yaml.safe_load(data))
-#     results.append(msg)
-#     return results
-
-# def extract_properties(data):
-
-#     flattened_data = []
-
-#     for category, items in data.items():
-#         for item, attributes in items.items():
-#             flattened_data.append({
-#                 "Category": category,
-#                 "Item": item,
-#                 "Verbose": attributes["verbose"],
-#                 "Value": attributes["value"]
-#             })
-#     df = pd.DataFrame(flattened_data)
-
-#     return df
-
-def gather_cards(files):
-    cards = {}
-    cards['project_file'] = ''
-    cards['data_files'] = []
-    cards['model_files'] = []
-    for file in files:
-        file_path = os.path.join('/tmp', file.name)
-        with open(file_path, 'wb') as f:
-            f.write(file.getbuffer())
-        with open(file_path, 'r') as file_path:
-            content = yaml.safe_load(file_path.read())
-            if content['card_type'] == 'project':
-                cards['project_file'] = file_path.name
-            if content['card_type'] == "data":
-                cards['data_files'].append(file_path.name)
-            if content['card_type'] == "model":
-                cards['model_files'].append(file_path.name)
-    return cards
 
 def compliance_analysis(cards):
-    results = []
-    dispositive_variables = check_overall_compliance(cards)
-    results.append(dispositive_variables)#['msg'])
-    return results
 
 # Streamlit app
-# st.set_page_config(page_title="AI", layout="wide")
-# st.markdown(
-#     """
-#     <style>
-#     [data-testid="stSidebar"][aria-expanded="true"] > div:first-child{
-#         width: 600px;
-#     }
-#     [data-testid="stSidebar"][aria-expanded="false"] > div:first-child{
-#         width: 600px;
-#         margin-left: -400px;
-#     }
-
-#     """,
-#     unsafe_allow_html=True,
-# )
 
 st.title("AI")
 
 uploaded_files = st.file_uploader("Upload YAML Files", type="yaml", accept_multiple_files=True)
 # project_files = st.file_uploader("Upload Project Files", type="yaml", accept_multiple_files=True)
 
 if uploaded_files:
 
-    cards = load_data(uploaded_files)
-    for card in cards:
-
-        data = card[1]
-
-        if data != None:
-
-            st.title("Compliance Checkboxes")
-            st.title(f"{card[0]}")
-
-            for section, items in data.items():
-                if section != 'card_type':
                     st.header(section.replace('_', ' ').title()) # section header
                     for key, details in items.items():
                         if 'verbose' in details and 'value' in details:
                             st.subheader(key.replace('_', ' ').title()) # section header
                             # details['value'] = st.checkbox(details['verbose'], value=details['value'])
                             if isinstance(details['value'], str):
-                                details['value'] = st.text_input(details['verbose'], value=details['value'])
                             elif isinstance(details['value'], bool):
-                                details['value'] = st.checkbox(details['verbose'], value=details['value'])
                         if 'verbose' not in details and 'value' not in details:
                             st.subheader(key.replace('_', ' ').title()) # section header
                             for key, details in details.items():
                                 st.subheader(key.replace('_', ' ').title()) # section header
-                                details['value'] = st.checkbox(details['verbose'], value=details['value'])
-            # st.divider()
-            # st.divider()
-            # st.write("Updated Data:", data)
-
-            yaml_data = yaml.dump(data, sort_keys=False)
-
-            # st.download_button(
-            #     label=f"Download Updated Data as YAML{card[0]}",
-            #     data=yaml_data,
-            #     file_name="updated_data.yaml",
-            #     mime="text/yaml"
-            # )
-
-            # json_data = json.dumps(data, indent=2)
-            # st.download_button(
-            #     label="Download Updated Data as JSON",
-            #     data=json_data,
-            #     file_name="updated_data.json",
-            #     mime="application/json"
-            # )
-
-    cards = gather_cards(uploaded_files)
-    if st.button(f"Run Analysis"):
-        results = compliance_analysis(cards)
-        # st.text_area("Analysis Results", value=json.dumps(results, indent=4), height=600)
-        st.write("Analysis Results", results)
 
 import yaml
 import json
 from pathlib import Path
 import streamlit as st
+from compliance_analysis import check_overall_compliance_ui
 
 def compliance_analysis(cards):
+    dispositive_variables = check_overall_compliance_ui(cards)
+    return dispositive_variables
+
+def load_yaml(file_path):
+    with open(file_path, 'r') as file:
+        return yaml.safe_load(file)
 
+def format_card_label(card):
+    return card[0]
+
 # Streamlit app
+st.set_page_config(page_title="AI", layout="wide")
+st.markdown(
+    """
+    <style>
+    [data-testid="stSidebar"][aria-expanded="true"] > div:first-child{
+        width: 600px;
+    }
+    [data-testid="stSidebar"][aria-expanded="false"] > div:first-child{
+        width: 600px;
+        margin-left: -400px;
+    }
 
+    """,
+    unsafe_allow_html=True,
+)
 
 st.title("AI")
 
 uploaded_files = st.file_uploader("Upload YAML Files", type="yaml", accept_multiple_files=True)
 # project_files = st.file_uploader("Upload Project Files", type="yaml", accept_multiple_files=True)
 
+cards = {"project_file": None, "data_files": [], "model_files": []}
+
 if uploaded_files:
 
+    for uploaded_file in uploaded_files:
+        cc = load_yaml(uploaded_file.name)
+        card_type = cc['card_details'].get('card_type', '').lower()
+        if card_type == 'project':
+            cards["project_file"] = cc
+        elif card_type == 'data':
+            cards["data_files"].append((cc['card_details']['card_label'], cc))
+        elif card_type == 'model':
+            cards["model_files"].append((cc['card_details']['card_label'], cc))
 
+    project_col, data_col, model_col = st.columns(3)
 
+    with project_col:
+        st.title("Project CC")
+
+        if cards["project_file"]:
+            project_cc = cards["project_file"]
+
+            for section, items in project_cc.items():
+                if section != 'card_details':
+                    st.header(section.replace('_', ' ').title()) # section header
+                    for key, details in items.items():
+                        if 'verbose' in details and 'value' in details:
+                            st.subheader(key.replace('_', ' ').title()) # section header
+                            # details['value'] = st.checkbox(details['verbose'], value=details['value'])
+                            if isinstance(details['value'], str):
+                                details['value'] = st.text_input(details['verbose'], value=details['value'])
+                            elif isinstance(details['value'], bool):
+                                details['value'] = st.checkbox(details['verbose'], value=details['value'])
+                        if 'verbose' not in details and 'value' not in details:
+                            st.subheader(key.replace('_', ' ').title()) # section header
+                            for key, details in details.items():
+                                st.subheader(key.replace('_', ' ').title()) # section header
+                                details['value'] = st.checkbox(details['verbose'], value=details['value'])
+                st.divider()
+            st.divider()
+            # st.write("Updated Data:", project_cc)
+
+            updated_project_cc = yaml.dump(project_cc, sort_keys=False)
+
+            st.download_button(
+                label=f"Download Updated Project CC as YAML",
+                data=updated_project_cc,
+                file_name="updated_project.yaml",
+                mime="text/yaml"
+            )
+
+    with data_col:
+
+        st.title("Data CC")
+        if cards['data_files']:
+            # selected_data_file = st.selectbox("Select a Data CC", cards['data_files'], format_func=format_card_label)
+            # data_cc = selected_data_file[1]
+            for card in cards['data_files']:
+                data_cc = card[1]
+                st.title(f"{card[0]}")
+                for section, items in data_cc.items():
+                    if section != 'card_details':
+                        st.header(section.replace('_', ' ').title()) # section header
+                        for key, details in items.items():
+                            if 'verbose' in details and 'value' in details:
+                                st.subheader(key.replace('_', ' ').title()) # section header
+                                # details['value'] = st.checkbox(details['verbose'], value=details['value'])
+                                if isinstance(details['value'], str):
+                                    details['value'] = st.text_input(details['verbose'], value=details['value'], key=f"data_{card[0]}_{key}")
+                                elif isinstance(details['value'], bool):
+                                    details['value'] = st.checkbox(details['verbose'], value=details['value'], key=f"data_{card[0]}_{details}_{key}")
+                            if 'verbose' not in details and 'value' not in details:
+                                st.subheader(key.replace('_', ' ').title()) # section header
+                                for key, details in details.items():
+                                    st.subheader(key.replace('_', ' ').title()) # section header
+                                    details['value'] = st.checkbox(details['verbose'], value=details['value'], key=f"data_{card[0]}_{details}_{key}")
+                    st.divider()
+                st.divider()
+                # st.write("Updated Data:", data_cc)
+
+                data_cc_yaml_data = yaml.dump(data_cc, sort_keys=False)
+
+                st.download_button(
+                    label=f"Download Updated {card[0]} CC as YAML",
+                    data=data_cc_yaml_data,
+                    file_name="updated_data.yaml",
+                    mime="text/yaml"
+                )
+
+    with model_col:
+
+        st.title("Model CC")
+        if cards['model_files']:
+            # selected_data_file = st.selectbox("Select a Model CC", cards['model_files'], format_func=format_card_label)
+            # model_cc = selected_data_file[1]
+            for card in cards['model_files']:
+                model_cc = card[1]
+                st.title(f"{card[0]}")
+                for section, items in model_cc.items():
+                    if section != 'card_details':
                         st.header(section.replace('_', ' ').title()) # section header
                         for key, details in items.items():
                             if 'verbose' in details and 'value' in details:
                                 st.subheader(key.replace('_', ' ').title()) # section header
                                 # details['value'] = st.checkbox(details['verbose'], value=details['value'])
                                 if isinstance(details['value'], str):
+                                    details['value'] = st.text_input(details['verbose'], value=details['value'], key=f"model_{card[0]}_{key}")
                                 elif isinstance(details['value'], bool):
+                                    details['value'] = st.checkbox(details['verbose'], value=details['value'], key=f"model_{card[0]}_{details}_{key}")
                             if 'verbose' not in details and 'value' not in details:
                                 st.subheader(key.replace('_', ' ').title()) # section header
                                 for key, details in details.items():
                                     st.subheader(key.replace('_', ' ').title()) # section header
+                                    details['value'] = st.checkbox(details['verbose'], value=details['value'], key=f"model_{card[0]}_{details}_{key}")
+                    st.divider()
+                st.divider()
+                # st.write("Updated Data:", model_cc)
+
+                model_cc_yaml_data = yaml.dump(model_cc, sort_keys=False)
+
+                st.download_button(
+                    label=f"Download Updated {card[0]} CC as YAML",
+                    data=model_cc_yaml_data,
+                    file_name="updated_model.yaml",
+                    mime="text/yaml"
+                )
+
+    # # # # json_data = json.dumps(data, indent=2)
+    # # # # st.download_button(
+    # # # #     label="Download Updated Data as JSON",
+    # # # #     data=json_data,
+    # # # #     file_name="updated_data.json",
+    # # # #     mime="application/json"
+    # # # # )
+
+    if st.button(f"Run Analysis"):
+        results = compliance_analysis(cards)
+        st.write("Analysis Results", results)
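
For reference, a minimal sketch of the cards structure that the upload loop above builds and hands to compliance_analysis. The card contents below are hypothetical stand-ins; only the shapes (a card_details block per card, and (card_label, card) tuples for data and model cards) are taken from this diff:

    # Hypothetical cards, shaped like the ones the upload loop assembles.
    project_cc = {
        "card_details": {"card_type": "project", "card_label": "project_01"},
        # ... the remaining sections each map keys to {article, verbose, value}
    }
    data_cc = {
        "card_details": {"card_type": "data", "card_label": "data_01"},
        "intended_purpose": {
            "emotion_recognition": {
                "article": "Art. 6(2); Annex III(1)(c)",
                "verbose": "This dataset is appropriate to use for AI projects involving emotion recognition",
                "value": True,
            },
        },
    }
    cards = {
        "project_file": project_cc,            # one project card (a dict)
        "data_files": [("data_01", data_cc)],  # (card_label, card) tuples
        "model_files": [],                     # same tuple shape as data_files
    }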
 
 
compliance_analysis.py CHANGED
@@ -1,36 +1,86 @@
 import yaml
-from utils import set_operator_role_and_location, set_eu_market_status, check_within_scope_cc, check_within_scope_act, check_prohibited
 
 # TODO: tell the user where the compliance analysis failed
 # TODO: cite article from yaml file as explanation
 
-def check_overall_compliance(cards):
-
     dispositive_variables = {
         "ai_project_type": {
-            "ai_system": False,
-            "gpai_model": True,
-            "high_risk_ai_system": True,
             "gpai_model_systemic_risk": False
         },
         "operator_details": {
-            "provider": False,
-            "eu_located": False,
-            "output_used": False
         },
         "eu_market_status": {
-            "placed_on_market": False,
-            "put_into_service": False
         },
-        "intended_purposes": [],
         "project_cc_pass": False,
         "data_cc_pass": False,
         "model_cc_pass": False,
         "msg": []
     }
-
     with open(cards['project_file'], 'r') as project_filepath:
         project_cc = yaml.safe_load(project_filepath.read())
 
     # check intended purposes
     for card in cards['data_files']:
@@ -92,21 +142,23 @@ def run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml):
     dispositive_variables = set_eu_market_status(dispositive_variables, project_cc_yaml)
 
     # Check if project is within scope of the Compliance Cards project. If not, inform user.
-    if check_within_scope_cc(dispositive_variables, project_cc_yaml):
         dispositive_variables['msg'].append("Project is within the scope of the Compliance Cards system. Let's continue...")
     else:
         dispositive_variables['msg'].append("Project is not within the scope of the initial version of the Compliance Cards system.")
 
     # Check if the project is within scope of the Act. If it's not, the analysis is over.
     if check_within_scope_act(dispositive_variables, project_cc_yaml):
         dispositive_variables['msg'].append("Project is within the scope of Act. Let's continue...")
     else:
         dispositive_variables['msg'].append("Project is not within the scope of what is regulated by the Act.")
 
     # Check for prohibited practices. If any exist, the analysis is over.
     if check_prohibited(project_cc_yaml) == True:
-        print("Project contains prohibited practices and is therefore non-compliant.")
         dispositive_variables['msg'].append("Project is non-compliant due to a prohibited practice.")
     else:
         print("Project does not contain prohibited practices. Let's continue...")
 
@@ -223,7 +275,7 @@ def check_intended_purpose(dispositive_variables, project_cc, other_cc):
 
     # For each Data CC, put the intended uses in a set and then make sure the Project's intended use is in the set
 
-    if other_cc['card_type'] == 'data':
         data_cc = other_cc
         for key in data_cc['intended_purpose']:
             if data_cc['intended_purpose'][f'{key}']['value']:
@@ -235,7 +287,7 @@ def check_intended_purpose(dispositive_variables, project_cc, other_cc):
 
     # Now do the exact same thing for all models
 
-    if other_cc['card_type'] == 'model':
         model_cc = other_cc
         for key in model_cc['intended_purpose']:
             if model_cc['intended_purpose'][f'{key}']['value']:
@@ -245,7 +297,7 @@ def check_intended_purpose(dispositive_variables, project_cc, other_cc):
         if purpose not in model_intended_purposes:
             dispositive_variables['msg'].append(f"You are not compliant because {purpose} is not a valid purpose for the model")
 
-    dispositive_variables['intended_purposes'] = project_intended_purposes
 
     return dispositive_variables
 
 import yaml
+from utils import set_operator_role_and_location, set_eu_market_status, check_within_scope_act, check_prohibited
 
 # TODO: tell the user where the compliance analysis failed
 # TODO: cite article from yaml file as explanation
 
+def check_overall_compliance_ui(cards):
+
+    project_cc = cards['project_file']
+
     dispositive_variables = {
         "ai_project_type": {
+            "ai_system": project_cc['ai_system']['ai_system']['value'],
+            "gpai_model": project_cc['gpai_model']['gpai_model']['value'],
+            "high_risk_ai_system": False,
             "gpai_model_systemic_risk": False
         },
         "operator_details": {
+            "provider": project_cc['operator_details']['provider']['value'],
+            "eu_located": project_cc['operator_details']['eu_located']['value'],
+            "output_used": project_cc['operator_details']['output_used']['value']
         },
         "eu_market_status": {
+            "placed_on_market": project_cc['eu_market_status']['placed_on_market']['value'],
+            "put_into_service": project_cc['eu_market_status']['put_into_service']['value']
        },
+        "project_intended_purposes": [],
         "project_cc_pass": False,
         "data_cc_pass": False,
         "model_cc_pass": False,
         "msg": []
     }
+
+    # check intended purposes
+    for card in cards['data_files']:
+        data_cc = card[1]
+        dispositive_variables = check_intended_purpose(dispositive_variables, project_cc, data_cc)
+
+    for card in cards['model_files']:
+        model_cc = card[1]
+        dispositive_variables = check_intended_purpose(dispositive_variables, project_cc, model_cc)
+
+    # for each model_cc and data_cc - run analysis with ref to project_cc
+    dispositive_variables = run_compliance_analysis_on_project(dispositive_variables, project_cc)
+
+    for card in cards['data_files']:
+        data_cc = card[1]
+        dispositive_variables = run_compliance_analysis_on_data(dispositive_variables, data_cc)
+
+    for card in cards['model_files']:
+        model_cc = card[1]
+        dispositive_variables = run_compliance_analysis_on_model(dispositive_variables, model_cc)
+
+    return dispositive_variables
+
+def check_overall_compliance(cards):
+
     with open(cards['project_file'], 'r') as project_filepath:
+        print(project_filepath)
         project_cc = yaml.safe_load(project_filepath.read())
+
+    dispositive_variables = {
+        "ai_project_type": {
+            "ai_system": project_cc['ai_system']['ai_system']['value'],
+            "gpai_model": project_cc['gpai_model']['gpai_model']['value'],
+            "high_risk_ai_system": False,
+            "gpai_model_systemic_risk": False
+        },
+        "operator_details": {
+            "provider": project_cc['operator_details']['provider']['value'],
+            "eu_located": project_cc['operator_details']['eu_located']['value'],
+            "output_used": project_cc['operator_details']['output_used']['value']
+        },
+        "eu_market_status": {
+            "placed_on_market": project_cc['eu_market_status']['placed_on_market']['value'],
+            "put_into_service": project_cc['eu_market_status']['put_into_service']['value']
+        },
+        "project_intended_purposes": [],
+        "project_cc_pass": False,
+        "data_cc_pass": False,
+        "model_cc_pass": False,
+        "msg": []
+    }
 
     # check intended purposes
     for card in cards['data_files']:
 
     dispositive_variables = set_eu_market_status(dispositive_variables, project_cc_yaml)
 
     # Check if project is within scope of the Compliance Cards project. If not, inform user.
+    if project_cc_yaml['operator_details']['provider']['value'] == True:
         dispositive_variables['msg'].append("Project is within the scope of the Compliance Cards system. Let's continue...")
     else:
         dispositive_variables['msg'].append("Project is not within the scope of the initial version of the Compliance Cards system.")
+        return dispositive_variables
 
     # Check if the project is within scope of the Act. If it's not, the analysis is over.
     if check_within_scope_act(dispositive_variables, project_cc_yaml):
         dispositive_variables['msg'].append("Project is within the scope of Act. Let's continue...")
     else:
         dispositive_variables['msg'].append("Project is not within the scope of what is regulated by the Act.")
+        return dispositive_variables
 
     # Check for prohibited practices. If any exist, the analysis is over.
     if check_prohibited(project_cc_yaml) == True:
         dispositive_variables['msg'].append("Project is non-compliant due to a prohibited practice.")
+        return dispositive_variables
     else:
         print("Project does not contain prohibited practices. Let's continue...")
 
     # For each Data CC, put the intended uses in a set and then make sure the Project's intended use is in the set
 
+    if other_cc['card_details']['card_type'] == 'data':
         data_cc = other_cc
         for key in data_cc['intended_purpose']:
             if data_cc['intended_purpose'][f'{key}']['value']:
 
     # Now do the exact same thing for all models
 
+    if other_cc['card_details']['card_type'] == 'model':
         model_cc = other_cc
         for key in model_cc['intended_purpose']:
             if model_cc['intended_purpose'][f'{key}']['value']:
 
         if purpose not in model_intended_purposes:
             dispositive_variables['msg'].append(f"You are not compliant because {purpose} is not a valid purpose for the model")
 
+    dispositive_variables['project_intended_purposes'] = project_intended_purposes
 
     return dispositive_variables
 
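At its core, check_intended_purpose (hunks above) is a set-membership test: every purpose switched on in the project card must also be switched on in each data and model card it uses. A standalone sketch with hypothetical purpose flags, assuming the data-card branch mirrors the model-card message shown above:

    # Purposes enabled on each card (i.e. entries whose value is true in the YAML).
    project_intended_purposes = {"emotion_recognition", "critical_infrastructure"}
    data_intended_purposes = {"emotion_recognition"}

    msg = []
    for purpose in project_intended_purposes:
        if purpose not in data_intended_purposes:
            msg.append(f"You are not compliant because {purpose} is not a valid purpose for the data")

    print(msg)  # flags critical_infrastructure as missing from the data card
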
data_cc 02.yaml ADDED
@@ -0,0 +1,228 @@
+card_details:
+  card_type: "data" # "project", "data" or "model"
+  card_label: "data_02"
+
+# Metadata related to intended purpose(s) of data
+
+intended_purpose:
+  safety_component:
+    article: 'Art. 6(1)(a)'
+    verbose: 'This dataset is appropriate to use for AI projects involving product safety components'
+    value: !!bool true
+  product_regulated_machinery:
+    article: 'Art. 6(1)(b); Annex I'
+    verbose: 'This dataset is appropriate to use for AI projects involving products covered by Directive 2006/42/EC of the European Parliament and of the Council of 17 May 2006 on machinery, and amending Directive 95/16/EC (OJ L 157, 9.6.2006, p. 24) [as repealed by the Machinery Regulation]'
+    value: !!bool false
+  product_regulated_toy:
+    article: 'Art. 6(1)(b); Annex I'
+    verbose: 'This dataset is appropriate to use for AI projects involving products covered by Directive 2009/48/EC of the European Parliament and of the Council of 18 June 2009 on the safety of toys (OJ L 170, 30.6.2009, p. 1)'
+    value: !!bool false
+  product_regulated_watercraft:
+    article: 'Art. 6(1)(b); Annex I'
+    verbose: 'This dataset is appropriate to use for AI projects involving products covered by Directive 2013/53/EU of the European Parliament and of the Council of 20 November 2013 on recreational craft and personal watercraft and repealing Directive 94/25/EC (OJ L 354, 28.12.2013, p. 90)'
+    value: !!bool false
+  biometric_categorization:
+    article: 'Art. 6(2); Annex III(1)(b)'
+    verbose: 'This dataset is appropriate to use for AI projects involving biometric categorisation, according to sensitive or protected attributes or characteristics based on the inference of those attributes or characteristics'
+    value: !!bool false
+  emotion_recognition:
+    article: 'Art. 6(2); Annex III(1)(c)'
+    verbose: 'This dataset is appropriate to use for AI projects involving emotion recognition'
+    value: !!bool true
+  critical_infrastructure:
+    article: 'Art. 6(2); Annex III(2)'
+    verbose: 'This dataset is appropriate to use for AI projects involving safety components in the management and operation of critical digital infrastructure, road traffic, or in the supply of water, gas, heating or electricity'
+    value: !!bool true
+  admission:
+    article: 'Art. 6(2); Annex III(3)(a)'
+    verbose: 'This dataset is appropriate to use for AI projects involving the determination of access or admission or to assigning natural persons to educational and vocational training institutions at all levels'
+    value: !!bool false
+  recruitment:
+    article: 'Art. 6(2); Annex III(4)(a)'
+    verbose: 'This dataset is appropriate to use for AI projects involving the recruitment or selection of natural persons, in particular to place targeted job advertisements, to analyse and filter job applications, and to evaluate candidates'
+    value: !!bool false
+  public_assistance:
+    article: 'Art. 6(2); Annex III(5)(a)'
+    verbose: 'This dataset is appropriate to use for AI projects intended to be used by public authorities or on behalf of public authorities to evaluate the eligibility of natural persons for essential public assistance benefits and services, including healthcare services, as well as to grant, reduce, revoke, or reclaim such benefits and services'
+    value: !!bool false
+  victim_assessment:
+    article: 'Art. 6(2); Annex III(6)(a)'
+    verbose: 'This dataset is appropriate to use for AI projects intended to be used by or on behalf of law enforcement authorities, or by Union institutions, bodies, offices or agencies in support of law enforcement authorities or on their behalf to assess the risk of a natural person becoming the victim of criminal offences'
+    value: !!bool false
+  polygraph:
+    article: 'Art. 6(2); Annex III(7)(a)'
+    verbose: 'This dataset is appropriate to use for AI projects intended to be used by or on behalf of competent public authorities or by Union institutions, bodies, offices or agencies as polygraphs or similar tools'
+    value: !!bool false
+  judicial:
+    article: 'Art. 6(2); Annex III(8)(a)'
+    verbose: 'This dataset is appropriate to use for AI projects intended to be used by a judicial authority or on their behalf to assist a judicial authority in researching and interpreting facts and the law and in applying the law to a concrete set of facts, or to be used in a similar way in alternative dispute resolution'
+    value: !!bool false
+
+# Metadata related to data-related requirements for high-risk AI systems
+
+high_risk_ai_system_requirements:
+  # data governance
+  data_and_data_governance_data_governance:
+    article: 'Art. 10(1)-(2)'
+    verbose: 'The dataset was subject to data governance and management practices appropriate to the intended use case'
+    value: !!bool false
+  data_and_data_governance_design_choices:
+    article: 'Art. 10(2)(a)'
+    verbose: 'The dataset has been subject to data governance and management practices as regards its relevant design choices'
+    value: !!bool false
+  data_and_data_governance_data_origin:
+    article: 'Art. 10(2)(b)'
+    verbose: 'The dataset has been subject to data governance and management practices as regards its data collection processes and the origin of data, and in the case of personal data, the original purpose of the data collection'
+    value: !!bool false
+  data_and_data_governance_data_preparation:
+    article: 'Art. 10(2)(c)'
+    verbose: 'The dataset has been subject to data governance and management practices as regards its data-preparation processing operations, such as annotation, labelling, cleaning, updating, enrichment and aggregation'
+    value: !!bool false
+  data_and_data_governance_data_assumptions:
+    article: 'Art. 10(2)(d)'
+    verbose: 'The dataset has been subject to data governance and management practices as regards its formulation of assumptions, in particular with respect to the information that the data are supposed to measure and represent'
+    value: !!bool false
+  data_and_data_governance_data_quantity:
+    article: 'Art. 10(2)(e)'
+    verbose: 'The dataset has been subject to data governance and management practices that include an assessment of the availability, quantity and suitability of the data sets that are needed'
+    value: !!bool false
+  data_and_data_governance_ata_bias_examination:
+    article: 'Art. 10(2)(f)'
+    verbose: 'The dataset has been subject to data governance and management practices that include an examination of possible biases that are likely to affect the health and safety of persons, have a negative impact on fundamental rights or lead to discrimination prohibited under Union law, especially where data outputs influence inputs for future operations'
+    value: !!bool false
+  data_and_data_governance_data_and_data_governance_data_bias_mitigation:
+    article: 'Art. 10(2)(g)'
+    verbose: 'The dataset has been subject to data governance and management practices that include appropriate measures to detect, prevent and mitigate possible biases that are likely to affect the health and safety of persons, have a negative impact on fundamental rights or lead to discrimination prohibited under Union law, especially where data outputs influence inputs for future operations'
+    value: !!bool true
+  data_and_data_governance_data_compliance:
+    article: 'Art. 10(2)(h)'
+    verbose: 'The dataset has been subject to data governance and management practices that include identification of relevant data gaps or shortcomings that prevent compliance with this Regulation, and how those gaps and shortcomings can be addressed'
+    value: !!bool false
+  # data_characteristics
+  data_and_data_governance_data_relevance:
+    article: 'Art. 10(3); Rec. 67'
+    verbose: 'Training data is relevant'
+    value: !!bool false
+  data_and_data_governance_data_representativity:
+    article: 'Art. 10(3); Rec. 67'
+    verbose: 'Training data is sufficiently representative'
+    value: !!bool false
+  data_and_data_governance_data_errors:
+    article: 'Art. 10(3); Rec. 67'
+    verbose: 'Training data is, to the best extent possible, free of errors'
+    value: !!bool false
+  data_and_data_governance_data_completeness:
+    article: 'Art. 10(3); Rec. 67'
+    verbose: 'Training data is complete in view of the intended purpose'
+    value: !!bool false
+  data_and_data_governance_statistical_properties:
+    article: 'Art. 10(3)'
+    verbose: 'Training data possesses the appropriate statistical properties, including, where applicable, as regards the people in relation to whom it is intended to be used'
+    value: !!bool false
+  data_and_data_governance_contextual:
+    article: 'Art. 10(4)'
+    verbose: 'Training data takes into account, to the extent required by the intended purpose, the characteristics or elements that are particular to the specific geographical, contextual, behavioural or functional setting within which it is intended to be used'
+    value: !!bool false
+  # special_categories_of_personal_data:
+  data_and_data_governance_personal_data_necessary:
+    article: 'Art. 10(5)'
+    verbose: 'Where special categories of personal data have been used to ensure the detection and correction of possible biases that are likely to affect the health and safety of persons, have a negative impact on fundamental rights or lead to discrimination prohibited under Union law, especially where data outputs influence inputs for future operations, the use of this data was strictly necessary'
+    value: !!bool false
+  data_and_data_governance_personal_data_safeguards:
+    article: 'Art. 10(5)'
+    verbose: 'Where special categories of personal data have been used to ensure the detection and correction of possible biases that are likely to affect the health and safety of persons, have a negative impact on fundamental rights or lead to discrimination prohibited under Union law, especially where data outputs influence inputs for future operations, the use complied with appropriate safeguards for the fundamental rights and freedoms of natural persons'
+    value: !!bool false
+  data_and_data_governance_personal_data_gdpr:
+    article: 'Art. 10(5)'
+    verbose: 'Where special categories of personal data have been used to ensure the detection and correction of possible biases that are likely to affect the health and safety of persons, have a negative impact on fundamental rights or lead to discrimination prohibited under Union law, especially where data outputs influence inputs for future operations, the use of this data satisfied the provisions set out in Regulations (EU) 2016/679 and (EU) 2018/1725 and Directive (EU) 2016/680'
+    value: !!bool false
+  data_and_data_governance_personal_data_other_options:
+    article: 'Art. 10(5)(a)'
+    verbose: 'Where special categories of personal data have been used to ensure the detection and correction of possible biases that are likely to affect the health and safety of persons, have a negative impact on fundamental rights or lead to discrimination prohibited under Union law, especially where data outputs influence inputs for future operations, the bias detection and correction was not effectively fulfilled by processing other data, including synthetic or anonymised data'
+    value: !!bool false
+  data_and_data_governance_personal_data_limitations:
+    article: 'Art. 10(5)(b)'
+    verbose: 'Where special categories of personal data have been used to ensure the detection and correction of possible biases that are likely to affect the health and safety of persons, have a negative impact on fundamental rights or lead to discrimination prohibited under Union law, especially where data outputs influence inputs for future operations, the special categories of personal data were not subject to technical limitations on the re-use of the personal data, and state-of-the-art security and privacy-preserving measures, including pseudonymisation'
+    value: !!bool false
+  data_and_data_governance_personal_data_controls:
+    article: 'Art. 10(5)(c)'
+    verbose: 'Where special categories of personal data have been used to ensure the detection and correction of possible biases that are likely to affect the health and safety of persons, have a negative impact on fundamental rights or lead to discrimination prohibited under Union law, especially where data outputs influence inputs for future operations, the special categories of personal data were subject to measures to ensure that the personal data processed are secured, protected, subject to suitable safeguards, including strict controls and documentation of the access, to avoid misuse and ensure that only authorised persons have access to those personal data with appropriate confidentiality obligations'
+    value: !!bool false
+  data_and_data_governance_personal_data_access:
+    article: 'Art. 10(5)(d)'
+    verbose: 'Where special categories of personal data have been used to ensure the detection and correction of possible biases that are likely to affect the health and safety of persons, have a negative impact on fundamental rights or lead to discrimination prohibited under Union law, especially where data outputs influence inputs for future operations, the special categories of personal data were not to be transmitted, transferred or otherwise accessed by other parties'
+    value: !!bool false
+  data_and_data_governance_personal_data_deletion:
+    article: 'Art. 10(5)(e)'
+    verbose: 'Where special categories of personal data have been used to ensure the detection and correction of possible biases that are likely to affect the health and safety of persons, have a negative impact on fundamental rights or lead to discrimination prohibited under Union law, especially where data outputs influence inputs for future operations, the special categories of personal data were deleted once the bias was corrected or the personal data reached the end of its retention period (whichever came first)'
+    value: !!bool false
+  data_and_data_governance_personal_data_necessary_105f:
+    article: 'Art. 10(5)(f)'
+    verbose: 'Where special categories of personal data have been used to ensure the detection and correction of possible biases that are likely to affect the health and safety of persons, have a negative impact on fundamental rights or lead to discrimination prohibited under Union law, especially where data outputs influence inputs for future operations, the records of processing activities pursuant to Regulations (EU) 2016/679 and (EU) 2018/1725 and Directive (EU) 2016/680 include the reasons why the processing of special categories of personal data was strictly necessary to detect and correct biases, and why that objective could not be achieved by processing other data'
+    value: !!bool false
+  # technical_documentation:
+  technical_documentation_general_description:
+    article: 'Art. 11; Annex IV(2)(d)'
+    verbose: 'Dataset carries technical documentation, such as a datasheet, including a general description of the dataset.'
+    value: !!bool false
+  technical_documentation_provenance:
+    article: 'Art. 11; Annex IV(2)(d)'
+    verbose: 'Dataset carries technical documentation, such as a datasheet, including information about its provenance'
+    value: !!bool false
+  technical_documentation_scope:
+    article: 'Art. 11; Annex IV(2)(d)'
+    verbose: 'Dataset carries technical documentation, such as a datasheet, including information about scope and main characteristics'
+    value: !!bool false
+  technical_documentation_origins:
+    article: 'Art. 11; Annex IV(2)(d)'
+    verbose: 'Dataset carries technical documentation, such as a datasheet, including information about how the data was obtained and selected'
+    value: !!bool false
+  technical_documentation_labelling:
+    article: 'Art. 11; Annex IV(2)(d)'
+    verbose: 'Dataset carries technical documentation, such as a datasheet, including information about labelling procedures (e.g. for supervised learning)'
+    value: !!bool false
+  technical_documentation_cleaning:
+    article: 'Art. 11; Annex IV(2)(d)'
+    verbose: 'Dataset carries technical documentation, such as a datasheet, including information about data cleaning methodologies (e.g. outliers detection)'
+    value: !!bool false
+  technical_documentation_cybersecurity:
+    article: 'Art. 11; Annex IV(2)(h)'
+    verbose: 'Cybersecurity measures were put in place as regards the data (e.g., scanning for data poisoning)'
+    value: !!bool false
+
+  transparency_and_provision_of_information_to_deployers:
+    article: 'Art. 13(3)(b)(vi)'
+    verbose: 'Dataset is accompanied by instructions for use that convey relevant information about it, taking into account its intended purpose'
+    value: !!bool false
+  quality_management_system:
+    article: 'Art. 17(1)(f)'
+    verbose: 'Dataset was subject to a quality management system that is documented in a systematic and orderly manner in the form of written policies, procedures and instructions, and includes a description of the systems and procedures for data management, including data acquisition, data collection, data analysis, data labelling, data storage, data filtration, data mining, data aggregation, data retention and any other operation regarding the data'
+    value: !!bool false
+
+# Metadata related to data-related requirements for GPAI models
+
+gpai_model_requirements:
+  data_type:
+    article: 'Art. 53(1); Annex XI(2)(c)'
+    verbose: 'Documentation for the dataset is available that contains the type of data'
+    value: !!bool false
+  data_provenance:
+    article: 'Art. 53(1); Annex XI(2)(c)'
+    verbose: 'Documentation for the dataset is available that contains the provenance of data'
+    value: !!bool false
+  data_curation:
+    article: 'Art. 53(1); Annex XI(2)(c)'
+    verbose: 'Documentation for the dataset is available that contains the curation methodologies (e.g. cleaning, filtering, etc.)'
+    value: !!bool false
+  data_number:
+    article: 'Art. 53(1); Annex XI(2)(c)'
+    verbose: 'Documentation for the dataset is available that contains the number of data points'
+    value: !!bool false
+  data_scope:
+    article: 'Art. 53(1); Annex XI(2)(c)'
+    verbose: 'Documentation for the dataset is available that contains the data scope and main characteristics'
+    value: !!bool false
+  data_origin:
+    article: 'Art. 53(1); Annex XI(2)(c)'
+    verbose: 'Documentation for the dataset is available that contains information on how the data was obtained and selected as well as all other measures to detect the unsuitability of data sources and methods to detect identifiable biases'
+    value: !!bool false
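
A short sketch of how a card file like the one added above can be loaded and walked, mirroring the rendering loop in app.py (the isinstance check is a defensive addition of this sketch, not part of the app code):

    import yaml

    # Load the card added in this commit and print every leaf that follows
    # the {article, verbose, value} shape, skipping the card_details block.
    with open("data_cc 02.yaml", "r") as f:
        cc = yaml.safe_load(f)

    print(cc["card_details"]["card_label"])  # data_02
    for section, items in cc.items():
        if section == "card_details":
            continue
        for key, details in items.items():
            if isinstance(details, dict) and "verbose" in details and "value" in details:
                print(section, key, details["value"])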
data_cc.yaml CHANGED
@@ -1,4 +1,6 @@
-card_type: "data" # "project", "data" or "model"
+card_details:
+  card_type: "data" # "project", "data" or "model"
+  card_label: "data_01"
 
 # Metadata related to intended purpose(s) of data
 
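Because this change nests card_type under a card_details block (and adds card_label), consumers switch from a top-level lookup to a nested one, as the new app.py does. A minimal before/after sketch:

    import yaml

    old_doc = 'card_type: "data"'
    new_doc = """
    card_details:
      card_type: "data"
      card_label: "data_01"
    """

    old_cc = yaml.safe_load(old_doc)   # pre-commit layout
    new_cc = yaml.safe_load(new_doc)   # layout after this commit

    print(old_cc["card_type"])                          # data
    print(new_cc["card_details"].get("card_type", ""))  # data
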
model_cc 02.yaml ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ card_details:
2
+ card_type: "model" # "project", "data" or "model"
3
+ card_label: "model_02"
4
+
5
+ # Metadata related to intended purpose(s) of model
6
+
7
+ intended_purpose:
8
+ safety_component:
9
+ article: 'Art. 6(1)(a)'
10
+ verbose: 'This model is appropriate to use for AI projects involving product safety components'
11
+ value: !!bool false
12
+ product_regulated_machinery:
13
+ article: 'Art. 6(1)(b); Annex I'
14
+ verbose: 'This model is appropriate to use for AI projects involving products covered by Directive 2006/42/EC of the European Parliament and of the Council of 17 May 2006 on machinery, and amending Directive 95/16/EC (OJ L 157, 9.6.2006, p. 24) [as repealed by the Machinery Regulation]'
15
+ value: !!bool false
16
+ product_regulated_toy:
17
+ article: 'Art. 6(1)(b); Annex I'
18
+ verbose: 'This model is appropriate to use for AI projects involving products covered by Directive 2009/48/EC of the European Parliament and of the Council of 18 June 2009 on the safety of toys (OJ L 170, 30.6.2009, p. 1)'
19
+ value: !!bool false
20
+ product_regulated_watercraft:
21
+ article: 'Art. 6(1)(b); Annex I'
22
+ verbose: 'This model is appropriate to use for AI projects involving products covered by Directive 2013/53/EU of the European Parliament and of the Council of 20 November 2013 on recreational craft and personal watercraft and repealing Directive 94/25/EC (OJ L 354, 28.12.2013, p. 90)'
23
+ value: !!bool false
24
+ biometric_categorization:
25
+ article: 'Art. 6(2); Annex III(1)(b)'
26
+ verbose: 'This model is appropriate to use for AI projects involving biometric categorisation, according to sensitive or protected attributes or characteristics based on the inference of those attributes or characteristics'
27
+ value: !!bool false
28
+ emotion_recognition:
29
+ article: 'Art. 6(2); Annex III(1)(c)'
30
+ verbose: 'This model is appropriate to use for AI projects involving emotion recognition'
31
+ value: !!bool true
32
+ critical_infrastructure:
33
+ article: 'Art. 6(2); Annex III(2)'
34
+ verbose: 'This model is appropriate to use for AI projects involving safety components in the management and operation of critical digital infrastructure, road traffic, or in the supply of water, gas, heating or electricity'
35
+ value: !!bool true
36
+ admission:
37
+ article: 'Art. 6(2); Annex III(3)(a)'
38
+ verbose: 'This model is appropriate to use for AI projects involving the determination of access or admission or to assigning natural persons to educational and vocational training institutions at all levels'
39
+ value: !!bool false
40
+ recruitment:
41
+ article: 'Art. 6(2); Annex III(4)(a)'
42
+ verbose: 'This model is appropriate to use for AI projects involving the recruitment or selection of natural persons, in particular to place targeted job advertisements, to analyse and filter job applications, and to evaluate candidates'
43
+ value: !!bool false
44
+ public_assistance:
45
+ article: 'Art. 6(2); Annex III(5)(a)'
46
+ verbose: 'This model is appropriate to use for AI projects intended to be used by public authorities or on behalf of public authorities to evaluate the eligibility of natural persons for essential public assistance benefits and services, including healthcare services, as well as to grant, reduce, revoke, or reclaim such benefits and services'
47
+ value: !!bool false
48
+ victim_assessment:
49
+ article: 'Art. 6(2); Annex III(6)(a)'
50
+ verbose: 'This model is appropriate to use for AI projects intended to be used by or on behalf of law enforcement authorities, or by Union institutions, bodies, offices or agencies in support of law enforcement authorities or on their behalf to assess the risk of a natural person becoming the victim of criminal offences'
51
+ value: !!bool false
52
+ polygraph:
53
+ article: 'Art. 6(2); Annex III(7)(a)'
54
+ verbose: 'This model is appropriate to use for AI projects intended to be used by or on behalf of competent public authorities or by Union institutions, bodies, offices or agencies as polygraphs or similar tools'
55
+ value: !!bool false
56
+ judicial:
57
+ article: 'Art. 6(2); Annex III(8)(a)'
58
+ verbose: 'This model is appropriate to use for AI projects intended to be used by a judicial authority or on their behalf to assist a judicial authority in researching and interpreting facts and the law and in applying the law to a concrete set of facts, or to be used in a similar way in alternative dispute resolution'
59
+ value: !!bool false
60
+
61
+ # Metadata that will help us determine if the model itself is a GPAI and, therefore, must satisfy the requirements of GPAI models
62
+
63
+ classification_of_gpai_models:
64
+ high_impact_capabilities:
65
+ article: 'Art. 51(1)(a)'
66
+ verbose: 'The model has high impact capabilities evaluated on the basis of appropriate technical tools and methodologies, including indicators and benchmarks'
67
+ value: !!bool false
68
+ flops:
69
+ article: 'Art. 51(2)'
70
+ verbose: 'The cumulative compute used for training the model, as measured in floating point operations (FLOPs), was greater than 10^25.'
71
+ value: !!bool false
72
+
73
+ # Metadata related to model-related requirements for high-risk AI systems
74
+
75
+ high_risk_ai_system_requirements:
76
+ risk_management_system_general:
77
+ article: 'Art. 9(2)'
78
+ verbose: 'A risk management system has been planned, run, reviewed, and updated throughout the model lifecycle'
79
+ value: !!bool false
80
+ risk_management_system_foreseeable_risks:
81
+ article: 'Art. 9(2)(a)'
82
+ verbose: 'The risk management system that was established, implemented, documented and maintained througout the model lifecycle included the identification and analysis of any known or reasonably foreseeable risks the model can pose to health or safety when used for intended purpose'
83
+ value: !!bool false
84
+ risk_management_system_evaluation:
85
+ article: 'Art. 9(2)(b)'
86
+ verbose: 'The risk management system that was established, implemented, documented and maintained througout the model lifecycle included the estimation and evaluation of risks when model used for intended purpose'
87
+ value: !!bool false
88
+ risk_management_system_misuse:
89
+ article: 'Art. 9(2)(b)'
90
+ verbose: 'The risk management system that was established, implemented, documented and maintained througout the model lifecycle included the estimation and evaluation of risks when model used under conditions of reasonably foreseeable misuse'
91
+ value: !!bool false
92
+ risk_management_system_testing_performance:
93
+ article: 'Art. 9(6)'
94
+ verbose: 'The risk management system that was established, implemented, documented and maintained througout the model lifecycle included testing to ensure model performs consistently for intended purpose'
95
+ value: !!bool false
96
+ risk_management_system_testing_compliance:
97
+ article: 'Art. 9(6)'
98
+ verbose: 'The risk management system that was established, implemented, documented and maintained througout the model lifecycle included testing to ensure model complies with Act'
99
+ value: !!bool false
100
+ risk_management_system_testing_benchmark:
101
+ article: 'Art. 9(8)'
102
+ verbose: 'The risk management system that was established, implemented, documented and maintained througout the model lifecycle included testing against prior defined metrics appropriate to intended purpose'
103
+ value: !!bool false
104
+ risk_management_system_testing_probabilistic:
105
+ article: 'Art. 9(8)'
106
+ verbose: 'The risk management system that was established, implemented, documented and maintained througout the model lifecycle included testing against probabilistic thresholds appropriate to intended purpose'
107
+ value: !!bool false
108
+ technical_documentation_pre_trained_elements:
109
+ article: 'Art. 11; Annex IV(2)(a)'
110
+ verbose: 'Model has technical documentation that describes pre-trained elements of model provided by third parties and how used, integrated or modified'
111
+ value: !!bool false
112
+ technical_documentation_logic:
113
+ article: 'Art. 11; Annex IV(2)(b)'
114
+ verbose: 'Model has technical documentation that describes general logic of model'
115
+ value: !!bool false
116
+ technical_documentation_design_choices:
117
+ article: 'Art. 11; Annex IV(2)(b)'
118
+ verbose: 'Model has technical documentation that describes key design choices including rationale and assumptions made, including with regard to persons or groups on which model intended to be used'
119
+ value: !!bool false
120
+ technical_documentation_classification_choices:
121
+ article: 'Art. 11; Annex IV(2)(b)'
122
+ verbose: 'Model has technical documentation that describes main classification choices'
123
+ value: !!bool false
124
+ technical_documentation_parameters:
125
+ article: 'Art. 11; Annex IV(2)(b)'
126
+ verbose: 'Model has technical documentation that describes what model is designed to optimise for and relevance of its different parameters'
127
+ value: !!bool false
128
+ technical_documentation_expected_output:
129
+ article: 'Art. 11; Annex IV(2)(b)'
130
+ verbose: 'Model has technical documentation that the expected output and output quality of the system'
131
+ value: !!bool false
132
+ technical_documentation_act_compliance:
133
+ article: 'Art. 11; Annex IV(2)'
134
+ verbose: 'Model has technical documentation that describes decisions about any possible trade-off made regarding the technical solutions adopted to comply with the requirements set out in Title III, Chapter 2'
135
+ value: !!bool false
136
+ technical_documentation_human_oversight:
137
+ article: 'Art. 11; Annex IV(2)(e)'
138
+ verbose: 'Model has technical documentation that describes an assessment of the human oversight measures needed in accordance with Article 14, including an assessment of the technical measures needed to facilitate the interpretation of the outputs of AI systems by the deployers, in accordance with Articles 13(3)(d)'
139
+ value: !!bool false
140
+ technical_documentation_validation:
141
+ article: 'Art. 11; Annex IV(2)(g)'
142
+   verbose: 'Model has technical documentation that describes validation and testing procedures used, including information about the validation and testing data used and their main characteristics; metrics used to measure accuracy, robustness and compliance with other relevant requirements set out in Title III, Chapter 2 as well as potentially discriminatory impacts; test logs and all test reports dated and signed by the responsible persons, including with regard to predetermined changes as referred to under point (f)'
+   value: !!bool false
+ technical_documentation_cybersecurity:
+   article: 'Art. 11; Annex IV(2)(h)'
+   verbose: 'Model has technical documentation that describes cybersecurity measures put in place'
+   value: !!bool false
+ transparency_to_deployers_intended_purpose:
+   article: 'Art. 13(3)(b)(i)'
+   verbose: 'Model is accompanied by instructions for use that include the characteristics, capabilities, performance limitations, and intended purpose of the model'
+   value: !!bool false
+ transparency_to_deployers_metrics:
+   article: 'Art. 13(3)(b)(ii)'
+   verbose: 'Model is accompanied by instructions for use that include the level of accuracy, including its metrics, robustness and cybersecurity against which the model has been tested and validated and which can be expected, and any known and foreseeable circumstances that may have an impact on that expected level of accuracy, robustness and cybersecurity'
+   value: !!bool false
+ transparency_to_deployers_foreseeable_misuse:
+   article: 'Art. 13(3)(b)(iii)'
+   verbose: 'Model is accompanied by instructions for use that include any known or foreseeable circumstance, related to the use of the model in accordance with its intended purpose or under conditions of reasonably foreseeable misuse, which may lead to risks to the health and safety or fundamental rights referred to in Article 9(2)'
+   value: !!bool false
+ transparency_to_deployers_explainability:
+   article: 'Art. 13(3)(b)(iv)'
+   verbose: 'Model is accompanied by instructions for use that include technical capabilities and characteristics of the model to provide information that is relevant to explain its output'
+   value: !!bool false
+ transparency_to_deployers_specific_groups:
+   article: 'Art. 13(3)(b)(v)'
+   verbose: 'Model is accompanied by instructions for use that include performance regarding specific persons or groups of persons on which the model is intended to be used'
+   value: !!bool false
+ transparency_to_deployers_data:
+   article: 'Art. 13(3)(b)(vi)'
+   verbose: 'Model is accompanied by instructions for use that include specifications for the input data, or any other relevant information in terms of the training, validation and testing data sets used, taking into account the intended purpose of the model'
+   value: !!bool false
+ transparency_to_deployers_interpretability:
+   article: 'Art. 13(3)(b)(vii)'
+   verbose: 'Model is accompanied by instructions for use that include information to enable deployers to interpret the output of the model and use it appropriately'
+   value: !!bool false
+ transparency_to_deployers_human_oversight:
+   article: 'Art. 13(3)(d)'
+   verbose: 'Model is accompanied by instructions for use that include human oversight measures, including the technical measures put in place to facilitate the interpretation of the outputs of the model by the deployers'
+   value: !!bool false
+ transparency_to_deployers_hardware:
+   article: 'Art. 13(3)(e)'
+   verbose: 'Model is accompanied by instructions for use that include computational and hardware resources needed, the expected lifetime of the model and any necessary maintenance and care measures, including their frequency, to ensure the proper functioning of that model, including as regards software updates'
+   value: !!bool false
+ accuracy_robustness_cybersecurity_accuracy: # These need to be cleaned up and to match/complement project cc
+   article: 'Art. 15(1)'
+   verbose: 'Model is designed and developed to achieve an appropriate level of accuracy'
+   value: !!bool false
+ accuracy_robustness_cybersecurity_robustness:
+   article: 'Art. 15(1)'
+   verbose: 'Model is designed and developed to achieve an appropriate level of robustness'
+   value: !!bool false
+ accuracy_robustness_cybersecurity_cybersecurity:
+   article: 'Art. 15(1)'
+   verbose: 'Model is designed and developed to achieve an appropriate level of cybersecurity'
+   value: !!bool false
+ accuracy_robustness_cybersecurity_accuracy_metrics:
+   article: 'Art. 15(2)'
+   verbose: 'Use of relevant accuracy metrics'
+   value: !!bool false
+ accuracy_robustness_cybersecurity_fault_resilience:
+   article: 'Art. 15(4)'
+   verbose: 'Maximum possible resilience regarding errors, faults or inconsistencies that may occur within the system or the environment in which the system operates, in particular due to their interaction with natural persons or other systems. Technical and organisational measures shall be taken in this regard'
+   value: !!bool false
+ accuracy_robustness_cybersecurity_attacks:
+   article: 'Art. 15(5)'
+   verbose: 'Measures were taken to prevent, detect, respond to, resolve and control for model poisoning attacks, adversarial examples or model evasion attacks (attacks using inputs designed to cause the model to make a mistake), and confidentiality attacks or model flaws'
+   value: !!bool false
+ quality_management_system:
+   article: 'Art. 17(1)(d)'
+   verbose: 'Examination, test and validation procedures to be carried out before, during and after the development of the high-risk AI system, and the frequency with which they have to be carried out'
+   value: !!bool false
+
+ # Metadata related to model-related requirements for GPAI models
+
+ gpai_model_requirements:
+   task:
+     article: 'Art. 53; Annex XI(1)(1)(a)'
+     verbose: 'The tasks that the model is intended to perform and the type and nature of AI systems in which it can be integrated'
+     value: !!bool false
+   acceptable_use:
+     article: 'Art. 53; Annex XI(1)(1)(b)'
+     verbose: 'Acceptable use policies applicable'
+     value: !!bool false
+   release_date:
+     article: 'Art. 53; Annex XI(1)(1)(c)'
+     verbose: 'The date of release and methods of distribution'
+     value: !!bool false
+   architecture:
+     article: 'Art. 53; Annex XI(1)(1)(d)'
+     verbose: 'The architecture and number of parameters'
+     value: !!bool false
+   input_output_modality:
+     article: 'Art. 53; Annex XI(1)(1)(e)'
+     verbose: 'Modality (e.g. text, image) and format of inputs and outputs'
+     value: !!bool false
+   license:
+     article: 'Art. 53; Annex XI(1)(1)(f)'
+     verbose: 'The license'
+     value: !!bool false
+   training:
+     article: 'Art. 53; Annex XI(1)(2)(b)'
+     verbose: 'Training methodologies and techniques'
+     value: !!bool false
+   design_choices:
+     article: 'Art. 53; Annex XI(1)(2)(b)'
+     verbose: 'Key design choices including the rationale and assumptions made'
+     value: !!bool false
+   optimized_for:
+     article: 'Art. 53; Annex XI(1)(2)(b)'
+     verbose: 'What the model is designed to optimise for'
+     value: !!bool false
+   parameters:
+     article: 'Art. 53; Annex XI(1)(2)(b)'
+     verbose: 'The relevance of the different parameters, as applicable'
+     value: !!bool false
+   data_type:
+     article: 'Art. 53; Annex XI(1)(2)(c)'
+     verbose: 'Information on the data used for training, testing and validation: type of data'
+     value: !!bool false
+   data_provenance:
+     article: 'Art. 53; Annex XI(1)(2)(c)'
+     verbose: 'Information on the data used for training, testing and validation: provenance of data'
+     value: !!bool false
+   data_curation:
+     article: 'Art. 53; Annex XI(1)(2)(c)'
+     verbose: 'Information on the data used for training: curation methodologies (e.g. cleaning, filtering, etc.)'
+     value: !!bool false
+   data_number:
+     article: 'Art. 53; Annex XI(1)(2)(c)'
+     verbose: 'Information on the data used for training: the number of data points'
+     value: !!bool false
+   data_characteristics:
+     article: 'Art. 53; Annex XI(1)(2)(c)'
+     verbose: 'Information on the data used for training: the scope and main characteristics of the data points, as applicable'
+     value: !!bool false
+   data_origin:
+     article: 'Art. 53; Annex XI(1)(2)(c)'
+     verbose: 'Information on the data used for training: how the data was obtained and selected'
+     value: !!bool false
+   data_bias:
+     article: 'Art. 53; Annex XI(1)(2)(c)'
+     verbose: 'Information on the data used for training: all other measures to detect the unsuitability of data sources and methods to detect identifiable biases, where applicable'
+     value: !!bool false
+   computation:
+     article: 'Art. 53; Annex XI(1)(2)(d)'
+     verbose: 'The computational resources used to train the model (e.g. number of floating point operations – FLOPs), training time, and other relevant details related to the training'
+     value: !!bool false
+   energy_consumption:
+     article: 'Art. 53; Annex XI(1)(2)(e)'
+     verbose: 'Known or estimated energy consumption of the model; in case not known, this could be based on information about computational resources used'
+     value: !!bool false
+   evaluation:
+     article: 'Art. 53; Annex XI(2)(1)'
+     verbose: 'Detailed description of the evaluation strategies, including evaluation results, on the basis of available public evaluation protocols and tools or otherwise of other evaluation methodologies. Evaluation strategies shall include evaluation criteria, metrics and the methodology on the identification of limitations'
+     value: !!bool false
+   adversarial_testing:
+     article: 'Art. 53; Annex XI(2)(2)'
+     verbose: 'Where applicable, detailed description of the measures put in place for the purpose of conducting internal and/or external adversarial testing (e.g. red teaming), model adaptations, including alignment and fine-tuning'
+     value: !!bool false
+
+ gpai_model_with_systemic_risk_requirements:
+   evaluation:
+     article: 'Art. 55(1)(a)'
+     verbose: 'Perform model evaluation in accordance with standardised protocols and tools reflecting the state of the art, including conducting and documenting adversarial testing of the model with a view to identifying and mitigating systemic risk'
+     value: !!bool false
+   systemic_risk:
+     article: 'Art. 55(1)(b)'
+     verbose: 'Assess and mitigate possible systemic risks at Union level, including their sources, that may stem from the development, the placing on the market, or the use of the model'
+     value: !!bool false
+   cybersecurity:
+     article: 'Art. 55(1)(d)'
+     verbose: 'Ensure an adequate level of cybersecurity protection for the GPAI model with systemic risk and the physical infrastructure of the model'
+     value: !!bool false
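
Because every requirement above follows the same article/verbose/value triplet, a compliance checker can walk these sections generically instead of hard-coding each key. Below is a minimal sketch, assuming the card has already been parsed with yaml.safe_load and that section names match the keys above; collect_unmet is an illustrative helper, not a function from this repo:

import yaml

def collect_unmet(card, section):
    """Return (key, article, verbose) for each entry in `section`
    whose value flag is still false, i.e. not yet attested."""
    unmet = []
    for key, attrs in card.get(section, {}).items():
        if not attrs.get('value', False):
            unmet.append((key, attrs.get('article', ''), attrs.get('verbose', '')))
    return unmet

with open('model_cc.yaml') as f:
    model_cc = yaml.safe_load(f)

for key, article, verbose in collect_unmet(model_cc, 'gpai_model_requirements'):
    print(f"[{article}] {key}")
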
model_cc.yaml CHANGED
@@ -1,4 +1,6 @@
- card_type: "model" # "project", "data" or "model"
+ card_details:
+   card_type: "model" # "project", "data" or "model"
+   card_label: "model_01"
 
  # Metadata related to intended purpose(s) of model
 
project_cc.yaml CHANGED
@@ -1,6 +1,8 @@
 
  # Information related to high-level characteristics of AI project, including the role of the operator, their location, and where the output is used
- card_type: "project" # "project", "data" or "model"
+ card_details:
+   card_type: "project" # "project", "data" or "model"
+   card_label: "project"
 
  # TODO potentially add scenarios that get the provider off the hook per Article 25
 
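
Both card files now nest card_type (plus the new card_label) under a card_details mapping, so any loader that still reads card_type from the document root will hit a KeyError. A minimal sketch of the updated lookup (read_card_details is an illustrative helper, not part of the repo):

import yaml

def read_card_details(path):
    """Load a compliance card and return its type and label from the
    nested card_details block introduced in this commit."""
    with open(path) as f:
        card = yaml.safe_load(f)
    details = card['card_details']  # card_type sat at the top level before
    return details['card_type'], details.get('card_label', '')

card_type, card_label = read_card_details('project_cc.yaml')
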
run.py CHANGED
@@ -2,7 +2,7 @@ import yaml
  import json
  from pathlib import Path
  import pandas as pd
- from compliance_analysis import check_overall_compliance
+ from src.compliance_analysis import check_overall_compliance
 
  pd.set_option('display.max_columns', None)
  pd.set_option('display.max_rows', None)
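
The only change to run.py is the import path: check_overall_compliance now lives under the src package, so the repository root must be on sys.path (or src must be importable as a package) when the script runs. A hedged usage sketch; the exact keys of the cards dict are an assumption based on how the rest of the repo gathers card files:

from src.compliance_analysis import check_overall_compliance

# Assumed layout: one project card path plus lists of data/model card paths.
cards = {
    'project_file': 'project_cc.yaml',
    'data_files': ['data_cc.yaml'],
    'model_files': ['model_cc.yaml'],
}

results = check_overall_compliance(cards)
print(results)
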
utils.py CHANGED
@@ -31,16 +31,6 @@ def set_eu_market_status(dispositive_variables, project_cc_yaml):
 
      return dispositive_variables
 
-
- def check_within_scope_cc(dispositive_variables, project_cc_yaml):
-
-     # Check that the person filling out the form (the operator) is in fact a provider
-     if project_cc_yaml['operator_details']['provider']['value']:
-         return True
-     else:
-         print("The initial version of the Compliance Cards System is for provider-side compliance analyses only.")
-         return False
-
  def check_within_scope_act(dispositive_variables, project_cc_yaml):
 
      # Check that the project is within the scope of the Act