Spaces:
Sleeping
Sleeping
qqubb
committed on
Commit
·
a469d8f
1
Parent(s):
65f24dd
update check_overall_compliance, remove comments
Browse files
- compliance_analysis.py +85 -110
- project_cc.yaml +10 -6
- utils.py +1 -0
compliance_analysis.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import yaml
|
2 |
-
from utils import set_type, set_operator_role_and_location, set_eu_market_status,
|
3 |
|
4 |
# Create some variables we will use throughout our analysis
|
5 |
|
@@ -20,38 +20,30 @@ dispositive_variables = {
|
|
20 |
"put_into_service": False
|
21 |
},
|
22 |
"intended_purposes": [],
|
|
|
|
|
|
|
|
|
23 |
}
|
24 |
|
25 |
-
#
|
26 |
-
#
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
#
|
31 |
-
|
32 |
-
|
33 |
-
#
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
# -This should include a "cross comparison" of the intended uses listed in the model CC and the project_intended_purpose parsed from the Project CC, something that is not yet integrated
|
44 |
-
# -This function must check if GPAI requirements are met, if that value for ai_project_type is passed in -- it does not yet do this
|
45 |
-
#
|
46 |
-
# Call run_compliance_analysis_on_data() *for all data CCs in the folder*, passing in the ai_project_type variable and maybe project_intended_purpose
|
47 |
-
# -This should include a "cross comparison" of the intended uses listed in the data CC and the project_intended_purpose parsed from the Project CC, something that is not yet integrated
|
48 |
-
# -This function must check if GPAI requirements are met, if that value for ai_project_type is passed in -- it does not yet do this
|
49 |
-
#
|
50 |
-
# This function could also more gracefully handle the internal exits/reports and generate a single, digestible compliance report that
|
51 |
-
# tells the user where the compliance analysis failed. If we wanted to get really fancy, we could include error messages for each individual
|
52 |
-
# entry in the yaml files, possibly citing the part of the Act that they need to reference (currently in comments that user does not see)
|
53 |
-
|
54 |
-
def run_compliance_analysis_on_project(project_cc_yaml):
|
55 |
|
56 |
# Determine project type (AI system vs. GPAI model) as well as operator type. We will use these for different things.
|
57 |
project_type = set_type(dispositive_variables, project_cc_yaml)
|
@@ -70,8 +62,9 @@ def run_compliance_analysis_on_project(project_cc_yaml):
|
|
70 |
else:
|
71 |
msg = ("Project is not within the scope of what is regulated by the Act.")
|
72 |
|
73 |
-
#
|
74 |
|
|
|
75 |
# # Check for prohibited practices. If any exist, the analysis is over.
|
76 |
# if check_prohibited(project_cc_yaml) == True:
|
77 |
# print("Project contains prohibited practices and is therefore non-compliant.")
|
@@ -107,13 +100,47 @@ def run_compliance_analysis_on_project(project_cc_yaml):
|
|
107 |
if not value:
|
108 |
msg = ("Because of project-level characteristics, this high-risk AI system fails the accuracy, robustness, and cybersecurity requirements under Article 17.")
|
109 |
|
110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
# GPAI models with and without systemic risk and then check to see if the relevant requirements have been met if either of these values applies.
|
112 |
# This will look a lot like what is happening above for high-risk AI systems.
|
113 |
|
114 |
-
return
|
115 |
|
116 |
-
def run_compliance_analysis_on_data(
|
|
|
|
|
117 |
|
118 |
for key, value in data_cc_yaml['data_and_data_governance']:
|
119 |
if not value:
|
@@ -128,16 +155,23 @@ def run_compliance_analysis_on_data(data_cc_yaml, project_intended_purpose): # T
|
|
128 |
if not value:
|
129 |
msg = (f"Because of the dataset represented by , this high-risk AI system fails the quality management requirements under Article 17.")
|
130 |
|
131 |
-
#
|
|
|
|
|
|
|
|
|
|
|
132 |
# GPAI models with and without systemic risk and then check to see if the relevant requirements have been met if either of these values applies.
|
133 |
# Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the
|
134 |
# intended purposes. That might look like this:
|
135 |
# if data_cc_yaml['intended_purpose'] not in intended_purposes:
|
136 |
# return false
|
137 |
|
138 |
-
return
|
|
|
|
|
139 |
|
140 |
-
|
141 |
|
142 |
for key, value in model_cc_yaml['risk_management_system']:
|
143 |
if not value:
|
@@ -154,17 +188,26 @@ def run_compliance_analysis_on_model(model_cc_yaml, project_intended_purpose):
|
|
154 |
for key, value in data_cc_yaml['quality_management_system']:
|
155 |
if not value:
|
156 |
msg = (f"Because of the model represented by , this high-risk AI system fails the quality management requirements under Article 17.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
-
#
|
159 |
# GPAI models with and without systemic risk and then check to see if the relevant requirements have been met if either of these values applies.
|
160 |
# Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the
|
161 |
# intended purposes. That might look like this:
|
162 |
# if model_cc_yaml['intended_purpose'] not in intended_purposes:
|
163 |
# return false
|
164 |
|
165 |
-
return
|
166 |
|
167 |
-
def check_intended_purpose():
|
168 |
|
169 |
# We want to run this function for everything classified as a high_risk_ai_system
|
170 |
# We also need to run it for all
|
@@ -214,74 +257,6 @@ def check_intended_purpose():
|
|
214 |
|
215 |
# TODO return list of intended purpose
|
216 |
|
217 |
-
return
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
# # If the project is a GPAI model, check that it has met all the requirements for such systems:
|
222 |
-
|
223 |
-
# if gpai_model:
|
224 |
-
|
225 |
-
# # Do this by examining the Project CC
|
226 |
-
|
227 |
-
# for key, value in project_cc_yaml['gpai_model_provider_obligations']:
|
228 |
-
# if not value:
|
229 |
-
# msg = ("GPAI model fails the transparency requirements under Article 53.")
|
230 |
-
|
231 |
-
# # Do this by examining any and all Data CCs too
|
232 |
-
|
233 |
-
# for filename in os.listdir(folder_path):
|
234 |
-
# # Check if the search word is in the filename
|
235 |
-
# if "data_cc.md" in filename.lower():
|
236 |
-
|
237 |
-
# # If it is, load the yaml
|
238 |
-
|
239 |
-
# with open(folder_path + filename, 'r') as file:
|
240 |
-
# data_cc_yaml = yaml.safe_load(file)
|
241 |
-
|
242 |
-
# for key, value in data_cc_yaml['gpai_requirements']['gpai_requirements']:
|
243 |
-
# if not value:
|
244 |
-
# msg = (f"Because of the dataset represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
|
245 |
-
|
246 |
-
# # Do this by examining any and all Model CCs too
|
247 |
-
|
248 |
-
# for filename in os.listdir(folder_path):
|
249 |
-
# # Check if the search word is in the filename
|
250 |
-
# if "model_cc.md" in filename.lower():
|
251 |
-
|
252 |
-
# # If it is, load the yaml
|
253 |
-
|
254 |
-
# with open(folder_path + filename, 'r') as file:
|
255 |
-
# model_cc_yaml = yaml.safe_load(file)
|
256 |
-
|
257 |
-
# for key, value in model_cc_yaml['obligations_for_providers_of_gpai_models']:
|
258 |
-
# if not value:
|
259 |
-
# msg = (f"Because of the model represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
|
260 |
-
|
261 |
-
# # If the project is a GPAI model with systemic risk, check that it has additionally met all the requirements for such systems:
|
262 |
-
|
263 |
-
# if gpai_model_systematic_risk:
|
264 |
-
|
265 |
-
# # Do this by examining the Project CC
|
266 |
-
|
267 |
-
# for key, value in project_cc_yaml['gpai_obligations_for_systemic_risk_models']:
|
268 |
-
# if not value:
|
269 |
-
# msg = ("GPAI model with systematic risk fails the transparency requirements under Article 55.")
|
270 |
-
|
271 |
-
# # Do this by examining any and all Model CCs too
|
272 |
-
|
273 |
-
# for filename in os.listdir(folder_path):
|
274 |
-
# # Check if the search word is in the filename
|
275 |
-
# if "model_cc.md" in filename.lower():
|
276 |
-
|
277 |
-
# # If it is, load the yaml
|
278 |
-
|
279 |
-
# with open(folder_path + filename, 'r') as file:
|
280 |
-
# model_cc_yaml = yaml.safe_load(file)
|
281 |
-
|
282 |
-
# for key, value in model_cc_yaml['obligations_for_providers_of_gpai_models_with_systemic_risk']:
|
283 |
-
# if not value:
|
284 |
-
# msg = (f"Because of the model represented by {filename}, this GPAI model with systematic risk fails the transparency requirements under Article 55.")
|
285 |
-
|
286 |
|
287 |
|
|
|
1 |
import yaml
|
2 |
+
from utils import set_type, set_operator_role_and_location, set_eu_market_status, check_within_scope_cc, check_within_scope_act
|
3 |
|
4 |
# Create some variables we will use throughout our analysis
|
5 |
|
|
|
20 |
"put_into_service": False
|
21 |
},
|
22 |
"intended_purposes": [],
|
23 |
+
"project_cc_pass": False,
|
24 |
+
"data_cc_pass": False,
|
25 |
+
"model_cc_pass": False,
|
26 |
+
"msg": []
|
27 |
}
|
28 |
|
29 |
+
# TODO tells the user where the compliance analysis failed
|
30 |
+
# TODO cite article from yaml file as explanation
|
31 |
+
|
32 |
+
def check_overall_compliance(dispositive_variables, cc_files):
|
33 |
+
|
34 |
+
# check intended purposes
|
35 |
+
dispositive_variables = check_intended_purpose(dispositive_variables, cc_files)
|
36 |
+
|
37 |
+
# for each model_cc and data_cc - run analysis with ref to project_cc
|
38 |
+
|
39 |
+
dispositive_variables = run_compliance_analysis_on_data(dispositive_variables, data_cc_yaml)
|
40 |
+
dispositive_variables = run_compliance_analysis_on_model(dispositive_variables, model_cc_yaml)
|
41 |
+
|
42 |
+
dispositive_variables = run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml)
|
43 |
+
|
44 |
+
return dispositive_variables
|
45 |
+
|
46 |
+
def run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
# Determine project type (AI system vs. GPAI model) as well as operator type. We will use these for different things.
|
49 |
project_type = set_type(dispositive_variables, project_cc_yaml)
|
|
|
62 |
else:
|
63 |
msg = ("Project is not within the scope of what is regulated by the Act.")
|
64 |
|
65 |
+
# TODO: reactivate the prohibited practices check below
|
66 |
|
67 |
+
# TODO: fix and uncomment
|
68 |
# # Check for prohibited practices. If any exist, the analysis is over.
|
69 |
# if check_prohibited(project_cc_yaml) == True:
|
70 |
# print("Project contains prohibited practices and is therefore non-compliant.")
|
|
|
100 |
if not value:
|
101 |
msg = ("Because of project-level characteristics, this high-risk AI system fails the accuracy, robustness, and cybersecurity requirements under Article 17.")
|
102 |
|
103 |
+
|
104 |
+
# TODO
|
105 |
+
# # If the project is a GPAI model, check that it has met all the requirements for such systems:
|
106 |
+
|
107 |
+
if gpai_model:
|
108 |
+
|
109 |
+
# # If the project is a GPAI model with systemic risk, check that it has additionally met all the requirements for such systems:
|
110 |
+
|
111 |
+
# if gpai_model_systematic_risk:
|
112 |
+
|
113 |
+
# # Do this by examining the Project CC
|
114 |
+
|
115 |
+
# for key, value in project_cc_yaml['gpai_obligations_for_systemic_risk_models']:
|
116 |
+
# if not value:
|
117 |
+
# msg = ("GPAI model with systematic risk fails the transparency requirements under Article 55.")
|
118 |
+
|
119 |
+
# Do this by examining the Project CC
|
120 |
+
|
121 |
+
for key, value in project_cc_yaml['gpai_model_obligations']:
|
122 |
+
if not value:
|
123 |
+
msg = ("GPAI model fails the transparency requirements under Article 53.")
|
124 |
+
|
125 |
+
|
126 |
+
if gpai_model_systematic_risk:
|
127 |
+
for key, value in project_cc_yaml['gpai_models_with_systemic_risk_obligations']:
|
128 |
+
|
129 |
+
|
130 |
+
# if ai_system:
|
131 |
+
# for key, value in project_cc_yaml['']:
|
132 |
+
# TODO to be included in project_cc
|
133 |
+
|
134 |
+
|
135 |
+
# TODO: No matter where we land with an orchestrator function, this function must also check the value it has set for both
|
136 |
# GPAI models with and without systemic risk and then check to see if the relevant requirements have been met if either of these values applies.
|
137 |
# This will look a lot like what is happening above for high-risk AI systems.
|
138 |
|
139 |
+
return dispositive_variables
|
140 |
|
141 |
+
def run_compliance_analysis_on_data(dispositive_variables, data_cc_yaml):
|
142 |
+
|
143 |
+
# TODO: we probably have to pass ai_project_type and project_intended_purpose into this function
|
144 |
|
145 |
for key, value in data_cc_yaml['data_and_data_governance']:
|
146 |
if not value:
|
|
|
155 |
if not value:
|
156 |
msg = (f"Because of the dataset represented by , this high-risk AI system fails the quality management requirements under Article 17.")
|
157 |
|
158 |
+
# for key, value in data_cc_yaml['gpai_requirements']['gpai_requirements']:
|
159 |
+
# if not value:
|
160 |
+
# msg = (f"Because of the dataset represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
|
161 |
+
|
162 |
+
|
163 |
+
# TODO: No matter where we land with an orchestrator function, this function must also check the value that has been set for both
|
164 |
# GPAI models with and without systemic risk and then check to see if the relevant requirements have been met if either of these values applies.
|
165 |
# Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the
|
166 |
# intended purposes. That might look like this:
|
167 |
# if data_cc_yaml['intended_purpose'] not in intended_purposes:
|
168 |
# return false
|
169 |
|
170 |
+
return dispositive_variables
|
171 |
+
|
172 |
+
def run_compliance_analysis_on_model(dispositive_variables, model_cc_yaml):
|
173 |
|
174 |
+
# TODO: we probably have to pass ai_project_type and project_intended_purpose into this function
|
175 |
|
176 |
for key, value in model_cc_yaml['risk_management_system']:
|
177 |
if not value:
|
|
|
188 |
for key, value in data_cc_yaml['quality_management_system']:
|
189 |
if not value:
|
190 |
msg = (f"Because of the model represented by , this high-risk AI system fails the quality management requirements under Article 17.")
|
191 |
+
|
192 |
+
|
193 |
+
# for key, value in model_cc_yaml['obligations_for_providers_of_gpai_models']:
|
194 |
+
# if not value:
|
195 |
+
# msg = (f"Because of the model represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
|
196 |
+
|
197 |
+
# for key, value in model_cc_yaml['obligations_for_providers_of_gpai_models_with_systemic_risk']:
|
198 |
+
# if not value:
|
199 |
+
# msg = (f"Because of the model represented by {filename}, this GPAI model with systematic risk fails the transparency requirements under Article 55.")
|
200 |
|
201 |
+
# TODO: No matter where we land with an orchestrator function, this function must also check the value that has been set for both
|
202 |
# GPAI models with and without systemic risk and then check to see if the relevant requirements have been met if either of these values applies.
|
203 |
# Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the
|
204 |
# intended purposes. That might look like this:
|
205 |
# if model_cc_yaml['intended_purpose'] not in intended_purposes:
|
206 |
# return false
|
207 |
|
208 |
+
return dispositive_variables
|
209 |
|
210 |
+
def check_intended_purpose(dispositive_variables, cc_files):
|
211 |
|
212 |
# We want to run this function for everything classified as a high_risk_ai_system
|
213 |
# We also need to run it for all
|
|
|
257 |
|
258 |
# TODO return list of intended purpose
|
259 |
|
260 |
+
return dispositive_variables
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
261 |
|
262 |
|
project_cc.yaml
CHANGED
@@ -2,13 +2,16 @@
|
|
2 |
# Information related to high-level characteristics of AI project, including the role of the operator, their location, and where the output is used
|
3 |
|
4 |
operator_details:
|
5 |
-
provider:
|
|
|
6 |
verbose: 'The operator of this AI project is a natural or legal person, public authority, agency or other body that develops an AI project or a general-purpose AI model or that has an AI system or a general-purpose AI model developed and places it on the market or puts the AI system into service under its own name or trademark, whether for payment or free of charge'
|
7 |
value: !!bool false
|
8 |
-
eu_located:
|
|
|
9 |
verbose: 'AI project operator has its place of establishment or location within the Union'
|
10 |
value: !!bool True
|
11 |
-
output_used:
|
|
|
12 |
verbose: 'The output produced by the AI project is used in the Union'
|
13 |
value: !!bool false
|
14 |
|
@@ -458,7 +461,7 @@ transparency_obligations:
|
|
458 |
|
459 |
# Information related to the Act's requirements for GPAI models
|
460 |
|
461 |
-
|
462 |
documentation:
|
463 |
intended_uses: # Art. 53(1)(a); Annex XI(1)(1)(a)
|
464 |
verbose: 'The provider has drawn up and will keep up-to-date technical documentation of the model that include a general description of the model includes a description of the tasks that the model is intended to perform and the type and nature of AI systems in which it can be integrated'
|
@@ -539,7 +542,7 @@ gpai_model_provider_obligations:
|
|
539 |
|
540 |
# Information related to the Act's requirements for GPAI models with systemic risk
|
541 |
|
542 |
-
|
543 |
notification: # Art 52(1)
|
544 |
verbose: 'Within two weeks of it being known that the AI project should be classified as a GPAI model with systemic risk, the Commission was notified and provided with the information that supports this finding'
|
545 |
evaluation: # Art. 55(1)(a)
|
@@ -564,7 +567,8 @@ obligations_for_gpai_models_with_systemic_risk:
|
|
564 |
verbose: 'The provider has drawn up and will keep up-to-date technical documentation of the model that includes, where applicable, a detailed description of the measures put in place for the purpose of conducting internal and/or external adversarial testing (e.g. red teaming), model adaptations, including alignment and fine-tuning.'
|
565 |
value: !!bool false
|
566 |
documentation_architecture:
|
567 |
-
verbose: '
|
|
|
568 |
|
569 |
additional_provider_obligations: # apply these only if operator == provider and ai_project_type == high_risk_ai_system
|
570 |
contact: # Article 16 (b)
|
|
|
2 |
# Information related to high-level characteristics of AI project, including the role of the operator, their location, and where the output is used
|
3 |
|
4 |
operator_details:
|
5 |
+
provider:
|
6 |
+
article: "Art. 2"
|
7 |
verbose: 'The operator of this AI project is a natural or legal person, public authority, agency or other body that develops an AI project or a general-purpose AI model or that has an AI system or a general-purpose AI model developed and places it on the market or puts the AI system into service under its own name or trademark, whether for payment or free of charge'
|
8 |
value: !!bool false
|
9 |
+
eu_located:
|
10 |
+
article: 'Art. 2'
|
11 |
verbose: 'AI project operator has its place of establishment or location within the Union'
|
12 |
value: !!bool True
|
13 |
+
output_used:
|
14 |
+
article: 'Art. 2'
|
15 |
verbose: 'The output produced by the AI project is used in the Union'
|
16 |
value: !!bool false
|
17 |
|
|
|
461 |
|
462 |
# Information related to the Act's requirements for GPAI models
|
463 |
|
464 |
+
gpai_model_obligations:
|
465 |
documentation:
|
466 |
intended_uses: # Art. 53(1)(a); Annex XI(1)(1)(a)
|
467 |
verbose: 'The provider has drawn up and will keep up-to-date technical documentation of the model that include a general description of the model includes a description of the tasks that the model is intended to perform and the type and nature of AI systems in which it can be integrated'
|
|
|
542 |
|
543 |
# Information related to the Act's requirements for GPAI models with systemic risk
|
544 |
|
545 |
+
gpai_models_with_systemic_risk_obligations:
|
546 |
notification: # Art 52(1)
|
547 |
verbose: 'Within two weeks of it being known that the AI project should be classified as a GPAI model with systemic risk, the Commission was notified and provided with the information that supports this finding'
|
548 |
evaluation: # Art. 55(1)(a)
|
|
|
567 |
verbose: 'The provider has drawn up and will keep up-to-date technical documentation of the model that includes, where applicable, a detailed description of the measures put in place for the purpose of conducting internal and/or external adversarial testing (e.g. red teaming), model adaptations, including alignment and fine-tuning.'
|
568 |
value: !!bool false
|
569 |
documentation_architecture:
|
570 |
+
verbose: 'The provider has drawn up and will keep up-to-date technical documentation of the model that includes, where applicable, a detailed description of the system architecture explaining how software components build or feed into each other and integrate into the overall processing.'
|
571 |
+
value: !!bool false
|
572 |
|
573 |
additional_provider_obligations: # apply these only if operator == provider and ai_project_type == high_risk_ai_system
|
574 |
contact: # Article 16 (b)
|
utils.py
CHANGED
@@ -97,6 +97,7 @@ def check_excepted(project_cc_yaml):
|
|
97 |
else:
|
98 |
return False
|
99 |
|
|
|
100 |
def check_prohibited(dispositive_variables, project_cc_yaml):
|
101 |
|
102 |
ai_system = project_variables['ai_project_type']['ai_system']
|
|
|
97 |
else:
|
98 |
return False
|
99 |
|
100 |
+
# TODO update function
|
101 |
def check_prohibited(dispositive_variables, project_cc_yaml):
|
102 |
|
103 |
ai_system = project_variables['ai_project_type']['ai_system']
|