qqubb committed on
Commit
a469d8f
·
1 Parent(s): 65f24dd

update check_overall_compliance, remove comments

Browse files
Files changed (3) hide show
  1. compliance_analysis.py +85 -110
  2. project_cc.yaml +10 -6
  3. utils.py +1 -0
compliance_analysis.py CHANGED
@@ -1,5 +1,5 @@
1
  import yaml
2
- from utils import set_type, set_operator_role_and_location, set_eu_market_status, check_within_scope
3
 
4
  # Create some variables we will use throughout our analysis
5
 
@@ -20,38 +20,30 @@ dispositive_variables = {
20
  "put_into_service": False
21
  },
22
  "intended_purposes": [],
 
 
 
 
23
  }
24
 
25
- # Here is the potential orchestrator function that I think is the key missing part:
26
- #
27
- # def orchestrator():
28
- #
29
- # -make sure there is at least one Project CC, one Data CC, and one Model CC -- need at least one of each
30
- # -do some administrative stuff to make your life easier like maybe getting all the files in the folder into a list, etc.
31
- #
32
- # -Call set_dispositive_variables, passing in all the cards as the argument:
33
- # -This must loop through all the cards to set the dispositive_variables where applicable. There is no function for this yet. I can write it.
34
- # -It must set the intended purposes by parsing them from the Project CC and. I wrote a utility function for this.
35
- # -Optionally call the functions that check whethe the project is in scope of CC and in scope of the Act. These could also be called from run_compliance_analysis_on_project
36
- # -Optionally check for prohibited practices. This has been commented out, but the functionality is there as-is. This could also be called from run_compliance_analysis_on_project
37
- #
38
- # Call run_compliance_analysis_on_project, passing in the sole Project CC as the argument
39
- # -This must run the internal check of the project CC based on the dispositive_variables it has set. It is only partially doing this as-is. To finish the job, we must:
40
- # -Be sure to run the check for all types of models and systems including AI systems without high risk, GPAI without systemic risk, GPAI with systemic risk. It is only doing high-risk AI systems at the moment.
41
- #
42
- # Call run_compliance_analysis_on_model() *for all model CCs in the folder*, passing in the ai_project_type variable and maybe project_intended_purpose
43
- # -This should include a "cross comparison" of the intended uses listed in the model CC and the project_intended_purpose parsed from the Project CC, something that is not yet integrated
44
- # -This function must check if GPAI requirements are met, if that value for ai_project_type is passed in -- it does not yet do this
45
- #
46
- # Call run_compliance_analysis_on_data() *for all data CCs in the folder*, passing in the ai_project_type variable and maybe project_intended_purpose
47
- # -This should include a "cross comparison" of the intended uses listed in the data CC and the project_intended_purpose parsed from the Project CC, something that is not yet integrated
48
- # -This function must check if GPAI requirements are met, if that value for ai_project_type is passed in -- it does not yet do this
49
- #
50
- # This function could also more gracefully handle the internal exits/reports and generate a single, digestible compliance report that
51
- # tells the user where the compliance analysis failed. If we wanted to get really fancy, we could include error messages for each individual
52
- # entry in the yaml files, possibly citing the part of the Act that they need to reference (currently in comments that user does not see)
53
-
54
- def run_compliance_analysis_on_project(project_cc_yaml):
55
 
56
  # Determine project type (AI system vs. GPAI model) as well as operator type. We will use these for different things.
57
  project_type = set_type(dispositive_variables, project_cc_yaml)
@@ -70,8 +62,9 @@ def run_compliance_analysis_on_project(project_cc_yaml):
70
  else:
71
  msg = ("Project is not within the scope of what is regulated by the Act.")
72
 
73
- # TO-DO: reactivate the prohibited practices check below
74
 
 
75
  # # Check for prohibited practices. If any exist, the analysis is over.
76
  # if check_prohibited(project_cc_yaml) == True:
77
  # print("Project contains prohibited practices and is therefore non-compliant.")
@@ -107,13 +100,47 @@ def run_compliance_analysis_on_project(project_cc_yaml):
107
  if not value:
108
  msg = ("Because of project-level characteristics, this high-risk AI system fails the accuracy, robustness, and cybersecurity requirements under Article 17.")
109
 
110
- # TO-DO: No matter where we land with an orchestrator function, this function must also check to the value it has set for both
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  # GPAI models with and without systemic risk and then check to see if the relevant requirement have met if either of these values applies.
112
  # This will look a lot like what is happening above for high-risk AI systems.
113
 
114
- return msg
115
 
116
- def run_compliance_analysis_on_data(data_cc_yaml, project_intended_purpose): # TO-DO: we probably have to pass ai_project_type and project_intended_purpose into this function
 
 
117
 
118
  for key, value in data_cc_yaml['data_and_data_governance']:
119
  if not value:
@@ -128,16 +155,23 @@ def run_compliance_analysis_on_data(data_cc_yaml, project_intended_purpose): # T
128
  if not value:
129
  msg = (f"Because of the dataset represented by , this high-risk AI system fails the quality management requirements under Article 17.")
130
 
131
- # TO-DO: No matter where we land with an orchestrator function, this function must also check to the value that has been set for both
 
 
 
 
 
132
  # GPAI models with and without systemic risk and then check to see if the relevant requirements have met if either of these values applies.
133
  # Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the
134
  # intended purposes. That might look like this:
135
  # if data_cc_yaml['intended_purpose'] not in intended_purposes:
136
  # return false
137
 
138
- return msg
 
 
139
 
140
- def run_compliance_analysis_on_model(model_cc_yaml, project_intended_purpose): # TO-DO: we probably have to pass ai_project_type and project_intended_purpose into this function
141
 
142
  for key, value in model_cc_yaml['risk_management_system']:
143
  if not value:
@@ -154,17 +188,26 @@ def run_compliance_analysis_on_model(model_cc_yaml, project_intended_purpose):
154
  for key, value in data_cc_yaml['quality_management_system']:
155
  if not value:
156
  msg = (f"Because of the model represented by , this high-risk AI system fails the quality management requirements under Article 17.")
 
 
 
 
 
 
 
 
 
157
 
158
- # TO-DO: No matter where we land with an orchestrator function, this function must also check to the value that has been set for both
159
  # GPAI models with and without systemic risk and then check to see if the relevant requirements have met if either of these values applies.
160
  # Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the
161
  # intended purposes. That might look like this:
162
  # if model_cc_yaml['intended_purpose'] not in intended_purposes:
163
  # return false
164
 
165
- return msg
166
 
167
- def check_intended_purpose():
168
 
169
  # We want to run this function for everything classified as a high_risk_ai_system
170
  # We also need to run it for all
@@ -214,74 +257,6 @@ def check_intended_purpose():
214
 
215
  # TODO return list of intended purpose
216
 
217
- return msg
218
-
219
-
220
-
221
- # # If the project is a GPAI model, check that is has met all the requirements for such systems:
222
-
223
- # if gpai_model:
224
-
225
- # # Do this by examining the Project CC
226
-
227
- # for key, value in project_cc_yaml['gpai_model_provider_obligations']:
228
- # if not value:
229
- # msg = ("GPAI model fails the transparency requirements under Article 53.")
230
-
231
- # # Do this by examining any and all Data CCs too
232
-
233
- # for filename in os.listdir(folder_path):
234
- # # Check if the search word is in the filename
235
- # if "data_cc.md" in filename.lower():
236
-
237
- # # If it is, load the yaml
238
-
239
- # with open(folder_path + filename, 'r') as file:
240
- # data_cc_yaml = yaml.safe_load(file)
241
-
242
- # for key, value in data_cc_yaml['gpai_requirements']['gpai_requirements']:
243
- # if not value:
244
- # msg = (f"Because of the dataset represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
245
-
246
- # # Do this by examining any and all Model CCs too
247
-
248
- # for filename in os.listdir(folder_path):
249
- # # Check if the search word is in the filename
250
- # if "model_cc.md" in filename.lower():
251
-
252
- # # If it is, load the yaml
253
-
254
- # with open(folder_path + filename, 'r') as file:
255
- # model_cc_yaml = yaml.safe_load(file)
256
-
257
- # for key, value in model_cc_yaml['obligations_for_providers_of_gpai_models']:
258
- # if not value:
259
- # msg = (f"Because of the model represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
260
-
261
- # # If the project is a GPAI model with systematic risk, check that is has additionally met all the requirements for such systems:
262
-
263
- # if gpai_model_systematic_risk:
264
-
265
- # # Do this by examining the Project CC
266
-
267
- # for key, value in project_cc_yaml['gpai_obligations_for_systemic_risk_models']:
268
- # if not value:
269
- # msg = ("GPAI model with systematic risk fails the transparency requirements under Article 55.")
270
-
271
- # # Do this by examining any and all Model CCs too
272
-
273
- # for filename in os.listdir(folder_path):
274
- # # Check if the search word is in the filename
275
- # if "model_cc.md" in filename.lower():
276
-
277
- # # If it is, load the yaml
278
-
279
- # with open(folder_path + filename, 'r') as file:
280
- # model_cc_yaml = yaml.safe_load(file)
281
-
282
- # for key, value in model_cc_yaml['obligations_for_providers_of_gpai_models_with_systemic_risk']:
283
- # if not value:
284
- # msg = (f"Because of the model represented by {filename}, this GPAI model with systematic risk fails the transparency requirements under Article 55.")
285
-
286
 
287
 
 
1
  import yaml
2
+ from utils import set_type, set_operator_role_and_location, set_eu_market_status, check_within_scope_cc, check_within_scope_act
3
 
4
  # Create some variables we will use throughout our analysis
5
 
 
20
  "put_into_service": False
21
  },
22
  "intended_purposes": [],
23
+ "project_cc_pass": False,
24
+ "data_cc_pass": False,
25
+ "model_cc_pass": False,
26
+ "msg": []
27
  }
28
 
29
+ # TODO tells the user where the compliance analysis failed
30
+ # TODO cite article from yaml file as explanation
31
+
32
+ def check_overall_compliance(dispositive_variables, cc_files):
33
+
34
+ # check intended purposes
35
+ dispositive_variables = check_intended_purpose(dispositive_variables, cc_files)
36
+
37
+ # for each model_cc and data_cc - run analysis with ref to project_cc
38
+
39
+ dispositive_variables = run_compliance_analysis_on_data(dispositive_variables, data_cc_yaml)
40
+ dispositive_variables = run_compliance_analysis_on_model(dispositive_variables, model_cc_yaml)
41
+
42
+ dispositive_variables = run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml)
43
+
44
+ return dispositive_variables
45
+
46
+ def run_compliance_analysis_on_project(dispositive_variables, project_cc_yaml):
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  # Determine project type (AI system vs. GPAI model) as well as operator type. We will use these for different things.
49
  project_type = set_type(dispositive_variables, project_cc_yaml)
 
62
  else:
63
  msg = ("Project is not within the scope of what is regulated by the Act.")
64
 
65
+ # TODO: reactivate the prohibited practices check below
66
 
67
+ # TODO: fix and uncomment
68
  # # Check for prohibited practices. If any exist, the analysis is over.
69
  # if check_prohibited(project_cc_yaml) == True:
70
  # print("Project contains prohibited practices and is therefore non-compliant.")
 
100
  if not value:
101
  msg = ("Because of project-level characteristics, this high-risk AI system fails the accuracy, robustness, and cybersecurity requirements under Article 17.")
102
 
103
+
104
+ # TODO
105
+ # # If the project is a GPAI model, check that it has met all the requirements for such systems:
106
+
107
+ if gpai_model:
108
+
109
+ # # If the project is a GPAI model with systemic risk, check that it has additionally met all the requirements for such systems:
110
+
111
+ # if gpai_model_systematic_risk:
112
+
113
+ # # Do this by examining the Project CC
114
+
115
+ # for key, value in project_cc_yaml['gpai_obligations_for_systemic_risk_models']:
116
+ # if not value:
117
+ # msg = ("GPAI model with systematic risk fails the transparency requirements under Article 55.")
118
+
119
+ # Do this by examining the Project CC
120
+
121
+ for key, value in project_cc_yaml['gpai_model_obligations']:
122
+ if not value:
123
+ msg = ("GPAI model fails the transparency requirements under Article 53.")
124
+
125
+
126
+ if gpai_model_systematic_risk:
127
+ for key, value in project_cc_yaml['gpai_models_with_systemic_risk_obligations']:
128
+
129
+
130
+ # if ai_system:
131
+ # for key, value in project_cc_yaml['']:
132
+ # TODO to be included in project_cc
133
+
134
+
135
+ # TODO: No matter where we land with an orchestrator function, this function must also check the value it has set for both
136
  # GPAI models with and without systemic risk and then check to see if the relevant requirement have met if either of these values applies.
137
  # This will look a lot like what is happening above for high-risk AI systems.
138
 
139
+ return dispositive_variables
140
 
141
+ def run_compliance_analysis_on_data(dispositive_variables, data_cc_yaml):
142
+
143
+ # TODO: we probably have to pass ai_project_type and project_intended_purpose into this function
144
 
145
  for key, value in data_cc_yaml['data_and_data_governance']:
146
  if not value:
 
155
  if not value:
156
  msg = (f"Because of the dataset represented by , this high-risk AI system fails the quality management requirements under Article 17.")
157
 
158
+ # for key, value in data_cc_yaml['gpai_requirements']['gpai_requirements']:
159
+ # if not value:
160
+ # msg = (f"Because of the dataset represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
161
+
162
+
163
+ # TODO: No matter where we land with an orchestrator function, this function must also check the value that has been set for both
164
  # GPAI models with and without systemic risk and then check to see if the relevant requirements have met if either of these values applies.
165
  # Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the
166
  # intended purposes. That might look like this:
167
  # if data_cc_yaml['intended_purpose'] not in intended_purposes:
168
  # return false
169
 
170
+ return dispositive_variables
171
+
172
+ def run_compliance_analysis_on_model(dispositive_variables, model_cc_yaml):
173
 
174
+ # TODO: we probably have to pass ai_project_type and project_intended_purpose into this function
175
 
176
  for key, value in model_cc_yaml['risk_management_system']:
177
  if not value:
 
188
  for key, value in data_cc_yaml['quality_management_system']:
189
  if not value:
190
  msg = (f"Because of the model represented by , this high-risk AI system fails the quality management requirements under Article 17.")
191
+
192
+
193
+ # for key, value in model_cc_yaml['obligations_for_providers_of_gpai_models']:
194
+ # if not value:
195
+ # msg = (f"Because of the model represented by {filename}, this GPAI fails the transparency requirements under Article 53.")
196
+
197
+ # for key, value in model_cc_yaml['obligations_for_providers_of_gpai_models_with_systemic_risk']:
198
+ # if not value:
199
+ # msg = (f"Because of the model represented by {filename}, this GPAI model with systematic risk fails the transparency requirements under Article 55.")
200
 
201
+ # TODO: No matter where we land with an orchestrator function, this function must also check the value that has been set for both
202
  # GPAI models with and without systemic risk and then check to see if the relevant requirements have met if either of these values applies.
203
  # Right now it is only checking high-risk AI system requirements. Another thing that we likely have to add here is the cross-comparison of the
204
  # intended purposes. That might look like this:
205
  # if model_cc_yaml['intended_purpose'] not in intended_purposes:
206
  # return false
207
 
208
+ return dispositive_variables
209
 
210
+ def check_intended_purpose(dispositive_variables, cc_files):
211
 
212
  # We want to run this function for everything classified as a high_risk_ai_system
213
  # We also need to run it for all
 
257
 
258
  # TODO return list of intended purpose
259
 
260
+ return dispositive_variables
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
 
project_cc.yaml CHANGED
@@ -2,13 +2,16 @@
2
  # Information related to high-level characteristics of AI project, including the role of the operator, their location, and where the output is used
3
 
4
  operator_details:
5
- provider: # Art. 2
 
6
  verbose: 'The operator of this AI project is a natural or legal person, public authority, agency or other body that develops an AI project or a general-purpose AI model or that has an AI system or a general-purpose AI model developed and places it on the market or puts the AI system into service under its own name or trademark, whether for payment or free of charge'
7
  value: !!bool false
8
- eu_located: # Art. 2
 
9
  verbose: 'AI project operator has its place of establishment or location within the Union'
10
  value: !!bool True
11
- output_used: # Art. 2
 
12
  verbose: 'The output produced by the AI project is used in the Union'
13
  value: !!bool false
14
 
@@ -458,7 +461,7 @@ transparency_obligations:
458
 
459
  # Information related to the Act's requirements for GPAI models
460
 
461
- gpai_model_provider_obligations:
462
  documentation:
463
  intended_uses: # Art. 53(1)(a); Annex XI(1)(1)(a)
464
  verbose: 'The provider has drawn up and will keep up-to-date technical documentation of the model that include a general description of the model includes a description of the tasks that the model is intended to perform and the type and nature of AI systems in which it can be integrated'
@@ -539,7 +542,7 @@ gpai_model_provider_obligations:
539
 
540
  # Information related to the Act's requirements for GPAI models with systemic risk
541
 
542
- obligations_for_gpai_models_with_systemic_risk:
543
  notification: # Art 52(1)
544
  verbose: 'Within two weeks of it being known that the AI project should be classified as a GPAI model with systemic risk, the Commission was notified and provided with the information that supports this finding'
545
  evaluation: # Art. 55(1)(a)
@@ -564,7 +567,8 @@ obligations_for_gpai_models_with_systemic_risk:
564
  verbose: 'The provider has drawn up and will keep up-to-date technical documentation of the model that includes, where applicable, a detailed description of the measures put in place for the purpose of conducting internal and/or external adversarial testing (e.g. red teaming), model adaptations, including alignment and fine-tuning.'
565
  value: !!bool false
566
  documentation_architecture:
567
- verbose: ''The provider has drawn up and will keep up-to-date technical documentation of the model that includes, where applicable, a detailed description of the system architecture explaining how software components build or feed into each other and integrate into the overall processing.'
 
568
 
569
  additional_provider_obligations: # apply these only if operator == provider and ai_project_type == high_risk_ai_system
570
  contact: # Article 16 (b)
 
2
  # Information related to high-level characteristics of AI project, including the role of the operator, their location, and where the output is used
3
 
4
  operator_details:
5
+ provider:
6
+ article: "Art. 2"
7
  verbose: 'The operator of this AI project is a natural or legal person, public authority, agency or other body that develops an AI project or a general-purpose AI model or that has an AI system or a general-purpose AI model developed and places it on the market or puts the AI system into service under its own name or trademark, whether for payment or free of charge'
8
  value: !!bool false
9
+ eu_located:
10
+ article: 'Art. 2'
11
  verbose: 'AI project operator has its place of establishment or location within the Union'
12
  value: !!bool True
13
+ output_used:
14
+ article: 'Art. 2'
15
  verbose: 'The output produced by the AI project is used in the Union'
16
  value: !!bool false
17
 
 
461
 
462
  # Information related to the Act's requirements for GPAI models
463
 
464
+ gpai_model_obligations:
465
  documentation:
466
  intended_uses: # Art. 53(1)(a); Annex XI(1)(1)(a)
467
  verbose: 'The provider has drawn up and will keep up-to-date technical documentation of the model that include a general description of the model includes a description of the tasks that the model is intended to perform and the type and nature of AI systems in which it can be integrated'
 
542
 
543
  # Information related to the Act's requirements for GPAI models with systemic risk
544
 
545
+ gpai_models_with_systemic_risk_obligations:
546
  notification: # Art 52(1)
547
  verbose: 'Within two weeks of it being known that the AI project should be classified as a GPAI model with systemic risk, the Commission was notified and provided with the information that supports this finding'
548
  evaluation: # Art. 55(1)(a)
 
567
  verbose: 'The provider has drawn up and will keep up-to-date technical documentation of the model that includes, where applicable, a detailed description of the measures put in place for the purpose of conducting internal and/or external adversarial testing (e.g. red teaming), model adaptations, including alignment and fine-tuning.'
568
  value: !!bool false
569
  documentation_architecture:
570
+ verbose: 'The provider has drawn up and will keep up-to-date technical documentation of the model that includes, where applicable, a detailed description of the system architecture explaining how software components build or feed into each other and integrate into the overall processing.'
571
+ value: !!bool false
572
 
573
  additional_provider_obligations: # apply these only if operator == provider and ai_project_type == high_risk_ai_system
574
  contact: # Article 16 (b)
utils.py CHANGED
@@ -97,6 +97,7 @@ def check_excepted(project_cc_yaml):
97
  else:
98
  return False
99
 
 
100
  def check_prohibited(dispositive_variables, project_cc_yaml):
101
 
102
  ai_system = project_variables['ai_project_type']['ai_system']
 
97
  else:
98
  return False
99
 
100
+ # TODO update function
101
  def check_prohibited(dispositive_variables, project_cc_yaml):
102
 
103
  ai_system = project_variables['ai_project_type']['ai_system']