a1c00l commited on
Commit
018daa2
·
verified ·
1 Parent(s): 35fb2a2

Update src/aibom_generator/generator.py

Browse files
Files changed (1) hide show
  1. src/aibom_generator/generator.py +124 -22
src/aibom_generator/generator.py CHANGED
@@ -1,7 +1,7 @@
1
  import json
2
  import uuid
3
  import datetime
4
- from typing import Dict, Optional, Any, Tuple
5
 
6
  from huggingface_hub import HfApi, ModelCard
7
  from .utils import calculate_completeness_score
@@ -25,29 +25,83 @@ class AIBOMGenerator:
25
  model_id: str,
26
  output_file: Optional[str] = None,
27
  include_inference: Optional[bool] = None,
28
- ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
29
  use_inference = include_inference if include_inference is not None else self.use_inference
30
  model_info = self._fetch_model_info(model_id)
31
  model_card = self._fetch_model_card(model_id)
32
- aibom = self._create_aibom_structure(model_id, model_info, model_card, use_inference)
33
 
34
- # Calculate score after AIBOM is complete
35
- score_report = calculate_completeness_score(aibom)
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- # Add score to metadata properties
38
- if "metadata" in aibom and not "properties" in aibom["metadata"]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  aibom["metadata"]["properties"] = []
40
 
41
  if "metadata" in aibom and "properties" in aibom["metadata"]:
42
- aibom["metadata"]["properties"].append({"name": "aibom:quality-score", "value": str(score_report["total_score"])})
43
- aibom["metadata"]["properties"].append({"name": "aibom:quality-breakdown", "value": json.dumps(score_report["section_scores"])})
44
- aibom["metadata"]["properties"].append({"name": "aibom:max-scores", "value": json.dumps(score_report["max_scores"])})
 
 
 
 
 
 
 
 
 
45
 
46
  if output_file:
47
  with open(output_file, 'w') as f:
48
  json.dump(aibom, f, indent=2)
49
 
50
- return aibom, score_report
 
 
 
 
 
 
 
 
 
51
 
52
  def _fetch_model_info(self, model_id: str) -> Dict[str, Any]:
53
  try:
@@ -66,16 +120,8 @@ class AIBOMGenerator:
66
  def _create_aibom_structure(
67
  self,
68
  model_id: str,
69
- model_info: Dict[str, Any],
70
- model_card: Optional[ModelCard],
71
- use_inference: bool,
72
  ) -> Dict[str, Any]:
73
- metadata = self._extract_structured_metadata(model_id, model_info, model_card)
74
-
75
- if use_inference and model_card and self.inference_model_url:
76
- unstructured_metadata = self._extract_unstructured_metadata(model_card)
77
- metadata = {**unstructured_metadata, **metadata}
78
-
79
  aibom = {
80
  "bomFormat": "CycloneDX",
81
  "specVersion": "1.6",
@@ -123,6 +169,7 @@ class AIBOMGenerator:
123
  "datasets": card_data.get("datasets"),
124
  "model_name": card_data.get("model_name"),
125
  "tags": card_data.get("tags", metadata.get("tags", [])),
 
126
  })
127
  if hasattr(model_card.data, "eval_results") and model_card.data.eval_results:
128
  metadata["eval_results"] = model_card.data.eval_results
@@ -133,8 +180,63 @@ class AIBOMGenerator:
133
 
134
  return {k: v for k, v in metadata.items() if v is not None}
135
 
136
- def _extract_unstructured_metadata(self, model_card: ModelCard) -> Dict[str, Any]:
137
- return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  def _create_metadata_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
140
  timestamp = datetime.datetime.utcnow().isoformat() + "Z"
 
1
  import json
2
  import uuid
3
  import datetime
4
+ from typing import Dict, Optional, Any
5
 
6
  from huggingface_hub import HfApi, ModelCard
7
  from .utils import calculate_completeness_score
 
25
  model_id: str,
26
  output_file: Optional[str] = None,
27
  include_inference: Optional[bool] = None,
28
+ ) -> Dict[str, Any]:
29
  use_inference = include_inference if include_inference is not None else self.use_inference
30
  model_info = self._fetch_model_info(model_id)
31
  model_card = self._fetch_model_card(model_id)
 
32
 
33
+ # Store original metadata before any AI enhancement
34
+ original_metadata = self._extract_structured_metadata(model_id, model_info, model_card)
35
+
36
+ # Create initial AIBOM with original metadata
37
+ original_aibom = self._create_aibom_structure(model_id, original_metadata)
38
+
39
+ # Calculate initial score
40
+ original_score = calculate_completeness_score(original_aibom)
41
+
42
+ # Final metadata starts with original metadata
43
+ final_metadata = original_metadata.copy()
44
+
45
+ # Apply AI enhancement if requested
46
+ ai_enhanced = False
47
+ ai_model_name = None
48
 
49
+ if use_inference and self.inference_model_url:
50
+ try:
51
+ # Extract additional metadata using AI
52
+ enhanced_metadata = self._extract_unstructured_metadata(model_card, model_id)
53
+
54
+ # If we got enhanced metadata, merge it with original
55
+ if enhanced_metadata:
56
+ ai_enhanced = True
57
+ ai_model_name = "BERT-base-uncased" # Will be replaced with actual model name
58
+
59
+ # Merge enhanced metadata with original (enhanced takes precedence)
60
+ for key, value in enhanced_metadata.items():
61
+ if value is not None and (key not in final_metadata or not final_metadata[key]):
62
+ final_metadata[key] = value
63
+ except Exception as e:
64
+ print(f"Error during AI enhancement: {e}")
65
+ # Continue with original metadata if enhancement fails
66
+
67
+ # Create final AIBOM with potentially enhanced metadata
68
+ aibom = self._create_aibom_structure(model_id, final_metadata)
69
+
70
+ # Calculate final score
71
+ final_score = calculate_completeness_score(aibom)
72
+
73
+ # Add score and enhancement info to metadata properties
74
+ if "metadata" in aibom and "properties" not in aibom["metadata"]:
75
  aibom["metadata"]["properties"] = []
76
 
77
  if "metadata" in aibom and "properties" in aibom["metadata"]:
78
+ # Add score information
79
+ aibom["metadata"]["properties"].append({"name": "aibom:quality-score", "value": str(final_score["total_score"])})
80
+ aibom["metadata"]["properties"].append({"name": "aibom:quality-breakdown", "value": json.dumps(final_score["section_scores"])})
81
+ aibom["metadata"]["properties"].append({"name": "aibom:max-scores", "value": json.dumps(final_score["max_scores"])})
82
+
83
+ # Add AI enhancement information
84
+ if ai_enhanced:
85
+ aibom["metadata"]["properties"].append({"name": "aibom:ai-enhanced", "value": "true"})
86
+ aibom["metadata"]["properties"].append({"name": "aibom:ai-model", "value": ai_model_name})
87
+ aibom["metadata"]["properties"].append({"name": "aibom:original-score", "value": str(original_score["total_score"])})
88
+ aibom["metadata"]["properties"].append({"name": "aibom:score-improvement",
89
+ "value": str(round(final_score["total_score"] - original_score["total_score"], 2))})
90
 
91
  if output_file:
92
  with open(output_file, 'w') as f:
93
  json.dump(aibom, f, indent=2)
94
 
95
+ # Create enhancement report for UI display
96
+ enhancement_report = {
97
+ "ai_enhanced": ai_enhanced,
98
+ "ai_model": ai_model_name if ai_enhanced else None,
99
+ "original_score": original_score,
100
+ "final_score": final_score,
101
+ "improvement": round(final_score["total_score"] - original_score["total_score"], 2) if ai_enhanced else 0
102
+ }
103
+
104
+ return aibom, enhancement_report
105
 
106
  def _fetch_model_info(self, model_id: str) -> Dict[str, Any]:
107
  try:
 
120
  def _create_aibom_structure(
121
  self,
122
  model_id: str,
123
+ metadata: Dict[str, Any],
 
 
124
  ) -> Dict[str, Any]:
 
 
 
 
 
 
125
  aibom = {
126
  "bomFormat": "CycloneDX",
127
  "specVersion": "1.6",
 
169
  "datasets": card_data.get("datasets"),
170
  "model_name": card_data.get("model_name"),
171
  "tags": card_data.get("tags", metadata.get("tags", [])),
172
+ "description": card_data.get("model_summary", None)
173
  })
174
  if hasattr(model_card.data, "eval_results") and model_card.data.eval_results:
175
  metadata["eval_results"] = model_card.data.eval_results
 
180
 
181
  return {k: v for k, v in metadata.items() if v is not None}
182
 
183
+ def _extract_unstructured_metadata(self, model_card: Optional[ModelCard], model_id: str) -> Dict[str, Any]:
184
+ """
185
+ Extract additional metadata from model card using BERT model.
186
+ This is a placeholder implementation that would be replaced with actual BERT inference.
187
+
188
+ In a real implementation, this would:
189
+ 1. Extract text from model card
190
+ 2. Use BERT to identify key information
191
+ 3. Structure the extracted information
192
+
193
+ For now, we'll simulate this with some basic extraction logic.
194
+ """
195
+ enhanced_metadata = {}
196
+
197
+ # In a real implementation, we would use a BERT model here
198
+ # Since we can't install the required libraries due to space constraints,
199
+ # we'll simulate the enhancement with a placeholder implementation
200
+
201
+ if model_card and hasattr(model_card, "text"):
202
+ card_text = model_card.text
203
+
204
+ # Simulate BERT extraction with basic text analysis
205
+ # In reality, this would be done with NLP models
206
+
207
+ # Extract description if missing
208
+ if card_text and "description" not in enhanced_metadata:
209
+ # Take first paragraph that's longer than 20 chars as description
210
+ paragraphs = [p.strip() for p in card_text.split('\n\n')]
211
+ for p in paragraphs:
212
+ if len(p) > 20 and not p.startswith('#'):
213
+ enhanced_metadata["description"] = p
214
+ break
215
+
216
+ # Extract limitations if present
217
+ if "limitations" not in enhanced_metadata:
218
+ if "## Limitations" in card_text:
219
+ limitations_section = card_text.split("## Limitations")[1].split("##")[0].strip()
220
+ if limitations_section:
221
+ enhanced_metadata["limitations"] = limitations_section
222
+
223
+ # Extract ethical considerations if present
224
+ if "ethical_considerations" not in enhanced_metadata:
225
+ for heading in ["## Ethical Considerations", "## Ethics", "## Bias"]:
226
+ if heading in card_text:
227
+ section = card_text.split(heading)[1].split("##")[0].strip()
228
+ if section:
229
+ enhanced_metadata["ethical_considerations"] = section
230
+ break
231
+
232
+ # Extract risks if present
233
+ if "risks" not in enhanced_metadata:
234
+ if "## Risks" in card_text:
235
+ risks_section = card_text.split("## Risks")[1].split("##")[0].strip()
236
+ if risks_section:
237
+ enhanced_metadata["risks"] = risks_section
238
+
239
+ return enhanced_metadata
240
 
241
  def _create_metadata_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
242
  timestamp = datetime.datetime.utcnow().isoformat() + "Z"