Molbap HF Staff committed on
Commit
070e318
·
verified ·
1 Parent(s): 043ca49

Upload generate_graph_modular.py

Browse files
Files changed (1) hide show
  1. generate_graph_modular.py +359 -0
generate_graph_modular.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import ast
3
+ import json
4
+ import argparse
5
+ from collections import defaultdict, Counter
6
+ import re
7
+
8
+
9
def find_modular_files(transformers_path):
    """
    Collect every Python file whose name contains 'modular' below
    ``<transformers_path>/src/transformers/models``.

    Args:
        transformers_path: Path to a local checkout of the Hugging Face
            Transformers repository.

    Returns:
        A sorted list of file paths, each built by joining onto
        ``transformers_path``. The list is empty when the models directory
        is missing or contains no matching file.
    """
    models_path = os.path.join(transformers_path, 'src', 'transformers', 'models')
    # os.walk yields directory entries in an arbitrary, filesystem-dependent
    # order. Sorting keeps everything derived from this list (debug output,
    # link order in the generated HTML) reproducible between runs and machines.
    return sorted(
        os.path.join(root, file_name)
        for root, _, file_names in os.walk(models_path)
        for file_name in file_names
        if 'modular' in file_name and file_name.endswith('.py')
    )
22
+
23
+
24
+ def build_dependency_graph(modular_files):
25
+ """
26
+ Builds a dependency graph by parsing the abstract syntax tree (AST) of each
27
+ modular file. It identifies imports from other models, configurations, and
28
+ processing files within the Transformers library.
29
+ """
30
+ dependencies = defaultdict(list)
31
+ for file_path in modular_files:
32
+ derived_model_name = os.path.basename(os.path.dirname(file_path))
33
+ with open(file_path, 'r', encoding='utf-8') as f:
34
+ try:
35
+ tree = ast.parse(f.read(), filename=file_path)
36
+ for node in ast.walk(tree):
37
+ if not isinstance(node, ast.ImportFrom) or not node.module:
38
+ continue
39
+
40
+ is_relevant_import = ((
41
+ node.module.startswith('transformers.models.') or
42
+ 'modeling_' in node.module or
43
+ 'configuration_' in node.module or
44
+ 'processing_' in node.module or
45
+ node.module.startswith('..'))
46
+ and (all([x not in node.module for x in ['modeling_attn_mask_utils']]))
47
+ )
48
+
49
+ if is_relevant_import:
50
+ path_parts = re.split(r'\.|\.', node.module)
51
+ if len(path_parts) > 1:
52
+ # Heuristic to find the source model name
53
+ source_model_name = ""
54
+ for part in path_parts:
55
+ if part not in ("", "models", "transformers"):
56
+ source_model_name = part
57
+ break
58
+
59
+ if source_model_name and source_model_name != derived_model_name:
60
+ for alias in node.names:
61
+ dependencies[derived_model_name].append({
62
+ 'source': source_model_name,
63
+ 'imported_class': alias.name
64
+ })
65
+ except Exception as e:
66
+ print(f"Could not parse {file_path}: {e}")
67
+ return dict(dependencies)
68
+
69
+
70
def print_debug_info(dependencies):
    """Write a per-model summary of the dependency map to stdout.

    Derived models, their sources, and the imported names of each source are
    all listed in sorted order. An empty map produces a single notice line.
    """
    print("--- Model Dependency Debug ---")
    if dependencies:
        for model_name in sorted(dependencies):
            print(f"\n🎨 Derived Model: {model_name}")
            # Group the imported names under the model they come from.
            names_by_source = {}
            for entry in dependencies[model_name]:
                names_by_source.setdefault(entry['source'], []).append(entry['imported_class'])
            for origin in sorted(names_by_source):
                joined_names = ', '.join(sorted(names_by_source[origin]))
                print(f" └── inherits from '{origin}' (imports: {joined_names})")
        print("\n--------------------------")
    else:
        print("No modular dependencies found.")
84
+
85
+
86
def _build_graph_data(dependencies):
    """Convert the dependency map into the ``{"nodes": [...], "links": [...]}`` payload for D3."""
    derived_models = set(dependencies)
    source_models = set()
    # (source, target) -> number of names the target imports from the source.
    # Counter keeps insertion order, so links appear in first-seen order.
    import_counts = Counter()
    for derived_model, deps in dependencies.items():
        for dep in deps:
            source_models.add(dep['source'])
            import_counts[(dep['source'], derived_model)] += 1

    # A base model is imported from but has no modular file of its own.
    base_models = source_models - derived_models

    final_links = [
        {"source": source, "target": target, "label": f"{count} classes"}
        for (source, target), count in import_counts.items()
    ]

    degree = Counter()
    for link in final_links:
        degree[link["source"]] += 1
        degree[link["target"]] += 1
    max_deg = max(degree.values(), default=1)  # default avoids a division by zero

    node_list = [
        {
            "id": name,
            "is_base": name in base_models,
            # Relative size: 1.0 for an unconnected node up to 3.0 for the
            # best-connected one.
            "size": 1 + 2 * (degree[name] / max_deg),
        }
        for name in sorted(derived_models | source_models)
    ]
    return {"nodes": node_list, "links": final_links}


def generate_d3_visualization(dependencies, output_filename='d3_dependency_graph.html', hf_logo_path='hf-logo.svg'):
    """
    Write an interactive, zoomable D3.js force-graph of the dependency map to
    an HTML file.

    Base models (imported from, but without a modular file of their own) are
    drawn as an image loaded from ``hf_logo_path``; derived models are drawn
    as circles with a boxed label. Each edge is labelled with the number of
    names the derived model imports from the source model.

    The page loads D3 v7 and the Inter font from the network; the graph data
    itself is embedded in the file.

    Args:
        dependencies: Mapping of derived model name to a list of
            ``{'source': ..., 'imported_class': ...}`` dicts.
        output_filename: Path of the HTML file to write (overwritten if present).
        hf_logo_path: URL or path placed in the ``href`` of the base-model
            image; it is resolved by the browser, not read by this function.

    Returns:
        None. Prints a confirmation line after the file has been written.
    """
    # 1. Assemble graph data
    graph_data = _build_graph_data(dependencies)

    # Values interpolated into the <script> element are emitted as JSON
    # literals so that quotes and backslashes cannot break the JavaScript.
    # "</" is rewritten to "<\/" (an equivalent JSON/JS escape) so the data
    # can never terminate the surrounding <script> element early.
    graph_json = json.dumps(graph_data, indent=4).replace("</", "<\\/")
    logo_href_js = json.dumps(hf_logo_path).replace("</", "<\\/")

    # 2. Static path for the HF logo outline (unused but kept for reference)
    hf_svg_path = (
        "M21.2,6.7c-0.2-0.2-0.5-0.3-0.8-0.3H3.6C3.3,6.4,3,6.5,2.8,6.7s-0.3,0.5-0.3,0.8v10.8c0,0.3,0.1,0.5,0.3,0.8 "
        "c0.2,0.2,0.5,0.3,0.8,0.3h16.8c0.3,0,0.5-0.1,0.8-0.3c0.2-0.2,0.3-0.5,0.3-0.8V7.5C21.5,7.2,21.4,6.9,21.2,6.7z "
        "M12,17.8L5.9,9.4h3.1 V8.3h6v1.1h3.1L12,17.8z"
    )

    # 3. HTML / CSS / JS (literal braces are doubled because this is an f-string)
    html_template = f"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Transformers Modular Model Dependencies</title>
    <style>
        /* Google-font – small fallback cost & optional */
        @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600&display=swap');

        :root {{
            --base-size: 60px; /* icon radius helper */
        }}

        body {{
            font-family: 'Inter', Arial, sans-serif;
            margin: 0;
            overflow: hidden;
            background-color: transparent; /* requested transparency */
        }}

        svg {{
            width: 100vw;
            height: 100vh;
        }}

        .link {{
            stroke: #999;
            stroke-opacity: 0.6;
        }}

        .node-label {{
            fill: #333;
            pointer-events: none;
            text-anchor: middle;
            font-weight: 600;
        }}

        .link-label {{
            fill: #555;
            font-size: 10px;
            pointer-events: none;
            text-anchor: middle;
        }}

        .node.base path {{ fill: #ffbe0b; }}
        .node.derived circle {{ fill: #1f77b4; }}

        /* Legend styling */
        #legend {{
            position: fixed;
            top: 18px;
            left: 18px;
            font-size: 20px;
            background: rgba(255,255,255,0.92);
            padding: 18px 28px;
            border-radius: 10px;
            border: 1.5px solid #bbb;
            font-family: 'Inter', Arial, sans-serif;
            box-shadow: 0 2px 8px rgba(0,0,0,0.08);
            z-index: 1000;
        }}
    </style>
</head>
<body>
    <div id="legend">🟡 base model (HF icon)<br>🔵 derived modular model<br>Edge label: #classes imported</div>
    <svg id="dependency-graph"></svg>
    <script src="https://d3js.org/d3.v7.min.js"></script>
    <script>
        const graphData = {graph_json};
        const hfLogoPath = "{hf_svg_path}"; // kept for potential future use

        const width = window.innerWidth;
        const height = window.innerHeight;

        // Icon size in px, read once from the CSS custom property.
        const baseSize = parseFloat(
            getComputedStyle(document.documentElement).getPropertyValue('--base-size')
        );

        const svg = d3.select('#dependency-graph')
            .call(
                d3.zoom().on('zoom', (event) => {{
                    g.attr('transform', event.transform);
                }})
            );

        const g = svg.append('g');

        // Forces
        const simulation = d3.forceSimulation(graphData.nodes)
            .force('link', d3.forceLink(graphData.links).id(d => d.id).distance(500))
            .force('charge', d3.forceManyBody().strength(-500))
            .force('center', d3.forceCenter(width / 2, height / 2))
            .force('collide', d3.forceCollide(0.01 * baseSize));

        // Links
        const link = g.append('g')
            .selectAll('line')
            .data(graphData.links)
            .join('line')
            .attr('class', 'link')
            .attr('stroke-width', 1.5);

        // Link-labels (#classes)
        const linkLabel = g.append('g')
            .selectAll('text')
            .data(graphData.links)
            .join('text')
            .attr('class', 'link-label')
            .text(d => d.label);

        // Nodes (base vs derived)
        const node = g.append('g')
            .selectAll('g')
            .data(graphData.nodes)
            .join('g')
            .attr('class', d => d.is_base ? 'node base' : 'node derived')
            .call(d3.drag()
                .on('start', dragstarted)
                .on('drag', dragged)
                .on('end', dragended)
            );

        // Base-model icon (HF logo)
        node.filter(d => d.is_base)
            .append('image')
            .attr('xlink:href', {logo_href_js})
            .attr('x', -baseSize / 2)
            .attr('y', -baseSize / 2)
            .attr('width', baseSize)
            .attr('height', baseSize);

        // Base-model label (below icon)
        node.filter(d => d.is_base)
            .append('text')
            .attr('class', 'node-label')
            .attr('y', d => 30 * d.size + 8) // keep under the icon
            .style('font-size', d => `${{26 * d.size}}px`) // scale 26–78 px for size 1-3
            .text(d => d.id);

        // Derived-model circle + label w/ background rect
        const derived = node.filter(d => !d.is_base);

        derived.append('circle')
            .attr('r', d => 20 * d.size); // scaled

        const labelGroup = derived.append('g').attr('class', 'label-group');
        labelGroup.append('rect')
            .attr('x', -45)
            .attr('y', -18)
            .attr('width', 90)
            .attr('height', 36)
            .attr('rx', 8)
            .attr('fill', '#fffbe6')
            .attr('stroke', '#ccc');
        labelGroup.append('text')
            .attr('class', 'node-label')
            .attr('dy', '0.35em')
            .style('font-size', '18px')
            .text(d => d.id);

        // Tick
        simulation.on('tick', () => {{
            link.attr('x1', d => d.source.x)
                .attr('y1', d => d.source.y)
                .attr('x2', d => d.target.x)
                .attr('y2', d => d.target.y);

            linkLabel.attr('x', d => (d.source.x + d.target.x) / 2)
                .attr('y', d => (d.source.y + d.target.y) / 2);

            node.attr('transform', d => `translate(${{d.x}}, ${{d.y}})`);
        }});

        // Drag helpers
        function dragstarted(event, d) {{
            if (!event.active) simulation.alphaTarget(0.3).restart();
            d.fx = d.x; d.fy = d.y;
        }}
        function dragged(event, d) {{
            d.fx = event.x; d.fy = event.y;
        }}
        function dragended(event, d) {{
            if (!event.active) simulation.alphaTarget(0);
            d.fx = null; d.fy = null;
        }}
    </script>
</body>
</html>
"""

    with open(output_filename, 'w', encoding='utf-8') as f:
        f.write(html_template)
    print(f"✅ D3.js visualization saved to '{output_filename}'. Open this file in your browser.")
344
+
345
+
346
if __name__ == "__main__":
    # Command-line entry point: scan a Transformers checkout, print the
    # dependency summary, and write the D3 visualization with default settings.
    cli = argparse.ArgumentParser(
        description="Visualize modular model dependencies in Transformers using D3.js.",
    )
    cli.add_argument(
        "transformers_path",
        type=str,
        help="The local path to the Hugging Face transformers repository.",
    )
    repo_path = cli.parse_args().transformers_path

    found_files = find_modular_files(repo_path)
    if found_files:
        dependency_map = build_dependency_graph(found_files)
        print_debug_info(dependency_map)
        generate_d3_visualization(dependency_map)
    else:
        print("No modular files found. Make sure the path to the transformers repository is correct.")