import os import ast import json import argparse from collections import defaultdict, Counter import re def find_modular_files(transformers_path): """ Scans for files containing 'modular' in their name within the specified Hugging Face Transformers repository path. This includes modeling, configuration, and processing files. """ modular_files = [] models_path = os.path.join(transformers_path, 'src', 'transformers', 'models') for root, _, files in os.walk(models_path): for file in files: if 'modular' in file and file.endswith('.py'): modular_files.append(os.path.join(root, file)) return modular_files def build_dependency_graph(modular_files): """ Builds a dependency graph by parsing the abstract syntax tree (AST) of each modular file. It identifies imports from other models, configurations, and processing files within the Transformers library. """ dependencies = defaultdict(list) for file_path in modular_files: derived_model_name = os.path.basename(os.path.dirname(file_path)) with open(file_path, 'r', encoding='utf-8') as f: try: tree = ast.parse(f.read(), filename=file_path) for node in ast.walk(tree): if not isinstance(node, ast.ImportFrom) or not node.module: continue is_relevant_import = (( node.module.startswith('transformers.models.') or 'modeling_' in node.module or 'configuration_' in node.module or 'processing_' in node.module or node.module.startswith('..')) and (all([x not in node.module for x in ['modeling_attn_mask_utils']])) ) if is_relevant_import: path_parts = re.split(r'\.|\.', node.module) if len(path_parts) > 1: # Heuristic to find the source model name source_model_name = "" for part in path_parts: if part not in ("", "models", "transformers"): source_model_name = part break if source_model_name and source_model_name != derived_model_name: for alias in node.names: dependencies[derived_model_name].append({ 'source': source_model_name, 'imported_class': alias.name }) except Exception as e: print(f"Could not parse {file_path}: {e}") return dict(dependencies) def print_debug_info(dependencies): """Prints a human-readable summary of the model dependencies.""" print("--- Model Dependency Debug ---") if not dependencies: print("No modular dependencies found.") return for derived_model, deps in sorted(dependencies.items()): print(f"\nšØ Derived Model: {derived_model}") source_groups = defaultdict(list) for dep in deps: source_groups[dep['source']].append(dep['imported_class']) for source, imports in sorted(source_groups.items()): print(f" āāā inherits from '{source}' (imports: {', '.join(sorted(imports))})") print("\n--------------------------") def generate_d3_visualization(dependencies, output_filename='d3_dependency_graph.html', hf_logo_path='hf-logo.svg'): """ Generates a selfācontained, interactive D3.js HTML file for visualizing the dependency graph. The visualization is zoomable and uses a custom SVG path for source nodes to resemble the Hugging Face logo. Minor finetuning over the original version: ā Larger baseāmodel icons & labels ā Cleaner sansāserif font (Inter/Arial fallback) ā Transparent page background ā Tighter layout (reduced repulsion & link distance) ā Fixed legend in topāleft corner """ # 1ļøā£ Assemble graphādata ------------------------------------------------------------------ nodes = set() links = [] source_models = set() derived_models = set(dependencies.keys()) for derived_model, deps in dependencies.items(): nodes.add(derived_model) for dep in deps: nodes.add(dep['source']) source_models.add(dep['source']) links.append({ "source": dep['source'], "target": derived_model, "label": dep['imported_class'] }) base_models = source_models - derived_models consolidated_links = defaultdict(list) for link in links: key = (link['source'], link['target']) consolidated_links[key].append(link['label']) final_links = [ {"source": k[0], "target": k[1], "label": f"{len(v)} classes"} for k, v in consolidated_links.items() ] degree = Counter() for link in final_links: degree[link["source"]] += 1 degree[link["target"]] += 1 max_deg = max(degree.values() or [1]) # prevent div by 0 node_list = [] for name in sorted(nodes): node_list.append({ "id": name, "is_base": name in base_models, "size": 1 + 2 * (degree[name] / max_deg) }) graph_data = { "nodes": node_list, "links": final_links } # 2ļøā£ Static path for the HF logo outline (unused but kept for reference) ------------------ hf_svg_path = ( "M21.2,6.7c-0.2-0.2-0.5-0.3-0.8-0.3H3.6C3.3,6.4,3,6.5,2.8,6.7s-0.3,0.5-0.3,0.8v10.8c0,0.3,0.1,0.5,0.3,0.8 " "c0.2,0.2,0.5,0.3,0.8,0.3h16.8c0.3,0,0.5-0.1,0.8-0.3c0.2-0.2,0.3-0.5,0.3-0.8V7.5C21.5,7.2,21.4,6.9,21.2,6.7z " "M12,17.8L5.9,9.4h3.1 V8.3h6v1.1h3.1L12,17.8z" ) # 3ļøā£ HTML / CSS / JS --------------------------------------------------------------------- html_template = f"""