Spaces:
Sleeping
Sleeping
File size: 8,581 Bytes
6f6019f 0c52573 6f6019f 0a9a29e 0c52573 6f6019f dc07873 1aeb49e 0c52573 1aeb49e 0c52573 0a9a29e 0c52573 dc07873 1aeb49e 0c52573 0a9a29e dc07873 6f6019f 0c52573 1aeb49e 6f6019f 1aeb49e 6f6019f 1aeb49e 6f6019f dc07873 6f6019f dc07873 1aeb49e 0a9a29e 1aeb49e 0a9a29e 1aeb49e 0a9a29e 1aeb49e 6f6019f 0a9a29e 6f6019f 1aeb49e 0a9a29e 1aeb49e 0a9a29e 1aeb49e 0a9a29e 1aeb49e 0a9a29e 1aeb49e 0a9a29e 1aeb49e 0a9a29e 1aeb49e dc07873 0a9a29e 6f6019f 0a9a29e 6f6019f dc07873 0a9a29e dc07873 0a9a29e 6f6019f 0a9a29e dc07873 6f6019f 0a9a29e 6f6019f dc07873 6f6019f dc07873 0a9a29e dc07873 6f6019f dc07873 0a9a29e dc07873 6f6019f 0a9a29e dc07873 6f6019f 0a9a29e 6f6019f dc07873 0a9a29e 6f6019f 0a9a29e dc07873 0a9a29e dc07873 0a9a29e dc07873 0a9a29e dc07873 0a9a29e dc07873 0a9a29e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 |
import ast
from langchain.schema import Document
import ast
from langchain.schema import Document
def chunk_python_code_with_metadata(source_code, reference):
    """
    Entry point for chunking a Python source file into Documents.

    Parses ``source_code`` via :func:`iterate_ast`, then tags every
    produced Document with the originating ``reference`` path and a
    ``usage`` category derived from that path.

    Args:
        source_code: Full text of the Python file to chunk.
        reference: Repository-relative path of the file (used both as
            the AST filename and to classify usage).

    Returns:
        list: Document chunks with ``reference`` and ``usage`` metadata
        attached.
    """
    documents = []
    iterate_ast(source_code, documents, reference)

    # Classify usage by path prefix; unknown locations fall through to
    # "undefined". (Debug prints removed: library code should not write
    # to stdout per processed file/chunk.)
    if reference.startswith("kadi_apy/lib/"):
        usage = "library"
    elif reference.startswith("kadi_apy/cli/"):
        usage = "cli_library"
    elif reference.startswith("doc/"):
        usage = "doc"
    else:
        usage = "undefined"

    # Stamp provenance metadata on every chunk.
    for doc in documents:
        doc.metadata["reference"] = reference
        doc.metadata["usage"] = usage
    return documents
def iterate_ast(source_code, documents, reference):
    """
    Parse ``source_code`` into an AST and dispatch each top-level node
    to its dedicated handler, appending Documents to ``documents``.
    """
    tree = ast.parse(source_code, filename=reference)

    # Collect every import in the module up front; handlers use this
    # mapping to attach only the imports relevant to their chunk.
    imports_dict = extract_imports(tree)

    top_level = list(ast.iter_child_nodes(tree))

    # Empty module: emit a single whole-file document and stop.
    if not top_level:
        handle_no_first_level_node_found(documents, source_code, imports_dict, reference)
        return

    # Module consisting purely of imports (common for __init__.py).
    if all(isinstance(node, (ast.Import, ast.ImportFrom)) for node in top_level):
        handle_first_level_imports_only(documents, source_code, imports_dict, reference)

    # Dispatch each remaining top-level construct to its handler;
    # node types without a handler are skipped, as before.
    handlers = {
        ast.ClassDef: handle_first_level_class,
        ast.FunctionDef: handle_first_level_func,
        ast.Assign: handle_first_level_assign,
    }
    for node in top_level:
        handler = handlers.get(type(node))
        if handler is not None:
            handler(node, documents, source_code, imports_dict)
def handle_first_level_imports_only(documents, source_code, imports_dict, reference):
    """
    Emit a single whole-file Document for a module whose top level
    contains only import statements.

    Args:
        documents: List the new Document is appended to (mutated).
        source_code: Full text of the module.
        imports_dict: Mapping of imported name -> fully qualified name.
        reference: Path of the file; ``__init__.py`` files get a
            dedicated chunk type.
    """
    # Renamed from ``type`` to avoid shadowing the builtin.
    chunk_type = "__init__-file" if reference.endswith("__init__.py") else "undefined"

    # "none" keeps the metadata value non-empty for downstream filters.
    metadata = {"type": chunk_type, "imports": "none" if not imports_dict else imports_dict}

    documents.append(Document(page_content=source_code, metadata=metadata))
def extract_imports(tree):
    """
    Walk the entire AST and collect every imported name.

    Returns:
        dict: imported name -> fully qualified import path.
    """
    found = {}
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # ``import a.b`` binds "a.b" -> "a.b".
            found.update((alias.name, alias.name) for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # ``from m import x`` binds "x" -> "m.x"; a bare relative
            # import (``from . import x``) has no module, so "x" -> "x".
            prefix = node.module or ""
            for alias in node.names:
                found[alias.name] = f"{prefix}.{alias.name}" if prefix else alias.name
    return found
def analyze_imports(node, imports_dict):
    """
    Collect the fully qualified imports referenced anywhere inside
    ``node``.

    Args:
        node: AST subtree (function, class, assignment, ...) to scan.
        imports_dict: Mapping of local name -> fully qualified import.

    Returns:
        list: sorted, de-duplicated qualified names used by the node.
        Sorting makes the resulting metadata deterministic across runs
        (``list(set(...))`` order depends on hashing).
    """
    relevant_imports = {
        imports_dict[sub_node.id]
        for sub_node in ast.walk(node)
        if isinstance(sub_node, ast.Name) and sub_node.id in imports_dict
    }
    return sorted(relevant_imports)
def handle_no_first_level_node_found(documents, source_code, imports_dict, reference):
    """
    Emit a single whole-file Document when the module AST has no
    top-level nodes at all (e.g. an empty or comment-only file).

    Args:
        documents: List the new Document is appended to (mutated).
        source_code: Full text of the module.
        imports_dict: Mapping of imported name -> fully qualified name.
        reference: Path of the file; ``__init__.py`` files get a
            dedicated chunk type.
    """
    # Renamed from ``type`` to avoid shadowing the builtin.
    chunk_type = "__init__-file" if reference.endswith("__init__.py") else "undefined"

    # "none" keeps the metadata value non-empty for downstream filters.
    metadata = {"type": chunk_type, "imports": "none" if not imports_dict else imports_dict}

    documents.append(Document(page_content=source_code, metadata=metadata))
def handle_first_level_assign(assign_node, documents, source_code, imports_dict):
    """
    Chunk a top-level assignment statement into its own Document.
    """
    # AST line numbers are 1-based; slice the statement's source lines.
    lines = source_code.splitlines()
    snippet = '\n'.join(lines[assign_node.lineno - 1:assign_node.end_lineno])

    # Only the imports actually referenced by this assignment.
    used_imports = analyze_imports(assign_node, imports_dict)

    documents.append(Document(
        page_content=snippet,
        metadata={
            "type": "Assign",
            "imports": used_imports if used_imports else "none",
        },
    ))
def handle_first_level_class(class_node, documents, source_code, imports_dict):
    """
    Chunk a top-level class into one header Document plus one Document
    per method.

    The class chunk spans the ``class`` line up to (but not including)
    the first method; each method chunk spans its decorators through its
    last line.
    """
    lines = source_code.splitlines()

    def _def_start(func_node):
        # A decorated definition starts at its first decorator line,
        # not at the ``def`` line.
        if func_node.decorator_list:
            return func_node.decorator_list[0].lineno
        return func_node.lineno

    class_start_line = class_node.lineno
    # End the class header just before the first method *including its
    # decorators* — otherwise decorator lines of a decorated first
    # method would be duplicated into both the class chunk and that
    # method's chunk.
    method_starts = [
        _def_start(child) for child in class_node.body
        if isinstance(child, ast.FunctionDef)
    ]
    class_end_line = min(method_starts, default=class_node.end_lineno) - 1
    class_source = '\n'.join(lines[class_start_line - 1:class_end_line])

    class_imports = analyze_imports(class_node, imports_dict)
    documents.append(Document(
        page_content=class_source,
        metadata={
            "type": "class",
            "class": class_node.name,
            "visibility": "public",
            "imports": "none" if not class_imports else class_imports,
        },
    ))

    # One Document per direct method, decorators included.
    for method_node in ast.iter_child_nodes(class_node):
        if not isinstance(method_node, ast.FunctionDef):
            continue
        method_source = '\n'.join(
            lines[_def_start(method_node) - 1:method_node.end_lineno]
        )
        # Leading-underscore names are treated as internal API.
        visibility = "internal" if method_node.name.startswith("_") else "public"
        method_imports = analyze_imports(method_node, imports_dict)
        documents.append(Document(
            page_content=method_source,
            metadata={
                "type": "method",
                "method": method_node.name,
                "visibility": visibility,
                "imports": "none" if not method_imports else method_imports,
                "class": class_node.name,
            },
        ))
def handle_first_level_func(function_node, documents, source_code, imports_dict):
    """
    Chunk a top-level function into its own Document.

    Functions decorated with ``apy_command`` — bare (``@apy_command``)
    or called (``@apy_command(...)``) — are typed as "command" and keyed
    under ``command``; all others are typed as "function" and keyed
    under ``method`` (key name kept for backward compatibility with
    existing metadata consumers).
    """
    # Include decorator lines in the chunk.
    function_start_line = (
        function_node.decorator_list[0].lineno
        if function_node.decorator_list else function_node.lineno
    )
    function_end_line = function_node.end_lineno
    function_source = '\n'.join(
        source_code.splitlines()[function_start_line - 1:function_end_line]
    )

    function_imports = analyze_imports(function_node, imports_dict)
    # Leading-underscore names are treated as internal API.
    visibility = "internal" if function_node.name.startswith("_") else "public"

    def _decorator_name(decorator):
        # ``@apy_command`` is an ast.Name; ``@apy_command(...)`` is an
        # ast.Call whose .func is the Name. The old hasattr(decorator,
        # "id") check missed the called form.
        if isinstance(decorator, ast.Call):
            decorator = decorator.func
        return decorator.id if isinstance(decorator, ast.Name) else None

    is_command = any(
        _decorator_name(decorator) == "apy_command"
        for decorator in function_node.decorator_list
    )

    metadata = {
        "type": "command" if is_command else "function",
        "visibility": visibility,
        "imports": "none" if not function_imports else function_imports,
    }
    if is_command:
        metadata["command"] = function_node.name
    else:
        metadata["method"] = function_node.name

    documents.append(Document(
        page_content=function_source,
        metadata=metadata,
    ))
|