Spaces:
Sleeping
Sleeping
File size: 8,421 Bytes
5a58620 bba9630 5a58620 217d002 c190b69 5a58620 c190b69 5a58620 4039137 217d002 c190b69 217d002 4039137 c190b69 217d002 4039137 c190b69 217d002 4039137 217d002 4039137 c190b69 217d002 bba9630 4039137 bba9630 4039137 bba9630 217d002 4039137 bba9630 4039137 bba9630 4039137 bba9630 217d002 4039137 bba9630 5a58620 c190b69 4039137 c190b69 4039137 c190b69 4039137 c190b69 217d002 bba9630 5a58620 bba9630 217d002 5a58620 217d002 bba9630 4039137 5a58620 217d002 bba9630 5a58620 217d002 5a58620 4039137 5a58620 217d002 4039137 5a58620 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
# parser.py
import ast
import zlib
def get_category(node):
    """Return the category label for an AST node.

    The node is compared against an ordered table of AST node types and the
    label of the first matching entry is returned. Nodes that match no entry
    (for example ``pass``) are labelled ``'other'``.
    """
    labels_by_type = (
        ((ast.Import, ast.ImportFrom), 'import'),
        ((ast.Assign, ast.AnnAssign, ast.AugAssign), 'assignment'),
        (ast.FunctionDef, 'function'),
        (ast.AsyncFunctionDef, 'async_function'),
        (ast.ClassDef, 'class'),
        (ast.If, 'if'),
        (ast.While, 'while'),
        (ast.For, 'for'),
        (ast.Try, 'try'),
        (ast.Expr, 'expression'),
    )
    for node_types, label in labels_by_type:
        if isinstance(node, node_types):
            return label
    # Fallback for uncategorized nodes (e.g., 'pass').
    return 'other'
def create_vector(category, level, location, total_lines, parent_path):
    """Create a numeric vector for a part, including hierarchy info.

    Args:
        category: Category label of the part (e.g. ``'if'``, ``'spacer'``).
        level: Nesting level of the part.
        location: ``(start_line, end_line)`` tuple, 1-based and inclusive.
        total_lines: Number of lines in the whole source; used to normalise
            the start and end lines.
        parent_path: List of ancestor node IDs.

    Returns:
        A six-element list: ``[category_id, level, start_line / total_lines,
        end_line / total_lines, parent_depth, parent_hash / 1000]``.
        Categories missing from the map get ``category_id`` 0.
    """
    category_map = {
        'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
        'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11, 'other': 12,
        # Branch markers passed in by parse_node; without their own IDs they
        # would all collapse to the "unknown" value 0.
        'elif': 13, 'else': 14, 'except': 15, 'finally': 16,
    }
    category_id = category_map.get(category, 0)
    start_line, end_line = location
    parent_depth = len(parent_path)

    # The built-in hash() of a str is salted per interpreter process, which
    # would make this feature differ between runs. CRC32 is stable, and still
    # yields 0 for an empty path.
    path_text = ' -> '.join(parent_path)
    parent_hash = zlib.crc32(path_text.encode('utf-8')) % 1000

    # Guard against division by zero when the source has no lines.
    line_count = max(total_lines, 1)
    return [
        category_id,
        level,
        start_line / line_count,
        end_line / line_count,
        parent_depth,
        parent_hash / 1000,
    ]
def _make_part(category, lines, location, level, total_lines, vector_path, parent_path, node_id):
    """Build one part record for the 1-based, inclusive line span *location*."""
    first_line, last_line = location
    return {
        'category': category,
        'source': ''.join(lines[first_line - 1:last_line]),
        'location': (first_line, last_line),
        'level': level,
        'vector': create_vector(category, level, (first_line, last_line), total_lines, vector_path),
        'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
        'node_id': node_id,
    }


def _is_elif(child, lines):
    """Return True when *child* is an ``If`` node whose source line starts with ``elif``."""
    if not isinstance(child, ast.If):
        return False
    index = child.lineno - 1
    return 0 <= index < len(lines) and lines[index].lstrip().startswith('elif')


def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
    """Recursively parse an AST node with full hierarchy tracking.

    Args:
        node: The AST node to parse.
        lines: Source lines, with line endings kept.
        prev_end: 1-based number of the last line already covered before this
            node. A gap between it and the node's first line becomes a
            ``'spacer'`` part.
        level: Nesting level recorded on the parts produced for this node.
        total_lines: Total number of source lines; defaults to ``len(lines)``.
        parent_path: List of ancestor node IDs; defaults to an empty list.
        counters: Per-category counters used to number node IDs. The dict is
            mutated in place and shared with recursive calls; a fresh one is
            created when omitted.

    Returns:
        A flat list of part dicts with the keys ``category``, ``source``,
        ``location``, ``level``, ``vector``, ``parent_path`` and ``node_id``.
        The list holds an optional leading spacer, then the node itself,
        then the parts of its nested children.
    """
    if total_lines is None:
        total_lines = len(lines)
    if parent_path is None:
        parent_path = []
    if counters is None:
        counters = {'if': 0, 'while': 0, 'for': 0, 'function': 0, 'class': 0, 'try': 0}

    start_line = getattr(node, 'lineno', prev_end + 1)
    end_line = getattr(node, 'end_lineno', start_line)
    category = get_category(node)

    # Only categories present in the counters dict receive a node ID.
    node_id = ''
    if category in counters:
        counters[category] += 1
        node_id = f"{category.capitalize()}[{counters[category]}]"

    parts = []

    # Lines skipped between the previous part and this node become a spacer.
    if start_line > prev_end + 1:
        parts.append(_make_part(
            'spacer', lines, (prev_end + 1, start_line - 1), level,
            total_lines, parent_path, parent_path, '',
        ))

    # The node's own vector is built from the path that includes the node.
    current_path = parent_path + ([node_id] if node_id else [])
    parts.append(_make_part(
        category, lines, (start_line, end_line), level,
        total_lines, current_path, parent_path, node_id,
    ))

    # Starting at end_line - 1 means the first nested child never gets a
    # leading spacer; later children are compared with the previous sibling.
    nested_prev_end = end_line - 1

    # Walk the attributes in source order. For try statements the handlers
    # come before the else block; visiting 'orelse' first would report the
    # except clauses as a spacer and emit the parts out of order.
    for attr in ('body', 'handlers', 'orelse', 'finalbody'):
        children = getattr(node, attr, None)
        if not children:
            continue
        sub_parts = []
        for child in children:
            # Branch children get an extra marker part at the parent's level
            # that inherits the parent's node_id.
            marker = None
            if attr == 'orelse' and isinstance(node, ast.If) and child.lineno != start_line:
                # Decide from the source keyword: comparing line numbers
                # mislabels multi-statement else bodies and one-line elifs.
                marker = 'elif' if _is_elif(child, lines) else 'else'
            elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
                marker = 'except'
            elif attr == 'finalbody':
                marker = 'finally'

            if marker is None:
                child_prev_end = nested_prev_end
            else:
                child_end = getattr(child, 'end_lineno', child.lineno)
                sub_parts.append(_make_part(
                    marker, lines, (child.lineno, child_end), level,
                    total_lines, current_path, parent_path, node_id,
                ))
                # The marker already covers the child's lines, so the child
                # itself is parsed without a leading spacer.
                child_prev_end = child.lineno - 1

            sub_parts.extend(parse_node(
                child, lines, child_prev_end, level + 1,
                total_lines, current_path, counters,
            ))
            nested_prev_end = sub_parts[-1]['location'][1]
        parts.extend(sub_parts)
    return parts
def parse_python_code(code):
    """Parse a Python code string and return parts with hierarchy and vectors.

    Args:
        code: Python source text.

    Returns:
        A list of part dicts as produced by ``parse_node``, followed by a
        trailing ``'spacer'`` part when lines remain after the last
        statement. If the code cannot be parsed, a single ``'error'`` part
        is returned instead.
    """
    lines = code.splitlines(keepends=True)
    total_lines = len(lines)
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        # Some Python versions raise ValueError rather than SyntaxError for
        # source containing NUL bytes; report both as invalid code.
        return [{
            'category': 'error',
            'source': 'Invalid Python code',
            'location': (1, 1),
            'level': 0,
            'vector': [0, 0, 1.0, 1.0, 0, 0],
            'parent_path': 'Top-Level',
            'node_id': '',
        }]

    # One counter dict for the whole module keeps node IDs unique across
    # top-level statements. The keys mirror parse_node's default counters.
    counters = {'if': 0, 'while': 0, 'for': 0, 'function': 0, 'class': 0, 'try': 0}

    parts = []
    prev_end = 0
    for stmt in tree.body:
        stmt_parts = parse_node(stmt, lines, prev_end, total_lines=total_lines, counters=counters)
        parts.extend(stmt_parts)
        prev_end = stmt_parts[-1]['location'][1]

    # Anything after the last statement becomes one trailing spacer. Its
    # span ends on the last real line, so the normalised end stays at 1.0.
    if prev_end < total_lines:
        trailing_location = (prev_end + 1, total_lines)
        parts.append({
            'category': 'spacer',
            'source': ''.join(lines[prev_end:]),
            'location': trailing_location,
            'level': 0,
            'vector': create_vector('spacer', 0, trailing_location, total_lines, []),
            'parent_path': 'Top-Level',
            'node_id': '',
        })
    return parts