Spaces:
Sleeping
Sleeping
File size: 9,079 Bytes
5a58620 bba9630 5a58620 217d002 c190b69 c2f70be 5a58620 4d5c304 5a58620 4039137 4d5c304 217d002 4d5c304 c2f70be 217d002 c2f70be 4039137 4d5c304 c2f70be 217d002 4039137 217d002 4039137 c2f70be 217d002 bba9630 4039137 c2f70be bba9630 c2f70be bba9630 c2f70be 4039137 bba9630 217d002 4039137 c2f70be bba9630 c2f70be bba9630 c2f70be 4039137 bba9630 4039137 bba9630 217d002 4039137 bba9630 5a58620 c2f70be 4039137 c190b69 4039137 c190b69 4039137 c190b69 c2f70be c190b69 c2f70be c190b69 c2f70be c190b69 4d5c304 c190b69 c2f70be c190b69 4d5c304 c190b69 217d002 bba9630 5a58620 bba9630 217d002 5a58620 217d002 bba9630 c2f70be 5a58620 217d002 bba9630 5a58620 217d002 5a58620 c2f70be 4039137 5a58620 217d002 4039137 c2f70be 5a58620 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
# parser.py
import ast
def get_category(node):
    """Return the category label used for an AST node.

    Args:
        node: An ``ast`` node, normally a statement.

    Returns:
        One of 'import', 'assignment', 'function', 'async_function',
        'class', 'if', 'while', 'for', 'try', 'return', 'expression',
        or 'other' for any node type that is not listed.
    """
    # ``ast.TryStar`` (``try`` / ``except*``) only exists on Python 3.11+.
    # It carries the same body/handlers/orelse/finalbody fields as
    # ``ast.Try``, so it is reported as a 'try' as well.
    try_types = tuple(
        t for t in (ast.Try, getattr(ast, 'TryStar', None)) if t is not None
    )
    # Checked top to bottom; the first matching entry wins.
    dispatch = (
        ((ast.Import, ast.ImportFrom), 'import'),
        ((ast.Assign, ast.AnnAssign, ast.AugAssign), 'assignment'),
        (ast.FunctionDef, 'function'),
        (ast.AsyncFunctionDef, 'async_function'),
        (ast.ClassDef, 'class'),
        (ast.If, 'if'),
        (ast.While, 'while'),
        (ast.For, 'for'),
        (try_types, 'try'),
        (ast.Return, 'return'),
        (ast.Expr, 'expression'),
    )
    for node_types, label in dispatch:
        if isinstance(node, node_types):
            return label
    return 'other'
def create_vector(category, level, location, total_lines, parent_path):
    """Build the six-element numeric vector describing a code part.

    Args:
        category: Category label such as 'import', 'if' or 'spacer'.
            Labels missing from the internal table map to id 0.
        level: Nesting level; copied into the vector unchanged.
        location: ``(start_line, end_line)`` pair, both ends inclusive.
        total_lines: Line count of the whole source, used to normalise the
            span and centre position. Values below 1 are treated as 1.
        parent_path: Sequence of node ids such as ``'Function[1]'``; the
            text before ``'['`` is lower-cased and looked up as a category.

    Returns:
        ``[category_id, level, center_pos, span, parent_depth,
        parent_weight]`` where ``parent_depth`` is ``len(parent_path)`` and
        ``parent_weight`` is the sum of each path entry's category id
        weighted by ``1 / (position + 1)``, divided by the number of known
        categories.
    """
    category_map = {
        'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
        'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11,
        'other': 12, 'elif': 13, 'else': 14, 'except': 15, 'finally': 16, 'return': 17,
    }
    category_id = category_map.get(category, 0)
    start_line, end_line = location

    # An empty source would otherwise divide by zero below.
    line_count = max(1, total_lines)
    span = (end_line - start_line + 1) / line_count
    center_pos = ((start_line + end_line) / 2) / line_count

    # Plain left-to-right accumulation (not ``sum``) so results do not
    # depend on the interpreter's float-summation strategy.
    parent_weight = 0
    for position, parent in enumerate(parent_path):
        parent_category = parent.split('[')[0].lower()
        parent_weight += category_map.get(parent_category, 0) * (1 / (position + 1))
    parent_weight = parent_weight / len(category_map)

    return [category_id, level, center_pos, span, len(parent_path), parent_weight]
def _make_part(category, lines, location, level, total_lines, vector_path, parent_path, node_id):
    """Build one part dict covering the inclusive line range ``location``."""
    first, last = location
    return {
        'category': category,
        'source': ''.join(lines[first - 1:last]),
        'location': (first, last),
        'level': level,
        'vector': create_vector(category, level, (first, last), total_lines, vector_path),
        'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
        'node_id': node_id,
    }


def _is_elif_clause(child, lines):
    """Tell whether an ``orelse`` child of an ``if`` is an ``elif`` clause."""
    # An ``elif`` is stored as a nested ``ast.If``; so is a plain ``if``
    # written under ``else:``. Only the source text tells them apart.
    if not isinstance(child, ast.If):
        return False
    index = child.lineno - 1
    return 0 <= index < len(lines) and lines[index].lstrip().startswith('elif')


def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
    """Recursively flatten an AST node and its nested blocks into part dicts.

    Every part is a dict with the keys 'category', 'source', 'location',
    'level', 'vector', 'parent_path' and 'node_id'. The returned list holds,
    in order: an optional 'spacer' part for the lines between ``prev_end``
    and the node, the node itself, then the parts of its children taken from
    ``body``, ``handlers``, ``orelse`` and ``finalbody``.

    Children of an ``if``'s ``orelse``, exception handlers and ``finalbody``
    statements are each preceded by a marker part ('elif'/'else', 'except',
    'finally') that spans the child's lines and reuses this node's id.

    Args:
        node: The AST node to process.
        lines: Source lines; slices of it are joined with ``''`` to build
            each part's 'source'.
        prev_end: Number of the last line already covered. Lines after it
            and before the node become a spacer part.
        level: Nesting level recorded on the node's part; children get
            ``level + 1``.
        total_lines: Total line count passed to ``create_vector``; defaults
            to ``len(lines)``.
        parent_path: List of ancestor node ids; defaults to an empty list.
        counters: Dict mapping category to the number of ids handed out so
            far. It is updated in place and shared with recursive calls.

    Returns:
        A flat list of part dicts.
    """
    if total_lines is None:
        total_lines = len(lines)
    if parent_path is None:
        parent_path = []
    if counters is None:
        counters = {}

    def bump(name):
        # ``.get`` so a caller-supplied dict need not pre-list every category.
        counters[name] = counters.get(name, 0) + 1
        return counters[name]

    start_line = getattr(node, 'lineno', prev_end + 1)
    end_line = getattr(node, 'end_lineno', start_line)
    category = get_category(node)
    node_id = f"{category.capitalize()}[{bump(category)}]"
    current_path = parent_path + [node_id]

    parts = []

    # Spacer before the node
    if start_line > prev_end + 1:
        spacer_node_id = f"Spacer[{bump('spacer')}]"
        parts.append(_make_part(
            'spacer', lines, (prev_end + 1, start_line - 1), level,
            total_lines, parent_path, parent_path, spacer_node_id,
        ))

    # Main node; its vector is built from the path that includes the node itself.
    parts.append(_make_part(
        category, lines, (start_line, end_line), level,
        total_lines, current_path, parent_path, node_id,
    ))

    # Process nested bodies. Starting from ``end_line - 1`` means no spacer
    # is ever emitted in front of the first child.
    nested_prev_end = end_line - 1
    # ``handlers`` comes before ``orelse`` so that try/except/else clauses
    # are visited in source order; no other statement has both fields.
    for attr in ('body', 'handlers', 'orelse', 'finalbody'):
        children = getattr(node, attr, None)
        if not children:
            continue
        for child in children:
            child_end = getattr(child, 'end_lineno', child.lineno)

            marker = None
            if attr == 'orelse' and isinstance(node, ast.If) and child.lineno != start_line:
                marker = 'elif' if _is_elif_clause(child, lines) else 'else'
            elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
                marker = 'except'
            elif attr == 'finalbody':
                marker = 'finally'

            if marker is None:
                child_prev_end = nested_prev_end
            else:
                # The clause is counted, but the marker deliberately carries
                # the enclosing node's id for context.
                bump(marker)
                parts.append(_make_part(
                    marker, lines, (child.lineno, child_end), level,
                    total_lines, current_path, parent_path, node_id,
                ))
                # Clause children never get a spacer in front of them.
                child_prev_end = child.lineno - 1

            parts.extend(parse_node(
                child, lines, child_prev_end, level + 1,
                total_lines, current_path, counters,
            ))
            nested_prev_end = child_end
    return parts
def parse_python_code(code):
    """Parse Python source text into a flat list of part dicts.

    Each top-level statement is expanded with ``parse_node``; lines between
    statements, and any lines left after the last statement, are reported
    as 'spacer' parts.

    Args:
        code: Python source as a single string.

    Returns:
        A list of part dicts with the keys 'category', 'source',
        'location', 'level', 'vector', 'parent_path' and 'node_id'. If the
        source cannot be parsed, a one-element list holding an 'error' part
        is returned instead.
    """
    from collections import defaultdict

    lines = code.splitlines(keepends=True)
    total_lines = len(lines)
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        # ValueError: some interpreter versions reject NUL bytes in the
        # source with it rather than with SyntaxError.
        return [{'category': 'error', 'source': 'Invalid Python code', 'location': (1, 1), 'level': 0, 'vector': [0, 0, 1.0, 0.0, 0, 0], 'parent_path': 'Top-Level', 'node_id': 'Error[1]'}]

    # One counter set for the whole file so node ids stay unique across
    # top-level statements and the trailing spacer.
    counters = defaultdict(int)
    parts = []
    prev_end = 0
    for stmt in tree.body:
        stmt_parts = parse_node(stmt, lines, prev_end, total_lines=total_lines, counters=counters)
        parts.extend(stmt_parts)
        # Prefer the statement's own end line; the last emitted part is not
        # guaranteed to be the one that ends last.
        prev_end = getattr(stmt, 'end_lineno', None) or stmt_parts[-1]['location'][1]

    if prev_end < total_lines:
        counters['spacer'] += 1
        # Inclusive range, so it ends on the last line of the source.
        location = (prev_end + 1, total_lines)
        parts.append({
            'category': 'spacer',
            'source': ''.join(lines[prev_end:]),
            'location': location,
            'level': 0,
            'vector': create_vector('spacer', 0, location, total_lines, []),
            'parent_path': 'Top-Level',
            'node_id': f"Spacer[{counters['spacer']}]",
        })
    return parts