Spaces:
Sleeping
Sleeping
File size: 4,430 Bytes
5a58620 bba9630 5a58620 217d002 5a58620 217d002 bba9630 217d002 bba9630 217d002 bba9630 217d002 bba9630 217d002 bba9630 217d002 bba9630 5a58620 217d002 bba9630 217d002 bba9630 5a58620 217d002 bba9630 5a58620 bba9630 217d002 5a58620 217d002 bba9630 217d002 5a58620 217d002 bba9630 5a58620 bba9630 217d002 5a58620 217d002 5a58620 217d002 5a58620 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
# parser.py
import ast
def get_category(node):
    """Return the category label for an AST node.

    The node is checked against a fixed, ordered list of AST node types and
    the label paired with the first matching entry is returned. A node that
    matches none of the entries is labelled ``'other'``.
    """
    # Checked top to bottom; the first isinstance() hit decides the label.
    labelled_types = (
        ((ast.Import, ast.ImportFrom), 'import'),
        ((ast.Assign, ast.AnnAssign, ast.AugAssign), 'assignment'),
        (ast.FunctionDef, 'function'),
        (ast.AsyncFunctionDef, 'async_function'),
        (ast.ClassDef, 'class'),
        (ast.If, 'if'),
        (ast.While, 'while'),
        (ast.For, 'for'),
        (ast.Expr, 'expression'),
    )
    for node_types, label in labelled_types:
        if isinstance(node, node_types):
            return label
    return 'other'
def create_vector(category, level, location, total_lines):
    """Build the numeric vector that describes one code part.

    The result is ``[category_id, level, start / total_lines,
    end / total_lines]``, where ``start`` and ``end`` are the two items of
    ``location``. Known categories are numbered from 1 in the order listed
    below; any other category gets id 0.
    """
    known_categories = (
        'import', 'assignment', 'function', 'async_function', 'class',
        'if', 'while', 'for', 'expression', 'spacer', 'other',
    )
    ids_by_category = {
        name: number for number, name in enumerate(known_categories, start=1)
    }
    category_id = ids_by_category.get(category, 0)
    first_line, last_line = location
    normalized_start = first_line / total_lines
    normalized_end = last_line / total_lines
    return [category_id, level, normalized_start, normalized_end]
def parse_node(node, lines, prev_end, level=0, total_lines=None):
    """Recursively turn an AST node and its nested statements into parts.

    Every part is a dict with the keys ``'category'``, ``'source'``,
    ``'location'`` (a 1-based, inclusive ``(start, end)`` line pair),
    ``'level'`` and ``'vector'`` (as built by ``create_vector``).

    Args:
        node: AST node to process. Its ``lineno`` / ``end_lineno`` are used
            when present; otherwise the node is placed on the single line
            after ``prev_end``.
        lines: Source lines, joined as-is to form each part's ``'source'``.
        prev_end: Number of the last line already accounted for. Lines
            between it and the start of ``node`` are emitted as a
            ``'spacer'`` part.
        level: Nesting depth recorded on the emitted parts; nested
            statements are emitted with ``level + 1``.
        total_lines: Denominator for the normalised positions in the
            vectors. Defaults to ``len(lines)``.

    Returns:
        A list of parts: an optional leading spacer, the part for ``node``
        itself (whose source spans the whole node, nested statements
        included), then the parts of the statements in ``node.body``
        followed by those in ``node.orelse``.

    Only ``body`` and ``orelse`` are descended into. Other child lists
    (for example ``except`` handlers or a ``finally`` block) are covered
    solely by the enclosing node's own part.
    """
    if total_lines is None:
        total_lines = len(lines)
    parts = []
    start_line = getattr(node, 'lineno', prev_end + 1)
    end_line = getattr(node, 'end_lineno', None)
    if end_line is None:
        # Hand-built nodes may carry no end position; treat as one line.
        end_line = start_line

    # Lines between the previous part and this node become a spacer.
    if start_line > prev_end + 1:
        spacer_location = (prev_end + 1, start_line - 1)
        parts.append({
            'category': 'spacer',
            'source': ''.join(lines[prev_end:start_line - 1]),
            'location': spacer_location,
            'level': level,
            'vector': create_vector('spacer', level, spacer_location, total_lines),
        })

    # The node itself, including the text of everything nested inside it.
    category = get_category(node)
    node_location = (start_line, end_line)
    parts.append({
        'category': category,
        'source': ''.join(lines[start_line - 1:end_line]),
        'location': node_location,
        'level': level,
        'vector': create_vector(category, level, node_location, total_lines),
    })

    # Last line accounted for so far. Without a body this is the node's own
    # end, which is what the orelse loop starts from in that case.
    covered_end = end_line

    # Nested statements (class/function bodies, control structures).
    # Expression nodes such as Lambda have a non-list ``body``; skip those.
    body = getattr(node, 'body', None)
    if isinstance(body, list) and body:
        # Starting at end_line - 1 means the first child (which cannot start
        # after end_line) never gets a spacer in front of it.
        covered_end = end_line - 1
        for child in body:
            child_parts = parse_node(child, lines, covered_end, level + 1, total_lines)
            parts.extend(child_parts)
            # Advance to the end of the whole child, not of its last
            # descendant: a child such as try/except extends past the last
            # statement that was descended into, and those lines must not
            # be reported as a spacer before the next sibling.
            covered_end = max(part['location'][1] for part in child_parts)

    # Statements of the else branch (if/for/while/try). An ``elif`` shows up
    # here as a nested If node; a plain ``else:`` line lies between the body
    # and the first orelse statement and is therefore emitted as a spacer.
    orelse = getattr(node, 'orelse', None)
    if isinstance(orelse, list):
        for child in orelse:
            child_parts = parse_node(child, lines, covered_end, level + 1, total_lines)
            parts.extend(child_parts)
            covered_end = max(part['location'][1] for part in child_parts)

    return parts
def parse_python_code(code):
    """Parse a Python source string into parts with hierarchy and vectors.

    Args:
        code: Python source text.

    Returns:
        A list of part dicts (keys ``'category'``, ``'source'``,
        ``'location'``, ``'level'``, ``'vector'``) in the order produced by
        ``parse_node`` for each top-level statement, followed by a
        level-0 ``'spacer'`` part for any lines after the last statement.
        If the source cannot be parsed, a single ``'error'`` part is
        returned instead.
    """
    lines = code.splitlines(keepends=True)
    total_lines = len(lines)
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        # ValueError: source containing null bytes is rejected this way on
        # interpreters older than 3.12; report it like any other bad input.
        return [{'category': 'error', 'source': 'Invalid Python code', 'location': (1, 1), 'level': 0, 'vector': [0, 0, 1.0, 1.0]}]

    parts = []
    prev_end = 0
    for stmt in tree.body:
        stmt_parts = parse_node(stmt, lines, prev_end, total_lines=total_lines)
        parts.extend(stmt_parts)
        # Use the furthest line any part reaches (the statement's own end),
        # not the end of the last emitted part: for e.g. try/except the last
        # part is a nested statement that ends before the statement does,
        # and the remaining lines would be misreported as a spacer.
        prev_end = max(part['location'][1] for part in stmt_parts)

    # Lines after the last statement (blank lines, comments) form a final
    # spacer. Locations are inclusive, so it ends on the last line.
    if prev_end < total_lines:
        trailing_location = (prev_end + 1, total_lines)
        parts.append({
            'category': 'spacer',
            'source': ''.join(lines[prev_end:]),
            'location': trailing_location,
            'level': 0,
            'vector': create_vector('spacer', 0, trailing_location, total_lines),
        })
    return parts