Spaces:
Sleeping
Sleeping
File size: 8,672 Bytes
5a58620 bba9630 5a58620 217d002 c190b69 5a58620 4d5c304 5a58620 4039137 4d5c304 217d002 4d5c304 217d002 4d5c304 4039137 4d5c304 217d002 4d5c304 217d002 4039137 217d002 4039137 c190b69 217d002 bba9630 4039137 bba9630 4039137 bba9630 217d002 4039137 bba9630 4039137 bba9630 4039137 bba9630 217d002 4039137 bba9630 5a58620 4039137 c190b69 4039137 c190b69 4039137 c190b69 4d5c304 c190b69 4d5c304 c190b69 4d5c304 c190b69 217d002 bba9630 5a58620 bba9630 217d002 5a58620 217d002 bba9630 4d5c304 5a58620 217d002 bba9630 5a58620 217d002 5a58620 4039137 5a58620 217d002 4039137 5a58620 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
# parser.py
import ast
def get_category(node):
    """Return the category label used for an AST node.

    The node is matched against an ordered table of AST node types and the
    label of the first matching entry is returned. Nodes that match no entry
    are labelled ``'other'``.
    """
    # Ordered (node types, label) pairs; the first match wins.
    type_labels = (
        ((ast.Import, ast.ImportFrom), 'import'),
        ((ast.Assign, ast.AnnAssign, ast.AugAssign), 'assignment'),
        (ast.FunctionDef, 'function'),
        (ast.AsyncFunctionDef, 'async_function'),
        (ast.ClassDef, 'class'),
        (ast.If, 'if'),
        (ast.While, 'while'),
        (ast.For, 'for'),
        (ast.Try, 'try'),
        (ast.Expr, 'expression'),
    )
    for node_types, label in type_labels:
        if isinstance(node, node_types):
            return label
    return 'other'
# Numeric id for every known part category; unknown names map to 0.
_CATEGORY_IDS = {
    'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
    'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11,
    'other': 12, 'elif': 13, 'else': 14, 'except': 15, 'finally': 16
}


def create_vector(category, level, location, total_lines, parent_path):
    """Build the six-element numeric vector describing a code part.

    Args:
        category: Category name of the part (e.g. ``'function'``, ``'spacer'``).
            Names missing from the category table get id 0.
        level: Nesting depth of the part; copied into the vector unchanged.
        location: ``(start_line, end_line)`` pair, treated as inclusive.
        total_lines: Line count used to normalize position and size. Values
            below 1 are treated as 1 so an empty input cannot trigger a
            division by zero.
        parent_path: Sequence of ancestor ids such as ``'Function[1]'``; the
            text before ``'['`` is lower-cased and looked up as a category.

    Returns:
        ``[category_id, level, center_pos, span, parent_depth, parent_weight]``
        where ``center_pos`` and ``span`` are fractions of ``total_lines`` and
        ``parent_weight`` is the position-decayed sum of ancestor category ids
        divided by the size of the category table.
    """
    category_id = _CATEGORY_IDS.get(category, 0)
    start_line, end_line = location

    # Guard the divisor: an empty file reports zero lines.
    line_count = max(1, total_lines)
    span = (end_line - start_line + 1) / line_count  # Normalized size of the part
    center_pos = ((start_line + end_line) / 2) / line_count  # Normalized center position

    # Weighted sum of ancestor categories: the i-th entry (1-based) of the
    # path contributes its category id scaled by 1/i.
    parent_weight = 0
    for position, parent in enumerate(parent_path, start=1):
        parent_category = parent.split('[')[0].lower()
        parent_weight += _CATEGORY_IDS.get(parent_category, 0) * (1 / position)
    # Ids run 1..len(table), so the table size is also the largest id.
    parent_weight = parent_weight / max(1, len(_CATEGORY_IDS))

    return [
        category_id,        # Type of the part
        level,              # Nesting depth
        center_pos,         # Center position in file
        span,               # Relative size in file
        len(parent_path),   # Number of ancestors
        parent_weight,      # Weighted link to ancestors (0.0 to 1.0-ish)
    ]
def _build_part(category, lines, location, level, total_lines, vector_path, parent_path, node_id):
    """Assemble one part dict for the inclusive 1-based line range ``location``."""
    first_line, last_line = location
    return {
        'category': category,
        'source': ''.join(lines[first_line - 1:last_line]),
        'location': (first_line, last_line),
        'level': level,
        'vector': create_vector(category, level, (first_line, last_line), total_lines, vector_path),
        'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
        'node_id': node_id
    }


def _is_elif_branch(node, child, lines):
    """Return True when ``child`` is the ``elif`` continuation of the ``if`` statement ``node``."""
    # An elif is stored as a single nested ast.If in ``orelse``; the same AST
    # shape arises for ``else:`` followed by a lone ``if``, so the keyword that
    # starts the child's own line is what tells the two apart.
    if len(node.orelse) != 1 or not isinstance(child, ast.If):
        return False
    index = child.lineno - 1
    return 0 <= index < len(lines) and lines[index].lstrip().startswith('elif')


def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
    """Recursively flatten an AST node into a list of part dicts.

    Args:
        node: AST node to describe (a statement or an ``except`` handler).
        lines: Source lines with line endings kept; sliced to fill each
            part's ``'source'``.
        prev_end: Line number (1-based) where the previous part ended. When
            the node starts more than one line later, a ``'spacer'`` part
            covering the gap is emitted first.
        level: Nesting depth recorded on the node's part; children are parsed
            at ``level + 1``.
        total_lines: Line count passed to ``create_vector``; defaults to
            ``len(lines)``.
        parent_path: List of ancestor ids (e.g. ``'Function[1]'``); defaults
            to an empty list.
        counters: Per-category running counts used to number node ids. The
            dict is mutated in place and shared with every recursive call.

    Returns:
        A list of dicts with keys ``category``, ``source``, ``location``,
        ``level``, ``vector``, ``parent_path`` and ``node_id``: an optional
        leading spacer, the node itself, then the parts of all its children.
    """
    if total_lines is None:
        total_lines = len(lines)
    if parent_path is None:
        parent_path = []
    if counters is None:
        counters = {'if': 0, 'while': 0, 'for': 0, 'function': 0, 'class': 0, 'try': 0}

    start_line = getattr(node, 'lineno', prev_end + 1)
    end_line = getattr(node, 'end_lineno', start_line)
    category = get_category(node)

    # Only the categories tracked in ``counters`` receive an id.
    node_id = ''
    if category in counters:
        counters[category] += 1
        node_id = f"{category.capitalize()}[{counters[category]}]"

    parts = []
    if start_line > prev_end + 1:
        # Lines between the previous part and this node become a spacer.
        parts.append(_build_part(
            'spacer', lines, (prev_end + 1, start_line - 1), level,
            total_lines, parent_path, parent_path, ''))

    # The node's vector sees the node itself as the last path element, while
    # its 'parent_path' string lists the ancestors only.
    current_path = parent_path + ([node_id] if node_id else [])
    parts.append(_build_part(
        category, lines, (start_line, end_line), level,
        total_lines, current_path, parent_path, node_id))

    # Starting one line before the node's end means the first child of the
    # first block can never be preceded by a spacer.
    nested_prev_end = end_line - 1

    # Visit child blocks in source order. A ``try`` statement is written
    # body / except handlers / else / finally, so 'handlers' must precede
    # 'orelse'; otherwise the ``else`` child is measured against the end of
    # the try body and its spacer swallows the ``except`` clauses.
    for attr in ('body', 'handlers', 'orelse', 'finalbody'):
        children = getattr(node, attr, None)
        if not children:
            continue
        for child in children:
            marker = None
            if attr == 'orelse' and isinstance(node, ast.If) and child.lineno != start_line:
                marker = 'elif' if _is_elif_branch(node, child, lines) else 'else'
            elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
                marker = 'except'
            elif attr == 'finalbody':
                marker = 'finally'

            if marker is None:
                child_prev_end = nested_prev_end
            else:
                # Branch marker: recorded at the parent's level and id, over
                # the same lines as the child, which is then parsed with no
                # leading spacer.
                child_end = getattr(child, 'end_lineno', child.lineno)
                parts.append(_build_part(
                    marker, lines, (child.lineno, child_end), level,
                    total_lines, current_path, parent_path, node_id))
                child_prev_end = child.lineno - 1

            parts.extend(parse_node(
                child, lines, child_prev_end, level + 1,
                total_lines, current_path, counters))
            # The next sibling measures its gap from the last emitted part.
            nested_prev_end = parts[-1]['location'][1]
    return parts
def parse_python_code(code):
    """Parse a Python source string into a flat list of annotated parts.

    Args:
        code: Python source text.

    Returns:
        A list of part dicts (keys ``category``, ``source``, ``location``,
        ``level``, ``vector``, ``parent_path``, ``node_id``) produced by
        ``parse_node`` for every top-level statement, followed by a
        ``'spacer'`` part for any lines left after the last statement. When
        the source cannot be parsed, a single ``'error'`` part is returned
        instead.
    """
    lines = code.splitlines(keepends=True)
    total_lines = len(lines)
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        # ValueError: interpreters before 3.12 report null bytes in the
        # source this way rather than as a SyntaxError.
        return [{'category': 'error', 'source': 'Invalid Python code', 'location': (1, 1), 'level': 0, 'vector': [0, 0, 1.0, 0.0, 0, 0], 'parent_path': 'Top-Level', 'node_id': ''}]

    parts = []
    prev_end = 0
    for stmt in tree.body:
        # NOTE(review): no counters dict is passed, so id numbering restarts
        # for every top-level statement (two top-level functions are both
        # 'Function[1]') - confirm this is intended.
        stmt_parts = parse_node(stmt, lines, prev_end, total_lines=total_lines)
        parts.extend(stmt_parts)
        # Use the furthest line reached by any part: the last emitted part is
        # not guaranteed to be the one that ends the statement.
        prev_end = max(part['location'][1] for part in stmt_parts)

    if prev_end < total_lines:
        # Locations are inclusive, so the trailing spacer ends on the last
        # line of the file, not one past it.
        trailing_location = (prev_end + 1, total_lines)
        parts.append({
            'category': 'spacer',
            'source': ''.join(lines[prev_end:]),
            'location': trailing_location,
            'level': 0,
            'vector': create_vector('spacer', 0, trailing_location, total_lines, []),
            'parent_path': 'Top-Level',
            'node_id': ''
        })
    return parts