Spaces:
Running
Running
Update parser.py
Browse files
parser.py
CHANGED
@@ -21,6 +21,8 @@ def get_category(node):
|
|
21 |
return 'for'
|
22 |
elif isinstance(node, ast.Try):
|
23 |
return 'try'
|
|
|
|
|
24 |
elif isinstance(node, ast.Expr):
|
25 |
return 'expression'
|
26 |
else:
|
@@ -31,29 +33,19 @@ def create_vector(category, level, location, total_lines, parent_path):
|
|
31 |
category_map = {
|
32 |
'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
|
33 |
'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11,
|
34 |
-
'other': 12, 'elif': 13, 'else': 14, 'except': 15, 'finally': 16
|
35 |
}
|
36 |
category_id = category_map.get(category, 0)
|
37 |
start_line, end_line = location
|
38 |
-
span = (end_line - start_line + 1) / total_lines
|
39 |
-
center_pos = ((start_line + end_line) / 2) / total_lines
|
40 |
parent_depth = len(parent_path)
|
41 |
-
|
42 |
-
# Weighted sum of parent categories (simple weighting by position)
|
43 |
parent_weight = 0
|
44 |
for i, parent in enumerate(parent_path):
|
45 |
parent_category = parent.split('[')[0].lower()
|
46 |
-
parent_weight += category_map.get(parent_category, 0) * (1 / (i + 1))
|
47 |
-
parent_weight = parent_weight / max(1, len(category_map))
|
48 |
-
|
49 |
-
return [
|
50 |
-
category_id, # Type of the part
|
51 |
-
level, # Nesting depth
|
52 |
-
center_pos, # Center position in file (0.0 to 1.0)
|
53 |
-
span, # Relative size in file (0.0 to 1.0)
|
54 |
-
parent_depth, # Number of ancestors
|
55 |
-
parent_weight # Semantic connection to parents (0.0 to 1.0-ish)
|
56 |
-
]
|
57 |
|
58 |
def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
|
59 |
"""Recursively parse an AST node with full hierarchy tracking."""
|
@@ -62,20 +54,22 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
|
|
62 |
if parent_path is None:
|
63 |
parent_path = []
|
64 |
if counters is None:
|
65 |
-
counters = {
|
66 |
|
67 |
parts = []
|
68 |
start_line = getattr(node, 'lineno', prev_end + 1)
|
69 |
end_line = getattr(node, 'end_lineno', start_line)
|
70 |
category = get_category(node)
|
71 |
|
72 |
-
node_id
|
73 |
-
|
74 |
-
|
75 |
-
node_id = f"{category.capitalize()}[{counters[category]}]"
|
76 |
|
|
|
77 |
if start_line > prev_end + 1:
|
78 |
spacer_lines = lines[prev_end:start_line - 1]
|
|
|
|
|
79 |
spacer_vector = create_vector('spacer', level, (prev_end + 1, start_line - 1), total_lines, parent_path)
|
80 |
parts.append({
|
81 |
'category': 'spacer',
|
@@ -84,11 +78,12 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
|
|
84 |
'level': level,
|
85 |
'vector': spacer_vector,
|
86 |
'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
|
87 |
-
'node_id':
|
88 |
})
|
89 |
|
|
|
90 |
stmt_lines = lines[start_line - 1:end_line]
|
91 |
-
current_path = parent_path +
|
92 |
node_vector = create_vector(category, level, (start_line, end_line), total_lines, current_path)
|
93 |
parts.append({
|
94 |
'category': category,
|
@@ -100,6 +95,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
|
|
100 |
'node_id': node_id
|
101 |
})
|
102 |
|
|
|
103 |
nested_prev_end = end_line - 1
|
104 |
for attr in ('body', 'orelse', 'handlers', 'finalbody'):
|
105 |
if hasattr(node, attr) and getattr(node, attr):
|
@@ -107,6 +103,8 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
|
|
107 |
for child in getattr(node, attr):
|
108 |
if attr == 'orelse' and isinstance(node, ast.If) and child.lineno != start_line:
|
109 |
sub_category = 'elif' if child.lineno != end_line else 'else'
|
|
|
|
|
110 |
sub_vector = create_vector(sub_category, level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
|
111 |
sub_parts.append({
|
112 |
'category': sub_category,
|
@@ -115,11 +113,13 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
|
|
115 |
'level': level,
|
116 |
'vector': sub_vector,
|
117 |
'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
|
118 |
-
'node_id': node_id
|
119 |
})
|
120 |
child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
|
121 |
sub_parts.extend(child_parts)
|
122 |
elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
|
|
|
|
|
123 |
sub_vector = create_vector('except', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
|
124 |
sub_parts.append({
|
125 |
'category': 'except',
|
@@ -133,6 +133,8 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
|
|
133 |
child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
|
134 |
sub_parts.extend(child_parts)
|
135 |
elif attr == 'finalbody':
|
|
|
|
|
136 |
sub_vector = create_vector('finally', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
|
137 |
sub_parts.append({
|
138 |
'category': 'finally',
|
@@ -160,7 +162,7 @@ def parse_python_code(code):
|
|
160 |
try:
|
161 |
tree = ast.parse(code)
|
162 |
except SyntaxError:
|
163 |
-
return [{'category': 'error', 'source': 'Invalid Python code', 'location': (1, 1), 'level': 0, 'vector': [0, 0, 1.0, 0.0, 0, 0], 'parent_path': 'Top-Level', 'node_id': ''}]
|
164 |
|
165 |
parts = []
|
166 |
prev_end = 0
|
@@ -172,6 +174,9 @@ def parse_python_code(code):
|
|
172 |
|
173 |
if prev_end < total_lines:
|
174 |
remaining_lines = lines[prev_end:]
|
|
|
|
|
|
|
175 |
spacer_vector = create_vector('spacer', 0, (prev_end + 1, total_lines + 1), total_lines, [])
|
176 |
parts.append({
|
177 |
'category': 'spacer',
|
@@ -180,7 +185,7 @@ def parse_python_code(code):
|
|
180 |
'level': 0,
|
181 |
'vector': spacer_vector,
|
182 |
'parent_path': 'Top-Level',
|
183 |
-
'node_id':
|
184 |
})
|
185 |
|
186 |
return parts
|
|
|
21 |
return 'for'
|
22 |
elif isinstance(node, ast.Try):
|
23 |
return 'try'
|
24 |
+
elif isinstance(node, ast.Return):
|
25 |
+
return 'return'
|
26 |
elif isinstance(node, ast.Expr):
|
27 |
return 'expression'
|
28 |
else:
|
|
|
33 |
category_map = {
|
34 |
'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
|
35 |
'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11,
|
36 |
+
'other': 12, 'elif': 13, 'else': 14, 'except': 15, 'finally': 16, 'return': 17
|
37 |
}
|
38 |
category_id = category_map.get(category, 0)
|
39 |
start_line, end_line = location
|
40 |
+
span = (end_line - start_line + 1) / total_lines
|
41 |
+
center_pos = ((start_line + end_line) / 2) / total_lines
|
42 |
parent_depth = len(parent_path)
|
|
|
|
|
43 |
parent_weight = 0
|
44 |
for i, parent in enumerate(parent_path):
|
45 |
parent_category = parent.split('[')[0].lower()
|
46 |
+
parent_weight += category_map.get(parent_category, 0) * (1 / (i + 1))
|
47 |
+
parent_weight = parent_weight / max(1, len(category_map))
|
48 |
+
return [category_id, level, center_pos, span, parent_depth, parent_weight]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
|
51 |
"""Recursively parse an AST node with full hierarchy tracking."""
|
|
|
54 |
if parent_path is None:
|
55 |
parent_path = []
|
56 |
if counters is None:
|
57 |
+
counters = {cat: 0 for cat in ['import', 'assignment', 'function', 'async_function', 'class', 'if', 'while', 'for', 'try', 'return', 'expression', 'other', 'spacer', 'elif', 'else', 'except', 'finally']}
|
58 |
|
59 |
parts = []
|
60 |
start_line = getattr(node, 'lineno', prev_end + 1)
|
61 |
end_line = getattr(node, 'end_lineno', start_line)
|
62 |
category = get_category(node)
|
63 |
|
64 |
+
# Assign a node_id to every part
|
65 |
+
counters[category] += 1
|
66 |
+
node_id = f"{category.capitalize()}[{counters[category]}]"
|
|
|
67 |
|
68 |
+
# Spacer before the node
|
69 |
if start_line > prev_end + 1:
|
70 |
spacer_lines = lines[prev_end:start_line - 1]
|
71 |
+
counters['spacer'] += 1
|
72 |
+
spacer_node_id = f"Spacer[{counters['spacer']}]"
|
73 |
spacer_vector = create_vector('spacer', level, (prev_end + 1, start_line - 1), total_lines, parent_path)
|
74 |
parts.append({
|
75 |
'category': 'spacer',
|
|
|
78 |
'level': level,
|
79 |
'vector': spacer_vector,
|
80 |
'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
|
81 |
+
'node_id': spacer_node_id
|
82 |
})
|
83 |
|
84 |
+
# Main node
|
85 |
stmt_lines = lines[start_line - 1:end_line]
|
86 |
+
current_path = parent_path + [node_id]
|
87 |
node_vector = create_vector(category, level, (start_line, end_line), total_lines, current_path)
|
88 |
parts.append({
|
89 |
'category': category,
|
|
|
95 |
'node_id': node_id
|
96 |
})
|
97 |
|
98 |
+
# Process nested bodies
|
99 |
nested_prev_end = end_line - 1
|
100 |
for attr in ('body', 'orelse', 'handlers', 'finalbody'):
|
101 |
if hasattr(node, attr) and getattr(node, attr):
|
|
|
103 |
for child in getattr(node, attr):
|
104 |
if attr == 'orelse' and isinstance(node, ast.If) and child.lineno != start_line:
|
105 |
sub_category = 'elif' if child.lineno != end_line else 'else'
|
106 |
+
counters[sub_category] += 1
|
107 |
+
sub_node_id = f"{sub_category.capitalize()}[{counters[sub_category]}]"
|
108 |
sub_vector = create_vector(sub_category, level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
|
109 |
sub_parts.append({
|
110 |
'category': sub_category,
|
|
|
113 |
'level': level,
|
114 |
'vector': sub_vector,
|
115 |
'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
|
116 |
+
'node_id': node_id # Inherits parent's node_id for context
|
117 |
})
|
118 |
child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
|
119 |
sub_parts.extend(child_parts)
|
120 |
elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
|
121 |
+
counters['except'] += 1
|
122 |
+
sub_node_id = f"Except[{counters['except']}]"
|
123 |
sub_vector = create_vector('except', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
|
124 |
sub_parts.append({
|
125 |
'category': 'except',
|
|
|
133 |
child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
|
134 |
sub_parts.extend(child_parts)
|
135 |
elif attr == 'finalbody':
|
136 |
+
counters['finally'] += 1
|
137 |
+
sub_node_id = f"Finally[{counters['finally']}]"
|
138 |
sub_vector = create_vector('finally', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
|
139 |
sub_parts.append({
|
140 |
'category': 'finally',
|
|
|
162 |
try:
|
163 |
tree = ast.parse(code)
|
164 |
except SyntaxError:
|
165 |
+
return [{'category': 'error', 'source': 'Invalid Python code', 'location': (1, 1), 'level': 0, 'vector': [0, 0, 1.0, 0.0, 0, 0], 'parent_path': 'Top-Level', 'node_id': 'Error[1]'}]
|
166 |
|
167 |
parts = []
|
168 |
prev_end = 0
|
|
|
174 |
|
175 |
if prev_end < total_lines:
|
176 |
remaining_lines = lines[prev_end:]
|
177 |
+
counters = {'spacer': 0}
|
178 |
+
counters['spacer'] += 1
|
179 |
+
spacer_node_id = f"Spacer[{counters['spacer']}]"
|
180 |
spacer_vector = create_vector('spacer', 0, (prev_end + 1, total_lines + 1), total_lines, [])
|
181 |
parts.append({
|
182 |
'category': 'spacer',
|
|
|
185 |
'level': 0,
|
186 |
'vector': spacer_vector,
|
187 |
'parent_path': 'Top-Level',
|
188 |
+
'node_id': spacer_node_id
|
189 |
})
|
190 |
|
191 |
return parts
|