Spaces:
Running
Running
Update parser.py
Browse files
parser.py
CHANGED
@@ -19,29 +19,30 @@ def get_category(node):
|
|
19 |
return 'while'
|
20 |
elif isinstance(node, ast.For):
|
21 |
return 'for'
|
|
|
|
|
22 |
elif isinstance(node, ast.Expr):
|
23 |
return 'expression'
|
24 |
else:
|
25 |
-
return 'other'
|
26 |
|
27 |
def create_vector(category, level, location, total_lines, parent_path):
|
28 |
"""Create a vector representation including hierarchy info."""
|
29 |
category_map = {
|
30 |
'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
|
31 |
-
'if': 6, 'while': 7, 'for': 8, '
|
32 |
}
|
33 |
category_id = category_map.get(category, 0)
|
34 |
start_line, end_line = location
|
35 |
-
# Extend vector with parent path length and a hash of the path for uniqueness
|
36 |
parent_depth = len(parent_path)
|
37 |
-
parent_hash = hash(' -> '.join(parent_path)) % 1000
|
38 |
return [
|
39 |
category_id,
|
40 |
level,
|
41 |
start_line / total_lines,
|
42 |
end_line / total_lines,
|
43 |
parent_depth,
|
44 |
-
parent_hash / 1000
|
45 |
]
|
46 |
|
47 |
def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
|
@@ -51,7 +52,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
|
|
51 |
if parent_path is None:
|
52 |
parent_path = []
|
53 |
if counters is None:
|
54 |
-
counters = {'if': 0, 'while': 0, 'for': 0, 'function': 0, 'class': 0
|
55 |
|
56 |
parts = []
|
57 |
start_line = getattr(node, 'lineno', prev_end + 1)
|
@@ -92,14 +93,60 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
|
|
92 |
'node_id': node_id
|
93 |
})
|
94 |
|
95 |
-
# Process nested
|
96 |
nested_prev_end = end_line - 1
|
97 |
-
for attr in ('body', 'orelse'):
|
98 |
if hasattr(node, attr) and getattr(node, attr):
|
|
|
99 |
for child in getattr(node, attr):
|
100 |
-
|
101 |
-
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
|
104 |
return parts
|
105 |
|
|
|
19 |
return 'while'
|
20 |
elif isinstance(node, ast.For):
|
21 |
return 'for'
|
22 |
+
elif isinstance(node, ast.Try):
|
23 |
+
return 'try'
|
24 |
elif isinstance(node, ast.Expr):
|
25 |
return 'expression'
|
26 |
else:
|
27 |
+
return 'other' # Fallback for uncategorized nodes (e.g., 'pass')
|
28 |
|
29 |
def create_vector(category, level, location, total_lines, parent_path):
    """Create a vector representation including hierarchy info.

    Args:
        category: Category label of the node (e.g. ``'if'``, ``'for'``,
            ``'except'``). Labels absent from the category table are
            encoded as ``0``.
        level: Nesting level recorded for the node; stored unchanged.
        location: ``(start_line, end_line)`` pair for the node.
        total_lines: Line count used to normalise both line numbers. When
            it is falsy (``0`` or ``None``) the two normalised positions are
            reported as ``0.0`` instead of raising ``ZeroDivisionError``.
        parent_path: Sequence of ancestor labels (strings).

    Returns:
        A six-element list::

            [category_id, level, start_line / total_lines,
             end_line / total_lines, len(parent_path), path_hash]

        ``path_hash`` is a value in ``[0, 1)`` derived from the joined
        parent path and is identical across interpreter runs.
    """
    # Local import keeps this function self-contained; the module's import
    # block is not guaranteed to provide zlib.
    import zlib

    category_map = {
        'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
        'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11, 'other': 12,
        # Sub-block categories emitted by parse_node. They are appended after
        # the existing ids so that previously assigned ids do not shift.
        'elif': 13, 'else': 14, 'except': 15, 'finally': 16,
    }
    category_id = category_map.get(category, 0)
    start_line, end_line = location

    if total_lines:
        start_fraction = start_line / total_lines
        end_fraction = end_line / total_lines
    else:
        # Nothing to normalise against (empty source): avoid dividing by zero.
        start_fraction = 0.0
        end_fraction = 0.0

    parent_depth = len(parent_path)
    # The built-in hash() of a str is salted per process (PYTHONHASHSEED), so
    # it would yield a different vector on every run. CRC32 is stable.
    joined_path = ' -> '.join(parent_path)
    parent_hash = zlib.crc32(joined_path.encode('utf-8')) % 1000

    return [
        category_id,
        level,
        start_fraction,
        end_fraction,
        parent_depth,
        parent_hash / 1000,
    ]
|
47 |
|
48 |
def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
|
|
|
52 |
if parent_path is None:
|
53 |
parent_path = []
|
54 |
if counters is None:
|
55 |
+
counters = {'if': 0, 'while': 0, 'for': 0, 'function': 0, 'class': 0, 'try': 0}
|
56 |
|
57 |
parts = []
|
58 |
start_line = getattr(node, 'lineno', prev_end + 1)
|
|
|
93 |
'node_id': node_id
|
94 |
})
|
95 |
|
96 |
+
# Process nested bodies (body, orelse, handlers, finalbody)
|
97 |
nested_prev_end = end_line - 1
|
98 |
+
for attr in ('body', 'orelse', 'handlers', 'finalbody'):
|
99 |
if hasattr(node, attr) and getattr(node, attr):
|
100 |
+
sub_parts = []
|
101 |
for child in getattr(node, attr):
|
102 |
+
# Special case for elif: treat as part of the if's orelse but same level
|
103 |
+
if attr == 'orelse' and isinstance(node, ast.If) and child.lineno != start_line:
|
104 |
+
sub_category = 'elif' if child.lineno != end_line else 'else'
|
105 |
+
sub_vector = create_vector(sub_category, level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
|
106 |
+
sub_parts.append({
|
107 |
+
'category': sub_category,
|
108 |
+
'source': ''.join(lines[child.lineno - 1:getattr(child, 'end_lineno', child.lineno)]),
|
109 |
+
'location': (child.lineno, getattr(child, 'end_lineno', child.lineno)),
|
110 |
+
'level': level,
|
111 |
+
'vector': sub_vector,
|
112 |
+
'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
|
113 |
+
'node_id': node_id # Inherits the If's node_id
|
114 |
+
})
|
115 |
+
child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
|
116 |
+
sub_parts.extend(child_parts)
|
117 |
+
# Special case for except: treat as part of try
|
118 |
+
elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
|
119 |
+
sub_vector = create_vector('except', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
|
120 |
+
sub_parts.append({
|
121 |
+
'category': 'except',
|
122 |
+
'source': ''.join(lines[child.lineno - 1:getattr(child, 'end_lineno', child.lineno)]),
|
123 |
+
'location': (child.lineno, getattr(child, 'end_lineno', child.lineno)),
|
124 |
+
'level': level,
|
125 |
+
'vector': sub_vector,
|
126 |
+
'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
|
127 |
+
'node_id': node_id # Inherits the Try's node_id
|
128 |
+
})
|
129 |
+
child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
|
130 |
+
sub_parts.extend(child_parts)
|
131 |
+
# Special case for finally
|
132 |
+
elif attr == 'finalbody':
|
133 |
+
sub_vector = create_vector('finally', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
|
134 |
+
sub_parts.append({
|
135 |
+
'category': 'finally',
|
136 |
+
'source': ''.join(lines[child.lineno - 1:getattr(child, 'end_lineno', child.lineno)]),
|
137 |
+
'location': (child.lineno, getattr(child, 'end_lineno', child.lineno)),
|
138 |
+
'level': level,
|
139 |
+
'vector': sub_vector,
|
140 |
+
'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
|
141 |
+
'node_id': node_id # Inherits the Try's node_id
|
142 |
+
})
|
143 |
+
child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
|
144 |
+
sub_parts.extend(child_parts)
|
145 |
+
else:
|
146 |
+
child_parts = parse_node(child, lines, nested_prev_end, level + 1, total_lines, current_path, counters)
|
147 |
+
sub_parts.extend(child_parts)
|
148 |
+
nested_prev_end = sub_parts[-1]['location'][1] if sub_parts else nested_prev_end
|
149 |
+
parts.extend(sub_parts)
|
150 |
|
151 |
return parts
|
152 |
|