Spaces:
Running
Running
Update parser.py
Browse files
parser.py
CHANGED
@@ -29,7 +29,7 @@ def get_category(node):
|
|
29 |
return 'other'
|
30 |
|
31 |
def create_vector(category, level, location, total_lines, parent_path):
|
32 |
-
"""Create
|
33 |
category_map = {
|
34 |
'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
|
35 |
'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11,
|
@@ -48,7 +48,6 @@ def create_vector(category, level, location, total_lines, parent_path):
|
|
48 |
return [category_id, level, center_pos, span, parent_depth, parent_weight]
|
49 |
|
50 |
def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
|
51 |
-
"""Recursively parse an AST node with full hierarchy tracking."""
|
52 |
if total_lines is None:
|
53 |
total_lines = len(lines)
|
54 |
if parent_path is None:
|
@@ -61,11 +60,9 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
|
|
61 |
end_line = getattr(node, 'end_lineno', start_line)
|
62 |
category = get_category(node)
|
63 |
|
64 |
-
# Assign a node_id to every part
|
65 |
counters[category] += 1
|
66 |
node_id = f"{category.capitalize()}[{counters[category]}]"
|
67 |
|
68 |
-
# Spacer before the node
|
69 |
if start_line > prev_end + 1:
|
70 |
spacer_lines = lines[prev_end:start_line - 1]
|
71 |
counters['spacer'] += 1
|
@@ -81,7 +78,6 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
|
|
81 |
'node_id': spacer_node_id
|
82 |
})
|
83 |
|
84 |
-
# Main node
|
85 |
stmt_lines = lines[start_line - 1:end_line]
|
86 |
current_path = parent_path + [node_id]
|
87 |
node_vector = create_vector(category, level, (start_line, end_line), total_lines, current_path)
|
@@ -95,7 +91,6 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
|
|
95 |
'node_id': node_id
|
96 |
})
|
97 |
|
98 |
-
# Process nested bodies
|
99 |
nested_prev_end = end_line - 1
|
100 |
for attr in ('body', 'orelse', 'handlers', 'finalbody'):
|
101 |
if hasattr(node, attr) and getattr(node, attr):
|
@@ -113,7 +108,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
|
|
113 |
'level': level,
|
114 |
'vector': sub_vector,
|
115 |
'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
|
116 |
-
'node_id': node_id
|
117 |
})
|
118 |
child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
|
119 |
sub_parts.extend(child_parts)
|
@@ -156,7 +151,6 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
|
|
156 |
return parts
|
157 |
|
158 |
def parse_python_code(code):
|
159 |
-
"""Parse Python code string and return parts with hierarchy and vectors."""
|
160 |
lines = code.splitlines(keepends=True)
|
161 |
total_lines = len(lines)
|
162 |
try:
|
|
|
29 |
return 'other'
|
30 |
|
31 |
def create_vector(category, level, location, total_lines, parent_path):
|
32 |
+
"""Create a vector optimized for role similarity."""
|
33 |
category_map = {
|
34 |
'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
|
35 |
'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11,
|
|
|
48 |
return [category_id, level, center_pos, span, parent_depth, parent_weight]
|
49 |
|
50 |
def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
|
|
|
51 |
if total_lines is None:
|
52 |
total_lines = len(lines)
|
53 |
if parent_path is None:
|
|
|
60 |
end_line = getattr(node, 'end_lineno', start_line)
|
61 |
category = get_category(node)
|
62 |
|
|
|
63 |
counters[category] += 1
|
64 |
node_id = f"{category.capitalize()}[{counters[category]}]"
|
65 |
|
|
|
66 |
if start_line > prev_end + 1:
|
67 |
spacer_lines = lines[prev_end:start_line - 1]
|
68 |
counters['spacer'] += 1
|
|
|
78 |
'node_id': spacer_node_id
|
79 |
})
|
80 |
|
|
|
81 |
stmt_lines = lines[start_line - 1:end_line]
|
82 |
current_path = parent_path + [node_id]
|
83 |
node_vector = create_vector(category, level, (start_line, end_line), total_lines, current_path)
|
|
|
91 |
'node_id': node_id
|
92 |
})
|
93 |
|
|
|
94 |
nested_prev_end = end_line - 1
|
95 |
for attr in ('body', 'orelse', 'handlers', 'finalbody'):
|
96 |
if hasattr(node, attr) and getattr(node, attr):
|
|
|
108 |
'level': level,
|
109 |
'vector': sub_vector,
|
110 |
'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
|
111 |
+
'node_id': node_id
|
112 |
})
|
113 |
child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
|
114 |
sub_parts.extend(child_parts)
|
|
|
151 |
return parts
|
152 |
|
153 |
def parse_python_code(code):
|
|
|
154 |
lines = code.splitlines(keepends=True)
|
155 |
total_lines = len(lines)
|
156 |
try:
|