broadfield-dev committed
Commit 9778ec9 · verified · Parent: 7151f8d

Update parser.py

Files changed (1): parser.py (+109 -94)

parser.py CHANGED
@@ -25,6 +25,8 @@ def get_category(node):
         return 'return'
     elif isinstance(node, ast.Expr):
         return 'expression'
+    elif isinstance(node, ast.ExceptHandler):
+        return 'except'
     else:
         return 'other'
 
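For context, ast.ExceptHandler is the standard-library AST node for an except clause, so this new branch makes get_category label handlers as 'except' instead of letting them fall through to 'other'. A minimal standalone check with the stdlib ast module (the sample source is illustrative, not from this repo):

    import ast

    snippet = "try:\n    x = 1\nexcept ValueError:\n    pass\n"
    handler = ast.parse(snippet).body[0].handlers[0]
    # Prints 'ExceptHandler'; get_category(handler) would now return 'except'.
    print(type(handler).__name__)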
@@ -44,6 +46,11 @@ def create_vector(category, level, location, total_lines, parent_path):
                            for i, parent in enumerate(parent_path)) / max(1, len(category_map))
     return [category_id, level, center_pos, span, parent_depth, parent_weight]
 
+def is_blank_or_comment(line):
+    """Check if a line is blank or a comment."""
+    stripped = line.strip()
+    return not stripped or stripped.startswith('#')
+
 def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None, processed_lines=None):
     if total_lines is None:
         total_lines = len(lines)
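This helper is what the reworked spacer handling below keys on. A quick illustration of what it treats as a spacer line (the function body is copied from the diff; the sample inputs are illustrative):

    def is_blank_or_comment(line):
        """Check if a line is blank or a comment."""
        stripped = line.strip()
        return not stripped or stripped.startswith('#')

    print(is_blank_or_comment("   \n"))          # True: blank line
    print(is_blank_or_comment("# a comment\n"))  # True: comment-only line
    print(is_blank_or_comment("x = 1  # y\n"))   # False: code with a trailing comment is not a spacer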
@@ -66,27 +73,29 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
     counters[category] += 1
     node_id = f"{category.capitalize()}[{counters[category]}]"
 
-    # Spacer before node
+    # Spacer before node (only for blank lines or comments)
     if start_line > prev_end + 1:
         spacer_lines = lines[prev_end:start_line - 1]
         spacer_lines_set = set(range(prev_end + 1, start_line))
         if not spacer_lines_set.issubset(processed_lines):
-            counters['spacer'] += 1
-            spacer_node_id = f"Spacer[{counters['spacer']}]"
-            parts.append({
-                'category': 'spacer',
-                'source': ''.join(spacer_lines),
-                'location': (prev_end + 1, start_line - 1),
-                'level': level,
-                'vector': create_vector('spacer', level, (prev_end + 1, start_line - 1), total_lines, parent_path),
-                'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
-                'node_id': spacer_node_id
-            })
-            processed_lines.update(spacer_lines_set)
-
-    # Current node's header (e.g., 'def', 'if')
+            for i, line in enumerate(spacer_lines, prev_end + 1):
+                if i not in processed_lines and is_blank_or_comment(line):
+                    counters['spacer'] += 1
+                    spacer_node_id = f"Spacer[{counters['spacer']}]"
+                    parts.append({
+                        'category': 'spacer',
+                        'source': line,
+                        'location': (i, i),
+                        'level': level,
+                        'vector': create_vector('spacer', level, (i, i), total_lines, parent_path),
+                        'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
+                        'node_id': spacer_node_id
+                    })
+                    processed_lines.add(i)
+
+    # Current node's header (e.g., 'def', 'if', 'try')
     current_path = parent_path + [node_id]
-    if start_line not in processed_lines:
+    if start_line not in processed_lines and not is_blank_or_comment(lines[start_line - 1]):
         parts.append({
             'category': category,
             'source': lines[start_line - 1],
@@ -103,76 +112,74 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
     for attr in ('body', 'orelse', 'handlers', 'finalbody'):
         if hasattr(node, attr) and getattr(node, attr):
             for child in getattr(node, attr):
-                if attr == 'orelse' and isinstance(node, ast.If) and child.lineno != start_line:
-                    sub_category = 'elif' if 'elif' in lines[child.lineno - 1] else 'else'
-                    sub_start = child.lineno
-                    sub_end = getattr(child, 'end_lineno', sub_start)
-                    if not any(line in processed_lines for line in range(sub_start, sub_end + 1)):
-                        counters[sub_category] += 1
-                        sub_node_id = f"{sub_category.capitalize()}[{counters[sub_category]}]"
-                        parts.append({
-                            'category': sub_category,
-                            'source': lines[sub_start - 1],
-                            'location': (sub_start, sub_start),
-                            'level': level,
-                            'vector': create_vector(sub_category, level, (sub_start, sub_start), total_lines, current_path),
-                            'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
-                            'node_id': sub_node_id
-                        })
-                        processed_lines.add(sub_start)
-                        child_parts = parse_node(child, lines, sub_start, level + 1, total_lines, current_path, counters, processed_lines)
+                child_start = getattr(child, 'lineno', nested_prev_end + 1)
+                child_end = getattr(child, 'end_lineno', child_start)
+                if not any(line in processed_lines for line in range(child_start, child_end + 1)):
+                    if attr == 'orelse' and isinstance(node, ast.If) and child_start != start_line:
+                        sub_category = 'elif' if 'elif' in lines[child_start - 1] else 'else'
+                        if child_start not in processed_lines and not is_blank_or_comment(lines[child_start - 1]):
+                            counters[sub_category] += 1
+                            sub_node_id = f"{sub_category.capitalize()}[{counters[sub_category]}]"
+                            parts.append({
+                                'category': sub_category,
+                                'source': lines[child_start - 1],
+                                'location': (child_start, child_start),
+                                'level': level,
+                                'vector': create_vector(sub_category, level, (child_start, child_start), total_lines, current_path),
+                                'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
+                                'node_id': sub_node_id
+                            })
+                            processed_lines.add(child_start)
+                        child_parts = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
                         parts.extend(child_parts)
-                        nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else sub_start)
-                elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
-                    sub_start = child.lineno
-                    sub_end = getattr(child, 'end_lineno', sub_start)
-                    if not any(line in processed_lines for line in range(sub_start, sub_end + 1)):
-                        counters['except'] += 1
-                        sub_node_id = f"Except[{counters['except']}]"
-                        parts.append({
-                            'category': 'except',
-                            'source': lines[sub_start - 1],
-                            'location': (sub_start, sub_start),
-                            'level': level,
-                            'vector': create_vector('except', level, (sub_start, sub_start), total_lines, current_path),
-                            'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
-                            'node_id': sub_node_id
-                        })
-                        processed_lines.add(sub_start)
-                        child_parts = parse_node(child, lines, sub_start, level + 1, total_lines, current_path, counters, processed_lines)
+                        nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
+                    elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
+                        if child_start not in processed_lines and not is_blank_or_comment(lines[child_start - 1]):
+                            counters['except'] += 1
+                            sub_node_id = f"Except[{counters['except']}]"
+                            parts.append({
+                                'category': 'except',
+                                'source': lines[child_start - 1],
+                                'location': (child_start, child_start),
+                                'level': level,
+                                'vector': create_vector('except', level, (child_start, child_start), total_lines, current_path),
+                                'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
+                                'node_id': sub_node_id
+                            })
+                            processed_lines.add(child_start)
+                        child_parts = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
                         parts.extend(child_parts)
-                        nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else sub_start)
-                elif attr == 'finalbody':
-                    sub_start = child.lineno
-                    sub_end = getattr(child, 'end_lineno', sub_start)
-                    if not any(line in processed_lines for line in range(sub_start, sub_end + 1)):
-                        counters['finally'] += 1
-                        sub_node_id = f"Finally[{counters['finally']}]"
-                        parts.append({
-                            'category': 'finally',
-                            'source': lines[sub_start - 1],
-                            'location': (sub_start, sub_start),
-                            'level': level,
-                            'vector': create_vector('finally', level, (sub_start, sub_start), total_lines, current_path),
-                            'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
-                            'node_id': sub_node_id
-                        })
-                        processed_lines.add(sub_start)
-                        child_parts = parse_node(child, lines, sub_start, level + 1, total_lines, current_path, counters, processed_lines)
+                        nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
+                    elif attr == 'finalbody':
+                        if child_start not in processed_lines and not is_blank_or_comment(lines[child_start - 1]):
+                            counters['finally'] += 1
+                            sub_node_id = f"Finally[{counters['finally']}]"
+                            parts.append({
+                                'category': 'finally',
+                                'source': lines[child_start - 1],
+                                'location': (child_start, child_start),
+                                'level': level,
+                                'vector': create_vector('finally', level, (child_start, child_start), total_lines, current_path),
+                                'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
+                                'node_id': sub_node_id
+                            })
+                            processed_lines.add(child_start)
+                        child_parts = parse_node(child, lines, child_start, level + 1, total_lines, current_path, counters, processed_lines)
                         parts.extend(child_parts)
-                        nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else sub_start)
-                else:
-                    child_parts = parse_node(child, lines, nested_prev_end, level + 1, total_lines, current_path, counters, processed_lines)
-                    parts.extend(child_parts)
-                    nested_prev_end = child_parts[-1]['location'][1] if child_parts else nested_prev_end
+                        nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
+                    else:
+                        child_parts = parse_node(child, lines, nested_prev_end, level + 1, total_lines, current_path, counters, processed_lines)
+                        parts.extend(child_parts)
+                        nested_prev_end = child_parts[-1]['location'][1] if child_parts else nested_prev_end
 
     # Update end_line and source of the parent node if its body extends it
     if nested_prev_end > start_line and start_line not in processed_lines:
         final_end = nested_prev_end
-        parts[-1]['location'] = (start_line, final_end)
-        parts[-1]['source'] = ''.join(lines[start_line - 1:final_end])
-        parts[-1]['vector'] = create_vector(category, level, (start_line, final_end), total_lines, current_path)
-        processed_lines.update(range(start_line, final_end + 1))
+        if start_line not in processed_lines:
+            parts[-1]['location'] = (start_line, final_end)
+            parts[-1]['source'] = ''.join(lines[start_line - 1:final_end])
+            parts[-1]['vector'] = create_vector(category, level, (start_line, final_end), total_lines, current_path)
+            processed_lines.update(range(start_line, final_end + 1))
 
     return parts
 
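Note that the reworked loop still reaches children through the same four AST fields; a small standalone sketch of what those fields hold for a try/except/else/finally statement (stdlib ast only, sample source is illustrative):

    import ast

    src = (
        "try:\n    a()\n"
        "except ValueError:\n    b()\n"
        "else:\n    c()\n"
        "finally:\n    d()\n"
    )
    node = ast.parse(src).body[0]
    for attr in ('body', 'orelse', 'handlers', 'finalbody'):
        children = getattr(node, attr, [])
        print(attr, [type(c).__name__ for c in children])
    # Prints: body ['Expr'], orelse ['Expr'], handlers ['ExceptHandler'], finalbody ['Expr']

Each of these children carries lineno and end_lineno on Python 3.8+, which is what the new child_start / child_end getattr fallbacks lean on.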
@@ -197,18 +204,26 @@ def parse_python_code(code):
     remaining_lines = lines[prev_end:]
     remaining_lines_set = set(range(prev_end + 1, total_lines + 1))
     if not remaining_lines_set.issubset(processed_lines):
-        counters = {'spacer': 0}
-        counters['spacer'] += 1
-        spacer_node_id = f"Spacer[{counters['spacer']}]"
-        parts.append({
-            'category': 'spacer',
-            'source': ''.join(remaining_lines),
-            'location': (prev_end + 1, total_lines + 1),
-            'level': 0,
-            'vector': create_vector('spacer', 0, (prev_end + 1, total_lines + 1), total_lines, []),
-            'parent_path': 'Top-Level',
-            'node_id': spacer_node_id
-        })
-        processed_lines.update(remaining_lines_set)
-
-    return parts
+        for i, line in enumerate(remaining_lines, prev_end + 1):
+            if i not in processed_lines:
+                if is_blank_or_comment(line):
+                    counters = {'spacer': 0}
+                    counters['spacer'] += 1
+                    spacer_node_id = f"Spacer[{counters['spacer']}]"
+                    parts.append({
+                        'category': 'spacer',
+                        'source': line,
+                        'location': (i, i),
+                        'level': 0,
+                        'vector': create_vector('spacer', 0, (i, i), total_lines, []),
+                        'parent_path': 'Top-Level',
+                        'node_id': spacer_node_id
+                    })
+                    processed_lines.add(i)
+
+    return parts
+
+def is_blank_or_comment(line):
+    """Check if a line is blank or a comment."""
+    stripped = line.strip()
+    return not stripped or stripped.startswith('#')
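Taken together, the trailing-lines pass now emits one Spacer part per remaining blank or comment line instead of a single part spanning the whole tail. A standalone sketch of just that loop, with illustrative data standing in for the real parser state:

    lines = ["x = 1\n", "\n", "# trailing comment\n"]
    prev_end, processed_lines = 1, {1}   # pretend line 1 was already parsed
    spacers = []
    for i, line in enumerate(lines[prev_end:], prev_end + 1):
        stripped = line.strip()
        if i not in processed_lines and (not stripped or stripped.startswith('#')):
            spacers.append((i, line))
    print(spacers)  # [(2, '\n'), (3, '# trailing comment\n')]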