broadfield-dev committed on
Commit
df0d26b
·
verified ·
1 Parent(s): fbf8543

Update parser.py

Browse files
Files changed (1) hide show
  1. parser.py +51 -44
parser.py CHANGED
@@ -40,11 +40,8 @@ def create_vector(category, level, location, total_lines, parent_path):
40
  span = (end_line - start_line + 1) / total_lines
41
  center_pos = ((start_line + end_line) / 2) / total_lines
42
  parent_depth = len(parent_path)
43
- parent_weight = 0
44
- for i, parent in enumerate(parent_path):
45
- parent_category = parent.split('[')[0].lower()
46
- parent_weight += category_map.get(parent_category, 0) * (1 / (i + 1))
47
- parent_weight = parent_weight / max(1, len(category_map))
48
  return [category_id, level, center_pos, span, parent_depth, parent_weight]
49
 
50
  def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
@@ -63,90 +60,101 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
63
  counters[category] += 1
64
  node_id = f"{category.capitalize()}[{counters[category]}]"
65
 
 
66
  if start_line > prev_end + 1:
67
  spacer_lines = lines[prev_end:start_line - 1]
68
  counters['spacer'] += 1
69
  spacer_node_id = f"Spacer[{counters['spacer']}]"
70
- spacer_vector = create_vector('spacer', level, (prev_end + 1, start_line - 1), total_lines, parent_path)
71
  parts.append({
72
  'category': 'spacer',
73
  'source': ''.join(spacer_lines),
74
  'location': (prev_end + 1, start_line - 1),
75
  'level': level,
76
- 'vector': spacer_vector,
77
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
78
  'node_id': spacer_node_id
79
  })
80
 
81
- stmt_lines = lines[start_line - 1:end_line]
82
  current_path = parent_path + [node_id]
83
- node_vector = create_vector(category, level, (start_line, end_line), total_lines, current_path)
84
  parts.append({
85
  'category': category,
86
- 'source': ''.join(stmt_lines),
87
- 'location': (start_line, end_line),
88
  'level': level,
89
- 'vector': node_vector,
90
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
91
  'node_id': node_id
92
  })
93
 
94
- nested_prev_end = end_line - 1
 
95
  for attr in ('body', 'orelse', 'handlers', 'finalbody'):
96
  if hasattr(node, attr) and getattr(node, attr):
97
- sub_parts = []
98
  for child in getattr(node, attr):
99
  if attr == 'orelse' and isinstance(node, ast.If) and child.lineno != start_line:
100
- sub_category = 'elif' if child.lineno != end_line else 'else'
101
  counters[sub_category] += 1
102
  sub_node_id = f"{sub_category.capitalize()}[{counters[sub_category]}]"
103
- sub_vector = create_vector(sub_category, level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
104
- sub_parts.append({
 
105
  'category': sub_category,
106
- 'source': ''.join(lines[child.lineno - 1:getattr(child, 'end_lineno', child.lineno)]),
107
- 'location': (child.lineno, getattr(child, 'end_lineno', child.lineno)),
108
  'level': level,
109
- 'vector': sub_vector,
110
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
111
- 'node_id': node_id
112
  })
113
- child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
114
- sub_parts.extend(child_parts)
 
115
  elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
116
  counters['except'] += 1
117
  sub_node_id = f"Except[{counters['except']}]"
118
- sub_vector = create_vector('except', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
119
- sub_parts.append({
 
120
  'category': 'except',
121
- 'source': ''.join(lines[child.lineno - 1:getattr(child, 'end_lineno', child.lineno)]),
122
- 'location': (child.lineno, getattr(child, 'end_lineno', child.lineno)),
123
  'level': level,
124
- 'vector': sub_vector,
125
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
126
- 'node_id': node_id
127
  })
128
- child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
129
- sub_parts.extend(child_parts)
 
130
  elif attr == 'finalbody':
131
  counters['finally'] += 1
132
  sub_node_id = f"Finally[{counters['finally']}]"
133
- sub_vector = create_vector('finally', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
134
- sub_parts.append({
 
135
  'category': 'finally',
136
- 'source': ''.join(lines[child.lineno - 1:getattr(child, 'end_lineno', child.lineno)]),
137
- 'location': (child.lineno, getattr(child, 'end_lineno', child.lineno)),
138
  'level': level,
139
- 'vector': sub_vector,
140
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
141
- 'node_id': node_id
142
  })
143
- child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
144
- sub_parts.extend(child_parts)
 
145
  else:
146
  child_parts = parse_node(child, lines, nested_prev_end, level + 1, total_lines, current_path, counters)
147
- sub_parts.extend(child_parts)
148
- nested_prev_end = sub_parts[-1]['location'][1] if sub_parts else nested_prev_end
149
- parts.extend(sub_parts)
 
 
 
 
 
150
 
151
  return parts
152
 
@@ -171,13 +179,12 @@ def parse_python_code(code):
171
  counters = {'spacer': 0}
172
  counters['spacer'] += 1
173
  spacer_node_id = f"Spacer[{counters['spacer']}]"
174
- spacer_vector = create_vector('spacer', 0, (prev_end + 1, total_lines + 1), total_lines, [])
175
  parts.append({
176
  'category': 'spacer',
177
  'source': ''.join(remaining_lines),
178
  'location': (prev_end + 1, total_lines + 1),
179
  'level': 0,
180
- 'vector': spacer_vector,
181
  'parent_path': 'Top-Level',
182
  'node_id': spacer_node_id
183
  })
 
40
  span = (end_line - start_line + 1) / total_lines
41
  center_pos = ((start_line + end_line) / 2) / total_lines
42
  parent_depth = len(parent_path)
43
+ parent_weight = sum(category_map.get(parent.split('[')[0].lower(), 0) * (1 / (i + 1))
44
+ for i, parent in enumerate(parent_path)) / max(1, len(category_map))
 
 
 
45
  return [category_id, level, center_pos, span, parent_depth, parent_weight]
46
 
47
  def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
 
60
  counters[category] += 1
61
  node_id = f"{category.capitalize()}[{counters[category]}]"
62
 
63
+ # Spacer before node
64
  if start_line > prev_end + 1:
65
  spacer_lines = lines[prev_end:start_line - 1]
66
  counters['spacer'] += 1
67
  spacer_node_id = f"Spacer[{counters['spacer']}]"
 
68
  parts.append({
69
  'category': 'spacer',
70
  'source': ''.join(spacer_lines),
71
  'location': (prev_end + 1, start_line - 1),
72
  'level': level,
73
+ 'vector': create_vector('spacer', level, (prev_end + 1, start_line - 1), total_lines, parent_path),
74
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
75
  'node_id': spacer_node_id
76
  })
77
 
78
+ # Current node's header (e.g., 'def', 'if')
79
  current_path = parent_path + [node_id]
80
+ node_lines = lines[start_line - 1:end_line]
81
  parts.append({
82
  'category': category,
83
+ 'source': node_lines[0], # Only the header line
84
+ 'location': (start_line, start_line),
85
  'level': level,
86
+ 'vector': create_vector(category, level, (start_line, start_line), total_lines, current_path),
87
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
88
  'node_id': node_id
89
  })
90
 
91
+ # Process nested bodies
92
+ nested_prev_end = start_line
93
  for attr in ('body', 'orelse', 'handlers', 'finalbody'):
94
  if hasattr(node, attr) and getattr(node, attr):
 
95
  for child in getattr(node, attr):
96
  if attr == 'orelse' and isinstance(node, ast.If) and child.lineno != start_line:
97
+ sub_category = 'elif' if 'elif' in lines[child.lineno - 1] else 'else'
98
  counters[sub_category] += 1
99
  sub_node_id = f"{sub_category.capitalize()}[{counters[sub_category]}]"
100
+ sub_start = child.lineno
101
+ sub_end = getattr(child, 'end_lineno', sub_start)
102
+ parts.append({
103
  'category': sub_category,
104
+ 'source': lines[sub_start - 1],
105
+ 'location': (sub_start, sub_start),
106
  'level': level,
107
+ 'vector': create_vector(sub_category, level, (sub_start, sub_start), total_lines, current_path),
108
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
109
+ 'node_id': sub_node_id
110
  })
111
+ child_parts = parse_node(child, lines, sub_start, level + 1, total_lines, current_path, counters)
112
+ parts.extend(child_parts)
113
+ nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else sub_start)
114
  elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
115
  counters['except'] += 1
116
  sub_node_id = f"Except[{counters['except']}]"
117
+ sub_start = child.lineno
118
+ sub_end = getattr(child, 'end_lineno', sub_start)
119
+ parts.append({
120
  'category': 'except',
121
+ 'source': lines[sub_start - 1],
122
+ 'location': (sub_start, sub_start),
123
  'level': level,
124
+ 'vector': create_vector('except', level, (sub_start, sub_start), total_lines, current_path),
125
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
126
+ 'node_id': sub_node_id
127
  })
128
+ child_parts = parse_node(child, lines, sub_start, level + 1, total_lines, current_path, counters)
129
+ parts.extend(child_parts)
130
+ nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else sub_start)
131
  elif attr == 'finalbody':
132
  counters['finally'] += 1
133
  sub_node_id = f"Finally[{counters['finally']}]"
134
+ sub_start = child.lineno
135
+ sub_end = getattr(child, 'end_lineno', sub_start)
136
+ parts.append({
137
  'category': 'finally',
138
+ 'source': lines[sub_start - 1],
139
+ 'location': (sub_start, sub_start),
140
  'level': level,
141
+ 'vector': create_vector('finally', level, (sub_start, sub_start), total_lines, current_path),
142
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
143
+ 'node_id': sub_node_id
144
  })
145
+ child_parts = parse_node(child, lines, sub_start, level + 1, total_lines, current_path, counters)
146
+ parts.extend(child_parts)
147
+ nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else sub_start)
148
  else:
149
  child_parts = parse_node(child, lines, nested_prev_end, level + 1, total_lines, current_path, counters)
150
+ parts.extend(child_parts)
151
+ nested_prev_end = child_parts[-1]['location'][1] if child_parts else nested_prev_end
152
+
153
+ # Update end_line of the parent node to reflect its full scope
154
+ if nested_prev_end > start_line:
155
+ parts[-1]['location'] = (start_line, nested_prev_end)
156
+ parts[-1]['source'] = ''.join(lines[start_line - 1:nested_prev_end])
157
+ parts[-1]['vector'] = create_vector(category, level, (start_line, nested_prev_end), total_lines, current_path)
158
 
159
  return parts
160
 
 
179
  counters = {'spacer': 0}
180
  counters['spacer'] += 1
181
  spacer_node_id = f"Spacer[{counters['spacer']}]"
 
182
  parts.append({
183
  'category': 'spacer',
184
  'source': ''.join(remaining_lines),
185
  'location': (prev_end + 1, total_lines + 1),
186
  'level': 0,
187
+ 'vector': create_vector('spacer', 0, (prev_end + 1, total_lines + 1), total_lines, []),
188
  'parent_path': 'Top-Level',
189
  'node_id': spacer_node_id
190
  })