broadfield-dev committed on
Commit
c2f70be
·
verified ·
1 Parent(s): 27abd0b

Update parser.py

Browse files
Files changed (1) hide show
  1. parser.py +31 -26
parser.py CHANGED
@@ -21,6 +21,8 @@ def get_category(node):
21
  return 'for'
22
  elif isinstance(node, ast.Try):
23
  return 'try'
 
 
24
  elif isinstance(node, ast.Expr):
25
  return 'expression'
26
  else:
@@ -31,29 +33,19 @@ def create_vector(category, level, location, total_lines, parent_path):
31
  category_map = {
32
  'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
33
  'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11,
34
- 'other': 12, 'elif': 13, 'else': 14, 'except': 15, 'finally': 16
35
  }
36
  category_id = category_map.get(category, 0)
37
  start_line, end_line = location
38
- span = (end_line - start_line + 1) / total_lines # Normalized size of the part
39
- center_pos = ((start_line + end_line) / 2) / total_lines # Center position normalized
40
  parent_depth = len(parent_path)
41
-
42
- # Weighted sum of parent categories (simple weighting by position)
43
  parent_weight = 0
44
  for i, parent in enumerate(parent_path):
45
  parent_category = parent.split('[')[0].lower()
46
- parent_weight += category_map.get(parent_category, 0) * (1 / (i + 1)) # Decay with depth
47
- parent_weight = parent_weight / max(1, len(category_map)) # Normalize by max category ID
48
-
49
- return [
50
- category_id, # Type of the part
51
- level, # Nesting depth
52
- center_pos, # Center position in file (0.0 to 1.0)
53
- span, # Relative size in file (0.0 to 1.0)
54
- parent_depth, # Number of ancestors
55
- parent_weight # Semantic connection to parents (0.0 to 1.0-ish)
56
- ]
57
 
58
  def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
59
  """Recursively parse an AST node with full hierarchy tracking."""
@@ -62,20 +54,22 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
62
  if parent_path is None:
63
  parent_path = []
64
  if counters is None:
65
- counters = {'if': 0, 'while': 0, 'for': 0, 'function': 0, 'class': 0, 'try': 0}
66
 
67
  parts = []
68
  start_line = getattr(node, 'lineno', prev_end + 1)
69
  end_line = getattr(node, 'end_lineno', start_line)
70
  category = get_category(node)
71
 
72
- node_id = ''
73
- if category in counters:
74
- counters[category] += 1
75
- node_id = f"{category.capitalize()}[{counters[category]}]"
76
 
 
77
  if start_line > prev_end + 1:
78
  spacer_lines = lines[prev_end:start_line - 1]
 
 
79
  spacer_vector = create_vector('spacer', level, (prev_end + 1, start_line - 1), total_lines, parent_path)
80
  parts.append({
81
  'category': 'spacer',
@@ -84,11 +78,12 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
84
  'level': level,
85
  'vector': spacer_vector,
86
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
87
- 'node_id': ''
88
  })
89
 
 
90
  stmt_lines = lines[start_line - 1:end_line]
91
- current_path = parent_path + ([node_id] if node_id else [])
92
  node_vector = create_vector(category, level, (start_line, end_line), total_lines, current_path)
93
  parts.append({
94
  'category': category,
@@ -100,6 +95,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
100
  'node_id': node_id
101
  })
102
 
 
103
  nested_prev_end = end_line - 1
104
  for attr in ('body', 'orelse', 'handlers', 'finalbody'):
105
  if hasattr(node, attr) and getattr(node, attr):
@@ -107,6 +103,8 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
107
  for child in getattr(node, attr):
108
  if attr == 'orelse' and isinstance(node, ast.If) and child.lineno != start_line:
109
  sub_category = 'elif' if child.lineno != end_line else 'else'
 
 
110
  sub_vector = create_vector(sub_category, level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
111
  sub_parts.append({
112
  'category': sub_category,
@@ -115,11 +113,13 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
115
  'level': level,
116
  'vector': sub_vector,
117
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
118
- 'node_id': node_id
119
  })
120
  child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
121
  sub_parts.extend(child_parts)
122
  elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
 
 
123
  sub_vector = create_vector('except', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
124
  sub_parts.append({
125
  'category': 'except',
@@ -133,6 +133,8 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
133
  child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
134
  sub_parts.extend(child_parts)
135
  elif attr == 'finalbody':
 
 
136
  sub_vector = create_vector('finally', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
137
  sub_parts.append({
138
  'category': 'finally',
@@ -160,7 +162,7 @@ def parse_python_code(code):
160
  try:
161
  tree = ast.parse(code)
162
  except SyntaxError:
163
- return [{'category': 'error', 'source': 'Invalid Python code', 'location': (1, 1), 'level': 0, 'vector': [0, 0, 1.0, 0.0, 0, 0], 'parent_path': 'Top-Level', 'node_id': ''}]
164
 
165
  parts = []
166
  prev_end = 0
@@ -172,6 +174,9 @@ def parse_python_code(code):
172
 
173
  if prev_end < total_lines:
174
  remaining_lines = lines[prev_end:]
 
 
 
175
  spacer_vector = create_vector('spacer', 0, (prev_end + 1, total_lines + 1), total_lines, [])
176
  parts.append({
177
  'category': 'spacer',
@@ -180,7 +185,7 @@ def parse_python_code(code):
180
  'level': 0,
181
  'vector': spacer_vector,
182
  'parent_path': 'Top-Level',
183
- 'node_id': ''
184
  })
185
 
186
  return parts
 
21
  return 'for'
22
  elif isinstance(node, ast.Try):
23
  return 'try'
24
+ elif isinstance(node, ast.Return):
25
+ return 'return'
26
  elif isinstance(node, ast.Expr):
27
  return 'expression'
28
  else:
 
33
  category_map = {
34
  'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
35
  'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11,
36
+ 'other': 12, 'elif': 13, 'else': 14, 'except': 15, 'finally': 16, 'return': 17
37
  }
38
  category_id = category_map.get(category, 0)
39
  start_line, end_line = location
40
+ span = (end_line - start_line + 1) / total_lines
41
+ center_pos = ((start_line + end_line) / 2) / total_lines
42
  parent_depth = len(parent_path)
 
 
43
  parent_weight = 0
44
  for i, parent in enumerate(parent_path):
45
  parent_category = parent.split('[')[0].lower()
46
+ parent_weight += category_map.get(parent_category, 0) * (1 / (i + 1))
47
+ parent_weight = parent_weight / max(1, len(category_map))
48
+ return [category_id, level, center_pos, span, parent_depth, parent_weight]
 
 
 
 
 
 
 
 
49
 
50
  def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
51
  """Recursively parse an AST node with full hierarchy tracking."""
 
54
  if parent_path is None:
55
  parent_path = []
56
  if counters is None:
57
+ counters = {cat: 0 for cat in ['import', 'assignment', 'function', 'async_function', 'class', 'if', 'while', 'for', 'try', 'return', 'expression', 'other', 'spacer', 'elif', 'else', 'except', 'finally']}
58
 
59
  parts = []
60
  start_line = getattr(node, 'lineno', prev_end + 1)
61
  end_line = getattr(node, 'end_lineno', start_line)
62
  category = get_category(node)
63
 
64
+ # Assign a node_id to every part
65
+ counters[category] += 1
66
+ node_id = f"{category.capitalize()}[{counters[category]}]"
 
67
 
68
+ # Spacer before the node
69
  if start_line > prev_end + 1:
70
  spacer_lines = lines[prev_end:start_line - 1]
71
+ counters['spacer'] += 1
72
+ spacer_node_id = f"Spacer[{counters['spacer']}]"
73
  spacer_vector = create_vector('spacer', level, (prev_end + 1, start_line - 1), total_lines, parent_path)
74
  parts.append({
75
  'category': 'spacer',
 
78
  'level': level,
79
  'vector': spacer_vector,
80
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
81
+ 'node_id': spacer_node_id
82
  })
83
 
84
+ # Main node
85
  stmt_lines = lines[start_line - 1:end_line]
86
+ current_path = parent_path + [node_id]
87
  node_vector = create_vector(category, level, (start_line, end_line), total_lines, current_path)
88
  parts.append({
89
  'category': category,
 
95
  'node_id': node_id
96
  })
97
 
98
+ # Process nested bodies
99
  nested_prev_end = end_line - 1
100
  for attr in ('body', 'orelse', 'handlers', 'finalbody'):
101
  if hasattr(node, attr) and getattr(node, attr):
 
103
  for child in getattr(node, attr):
104
  if attr == 'orelse' and isinstance(node, ast.If) and child.lineno != start_line:
105
  sub_category = 'elif' if child.lineno != end_line else 'else'
106
+ counters[sub_category] += 1
107
+ sub_node_id = f"{sub_category.capitalize()}[{counters[sub_category]}]"
108
  sub_vector = create_vector(sub_category, level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
109
  sub_parts.append({
110
  'category': sub_category,
 
113
  'level': level,
114
  'vector': sub_vector,
115
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
116
+ 'node_id': node_id # Inherits parent's node_id for context
117
  })
118
  child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
119
  sub_parts.extend(child_parts)
120
  elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
121
+ counters['except'] += 1
122
+ sub_node_id = f"Except[{counters['except']}]"
123
  sub_vector = create_vector('except', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
124
  sub_parts.append({
125
  'category': 'except',
 
133
  child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
134
  sub_parts.extend(child_parts)
135
  elif attr == 'finalbody':
136
+ counters['finally'] += 1
137
+ sub_node_id = f"Finally[{counters['finally']}]"
138
  sub_vector = create_vector('finally', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
139
  sub_parts.append({
140
  'category': 'finally',
 
162
  try:
163
  tree = ast.parse(code)
164
  except SyntaxError:
165
+ return [{'category': 'error', 'source': 'Invalid Python code', 'location': (1, 1), 'level': 0, 'vector': [0, 0, 1.0, 0.0, 0, 0], 'parent_path': 'Top-Level', 'node_id': 'Error[1]'}]
166
 
167
  parts = []
168
  prev_end = 0
 
174
 
175
  if prev_end < total_lines:
176
  remaining_lines = lines[prev_end:]
177
+ counters = {'spacer': 0}
178
+ counters['spacer'] += 1
179
+ spacer_node_id = f"Spacer[{counters['spacer']}]"
180
  spacer_vector = create_vector('spacer', 0, (prev_end + 1, total_lines + 1), total_lines, [])
181
  parts.append({
182
  'category': 'spacer',
 
185
  'level': 0,
186
  'vector': spacer_vector,
187
  'parent_path': 'Top-Level',
188
+ 'node_id': spacer_node_id
189
  })
190
 
191
  return parts