broadfield-dev committed on
Commit
c190b69
·
verified ·
1 Parent(s): 4d622b1

Update parser.py

Browse files
Files changed (1) hide show
  1. parser.py +58 -11
parser.py CHANGED
@@ -19,29 +19,30 @@ def get_category(node):
19
  return 'while'
20
  elif isinstance(node, ast.For):
21
  return 'for'
 
 
22
  elif isinstance(node, ast.Expr):
23
  return 'expression'
24
  else:
25
- return 'other'
26
 
27
  def create_vector(category, level, location, total_lines, parent_path):
28
  """Create a vector representation including hierarchy info."""
29
  category_map = {
30
  'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
31
- 'if': 6, 'while': 7, 'for': 8, 'expression': 9, 'spacer': 10, 'other': 11
32
  }
33
  category_id = category_map.get(category, 0)
34
  start_line, end_line = location
35
- # Extend vector with parent path length and a hash of the path for uniqueness
36
  parent_depth = len(parent_path)
37
- parent_hash = hash(' -> '.join(parent_path)) % 1000 # Simple hash for vector
38
  return [
39
  category_id,
40
  level,
41
  start_line / total_lines,
42
  end_line / total_lines,
43
  parent_depth,
44
- parent_hash / 1000 # Normalized hash (0.0 to 1.0)
45
  ]
46
 
47
  def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
@@ -51,7 +52,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
51
  if parent_path is None:
52
  parent_path = []
53
  if counters is None:
54
- counters = {'if': 0, 'while': 0, 'for': 0, 'function': 0, 'class': 0} # Track counts per type
55
 
56
  parts = []
57
  start_line = getattr(node, 'lineno', prev_end + 1)
@@ -92,14 +93,60 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
92
  'node_id': node_id
93
  })
94
 
95
- # Process nested nodes (body and orelse)
96
  nested_prev_end = end_line - 1
97
- for attr in ('body', 'orelse'):
98
  if hasattr(node, attr) and getattr(node, attr):
 
99
  for child in getattr(node, attr):
100
- child_parts = parse_node(child, lines, nested_prev_end, level + 1, total_lines, current_path, counters)
101
- parts.extend(child_parts)
102
- nested_prev_end = child_parts[-1]['location'][1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  return parts
105
 
 
19
  return 'while'
20
  elif isinstance(node, ast.For):
21
  return 'for'
22
+ elif isinstance(node, ast.Try):
23
+ return 'try'
24
  elif isinstance(node, ast.Expr):
25
  return 'expression'
26
  else:
27
+ return 'other' # Fallback for uncategorized nodes (e.g., 'pass')
28
 
# Numeric ids for every category label that reaches create_vector.
# Ids 1-12 are unchanged; 13-16 cover the sub-block labels that parse_node
# passes for if/try branches, which previously all fell back to 0.
_CATEGORY_IDS = {
    'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
    'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11, 'other': 12,
    'elif': 13, 'else': 14, 'except': 15, 'finally': 16,
}


def create_vector(category, level, location, total_lines, parent_path):
    """Create a vector representation including hierarchy info.

    Args:
        category: Category label of the part (e.g. 'if', 'try', 'except').
            Labels missing from the category table map to id 0.
        level: Nesting level; copied into the vector unchanged.
        location: ``(start_line, end_line)`` pair for the part.
        total_lines: Divisor used to normalise both line numbers.
        parent_path: Sequence of strings naming the enclosing nodes; an empty
            sequence yields a depth of 0 and a path hash of 0.0.

    Returns:
        A six-element list:
        ``[category_id, level, start_line / total_lines,
        end_line / total_lines, parent_depth, path_hash]`` where ``path_hash``
        lies in ``[0.0, 1.0)``.

    Raises:
        ZeroDivisionError: If ``total_lines`` is 0.
    """
    # Imported locally so the function does not depend on the module's
    # import block.
    import zlib

    category_id = _CATEGORY_IDS.get(category, 0)
    start_line, end_line = location
    parent_depth = len(parent_path)

    # The built-in hash() of a str is salted per interpreter process, so it
    # produced a different feature value on every run.  CRC-32 is stable
    # across runs and, like hash(''), gives 0 for the empty top-level path.
    path_bytes = ' -> '.join(parent_path).encode('utf-8')
    parent_hash = zlib.crc32(path_bytes) % 1000

    return [
        category_id,
        level,
        start_line / total_lines,
        end_line / total_lines,
        parent_depth,
        parent_hash / 1000,  # Normalised to the range [0.0, 1.0)
    ]
47
 
48
  def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
 
52
  if parent_path is None:
53
  parent_path = []
54
  if counters is None:
55
+ counters = {'if': 0, 'while': 0, 'for': 0, 'function': 0, 'class': 0, 'try': 0}
56
 
57
  parts = []
58
  start_line = getattr(node, 'lineno', prev_end + 1)
 
93
  'node_id': node_id
94
  })
95
 
96
+ # Process nested bodies (body, orelse, handlers, finalbody)
97
  nested_prev_end = end_line - 1
98
+ for attr in ('body', 'orelse', 'handlers', 'finalbody'):
99
  if hasattr(node, attr) and getattr(node, attr):
100
+ sub_parts = []
101
  for child in getattr(node, attr):
102
+ # Special case for elif: treat as part of the if's orelse but same level
103
+ if attr == 'orelse' and isinstance(node, ast.If) and child.lineno != start_line:
104
+ sub_category = 'elif' if child.lineno != end_line else 'else'
105
+ sub_vector = create_vector(sub_category, level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
106
+ sub_parts.append({
107
+ 'category': sub_category,
108
+ 'source': ''.join(lines[child.lineno - 1:getattr(child, 'end_lineno', child.lineno)]),
109
+ 'location': (child.lineno, getattr(child, 'end_lineno', child.lineno)),
110
+ 'level': level,
111
+ 'vector': sub_vector,
112
+ 'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
113
+ 'node_id': node_id # Inherits the If's node_id
114
+ })
115
+ child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
116
+ sub_parts.extend(child_parts)
117
+ # Special case for except: treat as part of try
118
+ elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
119
+ sub_vector = create_vector('except', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
120
+ sub_parts.append({
121
+ 'category': 'except',
122
+ 'source': ''.join(lines[child.lineno - 1:getattr(child, 'end_lineno', child.lineno)]),
123
+ 'location': (child.lineno, getattr(child, 'end_lineno', child.lineno)),
124
+ 'level': level,
125
+ 'vector': sub_vector,
126
+ 'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
127
+ 'node_id': node_id # Inherits the Try's node_id
128
+ })
129
+ child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
130
+ sub_parts.extend(child_parts)
131
+ # Special case for finally
132
+ elif attr == 'finalbody':
133
+ sub_vector = create_vector('finally', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
134
+ sub_parts.append({
135
+ 'category': 'finally',
136
+ 'source': ''.join(lines[child.lineno - 1:getattr(child, 'end_lineno', child.lineno)]),
137
+ 'location': (child.lineno, getattr(child, 'end_lineno', child.lineno)),
138
+ 'level': level,
139
+ 'vector': sub_vector,
140
+ 'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
141
+ 'node_id': node_id # Inherits the Try's node_id
142
+ })
143
+ child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
144
+ sub_parts.extend(child_parts)
145
+ else:
146
+ child_parts = parse_node(child, lines, nested_prev_end, level + 1, total_lines, current_path, counters)
147
+ sub_parts.extend(child_parts)
148
+ nested_prev_end = sub_parts[-1]['location'][1] if sub_parts else nested_prev_end
149
+ parts.extend(sub_parts)
150
 
151
  return parts
152