broadfield-dev committed on
Commit
c1b4e6f
·
verified ·
1 Parent(s): 9778ec9

Update parser.py

Browse files
Files changed (1) hide show
  1. parser.py +89 -13
parser.py CHANGED
@@ -2,15 +2,11 @@
2
  import ast
3
 
4
  def get_category(node):
5
- """Determine the category of an AST node."""
6
  if isinstance(node, (ast.Import, ast.ImportFrom)):
7
  return 'import'
8
- elif isinstance(node, (ast.Assign, ast.AnnAssign, ast.AugAssign)):
9
- return 'assignment'
10
- elif isinstance(node, ast.FunctionDef):
11
  return 'function'
12
- elif isinstance(node, ast.AsyncFunctionDef):
13
- return 'async_function'
14
  elif isinstance(node, ast.ClassDef):
15
  return 'class'
16
  elif isinstance(node, ast.If):
@@ -27,15 +23,30 @@ def get_category(node):
27
  return 'expression'
28
  elif isinstance(node, ast.ExceptHandler):
29
  return 'except'
 
 
30
  else:
31
  return 'other'
32
 
33
- def create_vector(category, level, location, total_lines, parent_path):
34
- """Create a vector optimized for role similarity."""
 
 
 
 
 
 
 
 
 
 
 
 
35
  category_map = {
36
- 'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
37
- 'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11,
38
- 'other': 12, 'elif': 13, 'else': 14, 'except': 15, 'finally': 16, 'return': 17
 
39
  }
40
  category_id = category_map.get(category, 0)
41
  start_line, end_line = location
@@ -44,7 +55,16 @@ def create_vector(category, level, location, total_lines, parent_path):
44
  parent_depth = len(parent_path)
45
  parent_weight = sum(category_map.get(parent.split('[')[0].lower(), 0) * (1 / (i + 1))
46
  for i, parent in enumerate(parent_path)) / max(1, len(category_map))
47
- return [category_id, level, center_pos, span, parent_depth, parent_weight]
 
 
 
 
 
 
 
 
 
48
 
49
  def is_blank_or_comment(line):
50
  """Check if a line is blank or a comment."""
@@ -57,7 +77,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
57
  if parent_path is None:
58
  parent_path = []
59
  if counters is None:
60
- counters = {cat: 0 for cat in ['import', 'assignment', 'function', 'async_function', 'class', 'if', 'while', 'for', 'try', 'return', 'expression', 'other', 'spacer', 'elif', 'else', 'except', 'finally']}
61
  if processed_lines is None:
62
  processed_lines = set()
63
 
@@ -107,6 +127,25 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
107
  })
108
  processed_lines.add(start_line)
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  # Process nested bodies
111
  nested_prev_end = start_line
112
  for attr in ('body', 'orelse', 'handlers', 'finalbody'):
@@ -168,6 +207,43 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
168
  parts.extend(child_parts)
169
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
170
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  child_parts = parse_node(child, lines, nested_prev_end, level + 1, total_lines, current_path, counters, processed_lines)
172
  parts.extend(child_parts)
173
  nested_prev_end = child_parts[-1]['location'][1] if child_parts else nested_prev_end
 
2
  import ast
3
 
4
  def get_category(node):
5
+ """Determine the category of an AST node or variable context."""
6
  if isinstance(node, (ast.Import, ast.ImportFrom)):
7
  return 'import'
8
+ elif isinstance(node, ast.FunctionDef) or isinstance(node, ast.AsyncFunctionDef):
 
 
9
  return 'function'
 
 
10
  elif isinstance(node, ast.ClassDef):
11
  return 'class'
12
  elif isinstance(node, ast.If):
 
23
  return 'expression'
24
  elif isinstance(node, ast.ExceptHandler):
25
  return 'except'
26
+ elif isinstance(node, ast.Assign) or isinstance(node, ast.AnnAssign) or isinstance(node, ast.AugAssign):
27
+ return 'assigned_variable'
28
  else:
29
  return 'other'
30
 
31
def get_variable_role(node, parent):
    """Determine the role of a variable node from its parent AST node.

    Args:
        node: The AST node being classified (an ``ast.arg`` or ``ast.Name``).
        parent: The AST node that directly contains ``node``.

    Returns:
        ``'input_variable'`` when ``node`` is an ``ast.arg`` of a (sync or
        async) function definition, ``'returned_variable'`` when ``node`` is
        an ``ast.Name`` under an ``ast.Return``, ``'assigned_variable'`` when
        ``node`` is an ``ast.Name`` whose identifier matches one of the
        plain-name targets of an assignment, and ``None`` otherwise.
    """
    if isinstance(parent, (ast.FunctionDef, ast.AsyncFunctionDef)):
        if isinstance(node, ast.arg):
            return 'input_variable'
    elif isinstance(parent, ast.Return) and isinstance(node, ast.Name):
        return 'returned_variable'
    elif isinstance(parent, (ast.Assign, ast.AnnAssign, ast.AugAssign)):
        # Only ast.Assign exposes a ``targets`` list; AnnAssign and AugAssign
        # carry a single ``target``. Normalise both shapes to a list so the
        # lookup below cannot raise AttributeError.
        if isinstance(parent, ast.Assign):
            targets = parent.targets
        else:
            targets = [parent.target]
        # Matching is by identifier, and only plain-name targets are
        # considered (attribute/subscript/tuple targets are ignored).
        target_names = {t.id for t in targets if isinstance(t, ast.Name)}
        if isinstance(node, ast.Name) and node.id in target_names:
            return 'assigned_variable'
    return None
42
+
43
+ def create_vector(category, level, location, total_lines, parent_path, variable_info=None):
44
+ """Create a vector optimized for role similarity, including variable info if applicable."""
45
  category_map = {
46
+ 'import': 1, 'function': 2, 'async_function': 3, 'class': 4,
47
+ 'if': 5, 'while': 6, 'for': 7, 'try': 8, 'expression': 9, 'spacer': 10,
48
+ 'other': 11, 'elif': 12, 'else': 13, 'except': 14, 'finally': 15, 'return': 16,
49
+ 'assigned_variable': 17, 'input_variable': 18, 'returned_variable': 19
50
  }
51
  category_id = category_map.get(category, 0)
52
  start_line, end_line = location
 
55
  parent_depth = len(parent_path)
56
  parent_weight = sum(category_map.get(parent.split('[')[0].lower(), 0) * (1 / (i + 1))
57
  for i, parent in enumerate(parent_path)) / max(1, len(category_map))
58
+
59
+ # Extend vector with variable-specific info if applicable
60
+ variable_role_id = 0
61
+ variable_name = ''
62
+ if variable_info:
63
+ role_map = {'input_variable': 1, 'assigned_variable': 2, 'returned_variable': 3}
64
+ variable_role_id = role_map.get(variable_info['role'], 0)
65
+ variable_name = variable_info['name'][:10] # Truncate for brevity, hash if needed
66
+
67
+ return [category_id, level, center_pos, span, parent_depth, parent_weight, variable_role_id, hash(variable_name) % 1000 / 1000]
68
 
69
  def is_blank_or_comment(line):
70
  """Check if a line is blank or a comment."""
 
77
  if parent_path is None:
78
  parent_path = []
79
  if counters is None:
80
+ counters = {cat: 0 for cat in ['import', 'function', 'async_function', 'class', 'if', 'while', 'for', 'try', 'return', 'expression', 'other', 'spacer', 'elif', 'else', 'except', 'finally', 'assigned_variable', 'input_variable', 'returned_variable']}
81
  if processed_lines is None:
82
  processed_lines = set()
83
 
 
127
  })
128
  processed_lines.add(start_line)
129
 
130
+ # Handle variables in function definitions (input variables)
131
+ if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.args.args:
132
+ for arg in node.args.args:
133
+ var_start = start_line # Assume args are on the same line as function def for simplicity
134
+ if var_start not in processed_lines:
135
+ counters['input_variable'] += 1
136
+ var_node_id = f"InputVariable[{counters['input_variable']}]"
137
+ var_info = {'role': 'input_variable', 'name': arg.arg}
138
+ parts.append({
139
+ 'category': 'input_variable',
140
+ 'source': f" {arg.arg},", # Indented as part of function
141
+ 'location': (var_start, var_start),
142
+ 'level': level + 1,
143
+ 'vector': create_vector('input_variable', level + 1, (var_start, var_start), total_lines, current_path, var_info),
144
+ 'parent_path': f"{current_path[0]} -> {var_node_id}",
145
+ 'node_id': var_node_id
146
+ })
147
+ processed_lines.add(var_start)
148
+
149
  # Process nested bodies
150
  nested_prev_end = start_line
151
  for attr in ('body', 'orelse', 'handlers', 'finalbody'):
 
207
  parts.extend(child_parts)
208
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
209
  else:
210
+ # Handle assignments and returns for variable detection
211
+ if isinstance(child, ast.Assign) or isinstance(child, ast.AnnAssign) or isinstance(child, ast.AugAssign):
212
+ for target in child.targets:
213
+ if isinstance(target, ast.Name):
214
+ var_start = child.lineno
215
+ if var_start not in processed_lines and not is_blank_or_comment(lines[var_start - 1]):
216
+ counters['assigned_variable'] += 1
217
+ var_node_id = f"AssignedVariable[{counters['assigned_variable']}]"
218
+ var_info = {'role': 'assigned_variable', 'name': target.id}
219
+ parts.append({
220
+ 'category': 'assigned_variable',
221
+ 'source': lines[var_start - 1],
222
+ 'location': (var_start, var_start),
223
+ 'level': level + 1,
224
+ 'vector': create_vector('assigned_variable', level + 1, (var_start, var_start), total_lines, current_path, var_info),
225
+ 'parent_path': f"{current_path[0]} -> {var_node_id}",
226
+ 'node_id': var_node_id
227
+ })
228
+ processed_lines.add(var_start)
229
+ elif isinstance(child, ast.Return):
230
+ for value in ast.walk(child):
231
+ if isinstance(value, ast.Name):
232
+ var_start = child.lineno
233
+ if var_start not in processed_lines and not is_blank_or_comment(lines[var_start - 1]):
234
+ counters['returned_variable'] += 1
235
+ var_node_id = f"ReturnedVariable[{counters['returned_variable']}]"
236
+ var_info = {'role': 'returned_variable', 'name': value.id}
237
+ parts.append({
238
+ 'category': 'returned_variable',
239
+ 'source': lines[var_start - 1],
240
+ 'location': (var_start, var_start),
241
+ 'level': level + 1,
242
+ 'vector': create_vector('returned_variable', level + 1, (var_start, var_start), total_lines, current_path, var_info),
243
+ 'parent_path': f"{current_path[0]} -> {var_node_id}",
244
+ 'node_id': var_node_id
245
+ })
246
+ processed_lines.add(var_start)
247
  child_parts = parse_node(child, lines, nested_prev_end, level + 1, total_lines, current_path, counters, processed_lines)
248
  parts.extend(child_parts)
249
  nested_prev_end = child_parts[-1]['location'][1] if child_parts else nested_prev_end