broadfield-dev committed on
Commit
81f613a
·
verified ·
1 Parent(s): 6cec40e

Update parser.py

Browse files
Files changed (1) hide show
  1. parser.py +38 -62
parser.py CHANGED
@@ -1,8 +1,8 @@
1
  # parser.py
2
  import ast
3
 
4
- def get_category(node):
5
- """Determine the category of an AST node or variable context."""
6
  if isinstance(node, (ast.Import, ast.ImportFrom)):
7
  return 'import'
8
  elif isinstance(node, ast.FunctionDef) or isinstance(node, ast.AsyncFunctionDef):
@@ -24,32 +24,19 @@ def get_category(node):
24
  elif isinstance(node, ast.ExceptHandler):
25
  return 'except'
26
  elif isinstance(node, (ast.Assign, ast.AnnAssign, ast.AugAssign)):
27
- return 'assigned_variable'
28
- else:
29
- return 'other'
30
-
31
- def get_variable_role(node, parent):
32
- """Determine the role of a variable (input, assigned, returned) based on context."""
33
- if isinstance(parent, (ast.FunctionDef, ast.AsyncFunctionDef)):
34
- if isinstance(node, ast.arg):
35
  return 'input_variable'
36
- elif isinstance(parent, ast.Return) and isinstance(node, ast.Name):
 
37
  return 'returned_variable'
38
- elif isinstance(parent, (ast.Assign, ast.AnnAssign, ast.AugAssign)):
39
- if isinstance(node, ast.Name):
40
- # Handle different target structures
41
- if isinstance(parent, ast.Assign):
42
- for target in parent.targets:
43
- if isinstance(target, ast.Name) and target.id == node.id:
44
- return 'assigned_variable'
45
- elif isinstance(parent, (ast.AnnAssign, ast.AugAssign)):
46
- target = parent.target
47
- if isinstance(target, ast.Name) and target.id == node.id:
48
- return 'assigned_variable'
49
- return None
50
 
51
- def create_vector(category, level, location, total_lines, parent_path, variable_info=None):
52
- """Create a vector optimized for role similarity, including variable info if applicable."""
53
  category_map = {
54
  'import': 1, 'function': 2, 'async_function': 3, 'class': 4,
55
  'if': 5, 'while': 6, 'for': 7, 'try': 8, 'expression': 9, 'spacer': 10,
@@ -63,16 +50,7 @@ def create_vector(category, level, location, total_lines, parent_path, variable_
63
  parent_depth = len(parent_path)
64
  parent_weight = sum(category_map.get(parent.split('[')[0].lower(), 0) * (1 / (i + 1))
65
  for i, parent in enumerate(parent_path)) / max(1, len(category_map))
66
-
67
- # Extend vector with variable-specific info if applicable
68
- variable_role_id = 0
69
- variable_name = ''
70
- if variable_info:
71
- role_map = {'input_variable': 1, 'assigned_variable': 2, 'returned_variable': 3}
72
- variable_role_id = role_map.get(variable_info['role'], 0)
73
- variable_name = variable_info['name'][:10] # Truncate for brevity, hash if needed
74
-
75
- return [category_id, level, center_pos, span, parent_depth, parent_weight, variable_role_id, hash(variable_name) % 1000 / 1000]
76
 
77
  def is_blank_or_comment(line):
78
  """Check if a line is blank or a comment."""
@@ -142,13 +120,12 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
142
  if var_start not in processed_lines:
143
  counters['input_variable'] += 1
144
  var_node_id = f"InputVariable[{counters['input_variable']}]"
145
- var_info = {'role': 'input_variable', 'name': arg.arg}
146
  parts.append({
147
  'category': 'input_variable',
148
  'source': f" {arg.arg},", # Indented as part of function
149
  'location': (var_start, var_start),
150
  'level': level + 1,
151
- 'vector': create_vector('input_variable', level + 1, (var_start, var_start), total_lines, current_path, var_info),
152
  'parent_path': f"{current_path[0]} -> {var_node_id}",
153
  'node_id': var_node_id
154
  })
@@ -216,42 +193,42 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
216
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
217
  else:
218
  # Handle assignments and returns for variable detection
219
- if isinstance(child, ast.Assign):
220
- for target in child.targets:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  if isinstance(target, ast.Name):
222
  var_start = child.lineno
223
  if var_start not in processed_lines and not is_blank_or_comment(lines[var_start - 1]):
224
  counters['assigned_variable'] += 1
225
  var_node_id = f"AssignedVariable[{counters['assigned_variable']}]"
226
- var_info = {'role': 'assigned_variable', 'name': target.id}
227
  parts.append({
228
  'category': 'assigned_variable',
229
  'source': lines[var_start - 1],
230
  'location': (var_start, var_start),
231
  'level': level + 1,
232
- 'vector': create_vector('assigned_variable', level + 1, (var_start, var_start), total_lines, current_path, var_info),
233
  'parent_path': f"{current_path[0]} -> {var_node_id}",
234
  'node_id': var_node_id
235
  })
236
  processed_lines.add(var_start)
237
- elif isinstance(child, ast.AnnAssign) or isinstance(child, ast.AugAssign):
238
- target = child.target
239
- if isinstance(target, ast.Name):
240
- var_start = child.lineno
241
- if var_start not in processed_lines and not is_blank_or_comment(lines[var_start - 1]):
242
- counters['assigned_variable'] += 1
243
- var_node_id = f"AssignedVariable[{counters['assigned_variable']}]"
244
- var_info = {'role': 'assigned_variable', 'name': target.id}
245
- parts.append({
246
- 'category': 'assigned_variable',
247
- 'source': lines[var_start - 1],
248
- 'location': (var_start, var_start),
249
- 'level': level + 1,
250
- 'vector': create_vector('assigned_variable', level + 1, (var_start, var_start), total_lines, current_path, var_info),
251
- 'parent_path': f"{current_path[0]} -> {var_node_id}",
252
- 'node_id': var_node_id
253
- })
254
- processed_lines.add(var_start)
255
  elif isinstance(child, ast.Return):
256
  for value in ast.walk(child):
257
  if isinstance(value, ast.Name):
@@ -259,13 +236,12 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
259
  if var_start not in processed_lines and not is_blank_or_comment(lines[var_start - 1]):
260
  counters['returned_variable'] += 1
261
  var_node_id = f"ReturnedVariable[{counters['returned_variable']}]"
262
- var_info = {'role': 'returned_variable', 'name': value.id}
263
  parts.append({
264
  'category': 'returned_variable',
265
  'source': lines[var_start - 1],
266
  'location': (var_start, var_start),
267
  'level': level + 1,
268
- 'vector': create_vector('returned_variable', level + 1, (var_start, var_start), total_lines, current_path, var_info),
269
  'parent_path': f"{current_path[0]} -> {var_node_id}",
270
  'node_id': var_node_id
271
  })
@@ -291,7 +267,7 @@ def parse_python_code(code):
291
  try:
292
  tree = ast.parse(code)
293
  except SyntaxError:
294
- return [{'category': 'error', 'source': 'Invalid Python code', 'location': (1, 1), 'level': 0, 'vector': [0, 0, 1.0, 0.0, 0, 0, 0, 0], 'parent_path': 'Top-Level', 'node_id': 'Error[1]'}]
295
 
296
  parts = []
297
  prev_end = 0
 
1
  # parser.py
2
  import ast
3
 
4
+ def get_category(node, parent=None):
5
+ """Determine the category of an AST node or variable context, including variable roles."""
6
  if isinstance(node, (ast.Import, ast.ImportFrom)):
7
  return 'import'
8
  elif isinstance(node, ast.FunctionDef) or isinstance(node, ast.AsyncFunctionDef):
 
24
  elif isinstance(node, ast.ExceptHandler):
25
  return 'except'
26
  elif isinstance(node, (ast.Assign, ast.AnnAssign, ast.AugAssign)):
27
+ if parent and isinstance(parent, (ast.FunctionDef, ast.AsyncFunctionDef, ast.If, ast.Try, ast.While, ast.For)):
28
+ return 'assigned_variable'
29
+ elif isinstance(node, ast.arg): # Input variables in function definitions
30
+ if parent and isinstance(parent, (ast.FunctionDef, ast.AsyncFunctionDef)):
 
 
 
 
31
  return 'input_variable'
32
+ elif isinstance(node, ast.Name): # Returned variables in return statements
33
+ if parent and isinstance(parent, ast.Return):
34
  return 'returned_variable'
35
+ else:
36
+ return 'other'
 
 
 
 
 
 
 
 
 
 
37
 
38
+ def create_vector(category, level, location, total_lines, parent_path):
39
+ """Create a 6D vector optimized for role similarity, integrating variable roles into category_id."""
40
  category_map = {
41
  'import': 1, 'function': 2, 'async_function': 3, 'class': 4,
42
  'if': 5, 'while': 6, 'for': 7, 'try': 8, 'expression': 9, 'spacer': 10,
 
50
  parent_depth = len(parent_path)
51
  parent_weight = sum(category_map.get(parent.split('[')[0].lower(), 0) * (1 / (i + 1))
52
  for i, parent in enumerate(parent_path)) / max(1, len(category_map))
53
+ return [category_id, level, center_pos, span, parent_depth, parent_weight]
 
 
 
 
 
 
 
 
 
54
 
55
  def is_blank_or_comment(line):
56
  """Check if a line is blank or a comment."""
 
120
  if var_start not in processed_lines:
121
  counters['input_variable'] += 1
122
  var_node_id = f"InputVariable[{counters['input_variable']}]"
 
123
  parts.append({
124
  'category': 'input_variable',
125
  'source': f" {arg.arg},", # Indented as part of function
126
  'location': (var_start, var_start),
127
  'level': level + 1,
128
+ 'vector': create_vector('input_variable', level + 1, (var_start, var_start), total_lines, current_path),
129
  'parent_path': f"{current_path[0]} -> {var_node_id}",
130
  'node_id': var_node_id
131
  })
 
193
  nested_prev_end = max(nested_prev_end, child_parts[-1]['location'][1] if child_parts else child_start)
194
  else:
195
  # Handle assignments and returns for variable detection
196
+ if isinstance(child, (ast.Assign, ast.AnnAssign, ast.AugAssign)):
197
+ # Handle different target structures
198
+ if isinstance(child, ast.Assign):
199
+ for target in child.targets:
200
+ if isinstance(target, ast.Name):
201
+ var_start = child.lineno
202
+ if var_start not in processed_lines and not is_blank_or_comment(lines[var_start - 1]):
203
+ counters['assigned_variable'] += 1
204
+ var_node_id = f"AssignedVariable[{counters['assigned_variable']}]"
205
+ parts.append({
206
+ 'category': 'assigned_variable',
207
+ 'source': lines[var_start - 1],
208
+ 'location': (var_start, var_start),
209
+ 'level': level + 1,
210
+ 'vector': create_vector('assigned_variable', level + 1, (var_start, var_start), total_lines, current_path),
211
+ 'parent_path': f"{current_path[0]} -> {var_node_id}",
212
+ 'node_id': var_node_id
213
+ })
214
+ processed_lines.add(var_start)
215
+ else: # AnnAssign or AugAssign
216
+ target = child.target
217
  if isinstance(target, ast.Name):
218
  var_start = child.lineno
219
  if var_start not in processed_lines and not is_blank_or_comment(lines[var_start - 1]):
220
  counters['assigned_variable'] += 1
221
  var_node_id = f"AssignedVariable[{counters['assigned_variable']}]"
 
222
  parts.append({
223
  'category': 'assigned_variable',
224
  'source': lines[var_start - 1],
225
  'location': (var_start, var_start),
226
  'level': level + 1,
227
+ 'vector': create_vector('assigned_variable', level + 1, (var_start, var_start), total_lines, current_path),
228
  'parent_path': f"{current_path[0]} -> {var_node_id}",
229
  'node_id': var_node_id
230
  })
231
  processed_lines.add(var_start)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  elif isinstance(child, ast.Return):
233
  for value in ast.walk(child):
234
  if isinstance(value, ast.Name):
 
236
  if var_start not in processed_lines and not is_blank_or_comment(lines[var_start - 1]):
237
  counters['returned_variable'] += 1
238
  var_node_id = f"ReturnedVariable[{counters['returned_variable']}]"
 
239
  parts.append({
240
  'category': 'returned_variable',
241
  'source': lines[var_start - 1],
242
  'location': (var_start, var_start),
243
  'level': level + 1,
244
+ 'vector': create_vector('returned_variable', level + 1, (var_start, var_start), total_lines, current_path),
245
  'parent_path': f"{current_path[0]} -> {var_node_id}",
246
  'node_id': var_node_id
247
  })
 
267
  try:
268
  tree = ast.parse(code)
269
  except SyntaxError:
270
+ return [{'category': 'error', 'source': 'Invalid Python code', 'location': (1, 1), 'level': 0, 'vector': [0, 0, 1.0, 0.0, 0, 0], 'parent_path': 'Top-Level', 'node_id': 'Error[1]'}]
271
 
272
  parts = []
273
  prev_end = 0