broadfield-dev committed on
Commit
b28c8cc
·
verified ·
1 Parent(s): c2f70be

Update parser.py

Browse files
Files changed (1) hide show
  1. parser.py +2 -8
parser.py CHANGED
@@ -29,7 +29,7 @@ def get_category(node):
29
  return 'other'
30
 
31
  def create_vector(category, level, location, total_lines, parent_path):
32
- """Create an optimized vector representation for a code part."""
33
  category_map = {
34
  'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
35
  'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11,
@@ -48,7 +48,6 @@ def create_vector(category, level, location, total_lines, parent_path):
48
  return [category_id, level, center_pos, span, parent_depth, parent_weight]
49
 
50
  def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
51
- """Recursively parse an AST node with full hierarchy tracking."""
52
  if total_lines is None:
53
  total_lines = len(lines)
54
  if parent_path is None:
@@ -61,11 +60,9 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
61
  end_line = getattr(node, 'end_lineno', start_line)
62
  category = get_category(node)
63
 
64
- # Assign a node_id to every part
65
  counters[category] += 1
66
  node_id = f"{category.capitalize()}[{counters[category]}]"
67
 
68
- # Spacer before the node
69
  if start_line > prev_end + 1:
70
  spacer_lines = lines[prev_end:start_line - 1]
71
  counters['spacer'] += 1
@@ -81,7 +78,6 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
81
  'node_id': spacer_node_id
82
  })
83
 
84
- # Main node
85
  stmt_lines = lines[start_line - 1:end_line]
86
  current_path = parent_path + [node_id]
87
  node_vector = create_vector(category, level, (start_line, end_line), total_lines, current_path)
@@ -95,7 +91,6 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
95
  'node_id': node_id
96
  })
97
 
98
- # Process nested bodies
99
  nested_prev_end = end_line - 1
100
  for attr in ('body', 'orelse', 'handlers', 'finalbody'):
101
  if hasattr(node, attr) and getattr(node, attr):
@@ -113,7 +108,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
113
  'level': level,
114
  'vector': sub_vector,
115
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
116
- 'node_id': node_id # Inherits parent's node_id for context
117
  })
118
  child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
119
  sub_parts.extend(child_parts)
@@ -156,7 +151,6 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
156
  return parts
157
 
158
  def parse_python_code(code):
159
- """Parse Python code string and return parts with hierarchy and vectors."""
160
  lines = code.splitlines(keepends=True)
161
  total_lines = len(lines)
162
  try:
 
29
  return 'other'
30
 
31
  def create_vector(category, level, location, total_lines, parent_path):
32
+ """Create a vector optimized for role similarity."""
33
  category_map = {
34
  'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
35
  'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11,
 
48
  return [category_id, level, center_pos, span, parent_depth, parent_weight]
49
 
50
  def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
 
51
  if total_lines is None:
52
  total_lines = len(lines)
53
  if parent_path is None:
 
60
  end_line = getattr(node, 'end_lineno', start_line)
61
  category = get_category(node)
62
 
 
63
  counters[category] += 1
64
  node_id = f"{category.capitalize()}[{counters[category]}]"
65
 
 
66
  if start_line > prev_end + 1:
67
  spacer_lines = lines[prev_end:start_line - 1]
68
  counters['spacer'] += 1
 
78
  'node_id': spacer_node_id
79
  })
80
 
 
81
  stmt_lines = lines[start_line - 1:end_line]
82
  current_path = parent_path + [node_id]
83
  node_vector = create_vector(category, level, (start_line, end_line), total_lines, current_path)
 
91
  'node_id': node_id
92
  })
93
 
 
94
  nested_prev_end = end_line - 1
95
  for attr in ('body', 'orelse', 'handlers', 'finalbody'):
96
  if hasattr(node, attr) and getattr(node, attr):
 
108
  'level': level,
109
  'vector': sub_vector,
110
  'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
111
+ 'node_id': node_id
112
  })
113
  child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
114
  sub_parts.extend(child_parts)
 
151
  return parts
152
 
153
  def parse_python_code(code):
 
154
  lines = code.splitlines(keepends=True)
155
  total_lines = len(lines)
156
  try: