broadfield-dev committed
Commit 4d5c304 · verified · 1 Parent(s): c190b69

Update parser.py

Files changed (1)
  1. parser.py +24 -21
parser.py CHANGED
@@ -24,25 +24,35 @@ def get_category(node):
     elif isinstance(node, ast.Expr):
         return 'expression'
     else:
-        return 'other' # Fallback for uncategorized nodes (e.g., 'pass')
+        return 'other'
 
 def create_vector(category, level, location, total_lines, parent_path):
-    """Create a vector representation including hierarchy info."""
+    """Create an optimized vector representation for a code part."""
     category_map = {
         'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
-        'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11, 'other': 12
+        'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11,
+        'other': 12, 'elif': 13, 'else': 14, 'except': 15, 'finally': 16
     }
     category_id = category_map.get(category, 0)
     start_line, end_line = location
+    span = (end_line - start_line + 1) / total_lines # Normalized size of the part
+    center_pos = ((start_line + end_line) / 2) / total_lines # Center position normalized
     parent_depth = len(parent_path)
-    parent_hash = hash(' -> '.join(parent_path)) % 1000
+
+    # Weighted sum of parent categories (simple weighting by position)
+    parent_weight = 0
+    for i, parent in enumerate(parent_path):
+        parent_category = parent.split('[')[0].lower()
+        parent_weight += category_map.get(parent_category, 0) * (1 / (i + 1)) # Decay with depth
+    parent_weight = parent_weight / max(1, len(category_map)) # Normalize by max category ID
+
     return [
-        category_id,
-        level,
-        start_line / total_lines,
-        end_line / total_lines,
-        parent_depth,
-        parent_hash / 1000
+        category_id, # Type of the part
+        level, # Nesting depth
+        center_pos, # Center position in file (0.0 to 1.0)
+        span, # Relative size in file (0.0 to 1.0)
+        parent_depth, # Number of ancestors
+        parent_weight # Semantic connection to parents (0.0 to 1.0-ish)
     ]
 
 def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=None, counters=None):
@@ -59,13 +69,11 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
     end_line = getattr(node, 'end_lineno', start_line)
     category = get_category(node)
 
-    # Assign a node ID for relevant categories
     node_id = ''
     if category in counters:
         counters[category] += 1
         node_id = f"{category.capitalize()}[{counters[category]}]"
 
-    # Handle spacers before the node
     if start_line > prev_end + 1:
         spacer_lines = lines[prev_end:start_line - 1]
         spacer_vector = create_vector('spacer', level, (prev_end + 1, start_line - 1), total_lines, parent_path)
@@ -79,7 +87,6 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
             'node_id': ''
         })
 
-    # Capture the node's source
     stmt_lines = lines[start_line - 1:end_line]
     current_path = parent_path + ([node_id] if node_id else [])
     node_vector = create_vector(category, level, (start_line, end_line), total_lines, current_path)
@@ -93,13 +100,11 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
         'node_id': node_id
     })
 
-    # Process nested bodies (body, orelse, handlers, finalbody)
     nested_prev_end = end_line - 1
     for attr in ('body', 'orelse', 'handlers', 'finalbody'):
         if hasattr(node, attr) and getattr(node, attr):
             sub_parts = []
             for child in getattr(node, attr):
-                # Special case for elif: treat as part of the if's orelse but same level
                 if attr == 'orelse' and isinstance(node, ast.If) and child.lineno != start_line:
                     sub_category = 'elif' if child.lineno != end_line else 'else'
                     sub_vector = create_vector(sub_category, level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
@@ -110,11 +115,10 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
                         'level': level,
                         'vector': sub_vector,
                         'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
-                        'node_id': node_id # Inherits the If's node_id
+                        'node_id': node_id
                     })
                     child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
                     sub_parts.extend(child_parts)
-                # Special case for except: treat as part of try
                 elif attr == 'handlers' and isinstance(child, ast.ExceptHandler):
                     sub_vector = create_vector('except', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
                     sub_parts.append({
@@ -124,11 +128,10 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
                         'level': level,
                         'vector': sub_vector,
                         'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
-                        'node_id': node_id # Inherits the Try's node_id
+                        'node_id': node_id
                     })
                     child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
                     sub_parts.extend(child_parts)
-                # Special case for finally
                 elif attr == 'finalbody':
                     sub_vector = create_vector('finally', level, (child.lineno, getattr(child, 'end_lineno', child.lineno)), total_lines, current_path)
                     sub_parts.append({
@@ -138,7 +141,7 @@ def parse_node(node, lines, prev_end, level=0, total_lines=None, parent_path=Non
                         'level': level,
                         'vector': sub_vector,
                         'parent_path': ' -> '.join(parent_path) if parent_path else 'Top-Level',
-                        'node_id': node_id # Inherits the Try's node_id
+                        'node_id': node_id
                     })
                     child_parts = parse_node(child, lines, child.lineno - 1, level + 1, total_lines, current_path, counters)
                     sub_parts.extend(child_parts)
@@ -157,7 +160,7 @@ def parse_python_code(code):
     try:
         tree = ast.parse(code)
     except SyntaxError:
-        return [{'category': 'error', 'source': 'Invalid Python code', 'location': (1, 1), 'level': 0, 'vector': [0, 0, 1.0, 1.0, 0, 0], 'parent_path': 'Top-Level', 'node_id': ''}]
+        return [{'category': 'error', 'source': 'Invalid Python code', 'location': (1, 1), 'level': 0, 'vector': [0, 0, 1.0, 0.0, 0, 0], 'parent_path': 'Top-Level', 'node_id': ''}]
 
     parts = []
     prev_end = 0
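
For reference, the core of this commit is the new vector layout returned by create_vector. The standalone sketch below copies the updated function from the diff so it can be run on its own; the sample inputs (a function spanning lines 10-19 of a hypothetical 100-line file, nested under Class[1]) are illustrative only and show what each of the six slots encodes.

# Standalone sketch of the updated create_vector shown in the diff above.
# The sample node below is hypothetical and only illustrates the output.
def create_vector(category, level, location, total_lines, parent_path):
    category_map = {
        'import': 1, 'assignment': 2, 'function': 3, 'async_function': 4, 'class': 5,
        'if': 6, 'while': 7, 'for': 8, 'try': 9, 'expression': 10, 'spacer': 11,
        'other': 12, 'elif': 13, 'else': 14, 'except': 15, 'finally': 16
    }
    category_id = category_map.get(category, 0)
    start_line, end_line = location
    span = (end_line - start_line + 1) / total_lines          # relative size of the part
    center_pos = ((start_line + end_line) / 2) / total_lines  # where its middle falls in the file
    parent_depth = len(parent_path)
    parent_weight = 0
    for i, parent in enumerate(parent_path):
        parent_category = parent.split('[')[0].lower()
        parent_weight += category_map.get(parent_category, 0) * (1 / (i + 1))  # decay with depth
    parent_weight = parent_weight / max(1, len(category_map))
    return [category_id, level, center_pos, span, parent_depth, parent_weight]

# A function spanning lines 10-19 of a 100-line file, nested under Class[1]:
print(create_vector('function', 1, (10, 19), 100, ['Class[1]']))
# [3, 1, 0.145, 0.1, 1, 0.3125]

The last slot replaces the old parent_hash value with a weight derived from the parent categories, which is what the diff's comment describes as the semantic connection to parents.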