broadfield-dev committed on
Commit
5a58620
·
verified ·
1 Parent(s): cd2fa5a

Create parser.py

Browse files
Files changed (1) hide show
  1. parser.py +71 -0
parser.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # parser.py
2
+ import ast
3
+
4
def get_category(node):
    """
    Map an AST node to a coarse category label based on its type.

    Returns one of 'import', 'assignment', 'function', 'async_function',
    'class' or 'expression'; any other node type yields 'other'.
    """
    # Entries are tried in order; the first matching type wins.
    labels_by_type = (
        ((ast.Import, ast.ImportFrom), 'import'),
        ((ast.Assign, ast.AnnAssign, ast.AugAssign), 'assignment'),
        (ast.FunctionDef, 'function'),
        (ast.AsyncFunctionDef, 'async_function'),
        (ast.ClassDef, 'class'),
        (ast.Expr, 'expression'),
    )
    for node_types, label in labels_by_type:
        if isinstance(node, node_types):
            return label
    return 'other'
22
+
23
def parse_python_file(file_path):
    """
    Parse a Python file and return a list of its parts with categories, source code, and locations.

    The file is split into its top-level statements, in source order, plus
    'spacer' parts covering any lines that lie between statements, before the
    first one, or after the last one. Decorators are included in the part of
    the function or class they decorate.

    Limitation: slicing is done on whole lines, so statements that share a
    physical line (e.g. ``a = 1; b = 2``) each report that entire line.

    Args:
        file_path (str): Path to the Python file to parse.

    Returns:
        list: A list of dictionaries, each containing 'category', 'source', and 'location'.
            'location' is a (first_line, last_line) tuple, 1-indexed and inclusive.

    Raises:
        OSError: If the file cannot be opened.
        SyntaxError: If the file is not valid Python source.
    """
    import tokenize  # local import: only needed here

    # tokenize.open() decodes the file the way the interpreter would (BOM /
    # PEP 263 coding declaration, UTF-8 otherwise) instead of the locale default.
    with tokenize.open(file_path) as file:
        # readlines() breaks only at newlines, which is what ast's line numbers
        # count. str.splitlines() would also break at form feeds and other
        # Unicode line boundaries and shift every later slice.
        lines = file.readlines()

    code = ''.join(lines)
    tree = ast.parse(code)

    parts = []
    prev_end = 0  # last line number already emitted; 0 means nothing yet

    for stmt in tree.body:
        # On Python 3.8+ `lineno` of a decorated def/class is the def/class
        # line itself, so start at the earliest decorator to keep them together.
        decorators = getattr(stmt, 'decorator_list', ())
        start_line = min([stmt.lineno] + [dec.lineno for dec in decorators])
        # Fall back to a single line when end_lineno is unavailable.
        end_line = getattr(stmt, 'end_lineno', None) or stmt.lineno

        # Lines skipped since the previous statement become a spacer part.
        if start_line > prev_end + 1:
            spacer_lines = lines[prev_end:start_line - 1]
            parts.append({
                'category': 'spacer',
                'source': ''.join(spacer_lines),
                'location': (prev_end + 1, start_line - 1)
            })

        stmt_lines = lines[start_line - 1:end_line]
        parts.append({
            'category': get_category(stmt),
            'source': ''.join(stmt_lines),
            'location': (start_line, end_line)
        })

        prev_end = end_line

    # Anything after the last statement is a trailing spacer.
    if prev_end < len(lines):
        remaining_lines = lines[prev_end:]
        parts.append({
            'category': 'spacer',
            'source': ''.join(remaining_lines),
            # Inclusive end, consistent with every other location tuple.
            'location': (prev_end + 1, len(lines))
        })

    return parts