bupa1018 committed on
Commit
6f6019f
·
1 Parent(s): d0c3226

Create process_python_code

Browse files
Files changed (1) hide show
  1. process_python_code +158 -0
process_python_code ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ from langchain.schema import Document
3
+
4
def chunk_python_source_code(source_code, references):
    """
    Entry point method to process one or more Python sources.

    Args:
        source_code: A single source string, or an iterable of source strings.
        references: The reference belonging to the single source, or an
            iterable of references aligned with ``source_code``. Each
            reference is handed on to ``iterate_ast`` together with its source.

    Sources and references are paired with ``zip``, so surplus items in the
    longer iterable are ignored.
    """
    print(f"Processing file: {references}")

    # A bare string is itself iterable: zipping it would pair up single
    # characters. Treat it as exactly one source with one reference.
    if isinstance(source_code, str):
        source_code = [source_code]
        references = [references]

    # Distinct loop names so the parameters are not rebound mid-iteration.
    for source_text, reference in zip(source_code, references):
        print(f"Processing text: {source_text} with reference: {reference}")
        iterate_ast(source_text, reference)
14
+
15
+
16
def iterate_ast(source_code, reference):
    """
    Parses the AST of the given Python source and delegates
    handling to specific functions based on top-level node types.

    Args:
        source_code: Python source text to parse.
        reference: Passed to ``ast.parse`` as its ``filename`` argument.

    Raises:
        SyntaxError: Propagated from ``ast.parse`` when the source is invalid.
    """
    # Parse the source code into an abstract syntax tree (AST)
    tree = ast.parse(source_code, filename=reference)

    # Gather the imports found in the module for later use
    imports_dict = extract_imports(tree)

    # Iterate over first-level nodes; anything that is not a class or a
    # function definition is skipped.
    for first_level_node in ast.iter_child_nodes(tree):
        if isinstance(first_level_node, ast.ClassDef):
            handle_first_level_class(first_level_node, source_code, imports_dict)
        elif isinstance(first_level_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # ``async def`` is its own node type but carries the same fields
            # (name, decorator_list, lineno, end_lineno) the handler reads.
            handle_first_level_func(first_level_node, source_code, imports_dict)
33
+
34
def extract_imports(tree):
    """
    Extracts all import statements from the AST tree (at any nesting level)
    and organizes them into a dictionary for later analysis.

    Args:
        tree: The AST (or any AST node) to search.

    Returns:
        dict: Maps the name each import binds in the code to the fully
        qualified imported name, e.g. ``import numpy as np`` gives
        ``{"np": "numpy"}`` and ``from ..pkg import x`` gives
        ``{"x": "..pkg.x"}``. If two imports bind the same name, the one
        visited last wins.
    """
    imports_dict = {}
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                # ``import a.b`` binds ``a``; ``import a.b as c`` binds ``c``.
                # Keying by the bound name is what lets usages be matched.
                local_name = alias.asname or alias.name.split(".")[0]
                imports_dict[local_name] = alias.name
        elif isinstance(node, ast.ImportFrom):
            # Keep the leading dots of relative imports in the qualified name.
            module = "." * (node.level or 0) + (node.module or "")
            for alias in node.names:
                local_name = alias.asname or alias.name
                if not module:
                    full_name = alias.name
                elif module.endswith("."):
                    # ``from . import x`` -> ".x" (no doubled separator)
                    full_name = f"{module}{alias.name}"
                else:
                    full_name = f"{module}.{alias.name}"
                imports_dict[local_name] = full_name
    return imports_dict
50
+
51
def analyze_imports(node, imports_dict):
    """
    Analyzes the given node's body and signature to find relevant imports.

    Args:
        node: AST node whose subtree is searched for ``ast.Name`` nodes.
        imports_dict: Maps a name as used in the code to the import it
            stands for.

    Returns:
        list: The distinct ``imports_dict`` values whose key occurs as a name
        inside ``node``, in sorted order.
    """
    relevant_imports = {
        imports_dict[sub_node.id]
        for sub_node in ast.walk(node)
        if isinstance(sub_node, ast.Name) and sub_node.id in imports_dict
    }
    # Sort before returning: iteration order of a set of strings can differ
    # between interpreter runs, which would make the metadata unstable.
    return sorted(relevant_imports)
60
+
61
def handle_first_level_class(class_node, source_code, imports_dict):
    """
    Handles classes at the first level of the AST by building a Document
    for the class and delegating each of its methods to
    ``handle_class_method``.

    Args:
        class_node: The ``ast.ClassDef`` node of the class.
        source_code: Full text of the module the node was parsed from.
        imports_dict: Mapping of imported names, as consumed by
            ``analyze_imports``.

    Returns:
        Document: The class-level Document. ``page_content`` is the class
        source (decorators included); ``metadata`` holds ``type``,
        ``class_name`` and ``imports``.
    """
    import re  # function-scope import: only needed for the line splitting below

    print(f"Class detected: {class_node.name}")

    # Extract relevant imports for this class
    class_imports = analyze_imports(class_node, imports_dict)

    # Extract the class source code. Start at the first decorator (if any),
    # the same way methods and functions are extracted, so the stored text
    # is the complete definition.
    class_start_line = (
        class_node.decorator_list[0].lineno
        if class_node.decorator_list else class_node.lineno
    )
    class_end_line = class_node.end_lineno
    # Split on real newlines only. str.splitlines() also breaks on form feeds
    # and other separators that the Python parser does not treat as line
    # ends, which would shift the slice away from the AST line numbers.
    source_lines = re.split(r"\r\n|\r|\n", source_code)
    class_source = '\n'.join(source_lines[class_start_line - 1:class_end_line])

    # Store the class-level Document
    class_doc = Document(
        page_content=class_source,
        metadata={
            "type": "class",
            "class_name": class_node.name,
            "imports": class_imports
        }
    )
    print(f"Stored Class Document: {class_doc}\n")

    # Process methods within the class (plain and ``async def`` alike)
    for second_level_node in ast.iter_child_nodes(class_node):
        if isinstance(second_level_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            handle_class_method(second_level_node, class_node.name, source_code, imports_dict)

    return class_doc
93
+
94
def handle_class_method(method_node, class_name, source_code, imports_dict):
    """
    Handles methods within a class by storing them in a Document object.

    Args:
        method_node: The method's ``ast.FunctionDef`` or
            ``ast.AsyncFunctionDef`` node.
        class_name: Name of the enclosing class; recorded in the metadata.
        source_code: Full text of the module the node was parsed from.
        imports_dict: Mapping of imported names, as consumed by
            ``analyze_imports``.

    Returns:
        Document: ``page_content`` is the method source (decorators
        included); ``metadata`` holds ``type``, ``class_name``,
        ``method_name``, ``visibility`` and ``imports``.
    """
    import re  # function-scope import: only needed for the line splitting below

    print(f"Method detected: {method_node.name} in class {class_name}")

    # Extract method source code, starting at the first decorator if present
    method_start_line = (
        method_node.decorator_list[0].lineno
        if method_node.decorator_list else method_node.lineno
    )
    method_end_line = method_node.end_lineno
    # Split on real newlines only. str.splitlines() also breaks on form feeds
    # and other separators that the Python parser does not treat as line
    # ends, which would shift the slice away from the AST line numbers.
    source_lines = re.split(r"\r\n|\r|\n", source_code)
    method_source = '\n'.join(source_lines[method_start_line - 1:method_end_line])

    # Determine visibility metadata: any leading underscore (dunder names
    # included) is reported as "internal"
    visibility = "internal" if method_node.name.startswith("_") else "public"

    # Extract relevant imports for this method
    method_imports = analyze_imports(method_node, imports_dict)

    # Store the method-level Document
    method_doc = Document(
        page_content=method_source,
        metadata={
            "type": "method",
            "class_name": class_name,
            "method_name": method_node.name,
            "visibility": visibility,
            "imports": method_imports
        }
    )
    print(f"Stored Method Document: {method_doc}\n")

    # Hand the Document back so callers can keep it instead of it being dropped
    return method_doc
126
+
127
def handle_first_level_func(function_node, source_code, imports_dict):
    """
    Handles functions at the first level of the AST by storing them
    in a Document object with metadata, including relevant imports.

    Args:
        function_node: The function's ``ast.FunctionDef`` or
            ``ast.AsyncFunctionDef`` node.
        source_code: Full text of the module the node was parsed from.
        imports_dict: Mapping of imported names, as consumed by
            ``analyze_imports``.

    Returns:
        Document: ``page_content`` is the function source (decorators
        included); ``metadata`` holds ``type``, ``function_name``,
        ``visibility`` and ``imports``.
    """
    import re  # function-scope import: only needed for the line splitting below

    print(f"Function detected: {function_node.name}")

    # Extract function source code, starting at the first decorator if present
    function_start_line = (
        function_node.decorator_list[0].lineno
        if function_node.decorator_list else function_node.lineno
    )
    function_end_line = function_node.end_lineno
    # Split on real newlines only. str.splitlines() also breaks on form feeds
    # and other separators that the Python parser does not treat as line
    # ends, which would shift the slice away from the AST line numbers.
    source_lines = re.split(r"\r\n|\r|\n", source_code)
    function_source = '\n'.join(source_lines[function_start_line - 1:function_end_line])

    # Determine visibility metadata: a leading underscore means "internal"
    visibility = "internal" if function_node.name.startswith("_") else "public"

    # Extract relevant imports for this function
    function_imports = analyze_imports(function_node, imports_dict)

    # Store the function-level Document
    function_doc = Document(
        page_content=function_source,
        metadata={
            "type": "function",
            "function_name": function_node.name,
            "visibility": visibility,
            "imports": function_imports
        }
    )
    print(f"Stored Function Document: {function_doc}\n")

    # Hand the Document back so callers can keep it instead of it being dropped
    return function_doc