bupa1018 committed on
Commit
dc07873
·
1 Parent(s): 6f6019f

Update process_python_code

Browse files
Files changed (1) hide show
  1. process_python_code +94 -78
process_python_code CHANGED
@@ -1,25 +1,28 @@
1
  import ast
2
- from langchain.schema import Document
3
 
4
def chunk_python_source_code(source_code, references):
    """
    Entry point method to process the Python sources.

    Prints the references, then walks ``source_code`` and ``references``
    in lockstep and hands every (text, reference) pair to iterate_ast.

    Args:
        source_code: Iterable of Python source texts.
        references: Iterable of labels, paired positionally with the texts.
    """
    print(f"Processing file: {references}")

    # zip() stops at the shorter input, so unpaired items are ignored
    for text, reference in zip(source_code, references):
        print(f"Processing text: {text} with reference: {reference}")
        iterate_ast(text, reference)
14
-
 
 
15
 
16
def iterate_ast(source_code, reference):
    """
    Parse the given Python source and dispatch every first-level class
    and function definition to its dedicated handler.

    Args:
        source_code: Python source text to parse.
        reference: Passed to ``ast.parse`` as the ``filename``.
    """
    # Build the abstract syntax tree (AST) for the source text
    tree = ast.parse(source_code, filename=reference)

    # Top-level imports are collected once and shared with the handlers
    imports_dict = extract_imports(tree)

    # Only direct children of the module are inspected
    for node in ast.iter_child_nodes(tree):
        if isinstance(node, ast.ClassDef):
            handler = handle_first_level_class
        elif isinstance(node, ast.FunctionDef):
            handler = handle_first_level_func
        else:
            continue
        handler(node, source_code, imports_dict)
33
 
34
  def extract_imports(tree):
35
  """
@@ -58,101 +61,114 @@ def analyze_imports(node, imports_dict):
58
  relevant_imports.add(imports_dict[sub_node.id])
59
  return list(relevant_imports)
60
 
61
def handle_first_level_class(class_node, source_code, imports_dict):
    """
    Handle a first-level class: build a Document for the whole class,
    print it, then process each method defined directly in the class.

    Args:
        class_node: The ``ast.ClassDef`` node of the class.
        source_code: Source text the node was parsed from.
        imports_dict: Import mapping forwarded to analyze_imports.
    """
    print(f"Class detected: {class_node.name}")

    # Imports that are relevant for this class
    class_imports = analyze_imports(class_node, imports_dict)

    # The class spans from its `class` line to the largest end line
    # found on any node inside it
    first_line = class_node.lineno
    last_line = max(
        (n.end_lineno for n in ast.walk(class_node) if hasattr(n, "end_lineno")),
        default=class_node.lineno,
    )
    all_lines = source_code.splitlines()
    class_source = '\n'.join(all_lines[first_line - 1:last_line])

    # Class-level Document
    class_metadata = {
        "type": "class",
        "class_name": class_node.name,
        "imports": class_imports,
    }
    class_doc = Document(page_content=class_source, metadata=class_metadata)
    print(f"Stored Class Document: {class_doc}\n")

    # Methods are the function definitions among the class's direct children
    for child in ast.iter_child_nodes(class_node):
        if not isinstance(child, ast.FunctionDef):
            continue
        handle_class_method(child, class_node.name, source_code, imports_dict)
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
def handle_class_method(method_node, class_name, source_code, imports_dict):
    """
    Handle a method of a class by building a Document for it and printing it.

    Args:
        method_node: The ``ast.FunctionDef`` node of the method.
        class_name: Name of the class that contains the method.
        source_code: Source text the node was parsed from.
        imports_dict: Import mapping forwarded to analyze_imports.
    """
    print(f"Method detected: {method_node.name} in class {class_name}")

    # The chunk starts at the first decorator when there is one
    if method_node.decorator_list:
        first_line = method_node.decorator_list[0].lineno
    else:
        first_line = method_node.lineno
    last_line = method_node.end_lineno
    all_lines = source_code.splitlines()
    method_source = '\n'.join(all_lines[first_line - 1:last_line])

    # A leading underscore marks the method as internal
    if method_node.name.startswith("_"):
        visibility = "internal"
    else:
        visibility = "public"

    # Imports that are relevant for this method
    method_imports = analyze_imports(method_node, imports_dict)

    # Method-level Document
    method_metadata = {
        "type": "method",
        "class_name": class_name,
        "method_name": method_node.name,
        "visibility": visibility,
        "imports": method_imports,
    }
    method_doc = Document(page_content=method_source, metadata=method_metadata)
    print(f"Stored Method Document: {method_doc}\n")
126
 
127
def handle_first_level_func(function_node, source_code, imports_dict):
    """
    Handle a first-level function by building a Document with metadata
    (including the relevant imports) for it and printing it.

    Args:
        function_node: The ``ast.FunctionDef`` node of the function.
        source_code: Source text the node was parsed from.
        imports_dict: Import mapping forwarded to analyze_imports.
    """
    print(f"Function detected: {function_node.name}")

    # The chunk starts at the first decorator when there is one
    if function_node.decorator_list:
        first_line = function_node.decorator_list[0].lineno
    else:
        first_line = function_node.lineno
    last_line = function_node.end_lineno
    all_lines = source_code.splitlines()
    function_source = '\n'.join(all_lines[first_line - 1:last_line])

    # A leading underscore marks the function as internal
    if function_node.name.startswith("_"):
        visibility = "internal"
    else:
        visibility = "public"

    # Imports that are relevant for this function
    function_imports = analyze_imports(function_node, imports_dict)

    # Function-level Document
    function_metadata = {
        "type": "function",
        "function_name": function_node.name,
        "visibility": visibility,
        "imports": function_imports,
    }
    function_doc = Document(page_content=function_source, metadata=function_metadata)
    print(f"Stored Function Document: {function_doc}\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import ast
2
+ from langchain.schema import Document # Assuming "Document" is imported from LangChain
3
 
4
+
5
def chunk_python_code_with_metadata(source_code, references):
    """
    Entry point method to process the Python file.

    Invokes iterate_ast to collect Document objects for ``source_code``,
    prints every collected Document followed by the number of Documents,
    and returns them.

    Args:
        source_code: Python source text to parse.
        references: Label of the source being processed. It is printed and
            used as the ``filename`` when parsing.
            NOTE(review): presumably a file path; values that are not
            strings are converted with ``str()`` -- confirm with the caller.

    Returns:
        The list of Document objects collected by iterate_ast.
    """
    # The label must be a string to be usable as the parser's filename
    file_path = references if isinstance(references, str) else str(references)

    documents = []
    print(f"Processing file: {file_path}")
    iterate_ast(source_code, documents, file_path)
    for doc in documents:
        print("Stored Document:\n")
        print(doc)
    # NOTE(review): the count is assumed to be printed once, after the loop
    print(len(documents))
    return documents


def iterate_ast(source_code, documents, file_path="<unknown>"):
    """
    Parses the AST of the given Python file and delegates
    handling to specific methods based on node types.

    Args:
        source_code: Python source text to parse.
        documents: List that the handlers receive to store their Documents.
        file_path: Passed to ``ast.parse`` as the ``filename``. Defaults to
            ``"<unknown>"``, which is also the default of ``ast.parse``.
    """
    # Parse the source code into an abstract syntax tree (AST)
    tree = ast.parse(source_code, filename=file_path)

    # Gather all top-level imports for later use
    imports_dict = extract_imports(tree)

    # Iterate over first-level nodes
    for first_level_node in ast.iter_child_nodes(tree):
        if isinstance(first_level_node, ast.ClassDef):
            handle_first_level_class(first_level_node, documents, source_code, imports_dict)
        elif isinstance(first_level_node, ast.FunctionDef):
            handle_first_level_func(first_level_node, documents, source_code, imports_dict)
36
 
37
  def extract_imports(tree):
38
  """
 
61
  relevant_imports.add(imports_dict[sub_node.id])
62
  return list(relevant_imports)
63
 
64
def handle_first_level_class(class_node, documents, source_code, imports_dict):
    """
    Handles classes at the first level of the AST by storing them
    in Document objects with metadata, including relevant imports.

    One Document is appended for the class header (everything from the
    ``class`` line up to the first method, or the whole class when it has
    no methods), followed by one Document per method defined directly in
    the class body.

    Args:
        class_node: The ``ast.ClassDef`` node of the class.
        documents: List the created Documents are appended to.
        source_code: Source text the node was parsed from.
        imports_dict: Import mapping forwarded to analyze_imports.
    """
    # Split once; the same lines are sliced for the class and every method
    source_lines = source_code.splitlines()

    methods = [
        child for child in class_node.body
        if isinstance(child, ast.FunctionDef)
    ]

    # Extract class source code
    class_start_line = class_node.lineno
    if methods:
        # Stop right before the first method, counting its decorators as
        # part of the method rather than of the class header
        class_end_line = min(_definition_start_line(m) for m in methods) - 1
    else:
        # Without methods the class chunk is the complete class
        class_end_line = class_node.end_lineno
    class_source = '\n'.join(source_lines[class_start_line - 1:class_end_line])

    # Extract relevant imports for this class
    class_imports = analyze_imports(class_node, imports_dict)

    # Create and store Document for the class
    documents.append(
        Document(
            page_content=class_source,
            metadata={
                "type": "class",
                "class": class_node.name,
                "visibility": "public",
                "imports": class_imports,  # Add class-specific imports
            },
        )
    )

    # Handle methods within the class
    for method_node in methods:
        # Extract method source code
        method_start_line = _definition_start_line(method_node)
        method_end_line = method_node.end_lineno
        method_source = '\n'.join(source_lines[method_start_line - 1:method_end_line])

        # Determine visibility metadata
        visibility = "internal" if method_node.name.startswith("_") else "public"

        # Extract relevant imports for this method
        method_imports = analyze_imports(method_node, imports_dict)

        # Create and store Document
        documents.append(
            Document(
                page_content=method_source,
                metadata={
                    "type": "method",
                    "method": method_node.name,
                    # Store the computed value, not the literal word
                    "visibility": visibility,
                    "imports": method_imports,
                    "class": class_node.name,
                },
            )
        )


def _definition_start_line(node):
    """Return the first source line of a definition, decorators included."""
    if node.decorator_list:
        return node.decorator_list[0].lineno
    return node.lineno
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
def handle_first_level_func(function_node, documents, source_code, imports_dict):
    """
    Handle a first-level function by appending a Document with metadata
    (including the relevant imports) for it to ``documents``.

    A function carrying a decorator written as the plain name
    ``apy_command`` is stored as type "command" with visibility "public";
    any other function is stored as type "function" with a visibility
    derived from its name.

    Args:
        function_node: The ``ast.FunctionDef`` node of the function.
        documents: List the created Document is appended to.
        source_code: Source text the node was parsed from.
        imports_dict: Import mapping forwarded to analyze_imports.
    """
    # The chunk starts at the first decorator when there is one
    if function_node.decorator_list:
        first_line = function_node.decorator_list[0].lineno
    else:
        first_line = function_node.lineno
    last_line = function_node.end_lineno
    all_lines = source_code.splitlines()
    function_source = '\n'.join(all_lines[first_line - 1:last_line])

    # A leading underscore marks the function as internal
    if function_node.name.startswith("_"):
        visibility = "internal"
    else:
        visibility = "public"

    # Decorators without an `id` attribute can never match and are ignored
    is_command = any(
        getattr(decorator, "id", None) == "apy_command"
        for decorator in function_node.decorator_list
    )

    # Imports that are relevant for this function
    function_imports = analyze_imports(function_node, imports_dict)

    # Both variants share the page content and differ only in the metadata
    if is_command:
        metadata = {
            "type": "command",
            "command": function_node.name,
            "visibility": "public",
            "imports": function_imports,
        }
    else:
        metadata = {
            "type": "function",
            "method": function_node.name,
            "visibility": visibility,
            "imports": function_imports,
        }
    documents.append(Document(page_content=function_source, metadata=metadata))