bupa1018 committed on
Commit
66a05ca
·
1 Parent(s): 0a9a29e

Update chunk_python_code.py

Browse files
Files changed (1) hide show
  1. chunk_python_code.py +24 -67
chunk_python_code.py CHANGED
@@ -1,9 +1,6 @@
1
  import ast
2
  from langchain.schema import Document
3
 
4
- import ast
5
- from langchain.schema import Document
6
-
7
  def chunk_python_code_with_metadata(source_code, reference):
8
  """
9
  Entry point method to process the Python file.
@@ -40,31 +37,28 @@ def iterate_ast(source_code, documents, reference):
40
  # Parse the source code into an abstract syntax tree (AST)
41
  tree = ast.parse(source_code, filename=reference)
42
 
43
- # Gather all top-level imports for later use
44
- imports_dict = extract_imports(tree)
45
-
46
  first_level_nodes = list(ast.iter_child_nodes(tree))
47
 
48
  # Check if there are no first-level nodes
49
  if not first_level_nodes:
50
- handle_no_first_level_node_found(documents, source_code, imports_dict, reference)
51
  return
52
 
53
  all_imports = all(isinstance(node, (ast.Import, ast.ImportFrom)) for node in first_level_nodes)
54
  if all_imports:
55
- handle_first_level_imports_only(documents, source_code, imports_dict, reference)
56
 
57
  # Iterate over first-level nodes
58
  for first_level_node in ast.iter_child_nodes(tree):
59
  if isinstance(first_level_node, ast.ClassDef):
60
- handle_first_level_class(first_level_node, documents, source_code, imports_dict)
61
  elif isinstance(first_level_node, ast.FunctionDef):
62
- handle_first_level_func(first_level_node, documents, source_code, imports_dict)
63
  elif isinstance(first_level_node, ast.Assign):
64
- handle_first_level_assign(first_level_node, documents, source_code, imports_dict)
65
 
66
 
67
- def handle_first_level_imports_only(documents, source_code, imports_dict, reference):
68
  """
69
  Handles cases where the first-level nodes are only imports.
70
  """
@@ -73,8 +67,8 @@ def handle_first_level_imports_only(documents, source_code, imports_dict, refere
73
  else:
74
  type = "undefined"
75
 
76
- # Create metadata with "none" if imports are empty
77
- metadata = {"type": type, "imports": "none" if not imports_dict else imports_dict}
78
 
79
  # Create and store a Document with the full source code
80
  doc = Document(
@@ -84,36 +78,7 @@ def handle_first_level_imports_only(documents, source_code, imports_dict, refere
84
  documents.append(doc)
85
 
86
 
87
- def extract_imports(tree):
88
- """
89
- Extracts all import statements from the AST tree and organizes them
90
- into a dictionary keyed by their fully qualified names for later analysis.
91
- """
92
- imports_dict = {}
93
- for node in ast.walk(tree):
94
- if isinstance(node, ast.Import):
95
- for alias in node.names:
96
- imports_dict[alias.name] = alias.name
97
- elif isinstance(node, ast.ImportFrom):
98
- module = node.module if node.module else ""
99
- for alias in node.names:
100
- full_name = f"{module}.{alias.name}" if module else alias.name
101
- imports_dict[alias.name] = full_name
102
- return imports_dict
103
-
104
-
105
- def analyze_imports(node, imports_dict):
106
- """
107
- Analyzes the given node's body and signature to find relevant imports.
108
- """
109
- relevant_imports = set()
110
- for sub_node in ast.walk(node):
111
- if isinstance(sub_node, ast.Name) and sub_node.id in imports_dict:
112
- relevant_imports.add(imports_dict[sub_node.id])
113
- return list(relevant_imports)
114
-
115
-
116
- def handle_no_first_level_node_found(documents, source_code, imports_dict, reference):
117
  """
118
  Handles cases where no top-level nodes are found in the AST.
119
  """
@@ -122,8 +87,8 @@ def handle_no_first_level_node_found(documents, source_code, imports_dict, refer
122
  else:
123
  type = "undefined"
124
 
125
- # Create metadata with "none" if imports are empty
126
- metadata = {"type": type, "imports": "none" if not imports_dict else imports_dict}
127
 
128
  # Create and store a Document with the full source code
129
  doc = Document(
@@ -133,7 +98,7 @@ def handle_no_first_level_node_found(documents, source_code, imports_dict, refer
133
  documents.append(doc)
134
 
135
 
136
- def handle_first_level_assign(assign_node, documents, source_code, imports_dict):
137
  """
138
  Handles assignment statements at the first level of the AST.
139
  """
@@ -141,11 +106,8 @@ def handle_first_level_assign(assign_node, documents, source_code, imports_dict)
141
  assign_end_line = assign_node.end_lineno
142
  assign_source = '\n'.join(source_code.splitlines()[assign_start_line-1:assign_end_line])
143
 
144
- # Extract relevant imports
145
- assign_imports = analyze_imports(assign_node, imports_dict)
146
-
147
- # Create metadata with "none" if imports are empty
148
- metadata = {"type": "Assign", "imports": "none" if not assign_imports else assign_imports}
149
 
150
  # Create and store Document for the assignment
151
  doc = Document(
@@ -155,7 +117,7 @@ def handle_first_level_assign(assign_node, documents, source_code, imports_dict)
155
  documents.append(doc)
156
 
157
 
158
- def handle_first_level_class(class_node, documents, source_code, imports_dict):
159
  """
160
  Handles classes at the first level of the AST.
161
  """
@@ -164,15 +126,11 @@ def handle_first_level_class(class_node, documents, source_code, imports_dict):
164
  class_end_line = min(class_body_lines, default=class_node.end_lineno) - 1
165
  class_source = '\n'.join(source_code.splitlines()[class_start_line-1:class_end_line])
166
 
167
- # Extract relevant imports
168
- class_imports = analyze_imports(class_node, imports_dict)
169
-
170
- # Create metadata with "none" if imports are empty
171
  metadata = {
172
  "type": "class",
173
  "class": class_node.name,
174
- "visibility": "public",
175
- "imports": "none" if not class_imports else class_imports
176
  }
177
 
178
  # Create and store Document for the class
@@ -193,7 +151,6 @@ def handle_first_level_class(class_node, documents, source_code, imports_dict):
193
  method_source = '\n'.join(source_code.splitlines()[method_start_line-1:method_end_line])
194
 
195
  visibility = "internal" if second_level_node.name.startswith("_") else "public"
196
- method_imports = analyze_imports(second_level_node, imports_dict)
197
 
198
  doc = Document(
199
  page_content=method_source,
@@ -201,14 +158,13 @@ def handle_first_level_class(class_node, documents, source_code, imports_dict):
201
  "type": "method",
202
  "method": second_level_node.name,
203
  "visibility": visibility,
204
- "imports": "none" if not method_imports else method_imports,
205
  "class": class_node.name
206
  }
207
  )
208
  documents.append(doc)
209
 
210
 
211
- def handle_first_level_func(function_node, documents, source_code, imports_dict):
212
  """
213
  Handles functions at the first level of the AST.
214
  """
@@ -219,9 +175,6 @@ def handle_first_level_func(function_node, documents, source_code, imports_dict)
219
  function_end_line = function_node.end_lineno
220
  function_source = '\n'.join(source_code.splitlines()[function_start_line-1:function_end_line])
221
 
222
- # Extract relevant imports
223
- function_imports = analyze_imports(function_node, imports_dict)
224
-
225
  visibility = "internal" if function_node.name.startswith("_") else "public"
226
 
227
  is_command = any(
@@ -232,8 +185,7 @@ def handle_first_level_func(function_node, documents, source_code, imports_dict)
232
 
233
  metadata = {
234
  "type": "command" if is_command else "function",
235
- "visibility": visibility,
236
- "imports": "none" if not function_imports else function_imports
237
  }
238
  if is_command:
239
  metadata["command"] = function_node.name
@@ -245,3 +197,8 @@ def handle_first_level_func(function_node, documents, source_code, imports_dict)
245
  metadata=metadata
246
  )
247
  documents.append(doc)
 
 
 
 
 
 
1
  import ast
2
  from langchain.schema import Document
3
 
 
 
 
4
  def chunk_python_code_with_metadata(source_code, reference):
5
  """
6
  Entry point method to process the Python file.
 
37
  # Parse the source code into an abstract syntax tree (AST)
38
  tree = ast.parse(source_code, filename=reference)
39
 
 
 
 
40
  first_level_nodes = list(ast.iter_child_nodes(tree))
41
 
42
  # Check if there are no first-level nodes
43
  if not first_level_nodes:
44
+ handle_no_first_level_node_found(documents, source_code, reference)
45
  return
46
 
47
  all_imports = all(isinstance(node, (ast.Import, ast.ImportFrom)) for node in first_level_nodes)
48
  if all_imports:
49
+ handle_first_level_imports_only(documents, source_code, reference)
50
 
51
  # Iterate over first-level nodes
52
  for first_level_node in ast.iter_child_nodes(tree):
53
  if isinstance(first_level_node, ast.ClassDef):
54
+ handle_first_level_class(first_level_node, documents, source_code)
55
  elif isinstance(first_level_node, ast.FunctionDef):
56
+ handle_first_level_func(first_level_node, documents, source_code)
57
  elif isinstance(first_level_node, ast.Assign):
58
+ handle_first_level_assign(first_level_node, documents, source_code)
59
 
60
 
61
+ def handle_first_level_imports_only(documents, source_code, reference):
62
  """
63
  Handles cases where the first-level nodes are only imports.
64
  """
 
67
  else:
68
  type = "undefined"
69
 
70
+ # Create metadata without imports
71
+ metadata = {"type": type}
72
 
73
  # Create and store a Document with the full source code
74
  doc = Document(
 
78
  documents.append(doc)
79
 
80
 
81
+ def handle_no_first_level_node_found(documents, source_code, reference):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  """
83
  Handles cases where no top-level nodes are found in the AST.
84
  """
 
87
  else:
88
  type = "undefined"
89
 
90
+ # Create metadata without imports
91
+ metadata = {"type": type}
92
 
93
  # Create and store a Document with the full source code
94
  doc = Document(
 
98
  documents.append(doc)
99
 
100
 
101
+ def handle_first_level_assign(assign_node, documents, source_code):
102
  """
103
  Handles assignment statements at the first level of the AST.
104
  """
 
106
  assign_end_line = assign_node.end_lineno
107
  assign_source = '\n'.join(source_code.splitlines()[assign_start_line-1:assign_end_line])
108
 
109
+ # Create metadata without imports
110
+ metadata = {"type": "Assign"}
 
 
 
111
 
112
  # Create and store Document for the assignment
113
  doc = Document(
 
117
  documents.append(doc)
118
 
119
 
120
+ def handle_first_level_class(class_node, documents, source_code):
121
  """
122
  Handles classes at the first level of the AST.
123
  """
 
126
  class_end_line = min(class_body_lines, default=class_node.end_lineno) - 1
127
  class_source = '\n'.join(source_code.splitlines()[class_start_line-1:class_end_line])
128
 
129
+ # Create metadata without imports
 
 
 
130
  metadata = {
131
  "type": "class",
132
  "class": class_node.name,
133
+ "visibility": "public"
 
134
  }
135
 
136
  # Create and store Document for the class
 
151
  method_source = '\n'.join(source_code.splitlines()[method_start_line-1:method_end_line])
152
 
153
  visibility = "internal" if second_level_node.name.startswith("_") else "public"
 
154
 
155
  doc = Document(
156
  page_content=method_source,
 
158
  "type": "method",
159
  "method": second_level_node.name,
160
  "visibility": visibility,
 
161
  "class": class_node.name
162
  }
163
  )
164
  documents.append(doc)
165
 
166
 
167
+ def handle_first_level_func(function_node, documents, source_code):
168
  """
169
  Handles functions at the first level of the AST.
170
  """
 
175
  function_end_line = function_node.end_lineno
176
  function_source = '\n'.join(source_code.splitlines()[function_start_line-1:function_end_line])
177
 
 
 
 
178
  visibility = "internal" if function_node.name.startswith("_") else "public"
179
 
180
  is_command = any(
 
185
 
186
  metadata = {
187
  "type": "command" if is_command else "function",
188
+ "visibility": visibility
 
189
  }
190
  if is_command:
191
  metadata["command"] = function_node.name
 
197
  metadata=metadata
198
  )
199
  documents.append(doc)
200
+
201
+
202
+
203
+
204
+