bupa1018 committed on
Commit
64756f0
·
1 Parent(s): 0114c32

Update chunk_python_code.py

Browse files
Files changed (1) hide show
  1. chunk_python_code.py +78 -73
chunk_python_code.py CHANGED
@@ -66,79 +66,6 @@ def _iterate_ast(python_code, documents, file_path):
66
  _handle_not_defined_case(python_code))
67
 
68
 
69
- def _chunk_import_only_python_code(python_code, file_path):
70
- """
71
- Handles cases where the first-level nodes are only imports.
72
- """
73
- documents = []
74
- if file_path.endswith("__init__.py"):
75
- type = "__init__-file"
76
- else:
77
- type = "undefined"
78
-
79
- # Create metadata without imports
80
- metadata = {"type": type}
81
-
82
- # Create and store a Document with the full source code
83
- doc = Document(
84
- page_content=python_code,
85
- metadata=metadata
86
- )
87
- documents.append(doc)
88
- return documents
89
-
90
-
91
-
92
- def _handle_not_defined_case(python_code):
93
- documents = []
94
- documents.extend(
95
- _chunk_python_code_by_character(python_code)
96
- return documents
97
-
98
-
99
- def _chunk_nodeless_python_code(python_code, file_path):
100
- """
101
- Handles cases where no top-level nodes are found in the AST.
102
- """
103
- documents = []
104
- if file_path.endswith("__init__.py"):
105
- type = "__init__-file"
106
- else:
107
- type = "undefined"
108
-
109
- # Create metadata without imports
110
- metadata = {"type": type}
111
-
112
- # Create and store a Document with the full source code
113
- doc = Document(
114
- page_content=python_code,
115
- metadata=metadata
116
- )
117
- documents.append(doc)
118
-
119
- return documents
120
-
121
- def _chunk_first_level_assign_node(ast_node, python_code):
122
-
123
- """
124
- Handles assignment statements at the first level of the AST.
125
- """
126
- documents = []
127
- assign_start_line = ast_node.lineno
128
- assign_end_line = ast_node.end_lineno
129
- assign_source = '\n'.join(python_code.splitlines()[assign_start_line-1:assign_end_line])
130
-
131
- # Create metadata without imports
132
- metadata = {"type": "Assign"}
133
-
134
- # Create and store Document for the assignment
135
- doc = Document(
136
- page_content=assign_source,
137
- metadata=metadata
138
- )
139
- documents.append(doc)
140
-
141
- return documents
142
 
143
  def _handle_first_level_class(ast_node , python_code):
144
  """
@@ -188,6 +115,14 @@ def _handle_first_level_class(ast_node , python_code):
188
 
189
  return documents
190
 
 
 
 
 
 
 
 
 
191
  def _chunk_first_level_func_node(ast_node, python_code):
192
  """
193
  Handles functions at the first level of the AST.
@@ -226,6 +161,76 @@ def _chunk_first_level_func_node(ast_node, python_code):
226
  return documents
227
 
228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  from langchain.text_splitter import RecursiveCharacterTextSplitter
230
 
231
 
 
66
  _handle_not_defined_case(python_code))
67
 
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  def _handle_first_level_class(ast_node , python_code):
71
  """
 
115
 
116
  return documents
117
 
118
+
119
+ def _handle_not_defined_case(python_code):
120
+ documents = []
121
+ documents.extend(
122
+ _chunk_python_code_by_character(python_code))
123
+ return documents
124
+
125
+
126
  def _chunk_first_level_func_node(ast_node, python_code):
127
  """
128
  Handles functions at the first level of the AST.
 
161
  return documents
162
 
163
 
164
+
165
+ def _chunk_first_level_assign_node(ast_node, python_code):
166
+
167
+ """
168
+ Handles assignment statements at the first level of the AST.
169
+ """
170
+ documents = []
171
+ assign_start_line = ast_node.lineno
172
+ assign_end_line = ast_node.end_lineno
173
+ assign_source = '\n'.join(python_code.splitlines()[assign_start_line-1:assign_end_line])
174
+
175
+ # Create metadata without imports
176
+ metadata = {"type": "Assign"}
177
+
178
+ # Create and store Document for the assignment
179
+ doc = Document(
180
+ page_content=assign_source,
181
+ metadata=metadata
182
+ )
183
+ documents.append(doc)
184
+
185
+ return documents
186
+
187
+
188
+
189
+ def _chunk_import_only_python_code(python_code, file_path):
190
+ """
191
+ Handles cases where the first-level nodes are only imports.
192
+ """
193
+ documents = []
194
+ if file_path.endswith("__init__.py"):
195
+ type = "__init__-file"
196
+ else:
197
+ type = "undefined"
198
+
199
+ # Create metadata without imports
200
+ metadata = {"type": type}
201
+
202
+ # Create and store a Document with the full source code
203
+ doc = Document(
204
+ page_content=python_code,
205
+ metadata=metadata
206
+ )
207
+ documents.append(doc)
208
+ return documents
209
+
210
+ def _chunk_nodeless_python_code(python_code, file_path):
211
+ """
212
+ Handles cases where no top-level nodes are found in the AST.
213
+ """
214
+ documents = []
215
+ if file_path.endswith("__init__.py"):
216
+ type = "__init__-file"
217
+ else:
218
+ type = "undefined"
219
+
220
+ # Create metadata without imports
221
+ metadata = {"type": type}
222
+
223
+ # Create and store a Document with the full source code
224
+ doc = Document(
225
+ page_content=python_code,
226
+ metadata=metadata
227
+ )
228
+ documents.append(doc)
229
+
230
+ return documents
231
+
232
+
233
+
234
  from langchain.text_splitter import RecursiveCharacterTextSplitter
235
 
236