bupa1018 committed on
Commit
0114c32
·
1 Parent(s): ebfd153

Update chunk_python_code.py

Browse files
Files changed (1) hide show
  1. chunk_python_code.py +25 -25
chunk_python_code.py CHANGED
@@ -63,7 +63,7 @@ def _iterate_ast(python_code, documents, file_path):
63
  _chunk_first_level_assign_node(first_level_node, documents, python_code))
64
  else:
65
  documents.extend(
66
- _handle_notdefined_case(python_code))
67
 
68
 
69
  def _chunk_import_only_python_code(python_code, file_path):
@@ -89,34 +89,12 @@ def _chunk_import_only_python_code(python_code, file_path):
89
 
90
 
91
 
92
- def _handle_notdefined_case(python_code):
93
  documents = []
94
  documents.extend(
95
- _chunk_python_code_by_character)
96
  return documents
97
 
98
- from langchain.text_splitter import RecursiveCharacterTextSplitter
99
-
100
-
101
- def _chunk_python_code_by_character(python_code):
102
- documents = []
103
- text_splitter = RecursiveCharacterTextSplitter(
104
- chunk_size=512,
105
- chunk_overlap=128,
106
- separators=[]
107
- )
108
-
109
- chunks = text_splitter.split_text(python_code)
110
-
111
- for chunk in chunks:
112
- doc = Document(
113
- page_content=chunk
114
- )
115
- documents.append(doc)
116
-
117
- return documents
118
-
119
-
120
 
121
  def _chunk_nodeless_python_code(python_code, file_path):
122
  """
@@ -245,4 +223,26 @@ def _chunk_first_level_func_node(ast_node, python_code):
245
  )
246
  documents.append(doc)
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  return documents
 
63
  _chunk_first_level_assign_node(first_level_node, documents, python_code))
64
  else:
65
  documents.extend(
66
+ _handle_not_defined_case(python_code))
67
 
68
 
69
  def _chunk_import_only_python_code(python_code, file_path):
 
89
 
90
 
91
 
92
def _handle_not_defined_case(python_code):
    """
    Chunk source code that no other chunking branch handled.

    Called from the final ``else`` branch of ``_iterate_ast``. The work is
    delegated to ``_chunk_python_code_by_character``.

    Args:
        python_code: The source text to chunk.

    Returns:
        A new list containing the documents produced by
        ``_chunk_python_code_by_character``.
    """
    # The previous version never closed the ``documents.extend(`` call, which
    # made the whole module fail to parse. Copying into a fresh list keeps the
    # original intent of returning a list owned by this function.
    return list(_chunk_python_code_by_character(python_code))
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  def _chunk_nodeless_python_code(python_code, file_path):
100
  """
 
223
  )
224
  documents.append(doc)
225
 
226
+ return documents
227
+
228
+
229
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
230
+
231
+
232
def _chunk_python_code_by_character(python_code, chunk_size=512, chunk_overlap=128):
    """
    Split source text into overlapping character-based chunks.

    Args:
        python_code: The source text to split.
        chunk_size: Value passed to the splitter as ``chunk_size``.
            Defaults to 512, the value that was previously hard-coded.
        chunk_overlap: Value passed to the splitter as ``chunk_overlap``.
            Defaults to 128, the value that was previously hard-coded.

    Returns:
        A list with one ``Document`` per chunk, each holding the chunk text
        in ``page_content``.
    """
    # NOTE(review): ``separators=[]`` is kept as in the original. An empty list
    # is falsy, so the splitter presumably falls back to its default
    # separators; confirm against the installed langchain version.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=[]
    )

    return [
        Document(page_content=chunk)
        for chunk in text_splitter.split_text(python_code)
    ]