bupa1018 committed on
Commit
b33162a
·
1 Parent(s): 66a05ca

Update chunk_python_code.py

Browse files
Files changed (1) hide show
  1. chunk_python_code.py +17 -22
chunk_python_code.py CHANGED
@@ -1,52 +1,52 @@
1
  import ast
2
  from langchain.schema import Document
3
 
4
def chunk_python_code_with_metadata(source_code, reference):
    """
    Entry point for chunking one Python file into documents.

    Prints a progress line, hands the source text to iterate_ast (which
    fills the document list), then stamps every resulting document with
    the file reference and a usage label derived from the reference path.

    Args:
        source_code: Text of the Python file to process.
        reference: Path-like string identifying the file; its prefix
            selects the usage label.

    Returns:
        The list of documents, each carrying "reference" and "usage"
        entries in its metadata.
    """
    print(f"Processing file: {reference}")

    documents = []
    iterate_ast(source_code, documents, reference)

    # The first matching path prefix wins; anything else stays "undefined".
    usage = "undefined"
    prefix_to_usage = (
        ("kadi_apy/lib/", "library"),
        ("kadi_apy/cli/", "cli_library"),
        ("doc/", "doc"),
    )
    for prefix, label in prefix_to_usage:
        if reference.startswith(prefix):
            usage = label
            break

    # Tag each chunk with where it came from and how it is used.
    for doc in documents:
        doc.metadata["reference"] = reference
        doc.metadata["usage"] = usage
        print(doc)

    return documents
30
 
31
 
32
- def iterate_ast(source_code, documents, reference):
33
  """
34
  Parses the AST of the given Python file and delegates
35
  handling to specific methods based on node types.
36
  """
37
  # Parse the source code into an abstract syntax tree (AST)
38
- tree = ast.parse(source_code, filename=reference)
39
 
40
  first_level_nodes = list(ast.iter_child_nodes(tree))
41
 
42
  # Check if there are no first-level nodes
43
  if not first_level_nodes:
44
- handle_no_first_level_node_found(documents, source_code, reference)
45
  return
46
 
47
  all_imports = all(isinstance(node, (ast.Import, ast.ImportFrom)) for node in first_level_nodes)
48
  if all_imports:
49
- handle_first_level_imports_only(documents, source_code, reference)
50
 
51
  # Iterate over first-level nodes
52
  for first_level_node in ast.iter_child_nodes(tree):
@@ -58,11 +58,11 @@ def iterate_ast(source_code, documents, reference):
58
  handle_first_level_assign(first_level_node, documents, source_code)
59
 
60
 
61
- def handle_first_level_imports_only(documents, source_code, reference):
62
  """
63
  Handles cases where the first-level nodes are only imports.
64
  """
65
- if reference.endswith("__init__.py"):
66
  type = "__init__-file"
67
  else:
68
  type = "undefined"
@@ -78,11 +78,11 @@ def handle_first_level_imports_only(documents, source_code, reference):
78
  documents.append(doc)
79
 
80
 
81
- def handle_no_first_level_node_found(documents, source_code, reference):
82
  """
83
  Handles cases where no top-level nodes are found in the AST.
84
  """
85
- if reference.endswith("__init__.py"):
86
  type = "__init__-file"
87
  else:
88
  type = "undefined"
@@ -196,9 +196,4 @@ def handle_first_level_func(function_node, documents, source_code):
196
  page_content=function_source,
197
  metadata=metadata
198
  )
199
- documents.append(doc)
200
-
201
-
202
-
203
-
204
-
 
1
  import ast
2
  from langchain.schema import Document
3
 
4
def chunk_python_code_with_metadata(source_code, source):
    """
    Entry point for chunking one Python file into documents.

    Prints a progress line, hands the source text to iterate_ast (which
    fills the document list), then stamps every resulting document with
    the file's source path and a usage label derived from that path.

    Args:
        source_code: Text of the Python file to process.
        source: Path-like string identifying the file; its prefix
            selects the usage label.

    Returns:
        The list of documents, each carrying "source" and "usage"
        entries in its metadata.
    """
    print(f"Processing file: {source}")

    documents = []
    iterate_ast(source_code, documents, source)

    # The first matching path prefix wins; anything else stays "undefined".
    usage = "undefined"
    prefix_to_usage = (
        ("kadi_apy/lib/", "library"),
        ("kadi_apy/cli/", "cli_library"),
        ("doc/", "doc"),
    )
    for prefix, label in prefix_to_usage:
        if source.startswith(prefix):
            usage = label
            break

    # Tag each chunk with where it came from and how it is used.
    for doc in documents:
        doc.metadata["source"] = source
        doc.metadata["usage"] = usage
        print(doc)

    return documents
30
 
31
 
32
+ def iterate_ast(source_code, documents, source):
33
  """
34
  Parses the AST of the given Python file and delegates
35
  handling to specific methods based on node types.
36
  """
37
  # Parse the source code into an abstract syntax tree (AST)
38
+ tree = ast.parse(source_code, filename=source)
39
 
40
  first_level_nodes = list(ast.iter_child_nodes(tree))
41
 
42
  # Check if there are no first-level nodes
43
  if not first_level_nodes:
44
+ handle_no_first_level_node_found(documents, source_code, source)
45
  return
46
 
47
  all_imports = all(isinstance(node, (ast.Import, ast.ImportFrom)) for node in first_level_nodes)
48
  if all_imports:
49
+ handle_first_level_imports_only(documents, source_code, source)
50
 
51
  # Iterate over first-level nodes
52
  for first_level_node in ast.iter_child_nodes(tree):
 
58
  handle_first_level_assign(first_level_node, documents, source_code)
59
 
60
 
61
+ def handle_first_level_imports_only(documents, source_code, source):
62
  """
63
  Handles cases where the first-level nodes are only imports.
64
  """
65
+ if source.endswith("__init__.py"):
66
  type = "__init__-file"
67
  else:
68
  type = "undefined"
 
78
  documents.append(doc)
79
 
80
 
81
+ def handle_no_first_level_node_found(documents, source_code, source):
82
  """
83
  Handles cases where no top-level nodes are found in the AST.
84
  """
85
+ if source.endswith("__init__.py"):
86
  type = "__init__-file"
87
  else:
88
  type = "undefined"
 
196
  page_content=function_source,
197
  metadata=metadata
198
  )
199
+ documents.append(doc)