Spaces:

bupa1018
/

KadiAPY_Coding_Assistant

Sleeping

App Files Community

bupa1018 committed on Mar 8

Commit

b33162a

1 Parent(s): 66a05ca

Update chunk_python_code.py

Browse files

Files changed (1) hide show

chunk_python_code.py +17 -22

chunk_python_code.py CHANGED Viewed

@@ -1,52 +1,52 @@
 import ast
 from langchain.schema import Document
-def chunk_python_code_with_metadata(source_code, reference):
     """
     Entry point method to process the Python file.
     It invokes the iterate_ast function.
     """
     documents = []
-    print(f"Processing file: {reference}")
-    iterate_ast(source_code, documents, reference)
-    # Determine usage based on the reference path
-    if reference.startswith("kadi_apy/lib/"):
         usage = "library"
-    elif reference.startswith("kadi_apy/cli/"):
         usage = "cli_library"
-    elif reference.startswith("doc/"):
         usage = "doc"
     else:
         usage = "undefined"
     # Add metadata for usage to all documents
     for doc in documents:
-        doc.metadata["reference"] = reference
         doc.metadata["usage"] = usage  # Add the determined usage metadata
         print(doc)
     return documents
-def iterate_ast(source_code, documents, reference):
     """
     Parses the AST of the given Python file and delegates
     handling to specific methods based on node types.
     """
     # Parse the source code into an abstract syntax tree (AST)
-    tree = ast.parse(source_code, filename=reference)
     first_level_nodes = list(ast.iter_child_nodes(tree))
     # Check if there are no first-level nodes
     if not first_level_nodes:
-        handle_no_first_level_node_found(documents, source_code, reference)
         return
     all_imports = all(isinstance(node, (ast.Import, ast.ImportFrom)) for node in first_level_nodes)
     if all_imports:
-        handle_first_level_imports_only(documents, source_code, reference)
     # Iterate over first-level nodes
     for first_level_node in ast.iter_child_nodes(tree):
@@ -58,11 +58,11 @@ def iterate_ast(source_code, documents, reference):
             handle_first_level_assign(first_level_node, documents, source_code)
-def handle_first_level_imports_only(documents, source_code, reference):
     """
     Handles cases where the first-level nodes are only imports.
     """
-    if reference.endswith("__init__.py"):
         type = "__init__-file"
     else:
         type = "undefined"
@@ -78,11 +78,11 @@ def handle_first_level_imports_only(documents, source_code, reference):
     documents.append(doc)
-def handle_no_first_level_node_found(documents, source_code, reference):
     """
     Handles cases where no top-level nodes are found in the AST.
     """
-    if reference.endswith("__init__.py"):
         type = "__init__-file"
     else:
         type = "undefined"
@@ -196,9 +196,4 @@ def handle_first_level_func(function_node, documents, source_code):
         page_content=function_source,
         metadata=metadata
     )
-    documents.append(doc)

 import ast
 from langchain.schema import Document
+def chunk_python_code_with_metadata(source_code, source):
     """
     Entry point method to process the Python file.
     It invokes the iterate_ast function.
     """
     documents = []
+    print(f"Processing file: {source}")
+    iterate_ast(source_code, documents, source)
+    # Determine usage based on the source path
+    if source.startswith("kadi_apy/lib/"):
         usage = "library"
+    elif source.startswith("kadi_apy/cli/"):
         usage = "cli_library"
+    elif source.startswith("doc/"):
         usage = "doc"
     else:
         usage = "undefined"
     # Add metadata for usage to all documents
     for doc in documents:
+        doc.metadata["source"] = source
         doc.metadata["usage"] = usage  # Add the determined usage metadata
         print(doc)
     return documents
+def iterate_ast(source_code, documents, source):
     """
     Parses the AST of the given Python file and delegates
     handling to specific methods based on node types.
     """
     # Parse the source code into an abstract syntax tree (AST)
+    tree = ast.parse(source_code, filename=source)
     first_level_nodes = list(ast.iter_child_nodes(tree))
     # Check if there are no first-level nodes
     if not first_level_nodes:
+        handle_no_first_level_node_found(documents, source_code, source)
         return
     all_imports = all(isinstance(node, (ast.Import, ast.ImportFrom)) for node in first_level_nodes)
     if all_imports:
+        handle_first_level_imports_only(documents, source_code, source)
     # Iterate over first-level nodes
     for first_level_node in ast.iter_child_nodes(tree):
             handle_first_level_assign(first_level_node, documents, source_code)
+def handle_first_level_imports_only(documents, source_code, source):
     """
     Handles cases where the first-level nodes are only imports.
     """
+    if source.endswith("__init__.py"):
         type = "__init__-file"
     else:
         type = "undefined"
     documents.append(doc)
+def handle_no_first_level_node_found(documents, source_code, source):
     """
     Handles cases where no top-level nodes are found in the AST.
     """
+    if source.endswith("__init__.py"):
         type = "__init__-file"
     else:
         type = "undefined"
         page_content=function_source,
         metadata=metadata
     )
+    documents.append(doc)