Update process_python_code

process_python_code (CHANGED): +142 -11
@@ -1,38 +1,72 @@
 import ast
-from langchain.schema import Document
+from langchain.schema import Document # Assuming "Document" is imported from LangChain

-
-def chunk_python_code_with_metadata(source_code, references):
+def chunkPythonFiles(source_code, reference):
     """
     Entry point method to process the Python file.
     It invokes the iterate_ast function.
     """
     documents = []
-    print(f"Processing file: {
-    iterate_ast(source_code, documents)
+    print(f"Processing file: {reference}")
+    iterate_ast(source_code, documents, reference)
     for doc in documents:
-
-        print(doc)
-    print(len(documents))
+        doc.metadata["reference"] = reference
+        #print("HERE IS A DOC\n", doc)
+    #print(len(documents))
     return documents

-def iterate_ast(source_code, documents):
+def iterate_ast(source_code, documents, reference):
     """
     Parses the AST of the given Python file and delegates
     handling to specific methods based on node types.
     """
     # Parse the source code into an abstract syntax tree (AST)
-    tree = ast.parse(source_code, filename=
+    tree = ast.parse(source_code, filename=reference)

     # Gather all top-level imports for later use
     imports_dict = extract_imports(tree)
+
+    first_level_nodes = list(ast.iter_child_nodes(tree))
+
+    # Check if there are no first-level nodes
+    if not first_level_nodes:
+        handle_no_first_level_node_found(documents, source_code, imports_dict, reference)
+        return
+
+
+    all_imports = all(isinstance(node, (ast.Import, ast.ImportFrom)) for node in first_level_nodes)
+    if all_imports:
+        handle_first_level_imports_only(documents, source_code, imports_dict, reference)

+
     # Iterate over first-level nodes
     for first_level_node in ast.iter_child_nodes(tree):
         if isinstance(first_level_node, ast.ClassDef):
             handle_first_level_class(first_level_node, documents, source_code, imports_dict)
         elif isinstance(first_level_node, ast.FunctionDef):
             handle_first_level_func(first_level_node, documents, source_code, imports_dict)
+        elif isinstance(first_level_node, ast.Assign):
+            handle_first_level_assign(first_level_node, documents, source_code, imports_dict)
+
+
+
+def handle_first_level_imports_only(documents, source_code, imports_dict, reference):
+    # Check if the file path before ".py" is "__init__"
+    if reference.endswith("__init__.py"):
+        type = "__init__-file"
+    else:
+        type = "undefined"
+
+    # Create and store a Document with the full source code
+    doc = Document(
+        page_content=source_code,
+        metadata={
+            "type": type,
+            "imports": imports_dict
+        }
+    )
+    documents.append(doc)
+

 def extract_imports(tree):
     """
@@ -61,6 +95,65 @@ def analyze_imports(node, imports_dict):
             relevant_imports.add(imports_dict[sub_node.id])
     return list(relevant_imports)

+def handle_not_yet_defined_first_level_cases(documents, source_code, imports_dict):
+    if source_code:
+        doc = Document(
+            page_content=source_code,
+            metadata={
+                "type": "undefined",
+                "imports": imports_dict
+            }
+        )
+        documents.append(doc)
+
+
+
+def handle_no_first_level_node_found(documents, source_code, imports_dict, reference):
+    """
+    Handles cases where no top-level nodes are found in the AST.
+    Stores the full content (likely comments) in a Document object
+    with metadata indicating type 'no code' or 'init' based on the reference.
+    """
+    # Check if the file path before ".py" is "__init__"
+    if reference.endswith("__init__.py"):
+        type = "__init__-file"
+    else:
+        type = "undefined"
+
+    # Create and store a Document with the full source code
+    doc = Document(
+        page_content=source_code,
+        metadata={
+            "type": type,
+            "imports": imports_dict
+        }
+    )
+    documents.append(doc)
+
+
+def handle_first_level_assign(assign_node, documents, source_code, imports_dict):
+    """
+    Handles assignment statements at the first level of the AST by storing them
+    in a Document object with metadata, including relevant imports.
+    """
+    # Extract assignment source code
+    assign_start_line = assign_node.lineno
+    assign_end_line = assign_node.end_lineno
+    assign_source = '\n'.join(source_code.splitlines()[assign_start_line-1:assign_end_line])
+
+    # Extract relevant imports for this assignment
+    assign_imports = analyze_imports(assign_node, imports_dict)
+
+    # Create and store Document for the assignment
+    doc = Document(
+        page_content=assign_source,
+        metadata={
+            "type": "Assign",
+            "imports": assign_imports
+        }
+    )
+    documents.append(doc)
+
 def handle_first_level_class(class_node, documents, source_code, imports_dict):

     """
@@ -87,7 +180,7 @@ def handle_first_level_class(class_node, documents, source_code, imports_dict):
             "type": "class",
             "class": class_node.name,
             "visibility": "public",
-            "imports": class_imports
+            "imports": class_imports,
         }
     )
     documents.append(doc)
@@ -172,3 +265,41 @@ def handle_first_level_func(function_node, documents, source_code, imports_dict)
         }
     )
     documents.append(doc)
+
+
+# Example usage
+#file_path = r"C:\Users\Anwender\Downloads\exampleScript.py"
+
+#with open(file_path, "r", encoding="utf-8") as file:
+#    source_code = file.read()
+#chunkPythonFiles(source_code, file_path)
+
+
+import os
+
+def process_folder(folder_path):
+    # Initialize a counter for the number of Python files
+    python_file_count = 0
+    docsT = []
+    # Walk through all subdirectories and files in the folder
+    for root, _, files in os.walk(folder_path):
+        for file_name in files:
+            # Create the full file path
+            file_path = os.path.join(root, file_name)
+            #print(file_path)
+
+            # Ensure it's a Python file
+            if file_name.endswith(".py"):
+                python_file_count += 1 # Increment the counter
+                with open(file_path, "r", encoding="utf-8") as file:
+                    source_code = file.read()
+                print(file_name)
+
+                # Call your function
+                docs = chunkPythonFiles(source_code, file_path)
+
+                print("HWHWHWWHWHWHWH!:" ,len(docs))
+                docsT.extend(docs)
+    # Print the total number of Python files processed
+    print(f"Total Python files processed: {python_file_count}")
+    print(f"Total docs files processed: {len(docsT)}")
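For reference, a minimal, hypothetical sketch of how the updated entry points might be exercised. The inline sample source and the "example.py" reference are made up for illustration; it assumes the functions above are in scope (for example, the snippet is run at the bottom of the same module) and that langchain is installed so Document can be constructed.

# Hypothetical smoke test, assuming chunkPythonFiles and process_folder
# (defined in the diff above) are importable/in scope.
sample_source = '''import os

CONFIG_PATH = "settings.yaml"

def load(path):
    return os.path.exists(path)
'''

docs = chunkPythonFiles(sample_source, "example.py")  # "example.py" is a made-up reference
for doc in docs:
    # Each chunk now carries its originating file in metadata["reference"];
    # the first-level assignment above is stored as a chunk of type "Assign".
    print(doc.metadata.get("type"), doc.metadata.get("reference"))

# A whole checkout could be chunked via the new folder helper instead:
# process_folder("path/to/project")  # hypothetical folder path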
|