Spaces:

bupa1018
/

KadiAPY_Coding_Assistant

Sleeping

App Files Files Community

bupa1018 committed on Mar 8

Commit

0c52573

1 Parent(s): 155ae5a

Update process_python_code

Browse files

Files changed (1) hide show

process_python_code +47 -4

process_python_code CHANGED Viewed

@@ -1,20 +1,34 @@
 import ast
-from langchain.schema import Document  # Assuming "Document" is imported from LangChain
-def chunkPythonFiles(source_code, reference):
     """
     Entry point method to process the Python file.
     It invokes the iterate_ast function.
     """
     documents = []
     print(f"Processing file: {reference}")
     iterate_ast(source_code, documents, reference)
     for doc in documents:
         doc.metadata["reference"] = reference
-        #print("HERE IS A DOC\n", doc)
-    #print(len(documents))
     return documents
 def iterate_ast(source_code, documents, reference):
     """
     Parses the AST of the given Python file and delegates
@@ -275,3 +289,32 @@ def handle_first_level_func(function_node, documents, source_code, imports_dict)
 #chunkPythonFiles(source_code, file_path)

 import ast
+from langchain.schema import Document
+def chunk_python_code_with_metadata(source_code, reference):
     """
     Entry point method to process the Python file.
     It invokes the iterate_ast function.
     """
     documents = []
     print(f"Processing file: {reference}")
     iterate_ast(source_code, documents, reference)
+    # Determine usage based on the reference path
+    if reference.startswith("kadi_apy/lib/"):
+        usage = "library"
+    elif reference.startswith("kadi_apy/cli/"):
+        usage = "cli_library"
+    elif reference.startswith("doc/"):
+        usage = "doc"
+    else:
+        usage = "undefined"
+    # Add metadata for usage to all documents
     for doc in documents:
         doc.metadata["reference"] = reference
+        doc.metadata["usage"] = usage  # Add the determined usage metadata
     return documents
 def iterate_ast(source_code, documents, reference):
     """
     Parses the AST of the given Python file and delegates
 #chunkPythonFiles(source_code, file_path)
+import os
+def process_folder(folder_path):
+    # Initialize a counter for the number of Python files
+    python_file_count = 0
+    docsT = []
+    # Walk through all subdirectories and files in the folder
+    for root, _, files in os.walk(folder_path):
+        for file_name in files:
+            # Create the full file path
+            file_path = os.path.join(root, file_name)
+            #print(file_path)
+            # Ensure it's a Python file
+            if file_name.endswith(".py"):
+                python_file_count += 1  # Increment the counter
+                with open(file_path, "r", encoding="utf-8") as file:
+                    source_code = file.read()
+                    print(file_name)
+                # Call your function
+                docs = chunkPythonFiles(source_code, file_path)
+                print("HWHWHWWHWHWHWH!:" ,len(docs))
+                docsT.extend(docs)
+    # Print the total number of Python files processed
+    print(f"Total Python files processed: {python_file_count}")
+    print(f"Total docs files processed: {len(docsT)}")