bupa1018 committed on
Commit
0c52573
·
1 Parent(s): 155ae5a

Update process_python_code

Browse files
Files changed (1) hide show
  1. process_python_code +47 -4
process_python_code CHANGED
@@ -1,20 +1,34 @@
1
  import ast
2
- from langchain.schema import Document # Assuming "Document" is imported from LangChain
3
 
4
def chunkPythonFiles(source_code, reference):
    """
    Entry point for processing a single Python file.

    Hands the source text to ``iterate_ast``, which fills the result list,
    then stamps every produced document with the originating ``reference``.

    Args:
        source_code: Text of the Python file to process.
        reference: Identifier of the file; it is printed and stored in the
            ``"reference"`` metadata entry of each document.

    Returns:
        The list of documents populated by ``iterate_ast``.
    """
    print(f"Processing file: {reference}")

    collected = []
    iterate_ast(source_code, collected, reference)

    # Tag each chunk with the file it came from.
    for document in collected:
        document.metadata["reference"] = reference

    return collected
17
 
 
18
  def iterate_ast(source_code, documents, reference):
19
  """
20
  Parses the AST of the given Python file and delegates
@@ -275,3 +289,32 @@ def handle_first_level_func(function_node, documents, source_code, imports_dict)
275
  #chunkPythonFiles(source_code, file_path)
276
 
277
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import ast
2
+ from langchain.schema import Document
3
 
4
def chunk_python_code_with_metadata(source_code, reference):
    """
    Entry point for processing a single Python file.

    Hands the source text to ``iterate_ast``, which fills the result list,
    then annotates every produced document with the originating
    ``reference`` and a ``usage`` label derived from the reference prefix.

    Args:
        source_code: Text of the Python file to process.
        reference: Path-like string identifying the file; it is printed,
            stored in the metadata and used to derive the usage label.

    Returns:
        The list of documents populated by ``iterate_ast``.
    """
    print(f"Processing file: {reference}")

    collected = []
    iterate_ast(source_code, collected, reference)

    # Ordered (prefix, label) pairs; the first matching prefix wins and
    # anything that matches none of them is labelled "undefined".
    usage_by_prefix = (
        ("kadi_apy/lib/", "library"),
        ("kadi_apy/cli/", "cli_library"),
        ("doc/", "doc"),
    )
    usage = "undefined"
    for prefix, label in usage_by_prefix:
        if reference.startswith(prefix):
            usage = label
            break

    # Attach the provenance and usage metadata to every chunk.
    for document in collected:
        document.metadata["reference"] = reference
        document.metadata["usage"] = usage

    return collected
30
 
31
+
32
  def iterate_ast(source_code, documents, reference):
33
  """
34
  Parses the AST of the given Python file and delegates
 
289
  #chunkPythonFiles(source_code, file_path)
290
 
291
 
292
+ import os
293
+
294
def process_folder(folder_path):
    """
    Chunk every ``.py`` file found beneath ``folder_path``.

    Walks the directory tree, reads each Python file as UTF-8 and passes
    its source to ``chunk_python_code_with_metadata``, using the file path
    as the reference. Progress and totals are printed along the way.

    Args:
        folder_path: Root directory to search recursively.

    Returns:
        A list with the documents of all processed files, in the order
        the files were visited.
    """
    python_file_count = 0
    all_documents = []

    for root, _, files in os.walk(folder_path):
        for file_name in files:
            # Only Python sources are chunked; skip everything else.
            if not file_name.endswith(".py"):
                continue

            python_file_count += 1
            file_path = os.path.join(root, file_name)
            with open(file_path, "r", encoding="utf-8") as file:
                source_code = file.read()
            print(file_name)

            # The chunking entry point is chunk_python_code_with_metadata;
            # the old name chunkPythonFiles no longer exists in this module.
            # NOTE(review): the usage label is derived from the reference
            # prefix (e.g. "kadi_apy/lib/"), so it presumably only matches
            # when folder_path is given relative to the repo root - confirm.
            docs = chunk_python_code_with_metadata(source_code, file_path)

            print("HWHWHWWHWHWHWH!:", len(docs))
            all_documents.extend(docs)

    print(f"Total Python files processed: {python_file_count}")
    print(f"Total docs files processed: {len(all_documents)}")

    # Hand the aggregated chunks back so callers can actually use them.
    return all_documents
320
+