Spaces:
Sleeping
Sleeping
Update process_python_code
Browse files- process_python_code +47 -4
process_python_code
CHANGED
@@ -1,20 +1,34 @@
|
|
1 |
import ast
|
2 |
-
from langchain.schema import Document
|
3 |
|
4 |
-
def
|
5 |
"""
|
6 |
Entry point method to process the Python file.
|
7 |
It invokes the iterate_ast function.
|
8 |
"""
|
9 |
documents = []
|
10 |
print(f"Processing file: {reference}")
|
|
|
11 |
iterate_ast(source_code, documents, reference)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
for doc in documents:
|
13 |
doc.metadata["reference"] = reference
|
14 |
-
|
15 |
-
|
16 |
return documents
|
17 |
|
|
|
18 |
def iterate_ast(source_code, documents, reference):
|
19 |
"""
|
20 |
Parses the AST of the given Python file and delegates
|
@@ -275,3 +289,32 @@ def handle_first_level_func(function_node, documents, source_code, imports_dict)
|
|
275 |
#chunkPythonFiles(source_code, file_path)
|
276 |
|
277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import ast
|
2 |
+
from langchain.schema import Document
|
3 |
|
4 |
+
def chunk_python_code_with_metadata(source_code, reference):
    """
    Entry point for processing one Python file.

    Hands the source to ``iterate_ast`` together with an empty list, then
    stamps every document found in that list with two metadata entries:

    * ``"reference"`` - the ``reference`` argument, unchanged.
    * ``"usage"`` - a label chosen from the leading path segment of
      ``reference`` (``"library"``, ``"cli_library"``, ``"doc"``), or
      ``"undefined"`` when no known prefix matches.

    Args:
        source_code: Source text forwarded to ``iterate_ast``.
        reference: Path-like string identifying the file; it is matched
            with ``str.startswith`` against forward-slash prefixes.

    Returns:
        The list that was passed to ``iterate_ast``, with metadata added
        to each element.
    """
    collected = []
    print(f"Processing file: {reference}")

    # iterate_ast receives the list and is presumably what fills it in
    # place -- its body is not visible here, confirm against its definition.
    iterate_ast(source_code, collected, reference)

    # First matching prefix wins; the order mirrors the precedence of the
    # path checks this table replaces.
    usage_by_prefix = (
        ("kadi_apy/lib/", "library"),
        ("kadi_apy/cli/", "cli_library"),
        ("doc/", "doc"),
    )
    usage = next(
        (label for prefix, label in usage_by_prefix if reference.startswith(prefix)),
        "undefined",
    )

    # Attach the source reference and the derived usage label to every chunk.
    for chunk in collected:
        chunk.metadata["reference"] = reference
        chunk.metadata["usage"] = usage

    return collected
|
30 |
|
31 |
+
|
32 |
def iterate_ast(source_code, documents, reference):
|
33 |
"""
|
34 |
Parses the AST of the given Python file and delegates
|
|
|
289 |
#chunkPythonFiles(source_code, file_path)
|
290 |
|
291 |
|
292 |
+
import os
|
293 |
+
|
294 |
+
def process_folder(folder_path):
    """
    Walk a folder tree and chunk every ``.py`` file found in it.

    Each Python file is read as UTF-8 and passed, together with its path,
    to ``chunkPythonFiles``; the documents returned for all files are
    collected into one list. Progress and totals are printed to stdout.

    Args:
        folder_path: Root directory to scan recursively with ``os.walk``.

    Returns:
        A list holding the documents produced for all Python files, in
        traversal order. Empty when the tree contains no ``.py`` files.
        (Earlier revisions built this list but never returned it.)
    """
    python_file_count = 0
    docsT = []

    # Walk through all subdirectories and files in the folder
    for root, _, files in os.walk(folder_path):
        for file_name in files:
            # Only Python sources are processed; everything else is skipped.
            if not file_name.endswith(".py"):
                continue

            python_file_count += 1
            file_path = os.path.join(root, file_name)

            with open(file_path, "r", encoding="utf-8") as file:
                source_code = file.read()
            print(file_name)

            # NOTE(review): chunkPythonFiles is not defined in the visible
            # part of this module (the visible entry point is
            # chunk_python_code_with_metadata) -- confirm the name resolves.
            docs = chunkPythonFiles(source_code, file_path)

            # Debug trace of the per-file document count (output kept as-is).
            print("HWHWHWWHWHWHWH!:" ,len(docs))
            docsT.extend(docs)

    # Summary totals once the whole tree has been visited.
    print(f"Total Python files processed: {python_file_count}")
    print(f"Total docs files processed: {len(docsT)}")

    # Hand the accumulated documents back instead of discarding them.
    return docsT
|
320 |
+
|