bupa1018 committed on
Commit
0429b48
·
1 Parent(s): 4b20ff5

Update chunking.py

Browse files
Files changed (1) hide show
  1. chunking.py +6 -6
chunking.py CHANGED
@@ -17,7 +17,7 @@ def chunk_pythoncode_and_add_metadata(code_files_content, code_files_path):
17
 
18
 
19
  """
20
- document_chunks = generate_code_chunks_with_metadata(code_file_content, code_files_path)
21
  chunks.extend(document_chunks)
22
  return chunks
23
 
@@ -41,7 +41,7 @@ def chunk_text_and_add_metadata(texts, references, chunk_size, chunk_overlap):
41
  return chunks
42
 
43
 
44
- def generate_code_chunks_with_metadata(code_file_content, code_files_path):
45
  """
46
  Custom Python Code Splitter
47
  chunks python file by length of func/method body
@@ -55,18 +55,18 @@ def generate_code_chunks_with_metadata(code_file_content, code_files_path):
55
  documents = []
56
  #print(f"Processing file: {file_path}")
57
 
58
- _iterate_ast(code_file_content, documents, code_files_path)
59
  # Determine usage based on the file_path
60
- if file_path.startswith("kadi_apy/lib/"):
61
  usage = "kadi-apy python library"
62
- elif file_path.startswith("kadi_apy/cli/"):
63
  usage = "kadi-apy python cli library"
64
  else:
65
  usage = "undefined"
66
 
67
  # Add metadata-type "usage" to all documents
68
  for doc in documents:
69
- doc.metadata["source"] = file_path
70
  doc.metadata["usage"] = usage # Add the determined usage metadata
71
  #print(doc)
72
  return documents
 
17
 
18
 
19
  """
20
+ document_chunks = generate_code_chunks_with_metadata(code_file_content, code_file_path)
21
  chunks.extend(document_chunks)
22
  return chunks
23
 
 
41
  return chunks
42
 
43
 
44
+ def generate_code_chunks_with_metadata(code_file_content, code_file_path):
45
  """
46
  Custom Python Code Splitter
47
  chunks python file by length of func/method body
 
55
  documents = []
56
  #print(f"Processing file: {file_path}")
57
 
58
+ _iterate_ast(code_file_content, documents, code_file_path)
59
  # Determine usage based on the file_path
60
+ if code_file_path.startswith("kadi_apy/lib/"):
61
  usage = "kadi-apy python library"
62
+ elif code_file_path.startswith("kadi_apy/cli/"):
63
  usage = "kadi-apy python cli library"
64
  else:
65
  usage = "undefined"
66
 
67
  # Add metadata-type "usage" to all documents
68
  for doc in documents:
69
+ doc.metadata["source"] = code_file_path
70
  doc.metadata["usage"] = usage # Add the determined usage metadata
71
  #print(doc)
72
  return documents