bupa1018 committed on
Commit
d37f774
·
1 Parent(s): a446f18

Update chunking.py

Browse files
Files changed (1) hide show
  1. chunking.py +13 -8
chunking.py CHANGED
@@ -33,7 +33,7 @@ def chunk_text_and_add_metadata(texts, references, chunk_size, chunk_overlap):
33
  page_content=chunk,
34
  metadata={
35
  "source": reference,
36
- "folder": "doc/"
37
  }
38
  )
39
  for chunk in text_splitter.split_text(text)
@@ -58,16 +58,21 @@ def generate_code_chunks_with_metadata(code_file_content, code_file_path):
58
  _iterate_ast(code_file_content, documents, code_file_path)
59
  # Determine usage based on the file_path
60
  if code_file_path.startswith("kadi_apy"):
61
- folder = "kadi_apy/"
62
- if code_file_path.startswith("kadi_apy/lib/"):
63
- usage = "kadi_apy/lib/"
64
- elif code_file_path.startswith("kadi_apy/cli/"):
65
- usage = "kadi_apy/cli/"
66
-
 
 
 
 
 
67
  # Add metadata-type "usage" to all documents
68
  for doc in documents:
69
  doc.metadata["source"] = code_file_path
70
- doc.metadata["folder"] = folder
71
  doc.metadata["usage"] = usage # Add the determined usage metadata
72
  #print(doc)
73
  return documents
 
33
  page_content=chunk,
34
  metadata={
35
  "source": reference,
36
+ "directory": "doc/"
37
  }
38
  )
39
  for chunk in text_splitter.split_text(text)
 
58
  _iterate_ast(code_file_content, documents, code_file_path)
59
  # Determine usage based on the file_path
60
  if code_file_path.startswith("kadi_apy"):
61
+ directory = "kadi_apy/"
62
+ if code_file_path.startswith("kadi_apy/lib/"):
63
+ usage = "kadi_apy/lib/"
64
+ elif code_file_path.startswith("kadi_apy/cli/"):
65
+ usage = "kadi_apy/cli/"
66
+ else:
67
+ usage = "kadi_apy/top_level_file.py"
68
+ else:
69
+ directory = "undefined"
70
+ usage = "undefined"
71
+
72
  # Add metadata-type "usage" to all documents
73
  for doc in documents:
74
  doc.metadata["source"] = code_file_path
75
+ doc.metadata["directory"] = directory
76
  doc.metadata["usage"] = usage # Add the determined usage metadata
77
  #print(doc)
78
  return documents