bupa1018 committed on
Commit
a446f18
·
1 Parent(s): 8e4422b

Update chunking.py

Browse files
Files changed (1) hide show
  1. chunking.py +4 -3
chunking.py CHANGED
@@ -33,7 +33,7 @@ def chunk_text_and_add_metadata(texts, references, chunk_size, chunk_overlap):
33
  page_content=chunk,
34
  metadata={
35
  "source": reference,
36
- "usage": "doc"
37
  }
38
  )
39
  for chunk in text_splitter.split_text(text)
@@ -57,16 +57,17 @@ def generate_code_chunks_with_metadata(code_file_content, code_file_path):
57
 
58
  _iterate_ast(code_file_content, documents, code_file_path)
59
  # Determine usage based on the file_path
 
 
60
  if code_file_path.startswith("kadi_apy/lib/"):
61
  usage = "kadi_apy/lib/"
62
  elif code_file_path.startswith("kadi_apy/cli/"):
63
  usage = "kadi_apy/cli/"
64
- else:
65
- usage = "kadiAPY"
66
 
67
  # Add metadata-type "usage" to all documents
68
  for doc in documents:
69
  doc.metadata["source"] = code_file_path
 
70
  doc.metadata["usage"] = usage # Add the determined usage metadata
71
  #print(doc)
72
  return documents
 
33
  page_content=chunk,
34
  metadata={
35
  "source": reference,
36
+ "folder": "doc/"
37
  }
38
  )
39
  for chunk in text_splitter.split_text(text)
 
57
 
58
  _iterate_ast(code_file_content, documents, code_file_path)
59
  # Determine usage based on the file_path
60
+ if code_file_path.startswith("kadi_apy"):
61
+ folder = "kadi_apy/"
62
  if code_file_path.startswith("kadi_apy/lib/"):
63
  usage = "kadi_apy/lib/"
64
  elif code_file_path.startswith("kadi_apy/cli/"):
65
  usage = "kadi_apy/cli/"
 
 
66
 
67
  # Add metadata-type "usage" to all documents
68
  for doc in documents:
69
  doc.metadata["source"] = code_file_path
70
+ doc.metadata["folder"] = folder
71
  doc.metadata["usage"] = usage # Add the determined usage metadata
72
  #print(doc)
73
  return documents