ppsingh committed on
Commit
2e60fe0
·
verified ·
1 Parent(s): 5639e64

Update auditqa/process_chunks.py

Browse files
Files changed (1) hide show
  1. auditqa/process_chunks.py +15 -6
auditqa/process_chunks.py CHANGED
@@ -67,12 +67,21 @@ def load_chunks():
67
  # placeholder for collection
68
  qdrant_collections = {}
69
  print("embeddings started")
70
- qdrant_collections['reportsFeb2025'] = Qdrant.from_documents(
71
- chunks_list,
72
- embeddings,
73
- path="/data/local_qdrant",
74
- collection_name='reportsFeb2025',
75
- )
 
 
 
 
 
 
 
 
 
76
  print(qdrant_collections)
77
  print("vector embeddings done")
78
  return qdrant_collections
 
67
  # placeholder for collection
68
  qdrant_collections = {}
69
  print("embeddings started")
70
+ batch_size = 10000 # Adjust this value based on your system's memory capacity
71
+ for i in range(0, len(docs), batch_size):
72
+ batch_docs = chunks_list[i:i+batch_size]
73
+ qdrant = Qdrant.from_documents(
74
+ batch_docs, embeddings,
75
+ path="/data/local_qdrant",
76
+ collection_name='reportsFeb2025',
77
+ )
78
+
79
+ #qdrant_collections['reportsFeb2025'] = Qdrant.from_documents(
80
+ # chunks_list,
81
+ # embeddings,
82
+ # path="/data/local_qdrant",
83
+ # collection_name='reportsFeb2025',
84
+ # )
85
  print(qdrant_collections)
86
  print("vector embeddings done")
87
  return qdrant_collections