Spaces:
Running
on
T4
Running
on
T4
Update auditqa/process_chunks.py
Browse files
auditqa/process_chunks.py
CHANGED
@@ -67,13 +67,14 @@ def load_chunks():
|
|
67 |
# placeholder for collection
|
68 |
qdrant_collections = {}
|
69 |
print("embeddings started")
|
70 |
-
batch_size =
|
71 |
for i in range(0, len(chunks_list), batch_size):
|
72 |
-
print("embedding",(i+batch_size)/
|
73 |
batch_docs = chunks_list[i:i+batch_size]
|
74 |
qdrant = Qdrant.from_documents(
|
75 |
batch_docs, embeddings,
|
76 |
path="/data/local_qdrant",
|
|
|
77 |
collection_name='reportsFeb2025',
|
78 |
)
|
79 |
|
|
|
67 |
# placeholder for collection
|
68 |
qdrant_collections = {}
|
69 |
print("embeddings started")
|
70 |
+
batch_size = 1000 # Adjust this value based on your system's memory capacity
|
71 |
for i in range(0, len(chunks_list), batch_size):
|
72 |
+
print("embedding",(i+batch_size)/1000)
|
73 |
batch_docs = chunks_list[i:i+batch_size]
|
74 |
qdrant = Qdrant.from_documents(
|
75 |
batch_docs, embeddings,
|
76 |
path="/data/local_qdrant",
|
77 |
+
recreate_collection=False,
|
78 |
collection_name='reportsFeb2025',
|
79 |
)
|
80 |
|