Spaces: Running on T4
Update auditqa/process_chunks.py
Browse files
Files changed: auditqa/process_chunks.py (+16 -17)
auditqa/process_chunks.py
CHANGED
@@ -61,30 +61,29 @@ def load_chunks():
|
|
61 |
# define embedding model
|
62 |
embeddings = HuggingFaceEmbeddings(
|
63 |
model_kwargs = {'device': device},
|
64 |
-
multi_process = True,
|
65 |
encode_kwargs = {'normalize_embeddings': bool(int(config.get('retriever','NORMALIZE')))},
|
66 |
model_name=config.get('retriever','MODEL')
|
67 |
)
|
68 |
# placeholder for collection
|
69 |
qdrant_collections = {}
|
70 |
print("embeddings started")
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
|
82 |
-
qdrant_collections['reportsFeb2025'] = Qdrant.from_documents(
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
print(qdrant_collections)
|
89 |
print("vector embeddings done")
|
90 |
return qdrant_collections
|
|
|
61 |
# define embedding model
|
62 |
embeddings = HuggingFaceEmbeddings(
|
63 |
model_kwargs = {'device': device},
|
|
|
64 |
encode_kwargs = {'normalize_embeddings': bool(int(config.get('retriever','NORMALIZE')))},
|
65 |
model_name=config.get('retriever','MODEL')
|
66 |
)
|
67 |
# placeholder for collection
|
68 |
qdrant_collections = {}
|
69 |
print("embeddings started")
|
70 |
+
batch_size = 1000 # Adjust this value based on your system's memory capacity
|
71 |
+
for i in range(0, len(chunks_list), batch_size):
|
72 |
+
print("embedding",(i+batch_size)/1000)
|
73 |
+
batch_docs = chunks_list[i:i+batch_size]
|
74 |
+
qdrant = Qdrant.from_documents(
|
75 |
+
batch_docs, embeddings,
|
76 |
+
path="/data/local_qdrant",
|
77 |
+
recreate_collection=False,
|
78 |
+
collection_name='reportsFeb2025',
|
79 |
+
)
|
80 |
|
81 |
+
#qdrant_collections['reportsFeb2025'] = Qdrant.from_documents(
|
82 |
+
# chunks_list,
|
83 |
+
# embeddings,
|
84 |
+
# path="/data/local_qdrant",
|
85 |
+
# collection_name='reportsFeb2025',
|
86 |
+
# )
|
87 |
print(qdrant_collections)
|
88 |
print("vector embeddings done")
|
89 |
return qdrant_collections
|