ppsingh committed on
Commit
163d6ef
·
verified ·
1 Parent(s): 2f7f80c

Update auditqa/process_chunks.py

Browse files
Files changed (1) hide show
  1. auditqa/process_chunks.py +16 -17
auditqa/process_chunks.py CHANGED
@@ -61,30 +61,29 @@ def load_chunks():
61
  # define embedding model
62
  embeddings = HuggingFaceEmbeddings(
63
  model_kwargs = {'device': device},
64
- multi_process = True,
65
  encode_kwargs = {'normalize_embeddings': bool(int(config.get('retriever','NORMALIZE')))},
66
  model_name=config.get('retriever','MODEL')
67
  )
68
  # placeholder for collection
69
  qdrant_collections = {}
70
  print("embeddings started")
71
- #batch_size = 1000 # Adjust this value based on your system's memory capacity
72
- #for i in range(0, len(chunks_list), batch_size):
73
- # print("embedding",(i+batch_size)/1000)
74
- # batch_docs = chunks_list[i:i+batch_size]
75
- # qdrant = Qdrant.from_documents(
76
- # batch_docs, embeddings,
77
- # path="/data/local_qdrant",
78
- # recreate_collection=False,
79
- # collection_name='reportsFeb2025',
80
- # )
81
 
82
- qdrant_collections['reportsFeb2025'] = Qdrant.from_documents(
83
- chunks_list,
84
- embeddings,
85
- path="/data/local_qdrant",
86
- collection_name='reportsFeb2025',
87
- )
88
  print(qdrant_collections)
89
  print("vector embeddings done")
90
  return qdrant_collections
 
61
  # define embedding model
62
  embeddings = HuggingFaceEmbeddings(
63
  model_kwargs = {'device': device},
 
64
  encode_kwargs = {'normalize_embeddings': bool(int(config.get('retriever','NORMALIZE')))},
65
  model_name=config.get('retriever','MODEL')
66
  )
67
  # placeholder for collection
68
  qdrant_collections = {}
69
  print("embeddings started")
70
+ batch_size = 1000 # Adjust this value based on your system's memory capacity
71
+ for i in range(0, len(chunks_list), batch_size):
72
+ print("embedding",(i+batch_size)/1000)
73
+ batch_docs = chunks_list[i:i+batch_size]
74
+ qdrant = Qdrant.from_documents(
75
+ batch_docs, embeddings,
76
+ path="/data/local_qdrant",
77
+ recreate_collection=False,
78
+ collection_name='reportsFeb2025',
79
+ )
80
 
81
+ #qdrant_collections['reportsFeb2025'] = Qdrant.from_documents(
82
+ # chunks_list,
83
+ # embeddings,
84
+ # path="/data/local_qdrant",
85
+ # collection_name='reportsFeb2025',
86
+ # )
87
  print(qdrant_collections)
88
  print("vector embeddings done")
89
  return qdrant_collections