eliot-hub committed on
Commit
467c88c
·
1 Parent(s): 619a467
Files changed (1) hide show
  1. tools.py +7 -7
tools.py CHANGED
@@ -41,13 +41,13 @@ memoires_ds = load_dataset("eliot-hub/memoires_vec_800", split="data", token=HF_
41
  batched_ds = memoires_ds.batch(batch_size=41000)
42
  client = chromadb.Client()
43
  collection = client.get_or_create_collection(name="embeddings_mxbai")
44
- for batch in tqdm(batched_ds, desc="Processing dataset batches"):
45
- collection.add(
46
- ids=batch["id"],
47
- metadatas=batch["metadata"],
48
- documents=batch["document"],
49
- embeddings=batch["embedding"],
50
- )
51
  print(f"Collection complete: {collection.count()}")
52
  del memoires_ds, batched_ds
53
 
 
41
  batched_ds = memoires_ds.batch(batch_size=41000)
42
  client = chromadb.Client()
43
  collection = client.get_or_create_collection(name="embeddings_mxbai")
44
+ # for batch in tqdm(batched_ds, desc="Processing dataset batches"):
45
+ # collection.add(
46
+ # ids=batch["id"],
47
+ # metadatas=batch["metadata"],
48
+ # documents=batch["document"],
49
+ # embeddings=batch["embedding"],
50
+ # )
51
  print(f"Collection complete: {collection.count()}")
52
  del memoires_ds, batched_ds
53