eliot-hub committed on
Commit
69a7d1e
·
1 Parent(s): 4066cbb
Files changed (1) hide show
  1. tools.py +6 -6
tools.py CHANGED
@@ -41,14 +41,14 @@ memoires_ds = load_dataset("eliot-hub/memoires_vec_800", split="data", token=HF_
41
  batched_ds = memoires_ds.batch(batch_size=41000)
42
  client = chromadb.Client()
43
  collection = client.get_or_create_collection(name="embeddings_mxbai")
44
- for batch in tqdm(batched_ds, desc=f"[{datetime.now().strftime('%H:%M:%S')}] Processing dataset batches"):
45
  collection.add(
46
- ids=batch["ids"],
47
- # metadatas=batch["metadatas"],
48
- documents=batch["documents"],
49
- embeddings=batch["embeddings"],
50
  )
51
- print("DB done")
52
  del memoires_ds, batched_ds
53
 
54
  llm_4o = ChatOpenAI(model="gpt-4o-mini", api_key=OPENAI_API_KEY, temperature=0)
 
41
  batched_ds = memoires_ds.batch(batch_size=41000)
42
  client = chromadb.Client()
43
  collection = client.get_or_create_collection(name="embeddings_mxbai")
44
+ for batch in tqdm(batched_ds, desc="Processing dataset batches"):
45
  collection.add(
46
+ ids=batch["id"],
47
+ metadatas=batch["metadata"],
48
+ documents=batch["document"],
49
+ embeddings=batch["embedding"],
50
  )
51
+ print(f"Collection complete: {collection.count()}")
52
  del memoires_ds, batched_ds
53
 
54
  llm_4o = ChatOpenAI(model="gpt-4o-mini", api_key=OPENAI_API_KEY, temperature=0)