MaryamKarimi080 committed on
Commit
3c7db82
·
verified ·
1 Parent(s): 854668c

Update scripts/setup_vectorstore.py

Browse files
Files changed (1) hide show
  1. scripts/setup_vectorstore.py +19 -15
scripts/setup_vectorstore.py CHANGED
@@ -3,19 +3,23 @@ from pathlib import Path
3
  from langchain_community.vectorstores import Chroma
4
  from langchain_openai import OpenAIEmbeddings
5
 
6
- BASE_DIR = Path(__file__).resolve().parent.parent
7
- CHUNKS_PATH = BASE_DIR / "output" / "chunks.pkl"
8
- DB_DIR = BASE_DIR / "db"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- with open(CHUNKS_PATH, "rb") as f:
11
- chunks = pickle.load(f)
12
-
13
- embedding = OpenAIEmbeddings(model="text-embedding-3-small")
14
- vectorstore = Chroma(persist_directory=str(DB_DIR), embedding_function=embedding)
15
-
16
- BATCH_SIZE = 100
17
- print(f"🧠 Embedding and adding {len(chunks)} chunks in batches...")
18
- for i in range(0, len(chunks), BATCH_SIZE):
19
- batch = chunks[i:i + BATCH_SIZE]
20
- vectorstore.add_documents(batch)
21
- print(f"✅ Added batch {i // BATCH_SIZE + 1}")
 
3
  from langchain_community.vectorstores import Chroma
4
  from langchain_openai import OpenAIEmbeddings
5
 
6
def main():
    """Load the pickled chunks and add them to a persistent Chroma store.

    Reads ``output/chunks.pkl`` from the parent of this script's directory,
    opens a Chroma vector store persisted in the sibling ``db`` directory
    using the OpenAI ``text-embedding-3-small`` embedding model, and adds
    the chunks 100 at a time, printing a progress line after each batch.
    """
    project_root = Path(__file__).resolve().parent.parent
    chunks_file = project_root / "output" / "chunks.pkl"
    db_dir = project_root / "db"

    # NOTE(review): unpickling can execute arbitrary code; this presumably
    # only ever reads a file produced by this project's own pipeline.
    with chunks_file.open("rb") as handle:
        chunks = pickle.load(handle)

    store = Chroma(
        persist_directory=str(db_dir),
        embedding_function=OpenAIEmbeddings(model="text-embedding-3-small"),
    )

    batch_size = 100
    total = len(chunks)
    print(f"🧠 Embedding and adding {total} chunks in batches...")
    # Adding in fixed-size slices keeps each add_documents call bounded.
    for batch_number, start in enumerate(range(0, total, batch_size), start=1):
        store.add_documents(chunks[start:start + batch_size])
        print(f"✅ Added batch {batch_number}")


if __name__ == "__main__":
    main()