MaryamKarimi080 committed on
Commit
1458fa6
·
verified ·
1 Parent(s): 4e3dac9

Update scripts/chunk_and_embed.py

Browse files
Files changed (1) hide show
  1. scripts/chunk_and_embed.py +18 -19
scripts/chunk_and_embed.py CHANGED
@@ -1,20 +1,19 @@
1
- from langchain.text_splitter import RecursiveCharacterTextSplitter
2
- import pickle
3
- import os
4
-
5
- DOCS_PATH = "E:/courses/LangChain Project/main root/output/all_docs.pkl"
6
- CHUNKS_PATH = "E:/courses/LangChain Project/main root/output/chunks.pkl"
7
-
8
- if not os.path.exists(DOCS_PATH):
9
- raise FileNotFoundError("Run load_documents.py first")
10
-
11
- with open(DOCS_PATH, "rb") as f:
12
- all_docs = pickle.load(f)
13
-
14
- splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
15
- chunks = splitter.split_documents(all_docs)
16
- print(f"✅ Split into {len(chunks)} chunks")
17
-
18
- with open(CHUNKS_PATH, "wb") as f:
19
- pickle.dump(chunks, f)
20
  print(f"📦 Chunks saved to {CHUNKS_PATH}")
 
"""Split the pickled documents into overlapping chunks and pickle the result.

Reads ``output/all_docs.pkl`` and writes ``output/chunks.pkl``; both paths
are resolved relative to the parent of this script's directory.
"""
import os  # kept from the original import list; not used by this script
import pickle
from pathlib import Path

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Anchor paths to this file's location rather than the current working
# directory, so the script behaves the same wherever it is launched from.
BASE_DIR = Path(__file__).resolve().parent.parent
DOCS_PATH = BASE_DIR / "output" / "all_docs.pkl"
CHUNKS_PATH = BASE_DIR / "output" / "chunks.pkl"

# NOTE: unpickling can execute arbitrary code. Only load all_docs.pkl when it
# was produced by a trusted step of this pipeline.
with open(DOCS_PATH, "rb") as f:
    all_docs = pickle.load(f)

# Consecutive chunks share 100 units of overlap so context that straddles a
# chunk boundary is not lost.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_documents(all_docs)
print(f"✅ Split into {len(chunks)} chunks")

with open(CHUNKS_PATH, "wb") as f:
    pickle.dump(chunks, f)

print(f"📦 Chunks saved to {CHUNKS_PATH}")