Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -51,33 +51,31 @@ else:
|
|
51 |
pdf_path = None
|
52 |
|
53 |
# Step 2: Process PDF
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
docs = loader.load()
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
st.success("✅ **Vector Store Created!**")
|
81 |
|
82 |
# Step 5: Query Input
|
83 |
query = st.text_input("🔍 Enter a Query:")
|
|
|
51 |
pdf_path = None
|
52 |
|
53 |
# Step 2: Process PDF
|
54 |
+
with st.spinner("Loading PDF..."):
|
55 |
+
loader = PDFPlumberLoader(pdf_path)
|
56 |
+
docs = loader.load()
|
|
|
57 |
|
58 |
+
st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
|
59 |
+
|
60 |
+
# Step 3: Chunking
|
61 |
+
with st.spinner("Chunking the document..."):
|
62 |
+
model_name = "nomic-ai/modernbert-embed-base"
|
63 |
+
embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
|
64 |
+
text_splitter = SemanticChunker(embedding_model)
|
65 |
+
documents = text_splitter.split_documents(docs)
|
66 |
+
|
67 |
+
st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
|
68 |
+
|
69 |
+
# Step 4: Setup Vectorstore
|
70 |
+
with st.spinner("Creating vector store..."):
|
71 |
+
vector_store = Chroma(
|
72 |
+
collection_name="deepseek_collection",
|
73 |
+
collection_metadata={"hnsw:space": "cosine"},
|
74 |
+
embedding_function=embedding_model
|
75 |
+
)
|
76 |
+
vector_store.add_documents(documents)
|
77 |
+
|
78 |
+
st.success("✅ **Vector Store Created!**")
|
|
|
79 |
|
80 |
# Step 5: Query Input
|
81 |
query = st.text_input("🔍 Enter a Query:")
|