DrishtiSharma committed on
Commit
6374316
·
verified ·
1 Parent(s): 87b256d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -26
app.py CHANGED
@@ -51,33 +51,31 @@ else:
51
  pdf_path = None
52
 
53
  # Step 2: Process PDF
54
- if pdf_path:
55
- with st.spinner("Loading PDF..."):
56
- loader = PDFPlumberLoader(pdf_path)
57
- docs = loader.load()
58
 
59
- st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
60
-
61
- # Step 3: Chunking
62
- with st.spinner("Chunking the document..."):
63
- model_name = "nomic-ai/modernbert-embed-base"
64
- embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
65
-
66
- text_splitter = SemanticChunker(embedding_model)
67
- documents = text_splitter.split_documents(docs)
68
-
69
- st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
70
-
71
- # Step 4: Setup Vectorstore
72
- with st.spinner("Creating vector store..."):
73
- vector_store = Chroma(
74
- collection_name="deepseek_collection",
75
- collection_metadata={"hnsw:space": "cosine"},
76
- embedding_function=embedding_model
77
- )
78
- vector_store.add_documents(documents)
79
-
80
- st.success("✅ **Vector Store Created!**")
81
 
82
  # Step 5: Query Input
83
  query = st.text_input("🔍 Enter a Query:")
 
51
  pdf_path = None
52
 
53
  # Step 2: Process PDF
54
+ with st.spinner("Loading PDF..."):
55
+ loader = PDFPlumberLoader(pdf_path)
56
+ docs = loader.load()
 
57
 
58
+ st.success(f"✅ **PDF Loaded!** Total Pages: {len(docs)}")
59
+
60
+ # Step 3: Chunking
61
+ with st.spinner("Chunking the document..."):
62
+ model_name = "nomic-ai/modernbert-embed-base"
63
+ embedding_model = HuggingFaceEmbeddings(model_name=model_name, model_kwargs={'device': 'cpu'})
64
+ text_splitter = SemanticChunker(embedding_model)
65
+ documents = text_splitter.split_documents(docs)
66
+
67
+ st.success(f"✅ **Document Chunked!** Total Chunks: {len(documents)}")
68
+
69
+ # Step 4: Setup Vectorstore
70
+ with st.spinner("Creating vector store..."):
71
+ vector_store = Chroma(
72
+ collection_name="deepseek_collection",
73
+ collection_metadata={"hnsw:space": "cosine"},
74
+ embedding_function=embedding_model
75
+ )
76
+ vector_store.add_documents(documents)
77
+
78
+ st.success("✅ **Vector Store Created!**")
 
79
 
80
  # Step 5: Query Input
81
  query = st.text_input("🔍 Enter a Query:")