Spaces:

AreesaAshfaq
/

BlogRetrievalQA

Sleeping

AreesaAshfaq committed on Aug 28, 2024

Commit

cd87b5d

verified ·

1 Parent(s): 46018ec

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -53,23 +53,40 @@ else:
     # Load, chunk, and index the contents of the blog
     def load_data(url):
-        try:
-            loader = WebBaseLoader(
-                web_paths=(url,),
-                bs_kwargs=dict(
-                    parse_only=bs4.SoupStrainer(
-                        class_=("post-content", "post-title", "post-header")
-                    )
-                ),
-            )
-            docs = loader.load()
-            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-            splits = text_splitter.split_documents(docs)
-            vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
-            return vectorstore
-        except Exception as e:
-            st.error(f"An error occurred while loading the blog: {e}")
             return None
     # Load the data if a URL is provided
     if blog_url:

     # Load, chunk, and index the contents of the blog
     def load_data(url):
+        """Load the blog post at ``url``, chunk it, and index it in Chroma.
+
+        Progress and failures are reported through the Streamlit UI
+        (``st.write`` / ``st.error``) rather than raised to the caller.
+
+        Args:
+            url: Address of the blog page to fetch.
+
+        Returns:
+            The Chroma vector store built from the page's chunks, or ``None``
+            if nothing was loaded, nothing could be split, or any step raised.
+        """
+        # NOTE: the body must be indented one level deeper than the ``def``;
+        # leaving ``try:`` at the same column as ``def`` is an IndentationError.
+        try:
+            # Only parse the elements carrying the post's title/header/content
+            # classes, so the rest of the page is not indexed.
+            loader = WebBaseLoader(
+                web_paths=(url,),
+                bs_kwargs=dict(
+                    parse_only=bs4.SoupStrainer(
+                        class_=("post-content", "post-title", "post-header")
+                    )
+                ),
+            )
+            docs = loader.load()
+            if not docs:
+                st.error("No documents were loaded. Please check the URL and try again.")
+                return None
+            st.write(f"Loaded {len(docs)} documents.")
+
+            # 1000-character chunks with a 200-character overlap between
+            # neighbouring chunks.
+            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+            splits = text_splitter.split_documents(docs)
+            if not splits:
+                st.error("No document splits were created. Please check the document content.")
+                return None
+            st.write(f"Created {len(splits)} document splits.")
+
+            vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
+            # Defensive check kept from the original change.
+            if vectorstore is None:
+                st.error("Failed to create the vectorstore.")
+                return None
+            return vectorstore
+        except Exception as e:
+            # UI boundary: surface any loader/splitter/indexing failure to the
+            # user instead of crashing the Streamlit script.
+            st.error(f"An error occurred while loading the blog: {e}")
+            return None
     # Load the data if a URL is provided
     if blog_url: