AreesaAshfaq committed on
Commit
cd87b5d
·
verified ·
1 Parent(s): 46018ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -16
app.py CHANGED
@@ -53,23 +53,40 @@ else:
53
 
54
  # Load, chunk, and index the contents of the blog
55
  def load_data(url):
56
- try:
57
- loader = WebBaseLoader(
58
- web_paths=(url,),
59
- bs_kwargs=dict(
60
- parse_only=bs4.SoupStrainer(
61
- class_=("post-content", "post-title", "post-header")
62
- )
63
- ),
64
- )
65
- docs = loader.load()
66
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
67
- splits = text_splitter.split_documents(docs)
68
- vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
69
- return vectorstore
70
- except Exception as e:
71
- st.error(f"An error occurred while loading the blog: {e}")
72
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  # Load the data if a URL is provided
75
  if blog_url:
 
53
 
54
  # Load, chunk, and index the contents of the blog
55
  def load_data(url):
56
+ try:
57
+ loader = WebBaseLoader(
58
+ web_paths=(url,),
59
+ bs_kwargs=dict(
60
+ parse_only=bs4.SoupStrainer(
61
+ class_=("post-content", "post-title", "post-header")
62
+ )
63
+ ),
64
+ )
65
+ docs = loader.load()
66
+ if not docs:
67
+ st.error("No documents were loaded. Please check the URL and try again.")
 
 
 
 
68
  return None
69
+
70
+ st.write(f"Loaded {len(docs)} documents.")
71
+
72
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
73
+ splits = text_splitter.split_documents(docs)
74
+ if not splits:
75
+ st.error("No document splits were created. Please check the document content.")
76
+ return None
77
+
78
+ st.write(f"Created {len(splits)} document splits.")
79
+
80
+ vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
81
+ if vectorstore is None:
82
+ st.error("Failed to create the vectorstore.")
83
+ return None
84
+
85
+ return vectorstore
86
+ except Exception as e:
87
+ st.error(f"An error occurred while loading the blog: {e}")
88
+ return None
89
+
90
 
91
  # Load the data if a URL is provided
92
  if blog_url: