datascientist22 committed on
Commit
36fb430
·
verified ·
1 Parent(s): 7d6baca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -2
app.py CHANGED
@@ -5,7 +5,7 @@ import bs4
5
  from sentence_transformers import SentenceTransformer
6
  import torch
7
  from langchain.chains import ChatGroq
8
- from langchain.document_loaders import UnstructuredURLLoader # Updated import
9
  from langchain.prompts import RunnablePassthrough
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain.vectorstores import Chroma
@@ -96,7 +96,12 @@ if st.button("Submit Query"):
96
  st.markdown('<p style="color:red; font-weight:bold;">Please enter a valid URL before submitting</p>', unsafe_allow_html=True)
97
  else:
98
  # Blog loading logic based on user input URL
99
- loader = UnstructuredURLLoader(urls=[url_input]) # Using UnstructuredURLLoader instead of WebBaseLoader
 
 
 
 
 
100
  docs = loader.load()
101
 
102
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
 
5
  from sentence_transformers import SentenceTransformer
6
  import torch
7
  from langchain.chains import ChatGroq
8
+ from langchain.document_loaders import WebBaseLoader
9
  from langchain.prompts import RunnablePassthrough
10
  from langchain.text_splitter import RecursiveCharacterTextSplitter
11
  from langchain.vectorstores import Chroma
 
96
  st.markdown('<p style="color:red; font-weight:bold;">Please enter a valid URL before submitting</p>', unsafe_allow_html=True)
97
  else:
98
  # Blog loading logic based on user input URL
99
+ loader = WebBaseLoader(
100
+ web_paths=(url_input,), # Use the user-input URL
101
+ bs_kwargs=dict(
102
+ parse_only=bs4.SoupStrainer() # Adjust based on the user's URL structure
103
+ ),
104
+ )
105
  docs = loader.load()
106
 
107
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)