Deaksh committed on
Commit
d809e9e
·
verified ·
1 Parent(s): 536c37e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -6
app.py CHANGED
@@ -11,6 +11,8 @@ from langchain_groq import ChatGroq
11
  from langchain.embeddings import OpenAIEmbeddings
12
  from langchain.vectorstores import FAISS
13
  from langchain.vectorstores import Chroma
 
 
14
 
15
 
16
  from dotenv import load_dotenv
@@ -32,12 +34,29 @@ llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens
32
 
33
  if process_url_clicked:
34
  # load data
35
- loader = UnstructuredURLLoader(urls=urls)
36
- main_placeholder.text("Data Loading...Started...✅✅✅")
37
- data = loader.load()
38
- if not data:
39
- st.error("Failed to load content from the provided URLs. Please check their availability.")
40
- st.stop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  # split data
42
  text_splitter = RecursiveCharacterTextSplitter(
43
  separators=['\n\n', '\n', '.', ','],
 
11
  from langchain.embeddings import OpenAIEmbeddings
12
  from langchain.vectorstores import FAISS
13
  from langchain.vectorstores import Chroma
14
+ import requests
15
+ from bs4 import BeautifulSoup
16
 
17
 
18
  from dotenv import load_dotenv
 
34
 
35
  if process_url_clicked:
36
  # load data
37
+ #loader = UnstructuredURLLoader(urls=urls)
38
+ #main_placeholder.text("Data Loading...Started...✅✅✅")
39
+ #data = loader.load()
40
+ def fetch_web_content(url):
41
+ try:
42
+ response = requests.get(url, timeout=10)
43
+ response.raise_for_status()
44
+ soup = BeautifulSoup(response.text, "html.parser")
45
+ return soup.get_text()
46
+ except Exception as e:
47
+ return f"Error fetching {url}: {str(e)}"
48
+
49
+ # Your list of URLs
50
+ url = url
51
+
52
+ # Display status message
53
+ main_placeholder.text("Data Loading...Started...✅✅✅")
54
+
55
+ # Fetch content
56
+ data = [fetch_web_content(url) for url in urls if url.strip()]
57
+
58
+ # Display completion message
59
+ main_placeholder.text("Data Loading...Completed...✅✅✅")
60
  # split data
61
  text_splitter = RecursiveCharacterTextSplitter(
62
  separators=['\n\n', '\n', '.', ','],