Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,8 @@ from langchain_groq import ChatGroq
|
|
11 |
from langchain.embeddings import OpenAIEmbeddings
|
12 |
from langchain.vectorstores import FAISS
|
13 |
from langchain.vectorstores import Chroma
|
|
|
|
|
14 |
|
15 |
|
16 |
from dotenv import load_dotenv
|
@@ -32,12 +34,29 @@ llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens
|
|
32 |
|
33 |
if process_url_clicked:
|
34 |
# load data
|
35 |
-
loader = UnstructuredURLLoader(urls=urls)
|
36 |
-
main_placeholder.text("Data Loading...Started...✅✅✅")
|
37 |
-
data = loader.load()
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
# split data
|
42 |
text_splitter = RecursiveCharacterTextSplitter(
|
43 |
separators=['\n\n', '\n', '.', ','],
|
|
|
11 |
from langchain.embeddings import OpenAIEmbeddings
|
12 |
from langchain.vectorstores import FAISS
|
13 |
from langchain.vectorstores import Chroma
|
14 |
+
import requests
|
15 |
+
from bs4 import BeautifulSoup
|
16 |
|
17 |
|
18 |
from dotenv import load_dotenv
|
|
|
34 |
|
35 |
if process_url_clicked:
|
36 |
# load data
|
37 |
+
#loader = UnstructuredURLLoader(urls=urls)
|
38 |
+
#main_placeholder.text("Data Loading...Started...✅✅✅")
|
39 |
+
#data = loader.load()
|
40 |
+
def fetch_web_content(url):
    """Download *url* and return the text content of the page.

    The page is requested with a 10-second timeout, parsed with
    BeautifulSoup's built-in "html.parser", and reduced to its text via
    ``get_text()``.

    Args:
        url: Address of the page to download.

    Returns:
        The extracted page text on success. If anything raises while
        fetching or parsing, a string of the form
        ``"Error fetching <url>: <exception message>"`` is returned instead.

    NOTE(review): failures are reported through the return value, so a
    caller that collects the results cannot tell an error message apart
    from real page text without inspecting the string.
    """
    try:
        page = requests.get(url, timeout=10)
        # Turn 4xx/5xx responses into exceptions so they take the error path.
        page.raise_for_status()
        return BeautifulSoup(page.text, "html.parser").get_text()
    except Exception as exc:
        # Best-effort: never propagate, hand back a readable message instead.
        return f"Error fetching {url}: {exc!s}"
|
48 |
+
|
49 |
+
# Your list of URLs
|
50 |
+
url = url
|
51 |
+
|
52 |
+
# Display status message
|
53 |
+
main_placeholder.text("Data Loading...Started...✅✅✅")
|
54 |
+
|
55 |
+
# Fetch content
|
56 |
+
data = [fetch_web_content(url) for url in urls if url.strip()]
|
57 |
+
|
58 |
+
# Display completion message
|
59 |
+
main_placeholder.text("Data Loading...Completed...✅✅✅")
|
60 |
# split data
|
61 |
text_splitter = RecursiveCharacterTextSplitter(
|
62 |
separators=['\n\n', '\n', '.', ','],
|