Update app.py
app.py CHANGED
@@ -1,3 +1,4 @@
+import requests
 import os
 import streamlit as st
 import pickle
@@ -8,7 +9,6 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.document_loaders import UnstructuredURLLoader
 from langchain_groq import ChatGroq
 from langchain.vectorstores import FAISS
-import numpy as np
 
 from dotenv import load_dotenv
 load_dotenv() # take environment variables from .env (especially openai api key)
@@ -27,11 +27,36 @@ file_path = "faiss_store_openai.pkl"
 main_placeholder = st.empty()
 llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens=500)
 
+# Debugging: Check if URLs are accessible
+def check_url(url):
+    try:
+        response = requests.get(url)
+        if response.status_code == 200:
+            return True
+        else:
+            return False
+    except Exception as e:
+        return False
+
 if process_url_clicked:
+    # Debugging: Verify URL accessibility
+    valid_urls = []
+    for url in urls:
+        if check_url(url):
+            valid_urls.append(url)
+        else:
+            main_placeholder.text(f"URL is not accessible: {url}")
+
+    if not valid_urls:
+        main_placeholder.text("None of the URLs are accessible.")
+
     # Load data from URLs
-    loader = UnstructuredURLLoader(urls=urls)
+    loader = UnstructuredURLLoader(urls=valid_urls)
     main_placeholder.text("Data Loading...Started...✅✅✅")
-    data = loader.load()
+    try:
+        data = loader.load()
+    except Exception as e:
+        main_placeholder.text(f"Error loading data: {e}")
 
     # Split data into chunks
     text_splitter = RecursiveCharacterTextSplitter(
@@ -66,7 +91,7 @@ if process_url_clicked:
     # Convert embeddings to numpy array (needed by FAISS)
     embeddings_np = np.array(embeddings).astype(np.float32)
 
-    # Check the shape of
+    # Check the shape of embeddings
     main_placeholder.text(f"Shape of embeddings: {embeddings_np.shape}")
 
     # Create FAISS index
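A note on the new pre-check: check_url issues a full requests.get with no timeout, so a single slow host can stall the Streamlit run indefinitely, and the if/else over the status code can collapse to one boolean expression. A minimal hardened variant (an illustrative sketch, not part of this commit) could look like:

import requests

def check_url(url, timeout=5.0):
    """Return True if the URL answers with a status below 400."""
    try:
        # HEAD avoids downloading the page body; the timeout keeps a
        # dead host from blocking the whole URL pre-check loop.
        response = requests.head(url, timeout=timeout, allow_redirects=True)
        return response.ok
    except requests.RequestException:
        return False

Some servers reject HEAD requests outright; falling back to requests.get(url, timeout=timeout, stream=True) when HEAD returns 405 is a common refinement.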
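The third hunk cuts off right after text_splitter = RecursiveCharacterTextSplitter(, so the chunking parameters are not visible in this diff. For context, the load-and-split step looks roughly like the sketch below; the separators, chunk_size, and chunk_overlap values are assumptions, not values taken from the commit:

from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

loader = UnstructuredURLLoader(urls=valid_urls)  # valid_urls built by the pre-check
data = loader.load()                             # one Document per fetched URL

# Parameter values are illustrative; the real ones sit outside this hunk.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", ","],
    chunk_size=1000,
    chunk_overlap=200,
)
docs = text_splitter.split_documents(data)

Note that in the committed code the except branch around loader.load() only reports the error; execution then continues into the split step with data unbound, so a failed load would surface as a NameError rather than stopping cleanly (an st.stop() after the message would address this).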
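One inconsistency worth flagging: the second hunk deletes import numpy as np, yet the unchanged context in the final hunk still calls np.array(embeddings).astype(np.float32), so the import apparently needs to stay or be restored. For reference, here is a sketch of the embeddings-to-index step that the final hunk's comments point at, assuming faiss is installed and embeddings is a list of equal-length float vectors (neither is shown in this diff):

import numpy as np
import faiss  # pip install faiss-cpu

# FAISS expects a contiguous 2-D float32 array of shape (n_vectors, dim).
embeddings_np = np.array(embeddings).astype(np.float32)

index = faiss.IndexFlatL2(embeddings_np.shape[1])  # exact L2 search
index.add(embeddings_np)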
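Finally, the hunk context shows file_path = "faiss_store_openai.pkl" and the file imports both pickle and langchain.vectorstores.FAISS, which suggests the store is pickled to disk once built. A hedged sketch of that round-trip; docs and embedding_model are placeholders for objects not shown in this diff:

import pickle
from langchain.vectorstores import FAISS

# Hypothetical wiring: build the store from the split documents and an
# embedding model, then persist it under the path defined in app.py.
vectorstore = FAISS.from_documents(docs, embedding_model)

with open(file_path, "wb") as f:
    pickle.dump(vectorstore, f)

# Reload later, e.g. when answering a question:
with open(file_path, "rb") as f:
    vectorstore = pickle.load(f)

A raw faiss index built as in the previous sketch is not picklable; it would need faiss.write_index(index, path) and faiss.read_index(path) instead.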
|