Deaksh committed
Commit b7c716e · verified · 1 Parent(s): cd921da

Update app.py

Files changed (1)
  1. app.py +29 -4
app.py CHANGED
@@ -1,3 +1,4 @@
+import requests
 import os
 import streamlit as st
 import pickle
@@ -8,7 +9,6 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.document_loaders import UnstructuredURLLoader
 from langchain_groq import ChatGroq
 from langchain.vectorstores import FAISS
-import numpy as np
 
 from dotenv import load_dotenv
 load_dotenv() # take environment variables from .env (especially openai api key)
@@ -27,11 +27,36 @@ file_path = "faiss_store_openai.pkl"
 main_placeholder = st.empty()
 llm = ChatGroq(model_name="llama-3.3-70b-versatile", temperature=0.9, max_tokens=500)
 
+# Debugging: Check if URLs are accessible
+def check_url(url):
+    try:
+        response = requests.get(url)
+        if response.status_code == 200:
+            return True
+        else:
+            return False
+    except Exception as e:
+        return False
+
 if process_url_clicked:
+    # Debugging: Verify URL accessibility
+    valid_urls = []
+    for url in urls:
+        if check_url(url):
+            valid_urls.append(url)
+        else:
+            main_placeholder.text(f"URL is not accessible: {url}")
+
+    if not valid_urls:
+        main_placeholder.text("None of the URLs are accessible.")
+
     # Load data from URLs
-    loader = UnstructuredURLLoader(urls=urls)
+    loader = UnstructuredURLLoader(urls=valid_urls)
     main_placeholder.text("Data Loading...Started...✅✅✅")
-    data = loader.load()
+    try:
+        data = loader.load()
+    except Exception as e:
+        main_placeholder.text(f"Error loading data: {e}")
 
     # Split data into chunks
     text_splitter = RecursiveCharacterTextSplitter(
@@ -66,7 +91,7 @@ if process_url_clicked:
     # Convert embeddings to numpy array (needed by FAISS)
     embeddings_np = np.array(embeddings).astype(np.float32)
 
-    # Check the shape of the embeddings
+    # Check the shape of embeddings
     main_placeholder.text(f"Shape of embeddings: {embeddings_np.shape}")
 
     # Create FAISS index
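A note on the new check_url helper: requests.get fetches each page's full body just to test reachability and sets no timeout, so one slow host can stall the whole Streamlit run. A lighter variant is sketched below; it assumes the target servers answer HEAD requests, which not every site does:

    import requests

    def check_url(url, timeout=5):
        # Ask for headers only; the timeout keeps a dead host from hanging the app.
        try:
            response = requests.head(url, timeout=timeout, allow_redirects=True)
            return response.status_code == 200
        except requests.RequestException:
            return False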
 
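One caveat in the new try/except around loader.load(): the except branch only updates the placeholder and execution falls through, so a failed load leaves data unbound and the chunking step below would raise a NameError. A defensive variant (a sketch, not what this commit does) ends the run with Streamlit's st.stop():

    try:
        data = loader.load()
    except Exception as e:
        main_placeholder.text(f"Error loading data: {e}")
        st.stop()  # end this script run so later steps never touch an unbound `data`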
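The hunk ends inside the RecursiveCharacterTextSplitter(...) call, so the splitter's arguments are not visible here. For orientation, a typical configuration looks like the sketch below; the chunk_size and chunk_overlap values are illustrative assumptions, not the commit's actual settings:

    from langchain.text_splitter import RecursiveCharacterTextSplitter

    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ".", ","],  # try paragraph breaks first, then finer separators
        chunk_size=1000,    # assumed value; the real one lies outside this hunk
        chunk_overlap=200,  # assumed value
    )
    docs = text_splitter.split_documents(data)  # list of chunked Documents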
 
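The diff stops just before the index is built. Note that the last hunk still calls np.array even though this commit removes import numpy as np, so numpy must still be imported elsewhere for app.py to run. As a reference point, here is a minimal sketch of the step that "# Create FAISS index" leads into, assuming embeddings_np has shape (num_chunks, dim):

    import faiss
    import numpy as np  # still required by the np.array call in the hunk above

    dim = embeddings_np.shape[1]
    index = faiss.IndexFlatL2(dim)  # exact L2 (Euclidean) search; no training step needed
    index.add(embeddings_np)        # FAISS requires float32, which .astype(np.float32) ensured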