Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -41,6 +41,14 @@ if process_url_clicked:
|
|
41 |
main_placeholder.text("Text Splitter...Started...✅✅✅")
|
42 |
docs = text_splitter.split_documents(data)
|
43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
# Create embeddings using HuggingFaceEmbeddings
|
45 |
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
46 |
main_placeholder.text("Embedding Vector Started Building...✅✅✅")
|
@@ -48,22 +56,35 @@ if process_url_clicked:
|
|
48 |
# Generate embeddings
|
49 |
embeddings = embedding_model.embed_documents([doc.page_content for doc in docs])
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
# Convert embeddings to numpy array (needed by FAISS)
|
52 |
embeddings_np = np.array(embeddings).astype(np.float32)
|
53 |
|
54 |
-
#
|
55 |
-
|
56 |
-
index = FAISS(dimension)
|
57 |
-
index.add(embeddings_np) # Add embeddings to FAISS index
|
58 |
|
59 |
-
#
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
query = main_placeholder.text_input("Question: ")
|
69 |
if query:
|
@@ -89,3 +110,4 @@ if query:
|
|
89 |
|
90 |
|
91 |
|
|
|
|
41 |
main_placeholder.text("Text Splitter...Started...✅✅✅")
|
42 |
docs = text_splitter.split_documents(data)
|
43 |
|
44 |
+
# Debugging: Check if docs is empty
|
45 |
+
if not docs:
|
46 |
+
main_placeholder.text("No valid documents found! Please check the URLs.")
|
47 |
+
|
48 |
+
# Debugging: Check the content of docs
|
49 |
+
for doc in docs:
|
50 |
+
main_placeholder.text(f"Document content: {doc.page_content[:200]}") # Show first 200 chars of each document
|
51 |
+
|
52 |
# Create embeddings using HuggingFaceEmbeddings
|
53 |
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
54 |
main_placeholder.text("Embedding Vector Started Building...✅✅✅")
|
|
|
56 |
# Generate embeddings
|
57 |
embeddings = embedding_model.embed_documents([doc.page_content for doc in docs])
|
58 |
|
59 |
+
# Debugging: Check if embeddings are generated
|
60 |
+
if not embeddings:
|
61 |
+
main_placeholder.text("No embeddings were generated! Check the embedding model or document content.")
|
62 |
+
|
63 |
+
# Check the size of embeddings
|
64 |
+
main_placeholder.text(f"Generated {len(embeddings)} embeddings.")
|
65 |
+
|
66 |
# Convert embeddings to numpy array (needed by FAISS)
|
67 |
embeddings_np = np.array(embeddings).astype(np.float32)
|
68 |
|
69 |
+
# Check the shape of the embeddings
|
70 |
+
main_placeholder.text(f"Shape of embeddings: {embeddings_np.shape}")
|
|
|
|
|
71 |
|
72 |
+
# Create FAISS index
|
73 |
+
if len(embeddings) > 0:
|
74 |
+
dimension = len(embeddings[0]) # Embedding vector dimension
|
75 |
+
index = FAISS(dimension)
|
76 |
+
index.add(embeddings_np) # Add embeddings to FAISS index
|
77 |
+
|
78 |
+
# Wrap FAISS index using LangChain FAISS wrapper
|
79 |
+
vectorstore_huggingface = FAISS(embedding_function=embedding_model, index=index)
|
80 |
+
|
81 |
+
# Save the FAISS index to a pickle file
|
82 |
+
with open(file_path, "wb") as f:
|
83 |
+
pickle.dump(vectorstore_huggingface, f)
|
84 |
+
|
85 |
+
time.sleep(2)
|
86 |
+
else:
|
87 |
+
main_placeholder.text("Embeddings could not be generated, skipping FAISS index creation.")
|
88 |
|
89 |
query = main_placeholder.text_input("Question: ")
|
90 |
if query:
|
|
|
110 |
|
111 |
|
112 |
|
113 |
+
|