Deaksh committed on
Commit
cd921da
·
verified ·
1 Parent(s): a471490

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -12
app.py CHANGED
@@ -41,6 +41,14 @@ if process_url_clicked:
41
  main_placeholder.text("Text Splitter...Started...✅✅✅")
42
  docs = text_splitter.split_documents(data)
43
 
 
 
 
 
 
 
 
 
44
  # Create embeddings using HuggingFaceEmbeddings
45
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
46
  main_placeholder.text("Embedding Vector Started Building...✅✅✅")
@@ -48,22 +56,35 @@ if process_url_clicked:
48
  # Generate embeddings
49
  embeddings = embedding_model.embed_documents([doc.page_content for doc in docs])
50
 
 
 
 
 
 
 
 
51
  # Convert embeddings to numpy array (needed by FAISS)
52
  embeddings_np = np.array(embeddings).astype(np.float32)
53
 
54
- # Create FAISS index
55
- dimension = len(embeddings[0]) # Embedding vector dimension
56
- index = FAISS(dimension)
57
- index.add(embeddings_np) # Add embeddings to FAISS index
58
 
59
- # Wrap FAISS index using LangChain FAISS wrapper
60
- vectorstore_huggingface = FAISS(embedding_function=embedding_model, index=index)
61
-
62
- # Save the FAISS index to a pickle file
63
- with open(file_path, "wb") as f:
64
- pickle.dump(vectorstore_huggingface, f)
65
-
66
- time.sleep(2)
 
 
 
 
 
 
 
 
67
 
68
  query = main_placeholder.text_input("Question: ")
69
  if query:
@@ -89,3 +110,4 @@ if query:
89
 
90
 
91
 
 
 
41
  main_placeholder.text("Text Splitter...Started...✅✅✅")
42
  docs = text_splitter.split_documents(data)
43
 
44
+ # Debugging: Check if docs is empty
45
+ if not docs:
46
+ main_placeholder.text("No valid documents found! Please check the URLs.")
47
+
48
+ # Debugging: Check the content of docs
49
+ for doc in docs:
50
+ main_placeholder.text(f"Document content: {doc.page_content[:200]}") # Show first 200 chars of each document
51
+
52
  # Create embeddings using HuggingFaceEmbeddings
53
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
54
  main_placeholder.text("Embedding Vector Started Building...✅✅✅")
 
56
  # Generate embeddings
57
  embeddings = embedding_model.embed_documents([doc.page_content for doc in docs])
58
 
59
+ # Debugging: Check if embeddings are generated
60
+ if not embeddings:
61
+ main_placeholder.text("No embeddings were generated! Check the embedding model or document content.")
62
+
63
+ # Check the size of embeddings
64
+ main_placeholder.text(f"Generated {len(embeddings)} embeddings.")
65
+
66
  # Convert embeddings to numpy array (needed by FAISS)
67
  embeddings_np = np.array(embeddings).astype(np.float32)
68
 
69
+ # Check the shape of the embeddings
70
+ main_placeholder.text(f"Shape of embeddings: {embeddings_np.shape}")
 
 
71
 
72
+ # Create FAISS index
73
+ if len(embeddings) > 0:
74
+ dimension = len(embeddings[0]) # Embedding vector dimension
75
+ index = FAISS(dimension)
76
+ index.add(embeddings_np) # Add embeddings to FAISS index
77
+
78
+ # Wrap FAISS index using LangChain FAISS wrapper
79
+ vectorstore_huggingface = FAISS(embedding_function=embedding_model, index=index)
80
+
81
+ # Save the FAISS index to a pickle file
82
+ with open(file_path, "wb") as f:
83
+ pickle.dump(vectorstore_huggingface, f)
84
+
85
+ time.sleep(2)
86
+ else:
87
+ main_placeholder.text("Embeddings could not be generated, skipping FAISS index creation.")
88
 
89
  query = main_placeholder.text_input("Question: ")
90
  if query:
 
110
 
111
 
112
 
113
+