Rulga committed on
Commit
3cf2176
·
1 Parent(s): 44a1807

Add debug logging to document loading and vector store creation process

Browse files
Files changed (1) hide show
  1. app.py +15 -0
app.py CHANGED
@@ -91,29 +91,44 @@ def build_knowledge_base(embeddings):
91
  documents = []
92
  os.makedirs(VECTOR_STORE_PATH, exist_ok=True)
93
 
 
 
94
  for url in URLS:
95
  try:
 
96
  loader = WebBaseLoader(url)
97
  docs = loader.load()
98
  documents.extend(docs)
 
99
  except Exception as e:
100
  print(f"Failed to load {url}: {str(e)}")
 
101
  continue
102
 
103
  if not documents:
104
  raise Exception("No documents loaded!")
105
 
 
 
106
  text_splitter = RecursiveCharacterTextSplitter(
107
  chunk_size=500,
108
  chunk_overlap=100
109
  )
 
110
  chunks = text_splitter.split_documents(documents)
 
111
 
 
112
  vector_store = FAISS.from_documents(chunks, embeddings)
 
 
113
  vector_store.save_local(folder_path=VECTOR_STORE_PATH, index_name="index")
114
 
 
115
  return vector_store
116
  except Exception as e:
 
 
117
  raise Exception(f"Knowledge base creation failed: {str(e)}")
118
 
119
  # Initialize models and knowledge base on startup
 
91
  documents = []
92
  os.makedirs(VECTOR_STORE_PATH, exist_ok=True)
93
 
94
+ print("Starting to load documents...") # Debug log
95
+
96
  for url in URLS:
97
  try:
98
+ print(f"Attempting to load {url}") # Debug log
99
  loader = WebBaseLoader(url)
100
  docs = loader.load()
101
  documents.extend(docs)
102
+ print(f"Successfully loaded {url}") # Debug log
103
  except Exception as e:
104
  print(f"Failed to load {url}: {str(e)}")
105
+ traceback.print_exc() # Print full traceback
106
  continue
107
 
108
  if not documents:
109
  raise Exception("No documents loaded!")
110
 
111
+ print(f"Total documents loaded: {len(documents)}") # Debug log
112
+
113
  text_splitter = RecursiveCharacterTextSplitter(
114
  chunk_size=500,
115
  chunk_overlap=100
116
  )
117
+ print("Splitting documents into chunks...") # Debug log
118
  chunks = text_splitter.split_documents(documents)
119
+ print(f"Created {len(chunks)} chunks") # Debug log
120
 
121
+ print("Creating vector store...") # Debug log
122
  vector_store = FAISS.from_documents(chunks, embeddings)
123
+
124
+ print("Saving vector store...") # Debug log
125
  vector_store.save_local(folder_path=VECTOR_STORE_PATH, index_name="index")
126
 
127
+ print("Vector store successfully created and saved") # Debug log
128
  return vector_store
129
  except Exception as e:
130
+ print("Error in build_knowledge_base:") # Debug log
131
+ traceback.print_exc() # Print full traceback
132
  raise Exception(f"Knowledge base creation failed: {str(e)}")
133
 
134
  # Initialize models and knowledge base on startup