drkareemkamal committed · verified
Commit d171fad · 1 Parent(s): 4483ce9

Update app.py

Files changed (1):
  app.py  +44 -18

app.py CHANGED
@@ -10,7 +10,9 @@ from langchain.llms import CTransformers
 
 # === Configuration ===
 pdfs_directory = 'pdfs'
+vectorstores_directory = 'vectorstores'
 os.makedirs(pdfs_directory, exist_ok=True)
+os.makedirs(vectorstores_directory, exist_ok=True)
 
 PREDEFINED_BOOKS = [f for f in os.listdir(pdfs_directory) if f.endswith(".pdf")]
 
@@ -23,10 +25,9 @@ Context: {context}
 Answer:
 """
 
-# === Load Embeddings (CPU Friendly) ===
+# === Embeddings and LLM (CPU-friendly) ===
 embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
 
-# === LLM (Quantized, CPU Efficient) ===
 llm = CTransformers(
     model='TheBloke/Mistral-7B-Instruct-v0.1-GGUF',
     model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf',
@@ -53,8 +54,24 @@ def split_text(documents):
     )
     return splitter.split_documents(documents)
 
-def create_vector_store(docs):
-    return FAISS.from_documents(docs, embedding_model)
+def get_vectorstore_path(book_filename):
+    base_name = os.path.splitext(book_filename)[0]
+    return os.path.join(vectorstores_directory, base_name)
+
+def load_or_create_vectorstore(book_filename, documents=None):
+    vs_path = get_vectorstore_path(book_filename)
+
+    if os.path.exists(os.path.join(vs_path, "index.faiss")):
+        return FAISS.load_local(vs_path, embedding_model, allow_dangerous_deserialization=True)
+
+    if documents is None:
+        raise ValueError("Documents must be provided when creating a new vectorstore.")
+
+    os.makedirs(vs_path, exist_ok=True)
+    chunks = split_text(documents)
+    vector_store = FAISS.from_documents(chunks, embedding_model)
+    vector_store.save_local(vs_path)
+    return vector_store
 
 def retrieve_docs(vector_store, query):
     return vector_store.similarity_search(query)
@@ -66,8 +83,8 @@ def answer_question(question, documents):
     return chain.run({"question": question, "context": context})
 
 # === UI ===
-st.set_page_config(page_title="📄 PDF Q&A (CPU Version)", layout="centered")
-st.title("📚 Chat with PDF - CPU Optimized")
+st.set_page_config(page_title="📄 PDF Q&A (Cached FAISS)", layout="centered")
+st.title("📚 Chat with PDF - Cached Vector Stores")
 
 with st.sidebar:
     st.header("Select or Upload a Book")
@@ -80,17 +97,26 @@ with st.sidebar:
         st.success(f"Uploaded: {filename}")
         selected_book = filename
 
+# === Load or Create Vector Store ===
 if selected_book and selected_book != "Upload new book":
-    st.info(f"📖 You selected: {selected_book}")
     file_path = os.path.join(pdfs_directory, selected_book)
-
-    documents = load_pdf(file_path)
-    chunks = split_text(documents)
-    vector_store = create_vector_store(chunks)
-
-    question = st.chat_input("Ask a question about the book...")
-    if question:
-        st.chat_message("user").write(question)
-        related_docs = retrieve_docs(vector_store, question)
-        answer = answer_question(question, related_docs)
-        st.chat_message("assistant").write(answer)
+    vectorstore_path = get_vectorstore_path(selected_book)
+
+    try:
+        if os.path.exists(os.path.join(vectorstore_path, "index.faiss")):
+            st.info("✅ Using cached vector store.")
+            vector_store = load_or_create_vectorstore(selected_book)
+        else:
+            st.warning("⏳ Creating new vector store (first-time load)...")
+            documents = load_pdf(file_path)
+            vector_store = load_or_create_vectorstore(selected_book, documents)
+            st.success("✅ Vector store created and cached.")
+
+        question = st.chat_input("Ask a question about the book...")
+        if question:
+            st.chat_message("user").write(question)
+            related_docs = retrieve_docs(vector_store, question)
+            answer = answer_question(question, related_docs)
+            st.chat_message("assistant").write(answer)
+    except Exception as e:
+        st.error(f"❌ Error loading PDF or vector store: {e}")
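
The caching this commit introduces is essentially a FAISS save_local / load_local round-trip keyed by the book's file name. The following is a minimal, self-contained sketch of that round-trip using the same old-style langchain imports app.py relies on; the 'example-book' path and the sample Document are placeholders for illustration, not part of app.py.

# Sketch of the vector-store caching round-trip (assumed example, not app.py itself).
import os
from langchain.schema import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
vs_path = os.path.join('vectorstores', 'example-book')  # hypothetical per-book cache folder

if os.path.exists(os.path.join(vs_path, 'index.faiss')):
    # Cache hit: reload the persisted index instead of re-embedding the PDF.
    store = FAISS.load_local(vs_path, embedding_model, allow_dangerous_deserialization=True)
else:
    # First run: embed the chunks once, then persist the index to disk.
    chunks = [Document(page_content="Example chunk of text from the book.")]
    store = FAISS.from_documents(chunks, embedding_model)
    os.makedirs(vs_path, exist_ok=True)
    store.save_local(vs_path)

print(store.similarity_search("example", k=1))

On the first run the else branch embeds the chunks and writes index.faiss and index.pkl under the per-book folder; later runs skip embedding entirely and just reload the index, which is what makes repeated Streamlit reruns on the same book cheap.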