Spaces:

vineethn
/

qna

Sleeping

App Files Files Community

vineeth N committed on Sep 30, 2024

Commit

29d0fc0

verified ·

1 Parent(s): 4f4392b

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -22

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ from langchain_community.document_loaders import PyPDFLoader
 from langchain.chains import RetrievalQA
 from langchain_openai import ChatOpenAI
 from langchain_openai import OpenAIEmbeddings
 # Load environment variables
 load_dotenv()
@@ -30,51 +31,51 @@ FAISS_INDEX_PATH = "faiss_index"
 FAISS_INDEX_FILE = os.path.join(FAISS_INDEX_PATH, "index.faiss")
 @st.cache_resource
-def process_pdfs(directory: str) -> None:
-    """Process all PDFs in the given directory and add them to the vector store."""
     global vector_store, pdf_files
-    documents = []
-    for filename in os.listdir(directory):
-        if filename.endswith(".pdf"):
-            file_path = os.path.join(directory, filename)
-            loader = PyPDFLoader(file_path)
-            documents.extend(loader.load())
-            pdf_files[filename] = file_path
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     texts = text_splitter.split_documents(documents)
-    if os.path.exists(FAISS_INDEX_FILE):
-        try:
-            vector_store = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
-            vector_store.add_documents(texts)
-        except Exception as e:
-            st.error(f"Error loading FAISS index: {e}")
-            vector_store = FAISS.from_documents(texts, embeddings)
-    else:
         vector_store = FAISS.from_documents(texts, embeddings)
     # Save the updated vector store
     if not os.path.exists(FAISS_INDEX_PATH):
         os.makedirs(FAISS_INDEX_PATH)
     vector_store.save_local(FAISS_INDEX_PATH)
 def main():
     st.title("PDF Question Answering System")
-    # Process PDFs
-    pdf_directory = "/path/to/your/pdf/directory"  # Update this path
-    process_pdfs(pdf_directory)
-    st.success("PDFs processed. You can now ask questions!")
     # User input
     user_question = st.text_input("Ask a question about the PDFs:")
     if user_question:
         if vector_store is None:
-            st.error("Error: Vector store not initialized.")
             return
         retriever = vector_store.as_retriever(search_kwargs={"k": 3})

 from langchain.chains import RetrievalQA
 from langchain_openai import ChatOpenAI
 from langchain_openai import OpenAIEmbeddings
+import tempfile
 # Load environment variables
 load_dotenv()
 FAISS_INDEX_FILE = os.path.join(FAISS_INDEX_PATH, "index.faiss")
 @st.cache_resource
+def process_pdf(uploaded_file):
+    """Process the uploaded PDF and add it to the vector store."""
     global vector_store, pdf_files
+    # Create a temporary file to store the uploaded PDF
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
+        tmp_file.write(uploaded_file.getvalue())
+        tmp_file_path = tmp_file.name
+    loader = PyPDFLoader(tmp_file_path)
+    documents = loader.load()
+    pdf_files[uploaded_file.name] = tmp_file_path
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     texts = text_splitter.split_documents(documents)
+    if vector_store is None:
         vector_store = FAISS.from_documents(texts, embeddings)
+    else:
+        vector_store.add_documents(texts)
     # Save the updated vector store
     if not os.path.exists(FAISS_INDEX_PATH):
         os.makedirs(FAISS_INDEX_PATH)
     vector_store.save_local(FAISS_INDEX_PATH)
+    # Clean up the temporary file
+    os.unlink(tmp_file_path)
 def main():
     st.title("PDF Question Answering System")
+    # File uploader
+    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
+    if uploaded_file is not None:
+        process_pdf(uploaded_file)
+        st.success(f"PDF '{uploaded_file.name}' processed. You can now ask questions!")
     # User input
     user_question = st.text_input("Ask a question about the PDFs:")
     if user_question:
         if vector_store is None:
+            st.error("Error: No PDFs have been uploaded yet.")
             return
         retriever = vector_store.as_retriever(search_kwargs={"k": 3})