Spaces:

vineethn
/

qna

Sleeping

App Files Files Community

vineeth N committed on Sep 30, 2024

Commit

4f4392b

verified ·

1 Parent(s): aae26f3

Update app.py

Browse files

Files changed (1) hide show

app.py +66 -61

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import os
 from typing import List
 from dotenv import load_dotenv
-import chainlit as cl
 from langchain_community.embeddings import OpenAIEmbeddings
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import FAISS
@@ -14,10 +14,10 @@ from langchain_openai import OpenAIEmbeddings
 load_dotenv()
 # Initialize OpenAI API key
-openai_api_key = os.getenv('sk-None-Nn6BodKwwjNYiNYT2QtWT3BlbkFJqTm7b3Fq4HftPntWdkUa')
 # Initialize embedding model using OpenAI
-embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key,model="text-embedding-3-small")
 # Initialize vector store
 vector_store = None
@@ -29,6 +29,7 @@ pdf_files = {}
 FAISS_INDEX_PATH = "faiss_index"
 FAISS_INDEX_FILE = os.path.join(FAISS_INDEX_PATH, "index.faiss")
 def process_pdfs(directory: str) -> None:
     """Process all PDFs in the given directory and add them to the vector store."""
     global vector_store, pdf_files
@@ -49,7 +50,7 @@ def process_pdfs(directory: str) -> None:
             vector_store = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
             vector_store.add_documents(texts)
         except Exception as e:
-            print(f"Error loading FAISS index: {e}")
             vector_store = FAISS.from_documents(texts, embeddings)
     else:
         vector_store = FAISS.from_documents(texts, embeddings)
@@ -59,60 +60,64 @@ def process_pdfs(directory: str) -> None:
         os.makedirs(FAISS_INDEX_PATH)
     vector_store.save_local(FAISS_INDEX_PATH)
-@cl.on_chat_start
-async def start():
-    """Initialize the chat session."""
-    await cl.Message(content="Welcome! Processing PDFs...").send()
-    # Process PDFs (replace with your PDF directory)
-    process_pdfs(r"C:\Users\sumes\OneDrive\Documents\pdf_docs")
-    await cl.Message(content="PDFs processed. You can now ask questions!").send()
-@cl.on_message
-async def main(message: cl.Message):
-    """Handle user messages and generate responses."""
-    if vector_store is None:
-        await cl.Message(content="Error: Vector store not initialized.").send()
-        return
-    query = message.content
-    retriever = vector_store.as_retriever(search_kwargs={"k": 3})
-    # Initialize the OpenAI language model
-    llm = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4o-mini", temperature=0)
-    qa_chain = RetrievalQA.from_chain_type(
-        llm=llm,
-        chain_type="stuff",
-        retriever=retriever,
-        return_source_documents=True
-    )
-    result = qa_chain(query)
-    answer = result['result']
-    source_docs = result['source_documents']
-    await cl.Message(content=answer).send()
-    if source_docs:
-        unique_sources = set()
-        for doc in source_docs:
-            file_name = os.path.basename(doc.metadata['source'])
-            if file_name in pdf_files and file_name not in unique_sources:
-                unique_sources.add(file_name)
-                file_path = pdf_files[file_name]
-                elements = [
-                    cl.Text(name=file_name, content=f"Source: {file_name}"),
-                    cl.File(name=file_name, path=file_path, display="inline")
-                ]
-                await cl.Message(content=f"Source: {file_name}", elements=elements).send()
-        other_sources = [doc.metadata['source'] for doc in source_docs if os.path.basename(doc.metadata['source']) not in pdf_files]
-        unique_other_sources = set(other_sources)
-        if unique_other_sources:
-            sources_message = "Other Sources:\n" + "\n".join(f"- {source}" for source in unique_other_sources)
-            await cl.Message(content=sources_message).send()

 import os
+import streamlit as st
 from typing import List
 from dotenv import load_dotenv
 from langchain_community.embeddings import OpenAIEmbeddings
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import FAISS
 load_dotenv()
 # Initialize OpenAI API key
+openai_api_key = os.getenv('OPENAI_API_KEY')
 # Initialize embedding model using OpenAI
+embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, model="text-embedding-3-small")
 # Initialize vector store
 vector_store = None
 FAISS_INDEX_PATH = "faiss_index"
 FAISS_INDEX_FILE = os.path.join(FAISS_INDEX_PATH, "index.faiss")
+@st.cache_resource
 def process_pdfs(directory: str) -> None:
     """Process all PDFs in the given directory and add them to the vector store."""
     global vector_store, pdf_files
             vector_store = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
             vector_store.add_documents(texts)
         except Exception as e:
+            st.error(f"Error loading FAISS index: {e}")
             vector_store = FAISS.from_documents(texts, embeddings)
     else:
         vector_store = FAISS.from_documents(texts, embeddings)
         os.makedirs(FAISS_INDEX_PATH)
     vector_store.save_local(FAISS_INDEX_PATH)
+def main():  # Render the Streamlit page: index the PDFs, answer a typed question, and list the answer's sources.
+    st.title("PDF Question Answering System")
+    # Hand the configured directory to process_pdfs, which adds the PDFs to the vector store.
+    pdf_directory = "/path/to/your/pdf/directory"  # Placeholder value: update this path before running
+    process_pdfs(pdf_directory)
+    st.success("PDFs processed. You can now ask questions!")
+    # Read the user's question; nothing below runs while the text box is empty.
+    user_question = st.text_input("Ask a question about the PDFs:")
+    if user_question:
+        if vector_store is None:  # NOTE(review): process_pdfs is wrapped in st.cache_resource and publishes its result only via this global -- confirm the global is still set when a cached call is reused
+            st.error("Error: Vector store not initialized.")
+            return
+        retriever = vector_store.as_retriever(search_kwargs={"k": 3})  # k=3: request three documents per query
+        # Build the OpenAI chat model and the retrieval QA chain for this question.
+        llm = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4o-mini", temperature=0)
+        qa_chain = RetrievalQA.from_chain_type(
+            llm=llm,
+            chain_type="stuff",
+            retriever=retriever,
+            return_source_documents=True  # needed so result['source_documents'] is available below
+        )
+        result = qa_chain(user_question)
+        answer = result['result']
+        source_docs = result['source_documents']
+        st.write("Answer:", answer)
+        if source_docs:
+            st.subheader("Sources:")
+            unique_sources = set()  # basenames already shown, so each known PDF is listed once
+            for doc in source_docs:
+                file_name = os.path.basename(doc.metadata['source'])
+                if file_name in pdf_files and file_name not in unique_sources:
+                    unique_sources.add(file_name)
+                    file_path = pdf_files[file_name]  # pdf_files is indexed by basename here; presumably the value is the file's path -- verify against process_pdfs
+                    st.write(f"Source: {file_name}")
+                    with open(file_path, "rb") as file:
+                        st.download_button(
+                            label=f"Download {file_name}",
+                            data=file,
+                            file_name=file_name,
+                            mime="application/pdf"
+                        )
+            other_sources = [doc.metadata['source'] for doc in source_docs if os.path.basename(doc.metadata['source']) not in pdf_files]  # sources whose basename is not a key of pdf_files
+            unique_other_sources = set(other_sources)  # de-duplicate before listing
+            if unique_other_sources:
+                st.subheader("Other Sources:")
+                for source in unique_other_sources:
+                    st.write(f"- {source}")
+if __name__ == "__main__":
+    main()