Spaces:

AjiNiktech
/

Document_search

Sleeping

App Files Community

AjiNiktech committed on Jul 12, 2024

Commit

b3b3691

verified ·

1 Parent(s): fe58b7c

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -49

app.py CHANGED Viewed

@@ -26,7 +26,6 @@ with st.sidebar:
 # Main app logic
 if "OPENAI_API_KEY" in os.environ:
-    # with st.sidebar:
     st.header('Multiple File Upload')
     uploaded_files = st.file_uploader('Upload your files', accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
@@ -58,72 +57,80 @@ if "OPENAI_API_KEY" in os.environ:
     # Process uploaded files
     if uploaded_files:
-        all_documents = []
-        for file in uploaded_files:
-            all_documents.extend(load_file(file))
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-        all_splits = text_splitter.split_documents(all_documents)
-        # Initialize components
-        @st.cache_resource
-        def initialize_components(_all_splits):
-            dotenv.load_dotenv()
-            chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)
-            embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
-            vectorstore = Chroma.from_documents(documents=_all_splits, embedding=embeddings)
-            retriever = vectorstore.as_retriever(k=4)
-            SYSTEM_TEMPLATE = """
-            You are an advanced AI assistant designed for document search and chatbot functionality. Your primary functions are:
-            1. Process and structure multiple documents in various formats, including:
-                .txt, .pdf, .csv, .ppt, .doc, .xls, .pptx, and .xlsx
-            2. Extract and organize information from these unstructured documents into a coherent, searchable format.
-            3. Retrieve relevant information from the processed documents based on user queries.
-            4. Act as a chatbot, engaging in conversations about the content of the documents.
-            5. Provide accurate and contextual responses to user questions, drawing solely from the information contained within the processed documents.
-            6. If a user's question is not related to the content of the provided documents, politely inform them that you can only answer questions based on the information in the given documents.
-            7. When answering, cite the specific document or section where the information was found, if possible.
-            8. If there's ambiguity in a query, ask for clarification to ensure you provide the most relevant information.
-            9. Maintain confidentiality and do not share or discuss information from one user's documents with other users.
-            Remember, your knowledge is limited to the content of the documents you've been given to process. Do not provide information or answer questions that are outside the scope of these documents. Always strive for accuracy and relevance in your responses.
-            <context>
-            {context}
-            </context>
-            Chat History:
-            {chat_history}
-            """
-            question_answering_prompt = ChatPromptTemplate.from_messages(
-                [
-                    (
-                        "system",
-                        SYSTEM_TEMPLATE,
-                    ),
-                    MessagesPlaceholder(variable_name="chat_history"),
-                    MessagesPlaceholder(variable_name="messages"),
-                ]
-            )
-            document_chain = create_stuff_documents_chain(chat, question_answering_prompt)
-            return retriever, document_chain
-        # Load components
-        with st.spinner("Initializing Assistant..."):
-            retriever, document_chain = initialize_components(all_splits)
         # Initialize memory for each session
         if "memory" not in st.session_state:
@@ -176,7 +183,7 @@ if "OPENAI_API_KEY" in os.environ:
             st.session_state.memory.save_context({"input": prompt}, {"output": full_response})
     else:
-        st.warning("Please upload files to continue.")
 else:
     st.warning("Please enter your OpenAI API Key in the sidebar to start the chatbot.")

 # Main app logic
 if "OPENAI_API_KEY" in os.environ:
     st.header('Multiple File Upload')
     uploaded_files = st.file_uploader('Upload your files', accept_multiple_files=True, type=['txt', 'pdf', 'csv', 'ppt', 'doc', 'xls', 'pptx', 'xlsx'])
     # Process uploaded files
     if uploaded_files:
+        if st.button("Process Documents"):
+            with st.spinner("Processing documents..."):
+                all_documents = []
+                for file in uploaded_files:
+                    all_documents.extend(load_file(file))
+                text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+                all_splits = text_splitter.split_documents(all_documents)
+                # Store processed documents in session state
+                st.session_state.processed_documents = all_splits
+            st.success("Documents processed successfully!")
+    # Initialize components
+    @st.cache_resource
+    def initialize_components():
+        dotenv.load_dotenv()
+        chat = ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.2)
+        embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
+        return chat, embeddings
+    # Load components
+    with st.spinner("Initializing Assistant..."):
+        chat, embeddings = initialize_components()
+    # Create vectorstore and retriever only if documents are processed
+    if 'processed_documents' in st.session_state:
+        vectorstore = Chroma.from_documents(documents=st.session_state.processed_documents, embedding=embeddings)
+        retriever = vectorstore.as_retriever(k=4)
+        SYSTEM_TEMPLATE = """
+        You are an advanced AI assistant designed for document search and chatbot functionality. Your primary functions are:
+        1. Process and structure multiple documents in various formats, including:
+            .txt, .pdf, .csv, .ppt, .doc, .xls, .pptx, and .xlsx
+        2. Extract and organize information from these unstructured documents into a coherent, searchable format.
+        3. Retrieve relevant information from the processed documents based on user queries.
+        4. Act as a chatbot, engaging in conversations about the content of the documents.
+        5. Provide accurate and contextual responses to user questions, drawing solely from the information contained within the processed documents.
+        6. If a user's question is not related to the content of the provided documents, politely inform them that you can only answer questions based on the information in the given documents.
+        7. When answering, cite the specific document or section where the information was found, if possible.
+        8. If there's ambiguity in a query, ask for clarification to ensure you provide the most relevant information.
+        9. Maintain confidentiality and do not share or discuss information from one user's documents with other users.
+        Remember, your knowledge is limited to the content of the documents you've been given to process. Do not provide information or answer questions that are outside the scope of these documents. Always strive for accuracy and relevance in your responses.
+        <context>
+        {context}
+        </context>
+        Chat History:
+        {chat_history}
+        """
+        question_answering_prompt = ChatPromptTemplate.from_messages(
+            [
+                (
+                    "system",
+                    SYSTEM_TEMPLATE,
+                ),
+                MessagesPlaceholder(variable_name="chat_history"),
+                MessagesPlaceholder(variable_name="messages"),
+            ]
+        )
+        document_chain = create_stuff_documents_chain(chat, question_answering_prompt)
         # Initialize memory for each session
         if "memory" not in st.session_state:
             st.session_state.memory.save_context({"input": prompt}, {"output": full_response})
     else:
+        st.warning("Please upload and process documents to start chatting.")
 else:
     st.warning("Please enter your OpenAI API Key in the sidebar to start the chatbot.")