umangchaudhry committed on
Commit
8af57c5
·
verified ·
1 Parent(s): 832b728

fixed errors

Browse files
Files changed (1) hide show
  1. app.py +96 -342
app.py CHANGED
@@ -1,29 +1,20 @@
1
  import os
2
- import re
3
  import streamlit as st
4
  from tempfile import NamedTemporaryFile
5
- import anthropic
6
-
7
- # Import necessary modules from LangChain
8
  from langchain.chains import create_retrieval_chain
9
  from langchain.chains.combine_documents import create_stuff_documents_chain
10
  from langchain_core.prompts import ChatPromptTemplate
11
- from langchain_openai import ChatOpenAI, OpenAIEmbeddings
12
- from langchain_community.document_loaders import PyPDFLoader, TextLoader
 
13
  from langchain_community.vectorstores import FAISS
 
14
  from langchain_text_splitters import RecursiveCharacterTextSplitter
 
 
15
 
16
  # Function to remove code block markers from the answer
17
  def remove_code_blocks(text):
18
- """
19
- Removes code block markers from the answer text.
20
-
21
- Args:
22
- text (str): The text from which code block markers should be removed.
23
-
24
- Returns:
25
- str: The text without code block markers.
26
- """
27
  code_block_pattern = r"^```(?:\w+)?\n(.*?)\n```$"
28
  match = re.match(code_block_pattern, text, re.DOTALL)
29
  if match:
@@ -32,49 +23,30 @@ def remove_code_blocks(text):
32
  return text
33
 
34
  # Function to process PDF, run Q&A, and return results
35
- def generate_summary_from_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
36
- """
37
- Processes a PDF file, runs Q&A, and returns the results.
38
-
39
- Args:
40
- api_key (str): OpenAI API key.
41
- uploaded_file: Uploaded PDF file.
42
- questions_path (str): Path to the questions file.
43
- prompt_path (str): Path to the system prompt file.
44
- display_placeholder: Streamlit placeholder for displaying results.
45
-
46
- Returns:
47
- list: List of QA results.
48
- """
49
- # Set the OpenAI API key
50
  os.environ["OPENAI_API_KEY"] = api_key
51
 
52
- # Save the uploaded PDF to a temporary file
53
  with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
54
  temp_pdf.write(uploaded_file.read())
55
  temp_pdf_path = temp_pdf.name
56
 
57
- # Load and split the PDF into documents
58
  loader = PyPDFLoader(temp_pdf_path)
59
  docs = loader.load()
 
60
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=500)
61
  splits = text_splitter.split_documents(docs)
62
 
63
- # Create a vector store from the documents
64
  vectorstore = FAISS.from_documents(
65
- documents=splits,
66
- embedding=OpenAIEmbeddings(model="text-embedding-3-large")
67
  )
68
  retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
69
 
70
- # Load the system prompt
71
  if os.path.exists(prompt_path):
72
  with open(prompt_path, "r") as file:
73
  system_prompt = file.read()
74
  else:
75
  raise FileNotFoundError(f"The specified file was not found: {prompt_path}")
76
 
77
- # Create the prompt template
78
  prompt = ChatPromptTemplate.from_messages(
79
  [
80
  ("system", system_prompt),
@@ -82,60 +54,38 @@ def generate_summary_from_pdf(api_key, uploaded_file, questions_path, prompt_pat
82
  ]
83
  )
84
 
85
- # Initialize the language model
86
  llm = ChatOpenAI(model="gpt-4o")
87
-
88
- # Create the question-answering chain
89
- question_answer_chain = create_stuff_documents_chain(
90
- llm, prompt, document_variable_name="context"
91
- )
92
  rag_chain = create_retrieval_chain(retriever, question_answer_chain)
93
 
94
- # Load the questions
95
  if os.path.exists(questions_path):
96
  with open(questions_path, "r") as file:
97
  questions = [line.strip() for line in file.readlines() if line.strip()]
98
  else:
99
  raise FileNotFoundError(f"The specified file was not found: {questions_path}")
100
 
101
- # Process each question
102
  qa_results = []
103
  for question in questions:
104
  result = rag_chain.invoke({"input": question})
105
  answer = result["answer"]
106
 
107
- # Remove code block markers
108
  answer = remove_code_blocks(answer)
109
 
110
  qa_text = f"### Question: {question}\n**Answer:**\n{answer}\n"
111
  qa_results.append(qa_text)
112
  display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)
113
 
114
- # Clean up temporary PDF file
115
  os.remove(temp_pdf_path)
116
 
117
  return qa_results
118
 
119
- # Function to perform multi-plan QA using an existing shared vector store
120
- def perform_multi_plan_qa_shared_vectorstore(api_key, input_text, display_placeholder):
121
- """
122
- Performs multi-plan QA using an existing shared vector store.
123
-
124
- Args:
125
- api_key (str): OpenAI API key.
126
- input_text (str): The question to ask.
127
- display_placeholder: Streamlit placeholder for displaying results.
128
- """
129
- # Set the OpenAI API key
130
  os.environ["OPENAI_API_KEY"] = api_key
131
 
132
  # Load the existing vector store
133
  embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
134
- vector_store = FAISS.load_local(
135
- "Combined_Summary_Vectorstore",
136
- embeddings,
137
- allow_dangerous_deserialization=True
138
- )
139
 
140
  # Convert the vector store to a retriever
141
  retriever = vector_store.as_retriever(search_kwargs={"k": 50})
@@ -158,9 +108,7 @@ def perform_multi_plan_qa_shared_vectorstore(api_key, input_text, display_placeh
158
 
159
  # Create the question-answering chain
160
  llm = ChatOpenAI(model="gpt-4o")
161
- question_answer_chain = create_stuff_documents_chain(
162
- llm, prompt, document_variable_name="context"
163
- )
164
  rag_chain = create_retrieval_chain(retriever, question_answer_chain)
165
 
166
  # Process the input text
@@ -170,27 +118,14 @@ def perform_multi_plan_qa_shared_vectorstore(api_key, input_text, display_placeh
170
  # Display the answer
171
  display_placeholder.markdown(f"**Answer:**\n{answer}")
172
 
173
- # Function to perform multi-plan QA using multiple individual vector stores
174
- def perform_multi_plan_qa_multi_vectorstore(api_key, input_text, display_placeholder):
175
- """
176
- Performs multi-plan QA using multiple individual vector stores.
177
-
178
- Args:
179
- api_key (str): OpenAI API key.
180
- input_text (str): The question to ask.
181
- display_placeholder: Streamlit placeholder for displaying results.
182
- """
183
- # Set the OpenAI API key
184
  os.environ["OPENAI_API_KEY"] = api_key
185
 
186
  # Directory containing individual vector stores
187
  vectorstore_directory = "Individual_Summary_Vectorstores"
188
 
189
  # List all vector store directories
190
- vectorstore_names = [
191
- d for d in os.listdir(vectorstore_directory)
192
- if os.path.isdir(os.path.join(vectorstore_directory, d))
193
- ]
194
 
195
  # Initialize a list to collect all retrieved chunks
196
  all_retrieved_chunks = []
@@ -201,17 +136,13 @@ def perform_multi_plan_qa_multi_vectorstore(api_key, input_text, display_placeho
201
 
202
  # Load the vector store
203
  embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
204
- vector_store = FAISS.load_local(
205
- vectorstore_path,
206
- embeddings,
207
- allow_dangerous_deserialization=True
208
- )
209
 
210
  # Convert the vector store to a retriever
211
  retriever = vector_store.as_retriever(search_kwargs={"k": 2})
212
 
213
  # Retrieve relevant chunks for the input text
214
- retrieved_chunks = retriever.invoke(input_text)
215
  all_retrieved_chunks.extend(retrieved_chunks)
216
 
217
  # Read the system prompt for multi-document QA
@@ -232,118 +163,75 @@ def perform_multi_plan_qa_multi_vectorstore(api_key, input_text, display_placeho
232
 
233
  # Create the question-answering chain
234
  llm = ChatOpenAI(model="gpt-4o")
235
- question_answer_chain = create_stuff_documents_chain(
236
- llm, prompt, document_variable_name="context"
237
- )
238
 
239
  # Process the combined context
240
- result = question_answer_chain.invoke({
241
- "input": input_text,
242
- "context": all_retrieved_chunks
243
- })
244
 
245
  # Display the answer
246
- answer = result["answer"] if "answer" in result else result
247
- display_placeholder.markdown(f"**Answer:**\n{answer}")
248
-
249
- # Function to compare documents via one-to-many query approach
250
- def compare_documents_one_to_many(api_key, focus_input, comparison_inputs, input_text, display_placeholder):
251
- """
252
- Compares a focus document against multiple comparison documents using a one-to-many query approach.
253
-
254
- Args:
255
- api_key (str): OpenAI API key.
256
- focus_input: Focus document (uploaded file or path to vector store).
257
- comparison_inputs: List of comparison documents (uploaded files or paths to vector stores).
258
- input_text (str): The comparison question to ask.
259
- display_placeholder: Streamlit placeholder for displaying results.
260
- """
261
- # Set the OpenAI API key
262
- os.environ["OPENAI_API_KEY"] = api_key
263
-
264
- def load_documents_from_pdf(file):
265
- """
266
- Loads documents from a PDF file.
267
 
268
- Args:
269
- file: Uploaded PDF file.
270
 
271
- Returns:
272
- list: List of documents.
273
- """
274
- with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
275
- temp_pdf.write(file.read())
276
- temp_pdf_path = temp_pdf.name
277
 
278
- loader = PyPDFLoader(temp_pdf_path)
279
- docs = loader.load()
280
- os.remove(temp_pdf_path)
281
- return docs
282
 
283
- def load_vector_store_from_path(path):
284
- """
285
- Loads a vector store from a given path.
 
286
 
287
- Args:
288
- path (str): Path to the vector store.
 
289
 
290
- Returns:
291
- FAISS: Loaded vector store.
292
- """
293
- embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
294
- return FAISS.load_local(
295
- path,
296
- embeddings,
297
- allow_dangerous_deserialization=True
298
- )
299
 
 
 
 
 
300
  # Load focus documents or vector store
301
  if isinstance(focus_input, st.runtime.uploaded_file_manager.UploadedFile):
302
- # If focus_input is an uploaded PDF file
303
  focus_docs = load_documents_from_pdf(focus_input)
304
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=500)
305
  focus_splits = text_splitter.split_documents(focus_docs)
306
- focus_vector_store = FAISS.from_documents(
307
- focus_splits,
308
- OpenAIEmbeddings(model="text-embedding-3-large")
309
- )
310
  focus_retriever = focus_vector_store.as_retriever(search_kwargs={"k": 5})
311
  elif isinstance(focus_input, str) and os.path.isdir(focus_input):
312
- # If focus_input is a path to a vector store
313
  focus_vector_store = load_vector_store_from_path(focus_input)
314
  focus_retriever = focus_vector_store.as_retriever(search_kwargs={"k": 5})
315
  else:
316
  raise ValueError("Invalid focus input type. Must be a PDF file or a path to a vector store.")
317
 
318
- # Retrieve relevant chunks from the focus document
319
  focus_docs = focus_retriever.invoke(input_text)
320
 
321
- # Initialize list to collect comparison chunks
322
  comparison_chunks = []
323
  for comparison_input in comparison_inputs:
324
  if isinstance(comparison_input, st.runtime.uploaded_file_manager.UploadedFile):
325
- # If comparison_input is an uploaded PDF file
326
  comparison_docs = load_documents_from_pdf(comparison_input)
327
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)
328
  comparison_splits = text_splitter.split_documents(comparison_docs)
329
- comparison_vector_store = FAISS.from_documents(
330
- comparison_splits,
331
- OpenAIEmbeddings(model="text-embedding-3-large")
332
- )
333
  comparison_retriever = comparison_vector_store.as_retriever(search_kwargs={"k": 5})
334
  elif isinstance(comparison_input, str) and os.path.isdir(comparison_input):
335
- # If comparison_input is a path to a vector store
336
  comparison_vector_store = load_vector_store_from_path(comparison_input)
337
  comparison_retriever = comparison_vector_store.as_retriever(search_kwargs={"k": 5})
338
  else:
339
  raise ValueError("Invalid comparison input type. Must be a PDF file or a path to a vector store.")
340
 
341
- # Retrieve relevant chunks from the comparison document
342
  comparison_docs = comparison_retriever.invoke(input_text)
343
  comparison_chunks.extend(comparison_docs)
344
 
345
  # Construct the combined context
346
- combined_context = focus_docs + comparison_chunks
 
 
 
347
 
348
  # Read the system prompt
349
  prompt_path = "Prompts/comparison_prompt.md"
@@ -364,7 +252,7 @@ def compare_documents_one_to_many(api_key, focus_input, comparison_inputs, input
364
  # Create the question-answering chain
365
  llm = ChatOpenAI(model="gpt-4o")
366
  question_answer_chain = create_stuff_documents_chain(
367
- llm,
368
  prompt,
369
  document_variable_name="context"
370
  )
@@ -376,66 +264,35 @@ def compare_documents_one_to_many(api_key, focus_input, comparison_inputs, input
376
  })
377
 
378
  # Display the answer
379
- answer = result["answer"] if "answer" in result else result
380
- display_placeholder.markdown(f"**Answer:**\n{answer}")
381
 
382
  # Function to list vector store documents
383
  def list_vector_store_documents():
384
- """
385
- Lists available vector store documents.
386
-
387
- Returns:
388
- list: List of document names.
389
- """
390
  # Assuming documents are stored in the "Individual_All_Vectorstores" directory
391
  directory_path = "Individual_All_Vectorstores"
392
  if not os.path.exists(directory_path):
393
- raise FileNotFoundError(
394
- f"The directory '{directory_path}' does not exist. "
395
- "Run `create_and_save_individual_vector_stores()` to create it."
396
- )
397
  # List all available vector stores by document name
398
- documents = [
399
- f.replace("_vectorstore", "").replace("_", " ")
400
- for f in os.listdir(directory_path)
401
- if f.endswith("_vectorstore")
402
- ]
403
  return documents
404
 
405
- # Function to compare plans using a long context model
406
- def compare_plans_with_long_context_model(api_key, anthropic_api_key, input_text, focus_plan_path, focus_city_name, selected_summaries, display_placeholder):
407
- """
408
- Compares plans using a long context model.
409
-
410
- Args:
411
- api_key (str): OpenAI API key.
412
- anthropic_api_key (str): Anthropic API key.
413
- input_text (str): The comparison question to ask.
414
- focus_plan_path (str): Path to the focus plan.
415
- focus_city_name (str): Name of the focus city.
416
- selected_summaries (list): List of selected summary documents.
417
- display_placeholder: Streamlit placeholder for displaying results.
418
- """
419
- # Set the API keys
420
  os.environ["OPENAI_API_KEY"] = api_key
421
  os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key
422
-
423
  # Load the focus plan
424
- focus_docs = []
425
- if focus_plan_path.endswith('.pdf'):
 
 
 
426
  focus_loader = PyPDFLoader(focus_plan_path)
427
  focus_docs = focus_loader.load()
428
- elif focus_plan_path.endswith('.md'):
429
- focus_loader = TextLoader(focus_plan_path)
430
- focus_docs = focus_loader.load()
431
- else:
432
- raise ValueError("Unsupported file format for focus plan.")
433
 
434
  # Concatenate selected summary documents
435
  summaries_directory = "CAPS_Summaries"
436
  summaries_content = ""
437
  for filename in selected_summaries:
438
- with open(os.path.join(summaries_directory, filename), 'r') as file:
439
  summaries_content += file.read() + "\n\n"
440
 
441
  # Prepare the context
@@ -454,6 +311,7 @@ def compare_plans_with_long_context_model(api_key, anthropic_api_key, input_text
454
  # Display the answer
455
  display_placeholder.markdown(f"**Answer:**\n{message.content}", unsafe_allow_html=True)
456
 
 
457
  # Streamlit app layout with tabs
458
  st.title("Climate Policy Analysis Tool")
459
 
@@ -461,21 +319,11 @@ st.title("Climate Policy Analysis Tool")
461
  api_key = st.text_input("Enter your OpenAI API key:", type="password", key="openai_key")
462
 
463
  # Create tabs
464
- tab1, tab2, tab3, tab4, tab5 = st.tabs([
465
- "Summary Generation",
466
- "Multi-Plan QA (Shared Vectorstore)",
467
- "Multi-Plan QA (Multi-Vectorstore)",
468
- "Plan Comparison Tool",
469
- "Plan Comparison with Long Context Model"
470
- ])
471
 
472
  # First tab: Summary Generation
473
  with tab1:
474
- uploaded_file = st.file_uploader(
475
- "Upload a Climate Action Plan in PDF format",
476
- type="pdf",
477
- key="upload_file"
478
- )
479
 
480
  prompt_file_path = "Prompts/summary_tool_system_prompt.md"
481
  questions_file_path = "Prompts/summary_tool_questions.md"
@@ -489,19 +337,14 @@ with tab1:
489
  display_placeholder = st.empty()
490
  with st.spinner("Processing..."):
491
  try:
492
- results = generate_summary_from_pdf(
493
- api_key,
494
- uploaded_file,
495
- questions_file_path,
496
- prompt_file_path,
497
- display_placeholder
498
- )
499
  markdown_text = "\n".join(results)
500
-
501
  # Use the uploaded file's name for the download file
502
  base_name = os.path.splitext(uploaded_file.name)[0]
503
  download_file_name = f"{base_name}_Summary.md"
504
-
505
  st.download_button(
506
  label="Download Results as Markdown",
507
  data=markdown_text,
@@ -512,7 +355,7 @@ with tab1:
512
  except Exception as e:
513
  st.error(f"An error occurred: {e}")
514
 
515
- # Second tab: Multi-Plan QA (Shared Vectorstore)
516
  with tab2:
517
  input_text = st.text_input("Ask a question:", key="multi_plan_input")
518
  if st.button("Ask", key="multi_plan_qa_button"):
@@ -524,7 +367,7 @@ with tab2:
524
  display_placeholder2 = st.empty()
525
  with st.spinner("Processing..."):
526
  try:
527
- perform_multi_plan_qa_shared_vectorstore(
528
  api_key,
529
  input_text,
530
  display_placeholder2
@@ -532,9 +375,9 @@ with tab2:
532
  except Exception as e:
533
  st.error(f"An error occurred: {e}")
534
 
535
- # Third tab: Multi-Plan QA (Multi-Vectorstore)
536
  with tab3:
537
- user_input = st.text_input("Ask a Question", key="multi_vectorstore_input")
538
  if st.button("Ask", key="multi_vectorstore_qa_button"):
539
  if not api_key:
540
  st.warning("Please provide your OpenAI API key.")
@@ -544,7 +387,7 @@ with tab3:
544
  display_placeholder3 = st.empty()
545
  with st.spinner("Processing..."):
546
  try:
547
- perform_multi_plan_qa_multi_vectorstore(
548
  api_key,
549
  user_input,
550
  display_placeholder3
@@ -560,73 +403,32 @@ with tab4:
560
  vectorstore_documents = list_vector_store_documents()
561
 
562
  # Option to upload a new plan or select from existing vector stores
563
- focus_option = st.radio(
564
- "Choose a focus plan:",
565
- ("Select from existing vector stores", "Upload a new plan"),
566
- key="focus_option"
567
- )
568
 
569
  if focus_option == "Upload a new plan":
570
- focus_uploaded_file = st.file_uploader(
571
- "Upload a Climate Action Plan to compare",
572
- type="pdf",
573
- key="focus_upload"
574
- )
575
- focus_city_name = st.text_input(
576
- "Enter the city name for the uploaded plan:",
577
- key="focus_city_name"
578
- )
579
- if focus_uploaded_file is not None and focus_city_name:
580
  # Directly use the uploaded file
581
  focus_input = focus_uploaded_file
582
  else:
583
  focus_input = None
584
  else:
585
  # Select a focus plan from existing vector stores
586
- selected_focus_plan = st.selectbox(
587
- "Select a focus plan:",
588
- vectorstore_documents,
589
- key="select_focus_plan"
590
- )
591
- focus_input = os.path.join(
592
- "Individual_All_Vectorstores",
593
- f"{selected_focus_plan}_vectorstore"
594
- )
595
- focus_city_name = selected_focus_plan.replace("_", " ")
596
 
597
  # Option to upload comparison documents or select from existing vector stores
598
- comparison_option = st.radio(
599
- "Choose comparison documents:",
600
- ("Select from existing vector stores", "Upload new documents"),
601
- key="comparison_option"
602
- )
603
 
604
  if comparison_option == "Upload new documents":
605
- comparison_files = st.file_uploader(
606
- "Upload comparison documents",
607
- type="pdf",
608
- accept_multiple_files=True,
609
- key="comparison_files"
610
- )
611
  comparison_inputs = comparison_files
612
  else:
613
  # Select comparison documents from existing vector stores
614
- selected_comparison_plans = st.multiselect(
615
- "Select comparison documents:",
616
- vectorstore_documents,
617
- key="select_comparison_plans"
618
- )
619
- comparison_inputs = [
620
- os.path.join(
621
- "Individual_All_Vectorstores",
622
- f"{doc}_vectorstore"
623
- ) for doc in selected_comparison_plans
624
- ]
625
 
626
- input_text = st.text_input(
627
- "Ask a comparison question:",
628
- key="comparison_input"
629
- )
630
 
631
  if st.button("Compare", key="compare_button"):
632
  if not api_key:
@@ -641,13 +443,9 @@ with tab4:
641
  display_placeholder4 = st.empty()
642
  with st.spinner("Processing..."):
643
  try:
644
- compare_documents_one_to_many(
645
- api_key,
646
- focus_input,
647
- comparison_inputs,
648
- input_text,
649
- display_placeholder4
650
- )
651
  except Exception as e:
652
  st.error(f"An error occurred: {e}")
653
 
@@ -656,64 +454,30 @@ with tab5:
656
  st.header("Plan Comparison with Long Context Model")
657
 
658
  # Anthropics API Key Input
659
- anthropic_api_key = st.text_input(
660
- "Enter your Anthropic API key:",
661
- type="password",
662
- key="anthropic_key"
663
- )
664
 
665
  # Option to upload a new plan or select from a list
666
- upload_option = st.radio(
667
- "Choose a focus plan:",
668
- ("Select from existing plans", "Upload a new plan"),
669
- key="upload_option_long_context"
670
- )
671
 
672
- if upload_option == "Upload a new plan":
673
- focus_uploaded_file = st.file_uploader(
674
- "Upload a Climate Action Plan to compare",
675
- type="pdf",
676
- key="focus_upload_long_context"
677
- )
678
- focus_city_name = st.text_input(
679
- "Enter the city name for the uploaded plan:",
680
- key="focus_city_name_long_context"
681
- )
682
- if focus_uploaded_file is not None and focus_city_name:
683
- # Save uploaded file temporarily
684
- with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
685
- temp_pdf.write(focus_uploaded_file.read())
686
- focus_plan_path = temp_pdf.name
687
  else:
688
  focus_plan_path = None
689
  else:
690
- # List of existing plans in CAPS
691
  plan_list = [f.replace(".pdf", "") for f in os.listdir("CAPS") if f.endswith('.pdf')]
692
- selected_plan = st.selectbox(
693
- "Select a plan:",
694
- plan_list,
695
- key="selected_plan_long_context"
696
- )
697
- focus_plan_path = os.path.join("CAPS", selected_plan)
698
- # Extract city name from the file name
699
- focus_city_name = os.path.splitext(selected_plan)[0].replace("_", " ")
700
 
701
  # List available summary documents for selection
702
  summaries_directory = "CAPS_Summaries"
703
- summary_files = [
704
- f.replace(".md", "").replace("_", " ")
705
- for f in os.listdir(summaries_directory) if f.endswith('.md')
706
- ]
707
- selected_summaries = st.multiselect(
708
- "Select summary documents for comparison:",
709
- summary_files,
710
- key="selected_summaries"
711
- )
712
 
713
- input_text = st.text_input(
714
- "Ask a comparison question:",
715
- key="comparison_input_long_context"
716
- )
717
 
718
  if st.button("Compare with Long Context", key="compare_button_long_context"):
719
  if not api_key:
@@ -724,20 +488,10 @@ with tab5:
724
  st.warning("Please enter a comparison question.")
725
  elif not focus_plan_path:
726
  st.warning("Please provide a focus plan.")
727
- elif not focus_city_name:
728
- st.warning("Please enter the city name for the focus plan.")
729
  else:
730
  display_placeholder = st.empty()
731
  with st.spinner("Processing..."):
732
  try:
733
- compare_plans_with_long_context_model(
734
- api_key,
735
- anthropic_api_key,
736
- input_text,
737
- focus_plan_path,
738
- focus_city_name,
739
- selected_summaries,
740
- display_placeholder
741
- )
742
  except Exception as e:
743
- st.error(f"An error occurred: {e}")
 
1
  import os
 
2
  import streamlit as st
3
  from tempfile import NamedTemporaryFile
 
 
 
4
  from langchain.chains import create_retrieval_chain
5
  from langchain.chains.combine_documents import create_stuff_documents_chain
6
  from langchain_core.prompts import ChatPromptTemplate
7
+ from langchain_openai import ChatOpenAI
8
+ from langchain_community.document_loaders import PyPDFLoader
9
+ from langchain_community.document_loaders import TextLoader
10
  from langchain_community.vectorstores import FAISS
11
+ from langchain_openai import OpenAIEmbeddings
12
  from langchain_text_splitters import RecursiveCharacterTextSplitter
13
+ import re
14
+ import anthropic
15
 
16
  # Function to remove code block markers from the answer
17
  def remove_code_blocks(text):
 
 
 
 
 
 
 
 
 
18
  code_block_pattern = r"^```(?:\w+)?\n(.*?)\n```$"
19
  match = re.match(code_block_pattern, text, re.DOTALL)
20
  if match:
 
23
  return text
24
 
25
  # Function to process PDF, run Q&A, and return results
26
+ def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  os.environ["OPENAI_API_KEY"] = api_key
28
 
 
29
  with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
30
  temp_pdf.write(uploaded_file.read())
31
  temp_pdf_path = temp_pdf.name
32
 
 
33
  loader = PyPDFLoader(temp_pdf_path)
34
  docs = loader.load()
35
+
36
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=500)
37
  splits = text_splitter.split_documents(docs)
38
 
 
39
  vectorstore = FAISS.from_documents(
40
+ documents=splits, embedding=OpenAIEmbeddings(model="text-embedding-3-large")
 
41
  )
42
  retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
43
 
 
44
  if os.path.exists(prompt_path):
45
  with open(prompt_path, "r") as file:
46
  system_prompt = file.read()
47
  else:
48
  raise FileNotFoundError(f"The specified file was not found: {prompt_path}")
49
 
 
50
  prompt = ChatPromptTemplate.from_messages(
51
  [
52
  ("system", system_prompt),
 
54
  ]
55
  )
56
 
 
57
  llm = ChatOpenAI(model="gpt-4o")
58
+ question_answer_chain = create_stuff_documents_chain(llm, prompt, document_variable_name="context")
 
 
 
 
59
  rag_chain = create_retrieval_chain(retriever, question_answer_chain)
60
 
 
61
  if os.path.exists(questions_path):
62
  with open(questions_path, "r") as file:
63
  questions = [line.strip() for line in file.readlines() if line.strip()]
64
  else:
65
  raise FileNotFoundError(f"The specified file was not found: {questions_path}")
66
 
 
67
  qa_results = []
68
  for question in questions:
69
  result = rag_chain.invoke({"input": question})
70
  answer = result["answer"]
71
 
 
72
  answer = remove_code_blocks(answer)
73
 
74
  qa_text = f"### Question: {question}\n**Answer:**\n{answer}\n"
75
  qa_results.append(qa_text)
76
  display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)
77
 
 
78
  os.remove(temp_pdf_path)
79
 
80
  return qa_results
81
 
82
+ # New function to process multi-plan QA using an existing vector store
83
+ def process_multi_plan_qa(api_key, input_text, display_placeholder):
 
 
 
 
 
 
 
 
 
84
  os.environ["OPENAI_API_KEY"] = api_key
85
 
86
  # Load the existing vector store
87
  embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
88
+ vector_store = FAISS.load_local("Combined_Summary_Vectorstore", embeddings, allow_dangerous_deserialization=True)
 
 
 
 
89
 
90
  # Convert the vector store to a retriever
91
  retriever = vector_store.as_retriever(search_kwargs={"k": 50})
 
108
 
109
  # Create the question-answering chain
110
  llm = ChatOpenAI(model="gpt-4o")
111
+ question_answer_chain = create_stuff_documents_chain(llm, prompt, document_variable_name="context")
 
 
112
  rag_chain = create_retrieval_chain(retriever, question_answer_chain)
113
 
114
  # Process the input text
 
118
  # Display the answer
119
  display_placeholder.markdown(f"**Answer:**\n{answer}")
120
 
121
+ def multi_plan_qa_multi_vectorstore(api_key, input_text, display_placeholder):
 
 
 
 
 
 
 
 
 
 
122
  os.environ["OPENAI_API_KEY"] = api_key
123
 
124
  # Directory containing individual vector stores
125
  vectorstore_directory = "Individual_Summary_Vectorstores"
126
 
127
  # List all vector store directories
128
+ vectorstore_names = [d for d in os.listdir(vectorstore_directory) if os.path.isdir(os.path.join(vectorstore_directory, d))]
 
 
 
129
 
130
  # Initialize a list to collect all retrieved chunks
131
  all_retrieved_chunks = []
 
136
 
137
  # Load the vector store
138
  embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
139
+ vector_store = FAISS.load_local(vectorstore_path, embeddings, allow_dangerous_deserialization=True)
 
 
 
 
140
 
141
  # Convert the vector store to a retriever
142
  retriever = vector_store.as_retriever(search_kwargs={"k": 2})
143
 
144
  # Retrieve relevant chunks for the input text
145
+ retrieved_chunks = retriever.invoke("input_text")
146
  all_retrieved_chunks.extend(retrieved_chunks)
147
 
148
  # Read the system prompt for multi-document QA
 
163
 
164
  # Create the question-answering chain
165
  llm = ChatOpenAI(model="gpt-4o")
166
+ question_answer_chain = create_stuff_documents_chain(llm, prompt, document_variable_name="context")
 
 
167
 
168
  # Process the combined context
169
+ result = question_answer_chain.invoke({"input": input_text, "context": all_retrieved_chunks})
 
 
 
170
 
171
  # Display the answer
172
+ display_placeholder.markdown(f"**Answer:**\n{result}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
 
 
174
 
175
def load_documents_from_pdf(file):
    """Load LangChain documents from an uploaded PDF file.

    Args:
        file: An uploaded file object exposing ``name`` and ``read()``
            (e.g. a Streamlit ``UploadedFile``).

    Returns:
        list: Documents parsed from the PDF, one per page.

    Raises:
        ValueError: If the uploaded file does not have a ``.pdf`` extension.
    """
    # Guard against non-PDF uploads before writing anything to disk.
    if not file.name.endswith('.pdf'):
        raise ValueError("The uploaded file is not a PDF. Please upload a PDF file.")

    # PyPDFLoader needs a real path, so spill the upload to a temp file.
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        temp_pdf.write(file.read())
        temp_pdf_path = temp_pdf.name

    try:
        loader = PyPDFLoader(temp_pdf_path)
        docs = loader.load()
    finally:
        # Always remove the temp file, even if PDF parsing raises
        # (the original leaked the file on a parse failure).
        os.remove(temp_pdf_path)
    return docs
188
 
189
def load_vector_store_from_path(path):
    """Load a persisted FAISS vector store from *path*.

    Uses the ``text-embedding-3-large`` OpenAI embedding model, which must
    match the model the store was built with.  Deserialization of local
    FAISS indexes is explicitly allowed, so only load trusted paths.
    """
    return FAISS.load_local(
        path,
        OpenAIEmbeddings(model="text-embedding-3-large"),
        allow_dangerous_deserialization=True,
    )
192
 
 
 
 
 
 
 
 
 
 
193
 
194
+ # Function to compare document via one-to-many query approach
195
+ def process_one_to_many_query(api_key, focus_input, comparison_inputs, input_text, display_placeholder):
196
+ os.environ["OPENAI_API_KEY"] = api_key
197
+ print(comparison_inputs)
198
  # Load focus documents or vector store
199
  if isinstance(focus_input, st.runtime.uploaded_file_manager.UploadedFile):
 
200
  focus_docs = load_documents_from_pdf(focus_input)
201
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=500)
202
  focus_splits = text_splitter.split_documents(focus_docs)
203
+ focus_vector_store = FAISS.from_documents(focus_splits, OpenAIEmbeddings(model="text-embedding-3-large"))
 
 
 
204
  focus_retriever = focus_vector_store.as_retriever(search_kwargs={"k": 5})
205
  elif isinstance(focus_input, str) and os.path.isdir(focus_input):
 
206
  focus_vector_store = load_vector_store_from_path(focus_input)
207
  focus_retriever = focus_vector_store.as_retriever(search_kwargs={"k": 5})
208
  else:
209
  raise ValueError("Invalid focus input type. Must be a PDF file or a path to a vector store.")
210
 
 
211
  focus_docs = focus_retriever.invoke(input_text)
212
 
 
213
  comparison_chunks = []
214
  for comparison_input in comparison_inputs:
215
  if isinstance(comparison_input, st.runtime.uploaded_file_manager.UploadedFile):
 
216
  comparison_docs = load_documents_from_pdf(comparison_input)
217
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)
218
  comparison_splits = text_splitter.split_documents(comparison_docs)
219
+ comparison_vector_store = FAISS.from_documents(comparison_splits, OpenAIEmbeddings(model="text-embedding-3-large"))
 
 
 
220
  comparison_retriever = comparison_vector_store.as_retriever(search_kwargs={"k": 5})
221
  elif isinstance(comparison_input, str) and os.path.isdir(comparison_input):
 
222
  comparison_vector_store = load_vector_store_from_path(comparison_input)
223
  comparison_retriever = comparison_vector_store.as_retriever(search_kwargs={"k": 5})
224
  else:
225
  raise ValueError("Invalid comparison input type. Must be a PDF file or a path to a vector store.")
226
 
 
227
  comparison_docs = comparison_retriever.invoke(input_text)
228
  comparison_chunks.extend(comparison_docs)
229
 
230
  # Construct the combined context
231
+ combined_context = (
232
+ focus_docs +
233
+ comparison_chunks
234
+ )
235
 
236
  # Read the system prompt
237
  prompt_path = "Prompts/comparison_prompt.md"
 
252
  # Create the question-answering chain
253
  llm = ChatOpenAI(model="gpt-4o")
254
  question_answer_chain = create_stuff_documents_chain(
255
+ llm,
256
  prompt,
257
  document_variable_name="context"
258
  )
 
264
  })
265
 
266
  # Display the answer
267
+ display_placeholder.markdown(f"**Answer:**\n{result}")
 
268
 
269
def list_vector_store_documents():
    """Return the document names that have a saved vector store.

    Scans the ``Individual_All_Vectorstores`` directory for entries whose
    names end in ``_vectorstore`` and maps each entry back to a
    human-readable document name (the ``_vectorstore`` text is removed and
    remaining underscores become spaces).

    Returns:
        list[str]: Human-readable document names, in directory listing order.

    Raises:
        FileNotFoundError: If the vector store directory does not exist.
    """
    store_dir = "Individual_All_Vectorstores"
    if not os.path.exists(store_dir):
        raise FileNotFoundError(
            f"The directory '{store_dir}' does not exist. "
            "Run `create_and_save_individual_vector_stores()` to create it."
        )

    # Collect entries that follow the "<Doc_Name>_vectorstore" convention.
    names = []
    for entry in os.listdir(store_dir):
        if entry.endswith("_vectorstore"):
            names.append(entry.replace("_vectorstore", "").replace("_", " "))
    return names
278
 
279
+ def compare_with_long_context(api_key, anthropic_api_key, input_text, focus_plan_path, selected_summaries, display_placeholder):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  os.environ["OPENAI_API_KEY"] = api_key
281
  os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key
 
282
  # Load the focus plan
283
+
284
+ # Load focus documents or vector store
285
+ if isinstance(focus_plan_path, st.runtime.uploaded_file_manager.UploadedFile):
286
+ focus_docs = load_documents_from_pdf(focus_plan_path)
287
+ elif isinstance(focus_plan_path, str):
288
  focus_loader = PyPDFLoader(focus_plan_path)
289
  focus_docs = focus_loader.load()
 
 
 
 
 
290
 
291
  # Concatenate selected summary documents
292
  summaries_directory = "CAPS_Summaries"
293
  summaries_content = ""
294
  for filename in selected_summaries:
295
+ with open(os.path.join(summaries_directory, f"{filename.replace(" Summary", "_Summary")}.md"), 'r') as file:
296
  summaries_content += file.read() + "\n\n"
297
 
298
  # Prepare the context
 
311
  # Display the answer
312
  display_placeholder.markdown(f"**Answer:**\n{message.content}", unsafe_allow_html=True)
313
 
314
+
315
  # Streamlit app layout with tabs
316
  st.title("Climate Policy Analysis Tool")
317
 
 
319
  api_key = st.text_input("Enter your OpenAI API key:", type="password", key="openai_key")
320
 
321
  # Create tabs
322
+ tab1, tab2, tab3, tab4, tab5 = st.tabs(["Summary Generation", "Multi-Plan QA (Shared Vectorstore)", "Multi-Plan QA (Multi-Vectorstore)", "Plan Comparison Tool", "Plan Comparison with Long Context Model"])
 
 
 
 
 
 
323
 
324
  # First tab: Summary Generation
325
  with tab1:
326
+ uploaded_file = st.file_uploader("Upload a Climate Action Plan in PDF format", type="pdf", key="upload_file")
 
 
 
 
327
 
328
  prompt_file_path = "Prompts/summary_tool_system_prompt.md"
329
  questions_file_path = "Prompts/summary_tool_questions.md"
 
337
  display_placeholder = st.empty()
338
  with st.spinner("Processing..."):
339
  try:
340
+ results = process_pdf(api_key, uploaded_file, questions_file_path, prompt_file_path, display_placeholder)
341
+
 
 
 
 
 
342
  markdown_text = "\n".join(results)
343
+
344
  # Use the uploaded file's name for the download file
345
  base_name = os.path.splitext(uploaded_file.name)[0]
346
  download_file_name = f"{base_name}_Summary.md"
347
+
348
  st.download_button(
349
  label="Download Results as Markdown",
350
  data=markdown_text,
 
355
  except Exception as e:
356
  st.error(f"An error occurred: {e}")
357
 
358
+ # Second tab: Multi-Plan QA
359
  with tab2:
360
  input_text = st.text_input("Ask a question:", key="multi_plan_input")
361
  if st.button("Ask", key="multi_plan_qa_button"):
 
367
  display_placeholder2 = st.empty()
368
  with st.spinner("Processing..."):
369
  try:
370
+ process_multi_plan_qa(
371
  api_key,
372
  input_text,
373
  display_placeholder2
 
375
  except Exception as e:
376
  st.error(f"An error occurred: {e}")
377
 
378
+
379
  with tab3:
380
+ user_input = st.text_input("Ask a question:", key="multi_vectorstore_input")
381
  if st.button("Ask", key="multi_vectorstore_qa_button"):
382
  if not api_key:
383
  st.warning("Please provide your OpenAI API key.")
 
387
  display_placeholder3 = st.empty()
388
  with st.spinner("Processing..."):
389
  try:
390
+ multi_plan_qa_multi_vectorstore(
391
  api_key,
392
  user_input,
393
  display_placeholder3
 
403
  vectorstore_documents = list_vector_store_documents()
404
 
405
  # Option to upload a new plan or select from existing vector stores
406
+ focus_option = st.radio("Choose a focus plan:", ("Select from existing vector stores", "Upload a new plan"), key="focus_option")
 
 
 
 
407
 
408
  if focus_option == "Upload a new plan":
409
+ focus_uploaded_file = st.file_uploader("Upload a Climate Action Plan to compare", type="pdf", key="focus_upload")
410
+ if focus_uploaded_file is not None:
 
 
 
 
 
 
 
 
411
  # Directly use the uploaded file
412
  focus_input = focus_uploaded_file
413
  else:
414
  focus_input = None
415
  else:
416
  # Select a focus plan from existing vector stores
417
+ selected_focus_plan = st.selectbox("Select a focus plan:", vectorstore_documents, key="select_focus_plan")
418
+ focus_input = os.path.join("Individual_All_Vectorstores", f"{selected_focus_plan.replace(" Summary", "_Summary")}_vectorstore")
 
 
 
 
 
 
 
 
419
 
420
  # Option to upload comparison documents or select from existing vector stores
421
+ comparison_option = st.radio("Choose comparison documents:", ("Select from existing vector stores", "Upload new documents"), key="comparison_option")
 
 
 
 
422
 
423
  if comparison_option == "Upload new documents":
424
+ comparison_files = st.file_uploader("Upload comparison documents", type="pdf", accept_multiple_files=True, key="comparison_files")
 
 
 
 
 
425
  comparison_inputs = comparison_files
426
  else:
427
  # Select comparison documents from existing vector stores
428
+ selected_comparison_plans = st.multiselect("Select comparison documents:", vectorstore_documents, key="select_comparison_plans")
429
+ comparison_inputs = [os.path.join("Individual_All_Vectorstores", f"{doc.replace(" Summary", "_Summary")}_vectorstore") for doc in selected_comparison_plans]
 
 
 
 
 
 
 
 
 
430
 
431
+ input_text = st.text_input("Ask a comparison question:", key="comparison_input")
 
 
 
432
 
433
  if st.button("Compare", key="compare_button"):
434
  if not api_key:
 
443
  display_placeholder4 = st.empty()
444
  with st.spinner("Processing..."):
445
  try:
446
+ # Call the process_one_to_many_query function
447
+ process_one_to_many_query(api_key, focus_input, comparison_inputs, input_text, display_placeholder4)
448
+
 
 
 
 
449
  except Exception as e:
450
  st.error(f"An error occurred: {e}")
451
 
 
454
  st.header("Plan Comparison with Long Context Model")
455
 
456
  # Anthropics API Key Input
457
+ anthropic_api_key = st.text_input("Enter your Anthropic API key:", type="password", key="anthropic_key")
 
 
 
 
458
 
459
  # Option to upload a new plan or select from a list
460
+ focus_option = st.radio("Choose a focus plan:", ("Select from existing plans", "Upload a new plan"), key="focus_option_long_context")
 
 
 
 
461
 
462
+ if focus_option == "Upload a new plan":
463
+ focus_uploaded_file = st.file_uploader("Upload a Climate Action Plan to compare", type="pdf", key="focus_upload_long_context")
464
+ if focus_uploaded_file is not None:
465
+ # Directly use the uploaded file
466
+ focus_plan_path = focus_uploaded_file
 
 
 
 
 
 
 
 
 
 
467
  else:
468
  focus_plan_path = None
469
  else:
470
+ # Select a focus plan from existing vector stores
471
  plan_list = [f.replace(".pdf", "") for f in os.listdir("CAPS") if f.endswith('.pdf')]
472
+ selected_focus_plan = st.selectbox("Select a focus plan:", plan_list, key="select_focus_plan_long_context")
473
+ focus_plan_path = os.path.join("CAPS", f"{selected_focus_plan}.pdf")
 
 
 
 
 
 
474
 
475
  # List available summary documents for selection
476
  summaries_directory = "CAPS_Summaries"
477
+ summary_files = [f.replace(".md", "").replace("_", " ") for f in os.listdir(summaries_directory) if f.endswith('.md')]
478
+ selected_summaries = st.multiselect("Select summary documents for comparison:", summary_files, key="selected_summaries")
 
 
 
 
 
 
 
479
 
480
+ input_text = st.text_input("Ask a comparison question:", key="comparison_input_long_context")
 
 
 
481
 
482
  if st.button("Compare with Long Context", key="compare_button_long_context"):
483
  if not api_key:
 
488
  st.warning("Please enter a comparison question.")
489
  elif not focus_plan_path:
490
  st.warning("Please provide a focus plan.")
 
 
491
  else:
492
  display_placeholder = st.empty()
493
  with st.spinner("Processing..."):
494
  try:
495
+ compare_with_long_context(api_key, anthropic_api_key, input_text, focus_plan_path, selected_summaries, display_placeholder)
 
 
 
 
 
 
 
 
496
  except Exception as e:
497
+ st.error(f"An error occurred: {e}")