import os
import re
import streamlit as st
from tempfile import NamedTemporaryFile
import anthropic

# Import necessary modules from LangChain
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Function to remove code block markers from the answer
def remove_code_blocks(text):
    """
    Removes code block markers from the answer text.

    Args:
        text (str): The text from which code block markers should be removed.

    Returns:
        str: The text without code block markers.
    """
    code_block_pattern = r"^```(?:\w+)?\n(.*?)\n```$"
    match = re.match(code_block_pattern, text, re.DOTALL)
    if match:
        return match.group(1).strip()
    else:
        return text

# Function to process PDF, run Q&A, and return results
def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
    """
    Processes a PDF file, runs Q&A, and returns the results.

    Args:
        api_key (str): OpenAI API key.
        uploaded_file: Uploaded PDF file.
        questions_path (str): Path to the questions file.
        prompt_path (str): Path to the system prompt file.
        display_placeholder: Streamlit placeholder for displaying results.

    Returns:
        list: List of QA results.
    """
    # Set the OpenAI API key
    os.environ["OPENAI_API_KEY"] = api_key

    # Save the uploaded PDF to a temporary file
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        temp_pdf.write(uploaded_file.read())
        temp_pdf_path = temp_pdf.name

    # Load and split the PDF into documents
    loader = PyPDFLoader(temp_pdf_path)
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=500)
    splits = text_splitter.split_documents(docs)

    # Create a vector store from the documents
    vectorstore = FAISS.from_documents(
        documents=splits,
        embedding=OpenAIEmbeddings(model="text-embedding-3-large")
    )
    retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

    # Load the system prompt
    if os.path.exists(prompt_path):
        with open(prompt_path, "r") as file:
            system_prompt = file.read()
    else:
        raise FileNotFoundError(f"The specified file was not found: {prompt_path}")

    # Create the prompt template
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    # Initialize the language model
    llm = ChatOpenAI(model="gpt-4o")

    # Create the question-answering chain
    question_answer_chain = create_stuff_documents_chain(
        llm, prompt, document_variable_name="context"
    )
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    # Load the questions
    if os.path.exists(questions_path):
        with open(questions_path, "r") as file:
            questions = [line.strip() for line in file.readlines() if line.strip()]
    else:
        raise FileNotFoundError(f"The specified file was not found: {questions_path}")

    # Process each question
    qa_results = []
    for question in questions:
        result = rag_chain.invoke({"input": question})
        answer = result["answer"]

        # Remove code block markers
        answer = remove_code_blocks(answer)

        qa_text = f"### Question: {question}\n**Answer:**\n{answer}\n"
        qa_results.append(qa_text)
        display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)

    # Clean up temporary PDF file
    os.remove(temp_pdf_path)

    return qa_results

# Function to perform multi-plan QA using an existing vector store
def process_multi_plan_qa(api_key, input_text, display_placeholder):
    """
    Performs multi-plan QA using an existing shared vector store.

    Args:
        api_key (str): OpenAI API key.
        input_text (str): The question to ask.
        display_placeholder: Streamlit placeholder for displaying results.
    """
    # Set the OpenAI API key
    os.environ["OPENAI_API_KEY"] = api_key

    # Load the existing vector store
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
    vector_store = FAISS.load_local(
        "Combined_Summary_Vectorstore",
        embeddings,
        allow_dangerous_deserialization=True
    )

    # Convert the vector store to a retriever
    retriever = vector_store.as_retriever(search_kwargs={"k": 50})

    # Read the system prompt for multi-document QA
    prompt_path = "Prompts/multi_document_qa_system_prompt.md"
    if os.path.exists(prompt_path):
        with open(prompt_path, "r") as file:
            system_prompt = file.read()
    else:
        raise FileNotFoundError(f"The specified file was not found: {prompt_path}")

    # Create the prompt template
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    # Create the question-answering chain
    llm = ChatOpenAI(model="gpt-4o")
    question_answer_chain = create_stuff_documents_chain(
        llm, prompt, document_variable_name="context"
    )
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    # Process the input text
    result = rag_chain.invoke({"input": input_text})
    answer = result["answer"]

    # Display the answer
    display_placeholder.markdown(f"**Answer:**\n{answer}")

# Function to perform multi-plan QA using multiple individual vector stores
def process_multi_plan_qa_multi_vectorstore(api_key, input_text, display_placeholder):
    """
    Performs multi-plan QA using multiple individual vector stores.

    Args:
        api_key (str): OpenAI API key.
        input_text (str): The question to ask.
        display_placeholder: Streamlit placeholder for displaying results.
    """
    # Set the OpenAI API key
    os.environ["OPENAI_API_KEY"] = api_key

    # Directory containing individual vector stores
    vectorstore_directory = "Individual_Summary_Vectorstores"

    # List all vector store directories
    vectorstore_names = [
        d for d in os.listdir(vectorstore_directory)
        if os.path.isdir(os.path.join(vectorstore_directory, d))
    ]

    # Initialize a list to collect all retrieved chunks
    all_retrieved_chunks = []

    # Process each vector store
    for vectorstore_name in vectorstore_names:
        vectorstore_path = os.path.join(vectorstore_directory, vectorstore_name)

        # Load the vector store
        embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
        vector_store = FAISS.load_local(
            vectorstore_path,
            embeddings,
            allow_dangerous_deserialization=True
        )

        # Convert the vector store to a retriever
        retriever = vector_store.as_retriever(search_kwargs={"k": 2})

        # Retrieve relevant chunks for the input text
        retrieved_chunks = retriever.invoke(input_text)
        all_retrieved_chunks.extend(retrieved_chunks)

    # Read the system prompt for multi-document QA
    prompt_path = "Prompts/multi_document_qa_system_prompt.md"
    if os.path.exists(prompt_path):
        with open(prompt_path, "r") as file:
            system_prompt = file.read()
    else:
        raise FileNotFoundError(f"The specified file was not found: {prompt_path}")

    # Create the prompt template
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    # Create the question-answering chain
    llm = ChatOpenAI(model="gpt-4o")
    question_answer_chain = create_stuff_documents_chain(
        llm, prompt, document_variable_name="context"
    )

    # Process the combined context
    result = question_answer_chain.invoke({
        "input": input_text,
        "context": all_retrieved_chunks
    })

    # Display the answer
    answer = result["answer"] if "answer" in result else result
    display_placeholder.markdown(f"**Answer:**\n{answer}")

def load_documents_from_pdf(file):
    """
    Loads documents from a PDF file.

    Args:
        file: Uploaded PDF file.

    Returns:
        list: List of documents.
    """
    # Check if the file is a PDF
    if not file.name.endswith('.pdf'):
        raise ValueError("The uploaded file is not a PDF. Please upload a PDF file.")

    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
        temp_pdf.write(file.read())
        temp_pdf_path = temp_pdf.name

    loader = PyPDFLoader(temp_pdf_path)
    docs = loader.load()
    os.remove(temp_pdf_path)
    return docs

def load_vector_store_from_path(path):
    """
    Loads a vector store from a given path.

    Args:
        path (str): Path to the vector store.

    Returns:
        FAISS: Loaded vector store.
    """
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
    return FAISS.load_local(
        path,
        embeddings,
        allow_dangerous_deserialization=True
    )

# Function to compare documents via one-to-many query approach
def process_one_to_many_query(api_key, focus_input, comparison_inputs, input_text, display_placeholder):
    """
    Compares a focus document against multiple comparison documents using a one-to-many query approach.

    Args:
        api_key (str): OpenAI API key.
        focus_input: Focus document (uploaded file or path to vector store).
        comparison_inputs: List of comparison documents (uploaded files or paths to vector stores).
        input_text (str): The comparison question to ask.
        display_placeholder: Streamlit placeholder for displaying results.
    """
    # Set the OpenAI API key
    os.environ["OPENAI_API_KEY"] = api_key
    print(comparison_inputs)
    # Load focus documents or vector store
    if isinstance(focus_input, st.runtime.uploaded_file_manager.UploadedFile):
        # If focus_input is an uploaded PDF file
        focus_docs = load_documents_from_pdf(focus_input)
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=500)
        focus_splits = text_splitter.split_documents(focus_docs)
        focus_vector_store = FAISS.from_documents(
            focus_splits,
            OpenAIEmbeddings(model="text-embedding-3-large")
        )
        focus_retriever = focus_vector_store.as_retriever(search_kwargs={"k": 5})
    elif isinstance(focus_input, str) and os.path.isdir(focus_input):
        # If focus_input is a path to a vector store
        focus_vector_store = load_vector_store_from_path(focus_input)
        focus_retriever = focus_vector_store.as_retriever(search_kwargs={"k": 5})
    else:
        raise ValueError("Invalid focus input type. Must be a PDF file or a path to a vector store.")

    # Retrieve relevant chunks from the focus document
    focus_docs = focus_retriever.invoke(input_text)

    # Initialize list to collect comparison chunks
    comparison_chunks = []
    for comparison_input in comparison_inputs:
        if isinstance(comparison_input, st.runtime.uploaded_file_manager.UploadedFile):
            # If comparison_input is an uploaded PDF file
            comparison_docs = load_documents_from_pdf(comparison_input)
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)
            comparison_splits = text_splitter.split_documents(comparison_docs)
            comparison_vector_store = FAISS.from_documents(
                comparison_splits,
                OpenAIEmbeddings(model="text-embedding-3-large")
            )
            comparison_retriever = comparison_vector_store.as_retriever(search_kwargs={"k": 5})
        elif isinstance(comparison_input, str) and os.path.isdir(comparison_input):
            # If comparison_input is a path to a vector store
            comparison_vector_store = load_vector_store_from_path(comparison_input)
            comparison_retriever = comparison_vector_store.as_retriever(search_kwargs={"k": 5})
        else:
            raise ValueError("Invalid comparison input type. Must be a PDF file or a path to a vector store.")

        # Retrieve relevant chunks from the comparison document
        comparison_docs = comparison_retriever.invoke(input_text)
        comparison_chunks.extend(comparison_docs)

    # Construct the combined context
    combined_context = focus_docs + comparison_chunks

    # Read the system prompt
    prompt_path = "Prompts/comparison_prompt.md"
    if os.path.exists(prompt_path):
        with open(prompt_path, "r") as file:
            system_prompt = file.read()
    else:
        raise FileNotFoundError(f"The specified file was not found: {prompt_path}")

    # Create the prompt template
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}")
        ]
    )

    # Create the question-answering chain
    llm = ChatOpenAI(model="gpt-4o")
    question_answer_chain = create_stuff_documents_chain(
        llm,
        prompt,
        document_variable_name="context"
    )

    # Process the combined context
    result = question_answer_chain.invoke({
        "context": combined_context,
        "input": input_text
    })

    # Display the answer
    answer = result["answer"] if "answer" in result else result
    display_placeholder.markdown(f"**Answer:**\n{answer}")

# Function to list vector store documents
def list_vector_store_documents():
    """
    Lists available vector store documents.

    Returns:
        list: List of document names.
    """
    # Assuming documents are stored in the "Individual_All_Vectorstores" directory
    directory_path = "Individual_All_Vectorstores"
    if not os.path.exists(directory_path):
        raise FileNotFoundError(
            f"The directory '{directory_path}' does not exist. "
            "Run `create_and_save_individual_vector_stores()` to create it."
        )
    # List all available vector stores by document name
    documents = [
        f.replace("_vectorstore", "").replace("_", " ")
        for f in os.listdir(directory_path)
        if f.endswith("_vectorstore")
    ]
    return documents

# Function to compare plans using a long context model
def compare_with_long_context(api_key, anthropic_api_key, input_text, focus_plan_path, selected_summaries, display_placeholder):
    """
    Compares plans using a long context model.

    Args:
        api_key (str): OpenAI API key.
        anthropic_api_key (str): Anthropic API key.
        input_text (str): The comparison question to ask.
        focus_plan_path: Path to the focus plan or uploaded file.
        selected_summaries (list): List of selected summary documents.
        display_placeholder: Streamlit placeholder for displaying results.
    """
    # Set the API keys
    os.environ["OPENAI_API_KEY"] = api_key
    os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key

    # Load focus documents
    if isinstance(focus_plan_path, st.runtime.uploaded_file_manager.UploadedFile):
        # If focus_plan_path is an uploaded file
        focus_docs = load_documents_from_pdf(focus_plan_path)
    elif isinstance(focus_plan_path, str):
        # If focus_plan_path is a file path
        focus_loader = PyPDFLoader(focus_plan_path)
        focus_docs = focus_loader.load()
    else:
        raise ValueError("Invalid focus plan input type. Must be an uploaded file or a file path.")

    # Concatenate selected summary documents
    summaries_directory = "CAPS_Summaries"
    summaries_content = ""
    for filename in selected_summaries:
        # Fix the filename by replacing ' Summary' with '_Summary'
        summary_filename = f"{filename.replace(' Summary', '_Summary')}.md"
        with open(os.path.join(summaries_directory, summary_filename), 'r') as file:
            summaries_content += file.read() + "\n\n"

    # Prepare the context
    focus_context = "\n\n".join([doc.page_content for doc in focus_docs])

    # Create the client and message
    client = anthropic.Anthropic(api_key=anthropic_api_key)
    response = client.completions.create(
        model="claude-2",
        max_tokens_to_sample=1024,
        prompt=f"{input_text}\n\nFocus Document:\n{focus_context}\n\nSummaries:\n{summaries_content}"
    )

    # Display the answer
    answer = response.completion
    display_placeholder.markdown(f"**Answer:**\n{answer}", unsafe_allow_html=True)

# Streamlit app layout with tabs
st.title("Climate Policy Analysis Tool")

# API Key Input
api_key = st.text_input("Enter your OpenAI API key:", type="password", key="openai_key")

# Create tabs
tab1, tab2, tab3, tab4, tab5 = st.tabs([
    "Summary Generation",
    "Multi-Plan QA (Shared Vectorstore)",
    "Multi-Plan QA (Multi-Vectorstore)",
    "Plan Comparison Tool",
    "Plan Comparison with Long Context Model"
])

# First tab: Summary Generation
with tab1:
    uploaded_file = st.file_uploader(
        "Upload a Climate Action Plan in PDF format",
        type="pdf",
        key="upload_file"
    )

    prompt_file_path = "Prompts/summary_tool_system_prompt.md"
    questions_file_path = "Prompts/summary_tool_questions.md"

    if st.button("Generate", key="generate_button"):
        if not api_key:
            st.warning("Please provide your OpenAI API key.")
        elif not uploaded_file:
            st.warning("Please upload a PDF file.")
        else:
            display_placeholder = st.empty()
            with st.spinner("Processing..."):
                try:
                    results = process_pdf(
                        api_key,
                        uploaded_file,
                        questions_file_path,
                        prompt_file_path,
                        display_placeholder
                    )
                    markdown_text = "\n".join(results)

                    # Use the uploaded file's name for the download file
                    base_name = os.path.splitext(uploaded_file.name)[0]
                    download_file_name = f"{base_name}_Summary.md"

                    st.download_button(
                        label="Download Results as Markdown",
                        data=markdown_text,
                        file_name=download_file_name,
                        mime="text/markdown",
                        key="download_button"
                    )
                except Exception as e:
                    st.error(f"An error occurred: {e}")

# Second tab: Multi-Plan QA (Shared Vectorstore)
with tab2:
    input_text = st.text_input("Ask a question:", key="multi_plan_input")
    if st.button("Ask", key="multi_plan_qa_button"):
        if not api_key:
            st.warning("Please provide your OpenAI API key.")
        elif not input_text:
            st.warning("Please enter a question.")
        else:
            display_placeholder2 = st.empty()
            with st.spinner("Processing..."):
                try:
                    process_multi_plan_qa(
                        api_key,
                        input_text,
                        display_placeholder2
                    )
                except Exception as e:
                    st.error(f"An error occurred: {e}")

# Third tab: Multi-Plan QA (Multi-Vectorstore)
with tab3:
    user_input = st.text_input("Ask a question:", key="multi_vectorstore_input")
    if st.button("Ask", key="multi_vectorstore_qa_button"):
        if not api_key:
            st.warning("Please provide your OpenAI API key.")
        elif not user_input:
            st.warning("Please enter a question.")
        else:
            display_placeholder3 = st.empty()
            with st.spinner("Processing..."):
                try:
                    process_multi_plan_qa_multi_vectorstore(
                        api_key,
                        user_input,
                        display_placeholder3
                    )
                except Exception as e:
                    st.error(f"An error occurred: {e}")

# Fourth tab: Plan Comparison Tool
with tab4:
    st.header("Plan Comparison Tool")

    # List of documents from vector stores
    vectorstore_documents = list_vector_store_documents()

    # Option to upload a new plan or select from existing vector stores
    focus_option = st.radio(
        "Choose a focus plan:",
        ("Select from existing vector stores", "Upload a new plan"),
        key="focus_option"
    )

    if focus_option == "Upload a new plan":
        focus_uploaded_file = st.file_uploader(
            "Upload a Climate Action Plan to compare",
            type="pdf",
            key="focus_upload"
        )
        if focus_uploaded_file is not None:
            # Directly use the uploaded file
            focus_input = focus_uploaded_file
        else:
            focus_input = None
    else:
        # Select a focus plan from existing vector stores
        selected_focus_plan = st.selectbox(
            "Select a focus plan:",
            vectorstore_documents,
            key="select_focus_plan"
        )
        focus_input = os.path.join(
            "Individual_All_Vectorstores",
            f"{selected_focus_plan.replace(' Summary', '_Summary')}_vectorstore"
        )

    # Option to upload comparison documents or select from existing vector stores
    comparison_option = st.radio(
        "Choose comparison documents:",
        ("Select from existing vector stores", "Upload new documents"),
        key="comparison_option"
    )

    if comparison_option == "Upload new documents":
        comparison_files = st.file_uploader(
            "Upload comparison documents",
            type="pdf",
            accept_multiple_files=True,
            key="comparison_files"
        )
        comparison_inputs = comparison_files
    else:
        # Select comparison documents from existing vector stores
        selected_comparison_plans = st.multiselect(
            "Select comparison documents:",
            vectorstore_documents,
            key="select_comparison_plans"
        )
        comparison_inputs = [
            os.path.join(
                "Individual_All_Vectorstores",
                f"{doc.replace(' Summary', '_Summary')}_vectorstore"
            ) for doc in selected_comparison_plans
        ]

    input_text = st.text_input(
        "Ask a comparison question:",
        key="comparison_input"
    )

    if st.button("Compare", key="compare_button"):
        if not api_key:
            st.warning("Please provide your OpenAI API key.")
        elif not input_text:
            st.warning("Please enter a comparison question.")
        elif not focus_input:
            st.warning("Please provide a focus plan.")
        elif not comparison_inputs:
            st.warning("Please provide comparison documents.")
        else:
            display_placeholder4 = st.empty()
            with st.spinner("Processing..."):
                try:
                    # Call the process_one_to_many_query function
                    process_one_to_many_query(
                        api_key,
                        focus_input,
                        comparison_inputs,
                        input_text,
                        display_placeholder4
                    )
                except Exception as e:
                    st.error(f"An error occurred: {e}")

# Fifth tab: Plan Comparison with Long Context Model
with tab5:
    st.header("Plan Comparison with Long Context Model")

    # Anthropics API Key Input
    anthropic_api_key = st.text_input(
        "Enter your Anthropic API key:",
        type="password",
        key="anthropic_key"
    )

    # Option to upload a new plan or select from a list
    focus_option = st.radio(
        "Choose a focus plan:",
        ("Select from existing plans", "Upload a new plan"),
        key="focus_option_long_context"
    )

    if focus_option == "Upload a new plan":
        focus_uploaded_file = st.file_uploader(
            "Upload a Climate Action Plan to compare",
            type="pdf",
            key="focus_upload_long_context"
        )
        if focus_uploaded_file is not None:
            # Directly use the uploaded file
            focus_plan_path = focus_uploaded_file
        else:
            focus_plan_path = None
    else:
        # List of existing plans in CAPS
        plan_list = [f.replace(".pdf", "") for f in os.listdir("CAPS") if f.endswith('.pdf')]
        selected_focus_plan = st.selectbox(
            "Select a focus plan:",
            plan_list,
            key="select_focus_plan_long_context"
        )
        focus_plan_path = os.path.join("CAPS", f"{selected_focus_plan}.pdf")

    # List available summary documents for selection
    summaries_directory = "CAPS_Summaries"
    summary_files = [
        f.replace(".md", "").replace("_", " ")
        for f in os.listdir(summaries_directory) if f.endswith('.md')
    ]
    selected_summaries = st.multiselect(
        "Select summary documents for comparison:",
        summary_files,
        key="selected_summaries"
    )

    input_text = st.text_input(
        "Ask a comparison question:",
        key="comparison_input_long_context"
    )

    if st.button("Compare with Long Context", key="compare_button_long_context"):
        if not api_key:
            st.warning("Please provide your OpenAI API key.")
        elif not anthropic_api_key:
            st.warning("Please provide your Anthropic API key.")
        elif not input_text:
            st.warning("Please enter a comparison question.")
        elif not focus_plan_path:
            st.warning("Please provide a focus plan.")
        else:
            display_placeholder = st.empty()
            with st.spinner("Processing..."):
                try:
                    compare_with_long_context(
                        api_key,
                        anthropic_api_key,
                        input_text,
                        focus_plan_path,
                        selected_summaries,
                        display_placeholder
                    )
                except Exception as e:
                    st.error(f"An error occurred: {e}")