import os
import logging
import tempfile
from typing import List
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain

# Configure logging at INFO level and create this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Storage location and model identifiers
DB_FAISS_PATH = 'vectorstore/db_faiss'  # where create_vector_store saves the index
EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'
DEFAULT_MODEL = "facebook/bart-large-cnn"

# Keyword arguments handed to the summarization pipeline when load_llm
# receives no custom parameters
DEFAULT_PARAMS = dict(
    temperature=0.7,
    max_length=1024,
    num_beams=4,
    top_p=0.95,
    repetition_penalty=1.2,
)

def get_default_value(param_name: str, default: float) -> float:
    """Return the ``DEFAULT_PARAMS`` entry for ``param_name`` as a float.

    Args:
        param_name: Key to look up in ``DEFAULT_PARAMS``.
        default: Value used when the key is missing, when the stored value is
            an empty list/tuple, or when the stored value cannot be converted
            to a float.

    Returns:
        The stored value as a float. A list or tuple contributes its first
        element. ``default`` (as a float) is returned in the fallback cases
        described above.
    """
    value = DEFAULT_PARAMS.get(param_name, default)
    if isinstance(value, (list, tuple)):
        if not value:
            return float(default)
        # Sequence-valued settings are represented by their first element.
        value = value[0]
    try:
        return float(value)
    except (TypeError, ValueError):
        # e.g. None or a non-numeric string: fall back instead of crashing,
        # which is what "safely" promises to callers.
        return float(default)

def load_embeddings():
    """Load the embedding model as a LangChain-compatible embeddings object.

    The result is passed to ``FAISS.from_documents``, which calls
    ``embed_documents`` on it. A bare ``SentenceTransformer`` has no such
    method, so the model is wrapped in LangChain's ``HuggingFaceEmbeddings``,
    which loads the same ``EMBEDDING_MODEL`` checkpoint.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for ``EMBEDDING_MODEL``. A new
        instance is built on every call; nothing is cached here.

    Raises:
        Exception: Any import or model-loading failure is logged and re-raised.
    """
    try:
        # Local import: keeps the file's top-level import block untouched.
        from langchain_community.embeddings import HuggingFaceEmbeddings

        return HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    except Exception as e:
        logger.error(f"Failed to load embeddings: {e}")
        raise

def load_llm(model_name, custom_params=None):
    """Build a Hugging Face summarization pipeline for ``model_name``.

    Args:
        model_name: Checkpoint identifier passed to both
            ``AutoModelForSeq2SeqLM.from_pretrained`` and
            ``AutoTokenizer.from_pretrained``.
        custom_params: Optional mapping of keyword arguments for the pipeline.
            When it is missing or empty, ``DEFAULT_PARAMS`` is used instead;
            the two are never merged.

    Returns:
        The object returned by ``pipeline("summarization", ...)``.

    Raises:
        Exception: Any failure while loading is logged and re-raised.
    """
    try:
        pipeline_kwargs = custom_params if custom_params else DEFAULT_PARAMS
        seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        seq2seq_tokenizer = AutoTokenizer.from_pretrained(model_name)
        summarizer = pipeline(
            "summarization",
            model=seq2seq_model,
            tokenizer=seq2seq_tokenizer,
            **pipeline_kwargs,
        )
        return summarizer
    except Exception as err:
        logger.error(f"Failed to load LLM: {err}")
        raise

def process_pdf(file) -> List[Document]:
    """Read a PDF and return its content as a list of Document objects.

    Args:
        file: Path of the PDF; handed to ``PyPDFLoader`` as ``file_path``.

    Returns:
        Whatever ``PyPDFLoader.load()`` yields for that file.

    Raises:
        Exception: Any loader failure is logged and re-raised.
    """
    try:
        return PyPDFLoader(file_path=file).load()
    except Exception as err:
        logger.error(f"Error processing PDF: {err}")
        raise

def create_vector_store(documents: List[Document], embeddings):
    """Build a FAISS index from ``documents`` and persist it to disk.

    Args:
        documents: Documents to index.
        embeddings: Embedding object forwarded to ``FAISS.from_documents``.

    Returns:
        The FAISS store, after it has been saved under ``DB_FAISS_PATH``.

    Raises:
        Exception: Any failure while building or saving is logged and re-raised.
    """
    try:
        index = FAISS.from_documents(documents, embeddings)
        index.save_local(DB_FAISS_PATH)
    except Exception as err:
        logger.error(f"Error creating vector store: {err}")
        raise
    return index

# Approximate character budget for the text fed to a single reduce step.
# Over-long input is still cut by the tokenizer (truncation=True); batching
# only avoids silently discarding most partial summaries in one oversized call.
# NOTE(review): 3000 characters is a rough guess for a ~1024-token window.
_REDUCE_BATCH_CHARS = 3000

def _run_summarizer(llm, text: str) -> str:
    """Summarize one piece of text with the Hugging Face pipeline ``llm``."""
    # A summarization pipeline takes the text positionally and returns a list
    # with one dict per input, keyed by "summary_text".
    outputs = llm(text, truncation=True)
    return outputs[0]["summary_text"].strip()

def _reduce_summaries(llm, summaries: List[str]) -> str:
    """Collapse partial summaries into one by re-summarizing them in batches."""
    while len(summaries) > 1:
        batches: List[List[str]] = []
        current: List[str] = []
        current_chars = 0
        for summary in summaries:
            # A batch is only closed once it holds at least two items, so each
            # round yields strictly fewer summaries and the loop terminates.
            if len(current) >= 2 and current_chars + len(summary) > _REDUCE_BATCH_CHARS:
                batches.append(current)
                current, current_chars = [], 0
            current.append(summary)
            current_chars += len(summary)
        batches.append(current)
        # A leftover single summary is carried forward as-is rather than
        # being summarized a second time on its own.
        summaries = [
            batch[0] if len(batch) == 1 else _run_summarizer(llm, "\n".join(batch))
            for batch in batches
        ]
    return summaries[0]

def summarize_report(documents: List[Document], llm, max_chunks: int = 50) -> str:
    """Summarize the report using a map-reduce approach.

    Each document is summarized on its own (map); the partial summaries are
    then joined and summarized again, in batches, until one remains (reduce).
    The text is passed to the pipeline without an instruction prompt, because
    ``llm`` is a summarization pipeline rather than a prompt-driven chat model.

    Args:
        documents: Documents exposing their text as ``page_content``.
        llm: Hugging Face summarization pipeline, called as
            ``llm(text, truncation=True)``.
        max_chunks: Maximum number of documents to summarize; any beyond this
            are dropped with a warning.

    Returns:
        The final summary, or an empty string when no document has any text.

    Raises:
        Exception: Any failure during summarization is logged and re-raised.
    """
    try:
        if len(documents) > max_chunks:
            logger.warning(f"Document is very large. Summarizing first {max_chunks} chunks only.")
            documents = documents[:max_chunks]

        # Map step: one summary per non-blank document.
        partial_summaries = [
            _run_summarizer(llm, doc.page_content)
            for doc in documents
            if doc.page_content and doc.page_content.strip()
        ]
        if not partial_summaries:
            logger.warning("No text found to summarize.")
            return ""

        # Reduce step: a single partial summary is returned unchanged.
        return _reduce_summaries(llm, partial_summaries)

    except Exception as e:
        logger.error(f"Error summarizing report: {e}")
        raise

def main(pdf_path: str, model_name: str = DEFAULT_MODEL):
    """Run the whole pipeline for one PDF and print its summary.

    Steps, in order: load the embedding model, load the summarization model,
    read the PDF, build and save the vector store, summarize, print.

    Args:
        pdf_path: Path of the PDF report to summarize.
        model_name: Checkpoint passed to ``load_llm``.

    Returns:
        None. Any exception raised along the way is logged here and not
        propagated to the caller.
    """
    try:
        embedder = load_embeddings()
        summarizer = load_llm(model_name)

        pages = process_pdf(pdf_path)

        # The store is saved to disk as a side effect; its return value is unused.
        create_vector_store(pages, embedder)

        print("Structured Summary:\n", summarize_report(pages, summarizer))
    except Exception as err:
        logger.error(f"Failed to summarize the report: {err}")

if __name__ == "__main__":
    import sys

    # Take the PDF path from the first command-line argument so the script can
    # be run without editing the source; the placeholder stays as the fallback.
    pdf_path = sys.argv[1] if len(sys.argv) > 1 else "path/to/your/report.pdf"
    main(pdf_path)