import os
import gradio as gr
import logging
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
import concurrent.futures
import timeout_decorator

# Module-level logger shared by the classes and Gradio callbacks in this file.
logger = logging.getLogger(__name__)
# Root logging configuration: request INFO-level output.
logging.basicConfig(level=logging.INFO)

class QueryRefiner:
    """Rewrite a user query with an LLM before it is sent to retrieval.

    The refiner owns its own chat model and prompt; ``refine_query`` is
    best-effort and never raises for ordinary failures, so callers can always
    fall back to the user's original wording.
    """

    def __init__(self):
        """Build the refinement model, prompt and chain."""
        self.refinement_llm = ChatOpenAI(temperature=0.2, model_name='gpt-4o')
        self.refinement_prompt = PromptTemplate(
            input_variables=['query', 'context'],
            template="""Refine and enhance the following query for maximum clarity and precision:

Original Query: {query}
Document Context: {context}

Enhanced Query Requirements:
- Restructure for optimal comprehension
- rewrite the original query for best comprehension for getting all the details in great attention to details
- Use specific structure and the response be according to context such as paragraphs or bullet points, headlines and subtexts

Refined Query:"""
        )
        self.refinement_chain = LLMChain(
            llm=self.refinement_llm,
            prompt=self.refinement_prompt
        )

    def refine_query(self, original_query, context_hints=''):
        """Return an LLM-refined version of *original_query*.

        Args:
            original_query: The query text as typed by the user.
            context_hints: Optional short description of the document; when
                empty, ``"General academic document"`` is used instead.

        Returns:
            The stripped refined query, or *original_query* unchanged when
            refinement fails or produces only whitespace.
        """
        try:
            refined_query = self.refinement_chain.run({
                'query': original_query,
                'context': context_hints or "General academic document"
            }).strip()
        except Exception as e:
            # Best-effort step: log with traceback and keep the user's query.
            logger.exception("Query refinement error: %s", e)
            return original_query

        # A blank refinement would send an empty question downstream, so
        # prefer the original wording in that case.
        return refined_query or original_query

class AdvancedPdfChatbot:
    """Conversational question answering over a single PDF.

    A PDF is loaded, split into overlapping chunks, embedded and indexed in a
    FAISS store. Each user query is first rewritten by ``QueryRefiner`` and
    then answered by a ``ConversationalRetrievalChain`` that shares one
    conversation memory across turns.
    """

    def __init__(self, openai_api_key):
        """Create the models, splitter, memory and answer prompt.

        Args:
            openai_api_key: OpenAI API key. When provided it is exported as
                the ``OPENAI_API_KEY`` environment variable, because the
                clients below are constructed without an explicit key. When
                ``None`` or empty the environment is left untouched.
        """
        # os.environ only accepts str values; assigning None raises TypeError,
        # so only export the key when one was actually supplied.
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.llm = ChatOpenAI(temperature=0, model_name='gpt-4o')

        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.query_refiner = QueryRefiner()
        # Both stay None until load_and_process_pdf() succeeds.
        self.db = None
        self.chain = None

        self.qa_prompt = PromptTemplate(
            template="""You are an expert academic assistant analyzing a document. Provide well structured response in Markdown

Context: {context}
Question: {question}

Provide a comprehensive, precise answer based strictly on the document's content.
Use this format: 
- Short summary of the response with a relevant title
- Headlines and bullet points with descriptions with breakdowns of each topics and details
- Conclusion

NOTE: Give precise and short answers when asked about specific terms and summary of specific topic

If the answer isn't directly available, explain why. """,
            input_variables=["context", "question"]
        )

    def load_and_process_pdf(self, pdf_path):
        """Index the PDF at *pdf_path* and (re)build the retrieval chain.

        Args:
            pdf_path: Filesystem path of the PDF to load.

        Raises:
            ValueError: If the PDF yields no text chunks to index.
        """
        documents = PyPDFLoader(pdf_path).load()
        texts = self.text_splitter.split_documents(documents)
        if not texts:
            # An empty chunk list gives the vector store nothing to index;
            # fail with an explicit message instead of an opaque library error.
            raise ValueError(f"No extractable text found in PDF: {pdf_path}")

        db = FAISS.from_documents(texts, self.embeddings)
        chain = ConversationalRetrievalChain.from_llm(
            llm=self.llm,
            retriever=db.as_retriever(search_kwargs={"k": 3}),
            memory=self.memory,
            combine_docs_chain_kwargs={"prompt": self.qa_prompt}
        )
        # Publish both only after everything succeeded so a failed load never
        # leaves the index and the chain pointing at different documents.
        # NOTE(review): self.memory is not cleared here, so history from a
        # previously loaded document carries over -- confirm this is intended.
        self.db, self.chain = db, chain

    def chat(self, query: str) -> str:
        """Answer *query* against the loaded PDF.

        Args:
            query: The user's question.

        Returns:
            The chain's ``'answer'`` value, or a prompt to upload a PDF when
            no document has been processed yet.
        """
        if self.chain is None:
            return "Please upload a PDF first."

        context_hints = self._extract_document_type()
        refined_query = self.query_refiner.refine_query(query, context_hints)

        result = self.chain({"question": refined_query})
        return result['answer']

    def _extract_document_type(self) -> str:
        """Return a short hint built from the first stored document chunk.

        Returns:
            ``""`` when no index exists, a preview of the first 100 characters
            of the first chunk when available, otherwise a generic fallback.
        """
        if self.db is None:
            return ""
        try:
            # Relies on the docstore's private ``_dict`` mapping; only the
            # first value is needed, so avoid copying all of them into a list.
            first_doc = next(iter(self.db.docstore._dict.values()))
            preview = first_doc.page_content[:100]
        except Exception:
            # Best-effort hint: any lookup problem degrades to a generic label
            # (but KeyboardInterrupt/SystemExit are no longer swallowed).
            return "Academic/technical document"
        return f"Document appears to cover: {preview}..."

    def clear_memory(self) -> None:
        """Forget the conversation history kept in the shared memory."""
        self.memory.clear()

# Gradio Interface
# Single chatbot instance shared by the upload, chat and clear callbacks below;
# the API key is taken from the OPENAI_API_KEY environment variable.
pdf_chatbot = AdvancedPdfChatbot(os.getenv("OPENAI_API_KEY"))

def upload_pdf(pdf_file):
    """Process an uploaded PDF and report the outcome as a status string.

    Args:
        pdf_file: Either an object exposing the file path as ``.name`` or the
            path itself; ``None`` when nothing was uploaded.

    Returns:
        A human-readable success or error message.
    """
    if pdf_file is None:
        return "Please upload a PDF file."

    # Use the object's .name when it has one, otherwise treat it as the path.
    file_path = getattr(pdf_file, 'name', pdf_file)
    try:
        pdf_chatbot.load_and_process_pdf(file_path)
        status = f"PDF processed successfully: {file_path}"
    except Exception as e:
        logger.error(f"PDF processing error: {e}")
        status = f"Error processing PDF: {str(e)}"
    return status

def respond(message, history):
    """Handle one chat turn for the Gradio textbox/chatbot pair.

    Args:
        message: Text submitted by the user.
        history: Current chat history as a list of ``(user, bot)`` pairs;
            ``None`` is treated as an empty history.

    Returns:
        A ``(textbox_value, history)`` tuple. The textbox value is always
        ``""`` so the input is cleared after a turn.
    """
    if not message:
        return "", history

    if history is None:
        history = []

    try:
        bot_message = pdf_chatbot.chat(message)
    except Exception as e:
        logger.exception("Chat response error: %s", e)
        # Show the failure as the bot's reply. Returning it as the first
        # output would write the error into the user's input box instead.
        bot_message = f"Error: {str(e)}"

    history.append((message, bot_message))
    return "", history

def clear_chatbot():
    """Clear the shared chatbot's memory and return an empty chat history."""
    empty_history = []
    pdf_chatbot.clear_memory()
    return empty_history

# Gradio UI
# Components are created inside the Blocks context in the order written here,
# and each event is wired to one of the module-level callbacks defined above.
with gr.Blocks() as demo:
    gr.Markdown("# Advanced PDF Chatbot")
    
    # File picker restricted to .pdf, next to the button that triggers processing.
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")

    # upload_pdf returns a status string, which is written to this textbox.
    upload_status = gr.Textbox(label="Upload Status")
    upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
    
    # respond receives (textbox text, chat history) and returns a pair that is
    # written back to the same two components, in that order.
    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox(placeholder="Enter your query...")
    msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
    
    # clear_chatbot clears the bot's memory and returns [] for the chat component.
    clear_button = gr.Button("Clear Conversation")
    clear_button.click(clear_chatbot, outputs=[chatbot_interface])

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()