# ------------------------------
# Enhanced NeuroResearch AI System with Domain Adaptability,
# Refinement Counter, Dynamic Difficulty Gradient, and Meta-Refinement Inspired by LADDER
# ------------------------------
import logging
import os
import re
import hashlib
import json
import time
import sys
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Any, Optional, Sequence

import chromadb
import requests
import streamlit as st

# LangChain and LangGraph imports
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langgraph.graph import END, StateGraph
from langgraph.prebuilt import ToolNode
from langgraph.graph.message import add_messages
from typing_extensions import TypedDict, Annotated
from langchain.tools.retriever import create_retriever_tool

# Increase Python's recursion limit at the very start (if needed)
sys.setrecursionlimit(1000)

# ------------------------------
# Logging Configuration
# ------------------------------
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s"
)
logger = logging.getLogger(__name__)

# ------------------------------
# State Schema Definition
# ------------------------------
class AgentState(TypedDict):
    messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
    context: Dict[str, Any]
    metadata: Dict[str, Any]

# ------------------------------
# Configuration
# ------------------------------
class ResearchConfig:
    DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
    CHROMA_PATH = "chroma_db"
    CHUNK_SIZE = 512
    CHUNK_OVERLAP = 64
    MAX_CONCURRENT_REQUESTS = 5
    EMBEDDING_DIMENSIONS = 1536
    DOCUMENT_MAP = {
        "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%":
            "CV-Transformer Hybrid Architecture",
        "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing":
            "Transformer Architecture Analysis",
        "Latest Trends in Machine Learning Methods Using Quantum Computing":
            "Quantum ML Frontiers"
    }
    # Base analysis template remains unchanged.
    ANALYSIS_TEMPLATE = (
        "Analyze these technical documents with scientific rigor:\n{context}\n\n"
        "Respond with:\n"
        "1. Key Technical Contributions (bullet points)\n"
        "2. Novel Methodologies\n"
        "3. Empirical Results (with metrics)\n"
        "4. Potential Applications\n"
        "5. Limitations & Future Directions\n\n"
        "Format: Markdown with LaTeX mathematical notation where applicable"
    )
    # Domain-specific prompt additions for customizable analysis
    DOMAIN_PROMPTS = {
        "Biomedical Research": "Consider clinical trial design, patient outcomes, and recent biomedical breakthroughs.",
        "Legal Research": "Emphasize legal precedents, case law, and nuanced statutory interpretations.",
        "Environmental and Energy Studies": "Highlight renewable energy technologies, efficiency metrics, and policy implications.",
        "Competitive Programming and Theoretical Computer Science": "Focus on algorithmic complexity, innovative proofs, and computational techniques.",
        "Social Sciences": "Concentrate on economic trends, sociological data, and correlations impacting public policy."
    }

if not ResearchConfig.DEEPSEEK_API_KEY:
    st.error(
        """**Research Portal Configuration Required**  
1. Obtain DeepSeek API key: [platform.deepseek.com](https://platform.deepseek.com/)  
2. Configure secret: `DEEPSEEK_API_KEY` in Space settings  
3. Rebuild deployment"""
    )
    st.stop()

# ------------------------------
# Quantum Document Processing
# ------------------------------
class QuantumDocumentManager:
    """
    Manages creation of Chroma collections from raw document texts.
    """
    def __init__(self) -> None:
        try:
            self.client = chromadb.PersistentClient(path=ResearchConfig.CHROMA_PATH)
            logger.info("Initialized PersistentClient for Chroma.")
        except Exception as e:
            logger.exception("Error initializing PersistentClient; falling back to in-memory client.")
            self.client = chromadb.Client()  # Fallback to in-memory client
        self.embeddings = OpenAIEmbeddings(
            model="text-embedding-3-large",
            dimensions=ResearchConfig.EMBEDDING_DIMENSIONS
        )

    def create_collection(self, documents: List[str], collection_name: str) -> Chroma:
        """
        Splits documents into chunks and stores them as a Chroma collection.
        """
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=ResearchConfig.CHUNK_SIZE,
            chunk_overlap=ResearchConfig.CHUNK_OVERLAP,
            separators=["\n\n", "\n", "|||"]
        )
        try:
            docs = splitter.create_documents(documents)
            logger.info(f"Created {len(docs)} document chunks for collection '{collection_name}'.")
        except Exception as e:
            logger.exception("Error during document splitting.")
            raise e

        return Chroma.from_documents(
            documents=docs,
            embedding=self.embeddings,
            client=self.client,
            collection_name=collection_name,
            ids=[self._document_id(doc.page_content) for doc in docs]
        )

    def _document_id(self, content: str) -> str:
        """
        Generates a unique document ID using SHA256 and the current timestamp.
        """
        return f"{hashlib.sha256(content.encode()).hexdigest()[:16]}-{int(time.time())}"

# Initialize document collections
qdm = QuantumDocumentManager()
research_docs = qdm.create_collection([
    "Research Report: Results of a New AI Model Improving Image Recognition Accuracy to 98%",
    "Academic Paper Summary: Why Transformers Became the Mainstream Architecture in Natural Language Processing",
    "Latest Trends in Machine Learning Methods Using Quantum Computing"
], "research")

development_docs = qdm.create_collection([
    "Project A: UI Design Completed, API Integration in Progress",
    "Project B: Testing New Feature X, Bug Fixes Needed",
    "Product Y: In the Performance Optimization Stage Before Release"
], "development")

# ------------------------------
# Advanced Retrieval System
# ------------------------------
class ResearchRetriever:
    """
    Provides retrieval methods for different domains.
    """
    def __init__(self) -> None:
        try:
            self.research_retriever = research_docs.as_retriever(
                search_type="mmr",
                search_kwargs={'k': 4, 'fetch_k': 20, 'lambda_mult': 0.85}
            )
            self.development_retriever = development_docs.as_retriever(
                search_type="similarity",
                search_kwargs={'k': 3}
            )
            logger.info("Initialized retrievers for research and development domains.")
        except Exception as e:
            logger.exception("Error initializing retrievers.")
            raise e

    def retrieve(self, query: str, domain: str) -> List[Any]:
        """
        Retrieves documents based on the query and domain.
        For now, domain differentiation is minimal; however, you can extend this method to use domain-specific collections.
        """
        try:
            # For demonstration, we use the "research" collection for all domains.
            return self.research_retriever.invoke(query)
        except Exception as e:
            logger.exception(f"Retrieval error for domain '{domain}'.")
            return []

retriever = ResearchRetriever()

# ------------------------------
# Cognitive Processing Unit
# ------------------------------
class CognitiveProcessor:
    """
    Executes API requests to the DeepSeek backend using triple redundancy
    and consolidates results via a consensus mechanism.
    """
    def __init__(self) -> None:
        self.executor = ThreadPoolExecutor(max_workers=ResearchConfig.MAX_CONCURRENT_REQUESTS)
        self.session_id = hashlib.sha256(datetime.now().isoformat().encode()).hexdigest()[:12]

    def process_query(self, prompt: str) -> Dict:
        """
        Processes a query by sending multiple API requests in parallel.
        """
        futures = []
        for _ in range(3):  # Triple redundancy for reliability
            futures.append(self.executor.submit(self._execute_api_request, prompt))

        results = []
        for future in as_completed(futures):
            try:
                results.append(future.result())
            except Exception as e:
                logger.exception("Error during API request execution.")
                st.error(f"Processing Error: {str(e)}")

        return self._consensus_check(results)

    def _execute_api_request(self, prompt: str) -> Dict:
        """
        Executes a single API request to the DeepSeek endpoint.
        """
        headers = {
            "Authorization": f"Bearer {ResearchConfig.DEEPSEEK_API_KEY}",
            "Content-Type": "application/json",
            "X-Research-Session": self.session_id
        }
        payload = {
            "model": "deepseek-chat",
            "messages": [{
                "role": "user",
                "content": f"Respond as Senior AI Researcher:\n{prompt}"
            }],
            "temperature": 0.7,
            "max_tokens": 1500,
            "top_p": 0.9
        }
        try:
            response = requests.post(
                "https://api.deepseek.com/v1/chat/completions",
                headers=headers,
                json=payload,
                timeout=45
            )
            response.raise_for_status()
            logger.info("DeepSeek API request successful.")
            return response.json()
        except requests.exceptions.RequestException as e:
            logger.exception("DeepSeek API request failed.")
            return {"error": str(e)}

    def _consensus_check(self, results: List[Dict]) -> Dict:
        """
        Consolidates multiple API responses, selecting the one with the most content.
        """
        valid_results = [r for r in results if "error" not in r]
        if not valid_results:
            logger.error("All API requests failed.")
            return {"error": "All API requests failed"}
        return max(valid_results, key=lambda x: len(x.get('choices', [{}])[0].get('message', {}).get('content', '')))

# ------------------------------
# Research Workflow Engine
# ------------------------------
class ResearchWorkflow:
    """
    Defines the multi-step research workflow using a state graph.
    """
    def __init__(self) -> None:
        self.processor = CognitiveProcessor()
        self.workflow = StateGraph(AgentState)
        self._build_workflow()
        self.app = self.workflow.compile()

    def _build_workflow(self) -> None:
        # Define nodes
        self.workflow.add_node("ingest", self.ingest_query)
        self.workflow.add_node("retrieve", self.retrieve_documents)
        self.workflow.add_node("analyze", self.analyze_content)
        self.workflow.add_node("validate", self.validate_output)
        self.workflow.add_node("refine", self.refine_results)
        # Set entry point and edges
        self.workflow.set_entry_point("ingest")
        self.workflow.add_edge("ingest", "retrieve")
        self.workflow.add_edge("retrieve", "analyze")
        self.workflow.add_conditional_edges(
            "analyze",
            self._quality_check,
            {"valid": "validate", "invalid": "refine"}
        )
        self.workflow.add_edge("validate", END)
        self.workflow.add_edge("refine", "retrieve")

    def ingest_query(self, state: AgentState) -> Dict:
        """
        Ingests the research query and initializes the refinement counter, refinement history, and domain.
        """
        try:
            query = state["messages"][-1].content
            # Retrieve domain from the query context if available; otherwise, default to "Biomedical Research"
            domain = state.get("domain", "Biomedical Research")
            # Initialize context with raw query, selected domain, refinement counter, and empty refinement history
            new_context = {"raw_query": query, "domain": domain, "refine_count": 0, "refinement_history": []}
            logger.info(f"Query ingested. Domain: {domain}")
            return {
                "messages": [AIMessage(content="Query ingested successfully")],
                "context": new_context,
                "metadata": {"timestamp": datetime.now().isoformat()}
            }
        except Exception as e:
            logger.exception("Error during query ingestion.")
            return self._error_state(f"Ingestion Error: {str(e)}")

    def retrieve_documents(self, state: AgentState) -> Dict:
        """
        Retrieves research documents based on the query.
        """
        try:
            query = state["context"]["raw_query"]
            # For demonstration, we use the "research" collection for all domains.
            docs = retriever.retrieve(query, state["context"].get("domain", "Biomedical Research"))
            logger.info(f"Retrieved {len(docs)} documents for query.")
            return {
                "messages": [AIMessage(content=f"Retrieved {len(docs)} documents")],
                "context": {
                    "documents": docs,
                    "retrieval_time": time.time(),
                    "refine_count": state["context"].get("refine_count", 0),
                    "refinement_history": state["context"].get("refinement_history", []),
                    "domain": state["context"].get("domain", "Biomedical Research")
                }
            }
        except Exception as e:
            logger.exception("Error during document retrieval.")
            return self._error_state(f"Retrieval Error: {str(e)}")

    def analyze_content(self, state: AgentState) -> Dict:
        """
        Analyzes the retrieved documents using the DeepSeek API.
        Augments the prompt with domain-specific instructions.
        """
        try:
            docs = state["context"].get("documents", [])
            docs_text = "\n\n".join([d.page_content for d in docs])
            domain = state["context"].get("domain", "Biomedical Research")
            domain_prompt = ResearchConfig.DOMAIN_PROMPTS.get(domain, "")
            # Combine domain-specific instructions with the base analysis template
            full_prompt = f"{domain_prompt}\n\n" + ResearchConfig.ANALYSIS_TEMPLATE.format(context=docs_text)
            response = self.processor.process_query(full_prompt)
            if "error" in response:
                logger.error("DeepSeek response error during analysis.")
                return self._error_state(response["error"])
            logger.info("Content analysis completed.")
            return {
                "messages": [AIMessage(content=response.get('choices', [{}])[0].get('message', {}).get('content', ''))],
                "context": {
                    "analysis": response,
                    "refine_count": state["context"].get("refine_count", 0),
                    "refinement_history": state["context"].get("refinement_history", []),
                    "domain": domain
                }
            }
        except Exception as e:
            logger.exception("Error during content analysis.")
            return self._error_state(f"Analysis Error: {str(e)}")

    def validate_output(self, state: AgentState) -> Dict:
        """
        Validates the technical analysis report.
        """
        try:
            analysis = state["messages"][-1].content
            validation_prompt = (
                f"Validate research analysis:\n{analysis}\n\n"
                "Check for:\n1. Technical accuracy\n2. Citation support\n3. Logical consistency\n4. Methodological soundness\n\n"
                "Respond with 'VALID' or 'INVALID'"
            )
            response = self.processor.process_query(validation_prompt)
            logger.info("Output validation completed.")
            return {
                "messages": [AIMessage(content=analysis + f"\n\nValidation: {response.get('choices', [{}])[0].get('message', {}).get('content', '')}")]
            }
        except Exception as e:
            logger.exception("Error during output validation.")
            return self._error_state(f"Validation Error: {str(e)}")

    def refine_results(self, state: AgentState) -> Dict:
        """
        Refines the analysis report if validation fails.
        Implements an innovative meta-refinement mechanism inspired by LADDER.
        Tracks refinement history, uses a dynamic difficulty gradient, and if the refinement count exceeds a threshold,
        summarizes the history into a final output.
        """
        try:
            current_count = state["context"].get("refine_count", 0)
            state["context"]["refine_count"] = current_count + 1
            # Append current analysis to refinement history
            refinement_history = state["context"].setdefault("refinement_history", [])
            current_analysis = state["messages"][-1].content
            refinement_history.append(current_analysis)
            # Compute a "difficulty level" (from 3 to 0) based on refinement count
            difficulty_level = max(0, 3 - state["context"]["refine_count"])
            logger.info(f"Refinement iteration: {state['context']['refine_count']}, Difficulty level: {difficulty_level}")
            
            # If refinement count exceeds threshold, perform meta-refinement by summarizing the history
            if state["context"]["refine_count"] >= 3:
                meta_prompt = (
                    "You are given the following series of refinement outputs:\n" +
                    "\n---\n".join(refinement_history) +
                    "\n\nSummarize the above into a final, concise, and high-quality technical analysis report. Do not introduce new ideas; just synthesize the improvements."
                )
                meta_response = self.processor.process_query(meta_prompt)
                logger.info("Meta-refinement completed.")
                return {
                    "messages": [AIMessage(content=meta_response.get('choices', [{}])[0].get('message', {}).get('content', ''))],
                    "context": state["context"]
                }
            else:
                # Standard refinement with a dynamic difficulty prompt
                refinement_prompt = (
                    f"Refine this analysis (current difficulty level: {difficulty_level}):\n{current_analysis}\n\n"
                    "Improve the following aspects:\n1. Technical precision\n2. Empirical grounding\n3. Theoretical coherence\n\n"
                    "Use a structured difficulty gradient approach (similar to LADDER) to produce a simpler yet more accurate variant."
                )
                response = self.processor.process_query(refinement_prompt)
                logger.info("Refinement completed.")
                return {
                    "messages": [AIMessage(content=response.get('choices', [{}])[0].get('message', {}).get('content', ''))],
                    "context": state["context"]
                }
        except Exception as e:
            logger.exception("Error during refinement.")
            return self._error_state(f"Refinement Error: {str(e)}")

    def _quality_check(self, state: AgentState) -> str:
        """
        Checks whether the analysis report is valid.
        Forces a valid state if the refinement count exceeds a threshold.
        """
        refine_count = state["context"].get("refine_count", 0)
        if refine_count >= 3:
            logger.warning("Refinement limit reached. Forcing valid outcome to prevent infinite recursion.")
            return "valid"
        content = state["messages"][-1].content
        quality = "valid" if "VALID" in content else "invalid"
        logger.info(f"Quality check returned: {quality}")
        return quality

    def _error_state(self, message: str) -> Dict:
        """
        Returns a standardized error state.
        """
        logger.error(message)
        return {
            "messages": [AIMessage(content=f"❌ {message}")],
            "context": {"error": True},
            "metadata": {"status": "error"}
        }

# ------------------------------
# Research Interface (Streamlit UI)
# ------------------------------
class ResearchInterface:
    """
    Provides the Streamlit-based interface for executing the research workflow.
    """
    def __init__(self) -> None:
        self.workflow = ResearchWorkflow()
        self._initialize_interface()

    def _initialize_interface(self) -> None:
        st.set_page_config(
            page_title="NeuroResearch AI",
            layout="wide",
            initial_sidebar_state="expanded"
        )
        self._inject_styles()
        self._build_sidebar()
        self._build_main_interface()

    def _inject_styles(self) -> None:
        st.markdown(
            """
            <style>
            :root {
                --primary: #2ecc71;
                --secondary: #3498db;
                --background: #0a0a0a;
                --text: #ecf0f1;
            }
            .stApp {
                background: var(--background);
                color: var(--text);
                font-family: 'Roboto', sans-serif;
            }
            .stTextArea textarea {
                background: #1a1a1a !important;
                color: var(--text) !important;
                border: 2px solid var(--secondary);
                border-radius: 8px;
                padding: 1rem;
            }
            .stButton>button {
                background: linear-gradient(135deg, var(--primary), var(--secondary));
                border: none;
                border-radius: 8px;
                padding: 1rem 2rem;
                transition: all 0.3s;
            }
            .stButton>button:hover {
                transform: translateY(-2px);
                box-shadow: 0 4px 12px rgba(46, 204, 113, 0.3);
            }
            .stExpander {
                background: #1a1a1a;
                border: 1px solid #2a2a2a;
                border-radius: 8px;
                margin: 1rem 0;
            }
            </style>
            """,
            unsafe_allow_html=True
        )

    def _build_sidebar(self) -> None:
        with st.sidebar:
            st.title("🔍 Research Database")
            st.subheader("Technical Papers")
            for title, short in ResearchConfig.DOCUMENT_MAP.items():
                with st.expander(short):
                    st.markdown(f"```\n{title}\n```")
            st.subheader("Analysis Metrics")
            st.metric("Vector Collections", 2)
            st.metric("Embedding Dimensions", ResearchConfig.EMBEDDING_DIMENSIONS)

    def _build_main_interface(self) -> None:
        st.title("🧠 NeuroResearch AI")
        query = st.text_area(
            "Research Query:",
            height=200,
            placeholder="Enter technical research question..."
        )
        # Domain selection for research use cases
        domain = st.selectbox(
            "Select Research Domain:",
            options=[
                "Biomedical Research",
                "Legal Research",
                "Environmental and Energy Studies",
                "Competitive Programming and Theoretical Computer Science",
                "Social Sciences"
            ],
            index=0
        )
        if st.button("Execute Analysis", type="primary"):
            self._execute_analysis(query, domain)

    def _execute_analysis(self, query: str, domain: str) -> None:
        try:
            with st.spinner("Initializing Quantum Analysis..."):
                # Pass domain into the context by adding it to the initial state
                results = self.workflow.app.stream({
                    "messages": [HumanMessage(content=query)],
                    "context": {"domain": domain},
                    "metadata": {}
                }, {"recursion_limit": 100})
                for event in results:
                    self._render_event(event)
                st.success("✅ Analysis Completed Successfully")
        except Exception as e:
            logger.exception("Workflow execution failed.")
            st.error(
                f"""**Analysis Failed**  
{str(e)}  
Potential issues:
- Complex query structure
- Document correlation failure
- Temporal processing constraints"""
            )

    def _render_event(self, event: Dict) -> None:
        if 'ingest' in event:
            with st.container():
                st.success("✅ Query Ingested")
        elif 'retrieve' in event:
            with st.container():
                docs = event['retrieve']['context'].get('documents', [])
                st.info(f"📚 Retrieved {len(docs)} documents")
                with st.expander("View Retrieved Documents", expanded=False):
                    for idx, doc in enumerate(docs, start=1):
                        st.markdown(f"**Document {idx}**")
                        st.code(doc.page_content, language='text')
        elif 'analyze' in event:
            with st.container():
                content = event['analyze']['messages'][0].content
                with st.expander("Technical Analysis Report", expanded=True):
                    st.markdown(content)
        elif 'validate' in event:
            with st.container():
                content = event['validate']['messages'][0].content
                if "VALID" in content:
                    st.success("✅ Validation Passed")
                    with st.expander("View Validated Analysis", expanded=True):
                        st.markdown(content.split("Validation:")[0])
                else:
                    st.warning("⚠️ Validation Issues Detected")
                    with st.expander("View Validation Details", expanded=True):
                        st.markdown(content)

if __name__ == "__main__":
    ResearchInterface()