heymenn committed on
Commit ec6d5f9 · verified · 1 Parent(s): bcecc8c

Upload 12 files

.env ADDED
@@ -0,0 +1,34 @@
+ # Neo4j Credentials
+ NEO4J_URI="neo4j+s://4985272f.databases.neo4j.io"
+ NEO4J_USERNAME="neo4j"
+ NEO4J_PASSWORD="YOUR_NEO4J_PASSWORD" # Replace with your actual password
+
+ # API Keys
+ OPENAI_API_KEY="YOUR_OPENAI_API_KEY" # Replace if using OpenAI models
+ GEMINI_API_KEY="YOUR_GEMINI_API_KEY" # Replace with your actual key
+ LANGSMITH_API_KEY="YOUR_LANGSMITH_API_KEY" # Replace with your actual key (optional but recommended for tracing)
+ LANGCHAIN_PROJECT="KIG_Refactored" # Optional: For LangSmith tracing
+
+ # LLM Configuration
+ MAIN_LLM_MODEL="gemini-1.5-flash" # Or another preferred model
+ EVAL_LLM_MODEL="gemini-1.5-flash"
+ SUMMARIZE_LLM_MODEL="gemini-1.5-flash"
+
+ # Planner Configuration
+ PLAN_METHOD="generation" # or "modification"
+ USE_DETAILED_QUERY="false" # or "true"
+
+ # Graph Operations Configuration
+ CYPHER_GEN_METHOD="guided" # or "auto"
+ VALIDATE_CYPHER="false" # or "true"
+ EVAL_METHOD="binary" # or "score"
+ EVAL_THRESHOLD="0.7"
+ MAX_DOCS="10"
+
+ # Processing Configuration
+ # Define processing steps as a JSON list; use dict entries for more complex steps
+ PROCESS_STEPS='["summarize"]' # Example: just summarize
+ COMPRESSION_METHOD="llm_lingua" # if used
+ COMPRESS_RATE="0.5" # if used
+
+ # Add other parameters as needed
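
Note on `PROCESS_STEPS`: the value must be a JSON list, because `kig_core/config.py` (below) declares it as a pydantic `Json` field. A minimal sanity check mirroring that parse, using only the standard library (the two-step value is a hypothetical example):

```python
import json

raw = '["summarize", "compress"]'  # candidate .env value
steps = json.loads(raw)           # pydantic's Json field performs the same parse
assert isinstance(steps, list), "PROCESS_STEPS must be a JSON list"
print(steps)  # ['summarize', 'compress']
```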
app.py ADDED
@@ -0,0 +1,175 @@
+ import streamlit as st
+ import pandas as pd
+ import logging
+ import time
+ import json # For displaying dicts/lists nicely
+ from langchain_core.messages import HumanMessage # Used below to build the initial graph state
+
+ # Import core components from the refactored library
+ from kig_core.config import settings # Loads config on import
+ from kig_core.schemas import PlannerState, KeyIssue, GraphConfig
+ from kig_core.planner import build_graph
+ from kig_core.utils import key_issues_to_dataframe, dataframe_to_excel_bytes
+ from kig_core.graph_client import neo4j_client # Import the initialized client instance
+
+ # Configure logging for the Streamlit app
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+ # --- Streamlit Page Configuration ---
+ st.set_page_config(page_title="Key Issue Generator (KIG)", layout="wide")
+ st.title("KIG - Key Issue Generator")
+ st.write("Generate structured Key Issues from knowledge graph context.")
+
+ # --- Sidebar ---
+ with st.sidebar:
+     st.header("Status & Info")
+     # Check Neo4j connectivity on startup
+     neo4j_status = st.empty()
+     try:
+         # Accessing the client instance will trigger verification if not already done
+         neo4j_client._get_driver() # Ensure a connection is attempted
+         neo4j_status.success("Neo4j Connection Verified")
+         can_run = True
+     except ConnectionError as e:
+         neo4j_status.error(f"Neo4j Error: {e}")
+         can_run = False
+     except Exception as e:
+         neo4j_status.error(f"Neo4j Init Error: {e}")
+         can_run = False
+
+     st.header("Configuration")
+     # Display some key settings (be careful with secrets)
+     st.text(f"Main LLM: {settings.main_llm_model}")
+     st.text(f"Neo4j URI: {settings.neo4j_uri}")
+     st.text(f"Plan Method: {settings.plan_method}")
+     st.text(f"Max Docs: {settings.max_docs}")
+
+     st.header("About")
+     st.info("""
+     This app uses LLMs and a Neo4j graph to:
+     1. Plan an approach based on your query.
+     2. Execute the plan, retrieving & processing graph data.
+     3. Generate structured Key Issues.
+     4. Output results to an Excel file.
+     """)
+
+ # --- Main Application Logic ---
+ st.header("Enter Your Query")
+ user_query = st.text_area(
+     "Describe the technical requirement or area you want to explore for Key Issues:",
+     "What are the main challenges and potential key issues in deploying edge computing for real-time AI-driven traffic management systems in smart cities?",
+     height=150
+ )
+
+ # Session state to store results across reruns if needed
+ if 'key_issues_result' not in st.session_state:
+     st.session_state.key_issues_result = None
+ if 'log_messages' not in st.session_state:
+     st.session_state.log_messages = []
+
+ # Placeholders for status updates
+ status_placeholder = st.empty()
+ results_placeholder = st.container()
+ log_placeholder = st.expander("Show Execution Log")
+
+ if st.button("Generate Key Issues", type="primary", disabled=not can_run):
+     if not user_query:
+         st.error("Please enter a query.")
+     else:
+         st.session_state.key_issues_result = None # Clear previous results
+         st.session_state.log_messages = ["Starting Key Issue generation..."]
+
+         with st.spinner("Processing... Building graph and executing workflow..."):
+             start_time = time.time()
+             try:
+                 # Build the graph
+                 status_placeholder.info("Building workflow graph...")
+                 app_graph = build_graph()
+                 st.session_state.log_messages.append("Workflow graph built.")
+
+                 # Define the initial state
+                 initial_state: PlannerState = {
+                     "user_query": user_query,
+                     "messages": [HumanMessage(content=user_query)],
+                     "plan": [],
+                     "current_plan_step_index": -1, # Will be set by start_planning
+                     "step_outputs": {},
+                     "key_issues": [],
+                     "error": None
+                 }
+
+                 # Configuration for the graph run (e.g., thread_id for memory)
+                 # Using the user query hash as a simple thread identifier for memory (if used)
+                 import hashlib
+                 thread_id = hashlib.sha256(user_query.encode()).hexdigest()[:8]
+                 config: GraphConfig = {"configurable": {"thread_id": thread_id}}
+
+                 status_placeholder.info("Executing workflow... (This may take a while)")
+                 st.session_state.log_messages.append("Invoking graph stream...")
+
+                 final_state = None
+                 # Stream events for logging/updates
+                 for i, step_state in enumerate(app_graph.stream(initial_state, config=config)):
+                     # step_state is a dictionary where keys are node names
+                     node_name = list(step_state.keys())[0]
+                     node_output = step_state[node_name]
+                     log_msg = f"Step {i+1}: Node '{node_name}' executed."
+                     st.session_state.log_messages.append(log_msg)
+                     # logger.info(log_msg) # Log to console as well
+                     # logger.debug(f"Node output: {node_output}")
+
+                     # You could update the status placeholder more dynamically here
+                     # status_placeholder.info(f"Executing: {node_name}...")
+
+                     final_state = node_output # Keep track of the latest state
+
+                 end_time = time.time()
+                 st.session_state.log_messages.append(f"Workflow finished in {end_time - start_time:.2f} seconds.")
+                 status_placeholder.success(f"Processing Complete! ({end_time - start_time:.2f}s)")
+
+                 # --- Process Final Results ---
+                 if final_state and not final_state.get("error"):
+                     generated_issues = final_state.get("key_issues", [])
+                     st.session_state.key_issues_result = generated_issues
+                     st.session_state.log_messages.append(f"Successfully extracted {len(generated_issues)} key issues.")
+                 elif final_state and final_state.get("error"):
+                     error_msg = final_state.get("error", "Unknown error")
+                     st.session_state.log_messages.append(f"Workflow failed: {error_msg}")
+                     status_placeholder.error(f"Workflow failed: {error_msg}")
+                 else:
+                     st.session_state.log_messages.append("Workflow finished, but no final state or key issues found.")
+                     status_placeholder.warning("Workflow finished, but no key issues were generated.")
+
+             except Exception as e:
+                 end_time = time.time()
+                 logger.error(f"An error occurred during graph execution: {e}", exc_info=True)
+                 status_placeholder.error(f"An unexpected error occurred: {e}")
+                 st.session_state.log_messages.append(f"FATAL ERROR: {e}")
+
+ # --- Display Results ---
+ if st.session_state.key_issues_result:
+     issues = st.session_state.key_issues_result
+     results_placeholder.subheader(f"Generated Key Issues ({len(issues)})")
+
+     df = key_issues_to_dataframe(issues)
+
+     if not df.empty:
+         # Display as DataFrame
+         results_placeholder.dataframe(df, use_container_width=True)
+
+         # Provide a download button
+         excel_bytes = dataframe_to_excel_bytes(df)
+         results_placeholder.download_button(
+             label="📥 Download Key Issues as Excel",
+             data=excel_bytes,
+             file_name="key_issues_output.xlsx",
+             mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+         )
+     else:
+         results_placeholder.info("No key issues were generated or parsed correctly.")
+
+ # Display logs
+ with log_placeholder:
+     st.code("\n".join(st.session_state.log_messages), language="text")
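
A caveat on the streaming loop above: with LangGraph's default stream mode, `app_graph.stream(...)` yields each node's *partial* state update, so `final_state` ends up holding only the last node's output dict rather than the full accumulated state. This happens to work because `generate_issues` returns `key_issues`, but merging updates as they arrive is more robust. A minimal sketch under that assumption:

```python
# Accumulate partial node updates instead of keeping only the last one.
merged_state = dict(initial_state)
for step_state in app_graph.stream(initial_state, config=config):
    for node_name, node_output in step_state.items():
        if isinstance(node_output, dict):
            merged_state.update(node_output)
final_state = merged_state
```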
kig_core/config.py ADDED
@@ -0,0 +1,89 @@
+ import os
+ from pydantic_settings import BaseSettings, SettingsConfigDict
+ from pydantic import Field, SecretStr, model_validator, Json
+ from typing import List, Optional, Literal, Union
+
+ # Load the .env file if it exists
+ # Ensure python-dotenv is installed: pip install python-dotenv
+ try:
+     from dotenv import load_dotenv
+     print("Attempting to load .env file...")
+     if load_dotenv():
+         print(".env file loaded successfully.")
+     else:
+         print(".env file not found or empty.")
+ except ImportError:
+     print("python-dotenv not installed, skipping .env file loading.")
+
+
+ class Settings(BaseSettings):
+     # Load from .env file
+     model_config = SettingsConfigDict(env_file='.env', env_file_encoding='utf-8', extra='ignore')
+
+     # Neo4j Credentials
+     neo4j_uri: str = Field(..., validation_alias='NEO4J_URI')
+     neo4j_username: str = Field("neo4j", validation_alias='NEO4J_USERNAME')
+     neo4j_password: SecretStr = Field(..., validation_alias='NEO4J_PASSWORD')
+
+     # API Keys
+     openai_api_key: Optional[SecretStr] = Field(None, validation_alias='OPENAI_API_KEY')
+     gemini_api_key: Optional[SecretStr] = Field(None, validation_alias='GEMINI_API_KEY')
+     langsmith_api_key: Optional[SecretStr] = Field(None, validation_alias='LANGSMITH_API_KEY')
+     langchain_project: Optional[str] = Field("KIG_Refactored", validation_alias='LANGCHAIN_PROJECT')
+
+     # LLM Configuration
+     main_llm_model: str = Field("gemini-1.5-flash", validation_alias='MAIN_LLM_MODEL')
+     eval_llm_model: str = Field("gemini-1.5-flash", validation_alias='EVAL_LLM_MODEL')
+     summarize_llm_model: str = Field("gemini-1.5-flash", validation_alias='SUMMARIZE_LLM_MODEL')
+     # Add other models if needed (e.g., cypher gen, concept selection)
+
+     # Planner Configuration
+     plan_method: Literal["generation", "modification"] = Field("generation", validation_alias='PLAN_METHOD')
+     use_detailed_query: bool = Field(False, validation_alias='USE_DETAILED_QUERY')
+
+     # Graph Operations Configuration
+     cypher_gen_method: Literal["guided", "auto"] = Field("guided", validation_alias='CYPHER_GEN_METHOD')
+     validate_cypher: bool = Field(False, validation_alias='VALIDATE_CYPHER')
+     eval_method: Literal["binary", "score"] = Field("binary", validation_alias='EVAL_METHOD')
+     eval_threshold: float = Field(0.7, validation_alias='EVAL_THRESHOLD')
+     max_docs: int = Field(10, validation_alias='MAX_DOCS')
+
+     # Processing Configuration
+     # Load processing steps from a JSON string in .env.
+     # validate_default=True ensures the default string is parsed into a list as well.
+     process_steps: Json[List[Union[str, dict]]] = Field('["summarize"]', validation_alias='PROCESS_STEPS', validate_default=True)
+     compression_method: Optional[str] = Field(None, validation_alias='COMPRESSION_METHOD')
+     compress_rate: Optional[float] = Field(0.5, validation_alias='COMPRESS_RATE')
+
+     # LangSmith tracing flag (derived from the API key after validation)
+     langsmith_tracing_v2: str = "false"
+
+     @model_validator(mode='after')
+     def set_langsmith_tracing(self):
+         # Pydantic v2 model validator; replaces the deprecated v1-style @validator
+         self.langsmith_tracing_v2 = "true" if self.langsmith_api_key else "false"
+         return self
+
+     def configure_langsmith(self):
+         """Sets LangSmith environment variables if an API key is provided."""
+         if self.langsmith_api_key:
+             os.environ["LANGCHAIN_TRACING_V2"] = self.langsmith_tracing_v2
+             os.environ["LANGCHAIN_API_KEY"] = self.langsmith_api_key.get_secret_value()
+             if self.langchain_project:
+                 os.environ["LANGCHAIN_PROJECT"] = self.langchain_project
+             print("LangSmith configured.")
+         else:
+             # Ensure tracing is disabled if there is no key
+             os.environ["LANGCHAIN_TRACING_V2"] = "false"
+             print("LangSmith key not found, tracing disabled.")
+
+ # Create a single instance to be imported elsewhere
+ settings = Settings()
+ # Automatically configure LangSmith on import
+ settings.configure_langsmith()
+
+ # Optionally set the Gemini key in the environment if the library needs it implicitly
+ if settings.gemini_api_key:
+     os.environ["GOOGLE_API_KEY"] = settings.gemini_api_key.get_secret_value()
+     print("Set GOOGLE_API_KEY environment variable.")
+ if settings.openai_api_key:
+     os.environ["OPENAI_API_KEY"] = settings.openai_api_key.get_secret_value()
+     print("Set OPENAI_API_KEY environment variable.")
kig_core/graph_client.py ADDED
@@ -0,0 +1,91 @@
+ from neo4j import GraphDatabase, Driver, exceptions
+ from .config import settings
+ import logging
+ from typing import List, Dict, Any, Optional
+
+ logger = logging.getLogger(__name__)
+
+ class Neo4jClient:
+     _driver: Optional[Driver] = None
+
+     def _get_driver(self) -> Driver:
+         """Initializes and returns the Neo4j driver instance."""
+         if self._driver is None or self._driver.closed():
+             logger.info(f"Initializing Neo4j Driver for URI: {settings.neo4j_uri}")
+             try:
+                 self._driver = GraphDatabase.driver(
+                     settings.neo4j_uri,
+                     auth=(settings.neo4j_username, settings.neo4j_password.get_secret_value())
+                 )
+                 # Verify connectivity during initialization
+                 self._driver.verify_connectivity()
+                 logger.info("Neo4j Driver initialized and connection verified.")
+             except exceptions.AuthError as e:
+                 logger.error(f"Neo4j Authentication Error: {e}", exc_info=True)
+                 raise ConnectionError("Neo4j Authentication Failed. Check credentials.") from e
+             except exceptions.ServiceUnavailable as e:
+                 logger.error(f"Neo4j Service Unavailable: {e}", exc_info=True)
+                 raise ConnectionError(f"Could not connect to Neo4j at {settings.neo4j_uri}. Ensure DB is running and reachable.") from e
+             except Exception as e:
+                 logger.error(f"Unexpected error initializing Neo4j Driver: {e}", exc_info=True)
+                 raise ConnectionError("An unexpected error occurred connecting to Neo4j.") from e
+         return self._driver
+
+     def close(self):
+         """Closes the Neo4j driver connection."""
+         if self._driver and not self._driver.closed():
+             logger.info("Closing Neo4j Driver.")
+             self._driver.close()
+             self._driver = None
+
+     def query(self, cypher_query: str, params: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
+         """Executes a Cypher query and returns the results."""
+         driver = self._get_driver()
+         logger.debug(f"Executing Cypher: {cypher_query} with params: {params}")
+         try:
+             # Use a session/transaction for robust execution
+             with driver.session() as session:
+                 result = session.run(cypher_query, params or {})
+                 # Convert Neo4j Records to dictionaries
+                 data = [record.data() for record in result]
+                 logger.debug(f"Query returned {len(data)} records.")
+                 return data
+         except (exceptions.ServiceUnavailable, exceptions.SessionExpired) as e:
+             logger.error(f"Neo4j connection error during query: {e}", exc_info=True)
+             # Close the potentially broken driver so it reconnects next time
+             self.close()
+             raise ConnectionError("Neo4j connection error during query execution.") from e
+         except exceptions.CypherSyntaxError as e:
+             logger.error(f"Neo4j Cypher Syntax Error: {e}\nQuery: {cypher_query}", exc_info=True)
+             raise ValueError("Invalid Cypher query syntax.") from e
+         except Exception as e:
+             logger.error(f"Unexpected error during Neo4j query: {e}", exc_info=True)
+             raise RuntimeError("An unexpected error occurred during the Neo4j query.") from e
+
+     def get_schema(self, force_refresh: bool = False) -> Dict[str, Any]:
+         """Fetches the graph schema. Placeholder - the LangChain community graph has better schema fetching."""
+         # For simplicity, returning empty. Implement actual schema fetching if needed.
+         # Consider using langchain_community.graphs.Neo4jGraph for schema handling if complex interactions are needed.
+         logger.warning("Neo4jClient.get_schema() is a placeholder. Implement if schema needed.")
+         return {} # Placeholder
+
+     def get_concepts(self) -> List[str]:
+         """Fetches all Concept names from the graph."""
+         cypher = "MATCH (c:Concept) RETURN c.name AS name ORDER BY name"
+         results = self.query(cypher)
+         return [record['name'] for record in results if 'name' in record]
+
+     def get_concept_description(self, concept_name: str) -> Optional[str]:
+         """Fetches the description for a specific concept."""
+         cypher = "MATCH (c:Concept {name: $name}) RETURN c.description AS description LIMIT 1"
+         params = {"name": concept_name}
+         results = self.query(cypher, params)
+         return results[0]['description'] if results and 'description' in results[0] else None
+
+
+ # Create a single instance for the application to use
+ neo4j_client = Neo4jClient()
+
+ # Ensure the client is closed gracefully when the application exits
+ import atexit
+ atexit.register(neo4j_client.close)
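
For reference, a short usage sketch of the client; the `$name` parameter binding mirrors `get_concept_description` above ("Edge Computing" is a hypothetical concept name):

```python
from kig_core.graph_client import neo4j_client

# Parameter binding avoids Cypher injection and quoting issues.
rows = neo4j_client.query(
    "MATCH (c:Concept {name: $name})-[:RELATED_TO]-(n) RETURN n.title AS title LIMIT 5",
    params={"name": "Edge Computing"},  # hypothetical concept name
)
for row in rows:
    print(row["title"])
```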
kig_core/graph_operations.py ADDED
@@ -0,0 +1,210 @@
+ import re
+ import logging
+ from typing import List, Dict, Any, Optional, Tuple
+
+ from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
+ from langchain_core.runnables import Runnable, RunnablePassthrough
+ from langchain_core.pydantic_v1 import Field, BaseModel as V1BaseModel # For grader models if needed
+
+ from .config import settings
+ from .graph_client import neo4j_client # Use the central client
+ from .llm_interface import get_llm
+ from .prompts import (
+     CYPHER_GENERATION_PROMPT, CONCEPT_SELECTION_PROMPT,
+     BINARY_GRADER_PROMPT, SCORE_GRADER_PROMPT
+ )
+
+ logger = logging.getLogger(__name__)
+
+ # --- Helper Functions ---
+ def extract_cypher(text: str) -> str:
+     """Extracts the first Cypher code block or returns the text itself."""
+     pattern = r"```(?:cypher)?\s*(.*?)\s*```"
+     match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
+     return match.group(1).strip() if match else text.strip()
+
+ def format_doc_for_llm(doc: Dict[str, Any]) -> str:
+     """Formats a document dictionary into a string for LLM context."""
+     return "\n".join(f"**{key}**: {value}" for key, value in doc.items() if value)
+
+
+ # --- Cypher Generation ---
+ def generate_cypher_auto(question: str) -> str:
+     """Generates Cypher using the 'auto' method."""
+     logger.info("Generating Cypher using 'auto' method.")
+     # Schema fetching needs implementation if required by the prompt/LLM
+     # schema_info = neo4j_client.get_schema() # Placeholder
+     schema_info = "Schema not available." # Default if not implemented
+
+     cypher_llm = get_llm(settings.main_llm_model) # Or a specific cypher model
+     chain = (
+         {"question": RunnablePassthrough(), "schema": lambda x: schema_info}
+         | CYPHER_GENERATION_PROMPT
+         | cypher_llm
+         | StrOutputParser()
+         | extract_cypher
+     )
+     return chain.invoke(question)
+
+ def generate_cypher_guided(question: str, plan_step: int) -> str:
+     """Generates Cypher using the 'guided' method based on concepts."""
+     logger.info(f"Generating Cypher using 'guided' method for plan step {plan_step}.")
+     try:
+         concepts = neo4j_client.get_concepts()
+         if not concepts:
+             logger.warning("No concepts found in Neo4j for guided cypher generation.")
+             return "" # Or raise an error
+
+         concept_llm = get_llm(settings.main_llm_model) # Or a specific concept model
+         concept_chain = (
+             CONCEPT_SELECTION_PROMPT
+             | concept_llm
+             | StrOutputParser()
+         )
+         selected_concept = concept_chain.invoke({
+             "question": question,
+             "concepts": "\n".join(concepts)
+         }).strip()
+
+         logger.info(f"Concept selected by LLM: {selected_concept}")
+
+         # Basic check that the selected concept is valid
+         if selected_concept not in concepts:
+             logger.warning(f"LLM selected concept '{selected_concept}' not in the known list. Attempting fallback.")
+             # Optional: Add fuzzy matching or similarity search here.
+             # For now, fall back to a simple substring check.
+             found_match = None
+             for c in concepts:
+                 if selected_concept.lower() in c.lower():
+                     found_match = c
+                     logger.info(f"Found potential match: '{found_match}'")
+                     break
+             if not found_match:
+                 logger.error(f"Could not validate selected concept: {selected_concept}")
+                 return "" # Return an empty query if the concept is invalid
+             selected_concept = found_match
+
+         # Determine the target node type based on the plan step (example logic)
+         # This mapping might need adjustment based on the actual plan structure
+         if plan_step <= 1: # Steps 0 and 1: Context gathering
+             target = "(ts:TechnicalSpecification)"
+             fields = "ts.title, ts.scope, ts.description"
+         elif plan_step == 2: # Step 2: Research papers?
+             target = "(rp:ResearchPaper)"
+             fields = "rp.title, rp.abstract"
+         else: # Later steps might involve KeyIssues themselves or other types
+             target = "(n)" # Generic fallback
+             fields = "n.title, n.description" # Assuming common fields
+
+         # Construct the Cypher query. Parameter binding would be safer, but the
+         # planner currently expects a plain query string, so the concept name is
+         # inlined with basic escaping. Be cautious about injection if concept
+         # names can contain special characters.
+         escaped_concept = selected_concept.replace("'", "\\'") # Basic escaping
+         cypher = f"MATCH (c:Concept {{name: '{escaped_concept}'}})-[:RELATED_TO]-{target} RETURN {fields}"
+
+         logger.info(f"Generated guided Cypher: {cypher}")
+         return cypher
+
+     except Exception as e:
+         logger.error(f"Error during guided cypher generation: {e}", exc_info=True)
+         return "" # Return empty on error
+
+
+ # --- Document Retrieval ---
+ def retrieve_documents(cypher_query: str) -> List[Dict[str, Any]]:
+     """Retrieves documents from Neo4j using a Cypher query."""
+     if not cypher_query:
+         logger.warning("Received empty Cypher query, skipping retrieval.")
+         return []
+     logger.info(f"Retrieving documents with Cypher: {cypher_query}")
+     try:
+         # Use the centralized client's query method
+         raw_results = neo4j_client.query(cypher_query)
+         # Basic cleaning/deduplication (can be enhanced)
+         processed_results = []
+         seen = set()
+         for doc in raw_results:
+             # Use a frozenset of items as a hashable representation to detect duplicates
+             doc_items = frozenset(doc.items())
+             if doc_items not in seen:
+                 processed_results.append(doc)
+                 seen.add(doc_items)
+         logger.info(f"Retrieved {len(processed_results)} unique documents.")
+         return processed_results
+     except (ConnectionError, ValueError, RuntimeError) as e:
+         # Errors already logged in neo4j_client
+         logger.error(f"Document retrieval failed: {e}")
+         return [] # Return an empty list on failure
+
+
+ # --- Document Evaluation ---
+ # Pydantic models for structured LLM grader output (if not using built-in LCEL structured output)
+ class GradeDocumentsBinary(V1BaseModel):
+     """Binary score for relevance check."""
+     binary_score: str = Field(description="Relevant? 'yes' or 'no'")
+
+ class GradeDocumentsScore(V1BaseModel):
+     """Score for relevance check."""
+     rationale: str = Field(description="Rationale for the score.")
+     score: float = Field(description="Relevance score (0.0 to 1.0)")
+
+ def evaluate_documents(
+     docs: List[Dict[str, Any]],
+     query: str
+ ) -> List[Dict[str, Any]]:
+     """Evaluates document relevance to a query using the configured method."""
+     if not docs:
+         return []
+
+     logger.info(f"Evaluating {len(docs)} documents for relevance to query: '{query}' using method: {settings.eval_method}")
+     eval_llm = get_llm(settings.eval_llm_model)
+     valid_docs_with_scores: List[Tuple[Dict[str, Any], float]] = []
+
+     # Consider using LCEL's structured output capabilities directly if the model supports them well.
+     # This simplifies parsing. Example for binary:
+     # binary_grader = BINARY_GRADER_PROMPT | eval_llm.with_structured_output(GradeDocumentsBinary)
+
+     if settings.eval_method == "binary":
+         binary_grader = BINARY_GRADER_PROMPT | eval_llm | StrOutputParser() # Fallback to string parsing
+         for doc in docs:
+             formatted_doc = format_doc_for_llm(doc)
+             if not formatted_doc.strip(): continue
+             try:
+                 result = binary_grader.invoke({"question": query, "document": formatted_doc})
+                 logger.debug(f"Binary grader result for doc '{doc.get('title', 'N/A')}': {result}")
+                 if result and 'yes' in result.lower():
+                     valid_docs_with_scores.append((doc, 1.0)) # Score 1.0 for relevant
+             except Exception as e:
+                 logger.warning(f"Binary grading failed for a document: {e}", exc_info=True)
+
+     elif settings.eval_method == "score":
+         # Using the JSON parser as a robust fallback for score extraction.
+         # Note: JsonOutputParser returns a plain dict (pydantic_object only informs the
+         # format instructions), so fields are accessed by key below.
+         score_grader = SCORE_GRADER_PROMPT | eval_llm | JsonOutputParser(pydantic_object=GradeDocumentsScore)
+         for doc in docs:
+             formatted_doc = format_doc_for_llm(doc)
+             if not formatted_doc.strip(): continue
+             try:
+                 result = score_grader.invoke({"query": query, "document": formatted_doc})
+                 score = float(result.get("score", 0.0))
+                 logger.debug(f"Score grader result for doc '{doc.get('title', 'N/A')}': Score={score}, Rationale={result.get('rationale', '')}")
+                 if score >= settings.eval_threshold:
+                     valid_docs_with_scores.append((doc, score))
+             except Exception as e:
+                 logger.warning(f"Score grading failed for a document: {e}", exc_info=True)
+                 # Optionally treat as relevant on failure? Or skip? Skipping for now.
+
+     # Sort by score if applicable, then limit
+     if settings.eval_method == 'score':
+         valid_docs_with_scores.sort(key=lambda item: item[1], reverse=True)
+
+     # Limit to max_docs
+     final_docs = [doc for doc, score in valid_docs_with_scores[:settings.max_docs]]
+     logger.info(f"Found {len(final_docs)} relevant documents after evaluation and filtering.")
+
+     return final_docs
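
As the comment in `generate_cypher_guided` notes, parameter binding is preferable to inlining the concept name. If the planner were extended to pass parameters alongside the query string, the retrieval could look like this (a sketch; `selected_concept` stands in for the validated concept):

```python
from kig_core.graph_client import neo4j_client

selected_concept = "Edge Computing"  # hypothetical validated concept
cypher = (
    "MATCH (c:Concept {name: $conceptName})-[:RELATED_TO]-(ts:TechnicalSpecification) "
    "RETURN ts.title, ts.scope, ts.description"
)
docs = neo4j_client.query(cypher, params={"conceptName": selected_concept})
```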
kig_core/llm_interface.py ADDED
@@ -0,0 +1,59 @@
+ import os
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from langchain_openai import ChatOpenAI
+ from langchain_core.language_models.chat_models import BaseChatModel
+ from .config import settings
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ # Store initialized models to avoid re-creating them repeatedly
+ _llm_cache = {}
+
+ def get_llm(model_name: str) -> BaseChatModel:
+     """
+     Returns an initialized LangChain chat model based on the provided name.
+     Caches initialized models.
+     """
+     global _llm_cache
+     if model_name in _llm_cache:
+         return _llm_cache[model_name]
+
+     logger.info(f"Initializing LLM: {model_name}")
+
+     if model_name.startswith("gemini"):
+         if not settings.gemini_api_key:
+             raise ValueError("GEMINI_API_KEY is not configured.")
+         try:
+             # Uses the GOOGLE_API_KEY environment variable set in config.py
+             llm = ChatGoogleGenerativeAI(model=model_name)
+             _llm_cache[model_name] = llm
+             logger.info(f"Initialized Google Generative AI model: {model_name}")
+             return llm
+         except Exception as e:
+             logger.error(f"Failed to initialize Gemini model '{model_name}': {e}", exc_info=True)
+             raise RuntimeError(f"Could not initialize Gemini model: {e}") from e
+
+     elif model_name.startswith("gpt"):
+         if not settings.openai_api_key:
+             raise ValueError("OPENAI_API_KEY is not configured.")
+         try:
+             # A base URL can be added here if using a proxy
+             # base_url = "https://your-proxy-if-needed/"
+             llm = ChatOpenAI(model=model_name, api_key=settings.openai_api_key) # Base URL optional
+             _llm_cache[model_name] = llm
+             logger.info(f"Initialized OpenAI model: {model_name}")
+             return llm
+         except Exception as e:
+             logger.error(f"Failed to initialize OpenAI model '{model_name}': {e}", exc_info=True)
+             raise RuntimeError(f"Could not initialize OpenAI model: {e}") from e
+
+     # Add other model providers (Anthropic, Groq, etc.) here if needed
+
+     else:
+         logger.error(f"Unsupported model provider for model name: {model_name}")
+         raise ValueError(f"Model '{model_name}' is not supported or configuration is missing.")
+
+ # Example usage (could be called from other modules)
+ # main_llm = get_llm(settings.main_llm_model)
+ # eval_llm = get_llm(settings.eval_llm_model)
kig_core/planner.py ADDED
@@ -0,0 +1,226 @@
+ import logging
+ import re
+ from typing import List, Dict, Any
+ from langgraph.graph import StateGraph, END
+ from langgraph.checkpoint.memory import MemorySaver # Or SqliteSaver etc. (only used if compiling with a checkpointer)
+
+ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
+ from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
+
+ from .config import settings
+ from .schemas import PlannerState, KeyIssue, GraphConfig # Import schemas
+ from .prompts import get_initial_planner_prompt, KEY_ISSUE_STRUCTURING_PROMPT
+ from .llm_interface import get_llm
+ from .graph_operations import (
+     generate_cypher_auto, generate_cypher_guided,
+     retrieve_documents, evaluate_documents
+ )
+ from .processing import process_documents
+
+ logger = logging.getLogger(__name__)
+
+ # --- Graph Nodes ---
+
+ def start_planning(state: PlannerState) -> Dict[str, Any]:
+     """Generates the initial plan based on the user query."""
+     logger.info("Node: start_planning")
+     user_query = state['user_query']
+     if not user_query:
+         return {"error": "User query is empty."}
+
+     initial_prompt = get_initial_planner_prompt(settings.plan_method, user_query)
+     llm = get_llm(settings.main_llm_model)
+     chain = initial_prompt | llm | StrOutputParser()
+
+     try:
+         plan_text = chain.invoke({}) # Prompt already includes the query
+         logger.debug(f"Raw plan text: {plan_text}")
+
+         # Extract plan steps (simple regex, might need refinement)
+         plan_match = re.search(r"Plan:(.*?)<END_OF_PLAN>", plan_text, re.DOTALL | re.IGNORECASE)
+         if plan_match:
+             plan_steps = [step.strip() for step in re.split(r"\n\s*\d+\.\s*", plan_match.group(1)) if step.strip()]
+             logger.info(f"Extracted plan: {plan_steps}")
+             return {
+                 "plan": plan_steps,
+                 "current_plan_step_index": 0,
+                 "messages": [AIMessage(content=plan_text)],
+                 "step_outputs": {} # Initialize step outputs
+             }
+         else:
+             logger.error("Could not parse plan from LLM response.")
+             return {"error": "Failed to parse plan from LLM response.", "messages": [AIMessage(content=plan_text)]}
+     except Exception as e:
+         logger.error(f"Error during plan generation: {e}", exc_info=True)
+         return {"error": f"LLM error during plan generation: {e}"}
+
+
+ def execute_plan_step(state: PlannerState) -> Dict[str, Any]:
+     """Executes the current step of the plan (retrieval, processing)."""
+     current_index = state['current_plan_step_index']
+     plan = state['plan']
+     user_query = state['user_query'] # Use the original query for context
+
+     if current_index >= len(plan):
+         logger.warning("Plan step index out of bounds, attempting to finalize.")
+         # This should ideally be handled by the conditional edge, but serves as a fallback
+         return {"error": "Plan execution finished unexpectedly."}
+
+     step_description = plan[current_index]
+     logger.info(f"Node: execute_plan_step - Step {current_index + 1}/{len(plan)}: {step_description}")
+
+     # --- Determine Query for Retrieval ---
+     # Combine the step description with the original query for context.
+     query_for_retrieval = f"Regarding the query '{user_query}', focus on: {step_description}"
+     logger.info(f"Query for retrieval: {query_for_retrieval}")
+
+     # --- Generate Cypher ---
+     cypher_query = ""
+     if settings.cypher_gen_method == 'auto':
+         cypher_query = generate_cypher_auto(query_for_retrieval)
+     elif settings.cypher_gen_method == 'guided':
+         cypher_query = generate_cypher_guided(query_for_retrieval, current_index)
+     # TODO: Add cypher validation if settings.validate_cypher is True
+
+     # --- Retrieve Documents ---
+     retrieved_docs = retrieve_documents(cypher_query)
+
+     # --- Evaluate Documents ---
+     evaluated_docs = evaluate_documents(retrieved_docs, query_for_retrieval)
+
+     # --- Process Documents ---
+     # Using the configured processing steps
+     processed_docs_content = process_documents(evaluated_docs, settings.process_steps)
+
+     # --- Store Step Output ---
+     # Store the processed content relevant to this step
+     step_output = "\n\n".join(processed_docs_content) if processed_docs_content else "No relevant information found for this step."
+     current_step_outputs = state.get('step_outputs', {})
+     current_step_outputs[current_index] = step_output
+
+     logger.info(f"Finished executing plan step {current_index + 1}. Stored output.")
+
+     return {
+         "current_plan_step_index": current_index + 1,
+         "messages": [SystemMessage(content=f"Completed plan step {current_index + 1}. Context gathered:\n{step_output[:500]}...")], # Add a summary message
+         "step_outputs": current_step_outputs
+     }
+
+
+ def generate_structured_issues(state: PlannerState) -> Dict[str, Any]:
+     """Generates the final structured Key Issues based on all gathered context."""
+     logger.info("Node: generate_structured_issues")
+
+     user_query = state['user_query']
+     step_outputs = state.get('step_outputs', {})
+
+     # --- Combine Context from All Steps ---
+     full_context = f"Original User Query: {user_query}\n\n"
+     full_context += "Context gathered during planning:\n"
+     for i, output in sorted(step_outputs.items()):
+         full_context += f"--- Context from Step {i+1} ---\n{output}\n\n"
+
+     if not step_outputs:
+         full_context += "No context was gathered during the planning steps.\n"
+
+     logger.info(f"Generating key issues using combined context (length: {len(full_context)} chars).")
+     # logger.debug(f"Full Context for Key Issue Generation:\n{full_context}") # Optional: log full context
+
+     # --- Call LLM for Structured Output ---
+     issue_llm = get_llm(settings.main_llm_model)
+     # JsonOutputParser returns plain Python data (here, a list of dicts);
+     # each item is validated into a KeyIssue model after the call.
+     output_parser = JsonOutputParser()
+
+     prompt = KEY_ISSUE_STRUCTURING_PROMPT.partial(
+         # schema=output_parser.get_format_instructions(), # Inject schema instructions if needed by the prompt
+     )
+
+     chain = prompt | issue_llm | output_parser
+
+     try:
+         raw_issues = chain.invoke({
+             "user_query": user_query,
+             "context": full_context
+         })
+         structured_issues = [KeyIssue(**item) for item in raw_issues]
+
+         # Ensure IDs are sequential if the LLM didn't assign them correctly
+         for i, issue in enumerate(structured_issues):
+             issue.id = i + 1
+
+         logger.info(f"Successfully generated {len(structured_issues)} structured key issues.")
+         final_message = f"Generated {len(structured_issues)} Key Issues based on the query '{user_query}'."
+         return {
+             "key_issues": structured_issues,
+             "messages": [AIMessage(content=final_message)], # Final summary message
+             "error": None # Clear any previous errors
+         }
+     except Exception as e:
+         logger.error(f"Failed to generate or parse structured key issues: {e}", exc_info=True)
+         # Attempt to get the raw output for debugging if possible
+         raw_output = "Could not retrieve raw output."
+         try:
+             raw_chain = prompt | issue_llm | StrOutputParser()
+             raw_output = raw_chain.invoke({"user_query": user_query, "context": full_context})
+             logger.debug(f"Raw output from failed JSON parsing:\n{raw_output}")
+         except Exception as raw_e:
+             logger.error(f"Could not even get raw output: {raw_e}")
+
+         return {"error": f"Failed to generate structured key issues: {e}. Raw output hint: {raw_output[:500]}..."}
+
+
+ # --- Conditional Edges ---
+
+ def should_continue_planning(state: PlannerState) -> str:
+     """Determines if there are more plan steps to execute."""
+     logger.debug("Edge: should_continue_planning")
+     if state.get("error"):
+         logger.error(f"Error state detected: {state['error']}. Ending execution.")
+         return "error_state" # Go to a potential error handling end node
+
+     current_index = state['current_plan_step_index']
+     plan_length = len(state.get('plan', []))
+
+     if current_index < plan_length:
+         logger.debug(f"Continuing plan execution. Next step index: {current_index}")
+         return "continue_execution"
+     else:
+         logger.debug("Plan finished. Proceeding to final generation.")
+         return "finalize"
+
+
+ # --- Build Graph ---
+ def build_graph():
+     """Builds the LangGraph workflow."""
+     workflow = StateGraph(PlannerState)
+
+     # Add nodes
+     workflow.add_node("start_planning", start_planning)
+     workflow.add_node("execute_plan_step", execute_plan_step)
+     workflow.add_node("generate_issues", generate_structured_issues)
+     # Optional: an error handling node
+     workflow.add_node("error_node", lambda state: {"messages": [SystemMessage(content=f"Execution failed: {state.get('error', 'Unknown error')}")]})
+
+     # Define edges
+     workflow.set_entry_point("start_planning")
+     workflow.add_edge("start_planning", "execute_plan_step") # Assume a plan is always generated
+
+     workflow.add_conditional_edges(
+         "execute_plan_step",
+         should_continue_planning,
+         {
+             "continue_execution": "execute_plan_step", # Loop back to execute the next step
+             "finalize": "generate_issues", # Move to final generation
+             "error_state": "error_node" # Go to the error node
+         }
+     )
+
+     workflow.add_edge("generate_issues", END)
+     workflow.add_edge("error_node", END) # End after an error
+
+     # Compile the graph with memory (optional)
+     # memory = MemorySaver() # Use if state needs persistence between runs
+     # app_graph = workflow.compile(checkpointer=memory)
+     app_graph = workflow.compile()
+     return app_graph
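
Outside Streamlit, the compiled graph can also be run synchronously; `invoke` returns the final accumulated state. A minimal sketch using the same initial-state shape as `app.py` (example query made up):

```python
from langchain_core.messages import HumanMessage
from kig_core.planner import build_graph

graph = build_graph()
query = "Key issues for edge AI traffic management?"  # example query
final_state = graph.invoke({
    "user_query": query,
    "messages": [HumanMessage(content=query)],
    "plan": [],
    "current_plan_step_index": -1,
    "step_outputs": {},
    "key_issues": [],
    "error": None,
})
for issue in final_state.get("key_issues", []):
    print(issue.id, issue.title)
```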
kig_core/processing.py ADDED
@@ -0,0 +1,127 @@
+ import logging
+ from typing import List, Dict, Any, Union, Optional
+ from langchain_core.output_parsers import StrOutputParser
+
+ from .config import settings
+ from .llm_interface import get_llm
+ from .prompts import SUMMARIZER_PROMPT
+ from .graph_operations import format_doc_for_llm # Reuse formatting
+
+ # Import llmlingua if compression is used
+ try:
+     from llmlingua import PromptCompressor
+     LLMLINGUA_AVAILABLE = True
+ except ImportError:
+     LLMLINGUA_AVAILABLE = False
+     PromptCompressor = None # Define as None if not available
+
+ logger = logging.getLogger(__name__)
+
+ _compressor_cache = {}
+
+ def get_compressor(method: str) -> Optional['PromptCompressor']:
+     """Initializes and caches an LLMLingua compressor."""
+     if not LLMLINGUA_AVAILABLE:
+         logger.warning("LLMLingua not installed, compression unavailable.")
+         return None
+     if method not in _compressor_cache:
+         logger.info(f"Initializing LLMLingua compressor: {method}")
+         try:
+             # Adjust model names and params as needed
+             if method == "llm_lingua2":
+                 model_name = "microsoft/llmlingua-2-xlm-roberta-large-meetingbank"
+                 use_llmlingua2 = True
+             elif method == "llm_lingua":
+                 model_name = "microsoft/phi-2" # Requires ~8GB RAM
+                 use_llmlingua2 = False
+             else:
+                 logger.error(f"Unsupported compression method: {method}")
+                 return None
+
+             _compressor_cache[method] = PromptCompressor(
+                 model_name=model_name,
+                 use_llmlingua2=use_llmlingua2,
+                 device_map="cpu" # Or "cuda" if a GPU is available
+             )
+         except Exception as e:
+             logger.error(f"Failed to initialize LLMLingua compressor '{method}': {e}", exc_info=True)
+             return None
+     return _compressor_cache[method]
+
+
+ def summarize_document(doc_content: str) -> str:
+     """Summarizes a single document using the configured LLM."""
+     logger.debug("Summarizing document...")
+     try:
+         summarize_llm = get_llm(settings.summarize_llm_model)
+         summarize_chain = SUMMARIZER_PROMPT | summarize_llm | StrOutputParser()
+         summary = summarize_chain.invoke({"document": doc_content})
+         logger.debug("Summarization complete.")
+         return summary
+     except Exception as e:
+         logger.error(f"Summarization failed: {e}", exc_info=True)
+         return f"Error during summarization: {e}" # Return an error message instead of failing
+
+
+ def compress_document(doc_content: str) -> str:
+     """Compresses a single document using LLMLingua."""
+     logger.debug(f"Compressing document using method: {settings.compression_method}...")
+     if not settings.compression_method:
+         logger.warning("Compression method not configured, skipping.")
+         return doc_content
+
+     compressor = get_compressor(settings.compression_method)
+     if not compressor:
+         logger.warning("Compressor not available, skipping compression.")
+         return doc_content
+
+     try:
+         # Adjust compression parameters (rate, force_tokens, context/instructions/question) as needed.
+         # Simple compression for now:
+         result = compressor.compress_prompt(doc_content, rate=settings.compress_rate or 0.5)
+         compressed_text = result.get("compressed_prompt", doc_content)
+
+         original_len = len(doc_content.split())
+         compressed_len = len(compressed_text.split())
+         logger.debug(f"Compression complete. Original words: {original_len}, Compressed words: {compressed_len}")
+         return compressed_text
+     except Exception as e:
+         logger.error(f"Compression failed: {e}", exc_info=True)
+         return f"Error during compression: {e}" # Return an error message
+
+
+ def process_documents(
+     docs: List[Dict[str, Any]],
+     processing_steps: List[Union[str, dict]]
+ ) -> List[str]:
+     """Processes a list of documents according to the specified steps."""
+     logger.info(f"Processing {len(docs)} documents with steps: {processing_steps}")
+     if not docs:
+         return []
+
+     processed_outputs = []
+     for i, doc in enumerate(docs):
+         logger.info(f"Processing document {i+1}/{len(docs)}...")
+         current_content = format_doc_for_llm(doc) # Start with the formatted original doc
+
+         for step in processing_steps:
+             if step == "summarize":
+                 current_content = summarize_document(current_content)
+             elif step == "compress":
+                 current_content = compress_document(current_content)
+             elif isinstance(step, dict):
+                 # Placeholder for custom processing steps defined by dicts
+                 logger.warning(f"Custom processing step not implemented: {step}")
+                 # Add logic here if needed: extract params, call a specific LLM/function
+             else:
+                 logger.warning(f"Unknown processing step type: {step}")
+
+         processed_outputs.append(current_content) # Add the final processed content for this doc
+
+     logger.info("Document processing finished.")
+     return processed_outputs
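
For reference, `process_documents` is driven by the parsed `PROCESS_STEPS` setting; a two-stage pipeline would look like this (a sketch, assuming `llmlingua` is installed so the compress step is available, and with a made-up document):

```python
from kig_core.processing import process_documents

docs = [{"title": "Edge offloading survey", "abstract": "..."}]  # hypothetical doc
# Summarize each document first, then compress the summary.
outputs = process_documents(docs, ["summarize", "compress"])
print(outputs[0])
```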
kig_core/prompts.py ADDED
@@ -0,0 +1,140 @@
+ from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
+ from .schemas import KeyIssue # Import the Pydantic model
+
+ # --- Cypher Generation ---
+ CYPHER_GENERATION_TEMPLATE = """Task: Generate Cypher statement to query a graph database.
+ Instructions:
+ Use only the provided relationship types and properties in the schema.
+ Do not use any other relationship types or properties that are not provided.
+ Schema:
+ {schema}
+
+ Note: Do not include explanations or apologies. Respond only with the Cypher statement.
+ Do not respond to questions unrelated to Cypher generation.
+
+ The question is:
+ {question}"""
+ CYPHER_GENERATION_PROMPT = PromptTemplate.from_template(CYPHER_GENERATION_TEMPLATE)
+
+
+ # --- Concept Selection (for 'guided' cypher gen) ---
+ CONCEPT_SELECTION_TEMPLATE = """Task: Select the most relevant Concept from the list below for the user's question.
+ Instructions:
+ Output ONLY the name of the single most relevant concept. No explanations.
+
+ Concepts:
+ {concepts}
+
+ User Question:
+ {question}"""
+ CONCEPT_SELECTION_PROMPT = PromptTemplate.from_template(CONCEPT_SELECTION_TEMPLATE)
+
+
+ # --- Document Relevance Grading ---
+ BINARY_GRADER_TEMPLATE = """Assess the relevance of the retrieved document to the user question.
+ The goal is to filter out clearly erroneous retrievals.
+ If the document contains keywords or semantic meaning related to the question, grade it as relevant.
+ Output 'yes' or 'no'."""
+ BINARY_GRADER_PROMPT = ChatPromptTemplate.from_messages([
+     ("system", BINARY_GRADER_TEMPLATE),
+     ("human", "Retrieved document:\n\n{document}\n\nUser question: {question}"),
+ ])
+
+ SCORE_GRADER_TEMPLATE = """Analyze the query and the document. Quantify the relevance.
+ Provide the rationale before the score.
+ Output a score between 0 (irrelevant) and 1 (completely relevant)."""
+ SCORE_GRADER_PROMPT = ChatPromptTemplate.from_messages([
+     ("system", SCORE_GRADER_TEMPLATE),
+     ("human", "Passage:\n\n{document}\n\nUser query: {query}"),
+ ])
+
+
+ # --- Planning ---
+ PLAN_GENERATION_TEMPLATE = """You are a standardization expert planning to identify NEW and INNOVATIVE Key Issues related to a technical requirement.
+ Devise a concise, step-by-step plan to achieve this.
+ Consider steps like: Understanding the core problem, Researching existing standards/innovations, Identifying potential gaps/challenges, Formulating Key Issues, and Refining/Detailing them.
+ Output the plan starting with 'Plan:' and numbering each step. End the plan with '<END_OF_PLAN>'."""
+
+ PLAN_MODIFICATION_TEMPLATE = """You are a standardization expert planning to identify NEW and INNOVATIVE Key Issues related to a technical requirement.
+ Adapt the following generic plan template to the specific requirement. Keep it concise.
+
+ ### PLAN TEMPLATE ###
+ Plan:
+ 1. **Understand Core Requirement**: Analyze the user query to define the scope.
+ 2. **Gather Context**: Retrieve relevant specifications, standards, and recent research papers.
+ 3. **Identify Gaps & Challenges**: Based on context, brainstorm potential new issues and challenges.
+ 4. **Formulate Key Issues**: Structure the findings into distinct Key Issues.
+ 5. **Refine & Detail**: Elaborate on each Key Issue, outlining specific challenges.
+ <END_OF_PLAN>
+ ### END OF PLAN TEMPLATE ###
+
+ Output the adapted plan starting with 'Plan:' and numbering each step. End with '<END_OF_PLAN>'."""
+
+
+ # --- Document Processing ---
+ SUMMARIZER_TEMPLATE = """You are a 3GPP standardization expert.
+ Summarize the key information in the provided document in simple technical English relevant to identifying potential Key Issues. Focus on challenges, gaps, or novel aspects.
+
+ Document:
+ {document}"""
+ SUMMARIZER_PROMPT = ChatPromptTemplate.from_template(SUMMARIZER_TEMPLATE)
+
+
+ # --- Key Issue Structuring (New) ---
+ # This prompt guides the LLM to output structured Key Issues based on gathered context.
+ # It references the Pydantic model 'KeyIssue' for the desired format.
+ # Note: this is deliberately NOT an f-string. {user_query} and {context} are template
+ # variables, and the literal JSON braces are doubled ({{ }}) so ChatPromptTemplate
+ # does not parse them as input variables.
+ KEY_ISSUE_STRUCTURING_TEMPLATE = """Based on the provided context (summaries of relevant documents, research findings, etc.), identify and formulate distinct Key Issues related to the original user query.
+
+ User Query: {user_query}
+
+ Context:
+ {context}
+
+ For each Key Issue identified, provide the following information in the exact JSON format described below. Output a JSON list containing multiple KeyIssue objects.
+
+ JSON Schema for each Key Issue object:
+ {{
+ "id": "Sequential integer ID starting from 1",
+ "title": "Concise title for the key issue (max 15 words)",
+ "description": "Detailed description of the key issue (2-4 sentences)",
+ "challenges": ["List of specific challenges related to this issue (strings)", "Each challenge as a separate string"],
+ "potential_impact": "Brief description of the potential impact if not addressed (optional, max 30 words)"
+ }}
+
+ Example Format:
+ [
+ {{
+ "id": 1,
+ "title": "Scalability of AI Models in Low-Resource Settings",
+ "description": "Deploying complex AI models for healthcare diagnostics in areas with limited computational power and data connectivity presents significant scalability challenges. Existing models often require substantial resources.",
+ "challenges": ["High computational requirements of current models", "Intermittent or low-bandwidth network connectivity", "Lack of large, localized datasets for training/fine-tuning"],
+ "potential_impact": "Limits equitable access to advanced AI-driven healthcare diagnostics."
+ }},
+ {{
+ "id": 2,
+ "title": "...",
+ "description": "...",
+ "challenges": ["...", "..."],
+ "potential_impact": "..."
+ }}
+ ]
+
+ Generate the JSON list of Key Issues based *only* on the provided context and user query. Ensure the output is a valid JSON list.
+ """
+ KEY_ISSUE_STRUCTURING_PROMPT = ChatPromptTemplate.from_template(KEY_ISSUE_STRUCTURING_TEMPLATE)
+
+
+ # --- Initial Prompt Selection ---
+ def get_initial_planner_prompt(plan_method: str, user_query: str) -> ChatPromptTemplate:
+     if plan_method == "generation":
+         template = PLAN_GENERATION_TEMPLATE
+     elif plan_method == "modification":
+         template = PLAN_MODIFICATION_TEMPLATE
+     else:
+         raise ValueError("Invalid plan_method")
+
+     # Return as ChatPromptTemplate for consistency.
+     # Note: user_query is inserted literally; brace characters in the query would be
+     # interpreted as template variables.
+     return ChatPromptTemplate.from_messages([
+         ("system", template),
+         ("human", user_query)
+     ])
kig_core/schemas.py ADDED
@@ -0,0 +1,55 @@
+ from typing import List, Dict, Any, Optional, Union, Annotated
+ from typing_extensions import TypedDict
+ from langchain_core.messages import BaseMessage
+ from pydantic import BaseModel, Field
+ from langgraph.graph.message import add_messages
+
+ # --- Pydantic Models for Structured Output ---
+
+ class KeyIssue(BaseModel):
+     """Represents a single generated Key Issue."""
+     id: int = Field(..., description="Sequential ID for the key issue")
+     title: str = Field(..., description="A concise title for the key issue")
+     description: str = Field(..., description="Detailed description of the key issue")
+     challenges: List[str] = Field(default_factory=list, description="Specific challenges associated with this issue")
+     potential_impact: Optional[str] = Field(None, description="Potential impact if the issue is not addressed")
+     # Add source tracking if possible/needed from the processed docs
+     # sources: List[str] = Field(default_factory=list, description="Source documents relevant to this issue")
+
+
+ # --- TypedDicts for LangGraph State ---
+
+ class GraphConfig(TypedDict):
+     """Configuration passed to the graph execution."""
+     thread_id: str
+     # Add other config items needed at runtime if not globally available via settings
+
+ class BaseState(TypedDict):
+     """Base state common across potentially multiple graphs."""
+     messages: Annotated[List[BaseMessage], add_messages]
+     error: Optional[str] # Stores potential errors during execution
+
+ class PlannerState(BaseState):
+     """State specific to the main planner graph."""
+     user_query: str
+     plan: List[str] # The high-level plan steps
+     current_plan_step_index: int # Index of the current step being executed
+     # Context gathered in previous steps (e.g., summaries), keyed by plan step index
+     step_outputs: Dict[int, Any] # Stores output (e.g., processed docs) from each step
+     # Final structured output
+     key_issues: List[KeyIssue]
+
+
+ class DataRetrievalState(TypedDict):
+     """State for a potential data retrieval sub-graph."""
+     query_for_retrieval: str # The specific query for this retrieval step
+     retrieved_docs: List[Dict[str, Any]] # Raw docs from Neo4j
+     evaluated_docs: List[Dict[str, Any]] # Docs after relevance grading
+     cypher_queries: List[str] # Generated Cypher queries
+
+ class ProcessingState(TypedDict):
+     """State for a potential document processing sub-graph."""
+     docs_to_process: List[Dict[str, Any]] # Documents passed for processing
+     processed_docs: List[Union[str, Dict[str, Any]]] # Processed/summarized docs
+     processing_steps_config: List[Union[str, dict]] # Configuration for processing
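
A short sketch of how the `KeyIssue` model round-trips to a dict, which is what `key_issues_to_dataframe` in `kig_core/utils.py` consumes (field values borrowed from the example in `prompts.py`):

```python
from kig_core.schemas import KeyIssue

issue = KeyIssue(
    id=1,
    title="Scalability of AI Models in Low-Resource Settings",
    description="Deploying complex AI models in low-resource settings strains limited compute.",
    challenges=["High computational requirements", "Intermittent connectivity"],
)
print(issue.model_dump())  # dict form, ready for pandas
```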
kig_core/utils.py ADDED
@@ -0,0 +1,41 @@
+ import pandas as pd
+ import io
+ import logging
+ from typing import List
+ from .schemas import KeyIssue # Import the Pydantic model
+
+ logger = logging.getLogger(__name__)
+
+ def key_issues_to_dataframe(key_issues: List[KeyIssue]) -> pd.DataFrame:
+     """Converts a list of KeyIssue objects into a Pandas DataFrame."""
+     if not key_issues:
+         return pd.DataFrame()
+     # Use Pydantic's .model_dump() for robust serialization
+     data = [ki.model_dump() for ki in key_issues]
+     df = pd.DataFrame(data)
+     # Optional: Reorder or rename columns if needed
+     # df = df[['id', 'title', 'description', 'challenges', 'potential_impact']] # Example reordering
+     return df
+
+ def dataframe_to_excel_bytes(df: pd.DataFrame) -> bytes:
+     """Converts a Pandas DataFrame to Excel format in memory (bytes)."""
+     logger.info("Generating Excel file from DataFrame...")
+     output = io.BytesIO()
+     try:
+         # Use the BytesIO object as the target file; the workbook is written when the writer closes
+         with pd.ExcelWriter(output, engine='openpyxl') as writer:
+             df.to_excel(writer, index=False, sheet_name='Key Issues')
+         excel_data = output.getvalue()
+         logger.info("Excel file generated successfully.")
+         return excel_data
+     except Exception as e:
+         logger.error(f"Failed to generate Excel file: {e}", exc_info=True)
+         raise RuntimeError("Failed to create Excel output.") from e
+
+ # Removed: format_df (HTML specific, less relevant for Excel output)
+ # Removed: init_app (handled by config.py)
+ # Removed: get_model (handled by llm_interface.py)
+ # Removed: clear_memory (handle state/memory management within the LangGraph setup if needed)
+ # Removed: _set_env (handled by config.py and dotenv)
+ # Kept: format_doc (renamed to format_doc_for_llm in graph_operations.py)
+ # Removed: update_doc_history (reducer logic should be handled in the LangGraph state definition/nodes)
requirements.txt ADDED
@@ -0,0 +1,26 @@
+ # Langchain Core & Ecosystem
+ langchain-core>=0.2.29
+ langchain-google-genai>=1.0.9 # For Gemini
+ langchain-openai>=0.1.21 # If using OpenAI
+ langgraph>=0.1.10
+ langchain-community>=0.2.10 # For Neo4jGraph if needed, other community integrations
+
+ # LLM & Processing Libraries
+ # llmlingua==0.2.2 # Uncomment if using compression
+ google-generativeai>=0.7.2 # Underlying Gemini library
+
+ # Neo4j
+ neo4j>=5.24.0
+
+ # Streamlit & Data Handling
+ streamlit>=1.31.0
+ pandas>=2.1.3
+ openpyxl>=3.1.5 # For Excel writing with Pandas
+
+ # Configuration & Utilities
+ pydantic>=2.9.0
+ pydantic-settings>=2.4.0 # For BaseSettings
+ python-dotenv>=1.0.1 # For loading .env files
+
+ # Optional: For LangSmith Tracing
+ # langsmith>=0.1.100