Spaces:

mgbam
/

NeuroResearch_AI

Sleeping

App Files Files Community

mgbam committed on Mar 13

Commit

de3ef7d

verified ·

1 Parent(s): 9370b00

Update app.py

Browse files

Files changed (1) hide show

app.py +277 -378

app.py CHANGED Viewed

@@ -1,34 +1,48 @@
 # ------------------------------
-# NeuroResearch 2.0: Advanced Research Cognition System
 # ------------------------------
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain_community.retrievers import BM25Retriever
-from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
-from langchain.text_splitter import SemanticChunker
-from langgraph.graph import END, StateGraph
-from langgraph.prebuilt import ToolNode
-from langgraph.graph.message import add_messages
 from typing_extensions import TypedDict, Annotated
-from typing import Sequence, Dict, List, Optional, Any, Tuple
 import chromadb
 import os
-import streamlit as st
-import requests
 import hashlib
 import json
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
 import plotly.express as px
 import pandas as pd
-from rank_bm25 import BM25Okapi
-from sentence_transformers import CrossEncoder
 # ------------------------------
-# Quantum Cognition Configuration
 # ------------------------------
 class NeuroConfig:
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
     CHROMA_PATH = "neuro_db"
     CHUNK_SIZE = 512
@@ -45,391 +59,276 @@ class NeuroConfig:
     CACHE_TTL = 3600  # 1 hour
 # ------------------------------
-# Quantum State Schema
-# ------------------------------
-class ResearchState(TypedDict):
-    messages: Annotated[Sequence[AIMessage | HumanMessage | ToolMessage], add_messages]
-    context: Dict[str, Any]
-    metadata: Dict[str, Any]
-    cognitive_artifacts: Dict[str, Any]
-# ------------------------------
-# Neural Document Processor
 # ------------------------------
 class NeuralDocumentProcessor:
-    def __init__(self):
-        self.client = chromadb.PersistentClient(path=NeuroConfig.CHROMA_PATH)
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
             dimensions=NeuroConfig.EMBEDDING_DIMENSIONS
         )
         self.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
-    def process_documents(self, documents: List[str], collection: str) -> Chroma:
-        splitter = SemanticChunker(
-            self.embeddings,
-            breakpoint_threshold_type="percentile",
-            breakpoint_threshold_amount=0.8
-        )
-        docs = splitter.create_documents(documents)
-        return Chroma.from_documents(
-            documents=docs,
-            embedding=self.embeddings,
-            client=self.client,
-            collection_name=collection,
-            ids=[self._quantum_id(doc.page_content) for doc in docs]
-        )
-    def hybrid_retrieval(self, query: str, collection: str) -> List[Tuple[str, float]]:
-        vector_retriever = Chroma(
-            client=self.client,
-            collection_name=collection,
-            embedding_function=self.embeddings
-        ).as_retriever(search_kwargs={"k": NeuroConfig.HYBRID_RERANK_TOP_K})
-        bm25_retriever = BM25Retriever.from_documents(
-            vector_retriever.get()["documents"],
-            preprocess_func=lambda x: x.split()
         )
-        vector_results = vector_retriever.invoke(query)
-        bm25_results = bm25_retriever.invoke(query)
-        combined = list({doc.page_content: doc for doc in vector_results + bm25_results}.values())
-        scores = self.cross_encoder.predict([(query, doc.page_content) for doc in combined])
-        reranked = sorted(zip(combined, scores), key=lambda x: x[1], reverse=True)
-        return [doc for doc, _ in reranked[:NeuroConfig.HYBRID_RERANK_TOP_K]]
-    def _quantum_id(self, content: str) -> str:
-        return f"neuro_{hashlib.sha3_256(content.encode()).hexdigest()[:24]}"
-# ------------------------------
-# Cognitive Processing Units
-# ------------------------------
-class NeuroAnalyticalEngine:
-    def __init__(self):
-        self.executor = ThreadPoolExecutor(max_workers=NeuroConfig.MAX_CONCURRENT_REQUESTS)
-        self.cache = {}
-    def parallel_analysis(self, query: str, context: str, mode: str) -> Dict:
-        cache_key = f"{hashlib.sha256(query.encode()).hexdigest()[:16]}_{mode}"
-        if cached := self.cache.get(cache_key):
-            if time.time() - cached["timestamp"] < NeuroConfig.CACHE_TTL:
-                return cached["response"]
-        futures = []
-        for _ in range(3):
-            futures.append(self.executor.submit(
-                self._cognitive_process,
-                query,
-                context,
-                mode
-            ))
-        results = [f.result() for f in as_completed(futures)]
-        best_response = max(results, key=lambda x: x.get('quality_score', 0))
-        self.cache[cache_key] = {
-            "response": best_response,
-            "timestamp": time.time()
-        }
-        return best_response
-    def _cognitive_process(self, query: str, context: str, mode: str) -> Dict:
-        headers = {
-            "Authorization": f"Bearer {NeuroConfig.DEEPSEEK_API_KEY}",
-            "Content-Type": "application/json",
-            "X-Neuro-Mode": mode
-        }
         try:
-            response = requests.post(
-                "https://api.deepseek.com/v1/chat/completions",
-                headers=headers,
-                json={
-                    "model": "deepseek-researcher-v2",
-                    "messages": [{
-                        "role": "system",
-                        "content": f"""Perform {mode} analysis. Context:
-                        {context}"""
-                    }, {
-                        "role": "user",
-                        "content": query
-                    }],
-                    "temperature": 0.3 if mode == "technical" else 0.7,
-                    "max_tokens": 2048,
-                    "top_p": 0.95,
-                    "response_format": {"type": "json_object"},
-                    "seed": 42
-                },
-                timeout=60
             )
-            response.raise_for_status()
-            analysis = json.loads(response.json()["choices"][0]["message"]["content"])
-            return {
-                **analysis,
-                "quality_score": self._evaluate_quality(analysis)
-            }
         except Exception as e:
-            return {"error": str(e), "quality_score": 0}
-    def _evaluate_quality(self, analysis: Dict) -> float:
-        score = 0.0
-        score += len(analysis.get("key_points", [])) * 0.2
-        score += len(analysis.get("comparisons", [])) * 0.3
-        score += len(analysis.get("citations", [])) * 0.5
-        return min(score, 1.0)
-# ------------------------------
-# Advanced Research Workflow
-# ------------------------------
-class NeuroResearchWorkflow:
-    def __init__(self):
-        self.processor = NeuralDocumentProcessor()
-        self.engine = NeuroAnalyticalEngine()
-        self._build_cognitive_graph()
-    def _build_cognitive_graph(self):
-        workflow = StateGraph(ResearchState)
-        workflow.add_node("ingest", self.ingest_query)
-        workflow.add_node("retrieve", self.retrieve_documents)
-        workflow.add_node("analyze", self.analyze_content)
-        workflow.add_node("visualize", self.generate_insights)
-        workflow.add_node("validate", self.validate_knowledge)
-        workflow.set_entry_point("ingest")
-        workflow.add_edge("ingest", "retrieve")
-        workflow.add_edge("retrieve", "analyze")
-        workflow.add_edge("analyze", "visualize")
-        workflow.add_edge("visualize", "validate")
-        workflow.add_edge("validate", END)
-        self.app = workflow.compile()
-    def ingest_query(self, state: ResearchState) -> ResearchState:
-        query = state["messages"][-1].content
-        return {
-            **state,
-            "context": {
-                "raw_query": query,
-                "analysis_mode": "technical"
-            },
-            "metadata": {
-                "timestamp": datetime.now().isoformat(),
-                "session_id": hashlib.sha256(query.encode()).hexdigest()[:16]
-            }
-        }
-    def retrieve_documents(self, state: ResearchState) -> ResearchState:
-        docs = self.processor.hybrid_retrieval(
-            state["context"]["raw_query"],
-            "research"
-        )
-        return {
-            **state,
-            "context": {
-                **state["context"],
-                "documents": docs,
-                "retrieval_metrics": {
-                    "total": len(docs),
-                    "relevance_scores": [doc.metadata.get("score", 0) for doc in docs]
-                }
-            }
-        }
-    def analyze_content(self, state: ResearchState) -> ResearchState:
-        context = "\n".join([doc.page_content for doc in state["context"]["documents"]])
-        analysis = self.engine.parallel_analysis(
-            query=state["context"]["raw_query"],
-            context=context,
-            mode=state["context"]["analysis_mode"]
-        )
-        return {
-            **state,
-            "cognitive_artifacts": analysis,
-            "messages": [AIMessage(content=json.dumps(analysis, indent=2))]
-        }
-    def generate_insights(self, state: ResearchState) -> ResearchState:
-        df = pd.DataFrame({
-            "document": [doc.metadata.get("source", "") for doc in state["context"]["documents"]],
-            "relevance": [doc.metadata.get("score", 0) for doc in state["context"]["documents"]],
-            "year": [doc.metadata.get("year", 2023) for doc in state["context"]["documents"]]
-        })
-        figures = {
-            "temporal": px.line(df, x="year", y="relevance", title="Temporal Relevance"),
-            "distribution": px.histogram(df, x="relevance", title="Score Distribution")
-        }
-        return {
-            **state,
-            "cognitive_artifacts": {
-                **state["cognitive_artifacts"],
-                "visualizations": figures
-            }
-        }
-    def validate_knowledge(self, state: ResearchState) -> ResearchState:
-        validation_prompt = f"""
-        Validate research artifacts:
-        {json.dumps(state['cognitive_artifacts'], indent=2)}
-        Return JSON with:
-        - validity_score: 0-1
-        - critical_issues: List[str]
-        - strength_points: List[str]
         """
-        validation = self.engine.parallel_analysis(
-            query=validation_prompt,
-            context="",
-            mode="critical"
         )
-        return {
-            **state,
-            "cognitive_artifacts": {
-                **state["cognitive_artifacts"],
-                "validation": validation
-            }
-        }
 # ------------------------------
-# Holographic Research Interface
 # ------------------------------
-class NeuroInterface:
-    def __init__(self):
-        self.workflow = NeuroResearchWorkflow()
-        self._initialize_nexus()
-    def _initialize_nexus(self):
-        st.set_page_config(
-            page_title="NeuroResearch Nexus",
-            layout="wide",
-            initial_sidebar_state="expanded"
         )
-        self._inject_neuro_styles()
-        self._build_quantum_sidebar()
-        self._build_main_nexus()
-    def _inject_neuro_styles(self):
-        st.markdown("""
-        <style>
-        :root {
-            --neuro-primary: #7F00FF;
-            --neuro-secondary: #E100FF;
-            --neuro-background: #0A0A2E;
-            --neuro-text: #F0F2F6;
-        }
-        .stApp {
-            background: var(--neuro-background);
-            color: var(--neuro-text);
-            font-family: 'Inter', sans-serif;
-        }
-        .stTextArea textarea {
-            background: #1A1A4E !important;
-            color: var(--neuro-text) !important;
-            border: 2px solid var(--neuro-secondary);
-            border-radius: 12px;
-            padding: 1.5rem;
-            font-size: 1.1rem;
-        }
-        .stButton>button {
-            background: linear-gradient(135deg, var(--neuro-primary), var(--neuro-secondary));
-            border: none;
-            border-radius: 12px;
-            padding: 1.2rem 2.4rem;
-            font-weight: 600;
-            transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
-        }
-        .stButton>button:hover {
-            transform: translateY(-2px);
-            box-shadow: 0 8px 24px rgba(127, 0, 255, 0.3);
-        }
-        .neuro-card {
-            background: #1A1A4E;
-            border-radius: 16px;
-            padding: 2rem;
-            margin: 1.5rem 0;
-            border: 1px solid #2E2E6E;
-        }
-        </style>
-        """, unsafe_allow_html=True)
-    def _build_quantum_sidebar(self):
-        with st.sidebar:
-            st.title("🌀 Neuro Nexus")
-            st.subheader("Analysis Modes")
-            selected_mode = st.selectbox(
-                "Select Cognitive Mode",
-                options=list(NeuroConfig.ANALYSIS_MODES.keys()),
-                format_func=lambda x: NeuroConfig.ANALYSIS_MODES[x]
             )
-            st.subheader("Quantum Metrics")
-            col1, col2 = st.columns(2)
-            col1.metric("Vector Dimensions", NeuroConfig.EMBEDDING_DIMENSIONS)
-            col2.metric("Hybrid Recall", "92.4%", "1.2% ↑")
-            st.divider()
-            st.write("**Cognitive Filters**")
-            st.checkbox("Temporal Analysis", True)
-            st.checkbox("Methodology Comparison")
-            st.checkbox("Citation Graph")
-    def _build_main_nexus(self):
-        st.title("🧠 NeuroResearch Nexus")
-        query = st.text_area("Enter Research Query:", height=200,
-                           placeholder="Query our knowledge continuum...")
-        if st.button("Initiate NeuroAnalysis", type="primary"):
-            self._execute_neuro_analysis(query)
-    def _execute_neuro_analysis(self, query: str):
-        with st.spinner("Activating Cognitive Matrix..."):
-            result = self.workflow.app.invoke({
-                "messages": [HumanMessage(content=query)],
-                "context": {},
-                "metadata": {},
-                "cognitive_artifacts": {}
-            })
-            self._render_quantum_results(result)
-    def _render_quantum_results(self, result: Dict):
-        with st.container():
-            st.subheader("🧬 Cognitive Artifacts")
-            with st.expander("Core Analysis", expanded=True):
-                st.json(result["cognitive_artifacts"].get("analysis", {}))
-            with st.expander("Visual Insights", expanded=True):
-                visuals = result["cognitive_artifacts"].get("visualizations", {})
-                col1, col2 = st.columns(2)
-                with col1:
-                    st.plotly_chart(visuals.get("temporal"), use_container_width=True)
-                with col2:
-                    st.plotly_chart(visuals.get("distribution"), use_container_width=True)
-            with st.expander("Validation Report", expanded=False):
-                validation = result["cognitive_artifacts"].get("validation", {})
-                st.metric("Validity Score", f"{validation.get('validity_score', 0)*100:.1f}%")
-                st.write("**Critical Issues**")
-                st.write(validation.get("critical_issues", []))
-                st.write("**Strengths**")
-                st.write(validation.get("strength_points", []))
 if __name__ == "__main__":
-    NeuroInterface()

 # ------------------------------
+# NeuroResearch 2.1: Robust Research System
 # ------------------------------
 from langchain_openai import OpenAIEmbeddings
 from langchain_community.vectorstores import Chroma
 from langchain_community.retrievers import BM25Retriever
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from rank_bm25 import BM25Okapi
+from sentence_transformers import CrossEncoder
 from typing_extensions import TypedDict, Annotated
+from typing import (
+    Sequence, Dict, List, Optional, Any, Tuple, Union
+)
 import chromadb
 import os
 import hashlib
 import json
 import time
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
+import streamlit as st
 import plotly.express as px
 import pandas as pd
 # ------------------------------
+# Configuration
 # ------------------------------
 class NeuroConfig:
+    """
+    Configuration class for NeuroResearch system.
+    Attributes:
+        DEEPSEEK_API_KEY (str): Optional API key for external services.
+        CHROMA_PATH (str): File path for Chroma's persistent storage.
+        CHUNK_SIZE (int): Maximum length of text chunks for splitting.
+        CHUNK_OVERLAP (int): Overlap between text chunks to preserve context.
+        MAX_CONCURRENT_REQUESTS (int): Number of concurrent threads for processing.
+        EMBEDDING_DIMENSIONS (int): Dimensionality of embeddings.
+        HYBRID_RERANK_TOP_K (int): Number of documents to retrieve and rerank.
+        ANALYSIS_MODES (dict): Possible analysis modes and their descriptions.
+        CACHE_TTL (int): Time-to-live (seconds) for cached items.
+    """
     DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY")
     CHROMA_PATH = "neuro_db"
     CHUNK_SIZE = 512
     CACHE_TTL = 3600  # 1 hour
 # ------------------------------
+# Document Processor
 # ------------------------------
 class NeuralDocumentProcessor:
+    """
+    A document processing and retrieval utility class.
+    Responsibilities:
+      - Splitting documents into manageable chunks.
+      - Storing and retrieving embeddings with Chroma.
+      - Performing hybrid retrieval (vector + BM25) with cross-encoder reranking.
+      - Handling concurrency during document ingestion (optional).
+    """
+    def __init__(self) -> None:
+        """
+        Initialize the NeuralDocumentProcessor with a persistent Chroma client,
+        OpenAI-based embeddings, a CrossEncoder for reranking, and a text splitter.
+        """
+        # Persistent Chroma client
+        try:
+            self.client = chromadb.PersistentClient(path=NeuroConfig.CHROMA_PATH)
+        except Exception as e:
+            # Fallback to in-memory client if persistent fails
+            print(f"Error initializing Chroma PersistentClient: {e}")
+            self.client = chromadb.Client()
+        # Embeddings (OpenAI-based)
         self.embeddings = OpenAIEmbeddings(
             model="text-embedding-3-large",
             dimensions=NeuroConfig.EMBEDDING_DIMENSIONS
         )
+        # Cross-encoder for reranking
         self.cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
+        # Text splitter
+        self.text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=NeuroConfig.CHUNK_SIZE,
+            chunk_overlap=NeuroConfig.CHUNK_OVERLAP,
+            separators=["\n\n", "\n", "(?<=\\. )", " "],
         )
+    def process_documents(
+        self,
+        documents: List[str],
+        collection: str,
+        use_concurrency: bool = False
+    ) -> Optional[Chroma]:
+        """
+        Process a list of document strings by splitting, embedding, and storing them in Chroma.
+        Optionally uses concurrency for splitting documents.
+        Args:
+            documents (List[str]): The list of raw document texts.
+            collection (str): The Chroma collection name to store these documents in.
+            use_concurrency (bool, optional): If True, process documents concurrently. Defaults to False.
+        Returns:
+            Optional[Chroma]: The Chroma vectorstore for the specified collection, or None if no docs.
+        """
+        if not documents:
+            print("No documents provided to process_documents.")
+            return None
+        # Split documents into chunks
+        if use_concurrency and len(documents) > 1:
+            chunks = []
+            with ThreadPoolExecutor(max_workers=NeuroConfig.MAX_CONCURRENT_REQUESTS) as executor:
+                future_to_doc = {
+                    executor.submit(self.text_splitter.create_documents, [doc]): doc
+                    for doc in documents
+                }
+                for future in as_completed(future_to_doc):
+                    try:
+                        result = future.result()
+                        chunks.extend(result)
+                    except Exception as e:
+                        print(f"Error splitting document: {e}")
+        else:
+            # Single-threaded splitting
+            chunks = []
+            for doc in documents:
+                chunks.extend(self.text_splitter.create_documents([doc]))
+        # Build unique IDs for each chunk
+        chunk_ids = [self._quantum_id(doc.page_content) for doc in chunks]
+        # Create Chroma from documents
         try:
+            vectorstore = Chroma.from_documents(
+                documents=chunks,
+                embedding=self.embeddings,
+                client=self.client,
+                collection_name=collection,
+                ids=chunk_ids
             )
+            return vectorstore
         except Exception as e:
+            print(f"Error creating Chroma collection: {e}")
+            return None
+    def hybrid_retrieval(
+        self,
+        query: str,
+        collection: str,
+        return_scores: bool = False
+    ) -> Union[List[str], List[Tuple[str, float]]]:
+        """
+        Perform hybrid retrieval combining vector-based search with BM25,
+        then re-rank the combined results using a cross-encoder.
+        Args:
+            query (str): The user query for retrieving documents.
+            collection (str): The name of the Chroma collection to search.
+            return_scores (bool): If True, return a list of (document, score) tuples.
+                                  Otherwise, return a list of document strings only.
+        Returns:
+            Union[List[str], List[Tuple[str, float]]]: The top-k reranked results,
+            either as strings or (string, score) pairs.
         """
+        # Try to load the existing collection
+        try:
+            vector_store = Chroma(
+                client=self.client,
+                collection_name=collection,
+                embedding_function=self.embeddings
+            )
+        except Exception as e:
+            print(f"Error loading Chroma collection '{collection}': {e}")
+            return [] if not return_scores else []
+        # Check if the collection is empty
+        stored_docs = vector_store.get()
+        if not stored_docs or "documents" not in stored_docs or not stored_docs["documents"]:
+            print(f"No documents found in collection '{collection}'.")
+            return [] if not return_scores else []
+        all_docs = [doc.page_content for doc in stored_docs["documents"]]
+        if not all_docs:
+            print(f"No documents found in collection '{collection}'.")
+            return [] if not return_scores else []
+        # Vector-based retrieval
+        try:
+            vector_retriever = vector_store.as_retriever(
+                search_kwargs={"k": NeuroConfig.HYBRID_RERANK_TOP_K}
+            )
+            vector_results = [doc.page_content for doc in vector_retriever.invoke(query)]
+        except Exception as e:
+            print(f"Error during vector retrieval: {e}")
+            vector_results = []
+        # BM25 retrieval
+        tokenized_docs = [doc.split() for doc in all_docs]
+        bm25 = BM25Okapi(tokenized_docs)
+        bm25_results = bm25.get_top_n(
+            query.split(),
+            all_docs,
+            n=NeuroConfig.HYBRID_RERANK_TOP_K
         )
+        # Combine results and remove duplicates
+        combined = list(set(vector_results + bm25_results))
+        if not combined:
+            print("No documents retrieved by either BM25 or vector search.")
+            return [] if not return_scores else []
+        # Cross-encoder reranking
+        scores = self.cross_encoder.predict([(query, doc) for doc in combined])
+        reranked = sorted(zip(combined, scores), key=lambda x: x[1], reverse=True)
+        top_results = reranked[:NeuroConfig.HYBRID_RERANK_TOP_K]
+        # Return based on user preference
+        if return_scores:
+            return top_results  # List[Tuple[str, float]]
+        else:
+            return [doc for doc, _ in top_results]
+    def _quantum_id(self, content: str) -> str:
+        """
+        Create a unique ID for each text chunk by hashing its content.
+        Args:
+            content (str): The text content of the chunk.
+        Returns:
+            str: A unique hash-based identifier.
+        """
+        return f"neuro_{hashlib.sha3_256(content.encode()).hexdigest()[:24]}"
 # ------------------------------
+# NeuroInterface (Streamlit Example)
 # ------------------------------
def NeuroInterface() -> None:
    """
    Render a basic Streamlit interface around NeuralDocumentProcessor.

    The sidebar uploads UTF-8 text files and stores them in a named
    collection; the main area runs hybrid retrieval against that collection
    and shows the reranked results.
    """
    st.title("NeuroResearch 2.1: Robust Research System")

    # Streamlit re-executes the script on every widget interaction, so keep
    # one processor per session instead of rebuilding the Chroma client and
    # reloading the cross-encoder on each rerun.
    if "neuro_processor" not in st.session_state:
        st.session_state["neuro_processor"] = NeuralDocumentProcessor()
    processor = st.session_state["neuro_processor"]

    # Sidebar for uploading and processing documents
    with st.sidebar:
        st.header("Document Ingestion")
        # Only plain-text formats are offered: every upload is decoded as
        # UTF-8 below, which binary formats such as PDF would not survive.
        uploaded_files = st.file_uploader(
            "Upload one or more text files",
            type=["txt", "md"],
            accept_multiple_files=True
        )
        collection_name = st.text_input(
            "Collection Name", value="default_collection"
        ).strip()
        use_concurrency = st.checkbox("Use Concurrency for Processing?", value=False)

        if st.button("Process Documents"):
            if not uploaded_files or not collection_name:
                st.warning("Please upload at least one file and provide a collection name.")
            else:
                docs_content = []
                for uf in uploaded_files:
                    try:
                        docs_content.append(uf.read().decode("utf-8"))
                    except UnicodeDecodeError:
                        st.error(f"Could not decode {uf.name}. Make sure it's UTF-8 text.")

                if not docs_content:
                    st.error("None of the uploaded files could be read as UTF-8 text.")
                else:
                    st.write("Processing documents...")
                    vectorstore = processor.process_documents(
                        documents=docs_content,
                        collection=collection_name,
                        use_concurrency=use_concurrency
                    )
                    if vectorstore:
                        st.success(f"Documents processed and stored in collection: {collection_name}")
                    else:
                        st.error("Processing failed or returned no vectorstore.")

    # Main interface for querying
    st.subheader("Query Documents")
    user_query = st.text_input("Enter your query:")
    return_scores = st.checkbox("Return Scores?")

    if not st.button("Search"):
        return
    if not user_query.strip() or not collection_name:
        st.warning("Please provide both a query and a valid collection name.")
        return

    st.write(f"Retrieving from collection: {collection_name}")
    results = processor.hybrid_retrieval(
        query=user_query,
        collection=collection_name,
        return_scores=return_scores
    )
    if not results:
        st.warning("No results found or collection may be empty.")
        return

    st.write("Top Reranked Results:")
    for idx, result in enumerate(results, start=1):
        if return_scores:
            # Each result is a (doc, score) pair.
            doc, score = result
            st.markdown(f"**Result {idx} | Score: {score:.4f}**")
        else:
            doc = result
            st.markdown(f"**Result {idx}**")
        # Show at most the first 500 characters of each hit.
        st.write(doc[:500] + ("..." if len(doc) > 500 else ""))
+# ------------------------------
+# Main Entry Point
+# ------------------------------
 if __name__ == "__main__":
+    NeuroInterface()