Spaces:

mgbam
/

AuditXCodeInsights

Sleeping

App Files Files Community

mgbam committed on Apr 6

Commit

81a11e5

verified ·

1 Parent(s): ed31030

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -98

app.py CHANGED Viewed

@@ -1,18 +1,3 @@
-# --- Docstring ---
-"""
-Streamlit application for Medical Image Analysis using Google Gemini Vision
-and Retrieval-Augmented Generation (RAG) with Chroma DB, enhanced for
-Hugging Face Spaces deployment and improved practices.
-Features:
-- Image analysis via Google Gemini Pro Vision.
-- RAG using Chroma DB with Hugging Face embeddings.
-- Caching for performance.
-- Basic logging.
-- Improved UX and error handling.
-- Explicit Disclaimer.
-"""
 # --- Imports ---
 import streamlit as st
 import google.generativeai as genai
@@ -24,6 +9,10 @@ import time
 import logging
 from typing import Optional, Dict, List, Any, Tuple
 # --- Basic Logging Setup ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
@@ -32,10 +21,10 @@ logger = logging.getLogger(__name__)
 # Secrets Management (Prioritize Hugging Face Secrets)
 try:
     GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
-    # HF_TOKEN is optional for many public models, but required for gated/private ones
     HF_TOKEN = st.secrets.get("HF_TOKEN") # Use .get() for optional token
 except KeyError as e:
     err_msg = f"❌ Missing Secret: {e}. Please add it to your Hugging Face Space secrets."
     st.error(err_msg)
     logger.error(err_msg)
     st.stop()
@@ -73,16 +62,16 @@ Structure the output clearly, perhaps using bullet points for findings.
 """
 # Chroma DB Configuration
-CHROMA_PATH = "chroma_data_hf" # Use a distinct path if needed
-COLLECTION_NAME = "medical_docs_hf"
-# IMPORTANT: Choose an appropriate HF embedding model. 'all-mpnet-base-v2' is general purpose.
-# For better medical results, consider models like:
-# - 'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext' (might need more RAM/compute)
-# - 'dmis-lab/sapbert-from-pubmedbert-sentencetransformer'
-# - Other models tagged 'medical' or 'biomedical' on Hugging Face Hub.
-# Ensure the chosen model is compatible with chromadb's HuggingFaceEmbeddingFunction.
-EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2" # <-- REPLACE if possible
-CHROMA_DISTANCE_METRIC = "cosine"
 # --- Caching Resource Initialization ---
@@ -100,36 +89,38 @@ def initialize_gemini_model() -> Optional[genai.GenerativeModel]:
         return model
     except Exception as e:
         err_msg = f"❌ Error initializing Gemini Model ({VISION_MODEL_NAME}): {e}"
-        st.error(err_msg)
         logger.error(err_msg, exc_info=True)
         return None
 @st.cache_resource
 def initialize_embedding_function() -> Optional[embedding_functions.HuggingFaceEmbeddingFunction]:
     """Initializes and returns the Hugging Face Embedding Function."""
     try:
         # Pass HF_TOKEN if it exists (required for private/gated models)
-        api_key_param = {"api_key": HF_TOKEN} if HF_TOKEN else {}
         embed_func = embedding_functions.HuggingFaceEmbeddingFunction(
             api_key=HF_TOKEN, # Pass token here if needed by model
             model_name=EMBEDDING_MODEL_NAME
         )
         logger.info(f"Successfully initialized HuggingFace Embedding Function: {EMBEDDING_MODEL_NAME}")
         return embed_func
     except Exception as e:
         err_msg = f"❌ Error initializing HuggingFace Embedding Function ({EMBEDDING_MODEL_NAME}): {e}"
-        st.error(err_msg)
         logger.error(err_msg, exc_info=True)
         st.info("ℹ️ Make sure the embedding model name is correct and you have network access. "
-                "If using a private model, ensure HF_TOKEN is set in secrets.")
         return None
 @st.cache_resource
 def initialize_chroma_collection(_embedding_func: embedding_functions.EmbeddingFunction) -> Optional[chromadb.Collection]:
     """Initializes the Chroma DB client and returns the collection."""
     if not _embedding_func:
-        st.error("❌ Cannot initialize Chroma DB without a valid embedding function.")
         return None
     try:
         chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
         collection = chroma_client.get_or_create_collection(
@@ -138,12 +129,13 @@ def initialize_chroma_collection(_embedding_func: embedding_functions.EmbeddingF
             metadata={"hnsw:space": CHROMA_DISTANCE_METRIC}
         )
         logger.info(f"Chroma DB collection '{COLLECTION_NAME}' loaded/created at '{CHROMA_PATH}' using {CHROMA_DISTANCE_METRIC}.")
         return collection
     except Exception as e:
         err_msg = f"❌ Error initializing Chroma DB at '{CHROMA_PATH}': {e}"
-        st.error(err_msg)
         logger.error(err_msg, exc_info=True)
-        st.info(f"ℹ️ Ensure the path '{CHROMA_PATH}' is writable.")
         return None
 # --- Core Logic Functions (with Caching for Data Operations) ---
@@ -187,13 +179,12 @@ def analyze_image_with_gemini(_gemini_model: genai.GenerativeModel, image_bytes:
 def query_chroma(_collection: chromadb.Collection, query_text: str, n_results: int = 5) -> Optional[Dict[str, List[Any]]]:
     """Queries Chroma DB, returns results dict or None on error."""
     if not _collection:
         return None
     if not query_text:
         logger.warning("Attempted to query Chroma with empty text.")
         return None
     try:
-        # Placeholder for potential query refinement:
-        # refined_query = refine_query_for_chroma(query_text) # Implement this if needed
         refined_query = query_text # Using direct analysis text for now
         results = _collection.query(
@@ -204,9 +195,9 @@ def query_chroma(_collection: chromadb.Collection, query_text: str, n_results: i
         logger.info(f"Chroma query successful for text snippet: '{query_text[:50]}...'")
         return results
     except Exception as e:
-        err_msg = f"Error querying Chroma DB: {e}"
-        st.error(err_msg) # Show error in UI as well
-        logger.error(err_msg, exc_info=True)
         return None
 def add_dummy_data_to_chroma(collection: chromadb.Collection, embedding_func: embedding_functions.EmbeddingFunction):
@@ -215,10 +206,23 @@ def add_dummy_data_to_chroma(collection: chromadb.Collection, embedding_func: em
         st.error("❌ Cannot add dummy data: Chroma Collection or Embedding Function not available.")
         return
-    status = st.status("Adding dummy data to Chroma DB...", expanded=False)
     try:
         # --- Dummy Data Definition ---
-        # (Same data as before, but ensure metadata is useful)
         docs = [
             "Figure 1A shows adenocarcinoma of the lung, papillary subtype. Note the glandular structures and nuclear atypia. TTF-1 staining was positive.",
             "Pathology slide 34B demonstrates high-grade glioma (glioblastoma) with significant necrosis and microvascular proliferation. Ki-67 index was high.",
@@ -233,52 +237,49 @@ def add_dummy_data_to_chroma(collection: chromadb.Collection, embedding_func: em
             {"source": "Path Report 101", "topic": "Gastrointestinal Pathology", "entities": "chronic gastritis, Helicobacter pylori, intestinal metaplasia", "IMAGE_ID": "micrograph_h_pylori_gastritis.jpg"},
             {"source": "Case Study CJD", "topic": "Neuropathology", "entities": "prion disease, Spongiform changes, Gliosis, cerebral cortex", "IMAGE_ID": "slide_cjd_sample_02.jpg"}
         ]
-        ids = [f"doc_hf_{int(time.time())}_{i}" for i in range(len(docs))]
-        # Check for existing documents (simple check based on text)
-        status.update(label="Checking for existing dummy documents...")
-        existing_docs = collection.get(where={"$or": [{"document": doc} for doc in docs]}, include=[])
-        if not existing_docs or not existing_docs.get('ids'):
-            status.update(label=f"Generating embeddings for {len(docs)} documents (may take time)...")
-            # Embeddings are generated implicitly by ChromaDB during .add()
-            # when an embedding_function is configured for the collection.
-            collection.add(
-                documents=docs,
-                metadatas=metadatas,
-                ids=ids
-            )
-            status.update(label=f"✅ Added {len(docs)} dummy documents.", state="complete")
-            logger.info(f"Added {len(docs)} dummy documents to collection '{COLLECTION_NAME}'.")
-        else:
-            status.update(label="⚠️ Dummy data already exists. No new data added.", state="complete")
-            logger.warning("Dummy data seems to already exist in the collection based on text match.")
     except Exception as e:
         err_msg = f"Error adding dummy data to Chroma: {e}"
-        status.update(label=f"❌ Error: {err_msg}", state="error")
         logger.error(err_msg, exc_info=True)
 # --- Initialize Resources ---
-# These calls use @st.cache_resource, so they run only once per session/resource change.
 gemini_model = initialize_gemini_model()
 embedding_func = initialize_embedding_function()
-collection = initialize_chroma_collection(embedding_func) # Pass embedding func to chroma init
 # --- Streamlit UI ---
-st.set_page_config(layout="wide", page_title="Medical Image Analysis & RAG (HF)")
-st.title("⚕️ Medical Image Analysis & RAG (Hugging Face Enhanced)")
 # --- DISCLAIMER ---
 st.warning("""
 **⚠️ Disclaimer:** This tool is for demonstration and informational purposes ONLY.
 It is **NOT** a medical device and should **NOT** be used for actual medical diagnosis, treatment, or decision-making.
 AI analysis can be imperfect. Always consult with qualified healthcare professionals for any medical concerns.
-Do **NOT** upload identifiable patient data (PHI).
-""")
-st.markdown("""
-Upload a medical image. Gemini Vision will analyze it, and related information
-will be retrieved from a Chroma DB knowledge base using Hugging Face embeddings.
 """)
 # Sidebar
@@ -292,7 +293,7 @@ with st.sidebar:
     st.divider()
-    if st.button("➕ Add/Verify Dummy KB Data", help="Adds example text data to Chroma DB if it doesn't exist."):
          if collection and embedding_func:
              add_dummy_data_to_chroma(collection, embedding_func)
          else:
@@ -300,15 +301,14 @@ with st.sidebar:
     st.divider()
-    st.info(f"""
-    **Setup Info:**
-    - Gemini Model: `{VISION_MODEL_NAME}`
-    - Embedding Model: `{EMBEDDING_MODEL_NAME}`
-    - Chroma Collection: `{COLLECTION_NAME}` (at `{CHROMA_PATH}`)
-    - Distance Metric: `{CHROMA_DISTANCE_METRIC}`
-    """)
-    st.caption(f"Using Google API Key: {'*' * (len(GOOGLE_API_KEY)-4)}{GOOGLE_API_KEY[-4:]}" if GOOGLE_API_KEY else "Not Set")
-    st.caption(f"Using HF Token: {'Provided' if HF_TOKEN else 'Not Provided'}")
 # Main Display Area
 col1, col2 = st.columns(2)
@@ -328,56 +328,59 @@ with col2:
         analysis_text = ""
         analysis_error = False
         with st.status("🧠 Analyzing image with Gemini Vision...", expanded=True) as status_gemini:
-            # The actual analysis function is cached via @st.cache_data
             analysis_text, analysis_error = analyze_image_with_gemini(gemini_model, image_bytes)
             if analysis_error:
-                status_gemini.update(label=f"⚠️ Analysis Failed/Blocked: {analysis_text.split(':')[1].strip() if ':' in analysis_text else 'See details'}", state="error")
-                st.error(f"**Analysis Output:** {analysis_text}") # Show error/block message
             else:
-                status_gemini.update(label="✅ Analysis Complete", state="complete")
                 st.markdown("**Gemini Vision Analysis:**")
-                st.markdown(analysis_text)
         # 2. Query Chroma if Analysis Succeeded
         if not analysis_error and analysis_text:
-            st.markdown("---")
             st.subheader("📚 Related Information (RAG)")
-            with st.status("🔍 Searching knowledge base (Chroma DB)...", expanded=True) as status_chroma:
-                # The actual query function is cached via @st.cache_data
-                chroma_results = query_chroma(collection, analysis_text, n_results=3)
                 if chroma_results and chroma_results.get('documents') and chroma_results['documents'][0]:
                     num_results = len(chroma_results['documents'][0])
-                    status_chroma.update(label=f"✅ Found {num_results} related entries.", state="complete")
                     for i in range(num_results):
                         doc = chroma_results['documents'][0][i]
                         meta = chroma_results['metadatas'][0][i]
                         dist = chroma_results['distances'][0][i]
-                        similarity = 1.0 - dist # For cosine distance
                         expander_title = f"Result {i+1} (Similarity: {similarity:.4f}) | Source: {meta.get('source', 'N/A')}"
                         with st.expander(expander_title):
                             st.markdown("**Retrieved Text:**")
-                            st.markdown(f"> {doc}")
                             st.markdown("**Metadata:**")
-                            # Display metadata keys/values more nicely
                             for key, value in meta.items():
                                 st.markdown(f"- **{key.replace('_', ' ').title()}:** `{value}`")
-                            # Highlight linked image ID
                             if meta.get("IMAGE_ID"):
                                 st.info(f"ℹ️ Associated visual asset ID: `{meta['IMAGE_ID']}`")
                 elif chroma_results is not None: # Query ran, no results
-                    status_chroma.update(label="⚠️ No relevant information found.", state="warning")
-                else: # Error occurred during query (already logged and shown via st.error)
-                     status_chroma.update(label="❌ Failed to retrieve results.", state="error")
     elif not uploaded_file:
         st.info("Analysis results will appear here once an image is uploaded.")
     else:
-        st.error("❌ Analysis cannot proceed. Check if Gemini model or Chroma DB failed to initialize (see sidebar/logs).")
 st.markdown("---")
 st.markdown("<div style='text-align: center; font-size: small;'>Powered by Google Gemini, Chroma DB, Hugging Face, and Streamlit</div>", unsafe_allow_html=True)

 # --- Imports ---
 import streamlit as st
 import google.generativeai as genai
 import logging
 from typing import Optional, Dict, List, Any, Tuple
+# --- Set Page Config FIRST ---
+# This MUST be the first Streamlit command executed in the script.
+st.set_page_config(layout="wide", page_title="Medical Image Analysis & RAG (HF/BioBERT)")
 # --- Basic Logging Setup ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 # Secrets Management (Prioritize Hugging Face Secrets)
 try:
     GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
     HF_TOKEN = st.secrets.get("HF_TOKEN") # Use .get() for optional token
 except KeyError as e:
     err_msg = f"❌ Missing Secret: {e}. Please add it to your Hugging Face Space secrets."
+    # Now it's safe to call st.error after set_page_config
     st.error(err_msg)
     logger.error(err_msg)
     st.stop()
 """
 # Chroma DB Configuration
+CHROMA_PATH = "chroma_data_biobert" # Changed path to reflect model change
+COLLECTION_NAME = "medical_docs_biobert" # Changed collection name
+# --- Embedding Model Selection ---
+# Using BioBERT v1.1 - Good domain knowledge, but potentially suboptimal for *semantic similarity search*.
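+# NOTE: HuggingFaceEmbeddingFunction sends text to the hosted Hugging Face Inference API rather than
+# running sentence-transformers locally, so pooling is whatever that endpoint applies (verify it returns one vector per document).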
+# Consider models fine-tuned for sentence similarity if retrieval quality is low:
+# e.g., 'dmis-lab/sapbert-from-pubmedbert-sentencetransformer'
+EMBEDDING_MODEL_NAME = "dmis-lab/biobert-v1.1"
+CHROMA_DISTANCE_METRIC = "cosine" # Cosine is generally good for sentence embeddings
 # --- Caching Resource Initialization ---
         return model
     except Exception as e:
         err_msg = f"❌ Error initializing Gemini Model ({VISION_MODEL_NAME}): {e}"
+        st.error(err_msg) # Safe to call st.error here now
         logger.error(err_msg, exc_info=True)
         return None
 @st.cache_resource
 def initialize_embedding_function() -> Optional[embedding_functions.HuggingFaceEmbeddingFunction]:
     """Initializes and returns the Hugging Face Embedding Function."""
+    st.info(f"Initializing Embedding Model: {EMBEDDING_MODEL_NAME} (this may take a moment)...")
     try:
         # Pass HF_TOKEN if it exists (required for private/gated models)
         embed_func = embedding_functions.HuggingFaceEmbeddingFunction(
             api_key=HF_TOKEN, # Pass token here if needed by model
             model_name=EMBEDDING_MODEL_NAME
         )
         logger.info(f"Successfully initialized HuggingFace Embedding Function: {EMBEDDING_MODEL_NAME}")
+        st.success(f"Embedding Model {EMBEDDING_MODEL_NAME} initialized.")
         return embed_func
     except Exception as e:
         err_msg = f"❌ Error initializing HuggingFace Embedding Function ({EMBEDDING_MODEL_NAME}): {e}"
+        st.error(err_msg) # Safe here
         logger.error(err_msg, exc_info=True)
         st.info("ℹ️ Make sure the embedding model name is correct and you have network access. "
+                "If using a private model, ensure HF_TOKEN is set in secrets. Check Space logs for details.")
         return None
 @st.cache_resource
 def initialize_chroma_collection(_embedding_func: embedding_functions.EmbeddingFunction) -> Optional[chromadb.Collection]:
     """Initializes the Chroma DB client and returns the collection."""
     if not _embedding_func:
+        st.error("❌ Cannot initialize Chroma DB without a valid embedding function.") # Safe here
         return None
+    st.info(f"Initializing Chroma DB collection '{COLLECTION_NAME}'...")
     try:
         chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
         collection = chroma_client.get_or_create_collection(
             metadata={"hnsw:space": CHROMA_DISTANCE_METRIC}
         )
         logger.info(f"Chroma DB collection '{COLLECTION_NAME}' loaded/created at '{CHROMA_PATH}' using {CHROMA_DISTANCE_METRIC}.")
+        st.success(f"Chroma DB collection '{COLLECTION_NAME}' ready.")
         return collection
     except Exception as e:
         err_msg = f"❌ Error initializing Chroma DB at '{CHROMA_PATH}': {e}"
+        st.error(err_msg) # Safe here
         logger.error(err_msg, exc_info=True)
+        st.info(f"ℹ️ Ensure the path '{CHROMA_PATH}' is writable. Check Space logs.")
         return None
 # --- Core Logic Functions (with Caching for Data Operations) ---
 def query_chroma(_collection: chromadb.Collection, query_text: str, n_results: int = 5) -> Optional[Dict[str, List[Any]]]:
     """Queries Chroma DB, returns results dict or None on error."""
     if not _collection:
+        logger.error("Query attempt failed: Chroma collection is not available.")
         return None
     if not query_text:
         logger.warning("Attempted to query Chroma with empty text.")
         return None
     try:
         refined_query = query_text # Using direct analysis text for now
         results = _collection.query(
         logger.info(f"Chroma query successful for text snippet: '{query_text[:50]}...'")
         return results
     except Exception as e:
+        # Show error in UI as well
+        st.error(f"❌ Error querying Chroma DB: {e}", icon="🚨")
+        logger.error(f"Error querying Chroma DB: {e}", exc_info=True)
         return None
 def add_dummy_data_to_chroma(collection: chromadb.Collection, embedding_func: embedding_functions.EmbeddingFunction):
         st.error("❌ Cannot add dummy data: Chroma Collection or Embedding Function not available.")
         return
+    # Check if dummy data needs adding first to avoid unnecessary processing
+    docs_to_check = [
+        "Figure 1A shows adenocarcinoma of the lung, papillary subtype. Note the glandular structures and nuclear atypia. TTF-1 staining was positive."
+    ] # Only check one doc for speed
+    try:
+        existing_check = collection.get(where={"document": docs_to_check[0]}, limit=1, include=[])
+        if existing_check and existing_check.get('ids'):
+            st.info("Dummy data seems to already exist. Skipping add.")
+            logger.info("Skipping dummy data addition as it likely exists.")
+            return
+    except Exception as e:
+        logger.warning(f"Could not efficiently check for existing dummy data: {e}. Proceeding with add attempt.")
+    status = st.status(f"Adding dummy data (using {EMBEDDING_MODEL_NAME})...", expanded=True)
     try:
         # --- Dummy Data Definition ---
         docs = [
             "Figure 1A shows adenocarcinoma of the lung, papillary subtype. Note the glandular structures and nuclear atypia. TTF-1 staining was positive.",
             "Pathology slide 34B demonstrates high-grade glioma (glioblastoma) with significant necrosis and microvascular proliferation. Ki-67 index was high.",
             {"source": "Path Report 101", "topic": "Gastrointestinal Pathology", "entities": "chronic gastritis, Helicobacter pylori, intestinal metaplasia", "IMAGE_ID": "micrograph_h_pylori_gastritis.jpg"},
             {"source": "Case Study CJD", "topic": "Neuropathology", "entities": "prion disease, Spongiform changes, Gliosis, cerebral cortex", "IMAGE_ID": "slide_cjd_sample_02.jpg"}
         ]
+        # Ensure IDs are unique even if run close together
+        base_id = f"doc_biobert_{int(time.time() * 1000)}"
+        ids = [f"{base_id}_{i}" for i in range(len(docs))]
+        status.update(label=f"Generating embeddings & adding {len(docs)} documents (this uses BioBERT and may take time)...")
+        # Embeddings are generated implicitly by ChromaDB during .add()
+        collection.add(
+            documents=docs,
+            metadatas=metadatas,
+            ids=ids
+        )
+        status.update(label=f"✅ Added {len(docs)} dummy documents.", state="complete", expanded=False)
+        logger.info(f"Added {len(docs)} dummy documents to collection '{COLLECTION_NAME}'.")
     except Exception as e:
         err_msg = f"Error adding dummy data to Chroma: {e}"
+        status.update(label=f"❌ Error: {err_msg}", state="error", expanded=True)
         logger.error(err_msg, exc_info=True)
 # --- Initialize Resources ---
+# These calls use @st.cache_resource, so each runs only once unless its cache is cleared or its code/arguments change.
+# Order matters if one depends on another (embedding func needed for chroma).
 gemini_model = initialize_gemini_model()
 embedding_func = initialize_embedding_function()
+collection = initialize_chroma_collection(embedding_func) # Pass embedding func
 # --- Streamlit UI ---
+# set_page_config() is already called at the top
+st.title("⚕️ Medical Image Analysis & RAG (BioBERT Embeddings)")
 # --- DISCLAIMER ---
 st.warning("""
 **⚠️ Disclaimer:** This tool is for demonstration and informational purposes ONLY.
 It is **NOT** a medical device and should **NOT** be used for actual medical diagnosis, treatment, or decision-making.
 AI analysis can be imperfect. Always consult with qualified healthcare professionals for any medical concerns.
+Do **NOT** upload identifiable patient data (PHI). Analysis quality depends heavily on the chosen embedding model.
+""", icon="☣️")
+st.markdown(f"""
+Upload a medical image. Gemini Vision will analyze it. Related information
+will be retrieved from a Chroma DB knowledge base using **{EMBEDDING_MODEL_NAME}** embeddings.
 """)
 # Sidebar
     st.divider()
+    if st.button("➕ Add/Verify Dummy KB Data", help=f"Adds example text data to Chroma DB ({COLLECTION_NAME}) if it doesn't exist."):
          if collection and embedding_func:
              add_dummy_data_to_chroma(collection, embedding_func)
          else:
     st.divider()
+    st.header("ℹ️ System Info")
+    st.caption(f"**Gemini Model:** `{VISION_MODEL_NAME}`")
+    st.caption(f"**Embedding Model:** `{EMBEDDING_MODEL_NAME}`")
+    st.caption(f"**Chroma Collection:** `{COLLECTION_NAME}`")
+    st.caption(f"**Chroma Path:** `{CHROMA_PATH}`")
+    st.caption(f"**Distance Metric:** `{CHROMA_DISTANCE_METRIC}`")
+    st.caption(f"**Google API Key:** {'Set' if GOOGLE_API_KEY else 'Not Set'}")
+    st.caption(f"**HF Token:** {'Provided' if HF_TOKEN else 'Not Provided'}")
 # Main Display Area
 col1, col2 = st.columns(2)
         analysis_text = ""
         analysis_error = False
         with st.status("🧠 Analyzing image with Gemini Vision...", expanded=True) as status_gemini:
             analysis_text, analysis_error = analyze_image_with_gemini(gemini_model, image_bytes)
             if analysis_error:
+                # Shorten the message for status if needed
+                status_label = f"⚠️ Analysis Failed/Blocked: {analysis_text.split(':')[0]}"
+                status_gemini.update(label=status_label , state="error")
+                st.error(f"**Analysis Output:** {analysis_text}", icon="🚨")
             else:
+                status_gemini.update(label="✅ Analysis Complete", state="complete", expanded=False)
                 st.markdown("**Gemini Vision Analysis:**")
+                st.markdown(analysis_text) # Display the successful analysis
         # 2. Query Chroma if Analysis Succeeded
         if not analysis_error and analysis_text:
+            st.markdown("---") # Separator
             st.subheader("📚 Related Information (RAG)")
+            with st.status(f"🔍 Searching knowledge base (Chroma DB w/ BioBERT)...", expanded=True) as status_chroma:
+                chroma_results = query_chroma(collection, analysis_text, n_results=3) # Fetch top 3
                 if chroma_results and chroma_results.get('documents') and chroma_results['documents'][0]:
                     num_results = len(chroma_results['documents'][0])
+                    status_chroma.update(label=f"✅ Found {num_results} related entries.", state="complete", expanded=False)
                     for i in range(num_results):
                         doc = chroma_results['documents'][0][i]
                         meta = chroma_results['metadatas'][0][i]
                         dist = chroma_results['distances'][0][i]
+                        # Ensure distance is float before calculation
+                        similarity = 1.0 - float(dist) if dist is not None else 0.0
                         expander_title = f"Result {i+1} (Similarity: {similarity:.4f}) | Source: {meta.get('source', 'N/A')}"
                         with st.expander(expander_title):
                             st.markdown("**Retrieved Text:**")
+                            st.markdown(f"> {doc}") # Use blockquote
                             st.markdown("**Metadata:**")
                             for key, value in meta.items():
                                 st.markdown(f"- **{key.replace('_', ' ').title()}:** `{value}`")
                             if meta.get("IMAGE_ID"):
                                 st.info(f"ℹ️ Associated visual asset ID: `{meta['IMAGE_ID']}`")
                 elif chroma_results is not None: # Query ran, no results
+                    status_chroma.update(label="⚠️ No relevant information found.", state="warning", expanded=False)
+                    st.warning("No relevant documents found in the knowledge base for this analysis.", icon="⚠️")
+                # Error case is handled by st.error within query_chroma itself
+                elif chroma_results is None:
+                    status_chroma.update(label="❌ Failed to retrieve results.", state="error", expanded=True)
     elif not uploaded_file:
         st.info("Analysis results will appear here once an image is uploaded.")
     else:
+        # Initialization error occurred earlier, resources might be None
+        st.error("❌ Analysis cannot proceed. Check if Gemini model or Chroma DB failed to initialize (see sidebar info & Space logs).")
 st.markdown("---")
 st.markdown("<div style='text-align: center; font-size: small;'>Powered by Google Gemini, Chroma DB, Hugging Face, and Streamlit</div>", unsafe_allow_html=True)