mgbam committed on
Commit
81a11e5
·
verified ·
1 Parent(s): ed31030

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -98
app.py CHANGED
@@ -1,18 +1,3 @@
1
- # --- Docstring ---
2
- """
3
- Streamlit application for Medical Image Analysis using Google Gemini Vision
4
- and Retrieval-Augmented Generation (RAG) with Chroma DB, enhanced for
5
- Hugging Face Spaces deployment and improved practices.
6
-
7
- Features:
8
- - Image analysis via Google Gemini Pro Vision.
9
- - RAG using Chroma DB with Hugging Face embeddings.
10
- - Caching for performance.
11
- - Basic logging.
12
- - Improved UX and error handling.
13
- - Explicit Disclaimer.
14
- """
15
-
16
  # --- Imports ---
17
  import streamlit as st
18
  import google.generativeai as genai
@@ -24,6 +9,10 @@ import time
24
  import logging
25
  from typing import Optional, Dict, List, Any, Tuple
26
 
 
 
 
 
27
  # --- Basic Logging Setup ---
28
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
29
  logger = logging.getLogger(__name__)
@@ -32,10 +21,10 @@ logger = logging.getLogger(__name__)
32
  # Secrets Management (Prioritize Hugging Face Secrets)
33
  try:
34
  GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
35
- # HF_TOKEN is optional for many public models, but required for gated/private ones
36
  HF_TOKEN = st.secrets.get("HF_TOKEN") # Use .get() for optional token
37
  except KeyError as e:
38
  err_msg = f"❌ Missing Secret: {e}. Please add it to your Hugging Face Space secrets."
 
39
  st.error(err_msg)
40
  logger.error(err_msg)
41
  st.stop()
@@ -73,16 +62,16 @@ Structure the output clearly, perhaps using bullet points for findings.
73
  """
74
 
75
  # Chroma DB Configuration
76
- CHROMA_PATH = "chroma_data_hf" # Use a distinct path if needed
77
- COLLECTION_NAME = "medical_docs_hf"
78
- # IMPORTANT: Choose an appropriate HF embedding model. 'all-mpnet-base-v2' is general purpose.
79
- # For better medical results, consider models like:
80
- # - 'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext' (might need more RAM/compute)
81
- # - 'dmis-lab/sapbert-from-pubmedbert-sentencetransformer'
82
- # - Other models tagged 'medical' or 'biomedical' on Hugging Face Hub.
83
- # Ensure the chosen model is compatible with chromadb's HuggingFaceEmbeddingFunction.
84
- EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2" # <-- REPLACE if possible
85
- CHROMA_DISTANCE_METRIC = "cosine"
86
 
87
  # --- Caching Resource Initialization ---
88
 
@@ -100,36 +89,38 @@ def initialize_gemini_model() -> Optional[genai.GenerativeModel]:
100
  return model
101
  except Exception as e:
102
  err_msg = f"❌ Error initializing Gemini Model ({VISION_MODEL_NAME}): {e}"
103
- st.error(err_msg)
104
  logger.error(err_msg, exc_info=True)
105
  return None
106
 
107
  @st.cache_resource
108
  def initialize_embedding_function() -> Optional[embedding_functions.HuggingFaceEmbeddingFunction]:
109
  """Initializes and returns the Hugging Face Embedding Function."""
 
110
  try:
111
  # Pass HF_TOKEN if it exists (required for private/gated models)
112
- api_key_param = {"api_key": HF_TOKEN} if HF_TOKEN else {}
113
  embed_func = embedding_functions.HuggingFaceEmbeddingFunction(
114
  api_key=HF_TOKEN, # Pass token here if needed by model
115
  model_name=EMBEDDING_MODEL_NAME
116
  )
117
  logger.info(f"Successfully initialized HuggingFace Embedding Function: {EMBEDDING_MODEL_NAME}")
 
118
  return embed_func
119
  except Exception as e:
120
  err_msg = f"❌ Error initializing HuggingFace Embedding Function ({EMBEDDING_MODEL_NAME}): {e}"
121
- st.error(err_msg)
122
  logger.error(err_msg, exc_info=True)
123
  st.info("ℹ️ Make sure the embedding model name is correct and you have network access. "
124
- "If using a private model, ensure HF_TOKEN is set in secrets.")
125
  return None
126
 
127
  @st.cache_resource
128
  def initialize_chroma_collection(_embedding_func: embedding_functions.EmbeddingFunction) -> Optional[chromadb.Collection]:
129
  """Initializes the Chroma DB client and returns the collection."""
130
  if not _embedding_func:
131
- st.error("❌ Cannot initialize Chroma DB without a valid embedding function.")
132
  return None
 
133
  try:
134
  chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
135
  collection = chroma_client.get_or_create_collection(
@@ -138,12 +129,13 @@ def initialize_chroma_collection(_embedding_func: embedding_functions.EmbeddingF
138
  metadata={"hnsw:space": CHROMA_DISTANCE_METRIC}
139
  )
140
  logger.info(f"Chroma DB collection '{COLLECTION_NAME}' loaded/created at '{CHROMA_PATH}' using {CHROMA_DISTANCE_METRIC}.")
 
141
  return collection
142
  except Exception as e:
143
  err_msg = f"❌ Error initializing Chroma DB at '{CHROMA_PATH}': {e}"
144
- st.error(err_msg)
145
  logger.error(err_msg, exc_info=True)
146
- st.info(f"ℹ️ Ensure the path '{CHROMA_PATH}' is writable.")
147
  return None
148
 
149
  # --- Core Logic Functions (with Caching for Data Operations) ---
@@ -187,13 +179,12 @@ def analyze_image_with_gemini(_gemini_model: genai.GenerativeModel, image_bytes:
187
  def query_chroma(_collection: chromadb.Collection, query_text: str, n_results: int = 5) -> Optional[Dict[str, List[Any]]]:
188
  """Queries Chroma DB, returns results dict or None on error."""
189
  if not _collection:
 
190
  return None
191
  if not query_text:
192
  logger.warning("Attempted to query Chroma with empty text.")
193
  return None
194
  try:
195
- # Placeholder for potential query refinement:
196
- # refined_query = refine_query_for_chroma(query_text) # Implement this if needed
197
  refined_query = query_text # Using direct analysis text for now
198
 
199
  results = _collection.query(
@@ -204,9 +195,9 @@ def query_chroma(_collection: chromadb.Collection, query_text: str, n_results: i
204
  logger.info(f"Chroma query successful for text snippet: '{query_text[:50]}...'")
205
  return results
206
  except Exception as e:
207
- err_msg = f"Error querying Chroma DB: {e}"
208
- st.error(err_msg) # Show error in UI as well
209
- logger.error(err_msg, exc_info=True)
210
  return None
211
 
212
  def add_dummy_data_to_chroma(collection: chromadb.Collection, embedding_func: embedding_functions.EmbeddingFunction):
@@ -215,10 +206,23 @@ def add_dummy_data_to_chroma(collection: chromadb.Collection, embedding_func: em
215
  st.error("❌ Cannot add dummy data: Chroma Collection or Embedding Function not available.")
216
  return
217
 
218
- status = st.status("Adding dummy data to Chroma DB...", expanded=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  try:
220
  # --- Dummy Data Definition ---
221
- # (Same data as before, but ensure metadata is useful)
222
  docs = [
223
  "Figure 1A shows adenocarcinoma of the lung, papillary subtype. Note the glandular structures and nuclear atypia. TTF-1 staining was positive.",
224
  "Pathology slide 34B demonstrates high-grade glioma (glioblastoma) with significant necrosis and microvascular proliferation. Ki-67 index was high.",
@@ -233,52 +237,49 @@ def add_dummy_data_to_chroma(collection: chromadb.Collection, embedding_func: em
233
  {"source": "Path Report 101", "topic": "Gastrointestinal Pathology", "entities": "chronic gastritis, Helicobacter pylori, intestinal metaplasia", "IMAGE_ID": "micrograph_h_pylori_gastritis.jpg"},
234
  {"source": "Case Study CJD", "topic": "Neuropathology", "entities": "prion disease, Spongiform changes, Gliosis, cerebral cortex", "IMAGE_ID": "slide_cjd_sample_02.jpg"}
235
  ]
236
- ids = [f"doc_hf_{int(time.time())}_{i}" for i in range(len(docs))]
237
-
238
- # Check for existing documents (simple check based on text)
239
- status.update(label="Checking for existing dummy documents...")
240
- existing_docs = collection.get(where={"$or": [{"document": doc} for doc in docs]}, include=[])
241
- if not existing_docs or not existing_docs.get('ids'):
242
- status.update(label=f"Generating embeddings for {len(docs)} documents (may take time)...")
243
- # Embeddings are generated implicitly by ChromaDB during .add()
244
- # when an embedding_function is configured for the collection.
245
- collection.add(
246
- documents=docs,
247
- metadatas=metadatas,
248
- ids=ids
249
- )
250
- status.update(label=f"✅ Added {len(docs)} dummy documents.", state="complete")
251
- logger.info(f"Added {len(docs)} dummy documents to collection '{COLLECTION_NAME}'.")
252
- else:
253
- status.update(label="⚠️ Dummy data already exists. No new data added.", state="complete")
254
- logger.warning("Dummy data seems to already exist in the collection based on text match.")
255
 
256
  except Exception as e:
257
  err_msg = f"Error adding dummy data to Chroma: {e}"
258
- status.update(label=f"❌ Error: {err_msg}", state="error")
259
  logger.error(err_msg, exc_info=True)
260
 
261
  # --- Initialize Resources ---
262
- # These calls use @st.cache_resource, so they run only once per session/resource change.
 
263
  gemini_model = initialize_gemini_model()
264
  embedding_func = initialize_embedding_function()
265
- collection = initialize_chroma_collection(embedding_func) # Pass embedding func to chroma init
266
 
267
  # --- Streamlit UI ---
268
- st.set_page_config(layout="wide", page_title="Medical Image Analysis & RAG (HF)")
269
- st.title("⚕️ Medical Image Analysis & RAG (Hugging Face Enhanced)")
 
270
 
271
  # --- DISCLAIMER ---
272
  st.warning("""
273
  **⚠️ Disclaimer:** This tool is for demonstration and informational purposes ONLY.
274
  It is **NOT** a medical device and should **NOT** be used for actual medical diagnosis, treatment, or decision-making.
275
  AI analysis can be imperfect. Always consult with qualified healthcare professionals for any medical concerns.
276
- Do **NOT** upload identifiable patient data (PHI).
277
- """)
278
 
279
- st.markdown("""
280
- Upload a medical image. Gemini Vision will analyze it, and related information
281
- will be retrieved from a Chroma DB knowledge base using Hugging Face embeddings.
282
  """)
283
 
284
  # Sidebar
@@ -292,7 +293,7 @@ with st.sidebar:
292
 
293
  st.divider()
294
 
295
- if st.button("➕ Add/Verify Dummy KB Data", help="Adds example text data to Chroma DB if it doesn't exist."):
296
  if collection and embedding_func:
297
  add_dummy_data_to_chroma(collection, embedding_func)
298
  else:
@@ -300,15 +301,14 @@ with st.sidebar:
300
 
301
  st.divider()
302
 
303
- st.info(f"""
304
- **Setup Info:**
305
- - Gemini Model: `{VISION_MODEL_NAME}`
306
- - Embedding Model: `{EMBEDDING_MODEL_NAME}`
307
- - Chroma Collection: `{COLLECTION_NAME}` (at `{CHROMA_PATH}`)
308
- - Distance Metric: `{CHROMA_DISTANCE_METRIC}`
309
- """)
310
- st.caption(f"Using Google API Key: {'*' * (len(GOOGLE_API_KEY)-4)}{GOOGLE_API_KEY[-4:]}" if GOOGLE_API_KEY else "Not Set")
311
- st.caption(f"Using HF Token: {'Provided' if HF_TOKEN else 'Not Provided'}")
312
 
313
  # Main Display Area
314
  col1, col2 = st.columns(2)
@@ -328,56 +328,59 @@ with col2:
328
  analysis_text = ""
329
  analysis_error = False
330
  with st.status("🧠 Analyzing image with Gemini Vision...", expanded=True) as status_gemini:
331
- # The actual analysis function is cached via @st.cache_data
332
  analysis_text, analysis_error = analyze_image_with_gemini(gemini_model, image_bytes)
333
  if analysis_error:
334
- status_gemini.update(label=f"⚠️ Analysis Failed/Blocked: {analysis_text.split(':')[1].strip() if ':' in analysis_text else 'See details'}", state="error")
335
- st.error(f"**Analysis Output:** {analysis_text}") # Show error/block message
 
 
336
  else:
337
- status_gemini.update(label="✅ Analysis Complete", state="complete")
338
  st.markdown("**Gemini Vision Analysis:**")
339
- st.markdown(analysis_text)
340
 
341
  # 2. Query Chroma if Analysis Succeeded
342
  if not analysis_error and analysis_text:
343
- st.markdown("---")
344
  st.subheader("📚 Related Information (RAG)")
345
- with st.status("🔍 Searching knowledge base (Chroma DB)...", expanded=True) as status_chroma:
346
- # The actual query function is cached via @st.cache_data
347
- chroma_results = query_chroma(collection, analysis_text, n_results=3)
348
 
349
  if chroma_results and chroma_results.get('documents') and chroma_results['documents'][0]:
350
  num_results = len(chroma_results['documents'][0])
351
- status_chroma.update(label=f"✅ Found {num_results} related entries.", state="complete")
352
 
353
  for i in range(num_results):
354
  doc = chroma_results['documents'][0][i]
355
  meta = chroma_results['metadatas'][0][i]
356
  dist = chroma_results['distances'][0][i]
357
- similarity = 1.0 - dist # For cosine distance
 
358
 
359
  expander_title = f"Result {i+1} (Similarity: {similarity:.4f}) | Source: {meta.get('source', 'N/A')}"
360
  with st.expander(expander_title):
361
  st.markdown("**Retrieved Text:**")
362
- st.markdown(f"> {doc}")
363
  st.markdown("**Metadata:**")
364
- # Display metadata keys/values more nicely
365
  for key, value in meta.items():
366
  st.markdown(f"- **{key.replace('_', ' ').title()}:** `{value}`")
367
-
368
- # Highlight linked image ID
369
  if meta.get("IMAGE_ID"):
370
  st.info(f"ℹ️ Associated visual asset ID: `{meta['IMAGE_ID']}`")
371
 
372
  elif chroma_results is not None: # Query ran, no results
373
- status_chroma.update(label="⚠️ No relevant information found.", state="warning")
374
- else: # Error occurred during query (already logged and shown via st.error)
375
- status_chroma.update(label="❌ Failed to retrieve results.", state="error")
 
 
 
376
 
377
  elif not uploaded_file:
378
  st.info("Analysis results will appear here once an image is uploaded.")
379
  else:
380
- st.error("❌ Analysis cannot proceed. Check if Gemini model or Chroma DB failed to initialize (see sidebar/logs).")
 
 
381
 
382
  st.markdown("---")
383
  st.markdown("<div style='text-align: center; font-size: small;'>Powered by Google Gemini, Chroma DB, Hugging Face, and Streamlit</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # --- Imports ---
2
  import streamlit as st
3
  import google.generativeai as genai
 
9
  import logging
10
  from typing import Optional, Dict, List, Any, Tuple
11
 
12
+ # --- Set Page Config FIRST ---
13
+ # This MUST be the first Streamlit command executed in the script.
14
+ st.set_page_config(layout="wide", page_title="Medical Image Analysis & RAG (HF/BioBERT)")
15
+
16
  # --- Basic Logging Setup ---
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
18
  logger = logging.getLogger(__name__)
 
21
  # Secrets Management (Prioritize Hugging Face Secrets)
22
  try:
23
  GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
 
24
  HF_TOKEN = st.secrets.get("HF_TOKEN") # Use .get() for optional token
25
  except KeyError as e:
26
  err_msg = f"❌ Missing Secret: {e}. Please add it to your Hugging Face Space secrets."
27
+ # Now it's safe to call st.error after set_page_config
28
  st.error(err_msg)
29
  logger.error(err_msg)
30
  st.stop()
 
62
  """
63
 
64
  # Chroma DB Configuration
65
+ CHROMA_PATH = "chroma_data_biobert" # Changed path to reflect model change
66
+ COLLECTION_NAME = "medical_docs_biobert" # Changed collection name
67
+
68
+ # --- Embedding Model Selection ---
69
+ # Using BioBERT v1.1 - Good domain knowledge, but potentially suboptimal for *semantic similarity search*.
70
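+ # NOTE: chromadb's HuggingFaceEmbeddingFunction sends texts to the hosted Hugging Face Inference API (it does not load sentence-transformers locally), so pooling is whatever that endpoint applies - verify that one vector per document is returned.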
71
+ # Consider models fine-tuned for sentence similarity if retrieval quality is low:
72
+ # e.g., 'dmis-lab/sapbert-from-pubmedbert-sentencetransformer'
73
+ EMBEDDING_MODEL_NAME = "dmis-lab/biobert-v1.1"
74
+ CHROMA_DISTANCE_METRIC = "cosine" # Cosine is generally good for sentence embeddings
75
 
76
  # --- Caching Resource Initialization ---
77
 
 
89
  return model
90
  except Exception as e:
91
  err_msg = f"❌ Error initializing Gemini Model ({VISION_MODEL_NAME}): {e}"
92
+ st.error(err_msg) # Safe to call st.error here now
93
  logger.error(err_msg, exc_info=True)
94
  return None
95
 
96
  @st.cache_resource
97
  def initialize_embedding_function() -> Optional[embedding_functions.HuggingFaceEmbeddingFunction]:
98
  """Initializes and returns the Hugging Face Embedding Function."""
99
+ st.info(f"Initializing Embedding Model: {EMBEDDING_MODEL_NAME} (this may take a moment)...")
100
  try:
101
  # Pass HF_TOKEN if it exists (required for private/gated models)
 
102
  embed_func = embedding_functions.HuggingFaceEmbeddingFunction(
103
  api_key=HF_TOKEN, # Pass token here if needed by model
104
  model_name=EMBEDDING_MODEL_NAME
105
  )
106
  logger.info(f"Successfully initialized HuggingFace Embedding Function: {EMBEDDING_MODEL_NAME}")
107
+ st.success(f"Embedding Model {EMBEDDING_MODEL_NAME} initialized.")
108
  return embed_func
109
  except Exception as e:
110
  err_msg = f"❌ Error initializing HuggingFace Embedding Function ({EMBEDDING_MODEL_NAME}): {e}"
111
+ st.error(err_msg) # Safe here
112
  logger.error(err_msg, exc_info=True)
113
  st.info("ℹ️ Make sure the embedding model name is correct and you have network access. "
114
+ "If using a private model, ensure HF_TOKEN is set in secrets. Check Space logs for details.")
115
  return None
116
 
117
  @st.cache_resource
118
  def initialize_chroma_collection(_embedding_func: embedding_functions.EmbeddingFunction) -> Optional[chromadb.Collection]:
119
  """Initializes the Chroma DB client and returns the collection."""
120
  if not _embedding_func:
121
+ st.error("❌ Cannot initialize Chroma DB without a valid embedding function.") # Safe here
122
  return None
123
+ st.info(f"Initializing Chroma DB collection '{COLLECTION_NAME}'...")
124
  try:
125
  chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
126
  collection = chroma_client.get_or_create_collection(
 
129
  metadata={"hnsw:space": CHROMA_DISTANCE_METRIC}
130
  )
131
  logger.info(f"Chroma DB collection '{COLLECTION_NAME}' loaded/created at '{CHROMA_PATH}' using {CHROMA_DISTANCE_METRIC}.")
132
+ st.success(f"Chroma DB collection '{COLLECTION_NAME}' ready.")
133
  return collection
134
  except Exception as e:
135
  err_msg = f"❌ Error initializing Chroma DB at '{CHROMA_PATH}': {e}"
136
+ st.error(err_msg) # Safe here
137
  logger.error(err_msg, exc_info=True)
138
+ st.info(f"ℹ️ Ensure the path '{CHROMA_PATH}' is writable. Check Space logs.")
139
  return None
140
 
141
  # --- Core Logic Functions (with Caching for Data Operations) ---
 
179
  def query_chroma(_collection: chromadb.Collection, query_text: str, n_results: int = 5) -> Optional[Dict[str, List[Any]]]:
180
  """Queries Chroma DB, returns results dict or None on error."""
181
  if not _collection:
182
+ logger.error("Query attempt failed: Chroma collection is not available.")
183
  return None
184
  if not query_text:
185
  logger.warning("Attempted to query Chroma with empty text.")
186
  return None
187
  try:
 
 
188
  refined_query = query_text # Using direct analysis text for now
189
 
190
  results = _collection.query(
 
195
  logger.info(f"Chroma query successful for text snippet: '{query_text[:50]}...'")
196
  return results
197
  except Exception as e:
198
+ # Show error in UI as well
199
+ st.error(f"❌ Error querying Chroma DB: {e}", icon="🚨")
200
+ logger.error(f"Error querying Chroma DB: {e}", exc_info=True)
201
  return None
202
 
203
  def add_dummy_data_to_chroma(collection: chromadb.Collection, embedding_func: embedding_functions.EmbeddingFunction):
 
206
  st.error("❌ Cannot add dummy data: Chroma Collection or Embedding Function not available.")
207
  return
208
 
209
+ # Check if dummy data needs adding first to avoid unnecessary processing
210
+ docs_to_check = [
211
+ "Figure 1A shows adenocarcinoma of the lung, papillary subtype. Note the glandular structures and nuclear atypia. TTF-1 staining was positive."
212
+ ] # Only check one doc for speed
213
+ try:
214
+ existing_check = collection.get(where={"document": docs_to_check[0]}, limit=1, include=[])
215
+ if existing_check and existing_check.get('ids'):
216
+ st.info("Dummy data seems to already exist. Skipping add.")
217
+ logger.info("Skipping dummy data addition as it likely exists.")
218
+ return
219
+ except Exception as e:
220
+ logger.warning(f"Could not efficiently check for existing dummy data: {e}. Proceeding with add attempt.")
221
+
222
+
223
+ status = st.status(f"Adding dummy data (using {EMBEDDING_MODEL_NAME})...", expanded=True)
224
  try:
225
  # --- Dummy Data Definition ---
 
226
  docs = [
227
  "Figure 1A shows adenocarcinoma of the lung, papillary subtype. Note the glandular structures and nuclear atypia. TTF-1 staining was positive.",
228
  "Pathology slide 34B demonstrates high-grade glioma (glioblastoma) with significant necrosis and microvascular proliferation. Ki-67 index was high.",
 
237
  {"source": "Path Report 101", "topic": "Gastrointestinal Pathology", "entities": "chronic gastritis, Helicobacter pylori, intestinal metaplasia", "IMAGE_ID": "micrograph_h_pylori_gastritis.jpg"},
238
  {"source": "Case Study CJD", "topic": "Neuropathology", "entities": "prion disease, Spongiform changes, Gliosis, cerebral cortex", "IMAGE_ID": "slide_cjd_sample_02.jpg"}
239
  ]
240
+ # Ensure IDs are unique even if run close together
241
+ base_id = f"doc_biobert_{int(time.time() * 1000)}"
242
+ ids = [f"{base_id}_{i}" for i in range(len(docs))]
243
+
244
+ status.update(label=f"Generating embeddings & adding {len(docs)} documents (this uses BioBERT and may take time)...")
245
+
246
+ # Embeddings are generated implicitly by ChromaDB during .add()
247
+ collection.add(
248
+ documents=docs,
249
+ metadatas=metadatas,
250
+ ids=ids
251
+ )
252
+ status.update(label=f"✅ Added {len(docs)} dummy documents.", state="complete", expanded=False)
253
+ logger.info(f"Added {len(docs)} dummy documents to collection '{COLLECTION_NAME}'.")
 
 
 
 
 
254
 
255
  except Exception as e:
256
  err_msg = f"Error adding dummy data to Chroma: {e}"
257
+ status.update(label=f"❌ Error: {err_msg}", state="error", expanded=True)
258
  logger.error(err_msg, exc_info=True)
259
 
260
  # --- Initialize Resources ---
261
+ # These calls use @st.cache_resource, run only once unless cleared/changed.
262
+ # Order matters if one depends on another (embedding func needed for chroma).
263
  gemini_model = initialize_gemini_model()
264
  embedding_func = initialize_embedding_function()
265
+ collection = initialize_chroma_collection(embedding_func) # Pass embedding func
266
 
267
  # --- Streamlit UI ---
268
+ # set_page_config() is already called at the top
269
+
270
+ st.title("⚕️ Medical Image Analysis & RAG (BioBERT Embeddings)")
271
 
272
  # --- DISCLAIMER ---
273
  st.warning("""
274
  **⚠️ Disclaimer:** This tool is for demonstration and informational purposes ONLY.
275
  It is **NOT** a medical device and should **NOT** be used for actual medical diagnosis, treatment, or decision-making.
276
  AI analysis can be imperfect. Always consult with qualified healthcare professionals for any medical concerns.
277
+ Do **NOT** upload identifiable patient data (PHI). Analysis quality depends heavily on the chosen embedding model.
278
+ """, icon="☣️")
279
 
280
+ st.markdown(f"""
281
+ Upload a medical image. Gemini Vision will analyze it. Related information
282
+ will be retrieved from a Chroma DB knowledge base using **{EMBEDDING_MODEL_NAME}** embeddings.
283
  """)
284
 
285
  # Sidebar
 
293
 
294
  st.divider()
295
 
296
+ if st.button("➕ Add/Verify Dummy KB Data", help=f"Adds example text data to Chroma DB ({COLLECTION_NAME}) if it doesn't exist."):
297
  if collection and embedding_func:
298
  add_dummy_data_to_chroma(collection, embedding_func)
299
  else:
 
301
 
302
  st.divider()
303
 
304
+ st.header("ℹ️ System Info")
305
+ st.caption(f"**Gemini Model:** `{VISION_MODEL_NAME}`")
306
+ st.caption(f"**Embedding Model:** `{EMBEDDING_MODEL_NAME}`")
307
+ st.caption(f"**Chroma Collection:** `{COLLECTION_NAME}`")
308
+ st.caption(f"**Chroma Path:** `{CHROMA_PATH}`")
309
+ st.caption(f"**Distance Metric:** `{CHROMA_DISTANCE_METRIC}`")
310
+ st.caption(f"**Google API Key:** {'Set' if GOOGLE_API_KEY else 'Not Set'}")
311
+ st.caption(f"**HF Token:** {'Provided' if HF_TOKEN else 'Not Provided'}")
 
312
 
313
  # Main Display Area
314
  col1, col2 = st.columns(2)
 
328
  analysis_text = ""
329
  analysis_error = False
330
  with st.status("🧠 Analyzing image with Gemini Vision...", expanded=True) as status_gemini:
 
331
  analysis_text, analysis_error = analyze_image_with_gemini(gemini_model, image_bytes)
332
  if analysis_error:
333
+ # Shorten the message for status if needed
334
+ status_label = f"⚠️ Analysis Failed/Blocked: {analysis_text.split(':')[0]}"
335
+ status_gemini.update(label=status_label , state="error")
336
+ st.error(f"**Analysis Output:** {analysis_text}", icon="🚨")
337
  else:
338
+ status_gemini.update(label="✅ Analysis Complete", state="complete", expanded=False)
339
  st.markdown("**Gemini Vision Analysis:**")
340
+ st.markdown(analysis_text) # Display the successful analysis
341
 
342
  # 2. Query Chroma if Analysis Succeeded
343
  if not analysis_error and analysis_text:
344
+ st.markdown("---") # Separator
345
  st.subheader("📚 Related Information (RAG)")
346
+ with st.status(f"🔍 Searching knowledge base (Chroma DB w/ BioBERT)...", expanded=True) as status_chroma:
347
+ chroma_results = query_chroma(collection, analysis_text, n_results=3) # Fetch top 3
 
348
 
349
  if chroma_results and chroma_results.get('documents') and chroma_results['documents'][0]:
350
  num_results = len(chroma_results['documents'][0])
351
+ status_chroma.update(label=f"✅ Found {num_results} related entries.", state="complete", expanded=False)
352
 
353
  for i in range(num_results):
354
  doc = chroma_results['documents'][0][i]
355
  meta = chroma_results['metadatas'][0][i]
356
  dist = chroma_results['distances'][0][i]
357
+ # Ensure distance is float before calculation
358
+ similarity = 1.0 - float(dist) if dist is not None else 0.0
359
 
360
  expander_title = f"Result {i+1} (Similarity: {similarity:.4f}) | Source: {meta.get('source', 'N/A')}"
361
  with st.expander(expander_title):
362
  st.markdown("**Retrieved Text:**")
363
+ st.markdown(f"> {doc}") # Use blockquote
364
  st.markdown("**Metadata:**")
 
365
  for key, value in meta.items():
366
  st.markdown(f"- **{key.replace('_', ' ').title()}:** `{value}`")
 
 
367
  if meta.get("IMAGE_ID"):
368
  st.info(f"ℹ️ Associated visual asset ID: `{meta['IMAGE_ID']}`")
369
 
370
  elif chroma_results is not None: # Query ran, no results
371
+ status_chroma.update(label="⚠️ No relevant information found.", state="warning", expanded=False)
372
+ st.warning("No relevant documents found in the knowledge base for this analysis.", icon="⚠️")
373
+ # Error case is handled by st.error within query_chroma itself
374
+ elif chroma_results is None:
375
+ status_chroma.update(label="❌ Failed to retrieve results.", state="error", expanded=True)
376
+
377
 
378
  elif not uploaded_file:
379
  st.info("Analysis results will appear here once an image is uploaded.")
380
  else:
381
+ # Initialization error occurred earlier, resources might be None
382
+ st.error("❌ Analysis cannot proceed. Check if Gemini model or Chroma DB failed to initialize (see sidebar info & Space logs).")
383
+
384
 
385
  st.markdown("---")
386
  st.markdown("<div style='text-align: center; font-size: small;'>Powered by Google Gemini, Chroma DB, Hugging Face, and Streamlit</div>", unsafe_allow_html=True)