Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,18 +1,3 @@
|
|
1 |
-
# --- Docstring ---
|
2 |
-
"""
|
3 |
-
Streamlit application for Medical Image Analysis using Google Gemini Vision
|
4 |
-
and Retrieval-Augmented Generation (RAG) with Chroma DB, enhanced for
|
5 |
-
Hugging Face Spaces deployment and improved practices.
|
6 |
-
|
7 |
-
Features:
|
8 |
-
- Image analysis via Google Gemini Pro Vision.
|
9 |
-
- RAG using Chroma DB with Hugging Face embeddings.
|
10 |
-
- Caching for performance.
|
11 |
-
- Basic logging.
|
12 |
-
- Improved UX and error handling.
|
13 |
-
- Explicit Disclaimer.
|
14 |
-
"""
|
15 |
-
|
16 |
# --- Imports ---
|
17 |
import streamlit as st
|
18 |
import google.generativeai as genai
|
@@ -24,6 +9,10 @@ import time
|
|
24 |
import logging
|
25 |
from typing import Optional, Dict, List, Any, Tuple
|
26 |
|
|
|
|
|
|
|
|
|
27 |
# --- Basic Logging Setup ---
|
28 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
29 |
logger = logging.getLogger(__name__)
|
@@ -32,10 +21,10 @@ logger = logging.getLogger(__name__)
|
|
32 |
# Secrets Management (Prioritize Hugging Face Secrets)
|
33 |
try:
|
34 |
GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
|
35 |
-
# HF_TOKEN is optional for many public models, but required for gated/private ones
|
36 |
HF_TOKEN = st.secrets.get("HF_TOKEN") # Use .get() for optional token
|
37 |
except KeyError as e:
|
38 |
err_msg = f"β Missing Secret: {e}. Please add it to your Hugging Face Space secrets."
|
|
|
39 |
st.error(err_msg)
|
40 |
logger.error(err_msg)
|
41 |
st.stop()
|
@@ -73,16 +62,16 @@ Structure the output clearly, perhaps using bullet points for findings.
|
|
73 |
"""
|
74 |
|
75 |
# Chroma DB Configuration
|
76 |
-
CHROMA_PATH = "
|
77 |
-
COLLECTION_NAME = "
|
78 |
-
|
79 |
-
#
|
80 |
-
#
|
81 |
-
#
|
82 |
-
#
|
83 |
-
#
|
84 |
-
EMBEDDING_MODEL_NAME = "
|
85 |
-
CHROMA_DISTANCE_METRIC = "cosine"
|
86 |
|
87 |
# --- Caching Resource Initialization ---
|
88 |
|
@@ -100,36 +89,38 @@ def initialize_gemini_model() -> Optional[genai.GenerativeModel]:
|
|
100 |
return model
|
101 |
except Exception as e:
|
102 |
err_msg = f"β Error initializing Gemini Model ({VISION_MODEL_NAME}): {e}"
|
103 |
-
st.error(err_msg)
|
104 |
logger.error(err_msg, exc_info=True)
|
105 |
return None
|
106 |
|
107 |
@st.cache_resource
|
108 |
def initialize_embedding_function() -> Optional[embedding_functions.HuggingFaceEmbeddingFunction]:
|
109 |
"""Initializes and returns the Hugging Face Embedding Function."""
|
|
|
110 |
try:
|
111 |
# Pass HF_TOKEN if it exists (required for private/gated models)
|
112 |
-
api_key_param = {"api_key": HF_TOKEN} if HF_TOKEN else {}
|
113 |
embed_func = embedding_functions.HuggingFaceEmbeddingFunction(
|
114 |
api_key=HF_TOKEN, # Pass token here if needed by model
|
115 |
model_name=EMBEDDING_MODEL_NAME
|
116 |
)
|
117 |
logger.info(f"Successfully initialized HuggingFace Embedding Function: {EMBEDDING_MODEL_NAME}")
|
|
|
118 |
return embed_func
|
119 |
except Exception as e:
|
120 |
err_msg = f"β Error initializing HuggingFace Embedding Function ({EMBEDDING_MODEL_NAME}): {e}"
|
121 |
-
st.error(err_msg)
|
122 |
logger.error(err_msg, exc_info=True)
|
123 |
st.info("βΉοΈ Make sure the embedding model name is correct and you have network access. "
|
124 |
-
"If using a private model, ensure HF_TOKEN is set in secrets.")
|
125 |
return None
|
126 |
|
127 |
@st.cache_resource
|
128 |
def initialize_chroma_collection(_embedding_func: embedding_functions.EmbeddingFunction) -> Optional[chromadb.Collection]:
|
129 |
"""Initializes the Chroma DB client and returns the collection."""
|
130 |
if not _embedding_func:
|
131 |
-
st.error("β Cannot initialize Chroma DB without a valid embedding function.")
|
132 |
return None
|
|
|
133 |
try:
|
134 |
chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
|
135 |
collection = chroma_client.get_or_create_collection(
|
@@ -138,12 +129,13 @@ def initialize_chroma_collection(_embedding_func: embedding_functions.EmbeddingF
|
|
138 |
metadata={"hnsw:space": CHROMA_DISTANCE_METRIC}
|
139 |
)
|
140 |
logger.info(f"Chroma DB collection '{COLLECTION_NAME}' loaded/created at '{CHROMA_PATH}' using {CHROMA_DISTANCE_METRIC}.")
|
|
|
141 |
return collection
|
142 |
except Exception as e:
|
143 |
err_msg = f"β Error initializing Chroma DB at '{CHROMA_PATH}': {e}"
|
144 |
-
st.error(err_msg)
|
145 |
logger.error(err_msg, exc_info=True)
|
146 |
-
st.info(f"βΉοΈ Ensure the path '{CHROMA_PATH}' is writable.")
|
147 |
return None
|
148 |
|
149 |
# --- Core Logic Functions (with Caching for Data Operations) ---
|
@@ -187,13 +179,12 @@ def analyze_image_with_gemini(_gemini_model: genai.GenerativeModel, image_bytes:
|
|
187 |
def query_chroma(_collection: chromadb.Collection, query_text: str, n_results: int = 5) -> Optional[Dict[str, List[Any]]]:
|
188 |
"""Queries Chroma DB, returns results dict or None on error."""
|
189 |
if not _collection:
|
|
|
190 |
return None
|
191 |
if not query_text:
|
192 |
logger.warning("Attempted to query Chroma with empty text.")
|
193 |
return None
|
194 |
try:
|
195 |
-
# Placeholder for potential query refinement:
|
196 |
-
# refined_query = refine_query_for_chroma(query_text) # Implement this if needed
|
197 |
refined_query = query_text # Using direct analysis text for now
|
198 |
|
199 |
results = _collection.query(
|
@@ -204,9 +195,9 @@ def query_chroma(_collection: chromadb.Collection, query_text: str, n_results: i
|
|
204 |
logger.info(f"Chroma query successful for text snippet: '{query_text[:50]}...'")
|
205 |
return results
|
206 |
except Exception as e:
|
207 |
-
|
208 |
-
st.error(
|
209 |
-
logger.error(
|
210 |
return None
|
211 |
|
212 |
def add_dummy_data_to_chroma(collection: chromadb.Collection, embedding_func: embedding_functions.EmbeddingFunction):
|
@@ -215,10 +206,23 @@ def add_dummy_data_to_chroma(collection: chromadb.Collection, embedding_func: em
|
|
215 |
st.error("β Cannot add dummy data: Chroma Collection or Embedding Function not available.")
|
216 |
return
|
217 |
|
218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
try:
|
220 |
# --- Dummy Data Definition ---
|
221 |
-
# (Same data as before, but ensure metadata is useful)
|
222 |
docs = [
|
223 |
"Figure 1A shows adenocarcinoma of the lung, papillary subtype. Note the glandular structures and nuclear atypia. TTF-1 staining was positive.",
|
224 |
"Pathology slide 34B demonstrates high-grade glioma (glioblastoma) with significant necrosis and microvascular proliferation. Ki-67 index was high.",
|
@@ -233,52 +237,49 @@ def add_dummy_data_to_chroma(collection: chromadb.Collection, embedding_func: em
|
|
233 |
{"source": "Path Report 101", "topic": "Gastrointestinal Pathology", "entities": "chronic gastritis, Helicobacter pylori, intestinal metaplasia", "IMAGE_ID": "micrograph_h_pylori_gastritis.jpg"},
|
234 |
{"source": "Case Study CJD", "topic": "Neuropathology", "entities": "prion disease, Spongiform changes, Gliosis, cerebral cortex", "IMAGE_ID": "slide_cjd_sample_02.jpg"}
|
235 |
]
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
status.update(label=f"β
Added {len(docs)} dummy documents.", state="complete")
|
251 |
-
logger.info(f"Added {len(docs)} dummy documents to collection '{COLLECTION_NAME}'.")
|
252 |
-
else:
|
253 |
-
status.update(label="β οΈ Dummy data already exists. No new data added.", state="complete")
|
254 |
-
logger.warning("Dummy data seems to already exist in the collection based on text match.")
|
255 |
|
256 |
except Exception as e:
|
257 |
err_msg = f"Error adding dummy data to Chroma: {e}"
|
258 |
-
status.update(label=f"β Error: {err_msg}", state="error")
|
259 |
logger.error(err_msg, exc_info=True)
|
260 |
|
261 |
# --- Initialize Resources ---
|
262 |
-
# These calls use @st.cache_resource,
|
|
|
263 |
gemini_model = initialize_gemini_model()
|
264 |
embedding_func = initialize_embedding_function()
|
265 |
-
collection = initialize_chroma_collection(embedding_func) # Pass embedding func
|
266 |
|
267 |
# --- Streamlit UI ---
|
268 |
-
|
269 |
-
|
|
|
270 |
|
271 |
# --- DISCLAIMER ---
|
272 |
st.warning("""
|
273 |
**β οΈ Disclaimer:** This tool is for demonstration and informational purposes ONLY.
|
274 |
It is **NOT** a medical device and should **NOT** be used for actual medical diagnosis, treatment, or decision-making.
|
275 |
AI analysis can be imperfect. Always consult with qualified healthcare professionals for any medical concerns.
|
276 |
-
Do **NOT** upload identifiable patient data (PHI).
|
277 |
-
""")
|
278 |
|
279 |
-
st.markdown("""
|
280 |
-
Upload a medical image. Gemini Vision will analyze it
|
281 |
-
will be retrieved from a Chroma DB knowledge base using
|
282 |
""")
|
283 |
|
284 |
# Sidebar
|
@@ -292,7 +293,7 @@ with st.sidebar:
|
|
292 |
|
293 |
st.divider()
|
294 |
|
295 |
-
if st.button("β Add/Verify Dummy KB Data", help="Adds example text data to Chroma DB if it doesn't exist."):
|
296 |
if collection and embedding_func:
|
297 |
add_dummy_data_to_chroma(collection, embedding_func)
|
298 |
else:
|
@@ -300,15 +301,14 @@ with st.sidebar:
|
|
300 |
|
301 |
st.divider()
|
302 |
|
303 |
-
st.
|
304 |
-
**
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
""
|
310 |
-
st.caption(f"
|
311 |
-
st.caption(f"Using HF Token: {'Provided' if HF_TOKEN else 'Not Provided'}")
|
312 |
|
313 |
# Main Display Area
|
314 |
col1, col2 = st.columns(2)
|
@@ -328,56 +328,59 @@ with col2:
|
|
328 |
analysis_text = ""
|
329 |
analysis_error = False
|
330 |
with st.status("π§ Analyzing image with Gemini Vision...", expanded=True) as status_gemini:
|
331 |
-
# The actual analysis function is cached via @st.cache_data
|
332 |
analysis_text, analysis_error = analyze_image_with_gemini(gemini_model, image_bytes)
|
333 |
if analysis_error:
|
334 |
-
|
335 |
-
|
|
|
|
|
336 |
else:
|
337 |
-
status_gemini.update(label="β
Analysis Complete", state="complete")
|
338 |
st.markdown("**Gemini Vision Analysis:**")
|
339 |
-
st.markdown(analysis_text)
|
340 |
|
341 |
# 2. Query Chroma if Analysis Succeeded
|
342 |
if not analysis_error and analysis_text:
|
343 |
-
st.markdown("---")
|
344 |
st.subheader("π Related Information (RAG)")
|
345 |
-
with st.status("π Searching knowledge base (Chroma DB)...", expanded=True) as status_chroma:
|
346 |
-
|
347 |
-
chroma_results = query_chroma(collection, analysis_text, n_results=3)
|
348 |
|
349 |
if chroma_results and chroma_results.get('documents') and chroma_results['documents'][0]:
|
350 |
num_results = len(chroma_results['documents'][0])
|
351 |
-
status_chroma.update(label=f"β
Found {num_results} related entries.", state="complete")
|
352 |
|
353 |
for i in range(num_results):
|
354 |
doc = chroma_results['documents'][0][i]
|
355 |
meta = chroma_results['metadatas'][0][i]
|
356 |
dist = chroma_results['distances'][0][i]
|
357 |
-
|
|
|
358 |
|
359 |
expander_title = f"Result {i+1} (Similarity: {similarity:.4f}) | Source: {meta.get('source', 'N/A')}"
|
360 |
with st.expander(expander_title):
|
361 |
st.markdown("**Retrieved Text:**")
|
362 |
-
st.markdown(f"> {doc}")
|
363 |
st.markdown("**Metadata:**")
|
364 |
-
# Display metadata keys/values more nicely
|
365 |
for key, value in meta.items():
|
366 |
st.markdown(f"- **{key.replace('_', ' ').title()}:** `{value}`")
|
367 |
-
|
368 |
-
# Highlight linked image ID
|
369 |
if meta.get("IMAGE_ID"):
|
370 |
st.info(f"βΉοΈ Associated visual asset ID: `{meta['IMAGE_ID']}`")
|
371 |
|
372 |
elif chroma_results is not None: # Query ran, no results
|
373 |
-
status_chroma.update(label="β οΈ No relevant information found.", state="warning")
|
374 |
-
|
375 |
-
|
|
|
|
|
|
|
376 |
|
377 |
elif not uploaded_file:
|
378 |
st.info("Analysis results will appear here once an image is uploaded.")
|
379 |
else:
|
380 |
-
|
|
|
|
|
381 |
|
382 |
st.markdown("---")
|
383 |
st.markdown("<div style='text-align: center; font-size: small;'>Powered by Google Gemini, Chroma DB, Hugging Face, and Streamlit</div>", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# --- Imports ---
|
2 |
import streamlit as st
|
3 |
import google.generativeai as genai
|
|
|
9 |
import logging
|
10 |
from typing import Optional, Dict, List, Any, Tuple
|
11 |
|
12 |
+
# --- Set Page Config FIRST ---
|
13 |
+
# This MUST be the first Streamlit command executed in the script.
|
14 |
+
st.set_page_config(layout="wide", page_title="Medical Image Analysis & RAG (HF/BioBERT)")
|
15 |
+
|
16 |
# --- Basic Logging Setup ---
|
17 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
18 |
logger = logging.getLogger(__name__)
|
|
|
21 |
# Secrets Management (Prioritize Hugging Face Secrets)
|
22 |
try:
|
23 |
GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
|
|
|
24 |
HF_TOKEN = st.secrets.get("HF_TOKEN") # Use .get() for optional token
|
25 |
except KeyError as e:
|
26 |
err_msg = f"❌ Missing Secret: {e}. Please add it to your Hugging Face Space secrets."
|
27 |
+
# Now it's safe to call st.error after set_page_config
|
28 |
st.error(err_msg)
|
29 |
logger.error(err_msg)
|
30 |
st.stop()
|
|
|
62 |
"""
|
63 |
|
64 |
# Chroma DB Configuration
|
65 |
+
CHROMA_PATH = "chroma_data_biobert" # Changed path to reflect model change
|
66 |
+
COLLECTION_NAME = "medical_docs_biobert" # Changed collection name
|
67 |
+
|
68 |
+
# --- Embedding Model Selection ---
|
69 |
+
# Using BioBERT v1.1 - Good domain knowledge, but potentially suboptimal for *semantic similarity search*.
|
70 |
+
# Default pooling (likely CLS token) will be used by sentence-transformers.
|
71 |
+
# Consider models fine-tuned for sentence similarity if retrieval quality is low:
|
72 |
+
# e.g., 'dmis-lab/sapbert-from-pubmedbert-sentencetransformer'
|
73 |
+
EMBEDDING_MODEL_NAME = "dmis-lab/biobert-v1.1"
|
74 |
+
CHROMA_DISTANCE_METRIC = "cosine" # Cosine is generally good for sentence embeddings
|
75 |
|
76 |
# --- Caching Resource Initialization ---
|
77 |
|
|
|
89 |
return model
|
90 |
except Exception as e:
|
91 |
err_msg = f"β Error initializing Gemini Model ({VISION_MODEL_NAME}): {e}"
|
92 |
+
st.error(err_msg) # Safe to call st.error here now
|
93 |
logger.error(err_msg, exc_info=True)
|
94 |
return None
|
95 |
|
96 |
@st.cache_resource
|
97 |
def initialize_embedding_function() -> Optional[embedding_functions.HuggingFaceEmbeddingFunction]:
|
98 |
"""Initializes and returns the Hugging Face Embedding Function."""
|
99 |
+
st.info(f"Initializing Embedding Model: {EMBEDDING_MODEL_NAME} (this may take a moment)...")
|
100 |
try:
|
101 |
# Pass HF_TOKEN if it exists (required for private/gated models)
|
|
|
102 |
embed_func = embedding_functions.HuggingFaceEmbeddingFunction(
|
103 |
api_key=HF_TOKEN, # Pass token here if needed by model
|
104 |
model_name=EMBEDDING_MODEL_NAME
|
105 |
)
|
106 |
logger.info(f"Successfully initialized HuggingFace Embedding Function: {EMBEDDING_MODEL_NAME}")
|
107 |
+
st.success(f"Embedding Model {EMBEDDING_MODEL_NAME} initialized.")
|
108 |
return embed_func
|
109 |
except Exception as e:
|
110 |
err_msg = f"β Error initializing HuggingFace Embedding Function ({EMBEDDING_MODEL_NAME}): {e}"
|
111 |
+
st.error(err_msg) # Safe here
|
112 |
logger.error(err_msg, exc_info=True)
|
113 |
st.info("ℹ️ Make sure the embedding model name is correct and you have network access. "
|
114 |
+
"If using a private model, ensure HF_TOKEN is set in secrets. Check Space logs for details.")
|
115 |
return None
|
116 |
|
117 |
@st.cache_resource
|
118 |
def initialize_chroma_collection(_embedding_func: embedding_functions.EmbeddingFunction) -> Optional[chromadb.Collection]:
|
119 |
"""Initializes the Chroma DB client and returns the collection."""
|
120 |
if not _embedding_func:
|
121 |
+
st.error("β Cannot initialize Chroma DB without a valid embedding function.") # Safe here
|
122 |
return None
|
123 |
+
st.info(f"Initializing Chroma DB collection '{COLLECTION_NAME}'...")
|
124 |
try:
|
125 |
chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
|
126 |
collection = chroma_client.get_or_create_collection(
|
|
|
129 |
metadata={"hnsw:space": CHROMA_DISTANCE_METRIC}
|
130 |
)
|
131 |
logger.info(f"Chroma DB collection '{COLLECTION_NAME}' loaded/created at '{CHROMA_PATH}' using {CHROMA_DISTANCE_METRIC}.")
|
132 |
+
st.success(f"Chroma DB collection '{COLLECTION_NAME}' ready.")
|
133 |
return collection
|
134 |
except Exception as e:
|
135 |
err_msg = f"β Error initializing Chroma DB at '{CHROMA_PATH}': {e}"
|
136 |
+
st.error(err_msg) # Safe here
|
137 |
logger.error(err_msg, exc_info=True)
|
138 |
+
st.info(f"βΉοΈ Ensure the path '{CHROMA_PATH}' is writable. Check Space logs.")
|
139 |
return None
|
140 |
|
141 |
# --- Core Logic Functions (with Caching for Data Operations) ---
|
|
|
179 |
def query_chroma(_collection: chromadb.Collection, query_text: str, n_results: int = 5) -> Optional[Dict[str, List[Any]]]:
|
180 |
"""Queries Chroma DB, returns results dict or None on error."""
|
181 |
if not _collection:
|
182 |
+
logger.error("Query attempt failed: Chroma collection is not available.")
|
183 |
return None
|
184 |
if not query_text:
|
185 |
logger.warning("Attempted to query Chroma with empty text.")
|
186 |
return None
|
187 |
try:
|
|
|
|
|
188 |
refined_query = query_text # Using direct analysis text for now
|
189 |
|
190 |
results = _collection.query(
|
|
|
195 |
logger.info(f"Chroma query successful for text snippet: '{query_text[:50]}...'")
|
196 |
return results
|
197 |
except Exception as e:
|
198 |
+
# Show error in UI as well
|
199 |
+
st.error(f"❌ Error querying Chroma DB: {e}", icon="🚨")
|
200 |
+
logger.error(f"Error querying Chroma DB: {e}", exc_info=True)
|
201 |
return None
|
202 |
|
203 |
def add_dummy_data_to_chroma(collection: chromadb.Collection, embedding_func: embedding_functions.EmbeddingFunction):
|
|
|
206 |
st.error("β Cannot add dummy data: Chroma Collection or Embedding Function not available.")
|
207 |
return
|
208 |
|
209 |
+
# Check if dummy data needs adding first to avoid unnecessary processing
|
210 |
+
docs_to_check = [
|
211 |
+
"Figure 1A shows adenocarcinoma of the lung, papillary subtype. Note the glandular structures and nuclear atypia. TTF-1 staining was positive."
|
212 |
+
] # Only check one doc for speed
|
213 |
+
try:
|
214 |
+
existing_check = collection.get(where={"document": docs_to_check[0]}, limit=1, include=[])
|
215 |
+
if existing_check and existing_check.get('ids'):
|
216 |
+
st.info("Dummy data seems to already exist. Skipping add.")
|
217 |
+
logger.info("Skipping dummy data addition as it likely exists.")
|
218 |
+
return
|
219 |
+
except Exception as e:
|
220 |
+
logger.warning(f"Could not efficiently check for existing dummy data: {e}. Proceeding with add attempt.")
|
221 |
+
|
222 |
+
|
223 |
+
status = st.status(f"Adding dummy data (using {EMBEDDING_MODEL_NAME})...", expanded=True)
|
224 |
try:
|
225 |
# --- Dummy Data Definition ---
|
|
|
226 |
docs = [
|
227 |
"Figure 1A shows adenocarcinoma of the lung, papillary subtype. Note the glandular structures and nuclear atypia. TTF-1 staining was positive.",
|
228 |
"Pathology slide 34B demonstrates high-grade glioma (glioblastoma) with significant necrosis and microvascular proliferation. Ki-67 index was high.",
|
|
|
237 |
{"source": "Path Report 101", "topic": "Gastrointestinal Pathology", "entities": "chronic gastritis, Helicobacter pylori, intestinal metaplasia", "IMAGE_ID": "micrograph_h_pylori_gastritis.jpg"},
|
238 |
{"source": "Case Study CJD", "topic": "Neuropathology", "entities": "prion disease, Spongiform changes, Gliosis, cerebral cortex", "IMAGE_ID": "slide_cjd_sample_02.jpg"}
|
239 |
]
|
240 |
+
# Ensure IDs are unique even if run close together
|
241 |
+
base_id = f"doc_biobert_{int(time.time() * 1000)}"
|
242 |
+
ids = [f"{base_id}_{i}" for i in range(len(docs))]
|
243 |
+
|
244 |
+
status.update(label=f"Generating embeddings & adding {len(docs)} documents (this uses BioBERT and may take time)...")
|
245 |
+
|
246 |
+
# Embeddings are generated implicitly by ChromaDB during .add()
|
247 |
+
collection.add(
|
248 |
+
documents=docs,
|
249 |
+
metadatas=metadatas,
|
250 |
+
ids=ids
|
251 |
+
)
|
252 |
+
status.update(label=f"✅ Added {len(docs)} dummy documents.", state="complete", expanded=False)
|
253 |
+
logger.info(f"Added {len(docs)} dummy documents to collection '{COLLECTION_NAME}'.")
|
|
|
|
|
|
|
|
|
|
|
254 |
|
255 |
except Exception as e:
|
256 |
err_msg = f"Error adding dummy data to Chroma: {e}"
|
257 |
+
status.update(label=f"β Error: {err_msg}", state="error", expanded=True)
|
258 |
logger.error(err_msg, exc_info=True)
|
259 |
|
260 |
# --- Initialize Resources ---
|
261 |
+
# These calls use @st.cache_resource, run only once unless cleared/changed.
|
262 |
+
# Order matters if one depends on another (embedding func needed for chroma).
|
263 |
gemini_model = initialize_gemini_model()
|
264 |
embedding_func = initialize_embedding_function()
|
265 |
+
collection = initialize_chroma_collection(embedding_func) # Pass embedding func
|
266 |
|
267 |
# --- Streamlit UI ---
|
268 |
+
# set_page_config() is already called at the top
|
269 |
+
|
270 |
+
st.title("⚕️ Medical Image Analysis & RAG (BioBERT Embeddings)")
|
271 |
|
272 |
# --- DISCLAIMER ---
|
273 |
st.warning("""
|
274 |
**⚠️ Disclaimer:** This tool is for demonstration and informational purposes ONLY.
|
275 |
It is **NOT** a medical device and should **NOT** be used for actual medical diagnosis, treatment, or decision-making.
|
276 |
AI analysis can be imperfect. Always consult with qualified healthcare professionals for any medical concerns.
|
277 |
+
Do **NOT** upload identifiable patient data (PHI). Analysis quality depends heavily on the chosen embedding model.
|
278 |
+
""", icon="β£οΈ")
|
279 |
|
280 |
+
st.markdown(f"""
|
281 |
+
Upload a medical image. Gemini Vision will analyze it. Related information
|
282 |
+
will be retrieved from a Chroma DB knowledge base using **{EMBEDDING_MODEL_NAME}** embeddings.
|
283 |
""")
|
284 |
|
285 |
# Sidebar
|
|
|
293 |
|
294 |
st.divider()
|
295 |
|
296 |
+
if st.button("β Add/Verify Dummy KB Data", help=f"Adds example text data to Chroma DB ({COLLECTION_NAME}) if it doesn't exist."):
|
297 |
if collection and embedding_func:
|
298 |
add_dummy_data_to_chroma(collection, embedding_func)
|
299 |
else:
|
|
|
301 |
|
302 |
st.divider()
|
303 |
|
304 |
+
st.header("ℹ️ System Info")
|
305 |
+
st.caption(f"**Gemini Model:** `{VISION_MODEL_NAME}`")
|
306 |
+
st.caption(f"**Embedding Model:** `{EMBEDDING_MODEL_NAME}`")
|
307 |
+
st.caption(f"**Chroma Collection:** `{COLLECTION_NAME}`")
|
308 |
+
st.caption(f"**Chroma Path:** `{CHROMA_PATH}`")
|
309 |
+
st.caption(f"**Distance Metric:** `{CHROMA_DISTANCE_METRIC}`")
|
310 |
+
st.caption(f"**Google API Key:** {'Set' if GOOGLE_API_KEY else 'Not Set'}")
|
311 |
+
st.caption(f"**HF Token:** {'Provided' if HF_TOKEN else 'Not Provided'}")
|
|
|
312 |
|
313 |
# Main Display Area
|
314 |
col1, col2 = st.columns(2)
|
|
|
328 |
analysis_text = ""
|
329 |
analysis_error = False
|
330 |
with st.status("🧠 Analyzing image with Gemini Vision...", expanded=True) as status_gemini:
|
|
|
331 |
analysis_text, analysis_error = analyze_image_with_gemini(gemini_model, image_bytes)
|
332 |
if analysis_error:
|
333 |
+
# Shorten the message for status if needed
|
334 |
+
status_label = f"⚠️ Analysis Failed/Blocked: {analysis_text.split(':')[0]}"
|
335 |
+
status_gemini.update(label=status_label , state="error")
|
336 |
+
st.error(f"**Analysis Output:** {analysis_text}", icon="π¨")
|
337 |
else:
|
338 |
+
status_gemini.update(label="✅ Analysis Complete", state="complete", expanded=False)
|
339 |
st.markdown("**Gemini Vision Analysis:**")
|
340 |
+
st.markdown(analysis_text) # Display the successful analysis
|
341 |
|
342 |
# 2. Query Chroma if Analysis Succeeded
|
343 |
if not analysis_error and analysis_text:
|
344 |
+
st.markdown("---") # Separator
|
345 |
st.subheader("π Related Information (RAG)")
|
346 |
+
with st.status(f"π Searching knowledge base (Chroma DB w/ BioBERT)...", expanded=True) as status_chroma:
|
347 |
+
chroma_results = query_chroma(collection, analysis_text, n_results=3) # Fetch top 3
|
|
|
348 |
|
349 |
if chroma_results and chroma_results.get('documents') and chroma_results['documents'][0]:
|
350 |
num_results = len(chroma_results['documents'][0])
|
351 |
+
status_chroma.update(label=f"✅ Found {num_results} related entries.", state="complete", expanded=False)
|
352 |
|
353 |
for i in range(num_results):
|
354 |
doc = chroma_results['documents'][0][i]
|
355 |
meta = chroma_results['metadatas'][0][i]
|
356 |
dist = chroma_results['distances'][0][i]
|
357 |
+
# Ensure distance is float before calculation
|
358 |
+
similarity = 1.0 - float(dist) if dist is not None else 0.0
|
359 |
|
360 |
expander_title = f"Result {i+1} (Similarity: {similarity:.4f}) | Source: {meta.get('source', 'N/A')}"
|
361 |
with st.expander(expander_title):
|
362 |
st.markdown("**Retrieved Text:**")
|
363 |
+
st.markdown(f"> {doc}") # Use blockquote
|
364 |
st.markdown("**Metadata:**")
|
|
|
365 |
for key, value in meta.items():
|
366 |
st.markdown(f"- **{key.replace('_', ' ').title()}:** `{value}`")
|
|
|
|
|
367 |
if meta.get("IMAGE_ID"):
|
368 |
st.info(f"ℹ️ Associated visual asset ID: `{meta['IMAGE_ID']}`")
|
369 |
|
370 |
elif chroma_results is not None: # Query ran, no results
|
371 |
+
status_chroma.update(label="β οΈ No relevant information found.", state="warning", expanded=False)
|
372 |
+
st.warning("No relevant documents found in the knowledge base for this analysis.", icon="⚠️")
|
373 |
+
# Error case is handled by st.error within query_chroma itself
|
374 |
+
elif chroma_results is None:
|
375 |
+
status_chroma.update(label="β Failed to retrieve results.", state="error", expanded=True)
|
376 |
+
|
377 |
|
378 |
elif not uploaded_file:
|
379 |
st.info("Analysis results will appear here once an image is uploaded.")
|
380 |
else:
|
381 |
+
# Initialization error occurred earlier, resources might be None
|
382 |
+
st.error("β Analysis cannot proceed. Check if Gemini model or Chroma DB failed to initialize (see sidebar info & Space logs).")
|
383 |
+
|
384 |
|
385 |
st.markdown("---")
|
386 |
st.markdown("<div style='text-align: center; font-size: small;'>Powered by Google Gemini, Chroma DB, Hugging Face, and Streamlit</div>", unsafe_allow_html=True)
|