Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,14 +1,9 @@
|
|
1 |
-
#
|
2 |
"""
|
3 |
Streamlit application for Medical Image Analysis using Google Gemini Vision
|
4 |
and Retrieval-Augmented Generation (RAG) with Chroma DB.
|
5 |
|
6 |
-
|
7 |
-
1. The image is analyzed by Google's Gemini Pro Vision model to generate a
|
8 |
-
textual description of key features.
|
9 |
-
2. This description is then used as a query to a Chroma vector database
|
10 |
-
(populated with example medical text snippets) to retrieve relevant
|
11 |
-
information from a simulated knowledge base.
|
12 |
"""
|
13 |
|
14 |
# --- Imports ---
|
@@ -16,38 +11,27 @@ import streamlit as st
|
|
16 |
import google.generativeai as genai
|
17 |
import chromadb
|
18 |
from chromadb.utils import embedding_functions
|
|
|
19 |
from PIL import Image
|
20 |
import io
|
21 |
-
import time
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
genai.configure(api_key=GOOGLE_API_KEY)
|
29 |
-
except KeyError:
|
30 |
-
st.error("β GOOGLE_API_KEY not found in Streamlit secrets! Please add it.")
|
31 |
-
st.stop()
|
32 |
-
except Exception as e:
|
33 |
-
st.error(f"β Error configuring Google AI SDK: {e}")
|
34 |
-
st.stop()
|
35 |
-
|
36 |
-
# --- Gemini Model Setup ---
|
37 |
-
# Define the specific Gemini model to use (ensure it's a vision-capable model)
|
38 |
-
VISION_MODEL_NAME = "gemini-pro-vision"
|
39 |
|
40 |
-
#
|
41 |
-
#
|
|
|
|
|
42 |
GENERATION_CONFIG = {
|
43 |
"temperature": 0.2,
|
44 |
"top_p": 0.95,
|
45 |
"top_k": 40,
|
46 |
"max_output_tokens": 1024,
|
47 |
}
|
48 |
-
|
49 |
-
# Configure safety settings (adjust thresholds as needed for medical content)
|
50 |
-
# Blocking potentially sensitive content might be necessary depending on the images
|
51 |
SAFETY_SETTINGS = [
|
52 |
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
|
53 |
{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
|
@@ -55,268 +39,361 @@ SAFETY_SETTINGS = [
|
|
55 |
{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
|
56 |
]
|
57 |
|
58 |
-
#
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
#
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
#
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
103 |
# --- Helper Functions ---
|
104 |
|
105 |
-
def analyze_image_with_gemini(image_bytes: bytes) -> str:
|
106 |
"""
|
107 |
-
|
108 |
-
the generated text description.
|
109 |
|
110 |
Args:
|
|
|
111 |
image_bytes: The image data as bytes.
|
112 |
|
113 |
Returns:
|
114 |
-
A
|
|
|
|
|
115 |
"""
|
116 |
try:
|
117 |
img = Image.open(io.BytesIO(image_bytes))
|
118 |
-
# Define the prompt for the vision model
|
119 |
prompt = """Analyze this medical image (e.g., pathology slide, diagram, scan).
|
120 |
-
Describe
|
121 |
-
Identify potential:
|
122 |
-
-
|
123 |
-
- Pathological
|
124 |
-
- Visible cell types
|
125 |
-
- Relevant biomarkers (if
|
126 |
-
- Anatomical context (if
|
127 |
-
|
128 |
-
|
129 |
"""
|
130 |
-
#
|
131 |
-
response =
|
132 |
|
133 |
-
# Check for blocked content or empty response
|
134 |
if not response.parts:
|
|
|
135 |
if response.prompt_feedback and response.prompt_feedback.block_reason:
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
st.error("β Gemini analysis returned no content. Response might be empty or invalid.")
|
141 |
-
return "Error: Gemini analysis failed or returned no content."
|
142 |
|
143 |
-
|
144 |
-
return response.text
|
145 |
|
146 |
except genai.types.BlockedPromptException as e:
|
147 |
-
|
148 |
-
|
|
|
149 |
except Exception as e:
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
|
154 |
-
def query_chroma(query_text: str, n_results: int = 5) -> Optional[Dict[str, List[Any]]]:
|
155 |
-
"""
|
156 |
-
Queries the Chroma DB collection with the given text.
|
157 |
-
|
158 |
-
Args:
|
159 |
-
query_text: The text to use for the similarity search.
|
160 |
-
n_results: The maximum number of results to return.
|
161 |
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
|
|
|
|
166 |
try:
|
167 |
results = collection.query(
|
168 |
query_texts=[query_text],
|
169 |
n_results=n_results,
|
170 |
-
include=['documents', 'metadatas', 'distances']
|
171 |
)
|
|
|
172 |
return results
|
173 |
except Exception as e:
|
174 |
-
|
|
|
175 |
return None
|
176 |
|
177 |
-
|
178 |
-
|
179 |
-
Adds predefined example medical text snippets
|
180 |
-
Checks if documents with the same text already exist before adding.
|
181 |
-
"""
|
182 |
st.info("Attempting to add dummy data to Chroma DB...")
|
183 |
-
|
184 |
-
# --- IMPORTANT ---
|
185 |
-
# In a real application, this data ingestion process would involve:
|
186 |
-
# 1. Parsing actual medical documents (research papers, clinical notes, textbooks).
|
187 |
-
# 2. Extracting relevant text chunks (e.g., using tools like Unstructured).
|
188 |
-
# 3. Extracting or associating meaningful METADATA (source, patient ID (anonymized),
|
189 |
-
# image IDs linked to text, extracted entities like diseases/genes).
|
190 |
-
# 4. Generating embeddings using the SAME embedding function used for querying.
|
191 |
docs = [
|
192 |
"Figure 1A shows adenocarcinoma of the lung, papillary subtype. Note the glandular structures and nuclear atypia. TTF-1 staining was positive.",
|
193 |
"Pathology slide 34B demonstrates high-grade glioma (glioblastoma) with significant necrosis and microvascular proliferation. Ki-67 index was high.",
|
194 |
-
"
|
195 |
-
"Micrograph
|
196 |
-
"Slide CJD-
|
197 |
]
|
198 |
metadatas = [
|
199 |
-
{"source": "Example Paper 1", "topic": "Lung Cancer Pathology", "entities":
|
200 |
-
{"source": "Path Report 789", "topic": "Brain Tumor Pathology", "entities":
|
201 |
-
{"source": "Textbook Chapter 5", "topic": "Molecular Oncology
|
202 |
-
{"source": "Path Report 101", "topic": "Gastrointestinal Pathology", "entities":
|
203 |
-
{"source": "Case Study CJD", "topic": "Neuropathology", "entities":
|
204 |
]
|
205 |
-
# Generate
|
|
|
206 |
ids = [f"doc_{int(time.time())}_{i}" for i in range(len(docs))]
|
207 |
|
208 |
try:
|
209 |
-
#
|
210 |
-
existing_docs = collection.get(where={"$or": [{"document": doc} for doc in docs]}, include=[])
|
211 |
if not existing_docs or not existing_docs.get('ids'):
|
212 |
collection.add(
|
213 |
documents=docs,
|
214 |
metadatas=metadatas,
|
215 |
ids=ids
|
216 |
)
|
|
|
217 |
st.success(f"β
Added {len(docs)} dummy documents to Chroma collection '{COLLECTION_NAME}'.")
|
218 |
else:
|
219 |
-
|
|
|
220 |
|
221 |
except Exception as e:
|
222 |
-
|
223 |
-
|
224 |
|
225 |
# --- Streamlit UI ---
|
226 |
-
st.set_page_config(layout="wide", page_title="Medical Image
|
|
|
227 |
st.title("βοΈ Medical Image Analysis & RAG")
|
228 |
st.markdown("""
|
229 |
-
|
230 |
-
|
231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
232 |
""")
|
233 |
|
234 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
with st.sidebar:
|
236 |
st.header("βοΈ Controls")
|
237 |
uploaded_file = st.file_uploader(
|
238 |
-
"
|
239 |
type=["jpg", "jpeg", "png", "tiff", "webp"],
|
240 |
-
help="Upload
|
241 |
)
|
242 |
|
243 |
-
st.divider() # Visual separator
|
244 |
-
|
245 |
-
if st.button("β Load Dummy KB Data", help="Add example text data to the Chroma vector database."):
|
246 |
-
add_dummy_data_to_chroma()
|
247 |
-
|
248 |
st.divider()
|
249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
st.info(f"""
|
251 |
-
|
252 |
-
-
|
253 |
-
-
|
254 |
-
-
|
|
|
255 |
""")
|
|
|
256 |
|
257 |
|
258 |
-
# Main
|
259 |
-
col1, col2 = st.columns(2)
|
260 |
|
261 |
with col1:
|
262 |
st.subheader("πΌοΈ Uploaded Image")
|
263 |
if uploaded_file is not None:
|
264 |
-
# Read image bytes from the uploaded file
|
265 |
image_bytes = uploaded_file.getvalue()
|
266 |
-
# Display the uploaded image
|
267 |
st.image(image_bytes, caption=f"Uploaded: {uploaded_file.name}", use_column_width=True)
|
268 |
else:
|
269 |
-
st.info("Upload an image using the sidebar to begin.")
|
270 |
|
271 |
with col2:
|
272 |
-
st.subheader("
|
273 |
-
if uploaded_file is not None:
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
283 |
st.markdown(analysis_text)
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
317 |
else:
|
318 |
-
|
|
|
319 |
|
320 |
|
|
|
321 |
st.markdown("---")
|
322 |
-
st.
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
"""
|
3 |
Streamlit application for Medical Image Analysis using Google Gemini Vision
|
4 |
and Retrieval-Augmented Generation (RAG) with Chroma DB.
|
5 |
|
6 |
+
Optimized for deployment on Hugging Face Spaces.
|
|
|
|
|
|
|
|
|
|
|
7 |
"""
|
8 |
|
9 |
# --- Imports ---
|
|
|
11 |
import google.generativeai as genai
|
12 |
import chromadb
|
13 |
from chromadb.utils import embedding_functions
|
14 |
+
from chromadb.api.types import EmbeddingFunction # For type hinting
|
15 |
from PIL import Image
|
16 |
import io
|
17 |
+
import time
|
18 |
+
import logging
|
19 |
+
from typing import Optional, Dict, List, Any, Tuple
|
20 |
+
|
21 |
+
# --- Basic Logging Setup ---
# Root logger at INFO with a "timestamp - level - message" line format.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
logger = logging.getLogger(__name__)

# --- Configuration Constants ---
# Model and API configuration.
# Key under which the Google API key is looked up in the app secrets.
GOOGLE_API_KEY_SECRET = "GOOGLE_API_KEY"
# Model name handed to genai.GenerativeModel.
VISION_MODEL_NAME = "gemini-pro-vision"
# Sampling / output-length settings handed to the model as generation_config.
GENERATION_CONFIG = dict(
    temperature=0.2,
    top_p=0.95,
    top_k=40,
    max_output_tokens=1024,
)
|
|
|
|
|
|
|
35 |
SAFETY_SETTINGS = [
|
36 |
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
|
37 |
{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
|
|
|
39 |
{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
|
40 |
]
|
41 |
|
42 |
+
# Chroma DB configuration.
# Relative directory given to chromadb.PersistentClient. On a Hugging Face
# Space the data only survives restarts when persistent storage is enabled.
CHROMA_PATH = "chroma_data_hf"
COLLECTION_NAME = "medical_docs_v2"

# Embedding model. "all-MiniLM-L6-v2" is a general-purpose default; models
# tuned on biomedical text may retrieve better, for example
#   - 'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext'
#   - 'emilyalsentzer/Bio_ClinicalBERT'
# (both usually need an adapter and may require installing
# `sentence-transformers` explicitly; see the Sentence Transformers docs for
# loading Hugging Face models directly).
# The same model must be used for indexing and for querying.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Distance metric stored as the collection's "hnsw:space" metadata.
CHROMA_DISTANCE_FUNCTION = "cosine"

# UI configuration.
# Number of knowledge-base hits requested per query.
MAX_RAG_RESULTS = 3
|
60 |
+
|
61 |
+
# --- Initialization Functions with Caching ---
|
62 |
+
|
63 |
+
@st.cache_resource
def configure_google_ai() -> bool:
    """Set up the Google AI SDK with the API key read from the app secrets.

    Returns:
        True when the key lookup and ``genai.configure`` both succeeded,
        False otherwise. Failures are shown in the UI and written to the log.
    """
    configured = False
    try:
        genai.configure(api_key=st.secrets[GOOGLE_API_KEY_SECRET])
        logger.info("Google AI SDK configured successfully.")
        configured = True
    except KeyError:
        # A KeyError is reported to the user as a missing secret.
        st.error(f"β **Error:** '{GOOGLE_API_KEY_SECRET}' not found in Hugging Face Secrets.")
        logger.error(f"Secret '{GOOGLE_API_KEY_SECRET}' not found.")
    except Exception as exc:
        st.error(f"β **Error:** Failed to configure Google AI SDK: {exc}")
        logger.error(f"Error configuring Google AI SDK: {exc}", exc_info=True)
    return configured
|
79 |
+
|
80 |
+
@st.cache_resource
def get_gemini_model() -> Optional[genai.GenerativeModel]:
    """Build the Gemini model object used for image analysis.

    Returns:
        The ``genai.GenerativeModel`` instance, or None when the SDK could not
        be configured or the model constructor raised.
    """
    # Nothing to build if the SDK has no usable API key.
    if not configure_google_ai():
        return None

    model_kwargs = {
        "model_name": VISION_MODEL_NAME,
        "generation_config": GENERATION_CONFIG,
        "safety_settings": SAFETY_SETTINGS,
    }
    try:
        vision_model = genai.GenerativeModel(**model_kwargs)
        logger.info(f"Gemini Model '{VISION_MODEL_NAME}' initialized.")
    except Exception as exc:
        st.error(f"β **Error:** Failed to initialize Gemini Model ({VISION_MODEL_NAME}): {exc}")
        logger.error(f"Error initializing Gemini Model: {exc}", exc_info=True)
        return None
    return vision_model
|
97 |
+
|
98 |
+
@st.cache_resource
def get_embedding_function() -> Optional[EmbeddingFunction]:
    """Create the embedding function used for both indexing and querying.

    ``DefaultEmbeddingFunction`` accepts no ``model_name`` argument, so it is
    only used (without arguments) when the configured model is Chroma's
    default all-MiniLM-L6-v2. Any other model name is loaded through
    ``SentenceTransformerEmbeddingFunction``, which requires the
    ``sentence-transformers`` package to be installed.

    Returns:
        The embedding function, or None when it could not be created (the
        failure is shown in the UI and written to the log).
    """
    # Spellings of the model that Chroma's built-in default already provides.
    builtin_default_names = {
        "all-MiniLM-L6-v2",
        "sentence-transformers/all-MiniLM-L6-v2",
    }
    try:
        if EMBEDDING_MODEL_NAME in builtin_default_names:
            ef = embedding_functions.DefaultEmbeddingFunction()
        else:
            ef = embedding_functions.SentenceTransformerEmbeddingFunction(
                model_name=EMBEDDING_MODEL_NAME
            )
        logger.info(f"Initialized embedding function with model: {EMBEDDING_MODEL_NAME}")
        return ef
    except Exception as e:
        st.error(f"β **Error:** Failed to initialize embedding function ({EMBEDDING_MODEL_NAME}): {e}")
        logger.error(f"Error initializing embedding function: {e}", exc_info=True)
        return None
|
111 |
+
|
112 |
+
@st.cache_resource
def get_chroma_collection() -> Optional[chromadb.Collection]:
    """Open the persistent Chroma client and load or create the collection.

    Returns:
        The collection named ``COLLECTION_NAME``, or None when the embedding
        function is unavailable or Chroma raised during initialisation.
    """
    embedder = get_embedding_function()
    if not embedder:
        return None

    store = None
    try:
        client = chromadb.PersistentClient(path=CHROMA_PATH)
        logger.info(f"ChromaDB client initialized with path: {CHROMA_PATH}")

        # The distance metric is recorded in the collection metadata.
        store = client.get_or_create_collection(
            name=COLLECTION_NAME,
            embedding_function=embedder,
            metadata={"hnsw:space": CHROMA_DISTANCE_FUNCTION},
        )
        logger.info(f"ChromaDB collection '{COLLECTION_NAME}' loaded/created.")
    except Exception as exc:
        st.error(f"β **Error:** Failed to initialize Chroma DB collection '{COLLECTION_NAME}': {exc}")
        st.info(f"βΉοΈ Attempted path: '{CHROMA_PATH}'. Ensure write permissions and space.")
        logger.error(f"Error initializing Chroma DB: {exc}", exc_info=True)
        return None
    return store
|
135 |
|
136 |
# --- Helper Functions ---
|
137 |
|
138 |
+
def analyze_image_with_gemini(gemini_model: genai.GenerativeModel, image_bytes: bytes) -> Tuple[Optional[str], bool]:
    """
    Send an image to Gemini Vision and return its textual analysis.

    Args:
        gemini_model: The initialized Gemini model instance.
        image_bytes: The image data as bytes.

    Returns:
        A ``(text, ok)`` tuple: the analysis text and True on success, or
        ``(None, False)`` when the response was empty/blocked or an error
        occurred. Failures are reported in the UI and the log.
    """
    try:
        picture = Image.open(io.BytesIO(image_bytes))
        prompt = """Analyze this medical image (e.g., pathology slide, diagram, scan).
        Describe key visual features relevant for medical context (structures, cells, staining, anomalies).
        Identify potential findings:
        - Possible conditions or disease indicators
        - Pathological features (morphology, patterns)
        - Visible cell types or tissue structures
        - Relevant biomarkers (if suggested by visuals)
        - Anatomical context (if clear)

        Focus on visual evidence. Be concise. Avoid definitive diagnosis. State uncertainties clearly.
        """
        response = gemini_model.generate_content([prompt, picture], stream=False)
        response.resolve()

        # Happy path first: the response carries content parts.
        if response.parts:
            logger.info("Gemini analysis successful.")
            return response.text, True

        # No parts: report the block reason when the response provides one.
        feedback = response.prompt_feedback
        reason = feedback.block_reason.name if feedback and feedback.block_reason else "Unknown reason"
        logger.warning(f"Gemini analysis blocked or empty. Reason: {reason}")
        st.warning(f"β οΈ Analysis blocked by safety filters or returned empty. Reason: {reason}")
        return None, False

    except genai.types.BlockedPromptException as exc:
        logger.error(f"Gemini analysis blocked due to prompt: {exc}")
        st.error(f"β **Analysis Blocked:** The prompt content triggered safety filters: {exc}")
        return None, False
    except Exception as exc:
        logger.error(f"Error during Gemini analysis: {exc}", exc_info=True)
        st.error(f"β **Error:** An unexpected error occurred during Gemini analysis: {exc}")
        return None, False
|
186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
|
188 |
+
def query_chroma(collection: chromadb.Collection, query_text: str, n_results: int = 3) -> Optional[Dict[str, List[Any]]]:
    """Run a similarity query against the Chroma collection.

    Args:
        collection: The Chroma collection to search.
        query_text: Text used as the query; an empty value is rejected.
        n_results: Number of results requested.

    Returns:
        The result mapping from ``collection.query`` (documents, metadatas and
        distances included), or None when the query text is empty or the
        query raised.
    """
    if not query_text:
        logger.warning("Chroma query attempted with empty text.")
        st.warning("β οΈ Cannot query knowledge base without analysis text.")
        return None

    wanted_fields = ['documents', 'metadatas', 'distances']
    try:
        hits = collection.query(
            query_texts=[query_text],
            n_results=n_results,
            include=wanted_fields,
        )
        # Only the first 50 characters of the query are logged.
        logger.info(f"ChromaDB query executed successfully for text: '{query_text[:50]}...'")
        return hits
    except Exception as exc:
        logger.error(f"Error querying Chroma DB: {exc}", exc_info=True)
        st.error(f"β **Error:** Failed to query the knowledge base: {exc}")
        return None
|
206 |
|
207 |
+
# Function to add dummy data (Consider moving to a separate setup script for cleaner app code)
|
208 |
+
def add_dummy_data_to_chroma(collection: chromadb.Collection) -> None:
    """Add the predefined example medical snippets to the Chroma collection.

    The snippets get fixed IDs (``dummy_doc_1`` ...), and the collection is
    checked for those IDs first, so calling this repeatedly does not insert
    duplicates. A metadata ``where`` filter cannot be used for this check
    because it matches metadata fields, not document text.

    Args:
        collection: The Chroma collection that receives the documents.
    """
    st.info("Attempting to add dummy data to Chroma DB...")
    docs = [
        "Figure 1A shows adenocarcinoma of the lung, papillary subtype. Note the glandular structures and nuclear atypia. TTF-1 staining was positive.",
        "Pathology slide 34B demonstrates high-grade glioma (glioblastoma) with significant necrosis and microvascular proliferation. Ki-67 index was high.",
        "Diagram: EGFR signaling pathway mutations in NSCLC targeted by TKIs.",
        "Micrograph: Chronic gastritis with H. pylori organisms (special stain needed). Mild intestinal metaplasia noted.",
        "Slide CJD-02: Spongiform changes in cerebral cortex characteristic of prion disease. Gliosis present.",
    ]
    metadatas = [
        {"source": "Example Paper 1", "topic": "Lung Cancer Pathology", "entities": "adenocarcinoma, lung cancer, glandular structures, nuclear atypia, papillary subtype, TTF-1", "IMAGE_ID": "fig_1a_adeno_lung.png"},
        {"source": "Path Report 789", "topic": "Brain Tumor Pathology", "entities": "high-grade glioma, glioblastoma, necrosis, microvascular proliferation, Ki-67", "IMAGE_ID": "slide_34b_gbm.tiff"},
        {"source": "Textbook Chapter 5", "topic": "Molecular Oncology", "entities": "EGFR, TKIs, NSCLC, signaling pathway", "IMAGE_ID": "diagram_egfr_pathway.svg"},
        {"source": "Path Report 101", "topic": "Gastrointestinal Pathology", "entities": "chronic gastritis, Helicobacter pylori, intestinal metaplasia", "IMAGE_ID": "micrograph_h_pylori_gastritis.jpg"},
        {"source": "Case Study CJD", "topic": "Neuropathology", "entities": "prion disease, Spongiform changes, Gliosis, cerebral cortex", "IMAGE_ID": "slide_cjd_sample_02.jpg"},
    ]
    # Deterministic IDs make the existence check below reliable; time-based
    # IDs would be new on every call and could never be matched again.
    ids = [f"dummy_doc_{n}" for n in range(1, len(docs) + 1)]

    try:
        # include=[] requests no payload fields; only the matching IDs are needed.
        existing_docs = collection.get(ids=ids, include=[])
        if existing_docs and existing_docs.get('ids'):
            logger.warning("Dummy data check indicates data might already exist. Skipping addition.")
            st.warning("β οΈ Dummy data seems to already exist in the collection. No new data added.")
            return

        collection.add(
            documents=docs,
            metadatas=metadatas,
            ids=ids,
        )
        logger.info(f"Added {len(docs)} dummy documents to Chroma collection '{COLLECTION_NAME}'.")
        st.success(f"✅ Added {len(docs)} dummy documents to Chroma collection '{COLLECTION_NAME}'.")

    except Exception as e:
        logger.error(f"Error adding dummy data to Chroma: {e}", exc_info=True)
        st.error(f"β **Error:** Could not add dummy data to Chroma: {e}")
|
248 |
|
249 |
# --- Streamlit UI ---
|
250 |
+
# Page chrome: browser-tab title/icon and a wide layout.
st.set_page_config(
    layout="wide",
    page_title="Medical Image RAG - HF",
    page_icon="βοΈ",
)

st.title("βοΈ Medical Image Analysis & RAG")
st.markdown(
    "\n"
    "*Powered by Google Gemini, ChromaDB, and Streamlit on Hugging Face Spaces*\n"
)

# --- CRITICAL DISCLAIMER ---
# Shown on every run, before any analysis output.
st.warning(
    "\n"
    "**β οΈ Disclaimer:** This tool is for informational and illustrative purposes ONLY.\n"
    "It is **NOT** a medical device and **CANNOT** provide a diagnosis. AI analysis may be\n"
    "imperfect or incomplete. **ALWAYS** consult qualified medical professionals for any\n"
    "health concerns or decisions. Do **NOT** rely solely on this tool for medical judgment.\n"
)

# --- Initialize Services ---
gemini_model = get_gemini_model()
chroma_collection = get_chroma_collection()

# Both services are required; halt the script run when either is missing.
if not (gemini_model and chroma_collection):
    st.error("β Critical components failed to initialize. Cannot proceed. Check logs and secrets.")
    st.stop()
|
273 |
+
|
274 |
+
|
275 |
+
# --- Sidebar Controls ---
|
276 |
with st.sidebar:
    st.header("βοΈ Controls")

    # Image upload widget; the result is None until a file is chosen.
    uploaded_file = st.file_uploader(
        "1. Upload Medical Image",
        type=["jpg", "jpeg", "png", "tiff", "webp"],
        help="Upload formats like pathology slides, diagrams, scans.",
    )

    st.divider()

    st.header("π Knowledge Base")
    add_clicked = st.button(
        "β Add Dummy KB Data",
        help="Add example text data to the Chroma vector database for demonstration.",
    )
    if add_clicked:
        if not chroma_collection:
            st.error("β Chroma DB not available to add data.")
        else:
            add_dummy_data_to_chroma(chroma_collection)

    # Summary of the knowledge-base configuration in use.
    st.info(f"""
    **KB Info:**
    - **Collection:** `{COLLECTION_NAME}`
    - **Storage:** `{CHROMA_PATH}` (in Space storage)
    - **Embeddings:** `{EMBEDDING_MODEL_NAME}`
    - **Similarity:** `{CHROMA_DISTANCE_FUNCTION}`
    """)
    st.caption("Note: Data persists if persistent storage is enabled for this Space, otherwise it's temporary.")
|
301 |
|
302 |
|
303 |
+
# --- Main Processing Area ---
# Left column shows the upload; right column shows the Gemini analysis and
# the knowledge-base hits retrieved with that analysis as the query.
col1, col2 = st.columns(2)

with col1:
    st.subheader("πΌοΈ Uploaded Image")
    if uploaded_file is not None:
        image_bytes = uploaded_file.getvalue()
        st.image(image_bytes, caption=f"Uploaded: {uploaded_file.name}", use_column_width=True)
    else:
        st.info("Upload an image using the sidebar to begin analysis.")

with col2:
    st.subheader("π€ AI Analysis & Retrieval")
    if uploaded_file is not None and gemini_model and chroma_collection:
        analysis_text = None
        analysis_successful = False

        # Step 1: Analyze Image with Gemini
        with st.status("π§ Analyzing image with Gemini Vision...", expanded=False) as status_analysis:
            try:
                st.write("Sending image to Gemini...")
                analysis_text, analysis_successful = analyze_image_with_gemini(gemini_model, image_bytes)
                if analysis_successful:
                    st.write("Analysis complete.")
                    status_analysis.update(label="✅ Analysis Complete", state="complete")
                else:
                    # Error/block message already shown by the helper function.
                    status_analysis.update(label="β οΈ Analysis Failed or Blocked", state="error")

            except Exception as e:  # Catch potential unexpected errors here too
                logger.error(f"Unhandled error during analysis status block: {e}", exc_info=True)
                st.error(f"β An unexpected error occurred during the analysis process: {e}")
                status_analysis.update(label="π₯ Analysis Error", state="error")
                analysis_successful = False  # Ensure flag is False

        # Display the analysis and run retrieval only when there is text to use.
        if analysis_successful and analysis_text:
            st.markdown("**π¬ Gemini Vision Analysis:**")
            st.markdown(analysis_text)
            st.divider()

            # Step 2: Query Chroma DB with Analysis Text
            st.markdown("**π Related Information (RAG via Chroma DB):**")
            with st.status("π Searching knowledge base...", expanded=True) as status_query:
                try:
                    st.write(f"Querying with analysis summary (top {MAX_RAG_RESULTS} results)...")
                    chroma_results = query_chroma(chroma_collection, analysis_text, n_results=MAX_RAG_RESULTS)

                    if chroma_results and chroma_results.get('documents') and chroma_results['documents'][0]:
                        num_results = len(chroma_results['documents'][0])
                        st.write(f"Found {num_results} related entries.")
                        status_query.update(label=f"✅ Found {num_results} results", state="complete")

                        # Results come back as one inner list per query text;
                        # a single query was sent, so index 0 holds its hits.
                        hits = zip(
                            chroma_results['documents'][0],
                            chroma_results['metadatas'][0],
                            chroma_results['distances'][0],
                        )
                        for rank, (doc, meta, dist) in enumerate(hits, start=1):
                            meta = meta or {}  # tolerate entries stored without metadata
                            similarity = 1.0 - dist  # For cosine distance

                            expander_title = f"Result {rank} (Similarity: {similarity:.3f}) - Source: {meta.get('source', 'N/A')}"
                            with st.expander(expander_title):
                                st.markdown("**Retrieved Text:**")
                                st.markdown(f"> {doc}")
                                st.markdown("**Metadata:**")
                                # Hide empty metadata values from the display.
                                meta_display = {k: v for k, v in meta.items() if v}
                                st.json(meta_display, expanded=False)

                                # Point to the associated image when one is recorded.
                                if meta.get("IMAGE_ID"):
                                    st.info(f"βΉοΈ Associated Visual: `{meta['IMAGE_ID']}`")

                    elif chroma_results is not None:  # Query ran, no results
                        st.warning("β οΈ No relevant information found in the knowledge base for this analysis.")
                        # st.status only accepts "running", "complete" or "error";
                        # the warning itself is carried by the label and message above.
                        status_query.update(label="β οΈ No results found", state="complete")
                    else:  # Query failed (error handled in query_chroma)
                        status_query.update(label="π₯ Query Error", state="error")

                except Exception as e:
                    logger.error(f"Unhandled error during query status block: {e}", exc_info=True)
                    st.error(f"β An unexpected error occurred during the knowledge base search: {e}")
                    status_query.update(label="π₯ Query Process Error", state="error")

        elif not analysis_successful:
            st.info("Cannot proceed to knowledge base search as image analysis failed or was blocked.")

    elif not uploaded_file:
        st.info("Analysis results and related information will appear here once an image is uploaded and processed.")
    else:
        # This case means initialization failed earlier, message already shown.
        st.info("Waiting for components to initialize...")


# --- Footer ---
st.markdown("---")
st.caption("Ensure responsible use. Verify all findings with qualified professionals.")
|