mgbam commited on
Commit
0d23f5f
·
verified ·
1 Parent(s): 21689c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +215 -127
app.py CHANGED
@@ -1,136 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import google.generativeai as genai
3
  import chromadb
4
  from chromadb.utils import embedding_functions
5
  from PIL import Image
6
- import os
7
  import io
8
- import time # To create unique IDs for Chroma
 
9
 
10
  # --- Configuration ---
11
  try:
12
- # Try loading secrets from Hugging Face secrets first
13
  GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
14
  genai.configure(api_key=GOOGLE_API_KEY)
15
  except KeyError:
16
- st.error("GOOGLE_API_KEY not found in Hugging Face secrets!")
17
  st.stop()
18
  except Exception as e:
19
- st.error(f"Error configuring Google AI: {e}")
20
  st.stop()
21
 
22
  # --- Gemini Model Setup ---
23
- # Check available models if needed, select the vision model
24
- # for m in genai.list_models():
25
- # if 'generateContent' in m.supported_generation_methods:
26
- # print(m.name) # Find the vision model name (e.g., 'gemini-pro-vision')
27
-
28
  VISION_MODEL_NAME = "gemini-pro-vision"
 
 
 
29
  GENERATION_CONFIG = {
30
- "temperature": 0.2, # Lower temp for more factual descriptions
31
  "top_p": 0.95,
32
  "top_k": 40,
33
  "max_output_tokens": 1024,
34
  }
35
- SAFETY_SETTINGS = [ # Adjust safety settings as needed for medical content
 
 
 
36
  {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
37
  {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
38
  {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
39
  {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
40
  ]
41
 
 
42
  try:
43
  gemini_model = genai.GenerativeModel(
44
  model_name=VISION_MODEL_NAME,
45
  generation_config=GENERATION_CONFIG,
46
  safety_settings=SAFETY_SETTINGS
47
  )
 
48
  except Exception as e:
49
- st.error(f"Error initializing Gemini Model ({VISION_MODEL_NAME}): {e}")
50
  st.stop()
51
 
52
  # --- Chroma DB Setup ---
53
- # Using persistent storage within the HF Space (data lost if space is wiped)
54
- # For production, consider a hosted Chroma or other DB solution.
 
55
  CHROMA_PATH = "chroma_data"
56
  COLLECTION_NAME = "medical_docs"
57
 
58
- # Use a default sentence transformer embedding function (runs locally on HF space CPU)
59
- # For better domain adaptation, consider finetuned medical embeddings if possible/available.
60
- # Make sure the model used here matches the one used when INGESTING data.
 
 
 
61
  embedding_func = embedding_functions.DefaultEmbeddingFunction()
62
 
63
  try:
 
64
  chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
65
- # Get or create the collection with the specified embedding function
 
 
66
  collection = chroma_client.get_or_create_collection(
67
  name=COLLECTION_NAME,
68
  embedding_function=embedding_func,
69
- metadata={"hnsw:space": "cosine"} # Use cosine distance
70
  )
 
71
  except Exception as e:
72
- st.error(f"Error initializing Chroma DB at '{CHROMA_PATH}': {e}")
73
- st.info("If this is the first run, the directory will be created.")
74
- # Attempt creation again more robustly if needed, or guide user.
75
  st.stop()
76
 
77
 
78
  # --- Helper Functions ---
79
- def analyze_image_with_gemini(image_bytes):
80
- """Sends image bytes to Gemini Vision and returns the text description."""
 
 
 
 
 
 
 
 
 
 
81
  try:
82
  img = Image.open(io.BytesIO(image_bytes))
83
- prompt = """Analyze this medical image (could be a pathology slide, diagram, or other medical visual).
84
- Describe the key visual features relevant to a medical professional.
85
- Identify potential:
86
- - Diseases or conditions suggested
87
- - Pathological findings (e.g., cellular morphology, tissue structure, staining patterns)
88
- - Cell types visible
89
- - Relevant biomarkers (if inferrable from staining or morphology)
90
- - Anatomical context (if clear)
91
-
92
- Be concise and focus on visually evident information.
93
- """
 
 
94
  response = gemini_model.generate_content([prompt, img])
95
- # Handle potential blocked responses or errors
96
- if not response.parts:
97
- # Check if it was blocked
98
- if response.prompt_feedback and response.prompt_feedback.block_reason:
99
- return f"Analysis blocked: {response.prompt_feedback.block_reason}"
100
- else:
101
- # Some other issue, maybe no response text?
102
- return "Error: Gemini analysis failed or returned no content."
103
 
 
 
 
 
 
 
 
 
 
 
 
104
  return response.text
 
105
  except genai.types.BlockedPromptException as e:
106
- st.error(f"Gemini request blocked: {e}")
107
- return f"Analysis blocked due to safety settings: {e}"
108
  except Exception as e:
109
- st.error(f"Error during Gemini analysis: {e}")
110
  return f"Error analyzing image: {e}"
111
 
112
 
113
- def query_chroma(query_text, n_results=5):
114
- """Queries the Chroma collection with the given text."""
 
 
 
 
 
 
 
 
 
 
115
  try:
116
  results = collection.query(
117
  query_texts=[query_text],
118
  n_results=n_results,
119
- include=['documents', 'metadatas', 'distances'] # Include distances for relevance sorting
120
  )
121
  return results
122
  except Exception as e:
123
- st.error(f"Error querying Chroma DB: {e}")
124
  return None
125
 
126
  def add_dummy_data_to_chroma():
127
- """Adds some example medical text snippets to Chroma."""
 
 
 
 
 
128
  # --- IMPORTANT ---
129
- # In a real scenario, this data would come from processing actual medical documents
130
- # (papers, reports) using a tool like Unstructured (as in the original article)
131
- # or manual curation to extract text and METADATA, including IMAGE_IDs.
132
- # The embeddings generated here MUST match the query embedding function.
133
- st.info("Adding dummy data to Chroma DB...")
 
134
  docs = [
135
  "Figure 1A shows adenocarcinoma of the lung, papillary subtype. Note the glandular structures and nuclear atypia. TTF-1 staining was positive.",
136
  "Pathology slide 34B demonstrates high-grade glioma (glioblastoma) with significant necrosis and microvascular proliferation. Ki-67 index was high.",
@@ -139,96 +196,127 @@ def add_dummy_data_to_chroma():
139
  "Slide CJD-Sample-02: Spongiform changes characteristic of prion disease are evident in the cerebral cortex. Gliosis is also noted."
140
  ]
141
  metadatas = [
142
- {"source": "Example Paper 1", "entities": {"DISEASES": ["adenocarcinoma", "lung cancer"], "PATHOLOGY_FINDINGS": ["glandular structures", "nuclear atypia", "papillary subtype"], "BIOMARKERS": ["TTF-1"]}, "IMAGE_ID": "fig_1a_adeno_lung.png"},
143
- {"source": "Path Report 789", "entities": {"DISEASES": ["high-grade glioma", "glioblastoma"], "PATHOLOGY_FINDINGS": ["necrosis", "microvascular proliferation"], "BIOMARKERS": ["Ki-67"]}, "IMAGE_ID": "slide_34b_gbm.tiff"},
144
- {"source": "Textbook Chapter 5", "entities": {"GENES": ["EGFR"], "DRUGS": ["tyrosine kinase inhibitors"], "DISEASES": ["non-small cell lung cancer"]}, "IMAGE_ID": "diagram_egfr_pathway.svg"},
145
- {"source": "Path Report 101", "entities": {"DISEASES": ["chronic gastritis", "Helicobacter pylori infection"], "PATHOLOGY_FINDINGS": ["intestinal metaplasia"]}, "IMAGE_ID": "micrograph_h_pylori_gastritis.jpg"},
146
- {"source": "Case Study CJD", "entities": {"DISEASES": ["prion disease"], "PATHOLOGY_FINDINGS": ["Spongiform changes", "Gliosis"], "ANATOMICAL_LOCATIONS": ["cerebral cortex"]}, "IMAGE_ID": "slide_cjd_sample_02.jpg"}
147
  ]
148
- ids = [f"doc_{int(time.time())}_{i}" for i in range(len(docs))] # Unique IDs
 
149
 
150
  try:
151
- # Check if docs with these exact texts already exist to avoid duplicates on rerun
152
- existing = collection.get(where={"$or": [{"document": doc} for doc in docs]})
153
- if not existing or not existing['ids']: # Only add if none exist
154
- collection.add(
155
- documents=docs,
156
- metadatas=metadatas,
157
- ids=ids
158
- )
159
- st.success(f"Added {len(docs)} dummy documents to Chroma collection '{COLLECTION_NAME}'.")
160
  else:
161
- st.warning("Dummy data seems to already exist in the collection.")
162
 
163
  except Exception as e:
164
- st.error(f"Error adding dummy data to Chroma: {e}")
165
 
166
 
167
  # --- Streamlit UI ---
168
- st.set_page_config(layout="wide")
169
  st.title("⚕️ Medical Image Analysis & RAG")
170
- st.markdown("Upload a medical image (pathology slide, diagram, etc.). Gemini Vision will analyze it, and Chroma DB will retrieve related information from a knowledge base.")
 
 
 
 
171
 
172
- # Sidebar for controls
173
  with st.sidebar:
174
- st.header("Controls")
175
- uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png", "tiff", "webp"])
176
- if st.button("Load Dummy KB Data"):
 
 
 
 
 
 
 
177
  add_dummy_data_to_chroma()
178
- st.info("Note: Chroma data persists in the Space's storage but is lost if the Space is reset/deleted.")
179
 
 
180
 
181
- # Main area for display
182
- if uploaded_file is not None:
183
- # Read image bytes
184
- image_bytes = uploaded_file.getvalue()
 
 
185
 
186
- # Display the uploaded image
187
- st.image(image_bytes, caption=f"Uploaded Image: {uploaded_file.name}", use_column_width=False, width=400)
188
 
189
- st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  st.subheader("🔬 Gemini Vision Analysis")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
- # Analyze image with Gemini
193
- with st.spinner("Analyzing image with Gemini Vision..."):
194
- analysis_text = analyze_image_with_gemini(image_bytes)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
 
196
- if analysis_text.startswith("Error:") or analysis_text.startswith("Analysis blocked:"):
197
- st.error(analysis_text)
198
  else:
199
- st.markdown(analysis_text)
200
-
201
- st.markdown("---")
202
- st.subheader("📚 Related Information from Knowledge Base (Chroma DB)")
203
-
204
- # Query Chroma DB using the Gemini analysis text
205
- with st.spinner("Querying Chroma DB..."):
206
- chroma_results = query_chroma(analysis_text)
207
-
208
- if chroma_results and chroma_results.get('documents') and chroma_results['documents'][0]:
209
- st.success(f"Found {len(chroma_results['documents'][0])} related entries:")
210
- for i in range(len(chroma_results['documents'][0])):
211
- doc = chroma_results['documents'][0][i]
212
- meta = chroma_results['metadatas'][0][i]
213
- dist = chroma_results['distances'][0][i]
214
-
215
- with st.expander(f"Result {i+1} (Distance: {dist:.4f}) - Source: {meta.get('source', 'N/A')}"):
216
- st.markdown("**Text:**")
217
- st.markdown(doc)
218
- st.markdown("**Metadata:**")
219
- st.json(meta) # Display all metadata nicely
220
-
221
- # Highlight if it references another image
222
- if meta.get("IMAGE_ID"):
223
- st.info(f"ℹ️ This text describes another visual asset: `{meta['IMAGE_ID']}`")
224
- # In a real app, you might fetch/display this image if available
225
- elif chroma_results is not None: # Query ran but found nothing
226
- st.warning("No relevant information found in the knowledge base for this analysis.")
227
- else: # Error occurred during query
228
- st.error("Failed to retrieve results from Chroma DB.")
229
-
230
- else:
231
- st.info("Upload an image using the sidebar to start the analysis.")
232
 
233
  st.markdown("---")
234
- st.markdown("Powered by Google Gemini, Chroma DB, and Streamlit.")
 
1
+ # --- Docstring ---
2
+ """
3
+ Streamlit application for Medical Image Analysis using Google Gemini Vision
4
+ and Retrieval-Augmented Generation (RAG) with Chroma DB.
5
+
6
+ Allows users to upload a medical image (pathology slide, diagram, etc.).
7
+ 1. The image is analyzed by Google's Gemini Pro Vision model to generate a
8
+ textual description of key features.
9
+ 2. This description is then used as a query to a Chroma vector database
10
+ (populated with example medical text snippets) to retrieve relevant
11
+ information from a simulated knowledge base.
12
+ """
13
+
14
+ # --- Imports ---
15
  import streamlit as st
16
  import google.generativeai as genai
17
  import chromadb
18
  from chromadb.utils import embedding_functions
19
  from PIL import Image
 
20
  import io
21
+ import time # Used for generating unique IDs for Chroma DB demo data
22
+ from typing import Optional, Dict, List, Any # For type hinting
23
 
24
# --- Configuration ---
# NOTE: nothing in this setup section renders a Streamlit element on the
# success path. st.set_page_config() is called later, in the UI section, and
# Streamlit releases that require it to be the first Streamlit command raise
# an exception if e.g. st.success() has already been emitted. The failure
# paths below may still call st.error()/st.info() because they end the run
# with st.stop() before the UI section is reached.
try:
    # Attempt to load the Google API key from Streamlit secrets
    GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
    genai.configure(api_key=GOOGLE_API_KEY)
except KeyError:
    st.error("GOOGLE_API_KEY not found in Streamlit secrets! Please add it.")
    st.stop()
except Exception as e:
    st.error(f"Error configuring Google AI SDK: {e}")
    st.stop()

# --- Gemini Model Setup ---
# Name of the Gemini model used for image analysis (must accept image input)
VISION_MODEL_NAME = "gemini-pro-vision"

# Generation parameters passed to the model.
# A low temperature is used to favour deterministic, factual descriptions.
GENERATION_CONFIG = {
    "temperature": 0.2,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 1024,
}

# Safety settings passed to the model (adjust thresholds as needed for
# medical content).
SAFETY_SETTINGS = [
    {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
    {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
    {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
    {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
]

# Initialize the Gemini Generative Model; abort the run if that fails.
try:
    gemini_model = genai.GenerativeModel(
        model_name=VISION_MODEL_NAME,
        generation_config=GENERATION_CONFIG,
        safety_settings=SAFETY_SETTINGS
    )
except Exception as e:
    st.error(f"Error initializing Gemini Model ({VISION_MODEL_NAME}): {e}")
    st.stop()

# --- Chroma DB Setup ---
# The collection is persisted on local disk under CHROMA_PATH.
# NOTE: the data is lost if that storage is wiped or the environment resets.
# For production, consider a managed Chroma instance or alternative database.
CHROMA_PATH = "chroma_data"
COLLECTION_NAME = "medical_docs"

# Embedding function attached to the collection.
# IMPORTANT: the embedding function used for querying MUST match the one used
# when the data was added to the collection.
embedding_func = embedding_functions.DefaultEmbeddingFunction()

try:
    # Initialize Chroma DB client with persistence
    chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)

    # Get or create the collection with the embedding function above and
    # cosine distance as the similarity metric.
    collection = chroma_client.get_or_create_collection(
        name=COLLECTION_NAME,
        embedding_function=embedding_func,
        metadata={"hnsw:space": "cosine"}  # Specify cosine distance metric
    )
except Exception as e:
    st.error(f"Error initializing Chroma DB at '{CHROMA_PATH}': {e}")
    st.info("ℹ️ If this is the first run, the 'chroma_data' directory will be created.")
    st.stop()
101
 
102
 
103
  # --- Helper Functions ---
104
+
105
def analyze_image_with_gemini(image_bytes: bytes) -> str:
    """
    Sends image bytes to the Gemini Vision model for analysis and returns
    the generated text description.

    Every failure is reported as a string that starts with either
    ``"Error:"`` or ``"Analysis blocked"``. The UI code tests for exactly
    these prefixes to tell a failed analysis from a real description, so the
    generic exception path must use the ``"Error:"`` prefix as well
    (previously it returned ``"Error analyzing image: ..."``, which slipped
    past that check and was displayed and queried as if it were an analysis).

    Args:
        image_bytes: The image data as bytes.

    Returns:
        A string containing the analysis text, or an error/blocked message
        carrying one of the prefixes described above.
    """
    # Prompt for the vision model, assembled line by line so that the text
    # sent to the model carries no source-code indentation.
    prompt = "\n".join([
        "Analyze this medical image (e.g., pathology slide, diagram, scan).",
        "Describe the key visual features relevant to a medical context.",
        "Identify potential:",
        "- Diseases or conditions indicated",
        "- Pathological findings (e.g., cellular morphology, tissue structure, staining patterns)",
        "- Visible cell types",
        "- Relevant biomarkers (if inferable from staining or morphology)",
        "- Anatomical context (if discernible)",
        "",
        "Be concise and focus primarily on visually evident information. Avoid definitive diagnoses.",
        "",
    ])

    try:
        img = Image.open(io.BytesIO(image_bytes))

        # Generate content using the model
        response = gemini_model.generate_content([prompt, img])

        # An empty ``parts`` list means there is no text to return; find out
        # whether the prompt was blocked or the response is simply empty.
        if not response.parts:
            feedback = response.prompt_feedback
            if feedback and feedback.block_reason:
                block_reason = feedback.block_reason
                st.warning(f"⚠️ Analysis blocked by safety settings: {block_reason}")
                return f"Analysis blocked due to safety settings: {block_reason}"
            st.error("❌ Gemini analysis returned no content. Response might be empty or invalid.")
            return "Error: Gemini analysis failed or returned no content."

        # Return the generated text
        return response.text

    except genai.types.BlockedPromptException as e:
        st.error(f"Gemini request blocked due to prompt content: {e}")
        return f"Analysis blocked (prompt issue): {e}"
    except Exception as e:
        # Covers unreadable image data as well as API/transport failures.
        st.error(f" An error occurred during Gemini analysis: {e}")
        return f"Error: could not analyze image: {e}"
152
 
153
 
154
def query_chroma(query_text: str, n_results: int = 5) -> Optional[Dict[str, List[Any]]]:
    """
    Runs a similarity search against the Chroma DB collection.

    Args:
        query_text: The text whose nearest neighbours are looked up.
        n_results: Upper bound on the number of matches requested.

    Returns:
        The result mapping produced by ``collection.query`` (requested with
        'documents', 'metadatas' and 'distances'), or None when the query
        raised; in that case the error is also shown via ``st.error``.
    """
    # Fields requested for each match
    wanted_fields = ['documents', 'metadatas', 'distances']
    try:
        return collection.query(
            query_texts=[query_text],
            n_results=n_results,
            include=wanted_fields,
        )
    except Exception as exc:
        st.error(f"Error querying Chroma DB: {exc}")
    return None
176
 
177
  def add_dummy_data_to_chroma():
178
+ """
179
+ Adds predefined example medical text snippets and metadata to the Chroma collection.
180
+ Checks if documents with the same text already exist before adding.
181
+ """
182
+ st.info("Attempting to add dummy data to Chroma DB...")
183
+
184
  # --- IMPORTANT ---
185
+ # In a real application, this data ingestion process would involve:
186
+ # 1. Parsing actual medical documents (research papers, clinical notes, textbooks).
187
+ # 2. Extracting relevant text chunks (e.g., using tools like Unstructured).
188
+ # 3. Extracting or associating meaningful METADATA (source, patient ID (anonymized),
189
+ # image IDs linked to text, extracted entities like diseases/genes).
190
+ # 4. Generating embeddings using the SAME embedding function used for querying.
191
  docs = [
192
  "Figure 1A shows adenocarcinoma of the lung, papillary subtype. Note the glandular structures and nuclear atypia. TTF-1 staining was positive.",
193
  "Pathology slide 34B demonstrates high-grade glioma (glioblastoma) with significant necrosis and microvascular proliferation. Ki-67 index was high.",
 
196
  "Slide CJD-Sample-02: Spongiform changes characteristic of prion disease are evident in the cerebral cortex. Gliosis is also noted."
197
  ]
198
  metadatas = [
199
+ {"source": "Example Paper 1", "topic": "Lung Cancer Pathology", "entities": {"DISEASES": ["adenocarcinoma", "lung cancer"], "PATHOLOGY_FINDINGS": ["glandular structures", "nuclear atypia", "papillary subtype"], "BIOMARKERS": ["TTF-1"]}, "IMAGE_ID": "fig_1a_adeno_lung.png"},
200
+ {"source": "Path Report 789", "topic": "Brain Tumor Pathology", "entities": {"DISEASES": ["high-grade glioma", "glioblastoma"], "PATHOLOGY_FINDINGS": ["necrosis", "microvascular proliferation"], "BIOMARKERS": ["Ki-67"]}, "IMAGE_ID": "slide_34b_gbm.tiff"},
201
+ {"source": "Textbook Chapter 5", "topic": "Molecular Oncology Pathways", "entities": {"GENES": ["EGFR"], "DRUGS": ["tyrosine kinase inhibitors"], "DISEASES": ["non-small cell lung cancer"]}, "IMAGE_ID": "diagram_egfr_pathway.svg"},
202
+ {"source": "Path Report 101", "topic": "Gastrointestinal Pathology", "entities": {"DISEASES": ["chronic gastritis", "Helicobacter pylori infection"], "PATHOLOGY_FINDINGS": ["intestinal metaplasia"]}, "IMAGE_ID": "micrograph_h_pylori_gastritis.jpg"},
203
+ {"source": "Case Study CJD", "topic": "Neuropathology", "entities": {"DISEASES": ["prion disease"], "PATHOLOGY_FINDINGS": ["Spongiform changes", "Gliosis"], "ANATOMICAL_LOCATIONS": ["cerebral cortex"]}, "IMAGE_ID": "slide_cjd_sample_02.jpg"}
204
  ]
205
+ # Generate unique IDs using timestamp + index to minimize collision chance in demo
206
+ ids = [f"doc_{int(time.time())}_{i}" for i in range(len(docs))]
207
 
208
  try:
209
+ # Check if documents with these exact texts already exist to avoid duplicates
210
+ existing_docs = collection.get(where={"$or": [{"document": doc} for doc in docs]}, include=[]) # Don't need full data, just check existence
211
+ if not existing_docs or not existing_docs.get('ids'):
212
+ collection.add(
213
+ documents=docs,
214
+ metadatas=metadatas,
215
+ ids=ids
216
+ )
217
+ st.success(f"Added {len(docs)} dummy documents to Chroma collection '{COLLECTION_NAME}'.")
218
  else:
219
+ st.warning("⚠️ Dummy data (based on document text) seems to already exist in the collection. No new data added.")
220
 
221
  except Exception as e:
222
+ st.error(f"Error adding dummy data to Chroma: {e}")
223
 
224
 
225
# --- Streamlit UI ---
st.set_page_config(layout="wide", page_title="Medical Image Analysis & RAG")
st.title("⚕️ Medical Image Analysis & RAG")
st.markdown("""
Upload a medical image (e.g., pathology slide, diagram).
Google Gemini Vision will analyze it, and Chroma DB will retrieve related text snippets
from a simulated knowledge base based on the analysis.
""")

# Prefixes of the status strings analyze_image_with_gemini() returns when the
# analysis did not produce a description. "Error analyzing image:" is listed
# explicitly because that failure message does not start with "Error:".
_FAILURE_PREFIXES = ("Error:", "Error analyzing image:", "Analysis blocked")

# Sidebar for Controls
with st.sidebar:
    st.header("⚙️ Controls")
    uploaded_file = st.file_uploader(
        "Choose an image...",
        type=["jpg", "jpeg", "png", "tiff", "webp"],
        help="Upload a medical image file."
    )

    st.divider()  # Visual separator

    if st.button("➕ Load Dummy KB Data", help="Add example text data to the Chroma vector database."):
        add_dummy_data_to_chroma()

    st.divider()

    st.info(f"""
ℹ️ **Note:**
- Chroma data is stored in the '{CHROMA_PATH}' folder within the app's environment.
- This data persists across runs but **will be lost** if the hosting environment (e.g., Streamlit Cloud, Hugging Face Space) is reset or its storage is cleared.
- Ensure the Google API Key is set in Streamlit Secrets.
""")

# Main Display Area
col1, col2 = st.columns(2)  # Two columns: image on the left, results on the right

with col1:
    st.subheader("🖼️ Uploaded Image")
    if uploaded_file is not None:
        # Read image bytes from the uploaded file
        image_bytes = uploaded_file.getvalue()
        # Display the uploaded image
        st.image(image_bytes, caption=f"Uploaded: {uploaded_file.name}", use_column_width=True)
    else:
        st.info("Upload an image using the sidebar to begin.")

with col2:
    st.subheader("🔬 Gemini Vision Analysis")
    if uploaded_file is not None:
        # image_bytes was set in the left column under the same condition.
        with st.spinner("🧠 Analyzing image with Gemini Vision... This may take a moment."):
            analysis_text = analyze_image_with_gemini(image_bytes)

        if analysis_text.startswith(_FAILURE_PREFIXES):
            # Errors/blocks are already reported via st.error/st.warning in the
            # helper function; show the status and skip retrieval, because a
            # failure message is not a meaningful knowledge-base query.
            st.markdown(f"**Analysis Status:** {analysis_text}")
        else:
            st.markdown(analysis_text)

            st.markdown("---")  # Separator before RAG results
            st.subheader("📚 Related Information (RAG via Chroma DB)")

            # Query Chroma DB using the Gemini analysis text
            with st.spinner("🔍 Searching knowledge base..."):
                chroma_results = query_chroma(analysis_text, n_results=3)  # Fetch top 3 results

            if chroma_results and chroma_results.get('documents') and chroma_results['documents'][0]:
                # A single query text was sent, so only the first result list is used.
                docs = chroma_results['documents'][0]
                metas = chroma_results['metadatas'][0]
                dists = chroma_results['distances'][0]
                st.success(f"✅ Found {len(docs)} related entries in the knowledge base:")

                for rank, (doc, meta, dist) in enumerate(zip(docs, metas, dists), start=1):
                    # Guard against entries without metadata so .get() is safe.
                    meta = meta or {}

                    # The score shown is 1 - distance (the collection is
                    # created with the cosine distance metric).
                    expander_title = f"Result {rank} (Similarity Score: {1-dist:.4f}) - Source: {meta.get('source', 'N/A')}"
                    with st.expander(expander_title):
                        st.markdown("**Retrieved Text:**")
                        st.markdown(f"> {doc}")  # Use blockquote for text
                        st.markdown("**Metadata:**")
                        st.json(meta)  # Display metadata nicely formatted

                        # Highlight if the retrieved text references another image/asset
                        if meta.get("IMAGE_ID"):
                            st.info(f"ℹ️ This text chunk is associated with visual asset: `{meta['IMAGE_ID']}`")

            elif chroma_results is not None:  # Query ran successfully but found nothing
                st.warning("⚠️ No relevant information found in the knowledge base matching the image analysis.")
            # chroma_results is None: the query failed and query_chroma
            # already reported it via st.error.
    else:
        st.info("Analysis will appear here once an image is uploaded.")

st.markdown("---")
st.markdown("<div style='text-align: center;'>Powered by Google Gemini, Chroma DB, and Streamlit</div>", unsafe_allow_html=True)