mgbam committed on
Commit
25018b0
·
verified ·
1 Parent(s): 7662697

Update llm_interactions.py

Browse files
Files changed (1) hide show
  1. llm_interactions.py +76 -52
llm_interactions.py CHANGED
@@ -37,7 +37,7 @@ DISEASE_SPECIFIC_PROMPT_TEMPLATE = f"""{_BASE_ROLE_PROMPT}
37
  **Focus solely on visual findings related to {{disease}}.**
38
  """
39
 
40
- # Prompt for AI self-assessment (QUALITATIVE ONLY)
41
  SELF_ASSESSMENT_PROMPT_TEMPLATE = f"""{_BASE_ROLE_PROMPT}
42
 
43
  **Task:** Perform a qualitative self-assessment of the *previous AI response* provided below, considering the context it was generated in. **This is an internal check, not a clinical confidence score.**
@@ -68,7 +68,7 @@ Critically evaluate the **"Previous AI Response"** based on the following factor
68
  * Justification: [Did the previous response directly and fully address the user's question or the requested task's scope? Or did it deviate or miss aspects?]
69
 
70
  5. **## 5. Overall Assessment Impression:**
71
- * Impression: [Provide a brief qualitative summary impression - e.g., "High confidence based on clear visual evidence," "Moderate confidence due to some ambiguity," "Low confidence due to poor image quality/non-specific findings."]
72
 
73
  **Reminder:** This assessment reflects the AI's perspective on its previous output's limitations and alignment, **not clinical certainty.**
74
  """
@@ -134,8 +134,7 @@ def query_gemini_vision(image: Image.Image, text_prompt: str) -> Tuple[Optional[
134
  "max_output_tokens": 8192, # Leverage large context
135
  },
136
  "safety_settings": [
137
- # Stricter setting for medical potentially: BLOCK_LOW_AND_ABOVE might be considered,
138
- # but BLOCK_MEDIUM is a safer start to avoid over-blocking relevant descriptions. TEST CAREFULLY.
139
  {"category": cat, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
140
  for cat in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH",
141
  "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]
@@ -173,20 +172,15 @@ def query_gemini_vision(image: Image.Image, text_prompt: str) -> Tuple[Optional[
173
  # 3. Check Finish Reason (Includes safety blocking of response)
174
  finish_reason = candidate.get('finishReason', 'UNKNOWN')
175
  if finish_reason != "STOP":
176
- # Log details if not a normal stop
177
  safety_ratings = candidate.get('safetyRatings', [])
178
  blocked_cats = [sr.get('category', 'UNKNOWN') for sr in safety_ratings if sr.get('blocked')]
179
  warn_msg = f"Gemini response finished unexpectedly (Reason: {finish_reason})."
180
  if blocked_cats: warn_msg += f" Blocked safety categories: {blocked_cats}"
181
  logger.warning(warn_msg)
182
- # Treat SAFETY finish reason as a failure
183
  if finish_reason == "SAFETY":
184
  return f"API Error: Response generation blocked by safety filters. Categories: {blocked_cats}", False
185
- # Treat other non-STOP reasons (MAX_TOKENS, RECITATION, OTHER) as potential issues but might still have partial content
186
- # For this use case, let's treat MAX_TOKENS as okay (but log), others as potential errors.
187
- elif finish_reason != "MAX_TOKENS":
188
  return f"API Error: Response generation stopped unexpectedly (Reason: {finish_reason}).", False
189
- # If MAX_TOKENS, log and continue to extract content
190
 
191
  # 4. Extract Text Content
192
  content = candidate.get('content', {})
@@ -213,7 +207,6 @@ def query_gemini_vision(image: Image.Image, text_prompt: str) -> Tuple[Optional[
213
  except: error_details = {}
214
  error_message = error_details.get("message", e.response.text[:500])
215
  logger.error(f"HTTP error {status_code} querying Gemini: {error_message}", exc_info=(status_code >= 500))
216
- # Provide user-friendly messages based on common codes
217
  if status_code in [401, 403]: return f"API Error: Authentication/Permission Failed ({status_code}). Check API Key.", False
218
  if status_code == 429: return "API Error: Rate limit exceeded (429). Please try again later.", False
219
  return f"API Error: HTTP {status_code}. Details: {error_message}", False
@@ -224,19 +217,23 @@ def query_gemini_vision(image: Image.Image, text_prompt: str) -> Tuple[Optional[
224
  logger.critical(f"Unexpected critical error during Gemini API interaction: {e}", exc_info=True)
225
  return f"Internal Error: An unexpected error occurred ({type(e).__name__}).", False
226
 
 
227
  # --- Specific Interaction Function Wrappers ---
228
 
229
  def _format_roi_info(roi: Optional[Dict]) -> str:
230
  """Formats ROI dictionary into a string for prompts, handling invalid input."""
231
  if roi and isinstance(roi, dict) and all(key in roi for key in ["left", "top", "width", "height"]):
232
  try:
 
 
233
  return (f"User has highlighted a Region of Interest (ROI) at "
234
- f"Top-Left=({int(roi['left'])}, {int(roi['top'])}) with "
235
- f"Width={int(roi['width'])}, Height={int(roi['height'])} pixels.")
236
- except (TypeError, ValueError):
237
- logger.warning("ROI dictionary contained non-integer values.", exc_info=True)
238
  return "ROI provided but coordinates appear invalid."
239
- return "No specific region highlighted by user."
 
240
 
241
  def _format_history_text(history: List[Tuple[str, str, Any]]) -> str:
242
  """Formats recent conversation history for the prompt context."""
@@ -248,26 +245,34 @@ def _format_history_text(history: List[Tuple[str, str, Any]]) -> str:
248
  try:
249
  q_type = entry[0] if len(entry) > 0 else "[Type Missing]"
250
  msg = entry[1] if len(entry) > 1 else "[Message Missing]"
251
- # Simple formatting, avoid including raw fallback tags directly if possible
252
  if "[fallback]" in q_type.lower():
253
- formatted_entries.append(f"User: {q_type.split(']')[1].strip()}\nAI (Fallback): {msg}")
 
 
254
  elif "user" in q_type.lower():
255
  formatted_entries.append(f"User: {msg}")
256
  elif "ai" in q_type.lower():
257
  formatted_entries.append(f"AI: {msg}")
 
 
 
258
  except Exception as e:
259
  logger.warning(f"Skipping malformed history entry {entry}: {e}")
260
  continue
261
  return "\n---\n".join(formatted_entries) if formatted_entries else "No processable conversation history available."
262
 
 
263
  def run_initial_analysis(image: Image.Image, roi: Optional[Dict] = None) -> str:
264
- """Performs initial analysis, returning result or formatted error message."""
265
  action_name = "Initial Analysis"
266
  logger.info(f"Requesting {action_name}. ROI: {bool(roi)}")
267
  roi_info = _format_roi_info(roi)
268
  prompt = INITIAL_ANALYSIS_PROMPT_TEMPLATE.format(roi_info=roi_info)
269
  response_text, success = query_gemini_vision(image, prompt)
270
- return response_text if success else f"{action_name} Failed: {response_text or 'Unknown API error.'}"
 
 
271
 
272
  def run_multimodal_qa(
273
  image: Image.Image, question: str, history: List[Tuple[str, str, Any]], roi: Optional[Dict] = None
@@ -279,8 +284,9 @@ def run_multimodal_qa(
279
  history_text = _format_history_text(history)
280
  prompt = QA_CONTEXT_PROMPT_TEMPLATE.format(roi_info=roi_info, history_text=history_text, question=question)
281
  response_text, success = query_gemini_vision(image, prompt)
282
- # Return tuple directly as expected by app.py
283
- return response_text if response_text else "Error: No response received from API.", success
 
284
 
285
  def run_disease_analysis(image: Image.Image, disease: str, roi: Optional[Dict] = None) -> str:
286
  """Performs disease-focused analysis, returning result or formatted error."""
@@ -289,72 +295,90 @@ def run_disease_analysis(image: Image.Image, disease: str, roi: Optional[Dict] =
289
  roi_info = _format_roi_info(roi)
290
  prompt = DISEASE_SPECIFIC_PROMPT_TEMPLATE.format(disease=disease, roi_info=roi_info)
291
  response_text, success = query_gemini_vision(image, prompt)
292
- return response_text if success else f"{action_name} Failed ({disease}): {response_text or 'Unknown API error.'}"
 
 
293
 
294
  def run_llm_self_assessment(
295
- image: Image.Image, history: List[Tuple[str, str, Any]], roi: Optional[Dict] = None
296
- ) -> str:
297
- """Requests the AI to perform a qualitative self-assessment of its last response."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
298
  action_name = "LLM Self-Assessment (Experimental)"
299
  logger.info(f"Requesting {action_name}. History length: {len(history)}. ROI used previously: {bool(roi)}")
300
 
301
  if not history:
302
  logger.warning(f"{action_name} requested without history.")
303
- return f"{action_name} Failed: No conversation history available to assess."
304
 
 
 
305
  try:
306
- # Get context of the very last interaction (Q&A)
307
- last_q_type, last_a_or_q_msg = "[Type Missing]", "[Message Missing]"
308
- last_q, last_a = "[Question Missing]", "[Answer Missing]"
309
-
310
- # Iterate backwards to find the last Q/A pair
311
  last_ai_answer_entry = None
312
  last_user_question_entry = None
313
- for entry in reversed(history):
314
  entry_type = entry[0].lower() if len(entry) > 0 else ""
315
  entry_msg = entry[1] if len(entry) > 1 else ""
 
316
  if "ai answer" in entry_type and not last_ai_answer_entry:
317
  last_ai_answer_entry = (entry_type, entry_msg)
 
318
  elif "user question" in entry_type and not last_user_question_entry:
319
  last_user_question_entry = (entry_type, entry_msg)
 
320
  if last_ai_answer_entry and last_user_question_entry:
321
- break # Found the most recent pair
322
 
323
  if not last_ai_answer_entry or not last_user_question_entry:
324
- logger.error("Could not reliably extract the last Q/A pair from history.")
325
- return f"{action_name} Failed: Cannot determine the last interaction to assess."
326
 
327
  last_q = last_user_question_entry[1]
328
  last_a = last_ai_answer_entry[1]
329
 
330
- # Pre-check: If the last answer indicates failure, return low assessment directly
331
  if isinstance(last_a, str) and any(err in last_a.lower() for err in ["error:", "failed:", "blocked", "unavailable", "could not"]):
332
- logger.warning(f"Last interaction was an error/failure ('{last_a[:100]}...'). Reporting low assessment directly.")
333
- return (f"## 1. Clarity of Findings:\n Justification: N/A - Previous step resulted in an error.\n"
334
- f"## 2. Sufficiency of Visual Information:\n Justification: N/A - Error state.\n"
335
- f"## 3. Potential Ambiguity:\n Justification: N/A - Error state.\n"
336
- f"## 4. Scope Alignment:\n Justification: N/A - The previous request failed.\n"
 
337
  f"## 5. Overall Assessment Impression:\n Impression: Assessment not possible due to prior error.")
338
 
339
- except IndexError:
340
- logger.error("History list structure error during self-assessment setup.")
341
- return f"{action_name} Failed: Error processing interaction history."
342
-
343
- # Format ROI info relevant to the *last interaction* being assessed
344
- roi_info = _format_roi_info(roi) # Assumes ROI state passed corresponds to the last interaction
345
 
 
 
346
  prompt = SELF_ASSESSMENT_PROMPT_TEMPLATE.format(last_q=last_q, last_a=last_a, roi_info=roi_info)
347
 
348
  # Call the API using the *same image* associated with the last interaction
349
  response_text, success = query_gemini_vision(image, prompt)
350
 
351
  if success and response_text:
352
- # Simple check if expected markdown headers are present
353
  if "## 1. Clarity" in response_text and "## 5. Overall Assessment" in response_text:
354
  logger.info("LLM Self-Assessment received in expected format.")
355
- return response_text
 
356
  else:
357
  logger.warning(f"Self-assessment response did not match expected Markdown format:\n'''{response_text}'''")
358
- return f"LLM Self-Assessment Response (Format Warning):\n{response_text}" # Return raw but flag
 
359
  else:
360
- return f"{action_name} Failed: {response_text or 'Unknown API error.'}"
 
 
37
  **Focus solely on visual findings related to {{disease}}.**
38
  """
39
 
40
+ # Prompt for AI self-assessment (QUALITATIVE ONLY - Renamed from Confidence)
41
  SELF_ASSESSMENT_PROMPT_TEMPLATE = f"""{_BASE_ROLE_PROMPT}
42
 
43
  **Task:** Perform a qualitative self-assessment of the *previous AI response* provided below, considering the context it was generated in. **This is an internal check, not a clinical confidence score.**
 
68
  * Justification: [Did the previous response directly and fully address the user's question or the requested task's scope? Or did it deviate or miss aspects?]
69
 
70
  5. **## 5. Overall Assessment Impression:**
71
+ * Impression: [Provide a brief qualitative summary impression - e.g., "Assessment suggests response was well-supported by clear visual evidence," "Assessment indicates moderate confidence due to some ambiguity," "Assessment suggests low confidence due to poor image quality/non-specific findings."]
72
 
73
  **Reminder:** This assessment reflects the AI's perspective on its previous output's limitations and alignment, **not clinical certainty.**
74
  """
 
134
  "max_output_tokens": 8192, # Leverage large context
135
  },
136
  "safety_settings": [
137
+ # BLOCK_MEDIUM_AND_ABOVE is a reasonable default. Test carefully for medical content.
 
138
  {"category": cat, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
139
  for cat in ["HARM_CATEGORY_HARASSMENT", "HARM_CATEGORY_HATE_SPEECH",
140
  "HARM_CATEGORY_SEXUALLY_EXPLICIT", "HARM_CATEGORY_DANGEROUS_CONTENT"]
 
172
  # 3. Check Finish Reason (Includes safety blocking of response)
173
  finish_reason = candidate.get('finishReason', 'UNKNOWN')
174
  if finish_reason != "STOP":
 
175
  safety_ratings = candidate.get('safetyRatings', [])
176
  blocked_cats = [sr.get('category', 'UNKNOWN') for sr in safety_ratings if sr.get('blocked')]
177
  warn_msg = f"Gemini response finished unexpectedly (Reason: {finish_reason})."
178
  if blocked_cats: warn_msg += f" Blocked safety categories: {blocked_cats}"
179
  logger.warning(warn_msg)
 
180
  if finish_reason == "SAFETY":
181
  return f"API Error: Response generation blocked by safety filters. Categories: {blocked_cats}", False
182
+ elif finish_reason != "MAX_TOKENS": # Treat MAX_TOKENS as potentially usable partial response
 
 
183
  return f"API Error: Response generation stopped unexpectedly (Reason: {finish_reason}).", False
 
184
 
185
  # 4. Extract Text Content
186
  content = candidate.get('content', {})
 
207
  except: error_details = {}
208
  error_message = error_details.get("message", e.response.text[:500])
209
  logger.error(f"HTTP error {status_code} querying Gemini: {error_message}", exc_info=(status_code >= 500))
 
210
  if status_code in [401, 403]: return f"API Error: Authentication/Permission Failed ({status_code}). Check API Key.", False
211
  if status_code == 429: return "API Error: Rate limit exceeded (429). Please try again later.", False
212
  return f"API Error: HTTP {status_code}. Details: {error_message}", False
 
217
  logger.critical(f"Unexpected critical error during Gemini API interaction: {e}", exc_info=True)
218
  return f"Internal Error: An unexpected error occurred ({type(e).__name__}).", False
219
 
220
+
221
  # --- Specific Interaction Function Wrappers ---
222
 
223
  def _format_roi_info(roi: Optional[Dict]) -> str:
224
  """Formats ROI dictionary into a string for prompts, handling invalid input."""
225
  if roi and isinstance(roi, dict) and all(key in roi for key in ["left", "top", "width", "height"]):
226
  try:
227
+ # Ensure values are integers for clean display
228
+ left, top, width, height = map(int, [roi['left'], roi['top'], roi['width'], roi['height']])
229
  return (f"User has highlighted a Region of Interest (ROI) at "
230
+ f"Top-Left=({left}, {top}) with "
231
+ f"Width={width}, Height={height} pixels.")
232
+ except (TypeError, ValueError, KeyError):
233
+ logger.warning("ROI dictionary contained invalid/missing keys or non-numeric values.", exc_info=True)
234
  return "ROI provided but coordinates appear invalid."
235
+ return "Analysis applies to the entire image (no specific ROI highlighted)." # More explicit default
236
+
237
 
238
  def _format_history_text(history: List[Tuple[str, str, Any]]) -> str:
239
  """Formats recent conversation history for the prompt context."""
 
245
  try:
246
  q_type = entry[0] if len(entry) > 0 else "[Type Missing]"
247
  msg = entry[1] if len(entry) > 1 else "[Message Missing]"
248
+ # Simple formatting, indicate source clearly
249
  if "[fallback]" in q_type.lower():
250
+ # Extract user question part if available
251
+ user_q_part = q_type.split(']')[1].strip() if ']' in q_type else "[User Question]"
252
+ formatted_entries.append(f"User: {user_q_part}\nAI (Fallback): {msg}")
253
  elif "user" in q_type.lower():
254
  formatted_entries.append(f"User: {msg}")
255
  elif "ai" in q_type.lower():
256
  formatted_entries.append(f"AI: {msg}")
257
+ else: # Handle system messages or other types if necessary
258
+ formatted_entries.append(f"{q_type}: {msg}")
259
+
260
  except Exception as e:
261
  logger.warning(f"Skipping malformed history entry {entry}: {e}")
262
  continue
263
  return "\n---\n".join(formatted_entries) if formatted_entries else "No processable conversation history available."
264
 
265
+
266
def run_initial_analysis(image: Image.Image, roi: Optional[Dict] = None) -> str:
    """Run the initial structured analysis of the image.

    Args:
        image: PIL image to analyse.
        roi: Optional ROI dictionary highlighting a sub-region.

    Returns:
        The model's analysis text on success, otherwise a Markdown-prefixed
        failure message suitable for direct display in the UI.
    """
    action_name = "Initial Analysis"
    logger.info(f"Requesting {action_name}. ROI: {bool(roi)}")
    response_text, success = query_gemini_vision(
        image,
        INITIAL_ANALYSIS_PROMPT_TEMPLATE.format(roi_info=_format_roi_info(roi)),
    )
    if success:
        return response_text
    # Prefix error clearly for the UI
    return f"**{action_name} Failed:** {response_text or 'Unknown API error.'}"
275
+
276
 
277
  def run_multimodal_qa(
278
  image: Image.Image, question: str, history: List[Tuple[str, str, Any]], roi: Optional[Dict] = None
 
284
  history_text = _format_history_text(history)
285
  prompt = QA_CONTEXT_PROMPT_TEMPLATE.format(roi_info=roi_info, history_text=history_text, question=question)
286
  response_text, success = query_gemini_vision(image, prompt)
287
+ # Return tuple directly; error message already formatted by query_gemini_vision if needed
288
+ return response_text if response_text else "Error: No response text received from API.", success
289
+
290
 
291
  def run_disease_analysis(image: Image.Image, disease: str, roi: Optional[Dict] = None) -> str:
292
  """Performs disease-focused analysis, returning result or formatted error."""
 
295
  roi_info = _format_roi_info(roi)
296
  prompt = DISEASE_SPECIFIC_PROMPT_TEMPLATE.format(disease=disease, roi_info=roi_info)
297
  response_text, success = query_gemini_vision(image, prompt)
298
+ # Prefix error clearly for the UI
299
+ return response_text if success else f"**{action_name} Failed ({disease}):** {response_text or 'Unknown API error.'}"
300
+
301
 
302
def run_llm_self_assessment(
    image: Image.Image,  # Image associated with the interaction being assessed
    history: List[Tuple[str, str, Any]],
    roi: Optional[Dict] = None  # ROI state during the interaction being assessed
) -> str:
    """
    Requests the AI to perform a qualitative self-assessment of its last response.
    This is experimental and NOT a clinical confidence score.

    Args:
        image: The PIL Image corresponding to the last interaction.
        history: List of previous interaction tuples. Must not be empty.
        roi: The ROI dictionary active during the last interaction being evaluated.

    Returns:
        A string containing the AI's self-assessment based on the defined factors,
        or a string prefixed with the action name and "Failed:".
    """
    action_name = "LLM Self-Assessment (Experimental)"
    logger.info(f"Requesting {action_name}. History length: {len(history)}. ROI used previously: {bool(roi)}")

    if not history:
        logger.warning(f"{action_name} requested without history.")
        return f"**{action_name} Failed:** No conversation history available to assess."

    # --- Safely extract the last Q/A pair to assess ---
    try:
        # BUGFIX: locate the most recent AI answer first, then search only the
        # entries BEFORE it for its user question. Scanning backwards for both
        # independently could mispair a trailing *unanswered* user question
        # with an older AI answer.
        answer_idx: Optional[int] = None
        for idx in range(len(history) - 1, -1, -1):
            entry = history[idx]
            entry_type = entry[0].lower() if len(entry) > 0 else ""
            if "ai answer" in entry_type:
                answer_idx = idx
                break

        question_idx: Optional[int] = None
        if answer_idx is not None:
            for idx in range(answer_idx - 1, -1, -1):
                entry = history[idx]
                entry_type = entry[0].lower() if len(entry) > 0 else ""
                if "user question" in entry_type:
                    question_idx = idx
                    break

        if answer_idx is None or question_idx is None:
            raise ValueError("Could not find a preceding User/AI pair in history.")

        # Guard against short/malformed tuples rather than raising IndexError.
        last_q = history[question_idx][1] if len(history[question_idx]) > 1 else "[Question Missing]"
        last_a = history[answer_idx][1] if len(history[answer_idx]) > 1 else "[Answer Missing]"

        # --- Pre-check: If last answer was an error, provide direct feedback ---
        if isinstance(last_a, str) and any(err in last_a.lower() for err in ["error:", "failed:", "blocked", "unavailable", "could not"]):
            logger.warning(f"Last interaction was an error ('{last_a[:100]}...'). Reporting low assessment directly.")
            return (f"**{action_name} Result:**\n\n"
                    f"## 1. Clarity of Findings:\n Justification: N/A - Previous step resulted in an error.\n\n"
                    f"## 2. Sufficiency of Visual Information:\n Justification: N/A - Error state.\n\n"
                    f"## 3. Potential Ambiguity:\n Justification: N/A - Error state.\n\n"
                    f"## 4. Scope Alignment:\n Justification: N/A - The previous request failed.\n\n"
                    f"## 5. Overall Assessment Impression:\n Impression: Assessment not possible due to prior error.")

    except Exception as e:
        logger.error(f"Error processing history for self-assessment: {e}", exc_info=True)
        return f"**{action_name} Failed:** Error processing interaction history."

    # --- Prepare and Run Assessment Prompt ---
    roi_info = _format_roi_info(roi)  # Format ROI state from the time of the last answer
    prompt = SELF_ASSESSMENT_PROMPT_TEMPLATE.format(last_q=last_q, last_a=last_a, roi_info=roi_info)

    # Call the API using the *same image* associated with the last interaction
    response_text, success = query_gemini_vision(image, prompt)

    if success and response_text:
        # Basic check for expected structure
        if "## 1. Clarity" in response_text and "## 5. Overall Assessment" in response_text:
            logger.info("LLM Self-Assessment received in expected format.")
            # Prepend title for clarity in UI
            return f"**{action_name} Result:**\n\n{response_text}"
        else:
            logger.warning(f"Self-assessment response did not match expected Markdown format:\n'''{response_text}'''")
            # Return raw but flag it and add title
            return f"**{action_name} Result (Format Warning):**\n\n{response_text}"
    else:
        # Prefix error clearly for the UI
        return f"**{action_name} Failed:** {response_text or 'Unknown API error.'}"