MedQA / services /pdf_report.py
mgbam's picture
Update services/pdf_report.py
68006c0 verified
# /home/user/app/services/pdf_report.py
from datetime import datetime, timezone
from html import escape
from io import BytesIO
from pathlib import Path
from typing import List, Dict, Any, Optional

from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle

from config.settings import settings
from assets.logo import get_logo_path # Assuming this function exists and works
from services.logger import app_logger
class MockChatMessage:
    """
    A simple holder for the message attributes used during PDF generation.

    ``role`` and ``content`` are coerced to ``str`` in the constructor so the
    formatting helpers never have to deal with ``None``. ``timestamp`` is kept
    as given and only formatted on demand. Any extra keyword arguments are
    stored as attributes of the same name.
    """

    def __init__(self, role: Optional[str], content: Optional[str],
                 timestamp: Optional[datetime], tool_name: Optional[str] = None, **kwargs):
        """
        Args:
            role: Message role; ``None`` becomes ``"unknown"``.
            content: Message text; ``None`` becomes the empty string.
            timestamp: Object with ``strftime`` (normally a datetime) or ``None``.
            tool_name: Optional tool identifier; coerced to ``str`` when given.
            **kwargs: Additional attributes to attach verbatim to the instance.
        """
        self.role: str = str(role if role is not None else "unknown")
        self.content: str = str(content if content is not None else "")
        self.timestamp: Optional[datetime] = timestamp  # Formatted lazily
        self.tool_name: Optional[str] = str(tool_name) if tool_name is not None else None
        # Keep any extra keys the caller supplied (they are not interpreted here).
        for key, value in kwargs.items():
            setattr(self, key, value)

    def get_formatted_timestamp(self, default_val: str = "Time N/A") -> str:
        """Return the timestamp as ``YYYY-MM-DD HH:MM:SS UTC``, or ``default_val`` if unset."""
        return self.timestamp.strftime('%Y-%m-%d %H:%M:%S UTC') if self.timestamp else default_val

    def get_formatted_content_for_pdf(self) -> str:
        """
        Return the content as markup-safe text for a ReportLab ``Paragraph``.

        ``&``, ``<`` and ``>`` are escaped first so message text can never be
        parsed as markup; only afterwards is each newline turned into a
        ``<br/>`` tag (the newline itself is kept). Escaping before inserting
        the tags is what keeps the generated ``<br/>`` intact.
        """
        escaped_text = escape(self.content, quote=False)
        return escaped_text.replace('\n', '<br/>\n')
def _escape_markup(value: Any) -> str:
    """Return ``value`` as text with ``&``, ``<`` and ``>`` escaped for Paragraph markup."""
    return escape(str(value), quote=False)


def generate_pdf_report(report_data: Dict[str, Any]) -> BytesIO:
    """
    Generates a PDF report from the provided report_data dictionary.

    Keys read from ``report_data`` (all optional): ``session_id``,
    ``session_title``, ``session_start_time``, ``patient_name``,
    ``patient_context_summary`` and ``messages``.
    ``report_data["messages"]`` is expected to be a List[MockChatMessage];
    anything else is logged, replaced by an empty transcript, and flagged
    inside the report.

    Caller-supplied strings are escaped before being placed into Paragraph
    markup. Text coming from ``settings`` is passed through unchanged.

    Returns:
        A BytesIO positioned at offset 0. It holds the report, or a short
        error PDF if building the report failed, or nothing at all if even
        the error PDF could not be built. Build failures are logged, not raised.
    """
    buffer = BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=letter,
                            leftMargin=0.75*inch, rightMargin=0.75*inch,
                            topMargin=0.75*inch, bottomMargin=0.75*inch,
                            title=f"Consultation Report - Session {report_data.get('session_id', 'N/A')}",
                            author=settings.APP_TITLE)
    styles = getSampleStyleSheet()
    story: List[Any] = []  # To hold ReportLab Flowables

    # --- Custom Styles Definition ---
    styles.add(ParagraphStyle(name='Justify', alignment=4, parent=styles['Normal']))
    styles.add(ParagraphStyle(name='Disclaimer', parent=styles['Italic'], fontSize=8, leading=10, spaceBefore=6, spaceAfter=6, textColor=colors.dimgrey))
    styles.add(ParagraphStyle(name='SectionHeader', parent=styles['h2'], spaceBefore=12, spaceAfter=6, keepWithNext=1, textColor=colors.HexColor("#000080")))  # Navy Blue
    styles.add(ParagraphStyle(name='SubHeader', parent=styles['h3'], spaceBefore=6, spaceAfter=3, keepWithNext=1, textColor=colors.HexColor("#4682B4")))  # Steel Blue
    styles.add(ParagraphStyle(name='ListItem', parent=styles['Normal'], leftIndent=0.25*inch, bulletIndent=0.1*inch, spaceBefore=3))

    # Message styles: a shared bordered/padded base, tinted per role.
    base_message_style = ParagraphStyle(name='BaseMessage', parent=styles['Normal'], spaceBefore=3, spaceAfter=3,
                                        leftIndent=0.1*inch, rightIndent=0.1*inch, leading=14,
                                        borderWidth=0.5, borderColor=colors.lightgrey, borderPadding=5)
    user_msg_style = ParagraphStyle(name='UserMessage', parent=base_message_style, backColor=colors.HexColor("#F0F8FF"))  # AliceBlue
    ai_msg_style = ParagraphStyle(name='AIMessage', parent=base_message_style, backColor=colors.HexColor("#F5FFFA"))  # MintCream
    tool_msg_style = ParagraphStyle(name='ToolMessage', parent=base_message_style, backColor=colors.HexColor("#FFF8DC"), fontName='Courier', textColor=colors.darkslategrey)  # Cornsilk
    system_msg_style = ParagraphStyle(name='SystemMessage', parent=base_message_style, backColor=colors.HexColor("#FAFAD2"), fontSize=9, textColor=colors.dimgrey, fontName='Helvetica-Oblique')  # LightGoldenrodYellow

    # --- Extract data safely from report_data dictionary ---
    # NOTE(review): the "Clinician" row is filled from the "patient_name" key;
    # confirm against the caller that this is the intended key.
    clinician_username = str(report_data.get("patient_name", "N/A Clinician"))
    session_id_str = str(report_data.get("session_id", "N/A"))
    session_title_str = str(report_data.get("session_title", "Untitled Consultation"))
    session_start_time_obj = report_data.get("session_start_time")  # datetime object or None
    patient_context_summary_str = str(report_data.get("patient_context_summary", "No specific patient context was provided for this session."))

    messages: List[MockChatMessage] = report_data.get("messages", [])
    # Validate the transcript payload; on failure continue with an empty
    # transcript and remember to say so in the transcript section (rather than
    # emitting the notice above the logo and header).
    messages_malformed = not isinstance(messages, list) or not all(isinstance(m, MockChatMessage) for m in messages)
    if messages_malformed:
        app_logger.error("PDF Generation: 'messages' in report_data is not a list of MockChatMessage instances as expected!")
        messages = []

    # 1. Logo and Document Header
    app_logger.debug("PDF Generation: Adding logo and header.")
    logo_path_str = get_logo_path()
    if logo_path_str and Path(logo_path_str).exists():
        try:
            # The platypus Image flowable scales within the given box while
            # keeping the aspect ratio via kind='proportional'; it has no
            # `preserveAspectRatio` keyword, which previously raised TypeError
            # here and caused the logo to be skipped every time.
            img = Image(logo_path_str, width=0.75*inch, height=0.75*inch, kind='proportional')
            img.hAlign = 'LEFT'  # Align logo to the left
            story.append(img)
            story.append(Spacer(1, 0.05*inch))
        except Exception as e:
            # The logo is best-effort: never fail the report because of it.
            app_logger.warning(f"PDF Report: Could not add logo image: {e}")
    story.append(Paragraph(str(settings.APP_TITLE), styles['h1']))  # Ensure APP_TITLE is string
    story.append(Paragraph("AI-Assisted Consultation Summary", styles['h2']))
    story.append(Spacer(1, 0.2*inch))

    # 2. Report Metadata Table
    app_logger.debug("PDF Generation: Adding metadata table.")
    # Use an aware UTC clock so the value matches the "UTC" label.
    report_date_str_display = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
    if session_start_time_obj:
        try:
            session_start_time_str_display = session_start_time_obj.strftime('%Y-%m-%d %H:%M UTC')
        except AttributeError:
            # Not a datetime-like object (e.g. already a string): show it as-is.
            session_start_time_str_display = str(session_start_time_obj)
    else:
        session_start_time_str_display = "N/A"
    meta_data_content = [
        [Paragraph("<b>Report Generated:</b>", styles['Normal']), Paragraph(report_date_str_display, styles['Normal'])],
        [Paragraph("<b>Clinician:</b>", styles['Normal']), Paragraph(_escape_markup(clinician_username), styles['Normal'])],
        [Paragraph("<b>Consultation Session ID:</b>", styles['Normal']), Paragraph(_escape_markup(session_id_str), styles['Normal'])],
        [Paragraph("<b>Session Title:</b>", styles['Normal']), Paragraph(_escape_markup(session_title_str), styles['Normal'])],
        [Paragraph("<b>Session Start Time:</b>", styles['Normal']), Paragraph(_escape_markup(session_start_time_str_display), styles['Normal'])],
    ]
    meta_table = Table(meta_data_content, colWidths=[2.0*inch, 4.5*inch])
    meta_table.setStyle(TableStyle([
        ('GRID', (0,0), (-1,-1), 0.5, colors.darkgrey), ('VALIGN', (0,0), (-1,-1), 'TOP'),
        ('BACKGROUND', (0,0), (0,-1), colors.lightgrey), ('LEFTPADDING', (0,0), (-1,-1), 6),
        ('RIGHTPADDING', (0,0), (-1,-1), 6), ('TOPPADDING', (0,0), (-1,-1), 4), ('BOTTOMPADDING', (0,0), (-1,-1), 4)
    ]))
    story.append(meta_table)
    story.append(Spacer(1, 0.3*inch))

    # 3. Disclaimer
    app_logger.debug("PDF Generation: Adding disclaimers.")
    story.append(Paragraph("<b>Important Disclaimer:</b>", styles['SubHeader']))
    story.append(Paragraph(str(settings.MAIN_DISCLAIMER_LONG), styles['Disclaimer']))
    if settings.SIMULATION_DISCLAIMER:
        story.append(Paragraph(str(settings.SIMULATION_DISCLAIMER), styles['Disclaimer']))
    story.append(Spacer(1, 0.3*inch))

    # 4. Patient Context Provided (if any)
    app_logger.debug("PDF Generation: Adding patient context summary.")
    if patient_context_summary_str and patient_context_summary_str.lower() not in ["no specific patient context was provided for this session.", "not provided.", ""]:
        story.append(Paragraph("Patient Context Provided by Clinician (Simulated Data):", styles['SectionHeader']))
        # Strip the known prefixes, then render each ';'-separated item as a bullet.
        cleaned_context_str = patient_context_summary_str.replace("Patient Context: ", "").replace("Initial Patient Context Set: ", "").strip()
        if cleaned_context_str:
            for item_str in cleaned_context_str.split(';'):
                item_clean = item_str.strip()
                if item_clean:
                    story.append(Paragraph(f"• {_escape_markup(item_clean)}", styles['ListItem']))
            story.append(Spacer(1, 0.2*inch))
        else:  # If after cleaning, string is empty
            story.append(Paragraph("No specific patient context details were recorded for this session.", styles['Normal']))
            story.append(Spacer(1, 0.2*inch))

    # 5. Consultation Transcript
    app_logger.debug(f"PDF Generation: Adding transcript with {len(messages)} messages.")
    story.append(Paragraph("Consultation Transcript:", styles['SectionHeader']))
    story.append(Spacer(1, 0.1*inch))
    if messages_malformed:
        story.append(Paragraph("Error: Message data for transcript was malformed.", styles['Heading3']))
    for msg_obj in messages:  # msg_obj is an instance of MockChatMessage
        # The initial-context system message is already covered by section 4.
        if msg_obj.role == 'system' and "Initial Patient Context Set:" in msg_obj.content:
            app_logger.debug(f"Skipping system context message in PDF transcript: {msg_obj.content[:50]}...")
            continue
        formatted_timestamp_str = msg_obj.get_formatted_timestamp()
        active_message_style_for_loop = styles['Normal']  # Default for unknown roles
        if msg_obj.role == 'assistant':
            prefix_display_str = f"AI Assistant ({formatted_timestamp_str}):"
            active_message_style_for_loop = ai_msg_style
        elif msg_obj.role == 'user':
            prefix_display_str = f"Clinician ({formatted_timestamp_str}):"
            active_message_style_for_loop = user_msg_style
        elif msg_obj.role == 'tool':
            tool_name_str_display = msg_obj.tool_name or "Tool"
            prefix_display_str = f"{tool_name_str_display.capitalize()} Output ({formatted_timestamp_str}):"
            active_message_style_for_loop = tool_msg_style
        elif msg_obj.role == 'system':
            prefix_display_str = f"System Note ({formatted_timestamp_str}):"
            active_message_style_for_loop = system_msg_style
        else:
            prefix_display_str = f"{msg_obj.role.capitalize()} ({formatted_timestamp_str}):"
        formatted_content_str = msg_obj.get_formatted_content_for_pdf()
        # Bold prefix line, then the content in its role-specific styled box.
        # The prefix can contain caller-supplied role/tool names, so escape it.
        story.append(Paragraph(f"<b>{_escape_markup(prefix_display_str)}</b>", styles['Normal']))
        story.append(Paragraph(formatted_content_str, active_message_style_for_loop))
    story.append(Spacer(1, 0.5*inch))
    story.append(Paragraph("--- End of Report ---", ParagraphStyle(name='EndOfReport', parent=styles['Italic'], alignment=1, spaceBefore=12)))

    # --- Build PDF ---
    app_logger.debug("PDF Generation: Building document.")
    try:
        doc.build(story)
        buffer.seek(0)
        app_logger.info(f"PDF report generated successfully for session ID: {session_id_str}")
    except Exception as e_build:
        app_logger.error(f"Failed to build PDF document for session ID {session_id_str}: {e_build}", exc_info=True)
        # Fall back to a minimal error PDF built into a fresh buffer.
        buffer = BytesIO()
        error_styles_local = getSampleStyleSheet()  # Fresh styles for the error PDF
        error_doc_local = SimpleDocTemplate(buffer, pagesize=letter)
        # Truncate, then escape: the exception text may itself contain markup
        # characters, which would otherwise make this fallback fail as well.
        error_detail_str = _escape_markup(str(e_build)[:500])
        error_story_local = [
            Paragraph("Error: Could not generate PDF report.", error_styles_local['h1']),
            Paragraph(f"An error occurred during PDF construction: {error_detail_str}", error_styles_local['Normal']),
            Paragraph("Please check application logs for more details or contact support.", error_styles_local['Normal'])
        ]
        try:
            error_doc_local.build(error_story_local)
        except Exception as e_err_pdf:  # If even the error PDF fails
            app_logger.error(f"Failed to build even the error PDF: {e_err_pdf}", exc_info=True)
            buffer = BytesIO()  # Guarantee an empty buffer rather than partial output
        buffer.seek(0)
    return buffer