Spaces:
Sleeping
Sleeping
import streamlit as st | |
import os | |
from dotenv import load_dotenv | |
from utils import ( | |
configure_gemini, | |
analyze_pdf_directly, | |
csv_to_dataframe, | |
save_csv, | |
get_pdf_metadata, | |
extract_csv_from_response | |
) | |
import base64 | |
from datetime import datetime | |
import tempfile | |
# Load environment variables (python-dotenv) before anything reads os.environ
load_dotenv()

# Configure page settings
st.set_page_config(
    page_title="PDF Document Analyzer",
    # NOTE(review): the icon arrived as a mis-encoded glyph ("π"); restored as
    # the document emoji -- confirm this is the intended icon.
    page_icon="📄",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Custom CSS styling: document cards, primary buttons and the analysis section.
# The stylesheet is kept flush-left so the Markdown renderer never sees
# indented lines inside the raw HTML block.
_CUSTOM_CSS = """
<style>
.document-card {
border-radius: 10px;
padding: 1.5rem;
margin-bottom: 1.5rem;
background-color: white;
box-shadow: 0 4px 12px rgba(0,0,0,0.1);
transition: transform 0.2s;
}
.document-card:hover {
transform: translateY(-2px);
}
.stButton>button {
background-color: #4285F4;
color: white;
border-radius: 8px;
padding: 0.5rem 1.5rem;
font-weight: 500;
}
.analysis-section {
border-left: 4px solid #4285F4;
padding-left: 1rem;
margin-top: 1.5rem;
}
</style>
"""

# unsafe_allow_html is required for the <style> element to be emitted as HTML
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# App header
# NOTE(review): the leading emoji arrived mis-encoded ("π"); restored as the
# document emoji -- confirm against the intended title.
st.title("📄 PDF Document Analyzer")
st.markdown("Upload multiple PDFs to analyze each document directly using Gemini's native PDF processing")
# Analysis prompt passed to the model together with each uploaded PDF.
# It asks for a CSV answer; the response is later run through
# extract_csv_from_response, so keep the column list intact when editing.
PROMPT = """Please analyze the provided images of the real estate document set and perform the following actions:
1. *Identify Parties:* Determine and list Seller 1, Seller 2 (if applicable), Buyer 1, and Buyer 2.
2. *Identify Missing Items:* Locate and list all instances of missing signatures and missing initials for all parties across all documents.
3. *Identify Checked Boxes:* Locate and list all checkboxes that have been marked or checked.
4. *Generate Secondary Questions:* For checkboxes that indicate significant waivers (e.g., home warranty, inspection rights, lead paint assessment), specific conditions (e.g., cash sale, contingency status), potential conflicts, or reference other documents, formulate a relevant 'Secondary Question' designed to prompt confirmation or clarification from the user/parties involved.
5. *Check for Required Paperwork:* Based only on the checkboxes identified in step 3 that explicitly state or strongly imply a specific addendum or disclosure document should be attached (e.g., "Lead Based Paint Disclosure Addendum attached", "See Counter Offer Addendum", "Seller's Disclosure...Addendum attached", "Retainer Addendum attached", etc.), check if a document matching that description appears to be present within the provided image set. Note whether this implied paperwork is 'Found', 'Missing', or 'Potentially Missing/Ambiguous' within the provided images.
6. *Identify Conflicts:* Specifically look for and note any directly contradictory information or conflicting checked boxes (like the conflicting inspection clauses found previously).
7. *Provide Location:* For every identified item (missing signature/initial, checked box, required paperwork status, party identification, conflict), specify the approximate line number(s) or clear location on the page (e.g., Bottom Right Initials, Seller Signature Block).
8. *Format Output:* Present all findings comprehensively in CSV format. The CSV columns should be:
* Category (e.g., Parties, Missing Item, Checked Box, Required Paperwork, Conflict)
* Location (Document Name/Page, e.g., Sale Contract Pg 2)
* Line Item(s) (Approximate line number or location description)
* Item Type (e.g., Seller Initials, Home Warranty Waiver, Lead Paint Addendum Check, Lead Paint Addendum Document)
* Status (e.g., Identified, Missing, Checked, Found, Potentially Missing, Conflict Detected)
* Details (Specifics like names, text of the checkbox, description of the issue or document status)
* Secondary Question (if applicable) (The question generated in step 4)
Please apply this analysis to the entire set of documents provided.
"""
# Sidebar configuration: resolve the Gemini API key and show the feature list
with st.sidebar:
    # The environment (optionally populated from .env) is the primary source.
    api_key = os.getenv("GEMINI_API_KEY", "")

    # Fallback: the main page tells users to enter the key in the sidebar, so
    # offer an input here whenever the environment does not provide one.
    if not api_key:
        api_key = st.text_input(
            "Gemini API Key",
            type="password",
            help="Only needed when the GEMINI_API_KEY environment variable is not set",
        ).strip()

    if api_key:
        configure_gemini(api_key)

    st.markdown("---")
    # The literal is kept flush-left so its Markdown content is unchanged.
    st.info("""
**Features:**
- Direct PDF processing
- Individual analysis for each document
- Downloadable CSV reports
""")
# Main app content: PDF upload widget accepting one or more files
_uploader_options = {
    "type": ["pdf"],
    "accept_multiple_files": True,
    "help": "Upload multiple PDF documents for analysis",
}
uploaded_files = st.file_uploader("Upload PDF Documents", **_uploader_options)
def _show_raw_response(notice, raw_response) -> None:
    """Warn that no table can be shown and fall back to the raw model output."""
    st.warning(notice)
    st.markdown("### Full Response")
    st.write(raw_response)


def _render_analysis(result: dict, index: int) -> None:
    """Draw one stored analysis result.

    Args:
        result: Entry saved in session state after a successful analysis, with
            the keys ``raw_response``, ``csv_data``, ``df``, ``has_table`` and
            ``csv_filename``.
        index: Position of the document in the upload list; used to keep the
            download button's widget key unique.
    """
    with st.expander("View Analysis Results", expanded=True):
        if not result["csv_data"]:
            _show_raw_response("No CSV data found in response", result["raw_response"])
            return
        if not result["has_table"]:
            _show_raw_response("No tabular data found in response", result["raw_response"])
            return

        st.dataframe(result["df"], use_container_width=True)
        st.download_button(
            label="Download Analysis",
            data=result["csv_data"],
            file_name=result["csv_filename"],
            mime="text/csv",
            key=f"download_{index}",
        )


if uploaded_files and api_key:
    # NOTE(review): the leading emoji here and in the document heading arrived
    # mis-encoded ("β", "π"); restored as best guesses -- confirm.
    st.success(f"✅ {len(uploaded_files)} PDF(s) ready for analysis")

    # Process each PDF separately
    for i, uploaded_file in enumerate(uploaded_files):
        with st.container():
            st.markdown(f"### 📄 Document {i+1}: {uploaded_file.name}")

            # Read the upload once and reuse the bytes for metadata, size and analysis
            pdf_bytes = uploaded_file.getvalue()
            metadata = get_pdf_metadata(pdf_bytes)

            # Results live in session state: a button is only True during the
            # rerun triggered by its own click, so output rendered directly
            # under it would vanish as soon as the download button is pressed.
            state_key = f"analysis_{i}_{uploaded_file.name}_{len(pdf_bytes)}"

            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Pages", metadata['page_count'])
            with col2:
                st.metric("Size", f"{len(pdf_bytes) / 1024:.1f} KB")
            with col3:
                analyze_clicked = st.button("Analyze Document", key=f"analyze_{i}")

            if analyze_clicked:
                with st.spinner(f"Analyzing {uploaded_file.name}..."):
                    try:
                        # Analyze PDF directly
                        raw_response = analyze_pdf_directly(
                            pdf_bytes=pdf_bytes,
                            prompt=PROMPT,
                            model_name="gemini-2.5-pro-exp-03-25",  # or "gemini-1.5-flash"
                        )
                        # Parse inside the try so parsing errors are reported
                        # as a failed analysis, not as an uncaught traceback.
                        csv_data = extract_csv_from_response(raw_response)
                        df = csv_to_dataframe(csv_data) if csv_data else None
                        has_table = df is not None and not df.empty
                    except Exception as e:
                        # UI boundary: report the failure and drop stale results
                        st.session_state.pop(state_key, None)
                        st.error(f"Analysis failed: {str(e)}")
                    else:
                        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                        st.session_state[state_key] = {
                            "raw_response": raw_response,
                            "csv_data": csv_data,
                            "df": df,
                            "has_table": has_table,
                            "csv_filename": f"{uploaded_file.name}_analysis_{timestamp}.csv",
                        }

            # Rendered below the columns so the table gets the full width
            stored_result = st.session_state.get(state_key)
            if stored_result is not None:
                _render_analysis(stored_result, i)

            st.markdown("---")
elif not api_key:
    st.warning("⚠️ Please enter your Gemini API key in the sidebar to proceed")
else:
    # Only reachable when a key is present but no files were uploaded
    st.info("📤 Please upload PDF documents using the file uploader above")