"""Streamlit front end for per-document PDF analysis with Gemini.

The user uploads one or more PDFs; each document gets its own section with
basic metadata and an "Analyze Document" button. Clicking the button sends
the PDF bytes to ``analyze_pdf_images_with_gemini`` and renders the tabular
result, with an Excel download of the same data.
"""

import base64
import os
import tempfile
from datetime import datetime
from io import BytesIO

import pandas as pd
import streamlit as st
from dotenv import load_dotenv

from utils import (
    analyze_pdf_directly,
    csv_to_dataframe,
    save_csv,
    get_pdf_metadata,
    extract_csv_from_response,
    pdf_to_images,
    analyze_single_document,
    process_local_pdf,
    analyze_pdf_images_with_gemini,
)

# MIME type for the .xlsx file offered by the download button.
XLSX_MIME = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"


def _render_analysis(pdf_bytes: bytes, source_name: str, index: int) -> None:
    """Analyze one PDF and render the outcome at the current layout position.

    Args:
        pdf_bytes: Raw content of the uploaded PDF.
        source_name: Name of the uploaded file; used in the spinner text and
            as the prefix of the download file name.
        index: Position of the document in the upload list; used to give the
            download button a unique widget key.

    Any exception raised while analyzing or rendering is reported to the user
    with ``st.error`` instead of propagating, so one failing document does not
    abort the rest of the page.
    """
    with st.spinner(f"Analyzing {source_name}..."):
        try:
            response_text = analyze_pdf_images_with_gemini(pdf_bytes)
            csv_payload = extract_csv_from_response(response_text)
            df = csv_to_dataframe(csv_payload)

            with st.expander("View Analysis Results", expanded=True):
                if not df.empty:
                    st.dataframe(df)

                    # Serialize to an in-memory workbook for the download.
                    excel_buffer = BytesIO()
                    df.to_excel(excel_buffer, index=False)
                    excel_data = excel_buffer.getvalue()

                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                    download_name = f"{source_name}_analysis_{timestamp}.xlsx"
                    st.download_button(
                        label="Download Analysis",
                        data=excel_data,
                        file_name=download_name,
                        mime=XLSX_MIME,
                        key=f"download_{index}",
                    )
                else:
                    st.warning("No CSV data found in response")
                    st.markdown("### Full Response")
                    # Show the unparsed model output so the user can see why
                    # no table could be extracted. (Previously referenced an
                    # undefined name, which raised NameError here.)
                    st.write(response_text)
        except Exception as exc:  # UI boundary: surface the failure, keep the page alive
            st.error(f"Analysis failed: {exc}")


# Load environment variables
load_dotenv()

# Configure page settings
st.set_page_config(
    page_title="PDF Document Analyzer",
    page_icon="📄",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS styling
st.markdown(""" """, unsafe_allow_html=True)

# App Header
st.title("📄 PDF Document Analyzer")
st.markdown("Upload multiple PDFs to analyze each document directly using Gemini's native PDF processing")

# Sidebar Configuration
with st.sidebar:
    # The API key is read from the environment (or .env via load_dotenv);
    # there is no key input widget in the sidebar.
    api_key = os.getenv("GEMINI_API_KEY", "")

    st.markdown("---")
    st.info("""
    **Features:**
    - PDF processing using images partitioned by page
    - Individual analysis for each document
    - Downloadable CSV reports
    """)

# Main App Content
uploaded_files = st.file_uploader(
    "Upload PDF Documents",
    type=["pdf"],
    accept_multiple_files=True,
    help="Upload multiple PDF documents for analysis",
)

if uploaded_files and api_key:
    st.success(f"✅ {len(uploaded_files)} PDF(s) ready for analysis")

    # Process each PDF separately
    for i, uploaded_file in enumerate(uploaded_files):
        with st.container():
            st.markdown(f"### 📑 Document {i+1}: {uploaded_file.name}")

            # Read the upload once; the bytes feed metadata, size and analysis.
            pdf_bytes = uploaded_file.getvalue()

            # Display document info
            metadata = get_pdf_metadata(pdf_bytes)
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Pages", metadata['page_count'])
            with col2:
                st.metric("Size", f"{len(pdf_bytes) / 1024:.1f} KB")
            with col3:
                if st.button("Analyze Document", key=f"analyze_{i}"):
                    _render_analysis(pdf_bytes, uploaded_file.name, i)

            st.markdown("---")

elif not api_key:
    st.warning("⚠️ Gemini API key not found. Set GEMINI_API_KEY in your environment or .env file to proceed")
elif not uploaded_files:
    st.info("📤 Please upload PDF documents using the file uploader above")