# csv-generation-img
#
# Sleeping
#
# File size: 5,360 Bytes

import streamlit as st
import pandas as pd
from io import BytesIO
import os
from dotenv import load_dotenv
from utils import (
    analyze_pdf_directly,
    csv_to_dataframe,
    save_csv,
    get_pdf_metadata,
    extract_csv_from_response,
    pdf_to_images,
    analyze_single_document,
process_local_pdf,
analyze_pdf_images_with_gemini
)
import base64
from datetime import datetime
import tempfile

# Load environment variables (python-dotenv) before anything reads them.
load_dotenv()

# Page-level Streamlit settings, collected in one mapping and applied in a
# single call.
PAGE_SETTINGS = {
    "page_title": "PDF Document Analyzer",
    "page_icon": "📄",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**PAGE_SETTINGS)

# Custom CSS styling: card, button and analysis-section rules, injected into
# the page as raw HTML.
CUSTOM_CSS = """
    <style>
    .document-card {
        border-radius: 10px;
        padding: 1.5rem;
        margin-bottom: 1.5rem;
        background-color: white;
        box-shadow: 0 4px 12px rgba(0,0,0,0.1);
        transition: transform 0.2s;
    }
    .document-card:hover {
        transform: translateY(-2px);
    }
    .stButton>button {
        background-color: #4285F4;
        color: white;
        border-radius: 8px;
        padding: 0.5rem 1.5rem;
        font-weight: 500;
    }
    .analysis-section {
        border-left: 4px solid #4285F4;
        padding-left: 1rem;
        margin-top: 1.5rem;
    }
    </style>
"""
# unsafe_allow_html is required so the <style> tag is emitted as HTML rather
# than escaped and shown as text.
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

# App Header: page title followed by a one-line description.
# NOTE(review): the tagline mentions "native PDF processing", while the
# analysis step in this file calls analyze_pdf_images_with_gemini — confirm
# the wording still matches what the app does.
APP_TITLE = "📄 PDF Document Analyzer"
APP_TAGLINE = "Upload multiple PDFs to analyze each document directly using Gemini's native PDF processing"
st.title(APP_TITLE)
st.markdown(APP_TAGLINE)



# Sidebar Configuration
with st.sidebar:
    # The Gemini API key is read from the GEMINI_API_KEY environment variable
    # (load_dotenv() runs at the top of this script). The sidebar offers no
    # interactive key input; an empty string means "not configured".
    api_key = os.getenv("GEMINI_API_KEY", "")

    st.markdown("---")
    # The download offered after analysis is an .xlsx workbook, so the feature
    # list advertises Excel rather than CSV.
    st.info("""
        **Features:**
        - PDF processing using images partitioned by page
        - Individual analysis for each document
        - Downloadable Excel reports
    """)

# Main App Content
uploaded_files = st.file_uploader(
    "Upload PDF Documents",
    type=["pdf"],
    accept_multiple_files=True,
    help="Upload multiple PDF documents for analysis"
)


def _analyze_document(file_name: str, pdf_bytes: bytes) -> dict:
    """Run the Gemini analysis for one PDF and package the outcome.

    Args:
        file_name: Name of the uploaded file; used to build the download name.
        pdf_bytes: Raw content of the uploaded PDF.

    Returns:
        A dict with the model's ``response_text``, the parsed ``df`` and the
        ``download_name`` (timestamped at analysis time so it stays stable
        across Streamlit reruns).

    Raises:
        Whatever the ``utils`` helpers raise; the caller reports it to the user.
    """
    response_text = analyze_pdf_images_with_gemini(pdf_bytes)
    # presumably the CSV portion of the model response — verify against utils
    csv_payload = extract_csv_from_response(response_text)
    df = csv_to_dataframe(csv_payload)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return {
        "response_text": response_text,
        "df": df,
        "download_name": f"{file_name}_analysis_{timestamp}.xlsx",
    }


def _render_results(result: dict, download_key: str) -> None:
    """Show a stored analysis result: the table plus an Excel download, or the
    raw model response when no tabular data was extracted.

    Args:
        result: Dict produced by ``_analyze_document``.
        download_key: Unique Streamlit widget key for the download button.
    """
    with st.expander("View Analysis Results", expanded=True):
        df = result["df"]
        if df.empty:
            st.warning("No CSV data found in response")
            st.markdown("### Full Response")
            # The original referenced an undefined name here (raw_response),
            # which turned this branch into a NameError.
            st.write(result["response_text"])
            return

        st.dataframe(df)

        # Serialize once and cache the bytes alongside the result so reruns
        # do not rebuild the workbook.
        if "excel_data" not in result:
            excel_buffer = BytesIO()
            df.to_excel(excel_buffer, index=False)
            result["excel_data"] = excel_buffer.getvalue()

        st.download_button(
            label="Download Analysis",
            data=result["excel_data"],
            file_name=result["download_name"],
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            key=download_key
        )


if uploaded_files and api_key:
    st.success(f"✅ {len(uploaded_files)} PDF(s) ready for analysis")

    # Process each PDF separately
    for i, uploaded_file in enumerate(uploaded_files):
        with st.container():
            st.markdown(f"### 📑 Document {i+1}: {uploaded_file.name}")

            # Read the upload once; the bytes feed metadata, size and analysis.
            pdf_bytes = uploaded_file.getvalue()

            # Results live in session state because st.button is only True on
            # the run right after the click; without this, pressing the
            # download button (which triggers a rerun) would hide the results.
            result_key = f"analysis_result_{i}_{uploaded_file.name}_{len(pdf_bytes)}"

            # Display document info
            metadata = get_pdf_metadata(pdf_bytes)
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Pages", metadata['page_count'])
            with col2:
                st.metric("Size", f"{len(pdf_bytes) / 1024:.1f} KB")
            with col3:
                analyze_clicked = st.button("Analyze Document", key=f"analyze_{i}")
                try:
                    if analyze_clicked:
                        # Drop any earlier result first so a failed re-run
                        # does not leave stale output on screen.
                        if result_key in st.session_state:
                            del st.session_state[result_key]
                        with st.spinner(f"Analyzing {uploaded_file.name}..."):
                            st.session_state[result_key] = _analyze_document(
                                uploaded_file.name, pdf_bytes
                            )

                    if result_key in st.session_state:
                        _render_results(
                            st.session_state[result_key],
                            download_key=f"download_{i}",
                        )
                except Exception as e:
                    # Top-level UI boundary: report the failure for this
                    # document and carry on with the remaining ones.
                    st.error(f"Analysis failed: {e}")

            st.markdown("---")

elif not api_key:
    # The key is read from the environment; there is no sidebar input for it.
    st.warning(
        "⚠️ Gemini API key not found. Set the GEMINI_API_KEY environment "
        "variable (for example in a .env file) to proceed"
    )

else:
    # A key is present here, so the only thing missing is an upload.
    st.info("📤 Please upload PDF documents using the file uploader above")