# Hugging Face file-viewer residue (not part of app.py): author "Nechba",
# commit 535ccdc "Update app.py" (verified).
import streamlit as st
import pandas as pd
from io import BytesIO
import os
from dotenv import load_dotenv
from utils import (
analyze_pdf_directly,
csv_to_dataframe,
save_csv,
get_pdf_metadata,
extract_csv_from_response,
pdf_to_images,
analyze_single_document,
process_local_pdf,
analyze_pdf_images_with_gemini
)
import base64
from datetime import datetime
import tempfile
# Load environment variables (a .env file is honoured when present) so that
# os.getenv("GEMINI_API_KEY") further down can see the key.
load_dotenv()

# Page configuration is applied before any other Streamlit element is rendered.
st.set_page_config(
    page_title="PDF Document Analyzer",
    # Must be a real emoji character; the previous value was the mis-decoded
    # UTF-8 byte sequence of this same character.
    page_icon="📄",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Custom CSS injected as raw HTML: rounded, shadowed document cards that lift
# slightly on hover, blue rounded buttons, and a left-accented analysis section.
_CUSTOM_CSS = """
<style>
.document-card {
border-radius: 10px;
padding: 1.5rem;
margin-bottom: 1.5rem;
background-color: white;
box-shadow: 0 4px 12px rgba(0,0,0,0.1);
transition: transform 0.2s;
}
.document-card:hover {
transform: translateY(-2px);
}
.stButton>button {
background-color: #4285F4;
color: white;
border-radius: 8px;
padding: 0.5rem 1.5rem;
font-weight: 500;
}
.analysis-section {
border-left: 4px solid #4285F4;
padding-left: 1rem;
margin-top: 1.5rem;
}
</style>
"""

# unsafe_allow_html is required, otherwise the <style> tag would not be emitted as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# App header: title plus a one-line description of the workflow.
# The leading emoji was stored as mis-decoded UTF-8 bytes; it is restored here.
st.title("📄 PDF Document Analyzer")
# NOTE(review): the tagline mentions "native PDF processing", while the analysis
# path calls analyze_pdf_images_with_gemini — confirm which wording is accurate.
st.markdown("Upload multiple PDFs to analyze each document directly using Gemini's native PDF processing")
# The Gemini API key is taken from the environment only; it is an empty string
# when GEMINI_API_KEY is unset. A sidebar st.text_input (type="password",
# pre-filled from the same variable) used to collect it and is currently disabled.
api_key = os.getenv("GEMINI_API_KEY", "")

# Sidebar content: a horizontal rule followed by the feature summary.
with st.sidebar:
    st.markdown("---")
    st.info(
        "\n"
        "**Features:**\n"
        "- PDF processing using images partitioned by page\n"
        "- Individual analysis for each document\n"
        "- Downloadable CSV reports\n"
    )
# Main content: multi-file uploader restricted to PDFs. The returned value is
# what the rest of the script iterates over.
uploaded_files = st.file_uploader(
    label="Upload PDF Documents",
    accept_multiple_files=True,
    type=["pdf"],
    help="Upload multiple PDF documents for analysis",
)
# Main flow: with both uploads and an API key available, render one section per
# PDF (heading, metrics, analyze button). Otherwise explain what is missing.
if uploaded_files and api_key:
    st.success(f"✅ {len(uploaded_files)} PDF(s) ready for analysis")

    # Process each PDF separately
    for i, uploaded_file in enumerate(uploaded_files):
        # Read the upload once; the same bytes feed metadata, size and analysis.
        pdf_bytes = uploaded_file.getvalue()

        with st.container():
            st.markdown(f"### 📑 Document {i+1}: {uploaded_file.name}")

            # Display document info
            metadata = get_pdf_metadata(pdf_bytes)
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Pages", metadata['page_count'])
            with col2:
                st.metric("Size", f"{len(pdf_bytes) / 1024:.1f} KB")
            with col3:
                # Widget keys include the loop index so that each document gets
                # its own button / download widget.
                # NOTE(review): Streamlit reruns the script on every widget
                # interaction, so the results below are only rendered on the run
                # in which this button was clicked (e.g. they disappear after
                # the download button is pressed). Persist them in
                # st.session_state if they should stay visible.
                if st.button("Analyze Document", key=f"analyze_{i}"):
                    with st.spinner(f"Analyzing {uploaded_file.name}..."):
                        # Broad catch on purpose: this is the UI boundary, and
                        # any failure is reported to the user instead of
                        # crashing the page.
                        try:
                            response_text = analyze_pdf_images_with_gemini(pdf_bytes)
                            extracted_csv = extract_csv_from_response(response_text)
                            df = csv_to_dataframe(extracted_csv)

                            # Display results in expandable section
                            with st.expander("View Analysis Results", expanded=True):
                                if not df.empty:
                                    st.dataframe(df)

                                    # Serialise the table to an in-memory .xlsx file.
                                    excel_buffer = BytesIO()
                                    df.to_excel(excel_buffer, index=False)
                                    excel_data = excel_buffer.getvalue()

                                    # Download button
                                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                                    report_filename = f"{uploaded_file.name}_analysis_{timestamp}.xlsx"
                                    st.download_button(
                                        label="Download Analysis",
                                        data=excel_data,
                                        file_name=report_filename,
                                        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                                        key=f"download_{i}",
                                    )
                                else:
                                    st.warning("No CSV data found in response")
                                    st.markdown("### Full Response")
                                    # Previously referenced an undefined name
                                    # (raw_response), which raised NameError and
                                    # hid the model output behind a generic
                                    # "Analysis failed" error.
                                    st.write(response_text)
                        except Exception as e:
                            st.error(f"Analysis failed: {e}")

            st.markdown("---")
elif not api_key:
    # The key is read from the environment; there is no sidebar input for it,
    # so the message points the user at the variable that is actually consulted.
    st.warning(
        "⚠️ Gemini API key not found. Set the GEMINI_API_KEY environment variable "
        "(for example in a .env file) and reload the app"
    )
else:
    # Only reachable when a key is present but nothing has been uploaded yet.
    st.info("📀 Please upload PDF documents using the file uploader above")