Spaces:
Running
Running
import streamlit as st | |
import pandas as pd | |
from io import BytesIO | |
import os | |
from dotenv import load_dotenv | |
from utils import ( | |
analyze_pdf_directly, | |
csv_to_dataframe, | |
save_csv, | |
get_pdf_metadata, | |
extract_csv_from_response, | |
pdf_to_images, | |
analyze_single_document, | |
process_local_pdf, | |
analyze_pdf_images_with_gemini | |
) | |
import base64 | |
from datetime import datetime | |
import tempfile | |
# Populate the process environment from a local .env file before any
# configuration value is read further down.
load_dotenv()

# Page-level Streamlit settings, gathered in one mapping and applied in a
# single call.
_PAGE_SETTINGS = {
    "page_title": "PDF Document Analyzer",
    "page_icon": "π",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# Custom CSS styling: rules for document cards, Streamlit buttons and the
# analysis section, kept in one constant so the markup is easy to find.
_CUSTOM_CSS = """
<style>
.document-card {
border-radius: 10px;
padding: 1.5rem;
margin-bottom: 1.5rem;
background-color: white;
box-shadow: 0 4px 12px rgba(0,0,0,0.1);
transition: transform 0.2s;
}
.document-card:hover {
transform: translateY(-2px);
}
.stButton>button {
background-color: #4285F4;
color: white;
border-radius: 8px;
padding: 0.5rem 1.5rem;
font-weight: 500;
}
.analysis-section {
border-left: 4px solid #4285F4;
padding-left: 1rem;
margin-top: 1.5rem;
}
</style>
"""

# The stylesheet is raw HTML, so it has to be passed with unsafe_allow_html.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# App header: page title followed by a one-line description of the workflow.
_APP_TITLE = "π PDF Document Analyzer"
_APP_TAGLINE = "Upload multiple PDFs to analyze each document directly using Gemini's native PDF processing"

st.title(_APP_TITLE)
st.markdown(_APP_TAGLINE)
# Sidebar configuration.
# The Gemini API key is taken from the GEMINI_API_KEY environment variable
# (there is no interactive key input); it is "" when the variable is unset.
api_key = os.environ.get("GEMINI_API_KEY", "")

# Markdown summary of the app's capabilities, shown in the sidebar info box.
_FEATURES_NOTE = """
**Features:**
- PDF processing using images partitioned by page
- Individual analysis for each document
- Downloadable CSV reports
"""

with st.sidebar:
    st.markdown("---")
    st.info(_FEATURES_NOTE)
# Main app content: uploader restricted to PDFs that accepts several files
# at once; the result is iterated per document further down.
uploaded_files = st.file_uploader(
    label="Upload PDF Documents",
    help="Upload multiple PDF documents for analysis",
    accept_multiple_files=True,
    type=["pdf"],
)
def _dataframe_to_xlsx(df: "pd.DataFrame") -> bytes:
    """Serialize ``df`` to an in-memory Excel workbook and return its bytes."""
    workbook = BytesIO()
    df.to_excel(workbook, index=False)
    return workbook.getvalue()


def _analyze_and_render(index: int, file_name: str, pdf_bytes: bytes) -> None:
    """Analyze one PDF and render either the extracted table or the raw reply.

    Args:
        index: Position of the document in the upload list; used to keep the
            Streamlit widget keys unique.
        file_name: Name of the uploaded file, shown in the spinner and used as
            the prefix of the download file name.
        pdf_bytes: Raw content of the uploaded PDF.
    """
    with st.spinner(f"Analyzing {file_name}..."):
        # Top-level UI boundary: any failure in the pipeline is reported in
        # the page instead of aborting the whole script run.
        try:
            response_text = analyze_pdf_images_with_gemini(pdf_bytes)
            csv_text = extract_csv_from_response(response_text)
            df = csv_to_dataframe(csv_text)

            with st.expander("View Analysis Results", expanded=True):
                # Guard against a missing frame as well as an empty one.
                if df is not None and not df.empty:
                    st.dataframe(df)
                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                    st.download_button(
                        label="Download Analysis",
                        data=_dataframe_to_xlsx(df),
                        file_name=f"{file_name}_analysis_{timestamp}.xlsx",
                        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                        key=f"download_{index}",
                    )
                else:
                    st.warning("No CSV data found in response")
                    st.markdown("### Full Response")
                    # Show the model's reply; this used to reference an
                    # undefined name and raised NameError in this branch.
                    st.write(response_text)
        except Exception as e:
            st.error(f"Analysis failed: {str(e)}")


if uploaded_files and api_key:
    st.success(f"β {len(uploaded_files)} PDF(s) ready for analysis")

    # Process each PDF separately.
    for i, uploaded_file in enumerate(uploaded_files):
        with st.container():
            st.markdown(f"### π Document {i+1}: {uploaded_file.name}")

            # Read the upload once and reuse the bytes for metadata, the size
            # metric and the analysis call.
            pdf_bytes = uploaded_file.getvalue()
            metadata = get_pdf_metadata(pdf_bytes)

            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Pages", metadata['page_count'])
            with col2:
                st.metric("Size", f"{len(pdf_bytes) / 1024:.1f} KB")
            with col3:
                analyze_clicked = st.button("Analyze Document", key=f"analyze_{i}")

            # Results are rendered below the metric row so the table can use
            # the full page width.
            # NOTE(review): a button is presumably only truthy on the rerun
            # triggered by its click, so results may vanish on the next
            # interaction (e.g. the download button) -- consider keeping them
            # in st.session_state; confirm against the Streamlit docs.
            if analyze_clicked:
                _analyze_and_render(i, uploaded_file.name, pdf_bytes)

            st.markdown("---")
elif not api_key:
    # The key is only ever read from the environment, so point the user there.
    st.warning("β οΈ Gemini API key not found. Set the GEMINI_API_KEY environment variable (for example in a .env file) and restart the app")
else:
    # A key is present here, so the only remaining case is "no files uploaded".
    st.info("π€ Please upload PDF documents using the file uploader above")