Spaces:
Running
Running
File size: 5,360 Bytes
3a55e0a ea39242 3a55e0a 5ea09ca bd1580d 4f8a713 3a55e0a ea39242 348cb0d 3a55e0a 5ea09ca 3a55e0a 5ea09ca 94eeb49 82fd62e 3a55e0a 1e664c0 5ea09ca 5555ebc ea39242 3a55e0a 30582c7 3a55e0a ea39242 3a55e0a ea39242 3a55e0a 41acda9 3a55e0a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import streamlit as st
import pandas as pd
from io import BytesIO
import os
from dotenv import load_dotenv
from utils import (
analyze_pdf_directly,
csv_to_dataframe,
save_csv,
get_pdf_metadata,
extract_csv_from_response,
pdf_to_images,
analyze_single_document,
process_local_pdf,
analyze_pdf_images_with_gemini
)
import base64
from datetime import datetime
import tempfile
# Load variables from a .env file (if present) into the environment before
# anything further down reads them.
load_dotenv()

# Page-level Streamlit settings: tab title and icon, wide layout, and the
# sidebar opened by default.
_PAGE_SETTINGS = {
    "page_title": "PDF Document Analyzer",
    "page_icon": "📄",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# Custom CSS styling.
# The stylesheet is kept in a named constant and injected as raw HTML;
# `unsafe_allow_html=True` is required for the <style> tag to take effect.
_CUSTOM_CSS = """
<style>
.document-card {
border-radius: 10px;
padding: 1.5rem;
margin-bottom: 1.5rem;
background-color: white;
box-shadow: 0 4px 12px rgba(0,0,0,0.1);
transition: transform 0.2s;
}
.document-card:hover {
transform: translateY(-2px);
}
.stButton>button {
background-color: #4285F4;
color: white;
border-radius: 8px;
padding: 0.5rem 1.5rem;
font-weight: 500;
}
.analysis-section {
border-left: 4px solid #4285F4;
padding-left: 1rem;
margin-top: 1.5rem;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# App header: the page title followed by a one-line description of the tool.
_APP_TITLE = "📄 PDF Document Analyzer"
_APP_TAGLINE = "Upload multiple PDFs to analyze each document directly using Gemini's native PDF processing"
st.title(_APP_TITLE)
st.markdown(_APP_TAGLINE)
# Sidebar configuration.
# The Gemini API key is read from the GEMINI_API_KEY environment variable;
# an empty string means no key is configured.
api_key = os.environ.get("GEMINI_API_KEY", "")

# Feature summary shown in the sidebar info box.
_SIDEBAR_FEATURES = """
**Features:**
- PDF processing using images partitioned by page
- Individual analysis for each document
- Downloadable CSV reports
"""

with st.sidebar:
    # Interactive key entry is disabled; the widget code is kept for reference.
    # st.header("Configuration")
    # api_key = st.text_input(
    # "Enter Gemini API Key:",
    # type="password",
    # value=os.getenv("GEMINI_API_KEY", "")
    # )
    st.markdown("---")
    st.info(_SIDEBAR_FEATURES)
# Main app content: a multi-file uploader restricted to PDF files.
# With accept_multiple_files=True the widget yields a list of uploads.
_UPLOADER_OPTIONS = {
    "type": ["pdf"],
    "accept_multiple_files": True,
    "help": "Upload multiple PDF documents for analysis",
}
uploaded_files = st.file_uploader("Upload PDF Documents", **_UPLOADER_OPTIONS)
def _analyze_and_render(pdf_name, pdf_bytes, index):
    """Analyze one uploaded PDF and render the outcome in the page.

    Sends the PDF bytes to ``analyze_pdf_images_with_gemini``, extracts the
    tabular part of the reply, and shows it as a table with an Excel download
    button. When no tabular data is found, the full model response is shown
    instead so the user can see what came back.

    Args:
        pdf_name: Original file name of the upload; used in the download name.
        pdf_bytes: Raw bytes of the uploaded PDF.
        index: Position of the document in the upload list; used to give the
            download button a unique widget key.
    """
    # Top-level UI boundary: any failure in the pipeline is reported to the
    # user instead of crashing the whole page.
    try:
        response_text = analyze_pdf_images_with_gemini(pdf_bytes)
        # NOTE(review): presumably CSV text pulled out of the model reply;
        # confirm against utils.extract_csv_from_response.
        csv_payload = extract_csv_from_response(response_text)
        df = csv_to_dataframe(csv_payload)

        with st.expander("View Analysis Results", expanded=True):
            if not df.empty:
                st.dataframe(df)

                # Serialize the table to an in-memory .xlsx for download.
                excel_buffer = BytesIO()
                df.to_excel(excel_buffer, index=False)
                excel_data = excel_buffer.getvalue()

                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                excel_filename = f"{pdf_name}_analysis_{timestamp}.xlsx"
                st.download_button(
                    label="Download Analysis",
                    data=excel_data,
                    file_name=excel_filename,
                    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                    key=f"download_{index}",
                )
            else:
                st.warning("No CSV data found in response")
                st.markdown("### Full Response")
                # Fixed: this used the undefined name `raw_response`, which
                # raised NameError and hid the response from the user.
                st.write(response_text)
    except Exception as e:
        st.error(f"Analysis failed: {e}")


if uploaded_files and api_key:
    st.success(f"✅ {len(uploaded_files)} PDF(s) ready for analysis")

    # Each PDF gets its own section with metadata and an analyze button.
    for doc_index, uploaded_file in enumerate(uploaded_files):
        with st.container():
            st.markdown(f"### 📑 Document {doc_index + 1}: {uploaded_file.name}")

            # Read the upload once and reuse the bytes for metadata, the size
            # metric, and the analysis call.
            pdf_bytes = uploaded_file.getvalue()
            metadata = get_pdf_metadata(pdf_bytes)

            pages_col, size_col, action_col = st.columns(3)
            with pages_col:
                st.metric("Pages", metadata['page_count'])
            with size_col:
                st.metric("Size", f"{len(pdf_bytes) / 1024:.1f} KB")
            with action_col:
                # The key must be unique per document because the label is
                # the same for every button.
                if st.button("Analyze Document", key=f"analyze_{doc_index}"):
                    with st.spinner(f"Analyzing {uploaded_file.name}..."):
                        _analyze_and_render(uploaded_file.name, pdf_bytes, doc_index)

            st.markdown("---")
elif not api_key:
    # The key is read from the environment only (the sidebar input is
    # disabled), so point the user at the environment variable.
    st.warning("⚠️ Gemini API key not found. Set the GEMINI_API_KEY environment variable to proceed")
else:
    # A key is present, so the only remaining case is that nothing is uploaded.
    st.info("📤 Please upload PDF documents using the file uploader above")