csv-generation-img

Sleeping

App Files Files Community

csv-generation-img / app.py

Nechba

Update app.py

535ccdc verified 3 months ago

raw

history blame contribute delete

5.36 kB

	import streamlit as st
	import pandas as pd
	from io import BytesIO
	import os
	from dotenv import load_dotenv
	from utils import (
	analyze_pdf_directly,
	csv_to_dataframe,
	save_csv,
	get_pdf_metadata,
	extract_csv_from_response,
	pdf_to_images,
	analyze_single_document,
	process_local_pdf,
	analyze_pdf_images_with_gemini
	)
	import base64
	from datetime import datetime
	import tempfile

	# Pull variables from a local .env file into the process environment
	# (the GEMINI_API_KEY lookup further down reads from there).
	load_dotenv()

	# Page-level Streamlit settings, gathered in one mapping and applied in a single call
	page_settings = {
	    "page_title": "PDF Document Analyzer",
	    "page_icon": "📄",
	    "layout": "wide",
	    "initial_sidebar_state": "expanded",
	}
	st.set_page_config(**page_settings)

	# Custom CSS styling: document cards with a hover lift, blue rounded buttons,
	# and a left accent bar for analysis sections. Injected as raw HTML.
	custom_css = """
	<style>
	.document-card {
	border-radius: 10px;
	padding: 1.5rem;
	margin-bottom: 1.5rem;
	background-color: white;
	box-shadow: 0 4px 12px rgba(0,0,0,0.1);
	transition: transform 0.2s;
	}
	.document-card:hover {
	transform: translateY(-2px);
	}
	.stButton>button {
	background-color: #4285F4;
	color: white;
	border-radius: 8px;
	padding: 0.5rem 1.5rem;
	font-weight: 500;
	}
	.analysis-section {
	border-left: 4px solid #4285F4;
	padding-left: 1rem;
	margin-top: 1.5rem;
	}
	</style>
	"""
	st.markdown(custom_css, unsafe_allow_html=True)

	# App header: page title followed by a one-line description.
	# NOTE(review): the sidebar text describes page-image processing and the analysis
	# step calls analyze_pdf_images_with_gemini; confirm that "native PDF processing"
	# in the tagline is still accurate.
	app_title = "📄 PDF Document Analyzer"
	app_tagline = "Upload multiple PDFs to analyze each document directly using Gemini's native PDF processing"
	st.title(app_title)
	st.markdown(app_tagline)



	# The Gemini API key is taken from the GEMINI_API_KEY environment variable only;
	# no sidebar input is rendered for it. An empty string means "not configured".
	api_key = os.getenv("GEMINI_API_KEY", "")

	# Sidebar content: a divider followed by a short feature list.
	# NOTE(review): the download offered further down is an .xlsx workbook, so the
	# "CSV reports" wording here may be outdated.
	with st.sidebar:
	    st.markdown("---")
	    st.info("""
	Features:
	- PDF processing using images partitioned by page
	- Individual analysis for each document
	- Downloadable CSV reports
	""")

	# Main app content starts here: a PDF-only uploader that accepts several files at once
	uploader_options = {
	    "type": ["pdf"],
	    "accept_multiple_files": True,
	    "help": "Upload multiple PDF documents for analysis",
	}
	uploaded_files = st.file_uploader("Upload PDF Documents", **uploader_options)

	# Main flow: when at least one PDF is uploaded and an API key is configured,
	# render one section per document with its metadata and an on-demand
	# "Analyze Document" button. Otherwise explain what is missing.
	if uploaded_files and api_key:
	    st.success(f"✅ {len(uploaded_files)} PDF(s) ready for analysis")

	    # Process each PDF separately
	    for i, uploaded_file in enumerate(uploaded_files):
	        with st.container():
	            st.markdown(f"### 📑 Document {i+1}: {uploaded_file.name}")

	            # Read the upload once and reuse the bytes for metadata, size and analysis
	            pdf_bytes = uploaded_file.getvalue()

	            # Display document info
	            metadata = get_pdf_metadata(pdf_bytes)
	            col1, col2, col3 = st.columns(3)
	            with col1:
	                st.metric("Pages", metadata['page_count'])
	            with col2:
	                st.metric("Size", f"{len(pdf_bytes) / 1024:.1f} KB")
	            with col3:
	                # NOTE(review): the results (and the download button) are rendered only
	                # during the run triggered by this click; any later rerun, including the
	                # one caused by pressing the download button, hides them again. Persisting
	                # the result in st.session_state would avoid that.
	                if st.button("Analyze Document", key=f"analyze_{i}"):
	                    with st.spinner(f"Analyzing {uploaded_file.name}..."):
	                        try:
	                            # Send the PDF to the Gemini helper, then pull the CSV part out
	                            # of the reply and load it into a DataFrame
	                            response_text = analyze_pdf_images_with_gemini(pdf_bytes)
	                            extracted_csv = extract_csv_from_response(response_text)
	                            df = csv_to_dataframe(extracted_csv)

	                            # Display results in expandable section
	                            with st.expander("View Analysis Results", expanded=True):
	                                # The None check is defensive: csv_to_dataframe lives in utils
	                                # and its behaviour on missing CSV is not visible here.
	                                if df is not None and not df.empty:
	                                    st.dataframe(df)

	                                    # Serialize the table to an in-memory Excel workbook
	                                    excel_buffer = BytesIO()
	                                    df.to_excel(excel_buffer, index=False)
	                                    excel_data = excel_buffer.getvalue()

	                                    # Download button; the timestamp keeps file names unique
	                                    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	                                    excel_filename = f"{uploaded_file.name}_analysis_{timestamp}.xlsx"

	                                    st.download_button(
	                                        label="Download Analysis",
	                                        data=excel_data,
	                                        file_name=excel_filename,
	                                        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	                                        key=f"download_{i}",
	                                    )
	                                else:
	                                    # Fall back to showing the model reply so the user can
	                                    # see why no table was produced. (Previously this
	                                    # referenced an undefined name and raised NameError.)
	                                    st.warning("No CSV data found in response")
	                                    st.markdown("### Full Response")
	                                    st.write(response_text)

	                        except Exception as e:
	                            # UI boundary: report any failure for this document and keep
	                            # the rest of the page usable
	                            st.error(f"Analysis failed: {e}")

	            st.markdown("---")

	elif not api_key:
	    # The key is read from the environment; there is no input field in the sidebar
	    st.warning("⚠️ Gemini API key not found. Set the GEMINI_API_KEY environment variable to proceed")

	else:
	    # A key is present, so the only remaining reason to land here is that no file was uploaded
	    st.info("📤 Please upload PDF documents using the file uploader above")