Spaces:

traversaal-internal
/

Transform-PDF-Tables-to-HTML-and-Excel

Running

App Files Community

AreejMehboob committed 3 days ago

Commit

b9e9522

verified ·

1 Parent(s): 49d41a8

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +135 -179

src/streamlit_app.py CHANGED Viewed

@@ -220,31 +220,16 @@ if 'demo_results' not in st.session_state:
 if 'demo_selected_methods' not in st.session_state:
     st.session_state.demo_selected_methods = {'docling': True, 'llamaparse': False, 'unstructured': False}
-# Get the current directory (src) and set output path
-CURRENT_DIR = Path(__file__).parent
-OUTPUT_BASE_PATH = CURRENT_DIR / "output"
-# Create output directory if it doesn't exist
-OUTPUT_BASE_PATH.mkdir(exist_ok=True)
-def check_existing_results():
-    """Check if there are existing results in the output directory"""
-    existing_methods = []
-    for method in ['docling', 'llamaparse', 'unstructured']:
-        method_dir = OUTPUT_BASE_PATH / method
-        if method_dir.exists():
-            # Check for HTML files
-            html_files = list(method_dir.glob("**/*.html"))
-            if html_files:
-                existing_methods.append(method)
-    return existing_methods
 def show_home_page():
-    # Check for existing results
-    existing_methods = check_existing_results()
     # Header
     st.markdown("""
     <div class="main-header">
@@ -255,44 +240,19 @@ def show_home_page():
     </div>
     """, unsafe_allow_html=True)
-    # Show existing results notification if any
-    if existing_methods:
-        st.info(f"📁 Found existing results from: {', '.join([m.title() for m in existing_methods])}. Click 'View Results' to see them.")
     # Main buttons
     col1, col2, col3 = st.columns([1, 2, 1])
     with col2:
-        if existing_methods:
-            # Show three buttons if results exist
-            col_btn1, col_btn2, col_btn3 = st.columns(3)
-            with col_btn1:
-                if st.button("📄 Upload PDF", key="upload_btn", help="Upload your own PDF document"):
-                    st.session_state.page = 'upload'
-                    st.rerun()
-            with col_btn2:
-                if st.button("⚡ Try Demo", key="demo_btn", help="Try with Tesla's 10K form"):
-                    st.session_state.page = 'demo_setup'
-                    st.rerun()
-            with col_btn3:
-                if st.button("👁️ View Results", key="view_results_btn", help="View existing results"):
-                    st.session_state.page = 'demo'
-                    st.session_state.processing = False
-                    st.session_state.demo_selected_methods = {method: method in existing_methods for method in ['docling', 'llamaparse', 'unstructured']}
-                    st.rerun()
-        else:
-            # Show two buttons if no results exist
-            col_btn1, col_btn2 = st.columns(2)
-            with col_btn1:
-                if st.button("📄 Upload PDF Document", key="upload_btn", help="Upload your own PDF document"):
-                    st.session_state.page = 'upload'
-                    st.rerun()
-            with col_btn2:
-                if st.button("⚡ Try Tesla 10K Demo", key="demo_btn", help="Try with Tesla's 10K form"):
-                    st.session_state.page = 'demo_setup'
-                    st.rerun()
     # Features section
     st.markdown("---")
@@ -336,14 +296,14 @@ def show_upload_page():
     st.markdown("**Or specify file path:**")
     input_file_path = st.text_input(
         "Input File Path",
-        placeholder="path/to/your/document.pdf",
-        help="Enter the path to your PDF file"
     )
     # Output directory with show/hide functionality
     output_dir = st.text_input(
         "Output Directory",
-        value=str(OUTPUT_BASE_PATH),
         help="Directory where extracted tables will be saved",
         type="password" if not st.session_state.show_output_dir else "default"
     )
@@ -383,32 +343,25 @@ def show_demo_setup_page():
     st.markdown("## ⚡ Tesla 10K Demo Setup")
     st.markdown("*Configure extraction methods for Tesla's 10K document processing*")
-    # Check for existing results
-    existing_methods = check_existing_results()
     # Document info
     st.markdown("### 📄 Document Information")
-    if existing_methods:
-        st.success(f"**Found existing results from:** {', '.join([m.title() for m in existing_methods])}")
-        st.info("**Note:** You can view existing results or process with different methods")
-    else:
-        st.info("**Document:** Tesla 10K form - Financial tables extraction demo")
-    # Extraction method selection
     st.markdown("### 🔧 Select Extraction Methods")
     col1, col2, col3 = st.columns(3)
     with col1:
         docling = st.checkbox("Docling",
-                             value=st.session_state.demo_selected_methods.get('docling', True),
                              help="Advanced document processing")
     with col2:
         llamaparse = st.checkbox("LlamaParse",
-                                value=st.session_state.demo_selected_methods.get('llamaparse', False),
                                 help="AI-powered parsing")
     with col3:
         unstructured = st.checkbox("Unstructured",
-                                  value=st.session_state.demo_selected_methods.get('unstructured', False),
                                   help="General purpose extraction")
     # Update session state
@@ -421,33 +374,13 @@ def show_demo_setup_page():
     # Process button
     col1, col2 = st.columns([2, 1])
     with col1:
-        if existing_methods:
-            # Show two buttons if results exist
-            col_btn1, col_btn2 = st.columns(2)
-            with col_btn1:
-                if st.button("👁️ View Existing Results", type="secondary"):
-                    st.session_state.page = 'demo'
-                    st.session_state.processing = False
-                    st.session_state.demo_selected_methods = {method: method in existing_methods for method in ['docling', 'llamaparse', 'unstructured']}
-                    st.rerun()
-            with col_btn2:
-                if st.button("🚀 Process New", type="primary"):
-                    if docling or llamaparse or unstructured:
-                        st.session_state.page = 'demo'
-                        st.session_state.processing = True
-                        st.rerun()
-                    else:
-                        st.error("Please select at least one extraction method.")
-        else:
-            # Show single process button if no results exist
-            if st.button("🚀 Process Tesla Document", type="primary"):
-                if docling or llamaparse or unstructured:
-                    st.session_state.page = 'demo'
-                    st.session_state.processing = True
-                    st.rerun()
-                else:
-                    st.error("Please select at least one extraction method.")
     with col2:
         if st.button("← Back to Home"):
@@ -500,7 +433,7 @@ def show_processing_demo():
         method_status.markdown(f"**Overall Progress:** {int(progress * 100)}% | **Current Method:** {current_method.title()}")
-        time.sleep(0.1)  # Reduced sleep time for faster demo
     # Show completion
     st.markdown("""
@@ -514,18 +447,37 @@ def show_processing_demo():
     process_tesla_demo()
     st.session_state.processing = False
-    time.sleep(1)
     st.rerun()
 def process_tesla_demo():
     """Process Tesla demo document using selected extraction methods"""
     try:
         # For demo purposes, simulate successful processing for selected methods only
         results = {}
-        selected_methods = [method for method, selected in st.session_state.demo_selected_methods.items() if selected]
-        for method in selected_methods:
-            results[method] = {'status': 'success', 'total_tables': 3 + hash(method) % 3}  # Simulate different table counts
         st.session_state.demo_results = {'results': results}
@@ -534,29 +486,31 @@ def process_tesla_demo():
 def count_html_files(directory):
     """Count only HTML files in directory"""
-    if not directory.exists():
         return 0
-    html_files = list(directory.glob("**/*.html"))
     return len(html_files)
 def get_excel_files(directory):
     """Get all Excel files from directory"""
-    if not directory.exists():
         return []
-    excel_files = []
-    for ext in ['*.xlsx', '*.xls', '*.csv']:
-        excel_files.extend(directory.glob(f"**/{ext}"))
     return excel_files
 def get_file_info(file_path):
     """Get file information including size and modification time"""
-    if not file_path.exists():
         return {"size": 0, "modified": "Unknown"}
-    stat = file_path.stat()
     size_kb = stat.st_size / 1024
     modified = datetime.fromtimestamp(stat.st_mtime)
@@ -568,18 +522,13 @@ def get_file_info(file_path):
 def show_demo_results():
     st.markdown("## 📊 Tesla 10K Processing Results")
-    # Check for existing results
-    existing_methods = check_existing_results()
     # Document info
     col1, col2 = st.columns([2, 1])
     with col1:
-        st.markdown("### 📄 Tesla 10K Document")
         st.markdown("**Status:** ✅ Complete")
-        if existing_methods:
-            st.markdown(f"**Available results:** {', '.join([m.title() for m in existing_methods])}")
-        else:
-            st.warning("No results found in output directory")
     with col2:
         if st.button("🔄 Reset"):
@@ -591,41 +540,42 @@ def show_demo_results():
             st.session_state.demo_selected_methods = {'docling': True, 'llamaparse': False, 'unstructured': False}
             st.rerun()
-    # Method selection tabs - only show available methods
-    available_methods = existing_methods
-    if available_methods:
-        if len(available_methods) > 1:
-            st.markdown("### 🔧 Select Extraction Method to View")
-            method_labels = {
-                'docling': '🔧 Docling',
-                'llamaparse': '🦙 LlamaParse',
-                'unstructured': '📊 Unstructured'
-            }
-            # Create columns based on number of available methods
-            cols = st.columns(len(available_methods))
-            for i, method in enumerate(available_methods):
-                with cols[i]:
-                    # Show HTML file count for each method
-                    method_output_dir = OUTPUT_BASE_PATH / method
-                    html_count = count_html_files(method_output_dir)
-                    button_label = f"{method_labels[method]} ({html_count} HTML files)"
-                    if st.button(button_label, key=f"tab_{method}", use_container_width=True):
-                        st.session_state.selected_method = method
-        # Default to first available method if no method selected
-        if st.session_state.selected_method is None or st.session_state.selected_method not in available_methods:
-            st.session_state.selected_method = available_methods[0]
-        # Show results for selected method
-        if st.session_state.selected_method:
-            show_method_results(st.session_state.selected_method)
-    else:
-        st.info("No results found. Please process a document first.")
 def show_method_results(method):
     st.markdown(f"### 📋 Results from {method.title()}")
@@ -647,18 +597,20 @@ def show_html_tables(method):
     # Get actual HTML files from directory
     html_files = []
-    if method_output_dir.exists():
-        html_files = list(method_output_dir.glob("**/*.html"))
-    # Sort files by table number if possible
     import re
     def extract_table_number(filename):
-        match = re.search(r"table[_-](\d+)", filename.name, re.IGNORECASE)
         if match:
             return int(match.group(1))
-        return float('inf')
-    html_files.sort(key=extract_table_number)
     if html_files:
         st.markdown(f"**Found {len(html_files)} HTML table(s):**")
@@ -670,7 +622,7 @@ def show_html_tables(method):
             st.markdown(f"""
             <div class="table-header">
                 <h4 style="color: #495057;">📋 Table {i+1}</h4>
-                <small style="color: #6c757d;">File: {html_file.name}</small>
             </div>
             """, unsafe_allow_html=True)
@@ -721,7 +673,7 @@ def show_excel_files(method):
         for i, excel_file in enumerate(excel_files):
             # Get file info
             file_info = get_file_info(excel_file)
-            file_name = excel_file.name
             # File info card
             st.markdown(f"""
@@ -736,35 +688,40 @@ def show_excel_files(method):
             # Try to read and display Excel file preview
             try:
-                if excel_file.suffix.lower() in ['.xlsx', '.xls']:
-                    df = pd.read_excel(excel_file)
-                else:
-                    df = pd.read_csv(excel_file)
                 if not df.empty:
                     st.markdown(f"**Preview (first 5 rows):**")
                     st.dataframe(df.head(), use_container_width=True)
                     st.markdown(f"**Dimensions:** {df.shape[0]} × {df.shape[1]}")
                 else:
-                    st.info("File is empty")
             except Exception as e:
-                st.warning(f"Could not preview file: {e}")
             # Download button for Excel file
             try:
                 with open(excel_file, 'rb') as f:
-                    file_data = f.read()
                 st.download_button(
                     label=f"⬇️ Download",
-                    data=file_data,
                     file_name=file_name,
                     mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                     key=f"download_excel_{method}_{i}",
                     use_container_width=True
                 )
             except Exception as e:
-                st.error(f"Error reading file for download: {e}")
             if i < len(excel_files) - 1:
                 st.markdown("---")
@@ -813,10 +770,11 @@ def process_document(file_path, output_dir, docling, llamaparse, unstructured):
                         st.json(method_result)
                         # List files in output directory
-                        method_dir = Path(output_dir) / selected_method
                         # HTML files
-                        html_files = list(method_dir.glob("**/*.html"))
                         # Excel files
                         excel_files = get_excel_files(method_dir)
@@ -827,12 +785,12 @@ def process_document(file_path, output_dir, docling, llamaparse, unstructured):
                             if html_files:
                                 st.markdown("**HTML Files:**")
                                 for html_file in html_files:
-                                    st.markdown(f"- {html_file.name}")
                             if excel_files:
                                 st.markdown("**Excel Files:**")
                                 for excel_file in excel_files:
-                                    st.markdown(f"- {excel_file.name}")
                 else:
                     st.warning("No successful extractions found.")
@@ -858,9 +816,7 @@ def main():
                 st.rerun()
         with nav_col2:
             st.button("History", use_container_width=True)
     st.markdown("---")
     # Route to appropriate page
     if st.session_state.page == 'home':
         show_home_page()

 if 'demo_selected_methods' not in st.session_state:
     st.session_state.demo_selected_methods = {'docling': True, 'llamaparse': False, 'unstructured': False}
+# Get the directory where the script is located (src)
+SCRIPT_DIR = Path(__file__).parent
+# Tesla demo document path (assuming it's in the src directory or adjust as needed)
+TESLA_DOC_PATH = SCRIPT_DIR / "tesla_docs_28-41 (1)-9-14.pdf"
+# Output directory is src/output
+OUTPUT_BASE_PATH = SCRIPT_DIR / "output"
 def show_home_page():
     # Header
     st.markdown("""
     <div class="main-header">
     </div>
     """, unsafe_allow_html=True)
     # Main buttons
     col1, col2, col3 = st.columns([1, 2, 1])
     with col2:
+        col_btn1, col_btn2 = st.columns(2)
+        with col_btn1:
+            if st.button("📄 Upload PDF Document", key="upload_btn", help="Upload your own PDF document"):
+                st.session_state.page = 'upload'
+                st.rerun()
+        with col_btn2:
+            if st.button("⚡ Try Tesla 10K Demo", key="demo_btn", help="Try with Tesla's 10K form"):
+                st.session_state.page = 'demo_setup'
+                st.rerun()
     # Features section
     st.markdown("---")
     st.markdown("**Or specify file path:**")
     input_file_path = st.text_input(
         "Input File Path",
+        placeholder="C:\\path\\to\\your\\document.pdf",
+        help="Enter the full path to your PDF file"
     )
     # Output directory with show/hide functionality
     output_dir = st.text_input(
         "Output Directory",
+        placeholder="C:\\path\\to\\output\\folder",
         help="Directory where extracted tables will be saved",
         type="password" if not st.session_state.show_output_dir else "default"
     )
     st.markdown("## ⚡ Tesla 10K Demo Setup")
     st.markdown("*Configure extraction methods for Tesla's 10K document processing*")
     # Document info
     st.markdown("### 📄 Document Information")
+    st.info("**Document:** tesla_docs_28-41 (1)-9-14.pdf")
+    # Extraction method selection (removed output directory section completely)
     st.markdown("### 🔧 Select Extraction Methods")
     col1, col2, col3 = st.columns(3)
     with col1:
         docling = st.checkbox("Docling",
+                             value=st.session_state.demo_selected_methods['docling'],
                              help="Advanced document processing")
     with col2:
         llamaparse = st.checkbox("LlamaParse",
+                                value=st.session_state.demo_selected_methods['llamaparse'],
                                 help="AI-powered parsing")
     with col3:
         unstructured = st.checkbox("Unstructured",
+                                  value=st.session_state.demo_selected_methods['unstructured'],
                                   help="General purpose extraction")
     # Update session state
     # Process button
     col1, col2 = st.columns([2, 1])
     with col1:
+        if st.button("🚀 Process Tesla Document", type="primary"):
+            if docling or llamaparse or unstructured:
+                st.session_state.page = 'demo'
+                st.session_state.processing = True
+                st.rerun()
+            else:
+                st.error("Please select at least one extraction method.")
     with col2:
         if st.button("← Back to Home"):
         method_status.markdown(f"**Overall Progress:** {int(progress * 100)}% | **Current Method:** {current_method.title()}")
+        time.sleep(0.33)
     # Show completion
     st.markdown("""
     process_tesla_demo()
     st.session_state.processing = False
+    time.sleep(2)
     st.rerun()
 def process_tesla_demo():
     """Process Tesla demo document using selected extraction methods"""
     try:
+        # Create output directory for demo (using the base path)
+        demo_output_dir = OUTPUT_BASE_PATH / "tesla_demo"
+        # Prepare the request data for selected methods only
+        data = {
+            'input_file_path': str(TESLA_DOC_PATH),
+            'output_dir': str(demo_output_dir),
+            'docling': st.session_state.demo_selected_methods['docling'],
+            'llamaparse': st.session_state.demo_selected_methods['llamaparse'],
+            'unstructured': st.session_state.demo_selected_methods['unstructured']
+        }
+        # Make request to FastAPI endpoint (uncomment when ready)
+        # response = requests.post('http://localhost:8000/extract', data=data)
+        # if response.status_code == 200:
+        #     st.session_state.demo_results = response.json()
         # For demo purposes, simulate successful processing for selected methods only
         results = {}
+        if st.session_state.demo_selected_methods['docling']:
+            results['docling'] = {'status': 'success', 'total_tables': 5}
+        if st.session_state.demo_selected_methods['llamaparse']:
+            results['llamaparse'] = {'status': 'success', 'total_tables': 3}
+        if st.session_state.demo_selected_methods['unstructured']:
+            results['unstructured'] = {'status': 'success', 'total_tables': 4}
         st.session_state.demo_results = {'results': results}
 def count_html_files(directory):
     """Count only HTML files in directory"""
+    if not os.path.exists(directory):
         return 0
+    html_files = glob.glob(os.path.join(str(directory), "*.html"))
+    html_files.extend(glob.glob(os.path.join(str(directory), "**", "*.html"), recursive=True))
     return len(html_files)
 def get_excel_files(directory):
     """Get all Excel files from directory"""
+    if not os.path.exists(directory):
         return []
+    excel_files = glob.glob(os.path.join(str(directory), "*.xlsx"))
+    excel_files.extend(glob.glob(os.path.join(str(directory), "*.xls")))
+    excel_files.extend(glob.glob(os.path.join(str(directory), "*.csv")))
+    excel_files.extend(glob.glob(os.path.join(str(directory), "**", "*.xlsx"), recursive=True))
+    excel_files.extend(glob.glob(os.path.join(str(directory), "**", "*.xls"), recursive=True))
     return excel_files
 def get_file_info(file_path):
     """Get file information including size and modification time"""
+    if not os.path.exists(file_path):
         return {"size": 0, "modified": "Unknown"}
+    stat = os.stat(file_path)
     size_kb = stat.st_size / 1024
     modified = datetime.fromtimestamp(stat.st_mtime)
 def show_demo_results():
     st.markdown("## 📊 Tesla 10K Processing Results")
     # Document info
     col1, col2 = st.columns([2, 1])
     with col1:
+        st.markdown("### 📄 tesla_docs_28-41 (1)-9-14.pdf")
         st.markdown("**Status:** ✅ Complete")
+        processed_methods = [method.title() for method, selected in st.session_state.demo_selected_methods.items() if selected]
+        st.markdown(f"**Processed with:** {', '.join(processed_methods)}")
     with col2:
         if st.button("🔄 Reset"):
             st.session_state.demo_selected_methods = {'docling': True, 'llamaparse': False, 'unstructured': False}
             st.rerun()
+    # Method selection tabs - only show selected methods
+    available_methods = [method for method, selected in st.session_state.demo_selected_methods.items() if selected]
+    if len(available_methods) > 1:
+        st.markdown("### 🔧 Select Extraction Method to View")
+        method_labels = {
+            'docling': '🔧 Docling',
+            'llamaparse': '🦙 LlamaParse',
+            'unstructured': '📊 Unstructured'
+        }
+        # Create columns based on number of available methods
+        cols = st.columns(len(available_methods))
+        for i, method in enumerate(available_methods):
+            with cols[i]:
+                # Show HTML file count for each method using the same logic as show_html_tables
+                method_output_dir = OUTPUT_BASE_PATH / method
+                html_files = []
+                if os.path.exists(method_output_dir):
+                    html_files = glob.glob(os.path.join(str(method_output_dir), "**", "*.html"), recursive=True)
+                    html_files = list(set(html_files))
+                html_count = len(html_files)
+                button_label = f"{method_labels[method]} ({html_count} HTML files)"
+                if st.button(button_label, key=f"tab_{method}", use_container_width=True):
+                    st.session_state.selected_method = method
+    # Default to first available method if no method selected
+    if st.session_state.selected_method is None or st.session_state.selected_method not in available_methods:
+        st.session_state.selected_method = available_methods[0] if available_methods else None
+    # Show results for selected method
+    if st.session_state.selected_method:
+        show_method_results(st.session_state.selected_method)
 def show_method_results(method):
     st.markdown(f"### 📋 Results from {method.title()}")
     # Get actual HTML files from directory
     html_files = []
+    if os.path.exists(method_output_dir):
+        # Use only the recursive glob, which includes the top-level directory
+        html_files = glob.glob(os.path.join(str(method_output_dir), "**", "*.html"), recursive=True)
+        # Remove duplicates just in case
+        html_files = list(set(html_files))
+    # Sort files by table number if possible (e.g., table_1, table_2, ...)
     import re
     def extract_table_number(filename):
+        match = re.search(r"table[_-](\d+)", filename, re.IGNORECASE)
         if match:
             return int(match.group(1))
+        return float('inf')  # Put files without a number at the end
+    html_files.sort(key=lambda f: extract_table_number(os.path.basename(f)))
     if html_files:
         st.markdown(f"**Found {len(html_files)} HTML table(s):**")
             st.markdown(f"""
             <div class="table-header">
                 <h4 style="color: #495057;">📋 Table {i+1}</h4>
+                <small style="color: #6c757d;">File: {os.path.basename(html_file)}</small>
             </div>
             """, unsafe_allow_html=True)
         for i, excel_file in enumerate(excel_files):
             # Get file info
             file_info = get_file_info(excel_file)
+            file_name = os.path.basename(excel_file)
             # File info card
             st.markdown(f"""
             # Try to read and display Excel file preview
             try:
+                df = pd.read_excel(excel_file)
                 if not df.empty:
                     st.markdown(f"**Preview (first 5 rows):**")
                     st.dataframe(df.head(), use_container_width=True)
                     st.markdown(f"**Dimensions:** {df.shape[0]} × {df.shape[1]}")
                 else:
+                    st.info("Excel file is empty")
             except Exception as e:
+                # Try reading as CSV if Excel reading fails
+                try:
+                    df = pd.read_csv(excel_file)
+                    if not df.empty:
+                        st.markdown(f"**Preview (first 5 rows, read as CSV):**")
+                        st.dataframe(df.head(), use_container_width=True)
+                        st.markdown(f"**Dimensions:** {df.shape[0]} × {df.shape[1]}")
+                    else:
+                        st.info("CSV file is empty")
+                except Exception as e2:
+                    st.warning(f"Could not preview file as Excel or CSV: {e2}")
             # Download button for Excel file
             try:
                 with open(excel_file, 'rb') as f:
+                    excel_data = f.read()
                 st.download_button(
                     label=f"⬇️ Download",
+                    data=excel_data,
                     file_name=file_name,
                     mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                     key=f"download_excel_{method}_{i}",
                     use_container_width=True
                 )
             except Exception as e:
+                st.error(f"Error reading Excel file for download: {e}")
             if i < len(excel_files) - 1:
                 st.markdown("---")
                         st.json(method_result)
                         # List files in output directory
+                        method_dir = os.path.join(output_dir, selected_method)
                         # HTML files
+                        html_files = glob.glob(os.path.join(method_dir, "*.html"))
+                        html_files.extend(glob.glob(os.path.join(method_dir, "**", "*.html"), recursive=True))
                         # Excel files
                         excel_files = get_excel_files(method_dir)
                             if html_files:
                                 st.markdown("**HTML Files:**")
                                 for html_file in html_files:
+                                    st.markdown(f"- {os.path.basename(html_file)}")
                             if excel_files:
                                 st.markdown("**Excel Files:**")
                                 for excel_file in excel_files:
+                                    st.markdown(f"- {os.path.basename(excel_file)}")
                 else:
                     st.warning("No successful extractions found.")
                 st.rerun()
         with nav_col2:
             st.button("History", use_container_width=True)
     st.markdown("---")
     # Route to appropriate page
     if st.session_state.page == 'home':
         show_home_page()