Spaces:

traversaal-internal
/

Transform-PDF-Tables-to-HTML-and-Excel

Running

App Files Files Community

AreejMehboob committed 3 days ago

Commit

49d41a8

verified ·

1 Parent(s): 0f51e34

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +183 -134

src/streamlit_app.py CHANGED Viewed

@@ -220,11 +220,31 @@ if 'demo_results' not in st.session_state:
 if 'demo_selected_methods' not in st.session_state:
     st.session_state.demo_selected_methods = {'docling': True, 'llamaparse': False, 'unstructured': False}
-# Tesla demo document path (adjust as needed)
-TESLA_DOC_PATH = r"C:\Users\Areej\Desktop\get-tables-fastapi\tesla_docs_28-41 (1)-9-14.pdf"
-OUTPUT_BASE_PATH = r"C:\Users\Areej\Desktop\get-tables-fastapi\output"
 def show_home_page():
     # Header
     st.markdown("""
     <div class="main-header">
@@ -235,19 +255,44 @@ def show_home_page():
     </div>
     """, unsafe_allow_html=True)
     # Main buttons
     col1, col2, col3 = st.columns([1, 2, 1])
     with col2:
-        col_btn1, col_btn2 = st.columns(2)
-        with col_btn1:
-            if st.button("📄 Upload PDF Document", key="upload_btn", help="Upload your own PDF document"):
-                st.session_state.page = 'upload'
-                st.rerun()
-        with col_btn2:
-            if st.button("⚡ Try Tesla 10K Demo", key="demo_btn", help="Try with Tesla's 10K form"):
-                st.session_state.page = 'demo_setup'
-                st.rerun()
     # Features section
     st.markdown("---")
@@ -291,14 +336,14 @@ def show_upload_page():
     st.markdown("**Or specify file path:**")
     input_file_path = st.text_input(
         "Input File Path",
-        placeholder="C:\\path\\to\\your\\document.pdf",
-        help="Enter the full path to your PDF file"
     )
     # Output directory with show/hide functionality
     output_dir = st.text_input(
         "Output Directory",
-        placeholder="C:\\path\\to\\output\\folder",
         help="Directory where extracted tables will be saved",
         type="password" if not st.session_state.show_output_dir else "default"
     )
@@ -338,25 +383,32 @@ def show_demo_setup_page():
     st.markdown("## ⚡ Tesla 10K Demo Setup")
     st.markdown("*Configure extraction methods for Tesla's 10K document processing*")
     # Document info
     st.markdown("### 📄 Document Information")
-    st.info("**Document:** tesla_docs_28-41 (1)-9-14.pdf")
-    # Extraction method selection (removed output directory section completely)
     st.markdown("### 🔧 Select Extraction Methods")
     col1, col2, col3 = st.columns(3)
     with col1:
         docling = st.checkbox("Docling",
-                             value=st.session_state.demo_selected_methods['docling'],
                              help="Advanced document processing")
     with col2:
         llamaparse = st.checkbox("LlamaParse",
-                                value=st.session_state.demo_selected_methods['llamaparse'],
                                 help="AI-powered parsing")
     with col3:
         unstructured = st.checkbox("Unstructured",
-                                  value=st.session_state.demo_selected_methods['unstructured'],
                                   help="General purpose extraction")
     # Update session state
@@ -369,13 +421,33 @@ def show_demo_setup_page():
     # Process button
     col1, col2 = st.columns([2, 1])
     with col1:
-        if st.button("🚀 Process Tesla Document", type="primary"):
-            if docling or llamaparse or unstructured:
-                st.session_state.page = 'demo'
-                st.session_state.processing = True
-                st.rerun()
-            else:
-                st.error("Please select at least one extraction method.")
     with col2:
         if st.button("← Back to Home"):
@@ -428,7 +500,7 @@ def show_processing_demo():
         method_status.markdown(f"**Overall Progress:** {int(progress * 100)}% | **Current Method:** {current_method.title()}")
-        time.sleep(0.33)
     # Show completion
     st.markdown("""
@@ -442,37 +514,18 @@ def show_processing_demo():
     process_tesla_demo()
     st.session_state.processing = False
-    time.sleep(2)
     st.rerun()
 def process_tesla_demo():
     """Process Tesla demo document using selected extraction methods"""
     try:
-        # Create output directory for demo (using the base path)
-        demo_output_dir = os.path.join(OUTPUT_BASE_PATH, "tesla_demo")
-        # Prepare the request data for selected methods only
-        data = {
-            'input_file_path': TESLA_DOC_PATH,
-            'output_dir': demo_output_dir,
-            'docling': st.session_state.demo_selected_methods['docling'],
-            'llamaparse': st.session_state.demo_selected_methods['llamaparse'],
-            'unstructured': st.session_state.demo_selected_methods['unstructured']
-        }
-        # Make request to FastAPI endpoint (uncomment when ready)
-        # response = requests.post('http://localhost:8000/extract', data=data)
-        # if response.status_code == 200:
-        #     st.session_state.demo_results = response.json()
         # For demo purposes, simulate successful processing for selected methods only
         results = {}
-        if st.session_state.demo_selected_methods['docling']:
-            results['docling'] = {'status': 'success', 'total_tables': 5}
-        if st.session_state.demo_selected_methods['llamaparse']:
-            results['llamaparse'] = {'status': 'success', 'total_tables': 3}
-        if st.session_state.demo_selected_methods['unstructured']:
-            results['unstructured'] = {'status': 'success', 'total_tables': 4}
         st.session_state.demo_results = {'results': results}
@@ -481,31 +534,29 @@ def process_tesla_demo():
 def count_html_files(directory):
     """Count only HTML files in directory"""
-    if not os.path.exists(directory):
         return 0
-    html_files = glob.glob(os.path.join(directory, "*.html"))
-    html_files.extend(glob.glob(os.path.join(directory, "**", "*.html"), recursive=True))
     return len(html_files)
 def get_excel_files(directory):
     """Get all Excel files from directory"""
-    if not os.path.exists(directory):
         return []
-    excel_files = glob.glob(os.path.join(directory, "*.xlsx"))
-    excel_files.extend(glob.glob(os.path.join(directory, "*.xls")))
-    excel_files.extend(glob.glob(os.path.join(directory, "*.csv")))
-    excel_files.extend(glob.glob(os.path.join(directory, "**", "*.xlsx"), recursive=True))
-    excel_files.extend(glob.glob(os.path.join(directory, "**", "*.xls"), recursive=True))
     return excel_files
 def get_file_info(file_path):
     """Get file information including size and modification time"""
-    if not os.path.exists(file_path):
         return {"size": 0, "modified": "Unknown"}
-    stat = os.stat(file_path)
     size_kb = stat.st_size / 1024
     modified = datetime.fromtimestamp(stat.st_mtime)
@@ -517,13 +568,18 @@ def get_file_info(file_path):
 def show_demo_results():
     st.markdown("## 📊 Tesla 10K Processing Results")
     # Document info
     col1, col2 = st.columns([2, 1])
     with col1:
-        st.markdown("### 📄 tesla_docs_28-41 (1)-9-14.pdf")
         st.markdown("**Status:** ✅ Complete")
-        processed_methods = [method.title() for method, selected in st.session_state.demo_selected_methods.items() if selected]
-        st.markdown(f"**Processed with:** {', '.join(processed_methods)}")
     with col2:
         if st.button("🔄 Reset"):
@@ -535,42 +591,41 @@ def show_demo_results():
             st.session_state.demo_selected_methods = {'docling': True, 'llamaparse': False, 'unstructured': False}
             st.rerun()
-    # Method selection tabs - only show selected methods
-    available_methods = [method for method, selected in st.session_state.demo_selected_methods.items() if selected]
-    if len(available_methods) > 1:
-        st.markdown("### 🔧 Select Extraction Method to View")
-        method_labels = {
-            'docling': '🔧 Docling',
-            'llamaparse': '🦙 LlamaParse',
-            'unstructured': '📊 Unstructured'
-        }
-        # Create columns based on number of available methods
-        cols = st.columns(len(available_methods))
-        for i, method in enumerate(available_methods):
-            with cols[i]:
-                # Show HTML file count for each method using the same logic as show_html_tables
-                method_output_dir = os.path.join(OUTPUT_BASE_PATH, method)
-                html_files = []
-                if os.path.exists(method_output_dir):
-                    html_files = glob.glob(os.path.join(method_output_dir, "**", "*.html"), recursive=True)
-                    html_files = list(set(html_files))
-                html_count = len(html_files)
-                button_label = f"{method_labels[method]} ({html_count} HTML files)"
-                if st.button(button_label, key=f"tab_{method}", use_container_width=True):
-                    st.session_state.selected_method = method
-    # Default to first available method if no method selected
-    if st.session_state.selected_method is None or st.session_state.selected_method not in available_methods:
-        st.session_state.selected_method = available_methods[0] if available_methods else None
-    # Show results for selected method
-    if st.session_state.selected_method:
-        show_method_results(st.session_state.selected_method)
 def show_method_results(method):
     st.markdown(f"### 📋 Results from {method.title()}")
@@ -588,24 +643,22 @@ def show_method_results(method):
 def show_html_tables(method):
     """Display HTML tables from the method's output directory"""
-    method_output_dir = os.path.join(OUTPUT_BASE_PATH, method)
     # Get actual HTML files from directory
     html_files = []
-    if os.path.exists(method_output_dir):
-        # Use only the recursive glob, which includes the top-level directory
-        html_files = glob.glob(os.path.join(method_output_dir, "**", "*.html"), recursive=True)
-        # Remove duplicates just in case
-        html_files = list(set(html_files))
-    # Sort files by table number if possible (e.g., table_1, table_2, ...)
     import re
     def extract_table_number(filename):
-        match = re.search(r"table[_-](\d+)", filename, re.IGNORECASE)
         if match:
             return int(match.group(1))
-        return float('inf')  # Put files without a number at the end
-    html_files.sort(key=lambda f: extract_table_number(os.path.basename(f)))
     if html_files:
         st.markdown(f"**Found {len(html_files)} HTML table(s):**")
@@ -617,7 +670,7 @@ def show_html_tables(method):
             st.markdown(f"""
             <div class="table-header">
                 <h4 style="color: #495057;">📋 Table {i+1}</h4>
-                <small style="color: #6c757d;">File: {os.path.basename(html_file)}</small>
             </div>
             """, unsafe_allow_html=True)
@@ -657,7 +710,7 @@ def show_html_tables(method):
 def show_excel_files(method):
     """Display Excel files from the method's output directory"""
-    method_output_dir = os.path.join(OUTPUT_BASE_PATH, method)
     # Get actual Excel files from directory
     excel_files = get_excel_files(method_output_dir)
@@ -668,7 +721,7 @@ def show_excel_files(method):
         for i, excel_file in enumerate(excel_files):
             # Get file info
             file_info = get_file_info(excel_file)
-            file_name = os.path.basename(excel_file)
             # File info card
             st.markdown(f"""
@@ -683,40 +736,35 @@ def show_excel_files(method):
             # Try to read and display Excel file preview
             try:
-                df = pd.read_excel(excel_file)
                 if not df.empty:
                     st.markdown(f"**Preview (first 5 rows):**")
                     st.dataframe(df.head(), use_container_width=True)
                     st.markdown(f"**Dimensions:** {df.shape[0]} × {df.shape[1]}")
                 else:
-                    st.info("Excel file is empty")
             except Exception as e:
-                # Try reading as CSV if Excel reading fails
-                try:
-                    df = pd.read_csv(excel_file)
-                    if not df.empty:
-                        st.markdown(f"**Preview (first 5 rows, read as CSV):**")
-                        st.dataframe(df.head(), use_container_width=True)
-                        st.markdown(f"**Dimensions:** {df.shape[0]} × {df.shape[1]}")
-                    else:
-                        st.info("CSV file is empty")
-                except Exception as e2:
-                    st.warning(f"Could not preview file as Excel or CSV: {e2}")
             # Download button for Excel file
             try:
                 with open(excel_file, 'rb') as f:
-                    excel_data = f.read()
                 st.download_button(
                     label=f"⬇️ Download",
-                    data=excel_data,
                     file_name=file_name,
                     mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                     key=f"download_excel_{method}_{i}",
                     use_container_width=True
                 )
             except Exception as e:
-                st.error(f"Error reading Excel file for download: {e}")
             if i < len(excel_files) - 1:
                 st.markdown("---")
@@ -765,11 +813,10 @@ def process_document(file_path, output_dir, docling, llamaparse, unstructured):
                         st.json(method_result)
                         # List files in output directory
-                        method_dir = os.path.join(output_dir, selected_method)
                         # HTML files
-                        html_files = glob.glob(os.path.join(method_dir, "*.html"))
-                        html_files.extend(glob.glob(os.path.join(method_dir, "**", "*.html"), recursive=True))
                         # Excel files
                         excel_files = get_excel_files(method_dir)
@@ -780,12 +827,12 @@ def process_document(file_path, output_dir, docling, llamaparse, unstructured):
                             if html_files:
                                 st.markdown("**HTML Files:**")
                                 for html_file in html_files:
-                                    st.markdown(f"- {os.path.basename(html_file)}")
                             if excel_files:
                                 st.markdown("**Excel Files:**")
                                 for excel_file in excel_files:
-                                    st.markdown(f"- {os.path.basename(excel_file)}")
                 else:
                     st.warning("No successful extractions found.")
@@ -811,7 +858,9 @@ def main():
                 st.rerun()
         with nav_col2:
             st.button("History", use_container_width=True)
     st.markdown("---")
     # Route to appropriate page
     if st.session_state.page == 'home':
         show_home_page()

 if 'demo_selected_methods' not in st.session_state:
     st.session_state.demo_selected_methods = {'docling': True, 'llamaparse': False, 'unstructured': False}
+# Get the current directory (src) and set output path
+CURRENT_DIR = Path(__file__).parent
+OUTPUT_BASE_PATH = CURRENT_DIR / "output"
+# Create output directory if it doesn't exist
+OUTPUT_BASE_PATH.mkdir(exist_ok=True)
def check_existing_results(base_path=None):
    """Return the extraction methods that already have HTML output on disk.

    For each known method ('docling', 'llamaparse', 'unstructured') the
    folder ``<base>/<method>`` is searched recursively for ``*.html`` files.
    A method is reported when at least one such file is found.

    Args:
        base_path: Directory holding the per-method output folders, as a
            ``str`` or ``Path``. When omitted, the module-level
            ``OUTPUT_BASE_PATH`` is used.

    Returns:
        list[str]: Names of the methods that have at least one HTML file,
        in the fixed order docling, llamaparse, unstructured.
    """
    root = OUTPUT_BASE_PATH if base_path is None else Path(base_path)
    existing_methods = []
    for method in ('docling', 'llamaparse', 'unstructured'):
        method_dir = root / method
        # any() stops at the first match, so a large output tree is not
        # fully walked just to learn that it is non-empty. is_dir() also
        # skips a stray regular file that happens to carry a method name.
        if method_dir.is_dir() and any(method_dir.glob("**/*.html")):
            existing_methods.append(method)
    return existing_methods
 def show_home_page():
+    # Check for existing results
+    existing_methods = check_existing_results()
     # Header
     st.markdown("""
     <div class="main-header">
     </div>
     """, unsafe_allow_html=True)
+    # Show existing results notification if any
+    if existing_methods:
+        st.info(f"📁 Found existing results from: {', '.join([m.title() for m in existing_methods])}. Click 'View Results' to see them.")
     # Main buttons
     col1, col2, col3 = st.columns([1, 2, 1])
     with col2:
+        if existing_methods:
+            # Show three buttons if results exist
+            col_btn1, col_btn2, col_btn3 = st.columns(3)
+            with col_btn1:
+                if st.button("📄 Upload PDF", key="upload_btn", help="Upload your own PDF document"):
+                    st.session_state.page = 'upload'
+                    st.rerun()
+            with col_btn2:
+                if st.button("⚡ Try Demo", key="demo_btn", help="Try with Tesla's 10K form"):
+                    st.session_state.page = 'demo_setup'
+                    st.rerun()
+            with col_btn3:
+                if st.button("👁️ View Results", key="view_results_btn", help="View existing results"):
+                    st.session_state.page = 'demo'
+                    st.session_state.processing = False
+                    st.session_state.demo_selected_methods = {method: method in existing_methods for method in ['docling', 'llamaparse', 'unstructured']}
+                    st.rerun()
+        else:
+            # Show two buttons if no results exist
+            col_btn1, col_btn2 = st.columns(2)
+            with col_btn1:
+                if st.button("📄 Upload PDF Document", key="upload_btn", help="Upload your own PDF document"):
+                    st.session_state.page = 'upload'
+                    st.rerun()
+            with col_btn2:
+                if st.button("⚡ Try Tesla 10K Demo", key="demo_btn", help="Try with Tesla's 10K form"):
+                    st.session_state.page = 'demo_setup'
+                    st.rerun()
     # Features section
     st.markdown("---")
     st.markdown("**Or specify file path:**")
     input_file_path = st.text_input(
         "Input File Path",
+        placeholder="path/to/your/document.pdf",
+        help="Enter the path to your PDF file"
     )
     # Output directory with show/hide functionality
     output_dir = st.text_input(
         "Output Directory",
+        value=str(OUTPUT_BASE_PATH),
         help="Directory where extracted tables will be saved",
         type="password" if not st.session_state.show_output_dir else "default"
     )
     st.markdown("## ⚡ Tesla 10K Demo Setup")
     st.markdown("*Configure extraction methods for Tesla's 10K document processing*")
+    # Check for existing results
+    existing_methods = check_existing_results()
     # Document info
     st.markdown("### 📄 Document Information")
+    if existing_methods:
+        st.success(f"**Found existing results from:** {', '.join([m.title() for m in existing_methods])}")
+        st.info("**Note:** You can view existing results or process with different methods")
+    else:
+        st.info("**Document:** Tesla 10K form - Financial tables extraction demo")
+    # Extraction method selection
     st.markdown("### 🔧 Select Extraction Methods")
     col1, col2, col3 = st.columns(3)
     with col1:
         docling = st.checkbox("Docling",
+                             value=st.session_state.demo_selected_methods.get('docling', True),
                              help="Advanced document processing")
     with col2:
         llamaparse = st.checkbox("LlamaParse",
+                                value=st.session_state.demo_selected_methods.get('llamaparse', False),
                                 help="AI-powered parsing")
     with col3:
         unstructured = st.checkbox("Unstructured",
+                                  value=st.session_state.demo_selected_methods.get('unstructured', False),
                                   help="General purpose extraction")
     # Update session state
     # Process button
     col1, col2 = st.columns([2, 1])
     with col1:
+        if existing_methods:
+            # Show two buttons if results exist
+            col_btn1, col_btn2 = st.columns(2)
+            with col_btn1:
+                if st.button("👁️ View Existing Results", type="secondary"):
+                    st.session_state.page = 'demo'
+                    st.session_state.processing = False
+                    st.session_state.demo_selected_methods = {method: method in existing_methods for method in ['docling', 'llamaparse', 'unstructured']}
+                    st.rerun()
+            with col_btn2:
+                if st.button("🚀 Process New", type="primary"):
+                    if docling or llamaparse or unstructured:
+                        st.session_state.page = 'demo'
+                        st.session_state.processing = True
+                        st.rerun()
+                    else:
+                        st.error("Please select at least one extraction method.")
+        else:
+            # Show single process button if no results exist
+            if st.button("🚀 Process Tesla Document", type="primary"):
+                if docling or llamaparse or unstructured:
+                    st.session_state.page = 'demo'
+                    st.session_state.processing = True
+                    st.rerun()
+                else:
+                    st.error("Please select at least one extraction method.")
     with col2:
         if st.button("← Back to Home"):
         method_status.markdown(f"**Overall Progress:** {int(progress * 100)}% | **Current Method:** {current_method.title()}")
+        time.sleep(0.1)  # Reduced sleep time for faster demo
     # Show completion
     st.markdown("""
     process_tesla_demo()
     st.session_state.processing = False
+    time.sleep(1)
     st.rerun()
 def process_tesla_demo():
     """Process Tesla demo document using selected extraction methods"""
     try:
         # For demo purposes, simulate successful processing for selected methods only
         results = {}
+        selected_methods = [method for method, selected in st.session_state.demo_selected_methods.items() if selected]
+        for method in selected_methods:
+            results[method] = {'status': 'success', 'total_tables': 3 + hash(method) % 3}  # Simulate different table counts
         st.session_state.demo_results = {'results': results}
 def count_html_files(directory):
     """Count the ``*.html`` files found anywhere under *directory*.

     Args:
         directory: Folder to search, given as a ``str`` or ``Path``. It is
             coerced to ``Path`` so that string paths are accepted as well
             as ``Path`` objects.

     Returns:
         int: Number of HTML files found recursively, or 0 when *directory*
         is not an existing directory.
     """
     directory = Path(directory)
     if not directory.is_dir():
         return 0
     # Count lazily instead of materialising the full list of matches.
     return sum(1 for _ in directory.glob("**/*.html"))
 def get_excel_files(directory):
     """Collect spreadsheet-like files (xlsx, xls, csv) under *directory*.

     Args:
         directory: Folder to search recursively, given as a ``str`` or
             ``Path``. It is coerced to ``Path`` so string paths work too.

     Returns:
         list[Path]: Matching files grouped by extension in the order
         ``.xlsx``, ``.xls``, ``.csv``; within each group the paths are
         sorted. An empty list is returned when *directory* is not an
         existing directory.
     """
     directory = Path(directory)
     if not directory.is_dir():
         return []
     excel_files = []
     for ext in ('*.xlsx', '*.xls', '*.csv'):
         # glob() yields entries in arbitrary filesystem order; sorting keeps
         # the listing (and any index derived from it) stable between calls.
         excel_files.extend(sorted(directory.glob(f"**/{ext}")))
     return excel_files
 def get_file_info(file_path):
     """Get file information including size and modification time"""
+    if not file_path.exists():
         return {"size": 0, "modified": "Unknown"}
+    stat = file_path.stat()
     size_kb = stat.st_size / 1024
     modified = datetime.fromtimestamp(stat.st_mtime)
 def show_demo_results():
     st.markdown("## 📊 Tesla 10K Processing Results")
+    # Check for existing results
+    existing_methods = check_existing_results()
     # Document info
     col1, col2 = st.columns([2, 1])
     with col1:
+        st.markdown("### 📄 Tesla 10K Document")
         st.markdown("**Status:** ✅ Complete")
+        if existing_methods:
+            st.markdown(f"**Available results:** {', '.join([m.title() for m in existing_methods])}")
+        else:
+            st.warning("No results found in output directory")
     with col2:
         if st.button("🔄 Reset"):
             st.session_state.demo_selected_methods = {'docling': True, 'llamaparse': False, 'unstructured': False}
             st.rerun()
+    # Method selection tabs - only show available methods
+    available_methods = existing_methods
+    if available_methods:
+        if len(available_methods) > 1:
+            st.markdown("### 🔧 Select Extraction Method to View")
+            method_labels = {
+                'docling': '🔧 Docling',
+                'llamaparse': '🦙 LlamaParse',
+                'unstructured': '📊 Unstructured'
+            }
+            # Create columns based on number of available methods
+            cols = st.columns(len(available_methods))
+            for i, method in enumerate(available_methods):
+                with cols[i]:
+                    # Show HTML file count for each method
+                    method_output_dir = OUTPUT_BASE_PATH / method
+                    html_count = count_html_files(method_output_dir)
+                    button_label = f"{method_labels[method]} ({html_count} HTML files)"
+                    if st.button(button_label, key=f"tab_{method}", use_container_width=True):
+                        st.session_state.selected_method = method
+        # Default to first available method if no method selected
+        if st.session_state.selected_method is None or st.session_state.selected_method not in available_methods:
+            st.session_state.selected_method = available_methods[0]
+        # Show results for selected method
+        if st.session_state.selected_method:
+            show_method_results(st.session_state.selected_method)
+    else:
+        st.info("No results found. Please process a document first.")
 def show_method_results(method):
     st.markdown(f"### 📋 Results from {method.title()}")
 def show_html_tables(method):
     """Display HTML tables from the method's output directory"""
+    method_output_dir = OUTPUT_BASE_PATH / method
     # Get actual HTML files from directory
     html_files = []
+    if method_output_dir.exists():
+        html_files = list(method_output_dir.glob("**/*.html"))
+    # Sort files by table number if possible
     import re
     def extract_table_number(filename):
+        match = re.search(r"table[_-](\d+)", filename.name, re.IGNORECASE)
         if match:
             return int(match.group(1))
+        return float('inf')
+    html_files.sort(key=extract_table_number)
     if html_files:
         st.markdown(f"**Found {len(html_files)} HTML table(s):**")
             st.markdown(f"""
             <div class="table-header">
                 <h4 style="color: #495057;">📋 Table {i+1}</h4>
+                <small style="color: #6c757d;">File: {html_file.name}</small>
             </div>
             """, unsafe_allow_html=True)
 def show_excel_files(method):
     """Display Excel files from the method's output directory"""
+    method_output_dir = OUTPUT_BASE_PATH / method
     # Get actual Excel files from directory
     excel_files = get_excel_files(method_output_dir)
         for i, excel_file in enumerate(excel_files):
             # Get file info
             file_info = get_file_info(excel_file)
+            file_name = excel_file.name
             # File info card
             st.markdown(f"""
             # Try to read and display Excel file preview
             try:
+                if excel_file.suffix.lower() in ['.xlsx', '.xls']:
+                    df = pd.read_excel(excel_file)
+                else:
+                    df = pd.read_csv(excel_file)
                 if not df.empty:
                     st.markdown(f"**Preview (first 5 rows):**")
                     st.dataframe(df.head(), use_container_width=True)
                     st.markdown(f"**Dimensions:** {df.shape[0]} × {df.shape[1]}")
                 else:
+                    st.info("File is empty")
             except Exception as e:
+                st.warning(f"Could not preview file: {e}")
             # Download button for Excel file
             try:
                 with open(excel_file, 'rb') as f:
+                    file_data = f.read()
                 st.download_button(
                     label=f"⬇️ Download",
+                    data=file_data,
                     file_name=file_name,
                     mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                     key=f"download_excel_{method}_{i}",
                     use_container_width=True
                 )
             except Exception as e:
+                st.error(f"Error reading file for download: {e}")
             if i < len(excel_files) - 1:
                 st.markdown("---")
                         st.json(method_result)
                         # List files in output directory
+                        method_dir = Path(output_dir) / selected_method
                         # HTML files
+                        html_files = list(method_dir.glob("**/*.html"))
                         # Excel files
                         excel_files = get_excel_files(method_dir)
                             if html_files:
                                 st.markdown("**HTML Files:**")
                                 for html_file in html_files:
+                                    st.markdown(f"- {html_file.name}")
                             if excel_files:
                                 st.markdown("**Excel Files:**")
                                 for excel_file in excel_files:
+                                    st.markdown(f"- {excel_file.name}")
                 else:
                     st.warning("No successful extractions found.")
                 st.rerun()
         with nav_col2:
             st.button("History", use_container_width=True)
     st.markdown("---")
     # Route to appropriate page
     if st.session_state.page == 'home':
         show_home_page()