Spaces:

ChinmayBH
/

PDF_DATA_EXTRACTOR_PAGEWISE

Running

App Files Community

ChinmayBH committed on Aug 12, 2024

Commit

7fdf520

verified ·

1 Parent(s): 57dea10

updated app.py

Browse files

Files changed (1) hide show

app.py +14 -21

app.py CHANGED Viewed

@@ -6,7 +6,6 @@ from io import BytesIO
 from PIL import Image
 import pandas as pd
 def extract_text_images(
         pdf_path: str, output_folder: str,
         minimum_font_size: int,
@@ -227,17 +226,13 @@ def main():
         min_value=1, value=2
     )
-    # Output folder path input
     output_folder = st.text_input(
-        "Output folder path:",
-        os.path.join(os.getcwd(), "Extracted_Data")
     )
-    # Convert the output folder to an absolute path
-    output_folder = os.path.abspath(os.path.expanduser(output_folder))
     if st.button("Start Extraction"):
-        if pdf_file is not None:
             # Save uploaded PDF to a temporary location
             temp_pdf_path = os.path.join(output_folder, pdf_file.name)
             with open(temp_pdf_path, "wb") as f:
@@ -257,11 +252,6 @@ def main():
             # Convert data to XLSX
             xlsx_data = convert_to_xlsx(extraction_data)
-            # Show a preview of the XLSX data (first 5 rows)
-            # st.subheader("XLSX Preview (First 5 Rows)")
-            df = pd.read_excel(xlsx_data, sheet_name='Extraction')
-            # st.dataframe(df.head())
             col1, col2 = st.columns(2)
             with col1:
@@ -279,7 +269,7 @@ def main():
                     mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
         else:
-            st.error("Please upload a PDF file.")
     # Footer (Fixed Position)
     st.markdown(
@@ -288,21 +278,24 @@ def main():
         .footer {
             position: fixed;
             bottom: 0;
             width: 100%;
-            text-align: center;
-            background-color: #f1f1f1;
-            padding: 10px;
-            font-size: 12px;
-            color: #333;
-            border-top: 1px solid #ccc;
         }
         </style>
         <div class="footer">
-            &copy; 2023 - PDF Extraction Tool
         </div>
         """,
         unsafe_allow_html=True
     )
 if __name__ == "__main__":
     main()

 from PIL import Image
 import pandas as pd
 def extract_text_images(
         pdf_path: str, output_folder: str,
         minimum_font_size: int,
         min_value=1, value=2
     )
+    # Output folder path input (full path provided by the user)
     output_folder = st.text_input(
+        "Output folder path:"
     )
     if st.button("Start Extraction"):
+        if pdf_file is not None and output_folder:
             # Save uploaded PDF to a temporary location
             temp_pdf_path = os.path.join(output_folder, pdf_file.name)
             with open(temp_pdf_path, "wb") as f:
             # Convert data to XLSX
             xlsx_data = convert_to_xlsx(extraction_data)
             col1, col2 = st.columns(2)
             with col1:
                     mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet')
         else:
+            st.error("Please upload a PDF file and provide an output folder path.")
     # Footer (Fixed Position)
     st.markdown(
         .footer {
             position: fixed;
             bottom: 0;
+            left: 0;
             width: 100%;
+            background-color: #F0F0F0;
+            font-family:cursive;
+            text-align: right;
+            padding: 5px 0;
+            font-size:20px;
+            font-weight: bold;
+            color: #FF0000;
         }
         </style>
         <div class="footer">
+            CREATED BY: CHINMAY BHALERAO
         </div>
         """,
         unsafe_allow_html=True
     )
 if __name__ == "__main__":
     main()