Spaces:

marcelhuber
/

kusssbuddy

Sleeping

App Files Community

6QpgfMkKTVwUug committed on Jul 8, 2023

Commit

a49df92

1 Parent(s): fd48f46

Final Version

Browse files

Files changed (1) hide show

app.py +12 -10

app.py CHANGED Viewed

@@ -1,18 +1,19 @@
 import gradio as gr
 import pandas as pd
 import tabula
-import PyPDF2
 import re
 import numpy as np
-import tempfile
 import os
-def pdf_processing(temp_file_path):
-    def extract_tables_from_pdf(temp_file_path):
         # Open the PDF file in read-binary mode
-        with open(temp_file_path, 'rb') as file:
             # Create a PDF reader object
-            pdf_reader = PyPDF2.PdfReader(temp_file_path)
             # Initialize a list to store the extracted tables
             tables = []
@@ -20,7 +21,7 @@ def pdf_processing(temp_file_path):
             # Iterate over each page in the PDF
             for page_number in range(len(pdf_reader.pages)):
                 # Extract the page as a DataFrame using tabula-py
-                df = tabula.read_pdf(temp_file_path, pages=page_number+1, multiple_tables=True)
                 # Append the extracted DataFrame to the tables list
                 tables.append(df)
@@ -62,7 +63,7 @@ def pdf_processing(temp_file_path):
         }
     # Example usage
-    extracted_tables = extract_tables_from_pdf(temp_file_path)
     # Create a new DataFrame
     new_df = pd.DataFrame(columns=['LVA', 'Typ', 'SSt.', 'ECTS', 'Datum', 'Beurteilung'])
@@ -133,7 +134,8 @@ def pdf_processing(temp_file_path):
     return column_sum, np.round(wm, 2), final_df, csv_save_path
 # Define the Gradio interface
-inputs = gr.inputs.Textbox(label="Enter the PDF file path:")
 outputs = [
     gr.outputs.Textbox(label="Total ECTS"),
@@ -149,5 +151,5 @@ iface = gr.Interface(
     )
 # Launch the Gradio interface
-iface.launch(share=True)

 import gradio as gr
 import pandas as pd
 import tabula
+from PyPDF2 import PdfReader
 import re
 import numpy as np
 import os
+import tempfile
+import shutil
+def pdf_processing(pdf_file):
+    def extract_tables_from_pdf(pdf_file):
         # Open the PDF file in read-binary mode
+        with open(pdf_file.name, 'rb') as file:
             # Create a PDF reader object
+            pdf_reader = PdfReader(file)
             # Initialize a list to store the extracted tables
             tables = []
             # Iterate over each page in the PDF
             for page_number in range(len(pdf_reader.pages)):
                 # Extract the page as a DataFrame using tabula-py
+                df = tabula.read_pdf(pdf_file.name, pages=page_number+1, multiple_tables=True)
                 # Append the extracted DataFrame to the tables list
                 tables.append(df)
         }
     # Example usage
+    extracted_tables = extract_tables_from_pdf(pdf_file)
     # Create a new DataFrame
     new_df = pd.DataFrame(columns=['LVA', 'Typ', 'SSt.', 'ECTS', 'Datum', 'Beurteilung'])
     return column_sum, np.round(wm, 2), final_df, csv_save_path
 # Define the Gradio interface
+inputs = gr.inputs.File(label="Select PDF file", type="file")
+#inputs = gr.inputs.Textbox(label="Enter the PDF file path:")
 outputs = [
     gr.outputs.Textbox(label="Total ECTS"),
     )
 # Launch the Gradio interface
+iface.launch(share=False)