6QpgfMkKTVwUug committed on
Commit
a49df92
·
1 Parent(s): fd48f46

Final Version

Browse files
Files changed (1) hide show
  1. app.py +12 -10
app.py CHANGED
@@ -1,18 +1,19 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import tabula
4
- import PyPDF2
5
  import re
6
  import numpy as np
7
- import tempfile
8
  import os
 
 
9
 
10
- def pdf_processing(temp_file_path):
11
- def extract_tables_from_pdf(temp_file_path):
12
  # Open the PDF file in read-binary mode
13
- with open(temp_file_path, 'rb') as file:
14
  # Create a PDF reader object
15
- pdf_reader = PyPDF2.PdfReader(temp_file_path)
16
 
17
  # Initialize a list to store the extracted tables
18
  tables = []
@@ -20,7 +21,7 @@ def pdf_processing(temp_file_path):
20
  # Iterate over each page in the PDF
21
  for page_number in range(len(pdf_reader.pages)):
22
  # Extract the page as a DataFrame using tabula-py
23
- df = tabula.read_pdf(temp_file_path, pages=page_number+1, multiple_tables=True)
24
 
25
  # Append the extracted DataFrame to the tables list
26
  tables.append(df)
@@ -62,7 +63,7 @@ def pdf_processing(temp_file_path):
62
  }
63
 
64
  # Example usage
65
- extracted_tables = extract_tables_from_pdf(temp_file_path)
66
 
67
  # Create a new DataFrame
68
  new_df = pd.DataFrame(columns=['LVA', 'Typ', 'SSt.', 'ECTS', 'Datum', 'Beurteilung'])
@@ -133,7 +134,8 @@ def pdf_processing(temp_file_path):
133
  return column_sum, np.round(wm, 2), final_df, csv_save_path
134
 
135
  # Define the Gradio interface
136
- inputs = gr.inputs.Textbox(label="Enter the PDF file path:")
 
137
 
138
  outputs = [
139
  gr.outputs.Textbox(label="Total ECTS"),
@@ -149,5 +151,5 @@ iface = gr.Interface(
149
  )
150
 
151
  # Launch the Gradio interface
152
- iface.launch(share=True)
153
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import tabula
4
+ from PyPDF2 import PdfReader
5
  import re
6
  import numpy as np
 
7
  import os
8
+ import tempfile
9
+ import shutil
10
 
11
+ def pdf_processing(pdf_file):
12
+ def extract_tables_from_pdf(pdf_file):
13
  # Open the PDF file in read-binary mode
14
+ with open(pdf_file.name, 'rb') as file:
15
  # Create a PDF reader object
16
+ pdf_reader = PdfReader(file)
17
 
18
  # Initialize a list to store the extracted tables
19
  tables = []
 
21
  # Iterate over each page in the PDF
22
  for page_number in range(len(pdf_reader.pages)):
23
  # Extract the page as a DataFrame using tabula-py
24
+ df = tabula.read_pdf(pdf_file.name, pages=page_number+1, multiple_tables=True)
25
 
26
  # Append the extracted DataFrame to the tables list
27
  tables.append(df)
 
63
  }
64
 
65
  # Example usage
66
+ extracted_tables = extract_tables_from_pdf(pdf_file)
67
 
68
  # Create a new DataFrame
69
  new_df = pd.DataFrame(columns=['LVA', 'Typ', 'SSt.', 'ECTS', 'Datum', 'Beurteilung'])
 
134
  return column_sum, np.round(wm, 2), final_df, csv_save_path
135
 
136
  # Define the Gradio interface
137
+ inputs = gr.inputs.File(label="Select PDF file", type="file")
138
+ #inputs = gr.inputs.Textbox(label="Enter the PDF file path:")
139
 
140
  outputs = [
141
  gr.outputs.Textbox(label="Total ECTS"),
 
151
  )
152
 
153
  # Launch the Gradio interface
154
+ iface.launch(share=False)
155