Razzaqi3143 committed on
Commit
6f87096
·
verified ·
1 Parent(s): 33e47ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -3
app.py CHANGED
@@ -2,11 +2,37 @@ from pdfminer.high_level import extract_text
2
 
3
  def extract_pdf_text(file_path):
4
  return extract_text(file_path)
5
- from google.colab import drive
6
- drive.mount('/content/drive')
 
 
 
 
 
 
 
 
 
 
 
 
7
  import os
8
 
9
- pdf_folder = '/content/drive/MyDrive'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  pdf_texts = []
11
 
12
  for pdf_file in os.listdir(pdf_folder):
 
2
 
3
  def extract_pdf_text(file_path):
4
  return extract_text(file_path)
5
+ #from google.colab import drive
6
+
7
+ import zipfile
8
+ import os
9
+
10
+ # Path to the uploaded zip file
11
+ zip_file_path = './data.zip'
12
+ extract_folder = './data'
13
+
14
+ # Unzip the file if the directory does not already exist
15
+ if not os.path.exists(extract_folder):
16
+ with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
17
+ zip_ref.extractall(extract_folder)
18
+
19
  import os
20
 
21
+ pdf_folder = './data'
22
+
23
+ # List all PDF files in the directory
24
+ pdf_files = [f for f in os.listdir(pdf_folder) if f.endswith('.pdf')]
25
+
26
+ # Process each PDF file
27
+ for pdf_file in pdf_files:
28
+ pdf_path = os.path.join(pdf_folder, pdf_file)
29
+ # Your code to process PDF
30
+
31
+
32
+ #drive.mount('/content/drive')
33
+ #import os
34
+
35
+ #pdf_folder = '/content/drive/MyDrive'
36
  pdf_texts = []
37
 
38
  for pdf_file in os.listdir(pdf_folder):