File size: 500 Bytes
f575c3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import os
import PyPDF2
from PIL import Image
import pytesseract

# Location where the PDF resumes and job applications are kept
pdf_directory = '/content/pdf_files'

# Location that receives the text extracted from those PDFs
text_directory = '/content/extracted_text'

# Location that receives OCR output produced for scanned PDFs
ocr_directory = '/content/ocr_output'

# Make sure each working directory is present. exist_ok=True means a
# directory that already exists is left alone instead of raising.
for _working_dir in (pdf_directory, text_directory, ocr_directory):
    os.makedirs(_working_dir, exist_ok=True)