Spaces:
Sleeping
Sleeping
import PyPDF2 | |
from clean_text import * | |
import requests | |
def read_pdf(file_pdf):
    """Extract and clean the text of every page of a PDF file.

    Args:
        file_pdf: Path of the PDF file; it is opened in binary mode.

    Returns:
        The cleaned text of all pages concatenated in page order, with
        leading and trailing whitespace stripped. ``None`` is returned
        implicitly when a ``requests`` error is caught and printed.

    Raises:
        Any exception other than ``requests.exceptions.RequestException``
        (for example ``OSError`` from ``open``) propagates to the caller.
    """
    try:
        # The context manager closes the file even if parsing or cleaning
        # raises, which the previous explicit close() call did not guarantee.
        with open(file_pdf, 'rb') as pdf_file:
            pdf_reader = PyPDF2.PdfReader(pdf_file)
            # Pages must be consumed while the file is still open, so the
            # join runs inside the ``with`` block.
            pdf_text = ''.join(
                clean_text(page.extract_text()) for page in pdf_reader.pages
            )
        return pdf_text.strip()
    except requests.exceptions.RequestException as e:
        # NOTE(review): only requests errors are handled here, presumably
        # because clean_text() may perform HTTP calls -- confirm. File and
        # PDF parsing errors are intentionally left to propagate, as before.
        print("Error:", e)