Spaces:
Runtime error
Runtime error
from swarms.utils.try_except_wrapper import try_except_wrapper | |
# Import pypdf, installing it on the fly if it is not available yet.
try:
    import pypdf
except ImportError:
    import subprocess
    import sys

    # Install with the interpreter that is running this module rather than
    # whatever "python" resolves to on PATH; otherwise the package can land
    # in a different environment and the retry below would still fail.
    # Fall back to "python" only if the interpreter path is unavailable.
    subprocess.check_call(
        [sys.executable or "python", "-m", "pip", "install", "pypdf"]
    )
    import pypdf
def pdf_to_text(pdf_path: str) -> str:
    """
    Converts a PDF file to a string of text.

    The file is opened in binary mode and read with ``pypdf.PdfReader``.
    The text of each page is extracted in page order and a newline is
    appended after every page.

    Args:
        pdf_path (str): The path to the PDF file to be converted.

    Returns:
        str: The text extracted from the PDF, one newline-terminated chunk
            per page. A PDF with no pages yields an empty string.

    Raises:
        FileNotFoundError: If the PDF file is not found at the specified path.
        Exception: If there is any other error in reading the PDF file.
    """
    try:
        with open(pdf_path, "rb") as file:
            pdf_reader = pypdf.PdfReader(file)
            # Extract while the file is still open, and join once instead of
            # growing a string with += on every page.
            # `or ""` is defensive: if a page reports no text as None
            # (TODO confirm whether pypdf can do this), treat it as empty
            # rather than failing on None + str.
            return "".join(
                (page.extract_text() or "") + "\n"
                for page in pdf_reader.pages
            )
    except FileNotFoundError as err:
        # Same exception type and message as before; `from err` keeps the
        # original OS error attached as the explicit cause.
        raise FileNotFoundError(
            f"The file at {pdf_path} was not found."
        ) from err
    except Exception as err:
        raise Exception(
            f"An error occurred while reading the PDF file: {err}"
        ) from err
# Example usage:
# text = pdf_to_text("test.pdf")
# print(text)