HoneyTian's picture
first commit
e94100d
raw
history blame contribute delete
432 Bytes
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
https://unstructured.io/
https://github.com/Unstructured-IO/unstructured
"""
import unstructured
import unstructured_inference
from unstructured.partition.pdf import partition_pdf
# Sample document used when no path is supplied. This is the absolute Windows
# path hard-coded in the original script; pass a different path to main() or
# on the command line to run against another file.
DEFAULT_PDF_PATH = r"E:\Users\tianx\intelli-zen\document_loaders\data\files\pdf\2024.naacl-long.35.pdf"


def main(filename: str = DEFAULT_PDF_PATH) -> None:
    """Partition a PDF with ``partition_pdf`` and print every element.

    Args:
        filename: Path of the PDF to partition. Defaults to
            ``DEFAULT_PDF_PATH``.

    Any exception raised by ``partition_pdf`` (for example for a missing
    file) is not caught here and propagates to the caller.
    """
    for element in partition_pdf(filename=filename):
        print(element)


if __name__ == "__main__":
    import sys

    # The work runs only when executed as a script, so importing this module
    # has no side effects. An optional first command-line argument overrides
    # the default PDF path; with no argument the behaviour matches the
    # original script.
    main(*sys.argv[1:2])