Spaces:
Sleeping
Sleeping
File size: 590 Bytes
37c1830 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
from typing import Callable, List
from autorag.data.parse import langchain_parse
from autorag.data.parse.base import _add_last_modified_datetime
from autorag.utils import result_to_dataframe
@result_to_dataframe(["texts", "path", "page", "last_modified_datetime"])
def original_parse(fn: Callable, **kwargs):
    """Run ``fn`` and pass its output through ``_add_last_modified_datetime``.

    The value returned from this body is handed to the
    ``result_to_dataframe`` decorator, which is configured with the names
    ``texts``, ``path``, ``page`` and ``last_modified_datetime``.
    NOTE(review): presumably the decorator turns the result into a DataFrame
    with those columns -- confirm against ``autorag.utils``.

    :param fn: Callable that is invoked with keyword arguments only.
    :param kwargs: Keyword arguments forwarded unchanged to ``fn``.
    :return: ``_add_last_modified_datetime(fn(**kwargs))``, before the
        decorator post-processes it.
    """
    parsed = fn(**kwargs)
    return _add_last_modified_datetime(parsed)
def parse_pdf(file_lists: List[str], parse_method: str = "pdfminer"):
    """Parse the given files by delegating to ``original_parse``.

    :param file_lists: Paths forwarded to the parser as ``data_path_list``.
    :param parse_method: Parser backend name forwarded as ``parse_method``;
        defaults to ``"pdfminer"``.
    :return: Whatever ``original_parse`` returns for these arguments.
    """
    parser_kwargs = {
        "data_path_list": file_lists,
        "parse_method": parse_method,
    }
    # NOTE(review): ``__wrapped__`` is presumably the undecorated
    # ``langchain_parse`` exposed by ``functools.wraps``, so that only
    # ``original_parse``'s own decorator shapes the output -- confirm.
    return original_parse(langchain_parse.__wrapped__, **parser_kwargs)
|