File size: 590 Bytes
37c1830
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
from typing import Callable, List

from autorag.data.parse import langchain_parse
from autorag.data.parse.base import _add_last_modified_datetime
from autorag.utils import result_to_dataframe


@result_to_dataframe(["texts", "path", "page", "last_modified_datetime"])
def original_parse(fn: Callable, **kwargs):
	"""
	Invoke a parsing callable and post-process its output.

	``fn`` is called with ``kwargs`` only (no positional arguments), and its return value
	is handed to ``_add_last_modified_datetime`` before being returned.

	NOTE(review): the ``result_to_dataframe`` decorator presumably turns the returned value
	into a DataFrame with the columns listed above -- confirm against ``autorag.utils``.

	:param fn: The parsing callable to run.
	:param kwargs: Keyword arguments forwarded unchanged to ``fn``.
	:return: The output of ``_add_last_modified_datetime`` (then processed by the decorator).
	"""
	return _add_last_modified_datetime(fn(**kwargs))

def parse_pdf(file_lists: List[str], parse_method: str = "pdfminer"):
	"""
	Parse the given files by running ``langchain_parse`` through ``original_parse``.

	The callable passed along is ``langchain_parse.__wrapped__``, i.e. the function stored
	under ``__wrapped__`` rather than ``langchain_parse`` itself.
	NOTE(review): presumably this bypasses the decorator applied to ``langchain_parse`` -- confirm.

	:param file_lists: File paths to parse; forwarded as ``data_path_list``.
	:param parse_method: Name of the parse method, forwarded as ``parse_method``.
		Default is "pdfminer".
	:return: Whatever ``original_parse`` returns for these arguments.
	"""
	parse_kwargs = {"data_path_list": file_lists, "parse_method": parse_method}
	return original_parse(langchain_parse.__wrapped__, **parse_kwargs)